如何通过B站视频AV号找到弹幕对应的xml文件号
首先爬取视频网页,获取该视频页面的源码。
在源码中可以找到该视频的 AV 号(aid=8678034),
以及弹幕编号(cid=14295428)。
弹幕文件的地址由 cid 拼接而成,本例为 http://comment.bilibili.com/14295428.xml
请求该链接,即可获得该视频的弹幕内容(XML 格式)。
1 package BiliBili弹幕爬取;
2
3 import org.apache.http.HttpEntity;
4 import org.apache.http.client.methods.CloseableHttpResponse;
5 import org.apache.http.client.methods.HttpGet;
6 import org.apache.http.impl.client.CloseableHttpClient;
7 import org.apache.http.impl.client.HttpClients;
8 import org.apache.http.util.EntityUtils;
9
10 import java.util.regex.Matcher;
11 import java.util.regex.Pattern;
12
13 public class getBiliBiliBofqi {
14 public static void getBofqi(String aid) throws Exception{
15 CloseableHttpClient closeableHttpClient = HttpClients.createDefault() ;
16 HttpGet httpGet = new HttpGet("https://www.bilibili.com/video/av"+aid+"/") ;
17 CloseableHttpResponse httpResponse = closeableHttpClient.execute(httpGet) ;
18 HttpEntity httpEntity = httpResponse.getEntity() ;
19 String en= EntityUtils.toString(httpEntity) ;
20 //"cid=16496518&aid=9979006&pre_ad="
21 String con = "cid=(.*)?&aid=" ;
22 Pattern ah = Pattern.compile(con);
23 Matcher mr = ah.matcher(en);
24 while(mr.find()) {
25 String id = mr.group() ;
26 String newUrl = id.replace("cid=","") ;
27 String x = newUrl.replace("&aid=","") ;
28 HttpGet httpGet1 = new HttpGet("http://comment.bilibili.com/"+x+".xml");
29 CloseableHttpResponse httpResponse1 = closeableHttpClient.execute(httpGet1) ;
30 HttpEntity httpEntity1 = httpResponse1.getEntity() ;
31 String en1 = EntityUtils.toString(httpEntity1) ;
32 String c = "\">(.*?)<" ;
33 Pattern a = Pattern.compile(c);
34 Matcher m = a.matcher(en1);
35 while(m.find()){
36 String speak = m.group().replace("\">","") ;
37 speak = speak.replace("<","") ;
38 System.out.println(speak);
39 }
40 }
41 }
42 public static void main(String[] args) throws Exception{
43 getBofqi("8678034");
44 }
45 }
运行结果: