天天看點

哔哩哔哩網視訊彈幕-單個爬取

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

# Browser-like HTTP request headers (the User-Agent identifies as Firefox 55
# on 64-bit Windows), intended to be sent along with outgoing requests.
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) "
        "Gecko/20100101 Firefox/55.0"
    ),
}


def get(cid='33532891'):
    """Download the danmaku (bullet comments) of one Bilibili video and save them.

    Fetches ``http://comment.bilibili.com/<cid>.xml``, collects the text of
    every ``<d>`` element, removes duplicates, then prints each comment and
    appends it (followed by a newline) to the output file through
    ``barrage_download``.

    Args:
        cid: Comment id of the video. To find it in the browser:
            F12 -> heartbeat (Name) -> headers -> cid. Defaults to the id
            this script was originally written for.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection failure or timeout.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # 1. Build the comment-feed URL from the video's cid.
    url = 'http://comment.bilibili.com/{}.xml'.format(cid)
    # Send the browser-like headers and bound the wait so that a stalled
    # connection cannot hang the script forever.
    req = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of parsing an error page as if it were comments.
    req.raise_for_status()
    html_doc = str(req.content, 'utf-8')  # decode the raw bytes as UTF-8
    # 2. Parse the document and pull the text out of every <d> element.
    soup = BeautifulSoup(html_doc, "lxml")
    contents = [x.text for x in soup.find_all('d')]
    # 3. Save the results. dict.fromkeys() drops duplicates like set() did,
    #    but keeps first-seen order (dicts are insertion-ordered on 3.7+),
    #    so the output is reproducible between runs.
    for li in dict.fromkeys(contents):
        print(li)
        barrage_download(li + "\n")


def barrage_download(barrage_url):
    """Append one piece of text to ``./test.txt``, encoded as UTF-8.

    The file is opened in append mode, so it is created when missing and
    earlier content is kept.

    Args:
        barrage_url: Text to append. It is written as-is; no newline is
            added. NOTE(review): despite the name, the caller visible in
            this file passes comment text, not a URL.
    """
    # The context manager closes the file even if the write raises, which
    # the previous manual open()/close() pair did not guarantee.
    with open('./test.txt', 'a', encoding='utf-8') as f:
        f.write(barrage_url)


# Run the scraper only when this file is executed directly, not on import.
if __name__ == '__main__':
    get()

           

繼續閱讀