# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
# Browser-like HTTP request headers (a Firefox 55 on Windows 7 profile).
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) "
        "Gecko/20100101 Firefox/55.0"
    ),
}
def get(cid='33532891'):
    """Download the comment XML of one Bilibili video and save its comments.

    The XML document at ``http://comment.bilibili.com/<cid>.xml`` is fetched,
    the text of every ``<d>`` element is extracted, duplicates are dropped,
    and each remaining comment is printed and appended (one per line) to the
    output file via ``barrage_download``.

    Args:
        cid: Identifier of the video's comment stream. To find it, open the
            browser developer tools (F12), select the "heartbeat" request
            and read ``cid`` from its headers. Defaults to the cid that was
            previously hard-coded here.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # 1. Fetch the comment XML for the given cid.
    url = 'http://comment.bilibili.com/{}.xml'.format(cid)
    # Send the module-level browser-like headers and bound the wait so a
    # stalled connection cannot hang the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing (and saving) an error page.
    response.raise_for_status()
    # Decode the raw bytes explicitly as UTF-8 rather than relying on the
    # encoding requests would guess for ``response.text``.
    xml_text = response.content.decode('utf-8')

    # 2. Parse: collect the text of every <d> element.
    soup = BeautifulSoup(xml_text, "lxml")
    comments = [node.text for node in soup.find_all('d')]

    # 3. Store the results. dict.fromkeys() removes duplicates like set()
    # did, but keeps first-seen order (guaranteed on Python 3.7+), so the
    # output is reproducible between runs.
    for comment in dict.fromkeys(comments):
        print(comment)
        barrage_download(comment + "\n")
def barrage_download(barrage_url):
    """Append a piece of text to ``./test.txt`` (UTF-8 encoded).

    Args:
        barrage_url: The text to append. Despite the parameter name, the
            value is written verbatim; the caller is responsible for
            including any trailing newline.
    """
    # The context manager guarantees the file is closed even if write()
    # raises, which the manual open()/close() pair did not.
    with open('./test.txt', 'a', encoding='utf-8') as out_file:
        out_file.write(barrage_url)
# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    get()