# Douban Music scraper (豆瓣音樂爬取)
# coding=UTF-8
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import requests
import bs4
import json
import xlwt
# header = {
# 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'}
# Scrape the Douban Music "top250" listing pages and export one spreadsheet
# row per album (name, artist, release date, genre, rating) to an .xls file.

# Browser-like User-Agent sent with every request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'}

n = 1  # next worksheet row to fill; row 0 is reserved for the column titles
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('My Worksheet')
for _col, _title in enumerate(('音樂名稱', '音樂人', '發行日期', '音樂類型', '評分')):
    worksheet.write(0, _col, label=_title)


def _text_of(node):
    """Return the whitespace-trimmed text of *node*, or '' when it is None."""
    return node.get_text().strip() if node is not None else ''


# Offsets 0, 25, 50, 75, 100 (presumably 25 entries per listing page).
for a in range(0, 101, 25):
    # The "{}" placeholder is required: without it str.format() silently
    # ignores its argument and every iteration requests the same URL.
    url = "https://music.douban.com/top250?start={}".format(a)
    info = Request(headers=header, url=url)
    # Parse while the response is open, then let the context manager close it.
    with urlopen(info) as html:
        bs = bs4.BeautifulSoup(html, 'html.parser')
    alls = bs.find_all("div", {"class": "pl2"})
    for x in alls:
        music_name = _text_of(x.find('a'))
        # The <p> text is a '/'-separated line: the first field is written as
        # the artist, the second as the release date and the last as the genre.
        qita_splite = [part.strip() for part in _text_of(x.find('p')).split('/')]
        people_name = qita_splite[0]
        # Entries with fewer than two fields get an empty date instead of
        # raising IndexError and aborting the whole export.
        date = qita_splite[1] if len(qita_splite) > 1 else ''
        style = qita_splite[-1]
        # A missing rating element yields an empty cell rather than a crash.
        score = _text_of(x.find('span', {'class': 'rating_nums'}))
        for _col, _value in enumerate((music_name, people_name, date, style, score)):
            worksheet.write(n, _col, label=_value)
        n += 1
workbook.save('豆瓣音樂.xls')