import json
from urllib.parse import urlencode
import requests
from lxml import etree
from requests import RequestException
from selenium import webdriver
import time
import csv
def read_csv():
    """Yield ``(artist_name, artist_id)`` pairs from the artists CSV file.

    Reads ``files/music_163_artists.csv`` (UTF-8, relative to the current
    working directory). Each data row is expected to hold exactly two
    fields: the artist name followed by the artist id.

    Yields:
        tuple[str, str]: the artist name and artist id of each data row.

    Raises:
        ValueError: if a non-empty row does not contain exactly two fields.
    """
    # newline="" lets the csv module do its own newline handling.
    with open("files/music_163_artists.csv", "r", encoding="utf-8", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader returns an empty list for a blank line; skip it
            # instead of failing on the unpack below.
            if not row:
                continue
            artist_name, artist_id = row
            # Skip the header row. This must be an equality test: an
            # identity test against a literal is not true for strings read
            # from a file.
            if artist_id == "artist_id":
                continue
            yield artist_name, artist_id
    # The file is closed automatically once control leaves the with block.
def get_toal(music_id, song_name, driver):
    """Return a song's comment-count text when it exceeds 100000.

    Loads the song page in ``driver``, reads the first
    ``<span class="j-flag">`` text node from the ``g_iframe`` frame and
    interprets it as the total number of comments.

    Args:
        music_id: song id as a string (it is concatenated into the URL).
        song_name: song title, used only in the progress messages.
        driver: a Selenium WebDriver instance.

    Returns:
        The comment-count text as found on the page when it parses to an
        integer greater than 100000; otherwise ``None`` (including when the
        count element is missing or is not numeric).
    """
    url = "https://music.163.com/#/song?id=" + music_id
    driver.get(url)
    # Switch into the "g_iframe" frame before reading the page source.
    # switch_to.frame is used (as in main) because the older
    # switch_to_frame method is no longer available in Selenium 4.
    driver.switch_to.frame("g_iframe")
    # Fixed 3-second pause to give the page time to finish loading.
    time.sleep(3)
    html = etree.HTML(driver.page_source)
    comments = html.xpath("//span[@class='j-flag']/text()")

    # The count element may be absent or hold non-numeric text; treat both
    # as "no usable count" rather than raising.
    count_text = comments[0] if comments else None
    try:
        count = int(count_text) if count_text is not None else None
    except ValueError:
        count = None

    if count is not None and count > 100000:
        print("擷取 %s 的評論 %s 存儲" % (song_name, count_text))
        return count_text
    print("擷取 %s 的評論 %s 廢棄" % (song_name, count_text))
    return None
def write_to_csv(song_name, song_url, artist_name, driver):
    """Append an artist's qualifying hot songs to ``./songs/hotsongs.csv``.

    For every ``(name, url)`` pair, the song id is taken from the text after
    the last ``=`` in the URL and passed to ``get_toal``. When ``get_toal``
    returns a value, one row is appended:
    ``[name, full_url, comment_count, artist_name]``.

    Args:
        song_name: iterable of song titles.
        song_url: iterable of site-relative song URLs, paired with
            ``song_name`` by position.
        artist_name: artist name written in the last column.
        driver: Selenium WebDriver instance forwarded to ``get_toal``.
    """
    # Use a context manager so the file is flushed and closed even when an
    # error escapes the loop; the file was previously never closed.
    with open('./songs/hotsongs.csv', 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # Header row intentionally not written here (append mode):
        # song name, song url, total comments, artist.
        for name, url in zip(song_name, song_url):
            music_id = url.split('=')[-1]
            url = "https://music.163.com/#" + url
            if name is None:
                continue
            try:
                song_comments = get_toal(music_id, name, driver)
                if song_comments is not None:
                    writer.writerow([name, url, song_comments, artist_name])
            except Exception as msg:
                # Best effort: report the failure for this song and carry
                # on with the remaining songs.
                print(msg)
def main(driver):
    """Collect the hot songs of every artist yielded by ``read_csv``.

    For each artist the artist page is loaded in ``driver``, the song
    titles and song links are extracted from the ``g_iframe`` frame, and
    both lists are handed to ``write_to_csv``.

    Args:
        driver: a Selenium WebDriver instance; one browser is reused for
            all artists so it is not reopened for every page.
    """
    for artist_name, artist_id in read_csv():
        # Ignore rows whose id field is the literal header text.
        if artist_id == 'artist_id':
            continue

        artist_page = "https://music.163.com/#/artist?id=" + str(artist_id)
        print("正在擷取{}的熱門歌曲...".format(artist_name))
        driver.get(artist_page)
        # Switch into the "g_iframe" frame before reading the page source.
        driver.switch_to.frame("g_iframe")
        # Fixed 2-second pause to give the page time to finish loading.
        time.sleep(2)

        tree = etree.HTML(driver.page_source)
        titles = tree.xpath("//span[@class='txt']/a/b/@title")
        links = tree.xpath("//span[@class='txt']/a/@href")

        # Persist the qualifying songs to the CSV file.
        write_to_csv(titles, links, artist_name, driver)
        print("{}的熱門歌曲寫入到本地成功!".format(artist_name))
if __name__ == "__main__":
    # Script entry point: start one Chrome session and reuse it for the
    # whole crawl.
    driver = webdriver.Chrome(executable_path="/www/spider-music163/songs/chromedriver.exe")
    try:
        main(driver)
    finally:
        # Always shut the browser down, even when the crawl fails, so the
        # Chrome and chromedriver processes are not left running.
        driver.quit()