# 天天看點

# 爬蟲:網易雲流行歌手

import requests,os,re,socket

from bs4 import BeautifulSoup

import urllib.request

# Create the download root (if it does not exist yet) and make it the current
# working directory.  ``exist_ok=True`` replaces the former bare ``except:``,
# which hid every kind of failure and then simply retried the same ``chdir``.
os.makedirs("流行歌手", exist_ok=True)
os.chdir("流行歌手")

# Absolute path of the download root; the per-artist code further down uses it
# as the parent of every artist folder.
wz = os.getcwd()

# HTTP request headers sent with every artist-page request; the User-Agent
# identifies the client as Firefox 74 on 64-bit Windows 10.
# Fixes: the "Accept" value used to open with a double quote and close with a
# single quote (a SyntaxError), and its wildcard media range was written "**"
# instead of the valid "*/*".
headers = {
    "Host": "music.163.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
}

# Artist ids to crawl (the value placed after "artist?id=" in the page URL).
# The original code looped over the builtin ``id`` function, which raises
# ``TypeError`` because a function is not iterable.  The intended id list is
# not present in this file, so it must be filled in here before running.
ARTIST_IDS = []

ARTIST_PAGE = "https://music.163.com/artist?id="
SONG_URL_PREFIX = "http://music.163.com/song/media/outer/url"


def safe_filename(name):
    """Return *name* made usable as a single file or directory name.

    Path separators and the other characters Windows rejects in file names
    (backslash, slash, colon, asterisk, question mark, double quote, angle
    brackets, pipe) are replaced by ``_``.  Surrounding whitespace and
    trailing dots/spaces are trimmed.  An empty result becomes ``"unnamed"``.
    """
    cleaned = re.sub(r'[\\/:*?"<>|]', "_", name).strip().rstrip(". ")
    return cleaned or "unnamed"


def extract_artist_name(page_text, fallback):
    """Return the value of the first ``"title": "..."`` pair in *page_text*.

    Returns *fallback* when the page contains no such pair, instead of
    failing with ``AttributeError`` on a missing regex match.
    """
    match = re.search(r'"title": "(.+?)"', page_text)
    return match.group(1) if match else fallback


def song_url(href):
    """Build the download URL for one song link taken from the artist page.

    The first five characters of *href* are dropped and the rest is inserted
    between the fixed URL prefix and ``.mp3``.
    NOTE(review): this presumably expects hrefs shaped like ``/song?id=123``
    (so that ``?id=123`` remains) - confirm against a live page.
    """
    return SONG_URL_PREFIX + href[5:] + ".mp3"


def crawl_artist(session, artist_id):
    """Download every song linked from one artist page into its own folder.

    Args:
        session: a ``requests`` session used to fetch the artist page.
        artist_id: the artist id appended to the artist page URL.

    The folder is created under the download root ``wz`` and is made the
    current working directory, as the original code did.  A failed song
    download is reported and skipped; it does not stop the remaining songs.
    """
    # One request is enough: ``.text`` and ``.content`` come from the same
    # response (the original issued the identical GET twice).
    page = session.get(ARTIST_PAGE + str(artist_id), headers=headers, timeout=30)

    artist_name = extract_artist_name(page.text, "artist_%s" % artist_id)
    print(artist_name)

    artist_dir = os.path.join(wz, safe_filename(artist_name))
    os.makedirs(artist_dir, exist_ok=True)
    # Keep the original side effect: code after the loop writes relative to
    # the current directory, which used to be the last artist's folder.
    os.chdir(artist_dir)

    soup = BeautifulSoup(page.content, 'lxml')
    song_list = soup.find('ul', {'class': 'f-hide'})
    if song_list is None:
        # No song list in the returned page; nothing to download.
        print('找不到歌曲清單,跳過', artist_name)
        return

    # href=True skips anchors without an href attribute.
    songs = [
        (link.text, song_url(link['href']))
        for link in song_list.find_all('a', href=True)
    ]

    for title, url in songs:
        target = os.path.join(artist_dir, safe_filename(title) + '.mp3')
        print('正在下載下傳', title)
        try:
            urllib.request.urlretrieve(url, target)
        except Exception as exc:
            # Best effort per song, as before, but Ctrl-C is no longer
            # swallowed and the reason for the failure is shown.
            print('下載下傳失敗', exc)
        else:
            print('下載下傳成功')


if ARTIST_IDS:
    with requests.Session() as session:
        for artist_id in ARTIST_IDS:
            try:
                crawl_artist(session, artist_id)
            except requests.RequestException as exc:
                # A page that cannot be fetched should not abort the batch.
                print('歌手頁面請求失敗', artist_id, exc)
else:
    print('ARTIST_IDS 為空,沒有要采集的歌手')

# Drop a small marker file into the current working directory to signal that
# the run reached its end.
report_text = "已經完成了采集任務"
with open("報告.txt", mode="w+", encoding="utf-8") as report:
    report.write(report_text)

# 有任何技術問題或其他問題,可以加 QQ 2604086068 聯繫我