這兩天又寫了一個爬取豆瓣前250部高分電影的爬蟲,并把電影名字和圖檔儲存到本地。
用的是requests和BeautifulSoup。
@requires_authorization
import requests
from bs4 import BeautifulSoup
def get_(url):
    """Collect movie titles from the Douban Top 250 list and save the posters.

    Walks the list's 10 result pages, 25 movies per page, starting at
    ``url``. For each movie entry (``div.item``) the text of the first
    ``span.title`` is recorded, and the entry's first ``<img>`` is downloaded
    to ``img/<title>.jpg`` when the image request answers HTTP 200.

    Args:
        url: Base URL of the list; following pages are requested as
            ``url + '?start=<offset>&filter='``.

    Returns:
        The movie titles, in the order they appear on the pages.

    Raises:
        requests.RequestException: If a page or image request fails or
            times out (propagated from ``requests.get``).
    """
    # Local import keeps this function self-contained regardless of the
    # file's import header.
    import os

    page_count = 10   # the progress message below reports 10 pages in total
    page_size = 25    # 250 movies spread over 10 pages
    # NOTE(review): the original timeout value was lost from the source;
    # 10 seconds is an assumption -- confirm.
    timeout = 10

    # The images are written below 'img/'; create it instead of requiring
    # the user to prepare the folder by hand.
    os.makedirs('img', exist_ok=True)

    name_list = []
    page = url
    for turn in range(1, page_count + 1):
        r = requests.get(page, timeout=timeout)
        soup = BeautifulSoup(r.text, 'lxml')

        for item in soup.find_all('div', {'class': 'item'}):
            # Movie title.
            movie_text = item.find('span', {'class': 'title'}).text
            name_list.append(movie_text)

            # Poster URL; use the same timeout so a stalled image download
            # cannot hang the whole run.
            movie_img = item.find('img')['src']
            ir = requests.get(movie_img, timeout=timeout)

            # On success, store the poster under the movie's title.
            if ir.status_code == 200:
                # A path separator inside a title would otherwise point
                # into a non-existent sub-directory.
                safe_name = movie_text.replace('/', '_').replace('\\', '_')
                with open(os.path.join('img', safe_name + '.jpg'), 'wb') as f:
                    f.write(ir.content)

        # URL of the next page: the offset advances by one page of results.
        page = url + '?start=' + str(page_size * turn) + '&filter='
        print('完成第{}頁的儲存,共10頁'.format(turn))

    return name_list
def main():
    """Scrape the Douban Top 250 list and write the titles to moviename.txt.

    Passes the list's base URL to ``get_`` and writes the returned titles
    to ``moviename.txt`` in the current directory, one title per line,
    encoded as UTF-8.
    """
    url = 'http://movie.douban.com/top250'
    name_list = get_(url)

    # Write the movie titles to moviename.txt, one per line.
    with open('moviename.txt', 'w', encoding='utf-8') as f:
        f.writelines(name + '\n' for name in name_list)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()