天天看點

下載三國演義(案例)

爬取三國演義
import requests
from bs4 import BeautifulSoup

# Scrape the full text of "Romance of the Three Kingdoms" with bs4 from
# http://www.shicimingju.com/book/sanguoyanyi.html
#
# Step 1: fetch the index page and extract every chapter title together with
#         the URL of its detail page.
# Step 2: fetch each detail page and append "<title>:<content>" to ./sanguo.txt.

# `headers` is not defined anywhere in the visible part of this file, so the
# first request would fail with NameError. Reuse a module-level `headers` if
# one already exists; otherwise fall back to a generic browser User-Agent.
headers = globals().get('headers') or {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    ),
}
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

url = 'http://www.shicimingju.com/book/sanguoyanyi.html'
# HTML source of the index page.
page_text = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text

# Parse the index page (chapter titles and detail-page URLs).
soup = BeautifulSoup(page_text, 'lxml')
# Select the <a> tag of every chapter title in the ".book-mulu" list.
a_list = soup.select('.book-mulu > ul > li > a')

# The context manager guarantees the output file is flushed and closed even if
# a request or a parse step raises part-way through the download.
with open('./sanguo.txt', 'a+', encoding='utf-8') as fp:
    for a in a_list:
        # get_text() always returns a str; a.string is None when the anchor
        # contains nested markup, which would break the concatenation below.
        title = a.get_text()
        detail_url = 'http://www.shicimingju.com' + a['href']

        # Fetch and parse the chapter's detail page. A separate name is used so
        # the index-page `soup` is not overwritten.
        page_text_detail = requests.get(
            url=detail_url, headers=headers, timeout=REQUEST_TIMEOUT
        ).text
        detail_soup = BeautifulSoup(page_text_detail, 'lxml')
        content_tag = detail_soup.find('div', class_='chapter_content')
        if content_tag is None:
            # find() returns None when the div is missing (e.g. an error page);
            # skip this chapter instead of aborting the whole download.
            print(title, '未找到章節内容,已跳過')
            continue
        content = content_tag.text

        fp.write(title + ':' + content + '\n')

        print(title, '下載下傳成功!')