# Scrape the novel "Romance of the Three Kingdoms" (三國演義)
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Use bs4 to scrape the full text of the novel from
# http://www.shicimingju.com/book/sanguoyanyi.html
# The index page is parsed for each chapter's title and detail-page URL; every
# detail page is then fetched and its chapter text appended to ./sanguo.txt.
url = 'http://www.shicimingju.com/book/sanguoyanyi.html'

# `headers` was used by the requests below but never defined, which raised
# NameError. A browser-like User-Agent is supplied here.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    ),
}

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would hang the script indefinitely.
timeout = 10

# Fetch the index page and fail loudly on an HTTP error status instead of
# parsing an error page.
index_response = requests.get(url, headers=headers, timeout=timeout)
index_response.raise_for_status()

# Data parsing (chapter titles and detail-page URLs). The raw bytes are handed
# to BeautifulSoup so that the parser detects the page encoding itself rather
# than relying on the charset requests guesses from the HTTP headers.
page_text = index_response.content
soup = BeautifulSoup(page_text, 'lxml')

# Locate the <a> tag of every chapter title in the table of contents.
a_list = soup.select('.book-mulu > ul > li > a')

# The context manager guarantees the output file is flushed and closed even if
# a request or a parse step raises part-way through.
# NOTE: mode 'a+' appends, so re-running the script adds the chapters again.
with open('./sanguo.txt', 'a+', encoding='utf-8') as fp:
    for a in a_list:
        # get_text() returns '' where .string would return None (an <a> tag
        # with nested markup), so the concatenation below cannot fail.
        title = a.get_text()
        # Resolve the href against the index URL; this handles both
        # site-absolute paths and fully qualified links.
        detail_url = urljoin(url, a['href'])

        # Fetch the chapter detail page.
        detail_response = requests.get(
            url=detail_url, headers=headers, timeout=timeout
        )
        detail_response.raise_for_status()

        # Parse the chapter content out of the detail page. A separate name
        # is used so the index-page `soup` is not overwritten.
        detail_soup = BeautifulSoup(detail_response.content, 'lxml')
        content_div = detail_soup.find('div', class_='chapter_content')
        if content_div is None:
            # The page layout differs from what is expected; report it and
            # continue with the remaining chapters instead of crashing.
            print(title, 'skipped: no chapter_content div at', detail_url)
            continue

        fp.write(title + ':' + content_div.text + '\n')
        print(title, '下載下傳成功!')