# 天天看点

# 07 爬取知乎张佳玮文章

# Author:Nimo_Ding

import requests
from bs4 import BeautifulSoup
import csv
import openpyxl

# Scrape article metadata (title, URL, excerpt) for the Zhihu member
# "zhang-jia-wei" and store it twice: as CSV rows in zjw.csv and as worksheet
# rows in zjw.xlsx. Each article is also echoed to stdout.

# Sending a browser-like User-Agent with every request is a good habit.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
url='https://www.zhihu.com/api/v4/members/zhang-jia-wei/articles?'

# Header row shared by the CSV file and the worksheet so both stay in sync.
columns = ['标题', '链接', '摘要']

wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'zjw_articles'
sheet.append(columns)  # on a fresh sheet this fills A1:C1

try:
    # newline='' is required by the csv module so it controls line endings
    # itself; the with-block guarantees the file is closed even on errors.
    with open('zjw.csv', 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(columns)

        for page in range(2):
            params = {
                'include': 'data[*].comment_count,suggest_edit,is_normal,thumbnail_extra_info,thumbnail,can_comment,comment_permission,admin_closed_comment,content,voteup_count,created,updated,upvoted_followees,voting,review_info,is_labeled,label_info;data[*].author.badge[?(type=best_answerer)].topics',
                # Requests offsets 20 and 40.
                # NOTE(review): offset 0 is never requested, so the first 20
                # items of the listing are skipped - confirm this is intended.
                'offset': (page + 1) * 20,
                'limit': 20,
                'sort_by': 'created',
            }
            # A timeout keeps the script from hanging forever on a stalled
            # connection; raise_for_status surfaces HTTP errors before we try
            # to decode the body as JSON.
            res = requests.get(url, params=params, headers=headers, timeout=10)
            res.raise_for_status()

            for article in res.json()['data']:
                row = [article['title'], article['url'], article['excerpt']]
                print('标题为:{}\n链接为:{}\n摘要为:{}\n'.format(*row))
                # The csv writer quotes commas/newlines inside the excerpt,
                # so every article stays on one logical CSV record.
                writer.writerow(row)
                sheet.append(row)
finally:
    # Persist whatever was collected, even if a later page failed, so the
    # workbook matches what was already written to the CSV file.
    wb.save('zjw.xlsx')