requests簡介
- 我們已經講解了Python內建的urllib模組,用於存取網路資源。但是,它用起來比較麻煩,而且缺少很多實用的進階功能。更好的方案是使用requests。它是一個Python第三方函式庫,處理URL資源特別方便。
python爬蟲 - requests庫
安裝requests
- 如果安裝了Anaconda,requests就已經可用了。否則,需要在指令行下通過pip安裝:
$ pip install requests
requests_get
- 通過GET請求存取一個頁面
import os

import requests

# GET request whose query string is built from a dict of parameters.
url = 'https://www.baidu.com/s?'
data = {
    'wd': '中國',
}
# Send a browser-style User-Agent instead of the library default.
header = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64;'
        ' zh-CN; rv:1.9.2.10) Gecko/20100922'
        ' Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
# Without a timeout the call can block forever on an unresponsive server.
r = requests.get(url, headers=header, params=data, timeout=10)
# Other response attributes worth inspecting while experimenting:
# r.text, r.status_code, r.headers, r.url

# open() does not create missing directories, so make sure the target exists.
os.makedirs('Requests_file', exist_ok=True)
# Write the raw bytes (r.content) so no text decoding/re-encoding is involved.
with open('Requests_file/zhongguo.html', 'wb') as fp:
    fp.write(r.content)
requests_cookie
import requests

# Create a session so that cookies set by the login response are sent
# automatically with the follow-up profile request.
s = requests.Session()
# The login endpoint. The trailing " HTTP/1.1" that used to be here was part of
# a copied raw request line, not of the URL, and would have been sent as part
# of the uniqueTimestamp value.
post_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2019341636849'
# Send a browser-style User-Agent instead of the library default.
header = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64;'
        ' zh-CN; rv:1.9.2.10) Gecko/20100922'
        ' Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
# NOTE(review): the credentials below are hard-coded in the source; load them
# from the environment or a prompt before using this with a real account.
# NOTE(review): the 'f' value is already percent-encoded and requests will
# encode it again when building the form body - confirm that is intended.
formdata = {
    'email': '17320015926',
    'password': '123456',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'f': 'https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D_4eOtFSXfVrfNtOlNBgoyTjnVMk2CRdO44Rf-7VG4AG%26wd%3D%'
         '26eqid%3D8b5865030001e71f000000035caefb80',
}
# Log in; a timeout keeps the script from hanging on an unresponsive server.
r = s.post(url=post_url, headers=header, data=formdata, timeout=10)
# r.text holds the login response body if it needs to be inspected.

# Fetch the profile page through the same session.
get_url = 'http://www.renren.com/969564068/profile'
r = s.get(url=get_url, headers=header, timeout=10)
print(r.text)
# Save the raw response bytes to disk.
with open('renrenzhuyie.html', 'wb') as fp:
    fp.write(r.content)
requests代理
import requests

url = 'https://www.baidu.com/s?ie=UTF-8&wd=ip'
# The key selects which destination scheme is routed through the proxy; the
# value is the proxy's own URL and must include its scheme.
proxies = {
    'https': 'http://203.42.227.113:8080',
}
# Send a browser-style User-Agent instead of the library default.
header = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64;'
        ' zh-CN; rv:1.9.2.10) Gecko/20100922'
        ' Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
# Without a timeout the call can block forever if the proxy never answers.
r = requests.get(url=url, headers=header, proxies=proxies, timeout=10)
# Save the raw response bytes to disk.
with open('daili.html', 'wb') as fp:
    fp.write(r.content)
requests_post
import requests

url = 'http://cn.bing.com/ttranslationlookup?&IG=D6F5982DA96A4F8E98B007A143DEEEF6&IID=translator.5038.3'
# Form fields sent in the POST body.
formdata = {
    'from': 'en',
    'to': 'zh-CHS',
    'text': 'pig',
}
# Send a browser-style User-Agent instead of the library default.
header = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64;'
        ' zh-CN; rv:1.9.2.10) Gecko/20100922'
        ' Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
# Without a timeout the call can block forever on an unresponsive server.
r = requests.post(url=url, headers=header, data=formdata, timeout=10)
# r.json() parses the response body as JSON and raises if it is not valid JSON.
print(r.json())