天天看点

python get请求 url传参_requests的get请求url参数、url重定向处理及cookies

需求:在百度搜索www.python66.com,然后将搜索结果保存到文件bd_python66.html

百度搜索的url:https://www.baidu.com/s?wd=搜索词

params参数进行url传参,代码如下:

# -*- coding: utf-8 -*-

import requests

import re

def get_html(url, key_value, retry=2):
    """Fetch *url* with query parameters and return the response body as text.

    The request is sent with the module-level ``headers`` dict and a
    5-second timeout. If the request raises, the error is printed and the
    request is retried up to ``retry`` more times.

    Args:
        url: Base URL to request.
        key_value: Mapping passed to ``requests.get`` as ``params``; it is
            encoded into the URL query string.
        retry: Number of additional attempts allowed after a failure.

    Returns:
        The response text decoded as UTF-8, or ``None`` if every attempt
        failed.
    """
    try:
        r = requests.get(url=url, headers=headers, params=key_value, timeout=5)
    except requests.RequestException as e:
        print(e)
        if retry > 0:
            # Forward key_value (the original call dropped it, shifting
            # retry-1 into the params slot) and propagate the retried result.
            return get_html(url, key_value, retry - 1)
        return None
    else:
        # Force UTF-8 so the Chinese page text decodes correctly.
        r.encoding = 'utf-8'
        return r.text

if __name__ == "__main__":
    # Custom request headers; get_html reads this module-level name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
    }

    # Note this url: the query string is left empty here and is filled in
    # from the params dict below.
    url = 'https://www.baidu.com/s?'
    kw = {'wd': 'www.python66.com'}

    html = get_html(url, kw)

    # Extract the page title. html is None when the request failed, and
    # re.search returns None when no <title> tag is found, so guard both.
    title = re.search(r'<title>(.*?)</title>', html) if html else None
    if title:
        print(title.group(1))
    else:
        print('未获取到网页title')

D:python3installpython.exe D:/python/py3script/test.py

www.python66.com_百度搜索

Process finished with exit code 0

url重定向演示,Github 将所有的 HTTP 请求重定向到 HTTPS。案例代码如下:

# -*- coding: utf-8 -*-

import requests

def get_html(url, retry=2):
    """Request *url* and print its redirect history, final URL and status code.

    Uses the module-level ``headers`` dict and a 5-second timeout. When the
    request raises, the exception is printed and the request is repeated
    until ``retry`` extra attempts have been used up. Returns ``None``.
    """
    attempts_left = retry
    while True:
        try:
            response = requests.get(url=url, headers=headers, timeout=5)
        except Exception as err:
            print(err)
            if attempts_left > 0:
                attempts_left -= 1
                continue
            # Out of retries: give up without printing any response details.
            return
        # Success path: report how the request was redirected.
        print('重定向', response.history)
        print('重定向后的请求url', response.url)
        print(response.status_code)
        return

if __name__ == "__main__":
    # Custom request headers (read by get_html as a module-level name).
    # The User-Agent literal is split only for line length; adjacent string
    # literals are concatenated into the same single value.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/72.0.3626.121 Safari/537.36'
        ),
    }

    # Plain-HTTP address, used to demonstrate the redirect.
    url = 'http://github.com/'
    get_html(url)

D:python3installpython.exe D:/python/py3script/test.py

重定向 [<Response [301]>]

重定向后的请求url https://github.com/

200

Process finished with exit code 0

通过 allow_redirects 参数禁用重定向处理:

# -*- coding: utf-8 -*-

import requests

def get_html(url, retry=2):
    """Request *url* with redirects disabled and print what came back.

    Uses the module-level ``headers`` dict, ``allow_redirects=False`` and a
    5-second timeout. A failing request prints its exception and is tried
    again until ``retry`` extra attempts are exhausted. Returns ``None``.
    """
    remaining = retry
    resp = None
    # Compare against None explicitly rather than relying on the response's
    # truth value.
    while resp is None:
        try:
            resp = requests.get(
                url=url,
                headers=headers,
                allow_redirects=False,
                timeout=5,
            )
        except Exception as exc:
            print(exc)
            if remaining <= 0:
                # No attempts left; nothing to report.
                return
            remaining -= 1

    print('重定向', resp.history)
    print('请求url', resp.url)
    print(resp.status_code)

if __name__ == "__main__":
    # Custom request headers (read by get_html as a module-level name).
    # Adjacent string literals below concatenate into the same single
    # User-Agent value.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/72.0.3626.121 Safari/537.36'
        ),
    }

    # Plain-HTTP address; get_html requests it without following redirects.
    url = 'http://github.com/'
    get_html(url)

D:python3installpython.exe D:/python/py3script/test.py

重定向 []

请求url http://github.com/

301

Process finished with exit code 0

cookie自动登录:如果我们不登录人人网,是不能访问个人主页的。我们登录人人网后,通过浏览器抓包找到cookie,然后把cookie加到自己构造的请求头里面,再访问个人主页的url,一样可以获取到正常信息(推荐阅读:cookie是什么),代码如下:

# -*- coding: utf-8 -*-

import requests

import re

def get_html(url, retry=2):
    """Fetch *url* and return the response body as text.

    The request is sent with the module-level ``headers`` dict and a
    5-second timeout. If the request raises, the error is printed and the
    request is retried up to ``retry`` more times.

    Args:
        url: URL to request.
        retry: Number of additional attempts allowed after a failure.

    Returns:
        The response text, or ``None`` if every attempt failed.
    """
    try:
        r = requests.get(url=url, headers=headers, timeout=5)
    except requests.RequestException as e:
        print(e)
        if retry > 0:
            # Propagate the retried result; without this return a
            # successful retry would still hand None back to the caller.
            return get_html(url, retry - 1)
        return None
    else:
        return r.text

if __name__ == "__main__":
    # Custom request headers; get_html reads this module-level name.
    # The Cookie value is a logged-in browser session copied from a packet
    # capture, which is what lets the profile page load without logging in.
    # NOTE(review): "[email protected]" inside the cookie looks like
    # email-obfuscation residue from the page this code was copied from;
    # replace the whole value with a cookie from your own session.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Cookie': 'anonymid=jy80yf87nu48vb; depovince=BJ; jebecookies=b398e24f-6670-48af-a58e-e6fd6456bcd6|||||; _r01_=1; JSESSIONID=abcrmwfsBwR_ufXciZcWw; ick_login=8a9f224f-1671-41ed-ab25-74e0a42ac995; _de=96965DC06F71F402340E4CEC836F3769696BF75400CE19CC; p=cae405d7c6e785f089ca39606c9d88695; first_login_flag=1; [email protected]; ln_hurl=http://hdn.xnimg.cn/photos/hdn521/20101208/1350/h_main_sadA_14a1000031012f76.jpg; t=940ac4b9ace0423b80a81dee055637955; societyguester=940ac4b9ace0423b80a81dee055637955; id=347908095; xnsid=fd3328ae; ver=7.0; loginfrom=null; jebe_key=8cc12fc2-9a64-4553-85ea-671b395d345b%7Cef398f6216b3a86b3d29665bee53e231%7C1563415076620%7C1%7C1563415078837; jebe_key=8cc12fc2-9a64-4553-85ea-671b395d345b%7Cef398f6216b3a86b3d29665bee53e231%7C1563415076620%7C1%7C1563415078840; wp_fold=0',
    }

    url = 'http://www.renren.com/347908095/profile'
    html = get_html(url)

    # Extract the page title. html is None when the request failed, and
    # re.search returns None when no <title> tag is found, so guard both.
    title = re.search(r'<title>(.*?)</title>', html) if html else None
    if title:
        print(title.group(1))
    else:
        print('未获取到网页title')

D:python3installpython.exe D:/python/py3script/test.py

人人网 - 老董

Process finished with exit code 0

python get请求 url传参_requests的get请求url参数、url重定向处理及cookies