天天看点

爬取有道翻译

刚入门学习爬虫，尝试写了一段爬取有道翻译的代码。

import urllib.request as ur
import urllib.parse as up
import chardet
import json
# Flat script: read text from stdin, POST it to the Youdao web-translate
# endpoint as a form, and print the translated text from the JSON reply.

# NOTE: the original author reported that a Chinese prompt string passed to
# input() raised an error in their environment (cause not determined), which
# is why the prompt is in English.
string = input('please enter the words needing to translate:')
URL = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields sent with the request.
# NOTE(review): 'salt' and 'sign' are fixed values, presumably captured from
# a browser session -- confirm the endpoint still accepts them.
data = {
    'i': string,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '1536587001028',
    'sign': '9fe501a15b60074aa1fbbdc15baeac93',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTIME',
    'typoResult': 'false',
}
# urlopen() needs the POST body as bytes, so url-encode and then encode.
data = up.urlencode(data).encode('utf-8')

# Send a browser-like User-Agent instead of urllib's default one.
# Alternative: build a headers dict and pass it as the third argument,
# ur.Request(URL, data, headers); add_header() does the same after creation.
request = ur.Request(URL, data)
request.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36')

# The context manager closes the HTTP response even if read() fails.
with ur.urlopen(request) as response:
    html = response.read()

# Detect the body's encoding with chardet; detect() may report None when it
# cannot decide, so fall back to UTF-8 rather than crash in decode().
type_encode = chardet.detect(html)['encoding'] or 'utf-8'
html = html.decode(type_encode)
html = json.loads(html)  # parse the JSON text into Python objects

# The subscripts were missing in the original (a syntax error); [0][0] picks
# the first entry of the nested 'translateResult' list.
# NOTE(review): assumes the reply is a list of lists of {'tgt': ...} dicts --
# confirm against a live response.
answer = html['translateResult'][0][0]['tgt']
print(answer)
           

继续阅读