# from urllib import request,parse
import hashlib
import json
import random
import time

import requests


class YouDao():
    """Minimal client for the Youdao web-translation form endpoint.

    The request is signed the way the site's ``fanyi.min.js`` does it:

        ts   : r = "" + (new Date).getTime()
        salt : i = r + parseInt(10 * Math.random(), 10)
        sign : n.md5("fanyideskweb" + e + i + "Nw(nmmbP%A-r6U3EUn]Aj")

    where ``e`` is the text to translate and ``i`` is the salt. Note that
    ``r`` is a string, so the salt is the timestamp with one random digit
    appended (string concatenation, not addition).

    Args:
        key: The text to translate (sent as form field ``i``).
        from_lang: Source-language code sent as ``from``.
        to_lang: Target-language code sent as ``to``.
    """

    _CLIENT = "fanyideskweb"
    _SECRET = "Nw(nmmbP%A-r6U3EUn]Aj"

    def __init__(self, key, from_lang='zh-CHS', to_lang='en'):
        self.base_url = 'http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
        self.key = key
        self.from_lang = from_lang
        self.to_lang = to_lang
        # ts must be set first: the salt is derived from it.
        self.ts = self.get_ts()
        self.salt = self.get_salt()

    def get_ts(self):
        """Return the current Unix time in milliseconds as a string."""
        return str(int(time.time() * 1000))

    def get_salt(self):
        """Return the salt: ``self.ts`` followed by one random digit (0-9).

        Mirrors ``i = r + parseInt(10 * Math.random(), 10)`` from the site's
        JavaScript, where ``r`` is the timestamp string.
        """
        return self.ts + str(random.randint(0, 9))

    def get_sign(self):
        """Return the hex MD5 of client id + text + salt + shared secret."""
        words = self._CLIENT + self.key + self.salt + self._SECRET
        return hashlib.md5(words.encode('utf-8')).hexdigest()

    def get_request(self, timeout=10):
        """POST the signed form, print the result and return it.

        Args:
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The ``smartResult`` entries when the reply has them, otherwise
            the translated segments collected from ``translateResult``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        data = {
            'i': self.key,
            'from': self.from_lang,
            'to': self.to_lang,
            'smartresult': "dict",
            'client': self._CLIENT,
            'salt': self.salt,
            'sign': self.get_sign(),
            'ts': self.ts,
            'bv': "e2a78ed30c66e16a857c5b6486a1d326",
            'doctype': "json",
            'version': '2.1',
            'keyfrom': "fanyi.web",
            # Spelled with a lowercase "l", exactly as in the captured payload.
            'action': 'FY_BY_REALTlME',
        }

        # Only these three headers are sent; the other headers captured from
        # the browser are listed in the notes near the bottom of this file.
        headers = {
            'Referer': 'http://fanyi.youdao.com/',
            'Cookie': 'OUTFOX_SEARCH_USER_ID=1389460813@123.125.1.12',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0',
        }

        resp = requests.post(self.base_url, data=data, headers=headers, timeout=timeout)
        resp.raise_for_status()
        info = self._extract_result(resp.json())
        print(info)
        return info

    @staticmethod
    def _extract_result(data_json):
        """Pick the translation out of the decoded JSON reply."""
        smart = data_json.get('smartResult')
        if smart and 'entries' in smart:
            return smart['entries']
        # NOTE(review): fallback shape (a list of lists of dicts carrying
        # 'tgt') is assumed from the author's earlier parsing code - confirm
        # against a live reply.
        return [
            part['tgt']
            for sentence in data_json.get('translateResult') or []
            for part in sentence
            if 'tgt' in part
        ]


# Analysis notes
# --------------
# Scraping steps:
#   1. Get the URL.
#   2. Fill in the form data and the headers.
#   3. Build the request and read the response.
#
# Form data captured from the browser:
#   i            "job"
#   from         "AUTO"
#   to           "AUTO"
#   smartresult  "dict"
#   client       "fanyideskweb"
#   salt         "15842516730329"
#   sign         "60f53618f3fa667e6d3192148c8c1a03"
#   ts           "1584251673032"
#   bv           "e2a78ed30c66e16a857c5b6486a1d326"
#   doctype      "json"
#   version      "2.1"
#   keyfrom      "fanyi.web"
#   action       "FY_BY_REALTlME"
#
# Headers captured from the browser:
#   Host: fanyi.youdao.com
#   User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0
#   Accept: application/json, text/javascript, */*; q=0.01
#   Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2
#   Accept-Encoding: gzip, deflate   (leave this one out when sending, so
#                                     that no compressed body is requested)
#   Content-Type: application/x-www-form-urlencoded; charset=UTF-8
#   X-Requested-With: XMLHttpRequest
#   Content-Length: 236
#   Origin: http://fanyi.youdao.com
#   Connection: keep-alive
#   Referer: http://fanyi.youdao.com/
#   Cookie: OUTFOX_SEARCH_USER_ID=-140868279@123.121.59.79; OUTFOX_SEARCH_USER_ID_NCOO=1766192849.313334; _ntes_nnid=700fcdc75a16b68417175b615d961ea3,1537510225088; YOUDAO_MOBILE_ACCESS_TYPE=1; JSESSIONID=aaa_XsoMzWSA17Bi3OCdx; ___rl__test__cookies=1584251673023
#
# Difficulties:
#   - The data is submitted with POST, so look for POST requests in the
#     browser's developer tools.
#   - In the POST data, "i" is the query text; salt, sign and ts change on
#     every request.
#   - To find how salt and sign are generated, format fanyi.min.js with a
#     code formatter (https://tool.oschina.net/codeformat/js) and search for
#     the "salt" and "sign" keywords:
#         salt: i    i = r + parseInt(10 * Math.random(), 10);
#         sign: n.md5("fanyideskweb" + e + i + "Nw(nmmbP%A-r6U3EUn]Aj")
#         ts:   r    r = "" + (new Date).getTime(),

if __name__ == '__main__':
    # Text to translate; it is carried in the POST form data.
    key = input(">>")
    youdao = YouDao(key).get_request()
# Original article: https://www.cnblogs.com/lizhihoublog/p/12550218.html