前情回顧:有道翻譯參數破解
在上一篇部落格中,我們系統地分析了有道翻譯的JS逆向過程。不難看出,JS逆向其實就是用Python重新複寫JavaScript的代碼。但當JS代碼很長時,弄清其中的邏輯關係相當耗時,這時就需要
execjs
庫來幫忙了(它是一個可以執行JS代碼的Python庫),今天就以百度翻譯的參數破解為例進行說明。
目錄
-
- 一、分析參數構成
- 二、Python執行JS代碼
- 三、完善JS代碼
-
- 3.1 i 值建構
- 3.2 函數n建構
- 四、全部代碼
爬取網址為百度翻譯
一、分析參數構成
抓包的過程與有道翻譯類似,在此不再贅述。通過比較多次請求所攜帶的表單參數可知,參數
query
和
sign
是動態變化的,其中參數
query
為待翻譯的詞。
那麼我們只需要搜尋
sign
參數即可。如下圖所示,共出現10個JS檔案。依據經驗,我們可確定
sign
是在第一個JS檔案中構成的。
搜尋sign值,並在第7047行打下斷點進行調試,結果如下:
分析可知,參數
n
為翻譯的詞,而參數
sign
則是通過
f(n)
得到的。此時,我們點開
f
函數,複製其全部代碼並存為
baidu.js
,內容如下:
/**
 * Baidu Translate sign generator, exactly as copied from the site's script.
 *
 * NOTE: this listing is intentionally incomplete. It reads the free
 * identifiers `i`, `n`, `a` and `window`, none of which is defined here, so
 * calling it outside the original page fails (first on `i`).
 *
 * @param {string} r - the text to be translated (the `query` form field).
 * @returns {string} "<p>.<p ^ m>", where p is the hash reduced modulo 1e6 and
 *   m is the integer before the "." in the key string `i`.
 */
function e(r) {
// All surrogate pairs (characters outside the BMP) in the input, or null.
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
// Long input: keep only the first 10, the middle 10 and the last 10 UTF-16 units.
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
// Rebuild the text as an array `f` in which every surrogate pair is a single
// element: split around the pairs, then interleave the matched pairs back in.
// `a` is an external helper that is not part of this listing.
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
"" !== e[C] && f.push.apply(f, a(e[C].split(""))),
C !== h - 1 && f.push(o[C]);
var g = f.length;
// Same 10 + 10 + 10 truncation as above, but counted in elements of `f`.
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
// l is the string "gtk" (char codes 103, 116, 107).
var u = void 0
, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
// Use the key `i` if it is non-null, otherwise fall back to window.gtk (caching it in `i`).
u = null !== i ? i : (i = window[l] || "") || "";
// The key has the form "<m>.<s>"; both halves are parsed as numbers (0 if invalid).
// The loop then encodes `r` as UTF-8 bytes into S (1 to 4 bytes per character).
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
S[c++] = A >> 18 | 240,
S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
S[c++] = A >> 6 & 63 | 128),
S[c++] = 63 & A | 128)
}
// F is the string "+-a^+6" and D is "+-3^+b+-f"; both are passed to the
// external function `n`. Starting from m, every byte is added to the
// accumulator, which is then passed through n(p, F).
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
p += S[b],
p = n(p, F);
// Finalisation: one n(p, D) pass, XOR with s, map a negative 32-bit value to
// its unsigned equivalent, reduce modulo 1e6, then format "<p>.<p ^ m>".
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
二、Python執行JS代碼
與有道翻譯不同,這段JS若靠人工拆解其內在邏輯再用Python複寫會相當耗時,因此這裡我們使用Python中的
execjs
庫來直接執行JS代碼。
import execjs
def get_sign(word):
    """Return the value produced by calling the JS function ``e`` with ``word``.

    The JavaScript source is read from the saved ``baidu.js`` file, compiled
    with ``execjs`` and its function ``e`` is invoked with ``word`` as the
    only argument.
    """
    with open('c:/users/dell/desktop/baidu.js','r') as js_file:
        js_source = js_file.read()
    js_context = execjs.compile(js_source)
    return js_context.call('e', word)


# Demo call from the walkthrough; the text that follows describes it failing
# at this stage because baidu.js does not define `i` yet.
get_sign('tall')
執行結果如下
根據程式報錯結果可知,我們複制的JS代碼中缺少參數
i
的定義。這也提醒我們:在JS逆向的過程中,複製下來的JS代碼往往是不完整的,這時只能缺什麼補什麼,逐一補全。
三、完善JS代碼
3.1 i 值建構
這裡依舊是通過打斷點進行
i
值的尋找。在第2535行打下斷點,將滑鼠懸停在
i
,即可顯示其值
"320305.131321201"
(或者直接在console控制台輸入
i
)
知道參數
i
的值後,我們完善一下之前的JS代碼,如下所示
/**
 * Baidu Translate sign generator, second iteration of the walkthrough: the
 * key `i` is now defined locally.
 *
 * NOTE: this listing is still incomplete. The function `n` (and the helper
 * `a`, used only when the input contains surrogate pairs) is not defined
 * here, so the call still fails once the hashing loop is reached.
 *
 * @param {string} r - the text to be translated (the `query` form field).
 * @returns {string} "<p>.<p ^ m>", where p is the hash reduced modulo 1e6 and
 *   m is the integer before the "." in the key string `i`.
 */
function e(r) {
var i = "320305.131321201" // added: definition of the key `i` (value read in the debugger)
// All surrogate pairs (characters outside the BMP) in the input, or null.
var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
if (null === o) {
var t = r.length;
// Long input: keep only the first 10, the middle 10 and the last 10 UTF-16 units.
t > 30 && (r = "" + r.substr(0, 10) + r.substr(Math.floor(t / 2) - 5, 10) + r.substr(-10, 10))
} else {
// Rebuild the text as an array `f` in which every surrogate pair is a single
// element: split around the pairs, then interleave the matched pairs back in.
// `a` is an external helper that is not part of this listing.
for (var e = r.split(/[\uD800-\uDBFF][\uDC00-\uDFFF]/), C = 0, h = e.length, f = []; h > C; C++)
"" !== e[C] && f.push.apply(f, a(e[C].split(""))),
C !== h - 1 && f.push(o[C]);
var g = f.length;
// Same 10 + 10 + 10 truncation as above, but counted in elements of `f`.
g > 30 && (r = f.slice(0, 10).join("") + f.slice(Math.floor(g / 2) - 5, Math.floor(g / 2) + 5).join("") + f.slice(-10).join(""))
}
// l is the string "gtk" (char codes 103, 116, 107).
var u = void 0
, l = "" + String.fromCharCode(103) + String.fromCharCode(116) + String.fromCharCode(107);
// `i` is a non-null string here, so u is always `i`; the window.gtk fallback is never evaluated.
u = null !== i ? i : (i = window[l] || "") || "";
// The key has the form "<m>.<s>"; both halves are parsed as numbers (0 if invalid).
// The loop then encodes `r` as UTF-8 bytes into S (1 to 4 bytes per character).
for (var d = u.split("."), m = Number(d[0]) || 0, s = Number(d[1]) || 0, S = [], c = 0, v = 0; v < r.length; v++) {
var A = r.charCodeAt(v);
128 > A ? S[c++] = A : (2048 > A ? S[c++] = A >> 6 | 192 : (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1)) ? (A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v)),
S[c++] = A >> 18 | 240,
S[c++] = A >> 12 & 63 | 128) : S[c++] = A >> 12 | 224,
S[c++] = A >> 6 & 63 | 128),
S[c++] = 63 & A | 128)
}
// F is the string "+-a^+6" and D is "+-3^+b+-f"; both are passed to the
// external function `n`. Starting from m, every byte is added to the
// accumulator, which is then passed through n(p, F).
for (var p = m, F = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(97) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(54)), D = "" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(51) + ("" + String.fromCharCode(94) + String.fromCharCode(43) + String.fromCharCode(98)) + ("" + String.fromCharCode(43) + String.fromCharCode(45) + String.fromCharCode(102)), b = 0; b < S.length; b++)
p += S[b],
p = n(p, F);
// Finalisation: one n(p, D) pass, XOR with s, map a negative 32-bit value to
// its unsigned equivalent, reduce modulo 1e6, then format "<p>.<p ^ m>".
return p = n(p, D),
p ^= s,
0 > p && (p = (2147483647 & p) + 2147483648),
p %= 1e6,
p.toString() + "." + (p ^ m)
}
此時,再次利用
execjs
庫調用JS代碼,運作結果如下
報錯訊息為「缺少對象」,即代碼調用了尚未定義的對象。檢查JS代碼可知,
p = n(p, F)
中函數
n
尚未定義。接下來,就是在源碼中尋找函數n的定義。
3.2 函數n建構
同樣,需要在調用函數n的位置打下斷點進行調試。如下圖所示,程式停在斷點後,將滑鼠懸停在函數n上,即可點選連結跳轉到其定義。(注意:如果不打斷點,是無法跳轉的!)
跳轉函數n的結果如下
其實,就是在我們一開始找到的
e
函數正上方!此時,再将函數n的定義加入我們的JS代碼中進行完善,代碼如下
// Complete baidu.js: the mixing helper `n` followed by the sign generator `e`.
// Only `var`-style syntax is used so the file stays runnable on older
// JavaScript engines that execjs may pick up.

/**
 * Mix a 32-bit integer according to a small "program" string.
 *
 * The program is read three characters at a time: [combine][direction][amount].
 *   - amount:    "0".."9" encode themselves, "a".."f" encode 10..15
 *   - direction: "+" means unsigned right shift (>>>), anything else left shift (<<)
 *   - combine:   "+" means add (truncated to a signed 32-bit integer),
 *                anything else XOR
 *
 * @param {number} r - current hash value.
 * @param {string} o - program string, e.g. "+-a^+6".
 * @returns {number} the mixed value as a signed 32-bit integer (or `r`
 *   unchanged when `o` holds fewer than three characters).
 */
function n(r, o) {
    for (var t = 0; t < o.length - 2; t += 3) {
        var amountChar = o.charAt(t + 2);
        // "a" has char code 97, so subtracting 87 maps "a".."f" to 10..15.
        var amount = amountChar >= "a" ? amountChar.charCodeAt(0) - 87 : Number(amountChar);
        var shifted = "+" === o.charAt(t + 1) ? r >>> amount : r << amount;
        // `& 4294967295` truncates the sum to 32 bits.
        r = "+" === o.charAt(t) ? (r + shifted) & 4294967295 : r ^ shifted;
    }
    return r;
}

/**
 * Compute the `sign` form parameter for Baidu Translate.
 *
 * Steps: (1) texts longer than 30 characters are reduced to their first 10,
 * middle 10 and last 10 characters (a surrogate pair counts as one
 * character); (2) the text is encoded as UTF-8 bytes; (3) the bytes are
 * folded into a 32-bit hash with `n`, seeded and salted by the two halves of
 * the key `i`.
 *
 * @param {string} r - the text to be translated (the `query` form field).
 * @returns {string} "<p>.<p ^ m>", where p is the hash reduced modulo 1e6 and
 *   m is the integer part of the key.
 */
function e(r) {
    // Key observed in the browser debugger. The original script could fall
    // back to `window.gtk` when this was null; with a constant key that branch
    // could never run (and `window` does not exist outside a browser), so it
    // has been dropped.
    var i = "320305.131321201";
    var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/;

    var o = r.match(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g);
    if (null === o) {
        var t = r.length;
        if (t > 30) {
            // slice() replaces the deprecated substr(); the selected ranges are identical.
            var half = Math.floor(t / 2);
            r = r.slice(0, 10) + r.slice(half - 5, half + 5) + r.slice(-10);
        }
    } else {
        // Build a list of characters in which each surrogate pair is ONE
        // element, so that truncation never cuts a pair in half.
        var pieces = r.split(surrogatePair);
        var f = [];
        for (var C = 0, h = pieces.length; C < h; C++) {
            if ("" !== pieces[C]) {
                // The copied script wrapped this array in a helper `a(...)` that
                // was never defined here, which made every input containing a
                // surrogate pair throw. split("") already yields a plain array.
                f.push.apply(f, pieces[C].split(""));
            }
            if (C !== h - 1) {
                f.push(o[C]);
            }
        }
        var g = f.length;
        if (g > 30) {
            var mid = Math.floor(g / 2);
            r = f.slice(0, 10).join("") + f.slice(mid - 5, mid + 5).join("") + f.slice(-10).join("");
        }
    }

    // The key has the form "<m>.<s>".
    var d = i.split(".");
    var m = Number(d[0]) || 0;
    var s = Number(d[1]) || 0;

    // Encode `r` as UTF-8 bytes.
    var S = [];
    var c = 0;
    for (var v = 0; v < r.length; v++) {
        var A = r.charCodeAt(v);
        if (A < 128) {
            S[c++] = A;
        } else {
            if (A < 2048) {
                S[c++] = A >> 6 | 192;
            } else {
                if (55296 === (64512 & A) && v + 1 < r.length && 56320 === (64512 & r.charCodeAt(v + 1))) {
                    // Valid surrogate pair: combine into one code point (4-byte form).
                    A = 65536 + ((1023 & A) << 10) + (1023 & r.charCodeAt(++v));
                    S[c++] = A >> 18 | 240;
                    S[c++] = A >> 12 & 63 | 128;
                } else {
                    S[c++] = A >> 12 | 224;
                }
                S[c++] = A >> 6 & 63 | 128;
            }
            S[c++] = 63 & A | 128;
        }
    }

    // The two mixing programs, written as literals instead of the obfuscated
    // String.fromCharCode(...) chains of the original.
    var F = "+-a^+6";
    var D = "+-3^+b+-f";

    var p = m;
    for (var b = 0; b < S.length; b++) {
        p += S[b];
        p = n(p, F);
    }
    p = n(p, D);
    p ^= s;
    if (p < 0) {
        // Reinterpret the negative 32-bit value as unsigned.
        p = (2147483647 & p) + 2147483648;
    }
    p %= 1e6;
    return p.toString() + "." + (p ^ m);
}
添加完成後,再次運作
execjs
庫調用
baidu.js
檔案,結果如下
成功傳回
sign
參數!
四、全部代碼
參數
sign
破解後,剩下就是利用requests傳參即可。全部代碼如下
import execjs
import requests
def get_sign(word, js_path='baidu.js'):
    """Compute the ``sign`` form parameter for ``word``.

    The JavaScript file at ``js_path`` is read, compiled with ``execjs`` and
    its function ``e`` is called with ``word``.

    Args:
        word: The text that will be sent as the ``query`` field.
        js_path: Location of the JavaScript file that defines ``e``.
            Defaults to ``'baidu.js'`` in the current working directory,
            which is what the original hard-coded.

    Returns:
        Whatever ``e(word)`` returns in the JavaScript runtime.
    """
    with open(js_path, 'r') as f:
        jscode = f.read()
    return execjs.compile(jscode).call('e', word)
def main(word, from_lang='en', to_lang='zh', timeout=10):
    """Translate ``word`` through the Baidu Translate web endpoint and print it.

    Args:
        word: Text to translate.
        from_lang: Source language code, sent both in the URL query string
            and in the form body (default ``'en'``, as in the original).
        to_lang: Target language code (default ``'zh'``, as in the original).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The translated text (it is also printed).

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON reply carries no ``trans_result`` entry.
    """
    form_data = {
        'from': from_lang,
        'to': to_lang,
        'query': word,
        'transtype': 'realtime',
        'simple_means_flag': '3',
        'sign': get_sign(word),
        # NOTE(review): token and cookie below were captured from one browser
        # session; presumably they must be replaced with values from your own
        # session for the request to be accepted - confirm.
        'token': '5822d89edd8120552250bd957d623139',
        'domain': 'common'
    }
    headers = {
        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
        # NOTE(review): this cookie appears to contain logged-in session
        # values (BDUSS); avoid publishing real session cookies.
        'cookie':'PSTM=1596245797; BIDUPSID=F0E7203595DAE0CCDB3B84642A11DE00; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=jU2ZDhUZVFMaDM1aFJGQnBNMkJnOG92d1dtNllaU20wek9Qb2hQN3VkSWoxdWxmRVFBQUFBJCQAAAAAAAAAAAEAAADgH7kfNzI5NzU3OTE1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACNJwl8jScJfd; BDUSS_BFESS=jU2ZDhUZVFMaDM1aFJGQnBNMkJnOG92d1dtNllaU20wek9Qb2hQN3VkSWoxdWxmRVFBQUFBJCQAAAAAAAAAAAEAAADgH7kfNzI5NzU3OTE1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACNJwl8jScJfd; H_WISE_SIDS=107319_110085_127969_131423_144966_151532_155689_155933_156286_158995_161422_162079_162155_162898_163233_163274_163321_163390_163805_163933_164109_164163_164215_164545_164692_164869_164940_164946_164954_164961_165048_165086_165133_165135_165144_165328_165565_165689_165736_165801_165963_166055_166148_166167_166174_166176_166181_166184_166209_166214_166312_166450_166570_166631_166692_166696_166826_167305_167388_167393_167405; BAIDUID=444709A0C93D294B855A4CF6C022A543:FG=1; __yjs_duid=1_a7791c0f7d2021de3208effb4940d4de1611904747262; BAIDUID_BFESS=444709A0C93D294B855A4CF6C022A543:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1613629191; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1613631158; __yjsv5_shitong=1.0_7_16675289e0373770030fa50e129a31867a16_300_1613631148848_120.243.195.4_43f8de8f; ab_sr=1.0.0_YmEwNjI3ZjEzMDEyYWM5Mzc0ZTJmMDMxNDdmMWFiYzhkZWM0MDViYjg1MTQ0ZGY0YzA3OWZkNzhhMjg5MzRiMzgwYzg4ZjY1MGE1ZTQ4MTc5M2M0MTAyNTkyZDgzY2Mz'
    }
    url = 'https://fanyi.baidu.com/v2transapi'
    # The language pair goes into the query string as well as the form body,
    # mirroring the original hard-coded "?from=en&to=zh".
    r = requests.post(
        url,
        params={'from': from_lang, 'to': to_lang},
        headers=headers,
        data=form_data,
        timeout=timeout,
    )
    r.raise_for_status()
    payload = r.json()
    if 'trans_result' not in payload:
        # Without this check a rejected request surfaces as a bare
        # KeyError: 'trans_result' with no hint about what the server said.
        raise RuntimeError(
            'Translation request was not accepted; server replied: {!r}'.format(payload)
        )
    data = payload['trans_result']['data'][0]['dst']
    print('輸入的詞為{} , 翻譯為{}'.format(word,data))
    return data


# Run a sample translation only when executed as a script, not on import.
if __name__ =='__main__':
    main('ensure')
JS逆向基礎總結
通過有道翻譯和百度翻譯的逆向過程,我們不難看出JS逆向就是慢慢"扣代碼"。遇到簡單點的JS代碼,可以利用Python進行複寫;若JS代碼較為複雜,可以借助execjs庫來代替執行。但執行過程往往不會一帆風順,通常會缺少某些變數或函數的定義(例如本文中的參數i和函數n),這時我們遵循缺什麼補什麼的原則,逐一補全即可。
以上就是本次分享的全部内容~