天天看點

python-css僞類元素反爬(二)

目标網站為http://js-crack-course-14-2.crawler-lab.com/

觀察源碼可知,部分文字被放進了僞類元素裡,而且對應的 CSS 是由 JS 動態生成的,是以這次又需要扣代碼。

在扣代碼時我掉進了一個坑,導致整整扣了一天,最後在調試中才恍然大悟。

這個坑就是:當某個變量是由一段很長的混淆表達式計算得到時(如下),我本着「缺啥補啥」的原則把缺失的函數和變量逐一補齊,結果補完之後卻報了一個讓人摸不著頭腦的錯,百思不得其解。其實這個變量是一個固定值,多刷新幾遍頁面、多調試幾遍就能確認,然後直接把值賦給它即可。

        _0x177677 = _0x350191()['' + _0x1573d3() + _0x106d9b() + _0x4ce581() + _0x398f4b() + function () {

            'return\x20jN_';

            return 'ICo';

        }() + _0xb2755c + _0x51ef06() + _0x2ada77() + function () {

            'return\x20wV_';

            return 'n';

        }() + _0x5a39ba()]('' + _0x25e7cb() + '中價' + _0x594a95() + _0x3bf458() + _0x5c4049() + _0x34cbde() + _0x51e8c4() + _0x1374f3() + _0x1a2d89() + _0x2a7b8e() + '号合' + _0x684929 + function (_0x5a77e6) {

            'return\x20yc_';

            return _0x5a77e6;

        }('後商') + _0x3177d6() + function (_0x25ce2b) {

            'return\x20IN_';

            return _0x25ce2b;

        }('型增') + _0x296fca() + _0x196ff1() + _0x2b7913() + _0x3ffdfe() + _0x3e2116 + _0x49ef41() + _0x5b5d21() + _0x2ce7f1() + _0x40c965() + _0x2e8d7f() + function () {

            'return\x20NT_';

            return '或';

        }() + _0x196e52 + _0x97a1e2() + _0x1ee14f() + _0x30bf64() + _0x299605() + function () {

            'return\x20sn_';

            return '機';

        }() + '杆構' + _0x180141() + _0x4d3d70() + _0x337624() + function (_0x319521) {

            'return\x20Ye_';

            return _0x319521;

        }('測渦') + function () {

            'return\x20HB_';

            return '燃片版';

        }() + _0x3a8b92() + _0x262cdf() + _0x340e48 + _0x44fead() + _0x457a31 + _0x30724d() + _0xf80c64() + _0x22b83e() + _0x3c4dd0() + function (_0x3d9b39) {

            'return\x20Cy_';

            return _0x3d9b39;

        }('耗胎') + _0x2226a2 + _0x14bdb4() + _0x28fc49() + _0x27c309() + _0x12128e() + _0x3cefef() + function () {

            'return\x20Vs_';

            return '适';

        }() + '通速' + _0x169493() + _0x28e027 + _0x21fea1() + _0x544ee9() + function (_0x3a7a6f) {

            'return\x20aZ_';

            return _0x3a7a6f;

        }('風馬') + _0x2351f2() + _0xf6e1b6(''));

/

以下是扣好的代碼:  JS逆向第14課-2.js

// Name-fragment helper produced by the obfuscator: always yields 'c'.
var _0x1dbeb3 = function () {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20mt_';
    var fragment = 'c';
    return fragment;
};

// Name-fragment helper produced by the obfuscator: always yields 'h'.
function _0x265117() {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20Ei_';
    var fragment = 'h';
    return fragment;
}

// Name-fragment helper produced by the obfuscator: always yields 'a'.
var _0x4f0639 = function () {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20LN_';
    var fragment = 'a';
    return fragment;
};

// Name-fragment helper produced by the obfuscator: always yields 'rA'.
function _0x1867d8() {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20Mp_';
    var fragment = 'rA';
    return fragment;
}

// Name-fragment helper produced by the obfuscator: always yields 't'.
function _0x1b5075() {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20KT_';
    var fragment = 't';
    return fragment;
}

/**
 * Looks up one character of the `_0x177677` character pool.
 *
 * @param {string|number} _0x451167 - Position in the pool, written in decimal.
 * @returns {string} The character at that position.
 */
function _0x13ff1b(_0x451167) {
    // The five fragment helpers used originally only spelled out the name 'charAt'.
    // The radix is passed explicitly so the index is always read as decimal, never
    // as hex ("0x..") or, on legacy engines, octal ("0..").
    return _0x177677.charAt(parseInt(_0x451167, 10));
}

// Name-fragment helper produced by the obfuscator: always yields 's'.
function _0x3e9934() {
    var inner = function () {
        return 's';
    };
    // Both branches give 's'; the comparison is obfuscator padding.
    return inner() === 's' ? 's' : inner();
}

// Name-fragment helper produced by the obfuscator: always yields 'pl'.
function _0x43bfd9() {
    var inner = function () {
        return 'pl';
    };
    // 'pl' never equals 'pl,', so the 'uN_' branch is a decoy that is never taken.
    return inner() === 'pl,' ? 'uN_' : inner();
}

// Name-fragment helper produced by the obfuscator: always yields 'i'.
var _0x4c9e32 = function () {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'uD_';
    function inner() {
        return 'i';
    }
    return inner();
};
// Name-fragment helper produced by the obfuscator: always yields 't'.
var _0x3e81f0 = function () {
    // Inert string statement (obfuscator noise); evaluating it has no effect.
    'return\x20vy_';
    var fragment = 't';
    return fragment;
};

/**
 * Splits a value on a separator, tolerating falsy input.
 *
 * @param {string} _0xe1921 - Value to split; any falsy value short-circuits.
 * @param {string} _0x5993bb - Separator handed to the split call.
 * @returns {string[]|string} The pieces, or '' when the first argument is falsy.
 */
function _0x59c0a4(_0xe1921, _0x5993bb) {
    if (!_0xe1921) {
        return '';
    }
    // The four fragment helpers concatenate to the method name 'split'.
    var methodName = '' + _0x3e9934() + _0x43bfd9() + _0x4c9e32() + _0x3e81f0();
    return _0xe1921[methodName](_0x5993bb);
}

// Index table: entry i is a comma-separated list of decimal positions into `_0x177677`.
// `_0x359c64` joins the characters found at those positions and stores the result under key i.
var _0x44edbf = ["93", "39,77", "0", "71,23,102,103", "59,76", "49,26", "88,95", "36,43", "19,106", "51,28", "58,101,48", "87,101,48", "64", "107,87", "19,41,55", "30,105", "27,63", "47,97", "45,91,38", "59,81", "4,59", "96,58", "9,89,86", "33,37", "42,11", "78,17", "41,55", "79,67", "9,8,12,22", "27,92,53", "25,15", "1,29", "26,82", "50,102", "11,66", "17,98", "47,7", "9,80", "9,41,55", "108,37", "99", "19,89,82", "84,56", "2", "90,86", "46", "44,70", "79,3", "100,37", "1,24,87", "13,10", "83,74", "68,38", "6", "79,40", "20", "69,21", "14", "65,75", "62,59", "32,72", "31,60", "9,89,82", "52,54", "71,17,22", "18,73", "85,5", "58,79", "9,57", "85,97", "19,8,12,22", "61,89", "35,95,22", "94,104", "19,89,86", "16", "34"]
// Character pool indexed by `_0x13ff1b`. The page script computes this value with a long
// obfuscated expression; it is hard-coded here because, per the accompanying write-up,
// the value is fixed.
var _0x177677 = "萬中價體供保元列制前力功動助華壓号合名後商噴器地型增備多大央寶實容寬導差年度式彈徑懸成或扭承指排數整時最機杆構架格橋氣油測渦燃片版獨率蓋盤直矩離積稱程立箱簧綜缸置耗胎行規質距車轉輪軸載連适通速配量金鋁長門間隙風馬驅駐高"

/**
 * Builds the keyword table from the `_0x44edbf` index lists.
 *
 * For every entry of `_0x44edbf`, the comma-separated positions are split with
 * `_0x59c0a4`, each position is turned into a character with `_0x13ff1b`, and the
 * characters are concatenated.
 *
 * @returns {Object<string, string>} Map from the entry's position in `_0x44edbf`
 *     to the decoded word.
 */
function _0x359c64() {
    var obj = {};
    // The counters are declared locally. The original assigned to undeclared names,
    // which leaks globals in sloppy mode and throws ReferenceError in strict mode.
    for (var index = 0; index < _0x44edbf.length; index++) {
        var positions = _0x59c0a4(_0x44edbf[index], ',');
        var word = '';
        for (var itemIndex = 0; itemIndex < positions.length; itemIndex++) {
            word += _0x13ff1b(positions[itemIndex]);
        }
        obj[index] = word;
    }
    return obj;
}

/

以下為py程式

import requests
import execjs
import re
from parsel import Selector

def get_data():
    """Run the extracted JavaScript file and return the result of ``_0x359c64``.

    Reads ``JS逆向第14課-2.js`` as UTF-8 (path relative to the working directory),
    compiles it with execjs and returns whatever calling ``_0x359c64`` yields.
    """
    with open('JS逆向第14課-2.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    runtime = execjs.compile(js_source)
    return runtime.call('_0x359c64')

# Keyword table used to fill the page's empty "hs_kw<N>_configCP" placeholders.
# It can be produced by get_data() above; the value is fixed, so it is hard-coded here.
# Entry '69' is '質量': its index list "85,97" in the JS table selects the characters
# 質 and 量, whereas the previous value contained a character that is not in the
# JS character pool at all.
data = {
        '0': '适', '1': '彈簧', '2': '萬', '3': '離地間隙', '4': '油箱', '5': '整備', '6': '轉速',
        '7': '年或', '8': '後驅', '9': '最大', '10': '氣門數', '11': '車門數', '12': '版', '13': '駐車',
        '14': '後懸架', '15': '寶馬', '16': '多片', '17': '排量', '18': '承載式', '19': '油耗', '20': '供油',
        '21': '配氣', '22': '前輪距', '23': '寬度', '24': '成功', '25': '綜合', '26': '懸架', '27': '缸蓋',
        '28': '前制動器', '29': '多連杆', '30': '增壓', '31': '中央', '32': '備胎', '33': '時間', '34': '功率',
        '35': '合金', '36': '排列', '37': '前置', '38': '前懸架', '39': '高度', '40': '鋁', '41': '後輪胎',
        '42': '規格', '43': '價', '44': '軸距', '45': '指', '46': '扭矩', '47': '缸體', '48': '長度',
        '49': '中型車', '50': '助力', '51': '行程', '52': '盤式', '53': '元', '54': '缸徑', '55': '商',
        '56': '直噴', '57': '華', '58': '獨立', '59': '燃油', '60': '容積', '61': '實測', '62': '前輪胎',
        '63': '機構', '64': '離合器', '65': '名稱', '66': '質保', '67': '氣缸', '68': '前橋', '69': '質量',
        '70': '後制動器', '71': '渦輪', '72': '差速器', '73': '通風', '74': '後輪距', '75': '号', '76': '導'
}


def spdier():
    """Download the exercise page, restore the hidden keywords and print each table row.

    The page HTML contains empty ``<span class="hs_kwN_configCP"></span>``
    placeholders. Each one is replaced with ``data['N']`` before the HTML is
    parsed; then every ``tbody tr`` except the first is printed as the header
    cell text followed by the data cell text.

    Returns:
        None. The rows are written to stdout.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    url = 'http://js-crack-course-14-2.crawler-lab.com/'
    headers = {
        # NOTE(review): hard-coded session token; presumably expires and has to be
        # refreshed from a logged-in browser session -- confirm before running.
        'Cookie': 'crawlerlab_token=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjE1ODY5NjI1ODYsImlkIjoxNjA1LCJuYW1lIjoiMTU4Njg4MTU2ODcifQ.Fhx2VXHieFT0Sg7KZElTLqdUucgDEgZIhTDn7YaubhY'
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    resp_text = response.content.decode()

    def _restore(match):
        # An index missing from `data` keeps its original placeholder. Previously
        # data.get() returned None and str.replace() raised TypeError.
        return data.get(match.group(1), match.group(0))

    # One substitution pass over the document replaces every placeholder.
    resp_text = re.sub(r'<span class="hs_kw(\d+)_configCP"></span>', _restore, resp_text)

    # CSS selectors are used here; the equivalent XPath would start from
    # '//tbody/tr[position()>1]' and read './th/div/a/text()' and './td/div/text()'.
    sel = Selector(resp_text)
    # Slicing skips the first row without raising when no rows were found.
    rows = sel.css('tbody tr')[1:]
    for tr in rows:
        # default='' avoids calling .replace() on None when a row has no header link.
        th = tr.css('th div a::text').extract_first(default='').replace('\n', '').replace(' ', '')
        td = tr.css('td div::text').extract_first()
        print(th, td)


# Runs the scraper as soon as this module is loaded.
spdier()