天天看點

python + selenium 模拟登陸b站

目錄

    • 截取驗證碼圖檔
    • 識别缺口
    • 模拟滑動
    • 爬坑
    • 代碼
    • 相關優化
    • 參考資料

截取驗證碼圖檔

首先将滑鼠移至小塊處,會出現滑動後的圖檔(即原圖)

python + selenium 模拟登陸b站

利用selenium找到驗證碼圖檔所在的元素,這裡為 By.CLASS_NAME = 'gt_box'

python + selenium 模拟登陸b站

這樣,我們就可以截取整個網頁,再利用圖檔元素傳回的位置和寬高,裁切出驗證碼圖檔并儲存

接下來,點選小塊并按住不放,出現帶缺口的圖檔

python + selenium 模拟登陸b站

通過同樣的方法,可以利用selenium裁剪出帶缺口的驗證碼

識别缺口

具體想法,比較兩張圖檔,得到待拼合的滑塊和缺口的距離,依照距離拖動滑鼠,完成拼圖

首先,滑塊的x軸坐标是不變的,滑塊的大小也是不變的,所以,我們只要比較滑塊右側兩張圖檔對應像素點的RGB資料,如果差距超過一定範圍,則代表像素點不同,這樣我們就找到了缺口位置。

模拟滑動

人為的滑動一般是先快後慢,我們需要模拟這個滑動軌迹,否則會被識别為機器,驗證無法通過。這裡使用先加速後減速,并加入随機波動的方法拟合人為滑動,成功率較好。

爬坑

問題

由于之前的顯示比例不是100%,所以使用selenium截取元素圖檔會出現位置上的偏差,截出的圖檔與我們想要的圖檔不同

解決方案

1.将顯示比例改為100% (win10 桌面右擊 顯示設定)

2.對截圖坐标進行放縮 (見代碼)

python + selenium 模拟登陸b站

代碼

import random
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC



USER_NAME = ''  # account name typed into the login form
PASSWORD = ''  # account password typed into the login form
MULTIPE = 1.5  # display scaling factor; 1.5 matches the author's 150% setting (name is sic, the class refers to it)
BORDER = 6  # x coordinate of the slider's left edge inside the captcha image


class CrackBili(object):
    """Drive Chrome through the bilibili login page and its slider captcha.

    One attempt consists of: opening the page and typing the credentials,
    capturing the captcha image without and with the gap, locating the gap by
    pixel comparison, and dragging the slider along a human-looking track.
    Attempts are repeated until the widget reports success, after which the
    login button is pressed.

    The class reads the module-level constants USER_NAME, PASSWORD, MULTIPE
    and BORDER.
    """

    def __init__(self):
        """Start a maximized Chrome session and prepare the explicit waits."""
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        # General-purpose wait used for locating page elements.
        self.wait = WebDriverWait(self.browser, 10)
        # Short wait used only to poll for the captcha success message.
        self.wait_pass = WebDriverWait(self.browser, 1)
        self.user_name = USER_NAME
        self.password = PASSWORD

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # __init__ may have failed before self.browser was assigned.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole WebDriver session; close() would only
            # close the current window and leave the driver running.
            browser.quit()

    def get_slider(self):
        """
        Wait for the slider knob to become clickable.

        :return: the slider knob element
        """
        return self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'gt_slider_knob')))

    def get_position(self):
        """
        Locate the captcha image on the page.

        :return: tuple ``(top, bottom, left, right)`` built from the element's
            reported location and size
        """
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'gt_box')))
        # Fixed pause before measuring; presumably lets the widget finish
        # rendering -- TODO confirm it is still needed.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        left = location['x']
        return (top, top + size['height'], left, left + size['width'])

    def get_screenshot(self):
        """
        Take a screenshot of the page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_bili_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.

        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('驗證碼位置', left, top, right, bottom)
        screenshot = self.get_screenshot()
        # Element coordinates are scaled by MULTIPE to match the screenshot.
        # crop() needs a real 4-tuple: it indexes the box, so a lazy map
        # object is rejected by current Pillow releases.
        box = tuple(int(edge * MULTIPE) for edge in (left, top, right, bottom))
        captcha = screenshot.crop(box)
        captcha.save(name)
        return captcha

    def open(self):
        """
        Load the login page and type the user name and password.

        :return: None
        """
        self.browser.get(self.url)
        user_name = self.wait.until(EC.presence_of_element_located((By.ID, 'login-username')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'login-passwd')))
        user_name.send_keys(self.user_name)
        password.send_keys(self.password)

    @staticmethod
    def _rgb_close(pixel1, pixel2, threshold=60):
        """Return True when each of the R, G, B channels differs by less than threshold."""
        return all(abs(pixel1[k] - pixel2[k]) < threshold for k in range(3))

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap in screenshot pixels.

        Columns are scanned left to right, starting just right of the slider
        piece; the first column containing a differing pixel is the gap edge.

        :param image1: captcha image without the gap
        :param image2: captcha image with the gap
        :return: x coordinate of the first differing column, or the scan's
            starting column when no difference is found
        """
        # 60 is the x coordinate of the slider piece's right edge in the
        # unscaled captcha image.
        start = int(60 * MULTIPE)
        # Load the pixel accessors once instead of once per pixel.
        pixels1, pixels2 = image1.load(), image2.load()
        # Stay inside the area both images share.
        width = min(image1.size[0], image2.size[0])
        height = min(image1.size[1], image2.size[1])
        for x in range(start, width):
            for y in range(height):
                if not self._rgb_close(pixels1[x, y], pixels2[x, y]):
                    return x
        return start

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Decide whether two images have (nearly) the same pixel at one position.

        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :return: True when every RGB channel differs by less than 60
        """
        return self._rgb_close(image1.load()[x, y], image2.load()[x, y])

    def get_track(self, distance):
        """
        Build the list of horizontal steps used to drag the slider.

        The motion accelerates until three quarters of the distance, then
        decelerates, with the acceleration picked at random on every step.
        Steps are integer deltas of the rounded running position, so they
        always add up to ``round(distance)`` with no overshoot.

        :param distance: offset to cover, in page pixels
        :return: list of integer x offsets; empty when distance is not positive
        """
        track = []
        if distance <= 0:
            return track
        goal = round(distance)
        # Exact (float) displacement of the simulated motion.
        current = 0.0
        # Integer displacement already emitted into the track.
        emitted = 0
        # Position at which the motion switches to deceleration.
        mid = distance * 3 / 4
        # Duration of one simulation step.
        t = 0.1
        v = 0
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = - random.randint(6, 7)
            v0 = v
            v = v0 + a * t
            current += v0 * t + 1 / 2 * a * t * t
            # Emit the change in rounded position rather than rounding each
            # step on its own, which would lose the fractional parts.
            target = min(round(current), goal)
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the track and release it.

        :param slider: slider knob element
        :param track: list of x offsets to move by, one per step
        :return: None
        """
        # NOTE(review): crack() already presses the knob before calling this;
        # the press is repeated here so the method also works on its own.
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Press the login button.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        submit.click()
        time.sleep(10)
        # Printed unconditionally; the page is not inspected for the outcome.
        print('登入成功')

    def _attempt(self):
        """Run one complete captcha attempt; return True when it was accepted."""
        # Load the page and type the credentials.
        self.open()
        slider = self.get_slider()
        # Hovering the knob makes the widget show the complete image.
        ActionChains(self.browser).move_to_element(slider).perform()
        image1 = self.get_bili_image('captcha1.png')
        # Pressing the knob makes the widget show the image with the gap.
        ActionChains(self.browser).click_and_hold(slider).perform()
        image2 = self.get_bili_image('captcha2.png')
        gap = self.get_gap(image1, image2)
        print('缺口位置', gap)
        # The gap was measured in screenshot pixels; undo the display scaling.
        gap = int(gap / MULTIPE)
        # The slider starts BORDER pixels from the image's left edge.
        gap -= BORDER
        track = self.get_track(gap)
        print('滑動軌迹', track)
        self.move_to_gap(slider, track)
        try:
            # NOTE(review): the literal must match the widget's text exactly;
            # confirm which character set the page actually renders.
            success = self.wait_pass.until(
                EC.text_to_be_present_in_element((By.CLASS_NAME, 'gt_info_text'), '驗證通過'))
        except TimeoutException:
            return False
        print(success)
        return bool(success)

    def crack(self):
        """
        Solve the captcha, retrying from a fresh page load until it is
        accepted, then log in.

        :return: None
        """
        # A loop rather than recursion, so repeated failures do not grow the
        # call stack.
        while not self._attempt():
            pass
        self.login()


if __name__ == '__main__':
    # Build the automation object (this launches the browser), then run the
    # whole captcha-and-login flow.
    cracker = CrackBili()
    cracker.crack()
           

相關優化

這裡我将擷取網頁、輸入使用者名密碼、擷取滑塊的方法單獨拿了出來,主要是考慮到重新整理頁面後再重新輸入使用者名密碼、擷取滑塊可能會比較耗時;不過這個代碼要多擷取一個重新整理驗證碼的按鈕,實際上可能并差不了多少。

import random
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC



USER_NAME = ''  # account name typed into the login form
PASSWORD = ''  # account password typed into the login form
MULTIPE = 1.5  # display scaling factor; 1.5 matches the author's 150% setting (name is sic, the class refers to it)
BORDER = 6  # x coordinate of the slider's left edge inside the captcha image


class CrackBili(object):
    """Drive Chrome through the bilibili login page and its slider captcha.

    In this variant the caller opens the page (``open``) and looks the slider
    up (``get_slider``) once; ``crack`` then retries on the same page by
    pressing the captcha's refresh button instead of reloading.

    The class reads the module-level constants USER_NAME, PASSWORD, MULTIPE
    and BORDER.
    """

    def __init__(self):
        """Start a maximized Chrome session and prepare the explicit waits."""
        self.url = 'https://passport.bilibili.com/login'
        self.browser = webdriver.Chrome()
        self.browser.maximize_window()
        # General-purpose wait used for locating page elements.
        self.wait = WebDriverWait(self.browser, 10)
        # Short wait used only to poll for the captcha success message.
        # crack() reads this attribute, so it must exist under this name.
        self.wait_pass = WebDriverWait(self.browser, 1)
        self.user_name = USER_NAME
        self.password = PASSWORD

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # __init__ may have failed before self.browser was assigned.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole WebDriver session; close() would only
            # close the current window and leave the driver running.
            browser.quit()

    def get_slider(self):
        """
        Wait for the slider knob to become clickable.

        :return: the slider knob element
        """
        return self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'gt_slider_knob')))

    def get_position(self):
        """
        Locate the captcha image on the page.

        :return: tuple ``(top, bottom, left, right)`` built from the element's
            reported location and size
        """
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'gt_box')))
        # Fixed pause before measuring; presumably lets the widget finish
        # rendering -- TODO confirm it is still needed.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        left = location['x']
        return (top, top + size['height'], left, left + size['width'])

    def get_screenshot(self):
        """
        Take a screenshot of the page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_bili_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.

        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('驗證碼位置', left, top, right, bottom)
        screenshot = self.get_screenshot()
        # Element coordinates are scaled by MULTIPE to match the screenshot.
        # crop() needs a real 4-tuple: it indexes the box, so a lazy map
        # object is rejected by current Pillow releases.
        box = tuple(int(edge * MULTIPE) for edge in (left, top, right, bottom))
        captcha = screenshot.crop(box)
        captcha.save(name)
        return captcha

    def open(self):
        """
        Load the login page and type the user name and password.

        :return: None
        """
        self.browser.get(self.url)
        user_name = self.wait.until(EC.presence_of_element_located((By.ID, 'login-username')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'login-passwd')))
        user_name.send_keys(self.user_name)
        password.send_keys(self.password)

    @staticmethod
    def _rgb_close(pixel1, pixel2, threshold=60):
        """Return True when each of the R, G, B channels differs by less than threshold."""
        return all(abs(pixel1[k] - pixel2[k]) < threshold for k in range(3))

    def get_gap(self, image1, image2):
        """
        Find the x offset of the gap in screenshot pixels.

        Columns are scanned left to right, starting just right of the slider
        piece; the first column containing a differing pixel is the gap edge.

        :param image1: captcha image without the gap
        :param image2: captcha image with the gap
        :return: x coordinate of the first differing column, or the scan's
            starting column when no difference is found
        """
        # 60 is the x coordinate of the slider piece's right edge in the
        # unscaled captcha image.
        start = int(60 * MULTIPE)
        # Load the pixel accessors once instead of once per pixel.
        pixels1, pixels2 = image1.load(), image2.load()
        # Stay inside the area both images share.
        width = min(image1.size[0], image2.size[0])
        height = min(image1.size[1], image2.size[1])
        for x in range(start, width):
            for y in range(height):
                if not self._rgb_close(pixels1[x, y], pixels2[x, y]):
                    return x
        return start

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Decide whether two images have (nearly) the same pixel at one position.

        :param image1: first image
        :param image2: second image
        :param x: x position
        :param y: y position
        :return: True when every RGB channel differs by less than 60
        """
        return self._rgb_close(image1.load()[x, y], image2.load()[x, y])

    def get_track(self, distance):
        """
        Build the list of horizontal steps used to drag the slider.

        The motion accelerates until three quarters of the distance, then
        decelerates, with the acceleration picked at random on every step.
        Steps are integer deltas of the rounded running position, so they
        always add up to ``round(distance)`` with no overshoot.

        :param distance: offset to cover, in page pixels
        :return: list of integer x offsets; empty when distance is not positive
        """
        track = []
        if distance <= 0:
            return track
        goal = round(distance)
        # Exact (float) displacement of the simulated motion.
        current = 0.0
        # Integer displacement already emitted into the track.
        emitted = 0
        # Position at which the motion switches to deceleration.
        mid = distance * 3 / 4
        # Duration of one simulation step.
        t = 0.10
        v = 0
        while current < distance:
            if current < mid:
                a = random.randint(2, 3)
            else:
                a = - random.randint(6, 7)
            v0 = v
            v = v0 + a * t
            current += v0 * t + 1 / 2 * a * t * t
            # Emit the change in rounded position rather than rounding each
            # step on its own, which would lose the fractional parts.
            target = min(round(current), goal)
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the track and release it.

        :param slider: slider knob element
        :param track: list of x offsets to move by, one per step
        :return: None
        """
        # NOTE(review): crack() already presses the knob before calling this;
        # the press is repeated here so the method also works on its own.
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Press the login button.

        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'btn-login')))
        submit.click()
        time.sleep(10)
        # Printed unconditionally; the page is not inspected for the outcome.
        print('登入成功')

    def refresh(self):
        """
        Press the captcha's refresh button to request a new puzzle.

        :return: None
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'gt_refresh_button')))
        button.click()

    def _attempt(self, slider):
        """Run one captcha attempt with the given knob; return True when accepted."""
        # Hovering the knob makes the widget show the complete image.
        ActionChains(self.browser).move_to_element(slider).perform()
        image1 = self.get_bili_image('captcha1.png')
        # Pressing the knob makes the widget show the image with the gap.
        ActionChains(self.browser).click_and_hold(slider).perform()
        image2 = self.get_bili_image('captcha2.png')
        gap = self.get_gap(image1, image2)
        # The gap was measured in screenshot pixels; undo the display scaling.
        gap = int(gap / MULTIPE)
        print('缺口位置', gap)
        # The slider starts BORDER pixels from the image's left edge.
        gap -= BORDER
        track = self.get_track(gap)
        print('滑動軌迹', track)
        self.move_to_gap(slider, track)
        try:
            # NOTE(review): the literal must match the widget's text exactly;
            # confirm which character set the page actually renders.
            success = self.wait_pass.until(
                EC.text_to_be_present_in_element((By.CLASS_NAME, 'gt_info_text'), '驗證通過'))
        except TimeoutException:
            return False
        print(success)
        return bool(success)

    def crack(self, slider):
        """
        Solve the captcha, refreshing the puzzle after each failed attempt,
        then log in.

        :param slider: slider knob element, as returned by ``get_slider``
        :return: None
        """
        # A loop rather than recursion, so repeated failures do not grow the
        # call stack.
        while not self._attempt(slider):
            # Hover the knob again before asking for a new puzzle.
            ActionChains(self.browser).move_to_element(slider).perform()
            self.refresh()
        self.login()


if __name__ == '__main__':
    # Create the automation object; this launches the browser.
    cracker = CrackBili()
    # Load the login page and type the credentials.
    cracker.open()
    # Look the knob up once and hand it to the solver.
    cracker.crack(cracker.get_slider())

           

參考資料

Python3網絡爬蟲開發實戰教程