登入界面如下

爬蟲代碼
from selenium import webdriver
import time,random, re, os
from test_chaojiying import Chaojiying_Client # 導入超級鷹工具類
from PIL import Image
# Log in to the local Django site with Selenium: fill in the credentials,
# cut the captcha out of a full-page screenshot, and send it to the
# Chaojiying service for recognition.
driver = webdriver.Chrome()  # launch Chrome
try:
    # Implicit wait: element lookups keep polling for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get('http://127.0.0.1:8999')  # the local Django project

    # Locate the account / password inputs by XPath and type into them.
    # find_element('xpath', ...) is used because find_element_by_xpath was
    # removed in Selenium 4; this two-argument form works in both 3 and 4.
    driver.find_element(
        'xpath',
        '//*[@id="app"]/div/div[4]/div/form/div[1]/div[2]/div/div/input',
    ).send_keys('賬号')
    driver.find_element(
        'xpath',
        '//*[@id="app"]/div/div[4]/div/form/div[2]/div[2]/div/div/input',
    ).send_keys('密碼')

    driver.save_screenshot('one.png')  # capture the whole page first

    # The form row that contains the captcha image.
    element = driver.find_element(
        'xpath', '//*[@id="app"]/div/div[4]/div/form/div[4]'
    )
    # print(element.location)  # element coordinates
    # print(element.size)      # element size
    location = element.location
    size = element.size

    # Crop box around the captcha. The +150 / +220 / +20 offsets are
    # presumably hand-tuned for this particular page layout.
    # NOTE(review): assumes element coordinates map 1:1 onto screenshot
    # pixels; confirm on displays with a non-default scale factor.
    left = location['x'] + 150
    top = location['y']
    right = location['x'] + (size['width'] + 220)
    bottom = location['y'] + (size['height'] + 20)

    now = time.strftime("%m-%d %H-%M", time.localtime())
    imgName = str(now) + ".png"
    # Save only the captcha region to a local, timestamp-named file.
    with Image.open('one.png') as screenshot:
        screenshot.crop((left, top, right, bottom)).save(imgName)

    # Placeholder credentials: replace with the real Chaojiying user name,
    # password and the software ID from the user centre.
    chaojiying = Chaojiying_Client('使用者名', ' 密碼', '使用者中心的軟體ID')

    # The image sits next to this script; use a full path if it does not.
    with open(imgName, 'rb') as captcha_file:
        captcha_bytes = captcha_file.read()

    # print(chaojiying.PostPic(captcha_bytes, 1902))  # 1902: common 4-6 character alphanumeric captcha
    # Example result: {'err_str': 'OK', 'pic_id': '2090615382056500023',
    #                  'err_no': 0, 'md5': '47298d1d39c7b28c1c0269d87b8f7c33',
    #                  'pic_str': '1336'}
    result_code = chaojiying.PostPic(captcha_bytes, 4004)

    # Fail loudly when the service returned no recognised text instead of
    # carrying on with an empty or missing captcha value.
    code = result_code.get('pic_str')
    if not code:
        raise RuntimeError(
            'captcha recognition returned no result: %r' % (result_code,)
        )
    print(code)

    # Next steps (disabled): type the captcha and click the submit button.
    # driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[4]/div[2]/div/div/input').send_keys(code)
    # driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[5]/button/div/div').click()
finally:
    # Always shut the browser down, even when a step above failed; quit()
    # also ends the chromedriver process, which close() leaves running.
    driver.quit()
通過selenium截取的驗證碼圖檔
超級鷹
test_chaojiying.py
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Minimal HTTP client for the Chaojiying captcha-recognition service."""

    def __init__(self, username, password, soft_id, *, timeout=60):
        """Store the credentials and build the parameters sent on each call.

        Args:
            username: Chaojiying account name.
            password: Plain-text account password; only its MD5 hex digest
                is kept and transmitted.
            soft_id: Software ID sent as the ``softid`` form field.
            timeout: Seconds to wait for the service on each request, so a
                stalled connection cannot block the caller forever.
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Form fields included in every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        Args:
            im: Image content as bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The JSON body of the response, parsed by ``Response.json()``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            'http://upload.chaojiying.net/Upload/Processing.php',
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised picture and return the JSON response.

        Args:
            im_id: ID of the picture whose result is being reported as wrong.

        Returns:
            The JSON body of the response, parsed by ``Response.json()``.
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post(
            'http://upload.chaojiying.net/Upload/ReportError.php',
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
gif動圖
超級鷹驗證碼類型及單價
更多爬蟲知識 點選這裡
另附上一篇 免費識别驗證碼
點選這裡