登录界面如下

爬虫代码
from selenium import webdriver
import time,random, re, os
from test_chaojiying import Chaojiying_Client # 导入超级鹰工具类
from PIL import Image
# Drive the local login page with Selenium, crop the captcha out of a
# full-window screenshot, and send the crop to Chaojiying for recognition.
driver = webdriver.Chrome()  # launch Chrome
try:
    # Implicit wait: element lookups keep polling for up to 10 seconds.
    driver.implicitly_wait(10)
    driver.get('http://127.0.0.1:8999')  # local Django project

    # Locate the account and password inputs by XPath and type the credentials.
    # find_element('xpath', ...) is used because the find_element_by_xpath
    # shortcut was removed in Selenium 4.3; this form works on 3.x and 4.x.
    driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[1]/div[2]/div/div/input').send_keys('账号')
    driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[2]/div[2]/div/div/input').send_keys('密码')

    driver.save_screenshot('one.png')  # capture the whole window first
    # Form row that contains the captcha image.
    element = driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[4]')
    location = element.location  # element coordinates, e.g. {'x': ..., 'y': ...}
    size = element.size  # element size, e.g. {'width': ..., 'height': ...}

    # Crop box around the captcha. The +150 / +220 / +20 offsets look
    # hand-tuned for this particular page layout -- adjust for other pages.
    left = location['x'] + 150
    top = location['y']
    right = location['x'] + (size['width'] + 220)
    bottom = location['y'] + (size['height'] + 20)

    now = time.strftime("%m-%d %H-%M", time.localtime())
    imgName = str(now) + ".png"
    # Keep only the captcha region and save it locally; the context manager
    # releases the screenshot file handle once the crop has been written.
    with Image.open('one.png') as screenshot:
        screenshot.crop((left, top, right, bottom)).save(imgName)

    # Replace the three placeholders with real Chaojiying credentials.
    chaojiying = Chaojiying_Client('用户名', ' 密码', '用户中心的软件ID')
    # The image sits next to this script; use a full path if it does not.
    # Reading inside `with` closes the file instead of leaking the handle.
    with open(imgName, 'rb') as captcha_file:
        im = captcha_file.read()

    # print(chaojiying.PostPic(im, 1902))  # 1902: common 4-6 char alphanumeric captcha
    # Example response: {'err_str': 'OK', 'pic_id': '2090615382056500023',
    #   'err_no': 0, 'md5': '47298d1d39c7b28c1c0269d87b8f7c33', 'pic_str': '1336'}
    result_code = chaojiying.PostPic(im, 4004)  # 4004: captcha type code (see the Chaojiying price list)
    code = result_code['pic_str']  # recognised captcha text
    print(code)

    # Next steps, left disabled as in the original walkthrough:
    # driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[4]/div[2]/div/div/input').send_keys(code)  # type the captcha
    # driver.find_element('xpath', '//*[@id="app"]/div/div[4]/div/form/div[5]/button/div/div').click()  # click submit
finally:
    # Always end the session, even when a step above raises; quit() also
    # stops the chromedriver process, which close() may leave running.
    driver.quit()
通过selenium截取的验证码图片
超级鹰工具类
test_chaojiying.py(即上文爬虫脚本中 from test_chaojiying import Chaojiying_Client 所导入的模块)
import requests
from hashlib import md5
class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying captcha-recognition service.

    The account password is never stored in clear text: only its MD5 hex
    digest is kept and sent (as the ``pass2`` form field).
    """

    # Service endpoints, kept in one place so both methods stay in sync.
    UPLOAD_URL = 'http://upload.chaojiying.net/Upload/Processing.php'
    REPORT_ERROR_URL = 'http://upload.chaojiying.net/Upload/ReportError.php'

    def __init__(self, username, password, soft_id, timeout=30):
        """Store the credentials and build the per-request defaults.

        Args:
            username: Chaojiying account name.
            password: Account password in clear text; hashed here with MD5.
            soft_id: Software ID taken from the Chaojiying user centre.
            timeout: Seconds to wait for the server on each request (passed
                straight to ``requests``). Without it a stalled connection
                would block the caller forever.
        """
        self.username = username
        password = password.encode('utf8')
        self.password = md5(password).hexdigest()
        self.soft_id = soft_id
        self.timeout = timeout
        # Form fields sent with every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload a captcha image and return the decoded JSON response.

        Args:
            im: Raw image bytes.
            codetype: Captcha type code; see
                http://www.chaojiying.com/price.html

        Returns:
            The parsed JSON body of the response.
        """
        params = {'codetype': codetype, **self.base_params}
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post(
            self.UPLOAD_URL,
            data=params,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised captcha and return the JSON response.

        Args:
            im_id: Picture ID of the captcha being reported (the ``id`` form
                field of the request).

        Returns:
            The parsed JSON body of the response.
        """
        params = {'id': im_id, **self.base_params}
        r = requests.post(
            self.REPORT_ERROR_URL,
            data=params,
            headers=self.headers,
            timeout=self.timeout,
        )
        return r.json()
gif动图
超级鹰验证码类型及单价
更多爬虫知识 点击这里
另附上一篇 免费识别验证码
点击这里