python爬虫--------处理极验验证(滑块拼图验证)bilibili模式
# Page byline from the original post: King~~~ 人气:0
from selenium import webdriver
from PIL import Image
import time
import random
from selenium.webdriver import ActionChains
import cv2
from matplotlib import pyplot as plt


class slide():
    """Drive Chrome through the bilibili login page and drag the Geetest slider.

    The flow implemented by ``run`` is: screenshot the captcha with the full
    background shown, screenshot it again with the gap shown, find the gap
    column by comparing the two crops pixel by pixel, then drag the slider
    button by that distance (scaled to on-page pixels).
    """

    def __init__(self):
        """Open Chrome, load the login page, fill in credentials and submit."""
        self.driver = webdriver.Chrome(executable_path='..')  # path to your own chromedriver
        self.driver.maximize_window()
        self.trance = 0
        self.driver.get("http://passport.bilibili.com/login")
        self.driver.find_element_by_id('login-username').send_keys('....')  # user name
        self.driver.find_element_by_id('login-passwd').send_keys('....')  # password
        self.driver.find_element_by_class_name('btn-login').click()
        print('login..........')
        # Fixed wait after clicking the login button before the captcha is touched.
        time.sleep(3)

    def _crop_captcha(self, screenshot_path, cropped_path):
        """Crop the captcha area out of a full-page screenshot and save it.

        Uses ``self.location`` / ``self.size`` (set by ``before_deal_image``)
        plus hand-tuned pixel offsets; adjust the numbers until the crop
        contains exactly the captcha picture.
        """
        self.left = self.location['x'] + 220
        self.top = self.location['y'] + 57
        self.right = self.location['x'] + self.size['width'] + 295
        self.bottom = self.location['y'] + self.size['height'] + 113
        box = (self.left, self.top, self.right, self.bottom)
        # Crop and save while the screenshot file is still open, then let the
        # context manager release the file handle.
        with Image.open(screenshot_path) as screenshot:
            self.im = screenshot.crop(box)
            self.im.save(cropped_path)

    def before_deal_image(self):
        """Screenshot the page with the complete (gap-free) background shown."""
        self.js1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="block"'
        self.driver.execute_script(self.js1)
        time.sleep(2)
        self.location = self.driver.find_element_by_class_name('geetest_widget').location
        self.size = self.driver.find_element_by_class_name('geetest_widget').size
        time.sleep(2)
        self.driver.save_screenshot('capture1.png')  # full-page screenshot
        self._crop_captcha('capture1.png', 'ele_capture1.png')

    def slide(self, num):
        """Drag the slider button ``num`` pixels to the right in three steps.

        The distance is split into 3/4 of the total, the remainder minus 3,
        and a final 3 pixels, with pauses in between and a small random
        vertical offset on each step, to look less like a script.
        """
        self.num = num
        print('滑块应该移动距离------------->', self.num)
        self.button = self.driver.find_element_by_class_name('geetest_slider_button')
        first_distance = self.num / 4 * 3
        second_distance = self.num - first_distance - 3
        # Whatever is left after the first two steps (3 pixels by construction).
        third_distance = self.num - second_distance - first_distance
        # NOTE(review): each chain calls click_and_hold again on the button;
        # kept exactly as in the original because that sequence is what the
        # author tested against the live captcha.
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(first_distance, random.random()).perform()
        time.sleep(0.5)
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(second_distance, random.random()).perform()
        time.sleep(0.9)
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(third_distance, random.random()).release().perform()

    def after_deal_image(self):
        """Screenshot the page with the gapped background shown.

        Must run after ``before_deal_image``, which records the widget
        location and size used for cropping.
        """
        self.js2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="none"'
        self.driver.execute_script(self.js2)
        self.driver.save_screenshot('capture2.png')  # full-page screenshot
        self._crop_captcha('capture2.png', 'ele_capture2.png')  # cropped captcha

    # --------------------- slide distance --------------------- #
    def slide_distance(self, image1, image2, threshold=86, start_x=75, offset=7):
        """Return the x position of the gap, or ``None`` if no gap is found.

        Args:
            image1: path of the cropped full-background image.
            image2: path of the cropped gapped-background image.
            threshold: per-channel difference a pixel must exceed, on all of
                R, G and B, to count as part of the gap (the original note
                says the edge pixels differ by about 85-86).
            start_x: first column to scan; the original uses 75 so that the
                slider piece on the left is skipped. Tune for your crop.
            offset: value subtracted from the first matching column before
                it is returned (7 in the original).

        Returns:
            ``column - offset`` for the first column (scanning left to right,
            top to bottom) holding a pixel over the threshold, else ``None``.
        """
        with Image.open(image2) as cut_file, Image.open(image1) as full_file:
            # Force three-channel pixels so indexing works for any PNG mode.
            cut_image = cut_file.convert('RGB')
            full_image = full_file.convert('RGB')
        # Only compare the area both images share, so getpixel never runs
        # past the edge of the smaller one.
        width = min(cut_image.size[0], full_image.size[0])
        height = min(cut_image.size[1], full_image.size[1])
        for i in range(start_x, width):
            for j in range(height):
                pixel1 = cut_image.getpixel((i, j))
                pixel2 = full_image.getpixel((i, j))
                if all(abs(a - b) > threshold for a, b in zip(pixel1, pixel2)):
                    print(i - offset)
                    return i - offset
        return None

    # --------------------- similarity --------------------- #
    def classify_gray_hist(self, image1, image2, size=(328, 211)):
        """Return a histogram-overlap score for two images.

        Both images are resized to ``size`` (the screenshot crop size) and a
        256-bin histogram of channel 0 is computed for each. For bins 75..255
        each bin contributes 1 when the counts are equal, otherwise
        ``1 - |h1 - h2| / max(h1, h2)``; the sum is divided by the total
        number of bins.

        NOTE(review): the loop starts at bin 75 while the divisor is all 256
        bins, so the score cannot reach 1.0 - confirm whether that is
        intended. Arithmetic left unchanged.
        """
        image1 = cv2.resize(image1, size)
        image2 = cv2.resize(image2, size)
        # calcHist takes its arguments wrapped in lists; [0] selects channel 0.
        hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 255.0])  # gapped background
        hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 255.0])  # full background
        # To eyeball the two histograms:
        # plt.plot(range(256), hist1, 'r')
        # plt.plot(range(256), hist2, 'b')
        # plt.show()
        degree = 0
        for i in range(75, len(hist1)):
            if hist1[i] != hist2[i]:
                degree = degree + (1 - abs(hist1[i] - hist2[i]) / max(hist1[i], hist2[i]))
            else:
                degree = degree + 1
        degree = degree / len(hist1)
        return degree

    def run0(self, image1, image2):
        """Load two image files and return their similarity as an int percent."""
        img1 = cv2.imread(image1)
        # cv2.imshow('img1', img1)
        img2 = cv2.imread(image2)
        # cv2.imshow('img2', img2)
        degree = self.classify_gray_hist(img1, img2)
        try:
            score = degree[0]
        except (TypeError, IndexError):
            # When every compared bin is identical the score is a plain
            # float rather than a one-element array.
            score = degree
        percent = int(100 * score)
        print('两张图片相似度为。。。。。', percent)  # full background vs. gapped image
        # NOTE(review): leftover from the commented-out imshow calls above;
        # presumably returns at once when no window is open - confirm.
        cv2.waitKey(0)
        return percent

    def run(self):
        """Run the whole flow once and always close the browser afterwards."""
        try:
            self.before_deal_image()
            self.after_deal_image()
            self.num = self.slide_distance('ele_capture1.png', 'ele_capture2.png')
            print('原始距离--------->', self.num)
            if self.num is None:
                # Fail with a clear message instead of a TypeError further down.
                raise ValueError('no gap found; adjust threshold/start_x or the crop offsets')
            self.image1 = 'ele_capture1.png'
            self.image2 = 'ele_capture2.png'
            self.result = self.run0(self.image1, self.image2)
            # 262/328: ratio between the real captcha width and the width of
            # the screenshot crop; adjust for your own screen.
            real_distance = self.num * (262 / 328)
            print('按照图像大小比列计算实际移动距离', real_distance)
            self.slide(real_distance)
            time.sleep(6)
        except Exception as exc:
            print('login_out..............')
            # Show what actually went wrong instead of hiding it.
            print(repr(exc))
            time.sleep(7)
        finally:
            self.driver.quit()
            print('程序运行结束')


slide().run()
透明度为 0 和 1 时的色差在 85、86 左右徘徊,且 R、G、B 三个通道的差值相等(均为 85 或 86),因此阈值取 85 或 86 均可,即上面函数中的 threshold。主要的难点是缺口位置的查找,因此需要一个函数来测试阈值是否合适,介绍如下。
import cv2
from PIL import Image


def get_distance(cut_image, full_image, threshold=86, start_x=75, offset=7):
    """Return the x position of the captcha gap, or ``None`` if none is found.

    Args:
        cut_image: path of the gapped background image.
        full_image: path of the complete background image.
        threshold: per-channel difference a pixel must exceed, on all of
            R, G and B, to count as part of the gap (86 in the original).
        start_x: first column to scan; 75 is the distance from the left edge
            of the picture to the right-hand shadow of the slider piece in
            the author's screenshot. Tune for your own crop.
        offset: value subtracted from the first matching column before it is
            returned (7 in the original).

    Returns:
        ``column - offset`` for the first column (scanning left to right,
        top to bottom) holding a pixel over the threshold, else ``None``.
    """
    with Image.open(cut_image) as cut_file, Image.open(full_image) as full_file:
        # Force three-channel pixels so indexing works for any PNG mode.
        cut_pixels = cut_file.convert('RGB')
        full_pixels = full_file.convert('RGB')
    # Only compare the area both images share, so getpixel never runs past
    # the edge of the smaller one.
    width = min(cut_pixels.size[0], full_pixels.size[0])
    height = min(cut_pixels.size[1], full_pixels.size[1])
    for i in range(start_x, width):
        for j in range(height):
            pixel1 = cut_pixels.getpixel((i, j))
            pixel2 = full_pixels.getpixel((i, j))
            if all(abs(a - b) > threshold for a, b in zip(pixel1, pixel2)):
                print(i - offset)
                return i - offset
    return None


get_distance('ele_capture2.png', 'ele_capture1.png')  # (gapped background, full background)
执行完第一个类后,如果没有登录成功,就执行上面这个函数,调整 threshold,并用它替换第一个类中的 slide_distance() 函数。上面函数中的 75 为滑块截图最右边阴影到图片最左端的长度,这个值需要根据自己的截图自行修改。
还有另一个方法,就是一边移动滑块一边截图:随着滑块移动,原本约为 86 的色差会变小,需要移动的距离也随之变小。每移动一次就进行一次图片相似度处理,当与完整背景图的相似度达到 90% 以上时,把已移动的距离保留下来,再进行行为处理,模拟人的移动方式去移动滑块,同样可以成功。这需要图像算法,我自己不会,但找到资料后测试过,也能通过验证。速度慢,但是准确度 100%。
如果都能成功,就可以将driver设置为无头模式,让selenium不在界面上显示。
后续会出更好的爬虫博文。
喜欢就点个赞,萌萌哒。
加载全部内容