Untitled
unknown
plain_text
3 years ago
8.8 kB
8
Indexable
import json
import random
import time
from typing import Dict, List
import requests
from time import sleep
from selenium import webdriver
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import TimeoutException
import base64
def human_click(browser: webdriver, element: WebElement):
"""
Clicks an element on a web page in a 'human like' way to avoid bot detection
:param browser: The web driver
:param element: The element to click
"""
action_chains = ActionChains(browser)
wait_time = random.uniform(0, 0.5)
action_chains.pause(wait_time).move_to_element(element).pause(wait_time).click().perform()
class CaptchaSolver:
def __init__(self, key: str, max_element_wait_time=60, request_timeout_length=120, solving_timeout_length=300):
self.api_key = key
self.max_element_wait_time = max_element_wait_time
self.request_timeout_length = request_timeout_length
self.solving_timeout_length = solving_timeout_length
def solve_captcha_selenium(self, browser: webdriver) -> bool:
wait = WebDriverWait(browser, self.max_element_wait_time)
# Go to clicking iframe
try:
wait.until(EC.frame_to_be_available_and_switch_to_it(
(By.XPATH, "//iframe[@title='widget containing checkbox for hCaptcha security challenge']")))
except TimeoutException:
print("[ERROR] : Can't find captcha!")
return False
# Click Start button
start_button = wait.until(EC.element_to_be_clickable((By.ID, "anchor")))
human_click(browser, start_button)
time.sleep(10)
# Go to form iframe
browser.switch_to.default_content()
try:
# Check if frame can be found (if not an exception is thrown and the captcha is solved)
WebDriverWait(browser, 5).until(EC.frame_to_be_available_and_switch_to_it(
(By.XPATH, "//iframe[@title='Main content of the hCaptcha challenge']")))
except TimeoutException:
return True
# Go through different screens until finished
time_out_time = time.time() + self.solving_timeout_length
while time.time() < time_out_time:
prompt_text: str = wait.until(
EC.presence_of_element_located((By.CLASS_NAME, "prompt-text"))).text
# Get images
images: List[WebElement] = []
for i in range(1, 10):
element_xpath = f"/html/body/div[1]/div/div/div[2]/div[{i}]/div[2]/div"
wait.until(EC.text_to_be_present_in_element_attribute(
(By.XPATH, element_xpath), "style", "url"))
images.append(wait.until(EC.presence_of_element_located(
(By.XPATH, element_xpath))))
base64_images = CaptchaSolver.get_base64_images_from_captcha(images)
try:
actions = self.get_captcha_solution(base64_images, prompt_text)
except Exception as e:
print(f"Got exception: {e} when trying to get solution")
return False
# Check if it is making us click all squares (normally means it has no clue what it's doing)
click_count = sum(map(lambda x: 1 if x == 'CLICK' else 0, actions))
if click_count == 9:
# Remove some random clicks
for _ in range(random.randint(1, 4)):
actions[random.randint(0, len(actions) - 1)] = "NOPE"
# Click on images
for i in range(9):
if actions[i] == "CLICK":
image = wait.until(
EC.element_to_be_clickable((By.XPATH, f"/html/body/div[1]/div/div/div[2]/div[{i + 1}]")))
human_click(browser, image)
# Submit
submit_button = wait.until(
EC.element_to_be_clickable((By.CLASS_NAME, "button-submit")))
human_click(browser, submit_button)
# Leave iframe and go back to start iframe
browser.switch_to.default_content()
wait.until(EC.frame_to_be_available_and_switch_to_it(
(By.XPATH, "//iframe[@title='widget containing checkbox for hCaptcha security challenge']")))
# Check if finished
try:
if WebDriverWait(browser, 5).until(EC.text_to_be_present_in_element_attribute(
(By.CLASS_NAME, "check"), "style", "display: block")
):
browser.switch_to.default_content() # Exit iframe
return True
except TimeoutException:
# Exit iframe and go back to captcha iframe
browser.switch_to.default_content()
wait.until(EC.frame_to_be_available_and_switch_to_it(
(By.XPATH, "//iframe[@title='Main content of the hCaptcha challenge']")))
"""
# Use this code for a check if the captcha is removed from the page or you're redirected once it is finished
try:
WebDriverWait(browser, 15).until(EC.frame_to_be_available_and_switch_to_it(
(By.XPATH, "//iframe[@title='Main content of the hCaptcha challenge']")))
except TimeoutException:
return True
"""
return False
@staticmethod
def get_base64_images_from_captcha(images: List[WebElement]) -> List[str]:
results = []
for image in images:
# Get url of each image
style = image.get_attribute("style")
url = style.split("url")[1].split(')')[0][1:].replace('"', '')
# Get image encoding
results.append(base64.b64encode(requests.get(url).content).decode('ascii'))
time.sleep(0.01)
return results
def get_captcha_solution(self, base64_images: List[str], prompt_text: str) -> List[str]:
create_task_endpoint = "https://api.anycaptcha.com/createTask"
headers = {
"Host": "api.anycaptcha.com",
"Content-Type": "application/json"
}
body = {
"clientKey": self.api_key,
"task": {
"type": "HCaptchaClickTask",
"ChallengeCaption": prompt_text,
"ImageIndex1": base64_images[0],
"ImageIndex2": base64_images[1],
"ImageIndex3": base64_images[2],
"ImageIndex4": base64_images[3],
"ImageIndex5": base64_images[4],
"ImageIndex6": base64_images[5],
"ImageIndex7": base64_images[6],
"ImageIndex8": base64_images[7],
"ImageIndex9": base64_images[8]
}
}
response = requests.post(create_task_endpoint, json=body, headers=headers)
json_response = json.loads(response.content)
if json_response['errorId'] == 0:
print("[SUCCESS] : Created captcha task")
else:
raise Exception(f"[ERROR]: Couldn't create task, reason {json_response['errorCode']}")
get_task_endpoint = "https://api.anycaptcha.com/getTaskResult"
body = {
"clientKey": self.api_key,
"taskId": json_response['taskId']
}
time_out = time.time() + self.request_timeout_length
while time.time() < time_out:
response = requests.post(get_task_endpoint, json=body, headers=headers)
json_response: Dict = json.loads(response.content)
if json_response['errorId'] > 0:
# If captcha can't be solved then click random images
if "NotSupportChallenge" in json_response['errorDescription']:
return ["CLICK" if random.randint(1, 3) == 3 else "NO" for _ in range(9)]
else:
raise Exception(
f"[ERROR] : Couldn't get response from task, reason {json_response['errorDescription']}")
elif json_response['errorId'] == 0 and json_response['status'] == "ready":
return list(json_response['solution'].values())
else:
sleep(1) # Is processing
raise Exception("[ERROR] : Captcha solver timed out")Editor is loading...