Untitled
unknown
plain_text
a year ago
8.8 kB
3
Indexable
Never
import random
import time

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

from scraper import Scraper

# --- XPath locators ---------------------------------------------------------
# Absolute DOM paths copied from the live site; they break whenever the page
# layout changes, so they are kept in one place to make updating them easy.

# Element whose presence is awaited after landing on /home.
_XP_TIMELINE = '//div[contains(@class, "css-1dbjc4n") and contains(@class, "r-1adg3ll") and contains(@class, "r-1ny4l3l")]'
# Headings scanned for the word "suspended".
_XP_HEADING = '//h1[(@dir="ltr") and (@role="heading")]'
# Landing-page sign-in button and the cookie banner that can cover it.
_XP_SIGNIN = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div[1]/div[1]/div/div[3]/div[5]/a/div/span/span'
_XP_COOKIES = '//*[@id="layers"]/div/div/div/div/div/div[2]/div[2]/div/span/span'
# Login dialog: e-mail step.
_XP_EMAIL = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[5]/label/div/div[2]/div/input'
_XP_EMAIL_NEXT = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[6]/div/span/span'
# Login dialog: optional username step.
_XP_USERNAME = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div[2]/label/div/div[2]/div/input'
_XP_USERNAME_NEXT = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div/div/div/div/span/span'
# Login dialog: password step.
_XP_PASSWORD = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div/div[3]/div/label/div/div[2]/div[1]/input'
_XP_LOGIN = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div[1]/div/div/div/div/span/span'
# Heading awaited when the login flow fails (checked together with the
# /account/access URL to report a temporary suspension).
_XP_ACCESS_HEADING = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[5]/div/section/div/div/div/div/div[2]/div/div/div/div/h1/span/span/span'
# Dialog button clicked (if present) before liking/following.
# NOTE(review): presumably an "update"/notice popup -- confirm on the live site.
_XP_UPDATE_POPUP = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div'
_XP_LIKE = '//div[(@data-testid="like")]'

# Three follow buttons in the sidebar; each has two DOM variants (with or
# without an invisible underline), stored as (variant_a, variant_b).
_XP_FOLLOW_ASIDE = "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]"
_FOLLOW_XPATHS = tuple(
    (
        _XP_FOLLOW_ASIDE + '/div[%d]/div[1]/div[2]/div/div[2]/div' % position,
        _XP_FOLLOW_ASIDE + '/div[%d]/div/div[2]/div/div[2]/div' % position,
    )
    for position in (1, 2, 3)
)


def _wait_clickable(wait, xpath):
    """Block until the element at *xpath* is clickable and return it."""
    return wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))


def _session_status(driver):
    """Return 'suspended' if any heading mentions it, otherwise 'Logged in'."""
    try:
        headings = driver.find_elements(By.XPATH, _XP_HEADING)
        print(time.perf_counter(), headings)
        for heading in headings:
            print('text', heading.text)
            if 'suspended' in heading.text:
                return {'Twitter': 'suspended'}
    except Exception:
        # Best effort: we already reached /home, so a failure while scanning
        # the headings is still reported as a live session.
        pass
    return {'Twitter': 'Logged in'}


def _submit_password(wait, password):
    """Type the password into the login dialog and press the login button."""
    _wait_clickable(wait, _XP_PASSWORD).send_keys(password)
    _wait_clickable(wait, _XP_LOGIN).click()


def _attempt_login(driver, wait, url, email, password, username):
    """Drive the login dialog; return a status dict describing the outcome."""
    try:
        # Anything other than the landing page (e.g. an error page) cannot be
        # used to log in, so reload the base URL first.
        if driver.current_url != 'https://twitter.com/':
            print(driver.current_url)
            driver.get(url)

        # The sign-in button may be covered by the cookie banner; accept the
        # cookies and retry if the first attempt fails.
        try:
            _wait_clickable(wait, _XP_SIGNIN).click()
        except Exception:
            _wait_clickable(wait, _XP_COOKIES).click()
            _wait_clickable(wait, _XP_SIGNIN).click()

        _wait_clickable(wait, _XP_EMAIL).send_keys(email)
        _wait_clickable(wait, _XP_EMAIL_NEXT).click()

        # The site sometimes asks for the username before the password; if the
        # password step is not available, go through the username step first.
        try:
            _submit_password(wait, password)
        except Exception:
            _wait_clickable(wait, _XP_USERNAME).send_keys(username)
            _wait_clickable(wait, _XP_USERNAME_NEXT).click()
            _submit_password(wait, password)
        return {'Twitter': 'Logged in'}
    except Exception:
        # Fall through to the suspension check below.
        pass

    try:
        wait.until(EC.visibility_of_element_located((By.XPATH, _XP_ACCESS_HEADING)))
        if driver.current_url == 'https://twitter.com/account/access':
            return {'Twitter': 'temp-suspended'}
    except Exception:
        pass
    # Always return a status dict: callers index the result with ['Twitter'].
    return {'Twitter': 'Failed'}


class Twitter(Scraper):
    """Twitter session checker that also performs a like and a follow."""

    def __init__(self):
        # NOTE(review): Scraper.__init__ is not invoked here -- confirm that
        # this is intentional.
        self.url = 'https://twitter.com'

    def twitter_routine(self, driver):
        """Check the session and, when logged in, like a post and follow someone.

        Returns the status dict from ``check_login``; for a logged-in session
        each performed action code is appended to the status string as
        ``'+<code>'`` (e.g. ``'Logged in+L+F'``).
        """
        login_status = self.check_login(driver)
        if login_status['Twitter'] == 'Logged in':
            actions_done = self.like_follow(driver)
            login_status['Twitter'] += ''.join('+' + action for action in actions_done)
        return login_status

    def check_login(self, driver, email=None, password=None, username=None):
        """Report the state of the Twitter session held by *driver*.

        Opens the site and waits up to 7 seconds for the /home URL.

        Args:
            driver: Selenium WebDriver to operate on.
            email: Account e-mail; a login is attempted only when both
                ``email`` and ``password`` are given.
            password: Account password.
            username: Account username, typed only if the site asks for it.

        Returns:
            A dict ``{'Twitter': status}`` where status is one of
            ``'Logged in'``, ``'suspended'``, ``'Not in'``,
            ``'temp-suspended'`` or ``'Failed'``.
        """
        wait = WebDriverWait(driver, timeout=7)
        driver.get(self.url)

        try:
            # Being redirected to /home means a session already exists.
            wait.until(EC.url_to_be('https://twitter.com/home'))
            wait.until(EC.presence_of_element_located((By.XPATH, _XP_TIMELINE)))
        except Exception:
            # No session. Without credentials there is nothing more to do;
            # this is the result callers got before for every such call.
            if email is None or password is None:
                return {'Twitter': 'Not in'}
            return _attempt_login(driver, wait, self.url, email, password, username)

        return _session_status(driver)

    def like_follow(self, driver):
        """Like one post and follow one suggested account, best effort.

        Each step is attempted independently and failures are ignored.

        Returns:
            List of action codes that succeeded: ``'L'`` for the like and
            ``'F'`` for the follow.
        """
        # scroll_down_page / scroll_to_element / click_element are not defined
        # in this class -- presumably inherited from Scraper.
        wait = WebDriverWait(driver, timeout=4)
        actions_done = []

        # Dismiss the dialog if it is showing; its absence is not an error.
        try:
            update = _wait_clickable(wait, _XP_UPDATE_POPUP)
            self.click_element(driver, update)
        except Exception:
            pass

        try:
            like = _wait_clickable(wait, _XP_LIKE)
            self.scroll_down_page(driver, random.randint(2000, 2500))
            self.scroll_to_element(driver, like)
            self.click_element(driver, like)
            actions_done.append('L')
        except Exception:
            pass

        # Pick ONE follow button, then try both DOM variants of that same
        # button (previously the fallback re-rolled and could target another).
        primary_xpath, fallback_xpath = random.choice(_FOLLOW_XPATHS)
        try:
            try:
                follow = _wait_clickable(wait, primary_xpath)
            except Exception:
                follow = _wait_clickable(wait, fallback_xpath)
            self.scroll_to_element(driver, follow)
            self.click_element(driver, follow)
            actions_done.append('F')
        except Exception:
            pass

        return actions_done