Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
8.8 kB
3
Indexable
Never
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from scraper import Scraper
import random
from selenium.webdriver.common.action_chains import ActionChains
class Twitter(Scraper):
    """Twitter routine: detect the session's login state, then like and follow.

    Every status is reported as a one-entry dict keyed by ``'Twitter'``.

    NOTE(review): ``click_element``, ``scroll_down_page`` and
    ``scroll_to_element`` are not defined in this class; presumably they are
    inherited from ``Scraper`` -- confirm.
    NOTE(review): ``__init__`` does not call ``super().__init__()``; whether
    ``Scraper`` needs that cannot be told from this file.
    """

    # Off by default: check_login reports 'Not in' without trying to sign in.
    # NOTE(review): the sign-in flow relies on absolute XPaths that may be
    # stale -- verify them against the live page before switching this on.
    _LOGIN_FLOW_ENABLED = False

    _HOME_URL = 'https://twitter.com/home'
    _LANDING_URL = 'https://twitter.com/'
    _ACCESS_URL = 'https://twitter.com/account/access'

    # Home page: container looked for after /home loads (posts/suspension banner).
    _TIMELINE_XPATH = '//div[contains(@class, "css-1dbjc4n") and contains(@class, "r-1adg3ll") and contains(@class, "r-1ny4l3l")]'
    _HEADING_XPATH = '//h1[(@dir="ltr") and (@role="heading")]'

    # Sign-in flow.
    _SIGNIN_XPATH = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div[1]/div[1]/div/div[3]/div[5]/a/div/span/span'
    _COOKIES_XPATH = '//*[@id="layers"]/div/div/div/div/div/div[2]/div[2]/div/span/span'
    _EMAIL_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[5]/label/div/div[2]/div/input'
    _EMAIL_NEXT_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[6]/div/span/span'
    _PASSWORD_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div/div[3]/div/label/div/div[2]/div[1]/input'
    _LOGIN_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div[1]/div/div/div/div/span/span'
    _USERNAME_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div[2]/label/div/div[2]/div/input'
    _USERNAME_NEXT_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div/div/div/div/span/span'
    _SUSPENSION_XPATH = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[5]/div/section/div/div/div/div/div[2]/div/div/div/div/h1/span/span/span'

    # Like / follow routine.
    # NOTE(review): what the "update" element in the #layers overlay is cannot
    # be told from this file; it is clicked first if it happens to be present.
    _UPDATE_XPATH = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div'
    _LIKE_XPATH = '//div[(@data-testid="like")]'
    # Three follow buttons; each pair holds two variants of the same button
    # (with or without invisible underline).
    _FOLLOW_XPATHS = (
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[1]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[1]/div/div[2]/div/div[2]/div"),
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[2]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[2]/div/div[2]/div/div[2]/div"),
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[3]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[3]/div/div[2]/div/div[2]/div"),
    )

    def __init__(self):
        self.url = 'https://twitter.com'

    def twitter_routine(self, driver):
        """Check the login state and, when logged in, run the like/follow step.

        Returns the dict produced by ``check_login``. When the status is
        ``'Logged in'``, a ``'+<code>'`` suffix is appended for every action
        ``like_follow`` reports (for example ``'Logged in+L+F'``).
        """
        login_status = self.check_login(driver)
        if login_status['Twitter'] == 'Logged in':
            actions_done = self.like_follow(driver)
            login_status['Twitter'] += ''.join('+' + action for action in actions_done)
        return login_status

    def check_login(self, driver, email=None, password=None, username=None):
        """Open Twitter and report the state of the current browser session.

        Args:
            driver: Selenium WebDriver to drive.
            email, password, username: credentials for the sign-in flow. They
                are only used when ``_LOGIN_FLOW_ENABLED`` is true and both
                ``email`` and ``password`` are given.

        Returns:
            ``{'Twitter': status}`` where status is ``'Logged in'``,
            ``'suspended'`` or ``'Not in'``; with the sign-in flow enabled it
            can also be ``'temp-suspended'`` or ``'Failed'``.
        """
        # Every wait in this method gives up after 7 seconds.
        wait = WebDriverWait(driver, timeout=7)
        driver.get(self.url)

        if self._home_timeline_loaded(wait):
            return self._status_from_headings(driver)

        can_sign_in = (
            self._LOGIN_FLOW_ENABLED
            and email is not None
            and password is not None
        )
        if not can_sign_in:
            return {'Twitter': 'Not in'}
        return self._login_with_credentials(driver, wait, email, password, username)

    def like_follow(self, driver):
        """Best-effort like of one post and follow of one sidebar account.

        Each step is independent: a step that fails is skipped and the
        remaining steps still run.

        Returns:
            List of codes for the steps that succeeded: ``'L'`` for the like,
            ``'F'`` for the follow.
        """
        wait = WebDriverWait(driver, timeout=4)
        actions_done = []
        self._click_update_if_present(driver, wait)
        if self._like_post(driver, wait):
            actions_done.append('L')
        if self._follow_account(driver, wait):
            actions_done.append('F')
        return actions_done

    @staticmethod
    def _wait_clickable(wait, xpath):
        """Return the element at *xpath* once ``wait`` sees it as clickable."""
        return wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

    def _home_timeline_loaded(self, wait):
        """Return True when /home is open and its content container appeared."""
        try:
            # Landing on /home means the session is logged in.
            wait.until(EC.url_to_be(self._HOME_URL))
            wait.until(EC.presence_of_element_located((By.XPATH, self._TIMELINE_XPATH)))
        except Exception:
            # Timeout or driver error: treat the session as not logged in.
            return False
        return True

    def _status_from_headings(self, driver):
        """Return 'suspended' if a heading on the home page says so, else 'Logged in'."""
        try:
            headings = driver.find_elements(By.XPATH, self._HEADING_XPATH)
            print(time.perf_counter(), headings)
            for heading in headings:
                text = heading.text
                print('text', text)
                if 'suspended' in text:
                    return {'Twitter': 'suspended'}
        except Exception:
            # Reading the headings is best-effort; /home already loaded.
            return {'Twitter': 'Logged in'}
        return {'Twitter': 'Logged in'}

    def _submit_password(self, wait, password):
        """Type the password and click the log-in button."""
        self._wait_clickable(wait, self._PASSWORD_XPATH).send_keys(password)
        self._wait_clickable(wait, self._LOGIN_XPATH).click()

    def _login_with_credentials(self, driver, wait, email, password, username):
        """Walk through the sign-in dialog; fall back to a suspension check on error."""
        try:
            # Anything other than the landing page (e.g. an error page) cannot
            # be signed in from, so reload the site first.
            if driver.current_url != self._LANDING_URL:
                print(driver.current_url)
                driver.get(self.url)
            try:
                self._wait_clickable(wait, self._SIGNIN_XPATH).click()
            except Exception:
                # The sign-in button is probably covered by the cookies banner:
                # accept cookies, then retry.
                self._wait_clickable(wait, self._COOKIES_XPATH).click()
                self._wait_clickable(wait, self._SIGNIN_XPATH).click()

            self._wait_clickable(wait, self._EMAIL_XPATH).send_keys(email)
            self._wait_clickable(wait, self._EMAIL_NEXT_XPATH).click()

            try:
                self._submit_password(wait, password)
            except Exception:
                # Twitter sometimes asks for the username before the password.
                self._wait_clickable(wait, self._USERNAME_XPATH).send_keys(username)
                self._wait_clickable(wait, self._USERNAME_NEXT_XPATH).click()
                self._submit_password(wait, password)
            return {'Twitter': 'Logged in'}
        except Exception:
            # Either a step broke or the account is suspended.
            return self._suspension_status(driver, wait)

    def _suspension_status(self, driver, wait):
        """Return 'temp-suspended' on the account-access page, otherwise 'Failed'."""
        try:
            wait.until(EC.visibility_of_element_located((By.XPATH, self._SUSPENSION_XPATH)))
            on_access_page = driver.current_url == self._ACCESS_URL
        except Exception:
            on_access_page = False
        # Always return a status dict so callers can index the result.
        return {'Twitter': 'temp-suspended' if on_access_page else 'Failed'}

    def _click_update_if_present(self, driver, wait):
        """Click the overlay "update" element when it shows up; return success."""
        try:
            update = self._wait_clickable(wait, self._UPDATE_XPATH)
            self.click_element(driver, update)
        except Exception:
            return False
        return True

    def _like_post(self, driver, wait):
        """Scroll down a random distance, then click a like button; return success."""
        try:
            like = self._wait_clickable(wait, self._LIKE_XPATH)
            self.scroll_down_page(driver, random.randint(2000, 2500))
            self.scroll_to_element(driver, like)
            self.click_element(driver, like)
        except Exception:
            return False
        return True

    def _follow_account(self, driver, wait):
        """Click one randomly chosen follow button; return success."""
        # Choose the button once so the fallback XPath targets the same button
        # as the first attempt.
        primary, fallback = random.choice(self._FOLLOW_XPATHS)
        try:
            try:
                follow = self._wait_clickable(wait, primary)
            except Exception:
                follow = self._wait_clickable(wait, fallback)
            self.scroll_to_element(driver, follow)
            self.click_element(driver, follow)
        except Exception:
            return False
        return True