# Paste-site metadata (not part of the program):
# Untitled, unknown, plain_text, 2 years ago, 8.8 kB, 9, Indexable
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from scraper import Scraper
import random
from selenium.webdriver.common.action_chains import ActionChains
class Twitter(Scraper):
    """Selenium routine that checks a Twitter session, then likes and follows.

    Every status is reported as a ``{'Twitter': <status>}`` dictionary.

    The ``click_element``, ``scroll_down_page`` and ``scroll_to_element``
    helpers called below are not defined in this class; they are presumably
    inherited from ``Scraper`` (TODO confirm).
    """

    _HOME_URL = 'https://twitter.com/home'
    _LANDING_URL = 'https://twitter.com/'
    _ACCESS_URL = 'https://twitter.com/account/access'

    # Element that is waited for once /home has opened (posts / suspension banner).
    _XPATH_FEED = '//div[contains(@class, "css-1dbjc4n") and contains(@class, "r-1adg3ll") and contains(@class, "r-1ny4l3l")]'
    # Headings whose text is scanned for the word "suspended".
    _XPATH_HEADING = '//h1[(@dir="ltr") and (@role="heading")]'
    # Landing page: sign-in button and the cookie prompt that can cover it.
    _XPATH_SIGN_IN = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div[1]/div[1]/div/div[3]/div[5]/a/div/span/span'
    _XPATH_COOKIES = '//*[@id="layers"]/div/div/div/div/div/div[2]/div[2]/div/span/span'
    # Sign-in dialog fields and buttons.
    _XPATH_EMAIL = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[5]/label/div/div[2]/div/input'
    _XPATH_EMAIL_NEXT = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div/div/div/div[6]/div/span/span'
    _XPATH_USERNAME = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div[2]/label/div/div[2]/div/input'
    _XPATH_USERNAME_NEXT = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div/div/div/div/span/span'
    _XPATH_PASSWORD = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[1]/div/div/div[3]/div/label/div/div[2]/div[1]/input'
    _XPATH_LOGIN = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[2]/div/div[1]/div/div/div/div/span/span'
    # Heading looked for after a failed sign-in (temporary-suspension check).
    _XPATH_ACCESS_HEADING = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[5]/div/section/div/div/div/div/div[2]/div/div/div/div/h1/span/span/span'
    # Overlay element clicked before liking; presumably a dismissible prompt
    # (the original code only names it "update" -- TODO confirm).
    _XPATH_UPDATE = '//*[@id="layers"]/div[2]/div/div/div/div/div/div[2]/div[2]/div/div/div/div/div[2]/div'
    _XPATH_LIKE = '//div[(@data-testid="like")]'
    # Three follow buttons: each pair holds two variants of the same button
    # (with or without invisible underline).
    _FOLLOW_XPATHS = (
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[1]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[1]/div/div[2]/div/div[2]/div"),
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[2]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[2]/div/div[2]/div/div[2]/div"),
        ("//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[3]/div[1]/div[2]/div/div[2]/div",
         "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div[2]/div/div[2]/div/div/div/div[4]/div/aside/div[2]/div[3]/div/div[2]/div/div[2]/div"),
    )

    def __init__(self):
        # NOTE(review): Scraper.__init__ is not invoked here; confirm that
        # this is intentional before adding a super().__init__() call.
        self.url = 'https://twitter.com'

    def twitter_routine(self, driver):
        """Check the session and, when logged in, like a post and follow an account.

        Args:
            driver: Selenium WebDriver to operate on.

        Returns:
            The status dictionary from ``check_login``. When the status is
            ``'Logged in'``, each action code returned by ``like_follow`` is
            appended with a ``'+'`` prefix, e.g. ``'Logged in+L+F'``.
        """
        login_status = self.check_login(driver)
        if login_status['Twitter'] == 'Logged in':
            actions_done = self.like_follow(driver)
            login_status['Twitter'] += ''.join('+' + action for action in actions_done)
        return login_status

    def check_login(self, driver, email=None, password=None, username=None):
        """Open Twitter and report whether the browser session is logged in.

        Args:
            driver: Selenium WebDriver to operate on.
            email: Value typed into the first sign-in field (optional).
            password: Account password (optional).
            username: Value typed when the dialog asks for a username before
                the password (optional).

        Returns:
            ``{'Twitter': status}`` where status is one of:

            * ``'Logged in'`` -- /home opened, or the sign-in form was
              submitted (the result of the submission is not verified);
            * ``'suspended'`` -- /home opened and a heading contains
              "suspended";
            * ``'Not in'`` -- /home did not open and ``email`` or
              ``password`` was not supplied;
            * ``'temp-suspended'`` -- sign-in failed and the browser is on
              the account-access page;
            * ``'Failed'`` -- sign-in failed for any other reason.

        NOTE: the sign-in steps used to sit behind an unconditional
        ``return`` and could never run. They now run only when both
        ``email`` and ``password`` are given, so calls without credentials
        still yield ``'Not in'``.
        """
        # Wait object with a max wait time of 7 seconds.
        wait = WebDriverWait(driver, timeout=7)
        driver.get(self.url)
        try:
            # If twitter.com/home opens we are logged in; then wait for the
            # posts / suspension banner container to be present.
            wait.until(EC.url_to_be(self._HOME_URL))
            wait.until(EC.presence_of_element_located((By.XPATH, self._XPATH_FEED)))
        except Exception:
            if email is None or password is None:
                return {'Twitter': 'Not in'}
            return self._sign_in(driver, wait, email, password, username)
        return self._session_status(driver)

    def like_follow(self, driver):
        """Try to like one post and follow one suggested account.

        Every step is best-effort: a step that fails is skipped and the
        remaining steps still run.

        Args:
            driver: Selenium WebDriver to operate on.

        Returns:
            List of action codes that completed: ``'L'`` for the like and
            ``'F'`` for the follow, in that order.
        """
        wait = WebDriverWait(driver, timeout=4)
        actions_done = []

        try:
            update = self._wait_clickable(wait, self._XPATH_UPDATE)
            self.click_element(driver, update)
        except Exception:
            # Best-effort: the overlay is usually absent.
            pass

        try:
            like = self._wait_clickable(wait, self._XPATH_LIKE)
            self.scroll_down_page(driver, random.randint(2000, 2500))
            self.scroll_to_element(driver, like)
            self.click_element(driver, like)
            actions_done.append('L')
        except Exception:
            # Best-effort: no like is recorded when any step fails.
            pass

        # Pick one button and try both of its variants, so the fallback
        # targets the same button rather than a freshly drawn one.
        primary_xpath, fallback_xpath = random.choice(self._FOLLOW_XPATHS)
        try:
            try:
                follow = self._wait_clickable(wait, primary_xpath)
            except Exception:
                follow = self._wait_clickable(wait, fallback_xpath)
            self.scroll_to_element(driver, follow)
            self.click_element(driver, follow)
            actions_done.append('F')
        except Exception:
            # Best-effort: no follow is recorded when any step fails.
            pass

        return actions_done

    @staticmethod
    def _wait_clickable(wait, xpath):
        """Return the element at ``xpath`` once ``wait`` sees it as clickable."""
        return wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))

    def _session_status(self, driver):
        """Return 'suspended' if any heading says so, otherwise 'Logged in'."""
        try:
            headings = driver.find_elements(By.XPATH, self._XPATH_HEADING)
            print(time.perf_counter(), headings)
            for heading in headings:
                print('text', heading.text)
                if 'suspended' in heading.text:
                    return {'Twitter': 'suspended'}
        except Exception:
            # A failure while reading headings is treated as a normal session.
            pass
        return {'Twitter': 'Logged in'}

    def _sign_in(self, driver, wait, email, password, username):
        """Fill in and submit the sign-in dialog; map any failure to a status."""
        try:
            # Either twitter.com opened and we can sign in, or an error page
            # opened from which we cannot; in that case reload the start page.
            if driver.current_url != self._LANDING_URL:
                print(driver.current_url)
                driver.get(self.url)
            self._open_sign_in_form(wait)
            self._wait_clickable(wait, self._XPATH_EMAIL).send_keys(email)
            self._wait_clickable(wait, self._XPATH_EMAIL_NEXT).click()
            # Twitter sometimes asks for the username before the password,
            # so fall back to the username step when the password step fails.
            try:
                self._submit_password(wait, password)
            except Exception:
                self._wait_clickable(wait, self._XPATH_USERNAME).send_keys(username)
                self._wait_clickable(wait, self._XPATH_USERNAME_NEXT).click()
                self._submit_password(wait, password)
            return {'Twitter': 'Logged in'}
        except Exception:
            return self._failure_status(driver, wait)

    def _open_sign_in_form(self, wait):
        """Click the sign-in button, accepting the cookie prompt first if it blocks."""
        try:
            self._wait_clickable(wait, self._XPATH_SIGN_IN).click()
        except Exception:
            self._wait_clickable(wait, self._XPATH_COOKIES).click()
            self._wait_clickable(wait, self._XPATH_SIGN_IN).click()

    def _submit_password(self, wait, password):
        """Type the password and click the login button."""
        self._wait_clickable(wait, self._XPATH_PASSWORD).send_keys(password)
        self._wait_clickable(wait, self._XPATH_LOGIN).click()

    def _failure_status(self, driver, wait):
        """Classify a failed sign-in as 'temp-suspended' or 'Failed'."""
        try:
            wait.until(EC.visibility_of_element_located((By.XPATH, self._XPATH_ACCESS_HEADING)))
            if driver.current_url == self._ACCESS_URL:
                return {'Twitter': 'temp-suspended'}
        except Exception:
            pass
        # Always return a status dictionary; callers index ['Twitter'].
        return {'Twitter': 'Failed'}