Untitled
unknown
python
2 years ago
1.9 kB
3
Indexable
import logging
from typing import Iterable, List, Optional

from bs4 import BeautifulSoup  # noqa: F401  (unused below; kept from the original import list)
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

logger = logging.getLogger(__name__)

# Replace with the path to your ChromeDriver executable
CHROMEDRIVER_PATH = "path_to_chromedriver"


def get_bib_info(
    title: str,
    *,
    driver_path: str = CHROMEDRIVER_PATH,
    timeout: float = 10,
) -> Optional[str]:
    """Look up *title* on Google Scholar and return its BibTeX entry text.

    A fresh headless Chrome session is started for the lookup and is always
    shut down before returning, whether the lookup succeeded or not.

    Args:
        title: Text typed into the Google Scholar search box.
        driver_path: Filesystem path of the ChromeDriver executable.
        timeout: Maximum number of seconds to wait for each page element.

    Returns:
        The text of the located BibTeX element, or ``None`` when one of the
        expected page elements did not appear within ``timeout`` seconds.
    """
    options = Options()
    # The ``Options.headless`` setter was deprecated and later removed from
    # Selenium 4; assigning to it on current releases silently does nothing.
    # Passing the Chrome switch directly works across versions.
    options.add_argument("--headless=new")

    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # Everything after the driver exists runs under try/finally so the
    # browser process is released even if navigation or the search fails.
    try:
        wait = WebDriverWait(driver, timeout)

        driver.get("https://scholar.google.com/")
        search_box = wait.until(
            EC.presence_of_element_located((By.NAME, "q"))
        )
        search_box.send_keys(title)
        search_box.submit()

        # NOTE(review): the three locators below are carried over unchanged;
        # they could not be checked against the live Google Scholar markup
        # here, so confirm them if lookups keep timing out.
        quote_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//button[@aria-label='Quote']")
            )
        )
        ActionChains(driver).move_to_element(quote_button).click().perform()

        bib_button = wait.until(
            EC.element_to_be_clickable(
                (By.XPATH, "//a[contains(text(), 'BibTeX')]")
            )
        )
        bib_button.click()

        bib_element = wait.until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, ".gs_citi > .gs_prfl")
            )
        )
        return bib_element.text
    except TimeoutException:
        # One missing element should not abort a whole batch; the caller
        # filters out falsy results.
        logger.warning("Timed out retrieving BibTeX for %r", title)
        return None
    finally:
        driver.quit()


def crawl_bib_info(titles: Iterable[str]) -> List[str]:
    """Collect BibTeX text for every title that yields a non-empty result.

    Args:
        titles: Paper titles to look up, one ``get_bib_info`` call each.

    Returns:
        The non-empty results, in the same order as their titles.
    """
    return [info for info in map(get_bib_info, titles) if info]


if __name__ == "__main__":
    # Example usage
    titles = ["Image Super-Resolution Via Sparse Representation"]
    bibliographic_info = crawl_bib_info(titles)

    # Print the bibliographic information
    for info in bibliographic_info:
        print(info)
        print()
Editor is loading...