# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 1.9 kB
# 0
# Indexable
# Never
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

def get_bib_info(title, *, driver_path='path_to_chromedriver', timeout=10):
    """Search Google Scholar for *title* and return the text of its BibTeX entry.

    A headless Chrome session is started, the title is submitted through the
    Scholar search box, the "Quote" button and then the "BibTeX" link are
    clicked, and the text of the element matching ``.gs_citi > .gs_prfl`` is
    returned. The browser is always shut down, whether or not the lookup
    succeeds.

    Args:
        title: Text typed into the Scholar search box.
        driver_path: Path to the ChromeDriver executable. The default is a
            placeholder and must be replaced with a real path.
        timeout: Maximum number of seconds to wait for each page element.

    Returns:
        The ``.text`` of the located BibTeX element.

    Raises:
        selenium.common.exceptions.TimeoutException: If one of the awaited
            elements does not appear within ``timeout`` seconds.
        selenium.common.exceptions.NoSuchElementException: If the search box
            is not present on the landing page.
    """
    options = Options()
    # ``Options.headless = True`` is deprecated/removed in recent Selenium 4
    # releases (where the assignment is silently ignored); passing the Chrome
    # switch directly is what that setter used to do.
    options.add_argument("--headless")
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # Everything that touches the browser lives inside the try block so that
    # a failure while loading or searching cannot leak the Chrome process.
    try:
        wait = WebDriverWait(driver, timeout)

        driver.get("https://scholar.google.com/")
        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(title)
        search_box.submit()

        # Wait until the controls are actually clickable, not merely present
        # in the DOM, before interacting with them.
        quote_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Quote']"))
        )
        ActionChains(driver).move_to_element(quote_button).click().perform()

        bib_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'BibTeX')]"))
        )
        bib_button.click()

        # NOTE(review): selector kept from the original; confirm it still
        # matches the page Scholar serves after the BibTeX link is followed.
        bib_element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".gs_citi > .gs_prfl"))
        )
        return bib_element.text

    finally:
        driver.quit()

def crawl_bib_info(titles):
    """Collect the bibliographic text for every title that yields a result.

    Each title is passed to ``get_bib_info`` in order; falsy results (for
    example an empty string) are dropped from the returned list.
    """
    # ``map`` is lazy, so lookups still happen one at a time, in input order.
    lookups = map(get_bib_info, titles)
    return [entry for entry in lookups if entry]

# Demo: fetch the BibTeX text for a sample paper title.
titles = [
    "Image Super-Resolution Via Sparse Representation",
]
bibliographic_info = crawl_bib_info(titles)

# Emit each entry followed by one blank separator line.
for info in bibliographic_info:
    print(info, end="\n\n")