# Paste-site metadata (not part of the program):
# Untitled | unknown | python | 2 years ago | 1.9 kB | 4 | Indexable
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
def get_bib_info(title, *, timeout=10, driver_path='path_to_chromedriver'):
    """Search Google Scholar for *title* and return the BibTeX text found.

    A headless Chrome session is started, *title* is submitted through the
    search box on https://scholar.google.com/, the first button labelled
    'Quote' is clicked, the link whose text contains 'BibTeX' is followed,
    and the text of the element matching ``.gs_citi > .gs_prfl`` is returned.

    Args:
        title: Text typed into the Scholar search box.
        timeout: Seconds to wait for each expected element (default 10).
        driver_path: Path to the ChromeDriver executable. The default is a
            placeholder and must be replaced with a real path.

    Returns:
        The ``.text`` of the located BibTeX element.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the search box
            named ``q`` is not on the landing page.
        selenium.common.exceptions.TimeoutException: if any awaited element
            does not appear within *timeout* seconds.
    """
    options = Options()
    # The ``options.headless`` setter was deprecated and later removed from
    # Selenium 4; passing the Chrome flag works on old and new releases.
    options.add_argument("--headless")
    service = Service(driver_path)
    driver = webdriver.Chrome(service=service, options=options)

    # Everything after the driver exists runs under try/finally so the
    # browser process is always shut down, even if navigation or the search
    # itself fails.
    try:
        driver.get("https://scholar.google.com/")
        search_box = driver.find_element(By.NAME, "q")
        search_box.send_keys(title)
        search_box.submit()

        wait = WebDriverWait(driver, timeout)

        # Wait until the element can actually be clicked, not merely until
        # it is present in the DOM.
        quote_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Quote']"))
        )
        ActionChains(driver).move_to_element(quote_button).click().perform()

        bib_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//a[contains(text(), 'BibTeX')]"))
        )
        bib_button.click()

        bib_element = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, ".gs_citi > .gs_prfl"))
        )
        return bib_element.text
    finally:
        driver.quit()
def crawl_bib_info(titles):
    """Gather bibliographic text for every title in *titles*.

    ``get_bib_info`` is called once per title, in input order; only truthy
    results are kept.

    Args:
        titles: Iterable of titles to look up.

    Returns:
        A list of the truthy ``get_bib_info`` results, in input order.
    """
    return [found for found in map(get_bib_info, titles) if found]
# Example usage: look up a single paper title.
titles = ["Image Super-Resolution Via Sparse Representation"]
bibliographic_info = crawl_bib_info(titles)

# Print each retrieved entry followed by an empty line.
for info in bibliographic_info:
    print(info, end="\n\n")
# (paste-site UI text, not part of the program) Editor is loading...