Untitled
unknown
python
4 years ago
1.0 kB
5
Indexable
# Collect the job-offer URLs listed on https://solid.jobs/offers/it.
#
# The page is opened in Chrome and scrolled down one screen height at a time
# until the scroll target passes the document height; the rendered HTML is
# then parsed and every offer link is resolved to an absolute URL in `urls`.
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

# Listing page; also the base against which relative offer links are resolved.
base = "https://solid.jobs/offers/it"
# Seconds to wait after each scroll step before re-measuring the page height.
scroll_pause_time = 1

# NOTE(review): newer Selenium releases may expect the driver path to be
# wrapped in a Service object instead of passed positionally -- confirm
# against the installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install())
try:
    driver.get(base)
    time.sleep(0.5)

    screen_height = driver.execute_script("return window.screen.height;")
    i = 1
    while True:
        # Scroll to the i-th multiple of the screen height.
        driver.execute_script(f"window.scrollTo(0, {screen_height}*{i});")
        i += 1
        time.sleep(scroll_pause_time)
        # Re-read the height after every step: it is compared against the
        # next scroll target to decide when to stop.
        scroll_height = driver.execute_script(
            "return document.body.scrollHeight;"
        )
        if screen_height * i > scroll_height:
            break

    # Snapshot the rendered page while the browser session is still alive.
    html = driver.page_source
finally:
    # Always shut the browser down, even if navigation or scrolling failed,
    # so no Chrome/chromedriver process is left behind.
    driver.quit()

urls = []
soup = BeautifulSoup(html, "html.parser")
for parent in soup.find_all(class_="font-weight-400 h5"):
    a_tag = parent.find("a", class_="color-dark-grey color-blue-onhover")
    if a_tag is None:
        # Heading without the expected anchor: nothing to collect.
        continue
    link = a_tag.get("href")
    if not link:
        # Anchor without a usable href: skip instead of raising KeyError.
        continue
    urls.append(urljoin(base, link))
Editor is loading...