webscraper
unknown
python
a year ago
1.5 kB
1
Indexable
Never
# Scrape product listings from the Elite BMX shop with Selenium and
# BeautifulSoup: for every product card on each listing page, print the image
# URL, the product link and the product name, then follow the "next page"
# link until it can no longer be followed.
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def _text_or_none(tag):
    """Return ``tag.text``, or ``None`` when the lookup found no element."""
    return tag.text if tag is not None else None


def _attr_or_none(tag, attr):
    """Return ``tag.get(attr)``, or ``None`` when the lookup found no element."""
    return tag.get(attr) if tag is not None else None


def _print_products(page_source):
    """Parse one listing page's HTML and print every product found on it.

    Args:
        page_source: HTML text of the currently loaded listing page.

    For each product card three lines are printed: image URL, link, name.
    A field whose element is not present in the card is printed as ``None``
    instead of aborting the whole run.
    """
    soup = BeautifulSoup(page_source, 'html.parser')
    for product_div in soup.find_all('div', class_='artlist__product h-100'):
        # find() returns None when nothing matches, so every lookup is
        # guarded before its text/attribute is read.
        product_name = _text_or_none(
            product_div.find(
                'h2',
                class_='font-s font-sm-m line-clamp--2-14 font-weight-bold',
            )
        )
        image_src = _attr_or_none(
            product_div.find(
                'img',
                class_='artlist__img product-img js-main-img lazyloaded',
            ),
            'data-src',
        )
        image_link = _attr_or_none(
            product_div.find(
                'a',
                class_='artlist__img-link product_link_normal prodcut-img-wrapper',
            ),
            'href',
        )
        print(f'Kép URL: {image_src}')
        print(f'Link: {image_link}')
        print(f'Név: {product_name}')


driver = webdriver.Chrome()
url = 'https://www.elitebmxshop.com/bmx-kerekpar?infinite_page=3'

try:
    driver.get(url)
    # One waiter (10 second timeout) is reused for every page.
    wait = WebDriverWait(driver, 10)

    while True:
        # Block until the listing container has at least one child <div>.
        # A timeout here propagates; the finally clause still closes Chrome.
        wait.until(
            EC.presence_of_element_located(
                (By.XPATH, '//*[@id="page_artlist_content"]/div')
            )
        )
        _print_products(driver.page_source)

        try:
            next_button = driver.find_element(
                By.XPATH, '//*[@id="pagination_next_page"]/a'
            )
            next_button.click()
            # Wait for the clicked link to detach from the DOM before the
            # next iteration reads the page again.
            wait.until(EC.staleness_of(next_button))
        except WebDriverException:
            # Missing button, failed click, or staleness timeout: treat all
            # of them as "no further page" and stop paging.
            break
finally:
    # Always shut the browser down, even when scraping fails part-way.
    driver.quit()