# webscraper
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 1.5 kB
# 1
# Indexable
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


# Scrape a paginated product listing: for every product card on the current
# page print its image URL, product link and name, then click the "next page"
# link and repeat until that link can no longer be found or used.

driver = webdriver.Chrome()


url = 'https://www.elitebmxshop.com/bmx-kerekpar?infinite_page=3'

try:
    driver.get(url)

    # One waiter is enough; it is reused for every page load.
    wait = WebDriverWait(driver, 10)

    while True:
        # Block until the product list container has at least one child div.
        wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="page_artlist_content"]/div')))

        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # NOTE: a class_ string containing spaces is matched by BeautifulSoup
        # against the exact value of the class attribute, so tags whose class
        # list differs (e.g. an <img> without "lazyloaded") are not found.
        for product_div in soup.find_all('div', class_='artlist__product h-100'):
            name_tag = product_div.find('h2', class_='font-s font-sm-m line-clamp--2-14 font-weight-bold')
            image_tag = product_div.find('img', class_='artlist__img product-img js-main-img lazyloaded')
            link_tag = product_div.find('a', class_='artlist__img-link product_link_normal prodcut-img-wrapper')

            # find() returns None when nothing matches; report the missing
            # field as None instead of crashing on the whole run.
            product_name = name_tag.text if name_tag is not None else None
            image_src = image_tag.get('data-src') if image_tag is not None else None
            image_link = link_tag.get('href') if link_tag is not None else None

            print(f'Kép URL: {image_src}')
            print(f'Link: {image_link}')
            print(f'Név: {product_name}')

        try:
            next_button = driver.find_element(By.XPATH, '//*[@id="pagination_next_page"]/a')
            next_button.click()
            # The old link going stale signals that the next page replaced it.
            wait.until(EC.staleness_of(next_button))
        except WebDriverException:
            # No next-page link, the click failed, or the page never changed:
            # treat all of these as the end of pagination.
            break
finally:
    # Always shut the browser down, even if a wait timed out or the run was
    # interrupted, so no Chrome process is left behind.
    driver.quit()