Untitled
unknown
plain_text
2 years ago
1.5 kB
10
Indexable
import requests
from bs4 import BeautifulSoup
import csv
import time

# Scrape product links from the paginated "romans" category of ceresbookshop.com
# and append them, one per row, to Books_links.csv.
url = "https://ceresbookshop.com/fr/s/4704/romans"
if url:
    print("got url")

# Pages are 1-indexed; the category has fewer than 470 pages.
page_num = 1
while page_num < 470:
    try:
        # Send GET request to the current page; timeout so a stalled
        # connection cannot hang the scraper forever.
        response = requests.get(f"{url}?page={page_num}", timeout=30)
        # A non-200 status means we ran past the last page (or were blocked).
        if response.status_code == 200:
            # Parse HTML content using BeautifulSoup.
            soup = BeautifulSoup(response.content, 'html.parser')
            print(f"Page {page_num} opened.")
            # Extract product links from the page.
            products = soup.find_all("div", class_="product_name")
            # Open the CSV once per page rather than once per link.
            with open("Books_links.csv", "a", newline='') as csvfile:
                writer = csv.writer(csvfile)
                for product in products:
                    anchor = product.find("a")
                    # Skip malformed entries instead of crashing on them.
                    if anchor is None or not anchor.has_attr("href"):
                        continue
                    link = anchor["href"]
                    writer.writerow([link])
                    print(f"Link written to CSV file: {link}")
            # Move on to the next page.
            page_num += 1
            # Politeness delay of 1 second between requests.
            time.sleep(1)
        else:
            print(f"Scraping stopped not 200! {page_num-1} pages scraped.")
            break
    except requests.RequestException as e:
        # Transient network failure: wait, then retry the SAME page.
        # (Only network errors are retried; anything else should surface.)
        print(f"Encountered an exception: {e}")
        print("Will retry in 5 seconds...")
        time.sleep(5)

# Final summary, printed once after the loop finishes.
print(f"Scraping done! {page_num-1} pages scraped.")
Editor is loading...