import csv
import time

import requests
from bs4 import BeautifulSoup

# --- Scraper 1: book details from alkitab.tn ---

# Send a GET request to the listing URL
url = 'https://www.alkitab.tn/list-105801/new-english-books/'
response = requests.get(url)

# Parse the HTML content using Beautiful Soup
soup = BeautifulSoup(response.content, 'html.parser')

# Find all div elements that wrap a single product's metadata
product_divs = soup.find_all('div', class_='meta_produit col-md-10 col-xs-8 no-padding')

# Create the CSV file, write the header row, then the scraped rows
with open('scraped_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['ISBN', 'Bookname', 'Price', 'Author', 'Editor'])

    # Loop through each product div and extract the relevant fields
    for product_div in product_divs:
        isbn = product_div.find('div', class_='hidden-xs').text.strip()
        bookname = product_div.find('h4', class_='livre_titre').text.strip()
        # select_one matches by class token, so it is not tripped up by the
        # stray trailing space in the site's full class attribute string
        price = product_div.select_one('div.item_prix').text.strip()
        author = product_div.find('h6', class_='livre_auteur').text.strip()
        editor = product_div.find('div', class_='editeur').text.strip()

        # Write the extracted row to the CSV file
        csv_writer.writerow([isbn, bookname, price, author, editor])


# --- Scraper 2: product links from ceresbookshop.com ---

url = "https://ceresbookshop.com/fr/s/4704/romans"
page_num = 1

# Open the output file once instead of reopening it for every single link
with open("Books_links.csv", "a", newline='') as csvfile:
    writer = csv.writer(csvfile)

    while page_num < 470:
        try:
            # Send a GET request to the current page
            response = requests.get(f"{url}?page={page_num}")

            # Check that the page exists before parsing it
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, 'html.parser')
                print(f"Page {page_num} opened.")

                # Extract the product links from the page
                products = soup.find_all("div", class_="product_name")
                for product in products:
                    link = product.find("a")["href"]
                    writer.writerow([link])
                    print(f"Link written to CSV file: {link}")

                # Move on to the next page, with a polite 1-second delay
                page_num += 1
                time.sleep(1)
            else:
                print(f"Scraping stopped: status {response.status_code}. "
                      f"{page_num - 1} pages scraped.")
                break
        except Exception as e:
            print(f"Encountered an exception: {e}")
            print("Will retry in 5 seconds...")
            time.sleep(5)

print(f"Scraping done! {page_num - 1} pages scraped.")
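
# One caveat: in the first scraper, each find(...) call returns None when a
# product card is missing a field, so a single irregular card raises an
# AttributeError and aborts the whole run. Below is a minimal defensive
# sketch, assuming the same tag names and class names as above; the
# safe_text helper is hypothetical, not part of the original scripts.

def safe_text(parent, name, class_, default="N/A"):
    """Return the stripped text of a child tag, or a default if it is missing."""
    tag = parent.find(name, class_=class_)
    return tag.text.strip() if tag is not None else default

# Example usage inside the product loop, replacing the direct .text calls:
#     isbn = safe_text(product_div, 'div', 'hidden-xs')
#     bookname = safe_text(product_div, 'h4', 'livre_titre')
# With this pattern, a malformed card yields a placeholder row in the CSV
# instead of crashing the scrape partway through.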