import csv
import requests
from bs4 import BeautifulSoup

# Create the CSV file and write the header row
with open("scraped_data.csv", "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['ISBN', 'Bookname', 'Price', 'Author', 'Editor'])
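
# A minimal alternative sketch using csv.DictWriter, in case named columns are
# easier to work with later (same file name and header as above; the dict shown
# in the comment is illustrative, not data from the site):
import csv

with open("scraped_data.csv", "w", newline='') as csvfile:
    fieldnames = ['ISBN', 'Bookname', 'Price', 'Author', 'Editor']
    dict_writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    dict_writer.writeheader()
    # Rows can then be written as dicts, e.g.:
    # dict_writer.writerow({'ISBN': '...', 'Bookname': '...', 'Price': '...',
    #                       'Author': '...', 'Editor': '...'})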
import requests
from bs4 import BeautifulSoup
import csv

# Send a GET request to the listing page
url = 'https://www.alkitab.tn/list-105801/new-english-books/'
response = requests.get(url)

# Parse the HTML content using Beautiful Soup
soup = BeautifulSoup(response.content, 'html.parser')

# Find all div elements with class 'meta_produit col-md-10 col-xs-8 no-padding'
product_divs = soup.find_all('div', class_='meta_produit col-md-10 col-xs-8 no-padding')

# Create a CSV file to store the scraped data
with open('scraped_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Write the header row to the CSV file
    csv_writer.writerow(['ISBN', 'Bookname', 'Price', 'Author', 'Editor'])
    # Loop through each product div and extract the relevant fields
    for product_div in product_divs:
        isbn = product_div.find('div', class_='hidden-xs').text.strip()
        bookname = product_div.find('h4', class_='livre_titre').text.strip()
        price = product_div.find('div', class_='item_prix ml-01 table_prix_livraison paper ').text.strip()
        author = product_div.find('h6', class_='livre_auteur').text.strip()
        editor = product_div.find('div', class_='editeur').text.strip()
        # Write the extracted row to the CSV file
        csv_writer.writerow([isbn, bookname, price, author, editor])
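
# The .find(...).text chains above raise AttributeError whenever a product is
# missing one of the elements. A defensive sketch, assuming the same class
# names as above (get_text_or_empty is a hypothetical helper, not part of the
# site's markup or of Beautiful Soup):
def get_text_or_empty(parent, tag, class_name):
    # Return the stripped text of the first match, or '' if the tag is absent
    element = parent.find(tag, class_=class_name)
    return element.text.strip() if element else ''

# Usage inside the loop above, e.g.:
# isbn = get_text_or_empty(product_div, 'div', 'hidden-xs')
# price = get_text_or_empty(product_div, 'div',
#                           'item_prix ml-01 table_prix_livraison paper ')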
import requests
from bs4 import BeautifulSoup
import csv
import time

# Listing URL to paginate through
url = "https://ceresbookshop.com/fr/s/4704/romans"
if url:
    print("got url")

# Start from the first page
page_num = 1
while page_num < 470:
    try:
        # Send a GET request to the current page
        response = requests.get(f"{url}?page={page_num}")
        # Check whether the page exists
        if response.status_code == 200:
            # Parse the HTML content using BeautifulSoup
            soup = BeautifulSoup(response.content, 'html.parser')
            print(f"Page {page_num} opened.")
            # Extract product links from the page
            products = soup.find_all("div", class_="product_name")
            # Append every link on this page to the CSV file
            with open("Books_links.csv", "a", newline='') as csvfile:
                writer = csv.writer(csvfile)
                for product in products:
                    link = product.find("a")["href"]
                    writer.writerow([link])
                    print(f"Link written to CSV file: {link}")
            print(f"Page {page_num} done; {page_num} pages scraped so far.")
            # Move on to the next page
            page_num += 1
            # Add a delay of 1 second between requests
            time.sleep(1)
        else:
            print(f"Scraping stopped: status {response.status_code}. {page_num - 1} pages scraped.")
            break
    except Exception as e:
        print(f"Encountered an exception: {e}")
        print("Will retry in 5 seconds...")
        time.sleep(5)
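
# The except branch above retries the same page indefinitely if the site keeps
# failing. A sketch of a capped-retry variant using a shared requests.Session
# (MAX_RETRIES, the timeout, and the User-Agent string are illustrative
# assumptions, not values from the scripts above):
import requests
import time

MAX_RETRIES = 3
session = requests.Session()
session.headers.update({"User-Agent": "Mozilla/5.0"})

def fetch_page(url, page_num):
    # Try the page up to MAX_RETRIES times, waiting 5 seconds between attempts
    for attempt in range(MAX_RETRIES):
        try:
            return session.get(f"{url}?page={page_num}", timeout=10)
        except requests.RequestException as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            time.sleep(5)
    return None  # caller should stop or skip this page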