# Paste-site metadata (not part of the program):
# Untitled / unknown / python / 3 years ago / 3.6 kB / 11 / Indexable
import pandas as pd
import requests
from bs4 import BeautifulSoup
from typing import List
import csv
from dataclasses import dataclass, asdict
@dataclass
class Car:
    """One car offer scraped from an otomoto.pl search-results page.

    The class is a dataclass so that it can be built with keyword arguments
    and converted with ``dataclasses.asdict`` (both are done elsewhere in
    this file).
    """

    # Value of the ``href`` attribute of the offer's title anchor.
    link: str
    # Text of the offer's title heading.
    full_name: str
    # Texts of the first four parameter list items of the offer, in page order.
    year: str
    mileage: str
    engine_capacity: str
    fuel_type: str
    # NOTE(review): annotated as int, but the scraper in this file stores the
    # raw price text here - confirm which one is intended.
    price_pln: int
    # The scraper never supplies this field, so it needs a default (and must
    # therefore come after the required fields) for construction to succeed.
    # NOTE(review): meaning of "year_class" is not visible in this file.
    year_class: str = ""
class OtomotoScraper:
    """Scrape passenger-car offers of one make from otomoto.pl search pages."""

    # Seconds to wait for the server; without a timeout ``requests`` can
    # block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, car_make: str) -> None:
        """Store the make to search for and the HTTP headers sent with requests.

        Args:
            car_make: Make as it appears in the site URL path (e.g. ``"bmw"``).
        """
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
            "AppleWebKit/537.11 (KHTML, like Gecko) "
            "Chrome/23.0.1271.64 Safari/537.11",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }
        self.car_make = car_make
        self.website = "https://www.otomoto.pl/osobowe"

    def _page_url(self, page_number: int) -> str:
        """Return the search URL of the given 1-based results page."""
        # The query string needs "page=<n>"; "page<n>" (no "=") would be a
        # differently named parameter and the page number would be lost.
        return f"{self.website}/{self.car_make}/?page={page_number}"

    def scrape_pages(self, number_of_pages: int) -> List["Car"]:
        """Scrape result pages 1..number_of_pages and return all cars found.

        Pages that fail to download or parse contribute no cars.
        """
        cars = []
        for page_number in range(1, number_of_pages + 1):
            cars.extend(self.scrape_cars_from_page(self._page_url(page_number)))
        return cars

    def scrape_cars_from_page(self, current_page: str) -> List["Car"]:
        """Download one results page and return the cars parsed from it.

        Any failure (network error, timeout, HTTP error status, unexpected
        page structure) is reported on stdout and yields an empty list, so
        one bad page does not abort the whole run.
        """
        try:
            response = requests.get(
                current_page,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            # Surface 4xx/5xx responses as a clear error instead of trying
            # to parse an error page.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")
            return self.extract_cars_from_page(soup)
        except Exception as e:
            print(f"Problem z {current_page}, reason: {e}")
            return []

    def extract_cars_from_page(self, soup: "BeautifulSoup") -> List["Car"]:
        """Build a ``Car`` for every ``<article>`` in the search-results section.

        Offers that lack an expected element are reported on stdout and
        skipped. If the page has no ``<main data-testid="search-results">``
        element, the lookup on ``None`` raises ``AttributeError``.
        """
        offers_table = soup.find("main", attrs={"data-testid": "search-results"})
        list_of_cars = []
        # The loop variable must not be named like the ``Car`` class: the
        # record is constructed inside the loop.
        for article in offers_table.find_all("article"):
            try:
                title = article.find("h2", attrs={"data-testid": "ad-title"})
                link = title.find("a", href=True).get("href")
                full_name = title.text
                price_box = article.find("div", class_="e1b25f6f10 ooa-dsk6y6 er34gjf0")
                price_pln = price_box.find("span", class_="ooa-1bmnxg7 e1b25f6f11").text
                # The first four parameter items are read positionally as
                # year, mileage, engine capacity and fuel type.
                details = article.find_all("li", class_="ooa-1k7nwcr e1teo0cs0")
                year = details[0].text
                mileage = details[1].text
                engine_capacity = details[2].text
                fuel_type = details[3].text
                list_of_cars.append(
                    Car(
                        link=link,
                        full_name=full_name,
                        price_pln=price_pln,
                        year=year,
                        mileage=mileage,
                        engine_capacity=engine_capacity,
                        fuel_type=fuel_type,
                    )
                )
            except Exception as e:
                # Best effort: one malformed offer must not drop the page.
                print(f"Error {e}")
        return list_of_cars
def write_to_csv(cars: List["Car"]) -> None:
    """Write the given cars to ``cars.csv`` in the current working directory.

    The file gets a header row followed by one row per car, with the columns
    link, full_name, price_pln, year, mileage, engine_capacity, fuel_type.
    An existing file is overwritten.

    Args:
        cars: Dataclass instances providing at least the columns above;
            any additional fields are left out of the file.
    """
    fieldnames = [
        'link', 'full_name', 'price_pln', 'year',
        'mileage', 'engine_capacity', 'fuel_type',
    ]
    # newline='' is what the csv module requires to avoid blank lines on
    # Windows; an explicit encoding keeps non-ASCII text portable.
    with open('cars.csv', mode='w', newline='', encoding='utf-8') as file:
        # DictWriter writes the *values* by column name. Passing the dict
        # itself to csv.writer.writerow would iterate its keys and emit the
        # field names on every row.
        writer = csv.DictWriter(file, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(asdict(car) for car in cars)
def scrape_otomoto() -> None:
    """Scrape the first results page for the make 'bmw' and print a marker.

    The scraped cars are not stored anywhere: the CSV export is switched off.
    """
    OtomotoScraper('bmw').scrape_pages(1)
    print('a')
    # Export currently disabled: write_to_csv(cars)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    scrape_otomoto()
# Editor is loading... (paste-site residue, not part of the program)