import csv
from dataclasses import dataclass, asdict
from typing import List

import requests
from bs4 import BeautifulSoup


@dataclass
class Car:
    link: str
    full_name: str
    year_class: str
    year: str
    mileage: str
    engine_capacity: str
    fuel_type: str
    price_pln: str  # raw price text as displayed on the listing card


class OtomotoScraper:
    def __init__(self, car_make: str) -> None:
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
                          "AppleWebKit/537.11 (KHTML, like Gecko) "
                          "Chrome/23.0.1271.64 Safari/537.11",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.3",
            "Accept-Encoding": "none",
            "Accept-Language": "en-US,en;q=0.8",
            "Connection": "keep-alive",
        }
        self.car_make = car_make
        self.website = "https://www.otomoto.pl/osobowe"

    def scrape_pages(self, number_of_pages: int) -> List[Car]:
        cars: List[Car] = []
        for i in range(1, number_of_pages + 1):
            # Build the paginated search URL for the given make.
            current_page = f"{self.website}/{self.car_make}/?page={i}"
            new_cars = self.scrape_cars_from_page(current_page)
            if new_cars:
                cars += new_cars
        return cars

    def scrape_cars_from_page(self, current_page: str) -> List[Car]:
        try:
            response = requests.get(current_page, headers=self.headers, timeout=30).text
            soup = BeautifulSoup(response, "html.parser")
            cars = self.extract_cars_from_page(soup)
            return cars
        except Exception as e:
            print(f"Problem with {current_page}, reason: {e}")
            return []

    def extract_cars_from_page(self, soup: BeautifulSoup) -> List[Car]:
        offers_table = soup.find('main', attrs={'data-testid': 'search-results'})
        offers = offers_table.find_all('article')
        list_of_cars = []
        for offer in offers:
            try:
                title = offer.find('h2', attrs={'data-testid': 'ad-title'})
                link = title.find('a', href=True).get('href')
                full_name = title.text
                price_pln = offer.find(
                    'div', class_="e1b25f6f10 ooa-dsk6y6 er34gjf0"
                ).find('span', class_='ooa-1bmnxg7 e1b25f6f11').text
                attrs = offer.find_all('li', class_='ooa-1k7nwcr e1teo0cs0')
                year = attrs[0].text
                mileage = attrs[1].text
                engine_capacity = attrs[2].text
                fuel_type = attrs[3].text
                list_of_cars.append(
                    Car(
                        link=link,
                        full_name=full_name,
                        year_class="",  # not exposed on the listing card
                        price_pln=price_pln,
                        year=year,
                        mileage=mileage,
                        engine_capacity=engine_capacity,
                        fuel_type=fuel_type,
                    ))
            except Exception as e:
                print(f"Error {e}")
        return list_of_cars


def write_to_csv(cars: List[Car]) -> None:
    with open('cars.csv', mode='w', newline='', encoding='utf-8') as file:
        fieldnames = ['link', 'full_name', 'year_class', 'year',
                      'mileage', 'engine_capacity', 'fuel_type', 'price_pln']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for car in cars:
            writer.writerow(asdict(car))


def scrape_otomoto() -> None:
    make = 'bmw'
    scraper = OtomotoScraper(make)
    cars = scraper.scrape_pages(1)
    print(f"Scraped {len(cars)} offers for {make}.")
    # write_to_csv(cars)


if __name__ == '__main__':
    scrape_otomoto()
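The Car fields above keep the raw strings shown on the listing card. If a numeric price is needed (for sorting or filtering), a small parser can be added; the sketch below is an assumption about Otomoto's price format (e.g. "189 900 PLN"), not something the scraper itself confirms, and parse_price_pln is a hypothetical helper name.

import re

def parse_price_pln(price_text: str) -> int:
    # Keep only the digits from a price string such as "189 900 PLN" (assumed format).
    digits = re.sub(r"\D", "", price_text)
    if not digits:
        raise ValueError(f"No digits found in price text: {price_text!r}")
    return int(digits)

# Example usage: parse_price_pln("189 900 PLN") -> 189900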