Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
2.0 kB
2
Indexable
Never
# Location of the Excel workbook used by job(): when this file already
# exists, job() loads it with pandas as the previous run's results.
excel_file_path = "turbo_cars.xlsx"

# Function to scrape and process the website
def job():
    global url



    # URL of the page to scrape
    url = 'https://turbo.az/autos?q%5Bsort%5D=&q%5Bmake%5D%5B%5D=1&q%5Bmodel%5D%5B%5D=&q%5Bused%5D=&q%5Bregion%5D%5B%5D=&q%5Bprice_from%5D=&q%5Bprice_to%5D=&q%5Bcurrency%5D=azn&q%5Bloan%5D=0&q%5Bbarter%5D=0&q%5Bcategory%5D%5B%5D=&q%5Byear_from%5D=2015&q%5Byear_to%5D=&q%5Bcolor%5D%5B%5D=&q%5Bfuel_type%5D%5B%5D=&q%5Bgear%5D%5B%5D=&q%5Btransmission%5D%5B%5D=&q%5Bengine_volume_from%5D=&q%5Bengine_volume_to%5D=&q%5Bpower_from%5D=&q%5Bpower_to%5D=&q%5Bmileage_from%5D=&q%5Bmileage_to%5D=&q%5Bonly_shops%5D=&q%5Bprior_owners_count%5D%5B%5D=&q%5Bseats_count%5D%5B%5D=&q%5Bmarket%5D%5B%5D=&q%5Bcrashed%5D=1&q%5Bpainted%5D=1&q%5Bfor_spare_parts%5D=0'

    # Load last results if exist
    last_results = pd.read_excel(excel_file_path) if os.path.exists(excel_file_path) else None

    # Creating a PrettyTable with the specified columns
    table = PrettyTable()
    table.field_names = ["Avtomobil", "İl", "Mühərrik", "Yürüş", "Qiymət", "Şəhər", "Yerləşdirilib", "Saytdakı vaxtı", "Link", "Vəziyyəti"]

    # List to store data for Excel export
    excel_data = []

    product_count = 0

    # Headers for the request
    headers = {
        'User-Agent': 'Mozilla/5.0',
    }

    while url:
        # Sending a GET request to fetch the HTML content
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Manually identify the part of the HTML that contains the product listings
        elanlar_section = soup.find('p', class_='section-title_name', string='ELANLAR')
        products_section = elanlar_section.find_next('div', class_='products')

        if products_section is None:
            print("Failed to find product listings.")
            break

        # Extracting the required details for each product within the products section