Untitled

mail@pastecode.io avatarunknown
plain_text
2 months ago
9.3 kB
3
Indexable
Never
import requests
from bs4 import BeautifulSoup
from prettytable import PrettyTable
import pandas as pd
from datetime import datetime, timedelta
import os
import time
import asyncio
from telegram import Bot
import matplotlib.pyplot as plt
import seaborn as sns

bot_token = '6380703806:AAEkDw_JAveSyWoz99pfgD_D0Kjgv8BjzaQ'
channel_id = '@carpointaz'

async def send_to_telegram(message):
    bot = Bot(token=bot_token)
    await bot.send_message(chat_id=channel_id, text=message, parse_mode='Markdown')
    time.sleep(15)  # add delay to avoid hitting Telegram's rate limits

def clean_excel_data():
    df = pd.read_excel('turbo_cars.xlsx')
    df.drop_duplicates(subset='Link', keep='first', inplace=True)
    df.to_excel('turbo_cars.xlsx', index=False)

def update_veziyyeti():
    df = pd.read_excel('turbo_cars.xlsx')
    df.loc[df['Vəziyyəti'] == 'Satılıb', 'Vəziyyəti'] = '+'
    df.to_excel('turbo_cars.xlsx', index=False)

def read_excel_data():
    clean_excel_data()  # Clean the Excel data before reading
    df = pd.read_excel('turbo_cars.xlsx')
    return df

def create_plot(df):
    df_count = df.groupby(["Avtomobil", "Vəziyyəti"]).size().reset_index(name="count")
    plt.figure(figsize=(10,5))
    sns.barplot(y="Avtomobil", x="count", hue="Vəziyyəti", data=df_count)
    plt.title('Avtomobil Statistics')
    plt.show()

def main():
    update_veziyyeti()
    df = read_excel_data()
    create_plot(df)
    asyncio.run(send_to_telegram('Satılıb'))

if __name__ == "__main__":
    main()

# Excel file path
excel_file_path = 'turbo_cars.xlsx'

# Function to scrape and process the website
def job():
    global url



    # URL of the page to scrape
    url = 'https://turbo.az/autos?q%5Bsort%5D=&q%5Bmake%5D%5B%5D=1&q%5Bmodel%5D%5B%5D=&q%5Bused%5D=&q%5Bregion%5D%5B%5D=&q%5Bprice_from%5D=&q%5Bprice_to%5D=&q%5Bcurrency%5D=azn&q%5Bloan%5D=0&q%5Bbarter%5D=0&q%5Bcategory%5D%5B%5D=&q%5Byear_from%5D=2015&q%5Byear_to%5D=&q%5Bcolor%5D%5B%5D=&q%5Bfuel_type%5D%5B%5D=&q%5Bgear%5D%5B%5D=&q%5Btransmission%5D%5B%5D=&q%5Bengine_volume_from%5D=&q%5Bengine_volume_to%5D=&q%5Bpower_from%5D=&q%5Bpower_to%5D=&q%5Bmileage_from%5D=&q%5Bmileage_to%5D=&q%5Bonly_shops%5D=&q%5Bprior_owners_count%5D%5B%5D=&q%5Bseats_count%5D%5B%5D=&q%5Bmarket%5D%5B%5D=&q%5Bcrashed%5D=1&q%5Bpainted%5D=1&q%5Bfor_spare_parts%5D=0'

    # Load last results if exist
    last_results = pd.read_excel(excel_file_path) if os.path.exists(excel_file_path) else None

    # Creating a PrettyTable with the specified columns
    table = PrettyTable()
    table.field_names = ["Avtomobil", "İl", "Mühərrik", "Yürüş", "Qiymət", "Şəhər", "Yerləşdirilib", "Saytdakı vaxtı", "Link", "Vəziyyəti"]

    # List to store data for Excel export
    excel_data = []

    product_count = 0

    # Headers for the request
    headers = {
        'User-Agent': 'Mozilla/5.0',
    }

    while url:
        # Sending a GET request to fetch the HTML content
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Manually identify the part of the HTML that contains the product listings
        elanlar_section = soup.find('p', class_='section-title_name', string='ELANLAR')
        products_section = elanlar_section.find_next('div', class_='products')

        if products_section is None:
            print("Failed to find product listings.")
            break

        # Extracting the required details for each product within the products section
        for product_div in products_section.find_all('div', class_='products-i'):
            try:
                avtomobil = product_div.find('div', class_='products-i__name products-i__bottom-text').get_text().strip()
                il_muherrik_yurus = product_div.find('div', class_='products-i__attributes products-i__bottom-text').get_text().strip()
                il, muherrik, yurus = il_muherrik_yurus.split(', ')
                qiymet = product_div.find('div', class_='product-price').get_text().strip()
                sheher, yerlesdirilib = product_div.find('div', class_='products-i__datetime').get_text().strip().split(' ', 1)
                sheher = sheher.rstrip(',')

                # Replacing specific words with formatted dates
                today = datetime.today()
                if 'bugün' in yerlesdirilib:
                    yerlesdirilib = yerlesdirilib.replace('bugün', today.strftime('%d.%m.%Y'))
                elif 'dünən' in yerlesdirilib:
                    yesterday = today - timedelta(days=1)
                    yerlesdirilib = yerlesdirilib.replace('dünən', yesterday.strftime('%d.%m.%Y'))

                # Extracting date from the "Yerləşdirilib" column and calculating "Saytdakı vaxtı"
                yerlesdirilib_date_str = yerlesdirilib.split(' ')[0]
                yerlesdirilib_date = datetime.strptime(yerlesdirilib_date_str, '%d.%m.%Y')
                saytdaki_vaxti = (today - yerlesdirilib_date).days

                relative_link = product_div.find('a', class_='products-i__link')['href']
                link = "https://turbo.az" + relative_link

                # Adding the extracted details to the table
                row_data = [avtomobil, il, muherrik, yurus, qiymet, sheher, yerlesdirilib, saytdaki_vaxti, link]

                # Check the "Vəziyyəti" of the product
                if last_results is not None:
                    if link in last_results['Link'].values:
                        vaziyyeti = "-"
                    else:
                        vaziyyeti = "Yeni"
                else:
                    vaziyyeti = "Yeni"

                row_data.append(vaziyyeti)

                table.add_row(row_data)
                excel_data.append(row_data)

                product_count += 1
            except AttributeError:
                # Handle cases where the expected elements are not found
                continue

        # Find the next page URL
        next_link = soup.find('a', rel='next')
        url = "https://turbo.az" + next_link['href'] if next_link else None

    # Update "Vəziyyəti" of the products that are not in new results but in the last results
    if last_results is not None:
        last_links = set(last_results['Link'].values)
        new_links = set([row[-2] for row in excel_data])
        sold_links = last_links - new_links

        for link in sold_links:
            row_index = last_results[last_results['Link'] == link].index[0]
            last_results.loc[row_index, 'Vəziyyəti'] = 'Satılıb'

        # Append the sold products to new results
        sold_products = last_results[last_results['Vəziyyəti'] == 'Satılıb']
        excel_data.extend(sold_products.values.tolist())

    # Printing the PrettyTable
    print(table)

    # Printing the product count
    print(f"Total products found: {product_count}")

    # Creating a DataFrame and saving to Excel (using Excel format)
    columns = ["Avtomobil", "İl", "Mühərrik", "Yürüş", "Qiymət", "Şəhər", "Yerləşdirilib", "Saytdakı vaxtı", "Link", "Vəziyyəti"]
    df = pd.DataFrame(excel_data, columns=columns)
    df.to_excel(excel_file_path, index=False, engine='openpyxl')

# Create a set to store the links of cars that have been sent
sent_cars = set()

while True:
    print("Starting a new run...")
    asyncio.run(send_to_telegram("Starting a new run..."))

    # Run the job function inside the while loop
    job()

    car_data = read_excel_data()

    yeni_count = 0
    satilib_count = 0
    total_cars = len(car_data)

    for index, row in car_data.iterrows():
        if row['Vəziyyəti'] in ['Yeni', 'Satılıb'] and row['Link'] not in sent_cars:
            message = f"*{row['Avtomobil']}*\nİl: {row['İl']}\nMühərrik: {row['Mühərrik']}\nYürüş: {row['Yürüş']}\nQiymət: {row['Qiymət']}\nŞəhər: {row['Şəhər']}\nYerləşdirilib: {row['Yerləşdirilib']}\nSaytdakı vaxtı: {row['Saytdakı vaxtı']}\nLink: [Baxmaq üçün basın]({row['Link']})\nVəziyyəti: {row['Vəziyyəti']}\n-------------------"
            asyncio.run(send_to_telegram(message))

            # Add the link of the car to the set of sent cars
            sent_cars.add(row['Link'])

            if row['Vəziyyəti'] == 'Yeni':
                yeni_count += 1
            elif row['Vəziyyəti'] == 'Satılıb':
                satilib_count += 1

    current_month = datetime.now().strftime('%B')
    statistika_table = PrettyTable()
    statistika_table.field_names = [current_month + " Statistika", "Sayı"]
    statistika_table.add_row(["Yeni", yeni_count])
    statistika_table.add_row(["Satılıb", satilib_count])
    statistika_table.add_row(["Total Cars on Site", total_cars])

    print(statistika_table)
    asyncio.run(send_to_telegram(f"```\n{statistika_table}\n```"))

    df = car_data.groupby(["Avtomobil", "Vəziyyəti"]).size().reset_index(name="count")
    create_plot(df)

    # Send the image to telegram:
    bot = Bot(token=bot_token)
    asyncio.run(bot.send_photo(chat_id=channel_id, photo=open('statistics.png', 'rb')))

    time.sleep(60)