import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
from prettytable import PrettyTable

# Excel file path
excel_file_path = 'turbo_cars.xlsx'

# Function to scrape and process the website
def job():
    global url
    # URL of the page to scrape
    url = 'https://turbo.az/autos?q%5Bsort%5D=&q%5Bmake%5D%5B%5D=1&q%5Bmodel%5D%5B%5D=&q%5Bused%5D=&q%5Bregion%5D%5B%5D=&q%5Bprice_from%5D=&q%5Bprice_to%5D=&q%5Bcurrency%5D=azn&q%5Bloan%5D=0&q%5Bbarter%5D=0&q%5Bcategory%5D%5B%5D=&q%5Byear_from%5D=2015&q%5Byear_to%5D=&q%5Bcolor%5D%5B%5D=&q%5Bfuel_type%5D%5B%5D=&q%5Bgear%5D%5B%5D=&q%5Btransmission%5D%5B%5D=&q%5Bengine_volume_from%5D=&q%5Bengine_volume_to%5D=&q%5Bpower_from%5D=&q%5Bpower_to%5D=&q%5Bmileage_from%5D=&q%5Bmileage_to%5D=&q%5Bonly_shops%5D=&q%5Bprior_owners_count%5D%5B%5D=&q%5Bseats_count%5D%5B%5D=&q%5Bmarket%5D%5B%5D=&q%5Bcrashed%5D=1&q%5Bpainted%5D=1&q%5Bfor_spare_parts%5D=0'

    # Load last results if they exist
    last_results = pd.read_excel(excel_file_path) if os.path.exists(excel_file_path) else None

    # Creating a PrettyTable with the specified columns
    table = PrettyTable()
    table.field_names = ["Avtomobil", "İl", "Mühərrik", "Yürüş", "Qiymət", "Şəhər",
                         "Yerləşdirilib", "Saytdakı vaxtı", "Link", "Vəziyyəti"]

    # List to store data for Excel export
    excel_data = []
    product_count = 0

    # Headers for the request
    headers = {
        'User-Agent': 'Mozilla/5.0',
    }

    while url:
        # Sending a GET request to fetch the HTML content
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Manually identify the part of the HTML that contains the product listings
        elanlar_section = soup.find('p', class_='section-title_name', string='ELANLAR')
        products_section = elanlar_section.find_next('div', class_='products') if elanlar_section else None
        if products_section is None:
            print("Failed to find product listings.")
            break

        # Extracting the required details for each product within the products section
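

# --- Illustrative runner (not part of the original paste) ---
# A minimal sketch of how job() might be invoked periodically. The original
# code only defines job() and compares against previously saved results,
# which hints at repeated runs; the third-party `schedule` library and the
# 10-minute interval below are assumptions made purely for illustration.
import time

import schedule

if __name__ == '__main__':
    job()                                # run once immediately
    schedule.every(10).minutes.do(job)   # then re-run on a fixed interval
    while True:
        schedule.run_pending()
        time.sleep(1)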