Untitled
plain_text
2 months ago
2.7 kB
2
Indexable
Never
# Extract the required details for each product within the products section,
# append one row per listing to `table` and `excel_data`, then work out the
# URL of the next results page.
#
# NOTE(review): this fragment relies on names defined outside the visible
# code: products_section, soup, table, excel_data, product_count,
# last_results, datetime and timedelta.

# Take the reference time once so every listing on this page is measured
# against the same instant.
today = datetime.today()

# Links seen in the previous run, gathered once so the per-listing check is
# a set lookup instead of a scan over the whole column.
known_links = (
    set(last_results['Link'].values) if last_results is not None else set()
)

for product_div in products_section.find_all('div', class_='products-i'):
    # Only the parsing itself is guarded: a listing whose markup deviates
    # from the expected layout is skipped rather than aborting the scrape.
    try:
        avtomobil = product_div.find(
            'div', class_='products-i__name products-i__bottom-text'
        ).get_text().strip()

        il_muherrik_yurus = product_div.find(
            'div', class_='products-i__attributes products-i__bottom-text'
        ).get_text().strip()
        # ValueError here if the text does not have exactly three parts.
        il, muherrik, yurus = il_muherrik_yurus.split(', ')

        qiymet = product_div.find(
            'div', class_='product-price'
        ).get_text().strip()

        # First token is the city (with a trailing comma), the rest is the
        # posting date/time text.
        sheher, yerlesdirilib = product_div.find(
            'div', class_='products-i__datetime'
        ).get_text().strip().split(' ', 1)
        sheher = sheher.rstrip(',')

        # Replace the relative day words with formatted dates.
        if 'bugün' in yerlesdirilib:
            yerlesdirilib = yerlesdirilib.replace(
                'bugün', today.strftime('%d.%m.%Y')
            )
        elif 'dünən' in yerlesdirilib:
            yesterday = today - timedelta(days=1)
            yerlesdirilib = yerlesdirilib.replace(
                'dünən', yesterday.strftime('%d.%m.%Y')
            )

        # Extract the date from the "Yerləşdirilib" value and calculate
        # "Saytdakı vaxtı" as the number of whole days since posting.
        yerlesdirilib_date_str = yerlesdirilib.split(' ')[0]
        yerlesdirilib_date = datetime.strptime(
            yerlesdirilib_date_str, '%d.%m.%Y'
        )
        saytdaki_vaxti = (today - yerlesdirilib_date).days

        # TypeError if the anchor is missing, KeyError if it has no href.
        relative_link = product_div.find('a', class_='products-i__link')['href']
        link = "https://turbo.az" + relative_link
    except (AttributeError, ValueError, TypeError, KeyError):
        # An expected element was not found or its text has an unexpected
        # format; skip this listing.
        continue

    # "Vəziyyəti": "-" when the link was already present in the previous
    # results, "Yeni" otherwise (including when there are no previous results).
    vaziyyeti = "-" if link in known_links else "Yeni"

    # Add the extracted details to the table and to the export rows.
    row_data = [
        avtomobil, il, muherrik, yurus, qiymet,
        sheher, yerlesdirilib, saytdaki_vaxti, link, vaziyyeti,
    ]
    table.add_row(row_data)
    excel_data.append(row_data)
    product_count += 1

# Find the next page URL; None when there is no rel="next" link.
next_link = soup.find('a', rel='next')
url = ("https://turbo.az" + next_link['href']) if next_link else None