Untitled
unknown
plain_text
2 years ago
2.0 kB
11
Indexable
# Path of the Excel workbook that job() reads the previous results from
excel_file_path = "turbo_cars.xlsx"
# Function to scrape and process the website
def job():
global url
# URL of the page to scrape
url = 'https://turbo.az/autos?q%5Bsort%5D=&q%5Bmake%5D%5B%5D=1&q%5Bmodel%5D%5B%5D=&q%5Bused%5D=&q%5Bregion%5D%5B%5D=&q%5Bprice_from%5D=&q%5Bprice_to%5D=&q%5Bcurrency%5D=azn&q%5Bloan%5D=0&q%5Bbarter%5D=0&q%5Bcategory%5D%5B%5D=&q%5Byear_from%5D=2015&q%5Byear_to%5D=&q%5Bcolor%5D%5B%5D=&q%5Bfuel_type%5D%5B%5D=&q%5Bgear%5D%5B%5D=&q%5Btransmission%5D%5B%5D=&q%5Bengine_volume_from%5D=&q%5Bengine_volume_to%5D=&q%5Bpower_from%5D=&q%5Bpower_to%5D=&q%5Bmileage_from%5D=&q%5Bmileage_to%5D=&q%5Bonly_shops%5D=&q%5Bprior_owners_count%5D%5B%5D=&q%5Bseats_count%5D%5B%5D=&q%5Bmarket%5D%5B%5D=&q%5Bcrashed%5D=1&q%5Bpainted%5D=1&q%5Bfor_spare_parts%5D=0'
# Load the previous results if the Excel file already exists
last_results = pd.read_excel(excel_file_path) if os.path.exists(excel_file_path) else None
# Creating a PrettyTable with the specified columns
table = PrettyTable()
table.field_names = ["Avtomobil", "İl", "Mühərrik", "Yürüş", "Qiymət", "Şəhər", "Yerləşdirilib", "Saytdakı vaxtı", "Link", "Vəziyyəti"]
# List to store data for Excel export
excel_data = []
product_count = 0
# Headers for the request
headers = {
'User-Agent': 'Mozilla/5.0',
}
while url:
# Sending a GET request to fetch the HTML content
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
# Manually identify the part of the HTML that contains the product listings
elanlar_section = soup.find('p', class_='section-title_name', string='ELANLAR')
products_section = elanlar_section.find_next('div', class_='products')
if products_section is None:
print("Failed to find product listings.")
break
# Extracting the required details for each product within the products section
Editor is loading...