Untitled
price trackerunknown
plain_text
3 years ago
3.9 kB
30
Indexable
# Price tracker: scrapes product names and prices from Amazon AU and
# Chemist Warehouse product pages and appends one row per product to the
# matching sheet of Price_Comparison.xlsx each time the script runs.
from datetime import date, datetime
from os.path import exists

import pandas as pd
import requests
import xlsxwriter
from bs4 import BeautifulSoup
from openpyxl import Workbook

# Browser-like User-Agent sent with every request.
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
# requests has no default timeout; without one a stalled server hangs the run.
REQUEST_TIMEOUT = 30
# Column labels of the one-row DataFrames built for each product.
ROW_COLUMNS = ["Date", "Time", "Product", "Cost"]

fileExists = exists("Price_Comparison.xlsx")
if not fileExists:
    # The pandas writer below opens the workbook in append mode, so the file
    # and both sheets (with bold header rows) have to exist beforehand.
    writer = xlsxwriter.Workbook("Price_Comparison.xlsx")
    amazonSite = writer.add_worksheet('Amazon')
    chemWarehouseSite = writer.add_worksheet('ChemWH')
    bold = writer.add_format({'bold': 1})  # bold property for Excel headers
    for sheet in (amazonSite, chemWarehouseSite):
        sheet.write('A1', 'Date', bold)
        sheet.write('B1', 'Time', bold)
        sheet.write('C1', 'Item', bold)
        sheet.write('D1', 'Cost', bold)
    writer.close()

writerPD = pd.ExcelWriter("Price_Comparison.xlsx", engine='openpyxl', mode='a', if_sheet_exists='overlay')


def dateTimeDetails():
    """Return ``[date, time]`` strings for the current local time.

    The date is formatted as ``"%B %d, %Y"`` and the time as ``"%H:%M:%S"``.
    Both come from a single clock reading so they cannot disagree when the
    call happens right at midnight.
    """
    now = datetime.now()
    return [now.strftime("%B %d, %Y"), now.strftime("%H:%M:%S")]


dateTimeVar = dateTimeDetails()


def _fetchSoup(url):
    """Download *url* and return its parsed HTML; raise on HTTP errors."""
    page = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')


def _requireTag(tag, description, url):
    """Return *tag*, or raise ValueError naming what was missing from *url*."""
    if tag is None:
        raise ValueError(f"Could not find {description} on {url}")
    return tag


def _digitsToInt(text):
    """Return the digits contained in *text* as a single integer."""
    return int("".join(ch for ch in text if ch.isdigit()))


def _appendRow(sheetName, productName, productPrice):
    """Append one date/time/product/cost row to *sheetName*; return the row."""
    row = pd.DataFrame(
        [[dateTimeVar[0], dateTimeVar[1], productName, productPrice]],
        columns=ROW_COLUMNS,
    )
    # max_row is the 1-based index of the last used row, which is also the
    # 0-based index of the first free row that startrow expects.
    row.to_excel(
        writerPD,
        sheet_name=sheetName,
        startrow=writerPD.sheets[sheetName].max_row,
        index=False,
        header=False,
    )
    return row


def amazonURLs():
    """Scrape every tracked Amazon page and append a row to the 'Amazon' sheet.

    Returns:
        ``[productName, productPrice]`` of the last URL processed.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: an expected element is missing from the page, or a price
            part contains no digits.
    """
    URLs = ["https://www.amazon.com.au/Versace-Fraiche-Gianni-Spray-Ounce/dp/B000XE5260",
            "https://www.amazon.com.au/Burberry-London-Women-Parfum-Ounces/dp/B000GHWSYS"]
    for url in URLs:
        soup = _fetchSoup(url)
        titleTag = _requireTag(soup.find(id="productTitle"), "the product title", url)
        productName = titleTag.get_text().strip()
        priceWhole = _requireTag(
            soup.find("span", {"class": "a-price-whole"}), "the whole-price span", url)
        priceFraction = _requireTag(
            soup.find("span", {"class": "a-price-fraction"}), "the price-fraction span", url)
        # The page carries the price in two spans; the fraction span is
        # treated as hundredths.
        productPrice = _digitsToInt(priceWhole.text) + _digitsToInt(priceFraction.text) / 100
        _appendRow('Amazon', productName, productPrice)
    return [productName, productPrice]


def chemWarehouseURLs():
    """Scrape every tracked Chemist Warehouse page into the 'ChemWH' sheet.

    Returns:
        A one-element list holding the one-row DataFrame of the last URL
        processed.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: an expected element is missing from the page, or the
            price text is not a number.
    """
    URLs = ["https://www.chemistwarehouse.com.au/buy/53378/versace-eau-fraiche-eau-de-toilette-spray-100ml",
            "https://www.chemistwarehouse.com.au/buy/117965/calvin-klein-defy-eau-de-parfum-100ml"]
    for url in URLs:
        soup = _fetchSoup(url)
        nameTag = _requireTag(soup.find("h1", {"style": "clear:both"}), "the product heading", url)
        productName = nameTag.text.strip()
        priceTag = _requireTag(
            soup.find(id="p_lt_ctl10_pageplaceholder_p_lt_ctl00_wBR_P_D1_ctl00_ctl00_ctl00_ctl00_ctl02_lblActualPrice"),
            "the price label", url)
        # Drop the currency sign and any thousands separators before parsing.
        productPrice = float(priceTag.get_text().strip().replace('$', '').replace(',', ''))
        writeOnToExcel = _appendRow('ChemWH', productName, productPrice)
    return [writeOnToExcel]


def priceComparison(amazonObj=None, chemWHObj=None):
    """Print a few fields of the scraper results (scratch/debugging helper).

    Args:
        amazonObj: Result of ``amazonURLs()``; scraped here when omitted.
        chemWHObj: Result of ``chemWarehouseURLs()``; scraped here when
            omitted.
    """
    if amazonObj is None:
        amazonObj = amazonURLs()
    if chemWHObj is None:
        chemWHObj = chemWarehouseURLs()
    print(amazonObj[1])
    # chemWarehouseURLs() returns a one-element list, so a hard-coded index 1
    # raises IndexError; indexing from the end works for any non-empty list.
    print(chemWHObj[-1]['Product'])
    print(chemWHObj[0]['Product'] == chemWHObj[-1]['Product'])


try:
    # Scrape each site once and hand the results to priceComparison(), so
    # every product is fetched and written a single time per run.
    amazonResult = amazonURLs()
    chemWHResult = chemWarehouseURLs()
    priceComparison(amazonResult, chemWHResult)
finally:
    # Close even when scraping fails so the workbook handle is released.
    writerPD.close()
Editor is loading...