# Untitled
# price tracker - unknown
# plain_text
# 3 years ago
# 3.9 kB
# 38
# Indexable
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime
import pandas as pd
from openpyxl import Workbook
import xlsxwriter
from os.path import exists
# First run only: create the workbook with one sheet per retailer, each
# holding just a bold header row.
fileExists = exists("Price_Comparison.xlsx")
if not fileExists:
    writer = xlsxwriter.Workbook("Price_Comparison.xlsx")
    amazonSite = writer.add_worksheet('Amazon')
    chemWarehouseSite = writer.add_worksheet('ChemWH')
    bold = writer.add_format({'bold': 1})  # bold property for Excel headers
    header_cells = (
        ('A1', 'Date'),
        ('B1', 'Time'),
        ('C1', 'Item'),
        ('D1', 'Cost'),
    )
    for sheet in (amazonSite, chemWarehouseSite):
        for cell, title in header_cells:
            sheet.write(cell, title, bold)
    writer.close()

# Shared append-mode writer used by the scraper functions; 'overlay' writes
# into the existing sheets without removing their current contents.
writerPD = pd.ExcelWriter(
    "Price_Comparison.xlsx",
    engine='openpyxl',
    mode='a',
    if_sheet_exists='overlay',
)
def dateTimeDetails():
    """Return the current local date and time as display strings.

    Both strings are formatted from a single ``datetime.now()`` reading so
    the date and the time always describe the same instant; two separate
    clock reads could straddle midnight and disagree.

    Returns:
        list: ``[currentDate, currentTime]`` where ``currentDate`` uses the
        format ``%B %d, %Y`` (e.g. ``"January 05, 2023"``) and
        ``currentTime`` uses ``%H:%M:%S`` (e.g. ``"14:03:59"``).
    """
    now = datetime.now()
    return [now.strftime("%B %d, %Y"), now.strftime("%H:%M:%S")]


# Timestamp captured once at start-up and reused for the rows written by the
# scraper functions.
dateTimeVar = dateTimeDetails()
def amazonURLs():
    """Scrape the tracked Amazon product pages and log their prices.

    For each URL the product title and price are read from the page and one
    row (date, time, product, cost) is written below the last used row of
    the 'Amazon' sheet through the module-level ``writerPD``.  A page that
    cannot be fetched, or that lacks the expected title/price elements, is
    skipped with a printed message instead of aborting the whole run.

    Returns:
        list: ``[productName, productPrice]`` for the last product that was
        scraped successfully.

    Raises:
        RuntimeError: if none of the product pages could be scraped.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    URLs = ["https://www.amazon.com.au/Versace-Fraiche-Gianni-Spray-Ounce/dp/B000XE5260", "https://www.amazon.com.au/Burberry-London-Women-Parfum-Ounces/dp/B000GHWSYS"]
    lastResult = None
    for url in URLs:
        try:
            # Without a timeout a stalled connection would hang the script.
            page = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as err:
            print(f"Amazon: request failed for {url}: {err}")
            continue

        soup = BeautifulSoup(page.content, 'html.parser')
        titleTag = soup.find(id="productTitle")
        priceWhole = soup.find("span", {"class": "a-price-whole"})
        priceFraction = soup.find("span", {"class": "a-price-fraction"})
        # find() returns None when the element is absent (e.g. a captcha or
        # a changed page layout), so check before dereferencing.
        if titleTag is None or priceWhole is None or priceFraction is None:
            print(f"Amazon: title or price not found on {url}; skipping")
            continue

        # Keep only the digits so separators such as "," or "." are ignored.
        wholeDigits = "".join(ch for ch in priceWhole.text if ch.isdigit())
        fractionDigits = "".join(ch for ch in priceFraction.text if ch.isdigit())
        if not wholeDigits or not fractionDigits:
            print(f"Amazon: could not parse a price on {url}; skipping")
            continue

        productName = titleTag.get_text().strip()
        productPrice = int(wholeDigits) + int(fractionDigits) / 100

        writeOnToExcel = pd.DataFrame(
            [[dateTimeVar[0], dateTimeVar[1], productName, productPrice]],
            columns=["Date", "Time", "Product", "Cost"],
        )
        writeOnToExcel.to_excel(
            writerPD,
            sheet_name='Amazon',
            startrow=writerPD.sheets["Amazon"].max_row,  # first free row
            index=False,
            header=False,
        )
        lastResult = [productName, productPrice]

    if lastResult is None:
        raise RuntimeError("Amazon: no product page could be scraped")
    return lastResult
def chemWarehouseURLs():
    """Scrape the tracked Chemist Warehouse product pages and log their prices.

    For each URL the product name and price are read from the page and one
    row (date, time, product, cost) is written below the last used row of
    the 'ChemWH' sheet through the module-level ``writerPD``.  A page that
    cannot be fetched, or whose name/price cannot be found or parsed, is
    skipped with a printed message instead of aborting the whole run.

    Returns:
        list: a one-element list holding the single-row ``DataFrame`` of the
        last product that was scraped successfully.

    Raises:
        RuntimeError: if none of the product pages could be scraped.
    """
    URLs = ["https://www.chemistwarehouse.com.au/buy/53378/versace-eau-fraiche-eau-de-toilette-spray-100ml", "https://www.chemistwarehouse.com.au/buy/117965/calvin-klein-defy-eau-de-parfum-100ml"]
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    lastFrame = None
    for url in URLs:
        try:
            # Without a timeout a stalled connection would hang the script.
            page = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as err:
            print(f"ChemWH: request failed for {url}: {err}")
            continue

        soup = BeautifulSoup(page.content, 'html.parser')
        nameTag = soup.find("h1", {"style": "clear:both"})
        priceTag = soup.find(id="p_lt_ctl10_pageplaceholder_p_lt_ctl00_wBR_P_D1_ctl00_ctl00_ctl00_ctl00_ctl02_lblActualPrice")
        # find() returns None when the element is absent (e.g. a changed
        # page layout), so check before dereferencing.
        if nameTag is None or priceTag is None:
            print(f"ChemWH: name or price not found on {url}; skipping")
            continue

        productName = nameTag.text.strip()
        priceText = priceTag.get_text().strip()
        try:
            # Drop the currency sign and any thousands separator ("$1,299.99").
            productPrice = float(priceText.replace('$', '').replace(',', ''))
        except ValueError:
            print(f"ChemWH: could not parse price {priceText!r} on {url}; skipping")
            continue

        writeOnToExcel = pd.DataFrame(
            [[dateTimeVar[0], dateTimeVar[1], productName, productPrice]],
            columns=["Date", "Time", "Product", "Cost"],
        )
        writeOnToExcel.to_excel(
            writerPD,
            sheet_name='ChemWH',
            startrow=writerPD.sheets["ChemWH"].max_row,  # first free row
            index=False,
            header=False,
        )
        lastFrame = writeOnToExcel

    if lastFrame is None:
        raise RuntimeError("ChemWH: no product page could be scraped")
    return [lastFrame]
def priceComparison():
    """Scratch routine for trying out access to the scrapers' return values.

    Not part of the tracking logic proper.  It runs both scrapers (which
    also append rows to the workbook as a side effect) and prints a few
    values taken from what they return.
    """
    amazonObj = amazonURLs()
    chemWHObj = chemWarehouseURLs()
    print(amazonObj[1])  # second element of the Amazon result: the price

    # chemWarehouseURLs() returns a list of DataFrames that may contain only
    # one element, so index defensively instead of assuming a second entry.
    if not chemWHObj:
        return
    firstFrame = chemWHObj[0]
    secondFrame = chemWHObj[1] if len(chemWHObj) > 1 else None
    shownFrame = firstFrame if secondFrame is None else secondFrame
    print(shownFrame['Product'])
    if secondFrame is not None:
        print(firstFrame['Product'] == secondFrame['Product'])
# NOTE(review): priceComparison() itself calls both scrapers, so the two
# explicit calls below scrape and record every product a second time in the
# same run - confirm whether that duplication is intended.
try:
    priceComparison()
    amazonURLs()
    chemWarehouseURLs()
finally:
    # Always close the writer so rows already written are saved and the file
    # is released, even when one of the calls above raises.
    writerPD.close()