Untitled

price tracker
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime
import pandas as pd
from openpyxl import Workbook
import xlsxwriter
from os.path import exists

fileExists = exists("Price_Comparison.xlsx")
if not fileExists:
    writer = xlsxwriter.Workbook("Price_Comparison.xlsx")
    amazonSite = writer.add_worksheet('Amazon')
    chemWarehouseSite = writer.add_worksheet('ChemWH')
    bold = writer.add_format({'bold': 1})  # bold property for Excel headers
    
    for _ in amazonSite, chemWarehouseSite:
        _.write('A1', 'Date', bold)
        _.write('B1', 'Time', bold)
        _.write('C1', 'Item', bold)
        _.write('D1', 'Cost', bold)
    writer.close()
    
writerPD = pd.ExcelWriter("Price_Comparison.xlsx", engine='openpyxl',  mode = 'a', if_sheet_exists = 'overlay')

def dateTimeDetails():
    dateToday = date.today()
    currentDate = dateToday.strftime("%B %d, %Y")
    timeToday = datetime.now()
    currentTime = timeToday.strftime("%H:%M:%S")
    return [currentDate, currentTime]

dateTimeVar = dateTimeDetails()

def amazonURLs():
    
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    URLs = ["https://www.amazon.com.au/Versace-Fraiche-Gianni-Spray-Ounce/dp/B000XE5260", "https://www.amazon.com.au/Burberry-London-Women-Parfum-Ounces/dp/B000GHWSYS"]
    for url in URLs:
        page = requests.get(url, headers = headers)
        soup = BeautifulSoup(page.content, 'html.parser')
        productName = soup.find(id="productTitle").get_text().strip()
        priceWhole = soup.find("span", {"class": "a-price-whole"})
        price1 = [int("".join(d for d in priceWhole.text if d.isdigit()))]
        priceFraction = soup.find("span", {"class": "a-price-fraction"})
        price2 = [int("".join(d for d in priceFraction.text if d.isdigit()))]
        productPrice = (price1[0] + (price2[0]/100))
        
        writeOnToExcel = pd.DataFrame([[dateTimeVar[0], dateTimeVar[1], productName, productPrice]], columns = ["Date", "Time", "Product", "Cost"])
        writeOnToExcel.to_excel(writerPD, sheet_name='Amazon', startrow=writerPD.sheets["Amazon"].max_row, index=False, header = None)

    return [productName, productPrice]

def chemWarehouseURLs():
    
    URLs = ["https://www.chemistwarehouse.com.au/buy/53378/versace-eau-fraiche-eau-de-toilette-spray-100ml", "https://www.chemistwarehouse.com.au/buy/117965/calvin-klein-defy-eau-de-parfum-100ml"]
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    for url in URLs:
        page = requests.get(url, headers = headers)
        soup = BeautifulSoup(page.content, 'html.parser')
        productName = soup.find("h1", {"style": "clear:both"})
        productName = productName.text.strip()
        productPrice = soup.find(id="p_lt_ctl10_pageplaceholder_p_lt_ctl00_wBR_P_D1_ctl00_ctl00_ctl00_ctl00_ctl02_lblActualPrice").get_text()
        productPrice = float(productPrice.strip().replace('$', ''))
        writeOnToExcel = pd.DataFrame([[dateTimeVar[0], dateTimeVar[1], productName, productPrice]], columns = ["Date", "Time", "Product", "Cost"], index= None)
        writeOnToExcel.to_excel(writerPD, sheet_name='ChemWH', startrow=writerPD.sheets["ChemWH"].max_row, index=False, header = None)

    return [writeOnToExcel]

def priceComparison():
    '''
    ignore this block, just testing out how to access dataframe elements
    '''

    amazonObj = amazonURLs()
    chemWHObj = chemWarehouseURLs()
    print(amazonObj[1]) 
    print(chemWHObj[1]['Product'])
    print(chemWHObj[0]['Product'] == chemWHObj[1]['Product'])
    


priceComparison()    
amazonURLs()
chemWarehouseURLs()
writerPD.close()
Editor is loading...