Untitled

price tracker
 avatar
unknown
plain_text
3 years ago
3.9 kB
30
Indexable
import requests
from bs4 import BeautifulSoup
from datetime import date, datetime
import pandas as pd
from openpyxl import Workbook
import xlsxwriter
from os.path import exists

# First run only: create the workbook with one sheet per retailer and a bold
# header row on each, before it is reopened for appending below.
fileExists = exists("Price_Comparison.xlsx")
if not fileExists:
    writer = xlsxwriter.Workbook("Price_Comparison.xlsx")
    amazonSite = writer.add_worksheet('Amazon')
    chemWarehouseSite = writer.add_worksheet('ChemWH')
    bold = writer.add_format({'bold': 1})  # bold property for Excel headers

    # (cell, caption) pairs written identically to every sheet.
    headerCells = (('A1', 'Date'), ('B1', 'Time'), ('C1', 'Item'), ('D1', 'Cost'))
    for sheet in (amazonSite, chemWarehouseSite):
        for cell, caption in headerCells:
            sheet.write(cell, caption, bold)
    writer.close()

# Reopen the workbook in append mode; 'overlay' lets new rows be written into
# the existing sheets instead of replacing them.
writerPD = pd.ExcelWriter(
    "Price_Comparison.xlsx",
    engine='openpyxl',
    mode='a',
    if_sheet_exists='overlay',
)

def dateTimeDetails():
    """Return the current local date and time as display strings.

    Both strings are formatted from a single ``datetime.now()`` snapshot so
    they always describe the same instant. Reading the clock twice (once for
    the date, once for the time) could pair the old date with a time just
    after midnight.

    Returns:
        A two-element list ``[currentDate, currentTime]``: the date formatted
        with ``"%B %d, %Y"`` (e.g. ``"October 05, 2022"``) and the time
        formatted with ``"%H:%M:%S"`` (e.g. ``"14:03:27"``).
    """
    now = datetime.now()
    currentDate = now.strftime("%B %d, %Y")
    currentTime = now.strftime("%H:%M:%S")
    return [currentDate, currentTime]

# Capture the timestamp once at start-up: both scrapers read dateTimeVar, so
# every row written during this run carries the same date and time.
dateTimeVar = dateTimeDetails()

def amazonURLs():
    """Scrape the tracked Amazon AU listings and append one row each to Excel.

    For every URL the page is fetched, the product title and price are read
    from the HTML, and a ``Date, Time, Product, Cost`` row is appended to the
    ``Amazon`` sheet through the module-level ``writerPD``. The date and time
    values come from the module-level ``dateTimeVar``.

    A listing that cannot be fetched, or whose title/price elements are
    missing or unreadable, is reported on stdout and skipped so that one bad
    page does not abort the rest of the run.

    Returns:
        ``[productName, productPrice]`` for the last listing that was scraped
        successfully, or ``[None, None]`` when no listing could be scraped.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    URLs = ["https://www.amazon.com.au/Versace-Fraiche-Gianni-Spray-Ounce/dp/B000XE5260", "https://www.amazon.com.au/Burberry-London-Women-Parfum-Ounces/dp/B000GHWSYS"]

    # Defined up front so the return below is valid even if nothing is scraped.
    productName = None
    productPrice = None

    for url in URLs:
        try:
            # A timeout keeps a stalled connection from hanging the whole run.
            page = requests.get(url, headers=headers, timeout=30)
            page.raise_for_status()
        except requests.RequestException as error:
            print(f"Skipping {url}: request failed ({error})")
            continue

        soup = BeautifulSoup(page.content, 'html.parser')
        titleTag = soup.find(id="productTitle")
        wholeTag = soup.find("span", {"class": "a-price-whole"})
        fractionTag = soup.find("span", {"class": "a-price-fraction"})
        if titleTag is None or wholeTag is None or fractionTag is None:
            print(f"Skipping {url}: product title or price not found in page")
            continue

        # Keep only the digit characters of each price element before converting.
        wholeDigits = "".join(ch for ch in wholeTag.text if ch.isdigit())
        fractionDigits = "".join(ch for ch in fractionTag.text if ch.isdigit())
        if not wholeDigits or not fractionDigits:
            print(f"Skipping {url}: price elements contain no digits")
            continue

        productName = titleTag.get_text().strip()
        # The fraction element is treated as hundredths of the currency unit.
        productPrice = int(wholeDigits) + int(fractionDigits) / 100

        writeOnToExcel = pd.DataFrame(
            [[dateTimeVar[0], dateTimeVar[1], productName, productPrice]],
            columns=["Date", "Time", "Product", "Cost"],
        )
        # startrow=max_row appends directly below the last used row of the sheet.
        writeOnToExcel.to_excel(
            writerPD,
            sheet_name='Amazon',
            startrow=writerPD.sheets["Amazon"].max_row,
            index=False,
            header=False,
        )

    return [productName, productPrice]

def chemWarehouseURLs():
    """Scrape the tracked Chemist Warehouse listings and append rows to Excel.

    For every URL the page is fetched, the product name and price are read
    from the HTML, and a ``Date, Time, Product, Cost`` row is appended to the
    ``ChemWH`` sheet through the module-level ``writerPD``. The date and time
    values come from the module-level ``dateTimeVar``.

    A listing that cannot be fetched, or whose name/price elements are
    missing or unreadable, is reported on stdout and skipped so that one bad
    page does not abort the rest of the run.

    Returns:
        A list with one single-row ``DataFrame`` per listing scraped
        successfully, in URL order. The list is empty when no listing could
        be scraped; its last element is the most recently written row.
    """
    URLs = ["https://www.chemistwarehouse.com.au/buy/53378/versace-eau-fraiche-eau-de-toilette-spray-100ml", "https://www.chemistwarehouse.com.au/buy/117965/calvin-klein-defy-eau-de-parfum-100ml"]
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
    priceElementId = "p_lt_ctl10_pageplaceholder_p_lt_ctl00_wBR_P_D1_ctl00_ctl00_ctl00_ctl00_ctl02_lblActualPrice"

    # Collect every row: priceComparison() indexes the result per listing.
    scrapedRows = []

    for url in URLs:
        try:
            # A timeout keeps a stalled connection from hanging the whole run.
            page = requests.get(url, headers=headers, timeout=30)
            page.raise_for_status()
        except requests.RequestException as error:
            print(f"Skipping {url}: request failed ({error})")
            continue

        soup = BeautifulSoup(page.content, 'html.parser')
        nameTag = soup.find("h1", {"style": "clear:both"})
        priceTag = soup.find(id=priceElementId)
        if nameTag is None or priceTag is None:
            print(f"Skipping {url}: product name or price not found in page")
            continue

        productName = nameTag.text.strip()
        # Drop the currency sign and any thousands separators before parsing.
        priceText = priceTag.get_text().strip().replace('$', '').replace(',', '')
        try:
            productPrice = float(priceText)
        except ValueError:
            print(f"Skipping {url}: could not read a price from {priceText!r}")
            continue

        writeOnToExcel = pd.DataFrame(
            [[dateTimeVar[0], dateTimeVar[1], productName, productPrice]],
            columns=["Date", "Time", "Product", "Cost"],
        )
        # startrow=max_row appends directly below the last used row of the sheet.
        writeOnToExcel.to_excel(
            writerPD,
            sheet_name='ChemWH',
            startrow=writerPD.sheets["ChemWH"].max_row,
            index=False,
            header=False,
        )
        scrapedRows.append(writeOnToExcel)

    return scrapedRows

def priceComparison():
    """Scratch routine: run both scrapers and print parts of their results.

    Experimental code for trying out how to access DataFrame elements; it
    returns nothing. Because it calls ``amazonURLs()`` and
    ``chemWarehouseURLs()``, running it also appends their rows to the
    workbook.
    """
    amazonObj = amazonURLs()
    chemWHObj = chemWarehouseURLs()

    # Second element of the amazonURLs() result is the product price.
    print(amazonObj[1])

    # chemWarehouseURLs() can return fewer than two frames, so only index the
    # entries that are actually present instead of raising IndexError.
    if len(chemWHObj) > 1:
        print(chemWHObj[1]['Product'])
        print(chemWHObj[0]['Product'] == chemWHObj[1]['Product'])
    elif chemWHObj:
        print(chemWHObj[0]['Product'])
    


# priceComparison() already runs both scrapers, so calling amazonURLs() and
# chemWarehouseURLs() again here would fetch every page twice and append
# duplicate rows for the same timestamp.
try:
    priceComparison()
finally:
    # Close unconditionally: the appended rows are only saved to the workbook
    # when the writer is closed, so an error above must not skip this.
    writerPD.close()
Editor is loading...