Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
2.3 kB
1
Indexable
Never
import pandas as pd
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
import gspread

# define the scope of the credentials
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

# load the credentials from a JSON file 
creds = ServiceAccountCredentials.from_json_keyfile_name(r'C:\Users\naufal-project\Desktop\Shopee Interview\API Key\shopee_analysis_apikey.json', scope)
client = gspread.authorize(creds)

df_dummy1= pd.read_csv(r'C:\Users\naufal-project\Desktop\Shopee Interview\Data\dummy_data_1.csv')
df_dummy2= pd.read_csv(r'C:\Users\naufal-project\Desktop\Shopee Interview\Data\dummy_data_2.csv')

#Merge Data
df_rawdata= pd.merge(df_dummy1, df_dummy2, on= 'ID')

#Remove whitespaces
df_rawdata.columns= df_rawdata.columns.str.strip()
df_rawdata= df_rawdata.applymap(lambda x: ' '.join(x.split()) if isinstance(x, str) else x)
# print(df_rawdata)

#Convert format to numeric
df_operationdata= df_rawdata.copy()
for col in ['Gross Sales','Discounts','Sales','COGS']:
    df_operationdata[col] = pd.to_numeric(df_operationdata[col].str.replace('$', '').str.replace(',', '').str.replace('-', ''))

#Group by Gross Sales, Sort   
df_GSBM = df_operationdata.groupby(['Product', 'Year', 'Month Number','Month Name'])['Gross Sales'].sum()
df_GSBM= df_GSBM.reset_index(name= 'Gross Sales')
#Convert back to Dollar
df_GSBM['Gross Sales'] = df_GSBM['Gross Sales'].apply(lambda x: "${:,.2f}".format(x))
df_GSBM= df_GSBM.sort_values(by=['Product', 'Year', 'Month Number', 'Gross Sales'], ascending=[True, True, True, False])
# print(df_GSBM)

GSAsheet= client.open('Gross Sales Analysis')
GSAworksheets= GSAsheet.worksheets()
GSAsheet.add_worksheet(title= 'Blank', rows=1000, cols=26)

for worksheet in GSAworksheets:
    GSAsheet.del_worksheet(worksheet)
    
sheetnames= [['Merged Data',df_rawdata],['Gross Sales by Month',df_GSBM]]  
for sheetname, dfname in sheetnames:
    try: 
        GSAsheet.worksheet(sheetname).clear()  
    except:
        GSAsheet.add_worksheet(title= sheetname, rows=1000, cols=26)
    
    set_with_dataframe(GSAsheet.worksheet(sheetname), dfname)

GSAsheet.del_worksheet(GSAsheet.worksheet('Blank'))    
print('Done with Gross Sales Analysis')