Untitled
unknown
plain_text
2 years ago
2.1 kB
6
Indexable
import pandas as pd
from tqdm import tqdm
import logging
# Set up logging
logging.basicConfig(filename='process_log.log', level=logging.INFO)
def check_continuity(group):
min_year = group['Start Year'].min()
max_year = group['End Year'].max()
group['Continuous'] = True
group['Start Year'] = min_year
group['End Year'] = max_year
return group
def process_sheet(sheet_name, xls):
try:
df = xls.parse(sheet_name)
# Identifying Duplicates
df['Duplicate'] = df.duplicated(subset=['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers', 'Start Year', 'End Year'], keep=False)
# Identifying Continuous Rows
df = df.groupby(['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers']).apply(check_continuity)
# Identifying Unique Rows
df['Unique'] = ~df['Duplicate'] & ~df['Continuous']
# Creating a Unique Key
df['Unique Key'] = df.apply(lambda row: f"{row['Make']}_{row['Model']}_{row['Style']}_{row['Includes']}_{row['Bundle Part Numbers']}_{row['Start Year']}_{row['End Year']}", axis=1)
logging.info(f'Successfully processed sheet: {sheet_name}')
return df
except Exception as e:
logging.error(f'Error processing sheet {sheet_name}: {str(e)}')
return None
def main():
try:
# Load the Excel File
xls = pd.ExcelFile('bundles.xlsx')
sheet_names = xls.sheet_names
# Create a dictionary to hold the processed dataframes
df_dict = {}
# Process each sheet
for sheet_name in tqdm(sheet_names, desc="Processing Sheets"):
processed_df = process_sheet(sheet_name, xls)
if processed_df is not None:
df_dict[sheet_name] = processed_df
# Save the Output
with pd.ExcelWriter('output.xlsx') as writer:
for sheet_name, df in df_dict.items():
df.to_excel(writer, sheet_name=sheet_name)
logging.info('Processing completed successfully.')
except Exception as e:
logging.error(f'Error: {str(e)}')
if __name__ == '__main__':
main()
Editor is loading...