Untitled
unknown
plain_text
2 years ago
2.1 kB
5
Indexable
import pandas as pd from tqdm import tqdm import logging # Set up logging logging.basicConfig(filename='process_log.log', level=logging.INFO) def check_continuity(group): min_year = group['Start Year'].min() max_year = group['End Year'].max() group['Continuous'] = True group['Start Year'] = min_year group['End Year'] = max_year return group def process_sheet(sheet_name, xls): try: df = xls.parse(sheet_name) # Identifying Duplicates df['Duplicate'] = df.duplicated(subset=['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers', 'Start Year', 'End Year'], keep=False) # Identifying Continuous Rows df = df.groupby(['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers']).apply(check_continuity) # Identifying Unique Rows df['Unique'] = ~df['Duplicate'] & ~df['Continuous'] # Creating a Unique Key df['Unique Key'] = df.apply(lambda row: f"{row['Make']}_{row['Model']}_{row['Style']}_{row['Includes']}_{row['Bundle Part Numbers']}_{row['Start Year']}_{row['End Year']}", axis=1) logging.info(f'Successfully processed sheet: {sheet_name}') return df except Exception as e: logging.error(f'Error processing sheet {sheet_name}: {str(e)}') return None def main(): try: # Load the Excel File xls = pd.ExcelFile('bundles.xlsx') sheet_names = xls.sheet_names # Create a dictionary to hold the processed dataframes df_dict = {} # Process each sheet for sheet_name in tqdm(sheet_names, desc="Processing Sheets"): processed_df = process_sheet(sheet_name, xls) if processed_df is not None: df_dict[sheet_name] = processed_df # Save the Output with pd.ExcelWriter('output.xlsx') as writer: for sheet_name, df in df_dict.items(): df.to_excel(writer, sheet_name=sheet_name) logging.info('Processing completed successfully.') except Exception as e: logging.error(f'Error: {str(e)}') if __name__ == '__main__': main()
Editor is loading...