Untitled

mail@pastecode.io avatar
unknown
plain_text
7 months ago
2.1 kB
2
Indexable
Never
import pandas as pd
from tqdm import tqdm
import logging

# Set up logging
logging.basicConfig(filename='process_log.log', level=logging.INFO)


def check_continuity(group):
    min_year = group['Start Year'].min()
    max_year = group['End Year'].max()
    group['Continuous'] = True
    group['Start Year'] = min_year
    group['End Year'] = max_year
    return group


def process_sheet(sheet_name, xls):
    try:
        df = xls.parse(sheet_name)

        # Identifying Duplicates
        df['Duplicate'] = df.duplicated(subset=['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers', 'Start Year', 'End Year'], keep=False)

        # Identifying Continuous Rows
        df = df.groupby(['Make', 'Model', 'Style', 'Includes', 'Bundle Part Numbers']).apply(check_continuity)

        # Identifying Unique Rows
        df['Unique'] = ~df['Duplicate'] & ~df['Continuous']

        # Creating a Unique Key
        df['Unique Key'] = df.apply(lambda row: f"{row['Make']}_{row['Model']}_{row['Style']}_{row['Includes']}_{row['Bundle Part Numbers']}_{row['Start Year']}_{row['End Year']}", axis=1)

        logging.info(f'Successfully processed sheet: {sheet_name}')
        return df

    except Exception as e:
        logging.error(f'Error processing sheet {sheet_name}: {str(e)}')
        return None


def main():
    try:
        # Load the Excel File
        xls = pd.ExcelFile('bundles.xlsx')
        sheet_names = xls.sheet_names

        # Create a dictionary to hold the processed dataframes
        df_dict = {}

        # Process each sheet
        for sheet_name in tqdm(sheet_names, desc="Processing Sheets"):
            processed_df = process_sheet(sheet_name, xls)
            if processed_df is not None:
                df_dict[sheet_name] = processed_df

        # Save the Output
        with pd.ExcelWriter('output.xlsx') as writer:
            for sheet_name, df in df_dict.items():
                df.to_excel(writer, sheet_name=sheet_name)

        logging.info('Processing completed successfully.')

    except Exception as e:
        logging.error(f'Error: {str(e)}')


if __name__ == '__main__':
    main()