Untitled
user_1718919
plain_text
4 months ago
4.7 kB
4
Indexable
import pandas as pd import numpy as np import glob import re from calendar import month_name # Helper function to parse line manager name def parse_line_manager(name): if pd.isna(name): return pd.Series([None, None]) parts = name.split(',') if len(parts) == 2: first_known = parts[0].strip() # e.g. "Joanne Jo" last_name = parts[1].strip() fk_parts = first_known.split() if len(fk_parts) == 2: first_name = fk_parts[0].strip() known_as = fk_parts[1].strip() return pd.Series([first_name + " " + last_name, known_as + " " + last_name]) return pd.Series([None, None]) # Read all ACL monthly report files files = glob.glob("ACL Monthly Report *.xlsx") all_dataframes = [] for file in files: # Extract month/year from filename match = re.search(r'ACL Monthly Report (\d{2})\.(\d{2})\.(\d{2})\.xlsx', file) if match: day_str = match.group(1) month_str = match.group(2) year_str = match.group(3) month_int = int(month_str) full_year = int("20" + year_str) month_name_str = month_name[month_int] # Read the Excel file with correct header skipping (adjust if needed) df = pd.read_excel(file, skiprows=2, header=0) df.columns = df.columns.str.strip().str.replace(' ', '_') # Add Month/Year columns df['Month'] = month_name_str df['MonthNumber'] = month_int df['Year'] = full_year all_dataframes.append(df) final_df = pd.concat(all_dataframes, ignore_index=True) # Filter to only Tutors and LSAs final_df = final_df[final_df['Position_Name'].isin(['Tutor', 'Learning Support Assistant'])] # Determine Contract Type (Sessional vs Salaried) final_df['Working_Hours'] = pd.to_numeric(final_df['Working_Hours'], errors='coerce') final_df['ContractType'] = final_df['Working_Hours'].apply(lambda x: "Sessional" if x <= 3 else "Salaried") # Calculate Target Hours final_df['Full-Time_Equivalent'] = pd.to_numeric(final_df['Full-Time_Equivalent'], errors='coerce') final_df['Target_Hours'] = final_df['Full-Time_Equivalent'] * 840 # Create Employee Name Variations final_df['FullName'] = final_df['First_Name'].str.strip() + " " + final_df['Last_Name'].str.strip() final_df['KnownAsFullName'] = final_df['Known_As'].str.strip() + " " + final_df['Last_Name'].str.strip() # Parse Line Manager Names final_df[['LineManager_FullName', 'LineManager_KnownAsFullName']] = final_df['Line_Manager_Name'].apply(parse_line_manager) # Identify changes month-to-month final_df['YearMonth'] = final_df['Year']*100 + final_df['MonthNumber'] final_df = final_df.sort_values(by=['Assignment_Number', 'YearMonth']) grouped = final_df.groupby('Assignment_Number', group_keys=False) final_df['Prev_Position_Name'] = grouped['Position_Name'].shift(1) final_df['Prev_ContractType'] = grouped['ContractType'].shift(1) final_df['Prev_YearMonth'] = grouped['YearMonth'].shift(1) final_df['Is_New_Contract_This_Month'] = final_df['Prev_YearMonth'].isna() final_df['Next_YearMonth'] = grouped['YearMonth'].shift(-1) final_df['Will_End_After_This_Month'] = final_df['Next_YearMonth'].isna() final_df['Position_Changed'] = (final_df['Position_Name'] != final_df['Prev_Position_Name']) & final_df['Prev_Position_Name'].notna() final_df['ContractType_Changed'] = (final_df['ContractType'] != final_df['Prev_ContractType']) & final_df['Prev_ContractType'].notna() # Select only necessary columns (adjust if needed) final_columns = [ 'Assignment_Number', 'Title', 'First_Name', 'Known_As', 'Last_Name', 'Position_Name', 'Working_Hours', 'Full-Time_Equivalent', 'Month', 'MonthNumber', 'Year', 'Line_Manager_Name', 'ContractType', 'Target_Hours', 'FullName', 'KnownAsFullName', 'LineManager_FullName', 'LineManager_KnownAsFullName', 'YearMonth', 'Is_New_Contract_This_Month', 'Will_End_After_This_Month', 'Position_Changed', 'ContractType_Changed' ] # Ensure all these columns exist before subsetting: existing_cols = final_df.columns missing_cols = [c for c in final_columns if c not in existing_cols] if missing_cols: print("Missing columns:", missing_cols) else: final_df = final_df[final_columns] # Extract unique employees from final_df unique_employees_previous = final_df[['First_Name', 'Known_As', 'Last_Name', 'FullName', 'KnownAsFullName']].drop_duplicates() print("final_df shape:", final_df.shape) print("unique_employees_previous shape:", unique_employees_previous.shape) print("Sample of final_df:") print(final_df.head())
Editor is loading...
Leave a Comment