C6
user_1718919
plain_text
4 months ago
6.0 kB
3
Indexable
import pandas as pd import numpy as np from datetime import datetime def process_tutor_report(file_path, staff_data_path): """Process the TutorPayReport and link with staff data""" # Read the TutorPayReport df = pd.read_excel(file_path) # Read the processed staff data for name matching staff_df = pd.read_csv(staff_data_path) # Create standardized name format for matching df['Staff_Standard'] = df.apply( lambda x: f"{x['PEOPLENAME']} {x['PEOPLESURNAME']}", axis=1) # Filter for relevant staff roles df = df[df['STAFFROLE'].isin(['Tutor', 'Learning Support'])] # Process time adjustments def convert_ms_to_hours(ms_value): if pd.isna(ms_value) or ms_value == 0: return 0 # Convert milliseconds to hours and make positive return abs(ms_value) / (1000 * 60 * 60) df['EarlyAdjust'] = df['TIMEADJUSTEARLY'].apply(convert_ms_to_hours) df['LateAdjust'] = df['TIMEADJUSTLATE'].apply(convert_ms_to_hours) # Calculate actual hours df['Actual_Hours'] = df.apply( lambda x: max(0, x['Potential Hours'] - (x['EarlyAdjust'] + x['LateAdjust'])), axis=1 ) # Group activity types assessment_types = [ 'Access Assessments', 'Assessments', 'Beauty OL Assess', 'CFL PA Vols Assess', 'Creative Assessments', 'E&M ApprenticeAssess', 'English FS Assess', 'English GCSE Assess', 'ESOL F2F Assess', 'ESOL Online Assess', 'Hair/Barb F2F Assess', 'Maths Assessments', 'SWiS Assessments' ] def categorize_activity(activity): if activity in assessment_types: return 'Assessment' elif activity in ['Community Engagement', 'Tutorials/Drop Ins']: return activity else: return 'Other' df['Activity_Category'] = df['ACTIVITYTYPE'].apply(categorize_activity) # Convert event date to datetime if not already df['EVENTDATE'] = pd.to_datetime(df['EVENTDATE']) # Create summary dataframes # 1. Daily summary daily_summary = df.groupby( ['Staff_Standard', 'EVENTDATE', 'Activity_Category'] ).agg({ 'Actual_Hours': 'sum' }).reset_index() # 2. Weekly summary df['Week'] = df['EVENTDATE'].dt.isocalendar().week df['Year'] = df['EVENTDATE'].dt.isocalendar().year weekly_summary = df.groupby( ['Staff_Standard', 'Year', 'Week', 'Activity_Category'] ).agg({ 'Actual_Hours': 'sum' }).reset_index() # 3. Monthly summary df['Month'] = df['EVENTDATE'].dt.month monthly_summary = df.groupby( ['Staff_Standard', 'Year', 'Month', 'Activity_Category'] ).agg({ 'Actual_Hours': 'sum' }).reset_index() # Calculate utilization (excluding Assessment hours) def calculate_utilization(group_df, target_hours, period_divisor): """ Calculate utilization percentage period_divisor: 12 for monthly, 52 for weekly """ non_assessment_hours = group_df[ group_df['Activity_Category'] != 'Assessment' ]['Actual_Hours'].sum() period_target = target_hours / period_divisor return (non_assessment_hours / period_target * 100) if period_target > 0 else 0 # Add utilization to monthly summary monthly_utilization = [] for staff in monthly_summary['Staff_Standard'].unique(): staff_target = staff_df[ staff_df['Staff_Standard'] == staff ]['Target_Hours'].iloc[-1] if len(staff_df[staff_df['Staff_Standard'] == staff]) > 0 else 0 staff_monthly = monthly_summary[ monthly_summary['Staff_Standard'] == staff ].copy() staff_monthly['Utilization_Percentage'] = calculate_utilization( staff_monthly, staff_target, 12 ) monthly_utilization.append(staff_monthly) monthly_summary_with_utilization = pd.concat(monthly_utilization) return { 'daily_summary': daily_summary, 'weekly_summary': weekly_summary, 'monthly_summary': monthly_summary_with_utilization, 'detailed_data': df } def save_processed_data(data_dict, output_folder): """Save all processed dataframes to CSV files""" if not os.path.exists(output_folder): os.makedirs(output_folder) for name, df in data_dict.items(): output_file = os.path.join(output_folder, f'{name}.csv') df.to_csv(output_file, index=False) print(f"Saved {name} to {output_file}") # Usage if __name__ == "__main__": # Print current directory and files for debugging current_dir = os.getcwd() print(f"Current working directory: {current_dir}") print("\nFiles in current directory:") for file in os.listdir(current_dir): print(f"- {file}") # Define paths input_file = "TutorPayReport_New 23.24 FY 22.11.24.xlsx" staff_data = os.path.join("processed_data", "staff_monthly_data.csv") output_folder = "processed_data" # Verify files exist if not os.path.exists(input_file): print(f"\nERROR: Could not find {input_file}") print("Please make sure the TutorPayReport file is in the same folder as this notebook") exit() if not os.path.exists(staff_data): print(f"\nERROR: Could not find {staff_data}") print("Please make sure the processed_data folder contains staff_monthly_data.csv") exit() # Process the data processed_data = process_tutor_report(input_file, staff_data) # Save the results save_processed_data(processed_data, output_folder) print("\nProcessing completed!") print("\nSummary stats:") for name, df in processed_data.items(): print(f"{name}: {len(df)} records")
Editor is loading...
Leave a Comment