C6
user_1718919
plain_text
10 months ago
6.0 kB
4
Indexable
import pandas as pd
import numpy as np
from datetime import datetime
def process_tutor_report(file_path, staff_data_path):
"""Process the TutorPayReport and link with staff data"""
# Read the TutorPayReport
df = pd.read_excel(file_path)
# Read the processed staff data for name matching
staff_df = pd.read_csv(staff_data_path)
# Create standardized name format for matching
df['Staff_Standard'] = df.apply(
lambda x: f"{x['PEOPLENAME']} {x['PEOPLESURNAME']}", axis=1)
# Filter for relevant staff roles
df = df[df['STAFFROLE'].isin(['Tutor', 'Learning Support'])]
# Process time adjustments
def convert_ms_to_hours(ms_value):
if pd.isna(ms_value) or ms_value == 0:
return 0
# Convert milliseconds to hours and make positive
return abs(ms_value) / (1000 * 60 * 60)
df['EarlyAdjust'] = df['TIMEADJUSTEARLY'].apply(convert_ms_to_hours)
df['LateAdjust'] = df['TIMEADJUSTLATE'].apply(convert_ms_to_hours)
# Calculate actual hours
df['Actual_Hours'] = df.apply(
lambda x: max(0, x['Potential Hours'] - (x['EarlyAdjust'] + x['LateAdjust'])),
axis=1
)
# Group activity types
assessment_types = [
'Access Assessments', 'Assessments', 'Beauty OL Assess',
'CFL PA Vols Assess', 'Creative Assessments', 'E&M ApprenticeAssess',
'English FS Assess', 'English GCSE Assess', 'ESOL F2F Assess',
'ESOL Online Assess', 'Hair/Barb F2F Assess', 'Maths Assessments',
'SWiS Assessments'
]
def categorize_activity(activity):
if activity in assessment_types:
return 'Assessment'
elif activity in ['Community Engagement', 'Tutorials/Drop Ins']:
return activity
else:
return 'Other'
df['Activity_Category'] = df['ACTIVITYTYPE'].apply(categorize_activity)
# Convert event date to datetime if not already
df['EVENTDATE'] = pd.to_datetime(df['EVENTDATE'])
# Create summary dataframes
# 1. Daily summary
daily_summary = df.groupby(
['Staff_Standard', 'EVENTDATE', 'Activity_Category']
).agg({
'Actual_Hours': 'sum'
}).reset_index()
# 2. Weekly summary
df['Week'] = df['EVENTDATE'].dt.isocalendar().week
df['Year'] = df['EVENTDATE'].dt.isocalendar().year
weekly_summary = df.groupby(
['Staff_Standard', 'Year', 'Week', 'Activity_Category']
).agg({
'Actual_Hours': 'sum'
}).reset_index()
# 3. Monthly summary
df['Month'] = df['EVENTDATE'].dt.month
monthly_summary = df.groupby(
['Staff_Standard', 'Year', 'Month', 'Activity_Category']
).agg({
'Actual_Hours': 'sum'
}).reset_index()
# Calculate utilization (excluding Assessment hours)
def calculate_utilization(group_df, target_hours, period_divisor):
"""
Calculate utilization percentage
period_divisor: 12 for monthly, 52 for weekly
"""
non_assessment_hours = group_df[
group_df['Activity_Category'] != 'Assessment'
]['Actual_Hours'].sum()
period_target = target_hours / period_divisor
return (non_assessment_hours / period_target * 100) if period_target > 0 else 0
# Add utilization to monthly summary
monthly_utilization = []
for staff in monthly_summary['Staff_Standard'].unique():
staff_target = staff_df[
staff_df['Staff_Standard'] == staff
]['Target_Hours'].iloc[-1] if len(staff_df[staff_df['Staff_Standard'] == staff]) > 0 else 0
staff_monthly = monthly_summary[
monthly_summary['Staff_Standard'] == staff
].copy()
staff_monthly['Utilization_Percentage'] = calculate_utilization(
staff_monthly, staff_target, 12
)
monthly_utilization.append(staff_monthly)
monthly_summary_with_utilization = pd.concat(monthly_utilization)
return {
'daily_summary': daily_summary,
'weekly_summary': weekly_summary,
'monthly_summary': monthly_summary_with_utilization,
'detailed_data': df
}
def save_processed_data(data_dict, output_folder):
"""Save all processed dataframes to CSV files"""
if not os.path.exists(output_folder):
os.makedirs(output_folder)
for name, df in data_dict.items():
output_file = os.path.join(output_folder, f'{name}.csv')
df.to_csv(output_file, index=False)
print(f"Saved {name} to {output_file}")
# Usage
if __name__ == "__main__":
# Print current directory and files for debugging
current_dir = os.getcwd()
print(f"Current working directory: {current_dir}")
print("\nFiles in current directory:")
for file in os.listdir(current_dir):
print(f"- {file}")
# Define paths
input_file = "TutorPayReport_New 23.24 FY 22.11.24.xlsx"
staff_data = os.path.join("processed_data", "staff_monthly_data.csv")
output_folder = "processed_data"
# Verify files exist
if not os.path.exists(input_file):
print(f"\nERROR: Could not find {input_file}")
print("Please make sure the TutorPayReport file is in the same folder as this notebook")
exit()
if not os.path.exists(staff_data):
print(f"\nERROR: Could not find {staff_data}")
print("Please make sure the processed_data folder contains staff_monthly_data.csv")
exit()
# Process the data
processed_data = process_tutor_report(input_file, staff_data)
# Save the results
save_processed_data(processed_data, output_folder)
print("\nProcessing completed!")
print("\nSummary stats:")
for name, df in processed_data.items():
print(f"{name}: {len(df)} records")Editor is loading...
Leave a Comment