4
user_1718919
plain_text
a year ago
3.5 kB
4
Indexable
# ------------------------------------------------------------
# CODE BLOCK 2: Processing TutorPayReport_New FY 22.11.24.xlsx
# ------------------------------------------------------------
# Load TutorPayReport
df_tutorpay = pd.read_excel("TutorPayReport_New FY 22.11.24.xlsx")
df_tutorpay.columns = df_tutorpay.columns.str.strip().str.replace(' ', '_')
# Normalize STAFFROLE
df_tutorpay['STAFFROLE'] = (df_tutorpay['STAFFROLE']
.astype(str)
.str.upper()
.str.replace(r'\s+', ' ', regex=True)
.str.strip())
# Normalize CONTRACTNAME
df_tutorpay['CONTRACTNAME'] = (df_tutorpay['CONTRACTNAME']
.astype(str)
.str.upper()
.str.replace(r'\s+', ' ', regex=True)
.str.strip())
# Some known replacements for CONTRACTNAME if needed:
# If we know 'LSA- NB' should be 'LSA-NB', let's fix that:
df_tutorpay['CONTRACTNAME'] = df_tutorpay['CONTRACTNAME'].str.replace('LSA- NB', 'LSA-NB', regex=False)
print("Unique STAFFROLE (normalized):", df_tutorpay['STAFFROLE'].unique())
print("Unique CONTRACTNAME (normalized):", df_tutorpay['CONTRACTNAME'].unique())
# Filter by STAFFROLE and CONTRACTNAME based on what we actually have
# After printing unique values, adjust the filters if needed
# For now, let's assume we want these three contract names exactly:
valid_staffroles = ['TUTOR', 'LEARNING SUPPORT']
valid_contracts = ['LSA-NB', 'SALARIED TUTOR', 'SESSIONAL TUTOR']
df_tutorpay = df_tutorpay[df_tutorpay['STAFFROLE'].isin(valid_staffroles)]
print("Shape after STAFFROLE filter:", df_tutorpay.shape)
df_tutorpay = df_tutorpay[df_tutorpay['CONTRACTNAME'].isin(valid_contracts)]
print("Shape after CONTRACTNAME filter:", df_tutorpay.shape)
# Normalize and categorize ACTIVITYTYPE
# First ensure ACTIVITYTYPE is string and strip spaces
df_tutorpay['ACTIVITYTYPE'] = (df_tutorpay['ACTIVITYTYPE']
.astype(str)
.str.strip())
assessment_values = [
'ACCESS ASSESSMENTS', 'ASSESSMENTS', 'BEAUTY OL ASSESS', 'CFL PA VOLS ASSESS',
'CREATIVE ASSESSMENTS', 'E&M APPRENTICEASSESS', 'ENGLISH FS ASSESS',
'ENGLISH GCSE ASSESS', 'ESOL F2F ASSESS', 'ESOL ONLINE ASSESS',
'HAIR/BARB F2F ASSESS', 'MATHS ASSESSMENTS', 'SWIS ASSESSMENTS'
]
# Convert ACTIVITYTYPE to uppercase for comparison
df_tutorpay['ACTIVITYTYPE'] = df_tutorpay['ACTIVITYTYPE'].str.upper()
def map_activity_type(x):
if pd.isna(x):
return None
x = x.strip()
if x in assessment_values:
return 'Assessment'
elif x == 'COMMUNITY ENGAGEMENT':
return 'Community Engagement'
elif x == 'TUTORIALS/DROP INS':
return 'Tutorials/Drop Ins'
else:
return None
df_tutorpay['Activity_Category'] = df_tutorpay['ACTIVITYTYPE'].apply(map_activity_type)
print("Activity_Category value counts:\n", df_tutorpay['Activity_Category'].value_counts(dropna=False))
df_tutorpay = df_tutorpay[df_tutorpay['Activity_Category'].notna()]
print("Shape after Activity_Category filter:", df_tutorpay.shape)
# If you still get zero rows, print out what ACTIVITYTYPE values exist:
if df_tutorpay.shape[0] == 0:
print("No rows passed the filters. Check ACTIVITYTYPE unique values again:")
print(df_tutorpay['ACTIVITYTYPE'].unique())
Editor is loading...
Leave a Comment