4
user_1718919
plain_text
4 months ago
3.5 kB
3
Indexable
# ------------------------------------------------------------ # CODE BLOCK 2: Processing TutorPayReport_New FY 22.11.24.xlsx # ------------------------------------------------------------ # Load TutorPayReport df_tutorpay = pd.read_excel("TutorPayReport_New FY 22.11.24.xlsx") df_tutorpay.columns = df_tutorpay.columns.str.strip().str.replace(' ', '_') # Normalize STAFFROLE df_tutorpay['STAFFROLE'] = (df_tutorpay['STAFFROLE'] .astype(str) .str.upper() .str.replace(r'\s+', ' ', regex=True) .str.strip()) # Normalize CONTRACTNAME df_tutorpay['CONTRACTNAME'] = (df_tutorpay['CONTRACTNAME'] .astype(str) .str.upper() .str.replace(r'\s+', ' ', regex=True) .str.strip()) # Some known replacements for CONTRACTNAME if needed: # If we know 'LSA- NB' should be 'LSA-NB', let's fix that: df_tutorpay['CONTRACTNAME'] = df_tutorpay['CONTRACTNAME'].str.replace('LSA- NB', 'LSA-NB', regex=False) print("Unique STAFFROLE (normalized):", df_tutorpay['STAFFROLE'].unique()) print("Unique CONTRACTNAME (normalized):", df_tutorpay['CONTRACTNAME'].unique()) # Filter by STAFFROLE and CONTRACTNAME based on what we actually have # After printing unique values, adjust the filters if needed # For now, let's assume we want these three contract names exactly: valid_staffroles = ['TUTOR', 'LEARNING SUPPORT'] valid_contracts = ['LSA-NB', 'SALARIED TUTOR', 'SESSIONAL TUTOR'] df_tutorpay = df_tutorpay[df_tutorpay['STAFFROLE'].isin(valid_staffroles)] print("Shape after STAFFROLE filter:", df_tutorpay.shape) df_tutorpay = df_tutorpay[df_tutorpay['CONTRACTNAME'].isin(valid_contracts)] print("Shape after CONTRACTNAME filter:", df_tutorpay.shape) # Normalize and categorize ACTIVITYTYPE # First ensure ACTIVITYTYPE is string and strip spaces df_tutorpay['ACTIVITYTYPE'] = (df_tutorpay['ACTIVITYTYPE'] .astype(str) .str.strip()) assessment_values = [ 'ACCESS ASSESSMENTS', 'ASSESSMENTS', 'BEAUTY OL ASSESS', 'CFL PA VOLS ASSESS', 'CREATIVE ASSESSMENTS', 'E&M APPRENTICEASSESS', 'ENGLISH FS ASSESS', 'ENGLISH GCSE ASSESS', 'ESOL F2F ASSESS', 'ESOL ONLINE ASSESS', 'HAIR/BARB F2F ASSESS', 'MATHS ASSESSMENTS', 'SWIS ASSESSMENTS' ] # Convert ACTIVITYTYPE to uppercase for comparison df_tutorpay['ACTIVITYTYPE'] = df_tutorpay['ACTIVITYTYPE'].str.upper() def map_activity_type(x): if pd.isna(x): return None x = x.strip() if x in assessment_values: return 'Assessment' elif x == 'COMMUNITY ENGAGEMENT': return 'Community Engagement' elif x == 'TUTORIALS/DROP INS': return 'Tutorials/Drop Ins' else: return None df_tutorpay['Activity_Category'] = df_tutorpay['ACTIVITYTYPE'].apply(map_activity_type) print("Activity_Category value counts:\n", df_tutorpay['Activity_Category'].value_counts(dropna=False)) df_tutorpay = df_tutorpay[df_tutorpay['Activity_Category'].notna()] print("Shape after Activity_Category filter:", df_tutorpay.shape) # If you still get zero rows, print out what ACTIVITYTYPE values exist: if df_tutorpay.shape[0] == 0: print("No rows passed the filters. Check ACTIVITYTYPE unique values again:") print(df_tutorpay['ACTIVITYTYPE'].unique())
Editor is loading...
Leave a Comment