C-2
unknown
plain_text
a year ago
10 kB
4
Indexable
def track_contract_changes(df: pd.DataFrame) -> pd.DataFrame:
"""
Track staff contract changes with full history, including overlapping contracts
"""
logging.info("Starting contract change tracking...")
changes = []
for name in df['Staff_Standard'].unique():
logging.debug(f"Processing contracts for: {name}")
staff_data = df[df['Staff_Standard'] == name].sort_values('Report_Date')
contract_periods = []
current_contracts = set() # Track currently active contracts
for idx, row in staff_data.iterrows():
current_contract = (row['Position Name'], row['Contract_Type'], row['Target_Hours'])
# Check for changes in existing contracts
if current_contract not in current_contracts:
# New contract type detected
contract_periods.append({
'Staff_Name': name,
'Start_Date': row['Report_Date'],
'End_Date': staff_data['Report_Date'].max(),
'Position': row['Position Name'],
'Contract_Type': row['Contract_Type'],
'Target_Hours': row['Target_Hours'],
'Change_Type': 'New Contract' if idx > staff_data.index[0] else 'Initial Contract'
})
current_contracts.add(current_contract)
# Check for ended contracts
ended_contracts = set()
for contract in current_contracts:
pos, type_, target = contract
if not any((staff_data.loc[idx:, 'Position Name'] == pos) &
(staff_data.loc[idx:, 'Contract_Type'] == type_) &
(staff_data.loc[idx:, 'Target_Hours'] == target)):
# Contract has ended
ended_contracts.add(contract)
contract_periods.append({
'Staff_Name': name,
'Start_Date': row['Report_Date'],
'End_Date': row['Report_Date'],
'Position': pos,
'Contract_Type': type_,
'Target_Hours': target,
'Change_Type': 'Contract End'
})
current_contracts -= ended_contracts
# Process contract periods to identify overlaps
if contract_periods:
processed_periods = identify_overlapping_contracts(contract_periods)
changes.extend(processed_periods)
changes_df = consolidate_contract_changes(pd.DataFrame(changes))
# Log summary statistics
total_staff = len(changes_df['Staff_Name'].unique())
multi_contract_staff = len(changes_df[changes_df['Has_Multiple_Contracts']]['Staff_Name'].unique())
logging.info(f"Processed contract changes for {total_staff} staff members")
logging.info(f"Found {multi_contract_staff} staff with multiple contracts")
if multi_contract_staff > 0:
logging.info("Staff with multiple contracts:")
for staff in changes_df[changes_df['Has_Multiple_Contracts']]['Staff_Name'].unique():
staff_records = changes_df[
(changes_df['Staff_Name'] == staff) &
(changes_df['Has_Multiple_Contracts'])
]
logging.info(f" - {staff}:")
for _, record in staff_records.iterrows():
logging.info(f" * {record['Position']} ({record['Contract_Type']}) - "
f"Target Hours: {record['Target_Hours']}")
return changes_df
def process_monthly_reports(folder_path: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
"""
Process all monthly reports and track staff/contract changes
"""
logging.info(f"Processing Monthly Reports from: {folder_path}")
all_data = []
# Get list of files and sort chronologically
files = [f for f in os.listdir(folder_path) if f.startswith('ACL Monthly Report')]
files.sort()
required_columns = [
'Assignment Number', 'First Name', 'Known As', 'Last Name',
'Position Name', 'Working Hours', 'Full-Time Equivalent',
'Line Manager Name'
]
for file in files:
logging.info(f"Processing file: {file}")
# Read file with error handling
df, error = safe_read_excel(os.path.join(folder_path, file), skiprows=2)
if error:
logging.error(f"Skipping file {file} due to error: {error}")
continue
# Validate required columns
try:
validate_processed_data(df, required_columns, name=file)
except ValueError as e:
logging.error(f"Validation failed for {file}: {str(e)}")
continue
report_date = extract_month_year(file)
df['Report_Date'] = report_date
# Filter for Tutors and Learning Support Assistant
mask = df['Position Name'].isin(['Learning Support Assistant', 'Tutor'])
df = df[mask].copy()
# Calculate Target Hours - only for salaried tutors
def calculate_target_hours(row):
try:
working_hours = float(row['Working Hours'])
fte = float(row['Full-Time Equivalent'])
if row['Position Name'] == 'Tutor' and working_hours > 3:
return fte * 840
except (ValueError, TypeError) as e:
logging.warning(f"Error calculating target hours: {str(e)}")
return None
df['Target_Hours'] = df.apply(calculate_target_hours, axis=1)
# Determine Contract Type
df['Contract_Type'] = df['Working Hours'].apply(
lambda x: 'Sessional' if float(x) <= 3 else 'Salaried'
)
# Process Line Manager names
def split_manager_name(name):
if pd.isna(name):
return pd.NA, pd.NA, pd.NA
parts = name.split(',')
if len(parts) != 2:
logging.warning(f"Unexpected manager name format: {name}")
return name, name, name
first_known = parts[0].strip()
last = parts[1].strip()
if ' ' in first_known:
first, known = first_known.split(' ', 1)
else:
first = known = first_known
return first, known, last
# Create manager name formats
df[['Manager_First', 'Manager_Known', 'Manager_Last']] = df['Line Manager Name'].apply(
split_manager_name).apply(pd.Series)
df['Manager_Standard'] = df.apply(
lambda x: f"{x['Manager_First']} {x['Manager_Last']}"
if pd.notna(x['Manager_First']) else pd.NA, axis=1)
df['Manager_Known_As'] = df.apply(
lambda x: f"{x['Manager_Known']}, {x['Manager_Last']}"
if pd.notna(x['Manager_Known']) else pd.NA, axis=1)
# Create staff name formats
df['Staff_Standard'] = df.apply(
lambda x: f"{x['First Name']} {x['Last Name']}", axis=1)
df['Staff_Known_As'] = df.apply(
lambda x: f"{x['Known As']}, {x['Last Name']}", axis=1)
# Clean all name columns
name_columns = [
'Staff_Standard', 'Staff_Known_As',
'Manager_Standard', 'Manager_Known_As'
]
df = standardize_names(df, name_columns)
columns_to_keep = [
'Assignment Number', 'Staff_Standard', 'Staff_Known_As',
'Position Name', 'Contract_Type', 'Target_Hours',
'Manager_Standard', 'Manager_Known_As', 'Report_Date'
]
all_data.append(df[columns_to_keep])
logging.info(f"Processed {len(df)} records from {file}")
if not all_data:
raise ValueError("No data was successfully processed from monthly reports")
# Combine all monthly data
combined_df = pd.concat(all_data, ignore_index=True)
combined_df = combined_df.sort_values(['Report_Date', 'Staff_Standard'])
# Track contract history with enhanced tracking
contract_history = track_contract_changes(combined_df)
# Create staff summary with contract history
staff_summary = []
for name in combined_df['Staff_Standard'].unique():
staff_data = combined_df[combined_df['Staff_Standard'] == name].copy()
contract_data = contract_history[contract_history['Staff_Name'] == name]
latest_contracts = contract_data[
contract_data['End_Date'] == staff_data['Report_Date'].max()
]
staff_summary.append({
'Staff_Name': name,
'First_Appearance': staff_data['Report_Date'].min(),
'Last_Appearance': staff_data['Report_Date'].max(),
'Current_Position': latest_contracts.iloc[0]['Position'],
'Current_Contract': latest_contracts.iloc[0]['Contract_Type'],
'Current_Target': latest_contracts.iloc[0]['Target_Hours'],
'Has_Multiple_Contracts': any(latest_contracts['Has_Multiple_Contracts']),
'Number_Of_Changes': len(contract_data),
'Contract_History': contract_data.to_dict('records')
})
staff_summary_df = pd.DataFrame(staff_summary)
# Final validation
validate_processed_data(
combined_df,
['Staff_Standard', 'Position Name', 'Contract_Type', 'Report_Date'],
'final_staff_data'
)
logging.info("Monthly reports processing completed!")
logging.info(f"Processed data for {len(staff_summary_df)} staff members")
logging.info(f"Generated {len(contract_history)} contract records")
return combined_df, staff_summary_df, contract_historyEditor is loading...
Leave a Comment