Untitled
unknown
python
10 months ago
3.8 kB
6
Indexable
import pandas as pd
# Load the Excel file
file_path = '/mnt/data/nearu-customair-import.xlsx'
data = pd.read_excel(file_path, sheet_name='Sheet1')
# Add a unique identifier for each customer
data['Customer Identifier'] = (
data['Customer Name'] + " | " +
data['Location Street'] + " | " +
data['Location Zip'].astype(str) + " | " +
data['Installed On'].astype(str)
)
# Extract model prefixes
data['Model_Prefix'] = data['Model'].fillna('').str[:3]
# Prepare variables
merged_rows = []
unique_id_start = 10000
used_units = set()
# Group by customer
customer_groups = data.groupby('Customer Identifier')
# Process each customer group
for customer_id, customer_data in customer_groups:
outdoor_units = customer_data[customer_data['Unit Type'] == 'Outdoor Unit']
other_units = customer_data[customer_data['Unit Type'] != 'Outdoor Unit']
for _, outdoor_unit in outdoor_units.iterrows():
matched_prefixes = set()
merged_row = {
'Unique ID': unique_id_start,
'Merged Units': str(outdoor_unit['unit_number']),
'Installed On': outdoor_unit['Installed On'],
'Customer Name': outdoor_unit['Customer Name'],
'Location Street': outdoor_unit['Location Street'],
'Location Zip': outdoor_unit['Location Zip'],
'unit1-model': outdoor_unit['Model'],
'unit1-serial': outdoor_unit['Serial Number']
}
for unit in other_units.itertuples():
if unit.unit_number not in used_units and unit.Model_Prefix not in matched_prefixes:
index = len(matched_prefixes) + 2
merged_row[f'unit{index}-model'] = unit.Model
merged_row[f'unit{index}-serial'] = unit._4
merged_row['Merged Units'] += f", {unit.unit_number}"
matched_prefixes.add(unit.Model_Prefix)
used_units.add(unit.unit_number)
if len(matched_prefixes) >= 3:
break
merged_rows.append(merged_row)
unique_id_start += 1
# Create DataFrame from merged rows
transformed_data = pd.DataFrame(merged_rows)
# Save merged data
merged_output_path = '/mnt/data/final_corrected_multimatch_customer_data.csv'
transformed_data.to_csv(merged_output_path, index=False)
# Calculate summary
all_unit_numbers = set(data['unit_number'])
merged_unit_numbers = set()
for row in merged_rows:
merged_unit_numbers.update(map(int, row['Merged Units'].split(", ")))
unmatched_unit_numbers = all_unit_numbers - merged_unit_numbers
# Prepare unmatched data
unmatched_df = pd.DataFrame({'Unmatched Unit Numbers': sorted(unmatched_unit_numbers)})
# Count rows merged by year
transformed_data['Installed Year'] = pd.to_datetime(transformed_data['Installed On']).dt.year
yearly_counts = transformed_data['Installed Year'].value_counts().reset_index()
yearly_counts.columns = ['Year', 'Count']
yearly_counts = yearly_counts.sort_values('Year')
# Prepare summary
summary_data = {
"Total Units (unit_number)": [len(all_unit_numbers)],
"Total Rows Merged": [len(merged_rows)],
"Total Rows Not Merged": [len(unmatched_unit_numbers)]
}
summary_df = pd.DataFrame(summary_data)
# Save summary to Excel
summary_output_path = '/mnt/data/updated_summary_report.xlsx'
with pd.ExcelWriter(summary_output_path, engine='xlsxwriter') as writer:
summary_df.to_excel(writer, sheet_name='Summary', index=False)
unmatched_df.to_excel(writer, sheet_name='Unmatched Units', index=False)
yearly_counts.to_excel(writer, sheet_name='Yearly Counts', index=False)
print(f"Merged data saved at: {merged_output_path}")
print(f"Summary report saved at: {summary_output_path}")
Editor is loading...
Leave a Comment