Untitled
unknown
plain_text
a year ago
37 kB
7
Indexable
def process_data(request):
def try_parsing_date(date_str):
"""
Attempts to parse the date string using multiple formats.
Returns the date if parsing is successful; otherwise, returns pd.NaT.
"""
for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'): # Add or modify formats as needed
try:
return pd.to_datetime(date_str, format=fmt)
except (ValueError, TypeError):
continue
return pd.NaT # Return NaT if all parsing attempts fail
print("entered in process")
selected_values = request.session.get('selectedValues', [])
print("Selected values are",selected_values)
import pandas as pd
global entries1, entries2, entries3, entries4, entries5, entries6,filtered_df
try:
# Load the dataframe from the session
#merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')
# moiz added code for reading data from db start
merged_data = MergedDataModel2.objects.first()
if merged_data and merged_data.file_path:
file_path = merged_data.file_path
# Read the CSV file with the correct encoding
try:
merged_df = pd.read_csv(file_path, encoding='utf-8')
except UnicodeDecodeError:
merged_df = pd.read_csv(file_path, encoding='ISO-8859-1')
print("Dataframe created successfully")
print(merged_df.head())
else:
print("No file path found in the database")
# moiz added code for reading data from db end
if selected_values is not None and selected_values:
uid_col = 'UID'
merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
print("Selected values",selected_values)
print("column is",merged_df[uid_col])
print("In condition")
else:
uid_col = request.POST.get('uid')
print("UID COL IS",uid_col)
date_format = request.POST.get('date_format')
print("Format is",date_format)
total_counts = len(merged_df)
unique_values2 = request.session.get('unique_values', [])
print("***********************************************************")
print("Unique values are",unique_values2)
start_dates = request.POST.getlist('start_date[]')
end_dates = request.POST.getlist('end_date[]')
#convert start and end date to date time
for column_name in start_dates:
if column_name in merged_df.columns:
merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")
print("END DATE COLUMNS BELOW")
# Process each column listed in end_dates
for column_name in end_dates:
if column_name in merged_df.columns:
merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")
group_by_types = request.POST.getlist('group_by_type[]')
# group_by_type_start_date= request.POST.getlist('group_by_type_start_date[]')
# group_by_type_end_date= request.POST.getlist('group_by_type_end_date[]')
print("Start Dates:", start_dates)
print("End Dates:", end_dates)
print("Group By Types:", group_by_types)
print("Counts:", len(unique_values2), len(start_dates), len(end_dates), len(group_by_types))
print("***************************************************************")
# 1. Make a copy of merged_df
# 2. Sort data in ascending on the basis of endorsement_date_col and uid_col
#uid_col = request.POST.get('uid')
print("UID COLUMN 1",uid_col)
original_count = len(merged_df)
null_uids_df = merged_df[merged_df[uid_col].isnull()]
merged_df.dropna(subset=[uid_col], inplace=True)
final_count = len(merged_df)
entries1 = original_count - final_count
print("UIDS Removed due to NUll are: ",entries1)
endorsement_date_col = request.POST.get('endoresement_date')
null_end_df = merged_df[merged_df[endorsement_date_col].isnull()]
null_count_endor = merged_df[endorsement_date_col].isnull().sum()
def translate_format(input_format):
# Improved translation to account for different separators like '-' and '/'
format_translation = {
'yyyy': '%Y', # Replace 'yyyy' first to prevent conflict with 'yy'
'yy': '%y', # 'yy' should be replaced after 'yyyy' has been replaced
'mm': '%m',
'dd': '%d'
}
for key, value in format_translation.items():
input_format = input_format.replace(key, value)
# Detect and handle separators in the input format dynamically
for sep in ['-', '/', '.', ' ']:
if sep in input_format:
return input_format.replace(sep, '-')
return input_format
# Simulate getting the format from a request
received_format = request.POST.get('date_format')
translated_format = translate_format(received_format)
# Convert dates using the translated format, handling errors
try:
original_count = len(merged_df)
print("Original count:", original_count)
# Apply 'try_parsing_date' to each value in the column
merged_df[endorsement_date_col] = merged_df[endorsement_date_col].apply(lambda x: try_parsing_date(x))
# Optionally, you can still drop rows where dates could not be parsed and are NaT
# If you prefer to keep these rows, you can comment out or remove the line below
merged_df.dropna(subset=[endorsement_date_col], inplace=True)
final_count = len(merged_df)
print("Data after Conversion Endorsement Date")
print(merged_df[endorsement_date_col])
print(f"Total entries: {original_count}")
print(f"Total after removal: {final_count}")
print("Check end entries After",merged_df[endorsement_date_col])
except Exception as e: # Catching a more general exception
print(f"There was an issue with the date parsing: {e}")
endor_datatype = merged_df[endorsement_date_col].dtype
print("type of edorsement column ",endor_datatype)
# merged_df[endorsement_date_col] = merged_df[endorsement_date_col].dt.strftime(date_format)
copy_dataframe = merged_df.copy()
print("UID COLUMN 2",uid_col)
copy_dataframe.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)
print("UID COLUMN 3",uid_col)
endor_datatype = copy_dataframe[endorsement_date_col].dtype
print(f"The datatype of the '{endorsement_date_col}' column is: {endor_datatype}")
radio_choice = request.POST.get('radio_button_choice', 'Last') # Default to 'Last' if no choice given
selectedPreference = request.POST.get('selectedPreference')
print("GOT THE SELECTED PREFERENCE",selectedPreference)
# Handling duplicates in copy_dataframe
if selectedPreference == "First":
copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='first')
else: # If choice is 'Last' or any other unexpected value
copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='last')
# 6. Only include uid_col, endorsement_date_col, other columns selected using additional
other_col = request.POST.getlist('other')
custom_selected_values = request.session.get('customSelectedValues', [])
print("***********************************")
print(custom_selected_values)
print("***********************************")
columns_to_display = [uid_col, endorsement_date_col] + custom_selected_values
print("***********************************")
print(columns_to_display)
print("***********************************")
negative_exposures = request.session.get('negative_exposures', [])
# uid_col = request.POST.getlist('uid')[0]
gross_premium_col = request.POST.getlist('gross_premium')[0]
# uid_col = request.POST.get('uid')
gross_premium_col = request.POST.get('gross_premium')
print("actual column of gross",gross_premium_col)
merged_df[gross_premium_col] = pd.to_numeric(merged_df[gross_premium_col], errors='coerce')
merged_df[gross_premium_col] = merged_df[gross_premium_col].fillna(0)
merged_df[gross_premium_col] = merged_df[gross_premium_col].astype(int)
print("*****************Converted grois to numeric= type*******************")
# Create the UID_DATAFRAME
UID_DATAFRAME = merged_df.groupby(uid_col)[gross_premium_col].sum().reset_index()
print("Papu2")
cancellation_col = request.POST.get('cancellation')
other_col = request.POST.getlist('other')
print("names of cols")
print(merged_df.columns)
print("*****************Converted All colums*******************")
# POSITIVE_ID = request.POST.get('POSITIVE_ID')
def normalize_date(date_val):
"""
Attempts to convert the given date_val to a date object.
If date_val is pd.NaT or leads to a conversion error, None is returned.
"""
if pd.isnull(date_val): # Check if date_val is NaT or NaN
return None
try:
# Assuming date_val is already a datetime object, just extract the date part
return date_val.date()
except (ValueError, TypeError, AttributeError):
# Handle cases where conversion to date fails or date_val doesn't have a date() method
return None
start_date_preference = request.POST.get('start_date_preference','last')
first_start_date = 'first_start_date'
# Example usage:
def apply_date_logic(dates, logic):
"""Applies a specified logic to a series of dates."""
if logic == 'min':
return dates.min()
elif logic == 'max':
return dates.max()
elif logic == 'first':
return dates.iloc[0] if not dates.empty else pd.NaT
elif logic == 'last':
return dates.iloc[-1] if not dates.empty else pd.NaT
return pd.NaT
def set_dates(row,group_by_type_start, group_by_type_end):
"""
Assigns start_date and end_date for a policy based on the row's data.
Iterates through unique_values to find a match and assigns normalized dates from the respective columns.
Additionally, applies specific logic to the start and end dates.
"""
column_select = request.POST.get('column_select')
policy_type = row[column_select] # Adjust the column name to your DataFrame's column name
start_date, end_date = pd.NaT, pd.NaT
for index, value in enumerate(unique_values2):
if policy_type == value:
start_col = start_dates[index]
end_col = end_dates[index]
# Assuming the row has multiple date entries in an array-like format under each column
start_date_series = pd.Series(row[start_col]) if start_col in row else pd.Series()
end_date_series = pd.Series(row[end_col]) if end_col in row else pd.Series()
# Apply the specified logic to these series
start_date = apply_date_logic(start_date_series, group_by_type_start[index])
end_date = apply_date_logic(end_date_series, group_by_type_end[index])
#break # assuming one match per row, remove if multiple matches should be considered
return pd.Series([start_date, end_date])
print("Data in merged")
print(merged_df.columns)
print("UID COLUMN IS ",uid_col)
# Sort based on UID and ENDORSEMENT_DATE
merged_df.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)
try:
print("Starting date processing")
group_by_type_start = request.POST.getlist('group_by_type_start_date[]')
group_by_type_end = request.POST.getlist('group_by_type_end_date[]')
print("Group by start dates:", group_by_type_start) # Debug print
print("Group by end dates:", group_by_type_end) # Debug print
# Applying set_dates without dropping duplicates
print("Applying date settings...")
merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = merged_df.apply(
lambda row: set_dates(row, group_by_type_start, group_by_type_end), axis=1
)
print("Dates applied successfully.")
# drop duplicates to keep only the last entry per UID if necessary
print(f"Dropping duplicates based on {uid_col}...")
updated_merged_df = merged_df.drop_duplicates(subset=uid_col, keep='last')
print("Date processing completed successfully.")
print("Resulting DataFrame head:", updated_merged_df.head()) # Show the first few rows of the updated DataFrame
except Exception as e:
print(f"Error during date processing: {e}")
updated_merged_df = updated_merged_df[updated_merged_df[uid_col].notna()]
# 7. Apply left join on copy_dataframe uid with updated_merged_df uid
final_df = pd.merge(copy_dataframe, updated_merged_df[columns_to_display], on=uid_col, how='left')
duplicated_entries = copy_dataframe[copy_dataframe.duplicated(subset=uid_col, keep=False)]
show_sum_insured = request.POST.get('show_sum_insured')
if show_sum_insured == 'Yes':
SumInsured_DF = merged_df.copy()
print(SumInsured_DF.shape)
sum_insured_column = request.POST.get('column_name')
print(f"Received sum_insured_column from POST request: {sum_insured_column}")
sum_insured_choice = request.POST.get('sum_insured_choice', 'Total')
print(f"Received sum_insured_choice from POST request: {sum_insured_choice}")
if sum_insured_choice == "Incremental":
SumInsured_DF[sum_insured_column] = pd.to_numeric(SumInsured_DF[sum_insured_column], errors='coerce')
SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].fillna(0)
SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].astype(int)
SumInsured_DF_grouped = SumInsured_DF.groupby(uid_col)[sum_insured_column].sum().reset_index(name=f'{sum_insured_column}_SUM')
sum_insured_sum_column = f"{sum_insured_column}_SUM"
SumInsured_DF = SumInsured_DF_grouped # Assigning grouped df back to SumInsured_DF for consistency
print(SumInsured_DF.shape)
else:
print(sum_insured_column)
# Sort based on ENDORESEMENT_DATE in ascending order
#endorsement_date_col = request.POST.get('endoresement_date')
SumInsured_DF.sort_values(by=endorsement_date_col, ascending=True, inplace=True)
# Get the date_preference from POST data ('first' or 'last')
# Get the date_preference from POST data ('first' or 'last')
date_preference = request.POST.get('sum_insured_timeframe')
print("Value is ",date_preference)
if date_preference not in ['First', 'Last']:
date_preference = 'last' # Default to 'last' if the value is anything else or not provided
# Drop duplicates but keep either the 'first' or 'last' based on date_preference
keep = 'first' if date_preference == 'First' else 'last'
SumInsured_DF = SumInsured_DF.drop_duplicates(subset=uid_col, keep=keep)[[uid_col, sum_insured_column]]
# Renaming the column to reflect whether it's the 'FIRST' or 'LATEST' value
sum_insured_sum_column = f"{sum_insured_column}_{'FIRST' if date_preference == 'First' else 'LATEST'}"
SumInsured_DF.rename(columns={sum_insured_column: sum_insured_sum_column}, inplace=True)
print(SumInsured_DF.shape)
duplicated_entries = SumInsured_DF[SumInsured_DF.duplicated(subset=uid_col, keep=False)]
if not duplicated_entries.empty:
print(f"Duplicated entries found in copy_dataframe based on {uid_col}:")
print(duplicated_entries)
# If you're in a web context, you may replace the print statement with a logger or return a response
return HttpResponse(f"Duplicated entries found in copy_dataframe based on {uid_col}.")
final_df = pd.merge(final_df, SumInsured_DF, on=uid_col, how='left')
if UID_DATAFRAME.shape[0] == final_df.shape[0]:
# Print message to console
print("Both UID_DATAFRAME and updated_merged_df have the same number of entries.")
# Perform a left join to add GROSS_PREMIUM_LC column from UID_DATAFRAME to updated_merged_df
updated_merged_df = pd.merge(updated_merged_df, UID_DATAFRAME, on=uid_col, how='left', suffixes=('', '_SUM'))
print("CHECK 4")
if (updated_merged_df['UID'] == 'P/300/2904/19/000025-3212').any():
subset_df = updated_merged_df.loc[updated_merged_df['UID'] == 'P/300/2904/19/000025-3212', ['POLICY_START_DATE','POLICY_END_DATE']]
print(subset_df)
else:
print("No rows with UID equal to 1 found.")
print("****************************")
print(updated_merged_df.head())
print("***************************8")
# final_df1 = pd.merge(final_df, updated_merged_df, on=uid_col, how='left')
# Columns that you want from `updated_merged_df`. This is just an example; adjust it to your needs
updated_merged_df_cols = list(updated_merged_df.columns.difference(final_df.columns))
updated_merged_df_cols.append(uid_col) # Make sure to include the uid column for merging
final_df = pd.merge(final_df, updated_merged_df[updated_merged_df_cols], on=uid_col, how='left')
original_count = len(final_df)
print("final issssssssssss")
print(final_df.head())
#Data frames having null entries
null_start_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]
null_end_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]
gross_null = final_df[final_df[gross_premium_col+'_SUM'].isnull()]
negative_gross_df = final_df[final_df[gross_premium_col+'_SUM'] < 0]
# Remove entries with a null 'POLICY_START_DATE' from the original DataFrame
final_df = final_df[final_df['POLICY_START_DATE'].notnull()]
final_count = len(final_df)
entries_removed_due_to_null_start_date = original_count - final_count
#for end
final_df = final_df[final_df['POLICY_END_DATE'].notnull()]
final_count = len(final_df)
entries_removed_due_to_null_end_date = original_count - final_count
#invalid_date_df = final_df[final_df['POLICY_START_DATE'] > final_df['POLICY_END_DATE']]
final_df = final_df[final_df['POLICY_START_DATE'] <= final_df['POLICY_END_DATE']]
final_count = len(final_df)
entries6 = original_count - final_count
print("Enrties removed due to Date less than PSD PED ",entries6)
print("UIDS Removed due to NUll are: ",entries1)
print("Enrties removed due to Date conversion of edorsement ",entries2)
print("Enrties removed due to Date conversion of Start Date ",entries3)
print("Enrties removed due to Date conversion of End Date ",entries4)
print("Enrties removed due to Date conversion of Cancellation ",entries5)
print("Enrties removed due to Date LESS THAN PSD PED ",entries6)
print("finallll2",final_df.head())
final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.normalize()
final_df['POLICY_START_DATE'] = pd.to_datetime(final_df['POLICY_START_DATE']).dt.normalize()
print("finallll3",final_df.head())
print("START DATE TYPE",final_df['POLICY_START_DATE'].dtype)
print("END DATE TYPE",final_df['POLICY_END_DATE'].dtype)
print("finallll",final_df.head())
final_df['UID_expsoure'] = ((final_df['POLICY_END_DATE'] - final_df['POLICY_START_DATE']).dt.days + 1) / 365.25
exposure_df = final_df[(final_df['UID_expsoure'] < 0) | (final_df['UID_expsoure'] > 1.002054)]
expsoure_df_size = len(exposure_df)
show_sum_insured = request.POST.get('show_sum_insured')
if show_sum_insured == 'Yes':
PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
else:
PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values
PolicyD = final_df[PolicyFrame]
print("PolicyFrame before",PolicyD.head())
print("****************processsssssssssng rowsssssssssssssSzzzzzz*******************")
new_rows = []
# Iterate over each row in the PolicyD DataFrame
for _, row in PolicyD.iterrows():
# Extract the start and end dates
start_date = row['POLICY_START_DATE']
end_date = row['POLICY_END_DATE']
# Get the range of years
start_year = start_date.year
end_year = end_date.year
# Loop over each year in the range of the policy
for year in range(start_year, end_year + 1):
# Calculate the EffectiveStartDate and EffectiveEndDate for the current year
year_start = pd.Timestamp(year, 1, 1)
year_end = pd.Timestamp(year, 12, 31)
# Skip years not in range
if end_date < year_start or start_date > year_end:
continue
effective_start_date = max(start_date, year_start)
effective_end_date = min(end_date, year_end)
# Calculate Exposure in years
Exposure = (effective_end_date - effective_start_date + pd.Timedelta(days=1)).days / 365.25
earned = (Exposure / row['UID_expsoure']) * row[gross_premium_col+'_SUM']
# Append the new row to the list
new_rows.append({
'UID': row[uid_col],
'EffectiveStartDate': effective_start_date,
'EffectiveEndDate': effective_end_date,
'Period': year,
'Exposure': Exposure,
'EarnedPremium': earned,
})
print("*****************DONE*******************")
# Create a new DataFrame with the split periods
new_df = pd.DataFrame(new_rows)
# Formatting the dates to string if needed (dd-mm-yyyy format)
new_df['EffectiveStartDate'] = new_df['EffectiveStartDate'].dt.strftime('%d-%m-%Y')
new_df['EffectiveEndDate'] = new_df['EffectiveEndDate'].dt.strftime('%d-%m-%Y')
# Display the new DataFrame
print("Desired")
print(new_df.head())
# Merge PolicyD and new_df on 'UID'
# Merge PolicyD and new_df on different UID column names
new_df = new_df.merge(PolicyD, left_on='UID', right_on=uid_col, how='left')
print("******************************new")
print(new_df.head())
new_df['POLICY_START_DATE'] = new_df['POLICY_START_DATE'].dt.strftime('%d-%m-%Y')
new_df['POLICY_END_DATE'] = new_df['POLICY_END_DATE'].dt.strftime('%d-%m-%Y')
# for column in custom_selected_values:
# if column not in new_df:
# new_df[column] = column
request.session['new_df'] = new_df.to_json(orient='split')
# Format the dates in the desired format including the time
final_df['POLICY_END_DATE'] = final_df['POLICY_END_DATE'].dt.strftime('%Y-%m-%d %H:%M:%S')
# Print the result to verify
#print(final_df['POLICY_END_DATE'].iloc[0])
final_df[endorsement_date_col] = final_df[endorsement_date_col].dt.strftime(translated_format)
print(final_df['POLICY_START_DATE'].iloc[0])
final_df['POLICY_START_DATE'] = final_df['POLICY_START_DATE'].dt.strftime(translated_format)
print(final_df['POLICY_START_DATE'].iloc[0])
print(final_df['POLICY_END_DATE'].iloc[0])
final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.strftime('%d-%m-%Y')
print(final_df['POLICY_END_DATE'].iloc[0])
TExposure = final_df['UID_expsoure']
if show_sum_insured == 'Yes':
columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
else:
columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values
filtered_df = final_df[columns_to_display]
print("check filter",filtered_df.head())
if show_sum_insured == 'Yes':
filtered_df = filtered_df.rename(columns={
uid_col: 'UID',
sum_insured_sum_column: 'SumInsured',
'POLICY_START_DATE': 'PolicyStartDate',
'POLICY_END_DATE': 'PolicyEndDate',
gross_premium_col+'_SUM': 'GrossPremium'
}, inplace=False)
else:
filtered_df = filtered_df.rename(columns={
uid_col: 'UID',
'POLICY_START_DATE': 'PolicyStartDate',
'POLICY_END_DATE': 'PolicyEndDate',
gross_premium_col+'_SUM': 'GrossPremium'
}, inplace=False)
print("Filtered")
print(filtered_df.head())
uids = filtered_df['UID']
if show_sum_insured == 'Yes':
sum_insured = filtered_df['SumInsured']
# else:
# sum_insured = filtered_df['GrossPremium']
policy_start_dates = filtered_df['PolicyStartDate']
policy_end_dates = filtered_df['PolicyEndDate']
gross_premiums = filtered_df['GrossPremium']
UID_expsoure = filtered_df['UID_expsoure']
print(gross_premiums.head())
if (len(gross_premiums.shape) == 2):
gross_premiums = gross_premiums.iloc[0]
print(gross_premiums.head())
print("UID shape:", uids.shape)
print("PolicyStartDate shape:", policy_start_dates.shape)
print("PolicyEndDate shape:", policy_end_dates.shape)
print("GrossPremium shape:", gross_premiums.shape)
print("UID_exposure shape:", UID_expsoure.shape)
if show_sum_insured == 'Yes':
if (len(sum_insured.shape) == 2):
sum_insured = sum_insured.iloc[0]
print("SumInsured shape:", sum_insured.shape)
PolicyDataframe = pd.DataFrame({
'UID': uids,
'SumInsured': sum_insured,
'PolicyStartDate': policy_start_dates,
'PolicyEndDate': policy_end_dates,
'GrossPremium': gross_premiums,
'UID_expsoure':UID_expsoure,
})
else:
PolicyDataframe = pd.DataFrame({
'UID': uids,
'PolicyStartDate': policy_start_dates,
'PolicyEndDate': policy_end_dates,
'GrossPremium': gross_premiums,
'UID_expsoure':UID_expsoure,
})
for column in custom_selected_values:
if column not in PolicyDataframe:
PolicyDataframe[column] = column
print(PolicyDataframe.head())
print("AFter update")
# sum_insured_available = request.POST.get('sum_insured_available') # This will be either "True" or "False"
# if sum_insured_available == "True":
# else:
# # The sum insured is not available, handle this case as needed
if show_sum_insured == 'Yes':
filtered_df = filtered_df.rename(columns={
sum_insured_column+'_SUM':'SumInsured',
}, inplace=False)
filtered_df = filtered_df.rename(columns={
gross_premium_col+'_SUM': 'GrossPremium',
}, inplace=False)
# Save the updated dataframe back to the session
request.session['final_df'] = filtered_df.to_json(orient='split')
# print(endorsement_date_col, start_date_col)
sum = entries1+entries2+entries3+entries4+entries5+entries6
if entries1<=0:
entries1=0
elif entries2<=0:
entries2=0
elif entries3<=0:
entries3=0
elif entries4<=0:
entries4=0
elif entries5<=0:
entries5=0
elif entries6<=0:
entries6=0
elif entries_removed_due_to_null_start_date<=0:
entries_removed_due_to_null_start_date=0
elif entries_removed_due_to_null_end_date<=0:
entries_removed_due_to_null_end_date=0
print("negative gross",negative_gross_df.head())
print("NULL",null_uids_df.head())
print("Size is ",len(filtered_df))
print("AT THE END DF")
print(null_end_df.head())
print("last",new_df.head())
print(expsoure_df_size)
NULL_ENDOR_UIDS_len = len(null_end_df)
NULL_END_UIDS_len= len(null_end_date_df)
NULL_EXPO_UIDS_len= len(exposure_df)
#NULL_INVALID_UIDS_len = len(invalid_date_df)
NULL_NEGATIVE_UIDS_len = len(negative_gross_df)
NULL_PREMIUM_UIDS_len = len(gross_null)
NULL_START_UIDS_len = len(null_start_date_df)
NULL_UIDS_UIDS_len = len(null_uids_df)
request.session['NULL_START_UIDS'] = null_start_date_df.to_json(orient='split')
request.session['NULL_END_UIDS'] = null_end_date_df.to_json(orient='split')
#request.session['Invalid_date'] = invalid_date_df.to_json(orient='split')
request.session['gross_null'] = gross_null.to_json(orient='split')
request.session['negative_gross_df'] = negative_gross_df.to_json(orient='split')
request.session['null_uids_df'] = null_uids_df.to_json(orient='split')
request.session['null_end_df'] = null_end_df.to_json(orient='split')
request.session['exposure_df'] = exposure_df.to_json(orient='split')
results={
'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False),
'dataframe1': new_df.head().to_html(classes='dataframe', index=False, escape=False),
'NULL_START_UIDS': null_start_date_df.to_html(classes='dataframe',index=False, escape=False),
'NULL_END_UIDS': null_end_date_df.to_html(classes='dataframe',index=False, escape=False),
'show_download_link': True,
'original_count': total_counts,
'removed_count': sum,
'final_count': final_count,
'UID_count': entries1,
'END_DATE_count': entries2,
'START_DATECOUNT':entries3,
'END_DATE_COUNT':entries4,
'CANCELLATION_DATE_COUNT':entries5,
'GREATER_COUNT':entries6,
'START_NULL_COUNT':entries_removed_due_to_null_start_date,
'END_NULL_COUNT':entries_removed_due_to_null_end_date,
#'Invalid_date':invalid_date_df.head().to_html(classes='dataframe', index=False, escape=False),
'gross_null':gross_null.head().to_html(classes='dataframe', index=False, escape=False),
'negative_gross_df':negative_gross_df.head().to_html(classes='dataframe', index=False, escape=False),
'null_uids_df':null_uids_df.head().to_html(classes='dataframe', index=False, escape=False),
'null_end_df':null_end_df.head().to_html(classes='dataframe', index=False, escape=False),
'Null_endorsement_count':null_count_endor,
'exposure_df':exposure_df.head().to_html(classes='dataframe', index=False, escape=False),
'NULL_ENDOR_UIDS_len': NULL_ENDOR_UIDS_len,
'NULL_END_UIDS_len': NULL_END_UIDS_len,
'NULL_EXPO_UIDS_len': NULL_EXPO_UIDS_len,
#'NULL_INVALID_UIDS_len': NULL_INVALID_UIDS_len,
'NULL_NEGATIVE_UIDS_len': NULL_NEGATIVE_UIDS_len,
'NULL_PREMIUM_UIDS_len': NULL_PREMIUM_UIDS_len,
'NULL_START_UIDS_len': NULL_START_UIDS_len,
'NULL_UIDS_UIDS_len': NULL_UIDS_UIDS_len
}
serializable_results = convert_numpy(results)
# Store the serializable results in the session
serializable_results = convert_numpy(results)
# Store the serializable results in the session
store_results_in_session(request, 'premium_results', serializable_results)
# Redirect to the display view
return redirect('display_results')
else:
# Find out which UIDs are causing the discrepancy
diff_uids = set(UID_DATAFRAME[uid_col]) - set(updated_merged_df[uid_col])
print(f"UIDs present in UID_DATAFRAME but not in updated_merged_df: {diff_uids}")
diff_uids = set(updated_merged_df[uid_col]) - set(UID_DATAFRAME[uid_col])
print(f"UIDs present in updated_merged_df but not in UID_DATAFRAME: {diff_uids}")
# Return a message if the entries are not the same
return HttpResponse("Dataframes don't have the same number of entries.")
except KeyError as e:
print(e)
return HttpResponse(f"Error: Column {e} not found in merged dataframe.")Editor is loading...
Leave a Comment