Untitled
unknown
plain_text
24 days ago
37 kB
1
Indexable
Never
def process_data(request): def try_parsing_date(date_str): """ Attempts to parse the date string using multiple formats. Returns the date if parsing is successful; otherwise, returns pd.NaT. """ for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'): # Add or modify formats as needed try: return pd.to_datetime(date_str, format=fmt) except (ValueError, TypeError): continue return pd.NaT # Return NaT if all parsing attempts fail print("entered in process") selected_values = request.session.get('selectedValues', []) print("Selected values are",selected_values) import pandas as pd global entries1, entries2, entries3, entries4, entries5, entries6,filtered_df try: # Load the dataframe from the session #merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split') # moiz added code for reading data from db start merged_data = MergedDataModel2.objects.first() if merged_data and merged_data.file_path: file_path = merged_data.file_path # Read the CSV file with the correct encoding try: merged_df = pd.read_csv(file_path, encoding='utf-8') except UnicodeDecodeError: merged_df = pd.read_csv(file_path, encoding='ISO-8859-1') print("Dataframe created successfully") print(merged_df.head()) else: print("No file path found in the database") # moiz added code for reading data from db end if selected_values is not None and selected_values: uid_col = 'UID' merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1) print("Selected values",selected_values) print("column is",merged_df[uid_col]) print("In condition") else: uid_col = request.POST.get('uid') print("UID COL IS",uid_col) date_format = request.POST.get('date_format') print("Format is",date_format) total_counts = len(merged_df) unique_values2 = request.session.get('unique_values', []) print("***********************************************************") print("Unique values are",unique_values2) start_dates = request.POST.getlist('start_date[]') end_dates = request.POST.getlist('end_date[]') #convert start and end date to date time for column_name in start_dates: if column_name in merged_df.columns: merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x)) print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}") print("END DATE COLUMNS BELOW") # Process each column listed in end_dates for column_name in end_dates: if column_name in merged_df.columns: merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x)) print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}") group_by_types = request.POST.getlist('group_by_type[]') # group_by_type_start_date= request.POST.getlist('group_by_type_start_date[]') # group_by_type_end_date= request.POST.getlist('group_by_type_end_date[]') print("Start Dates:", start_dates) print("End Dates:", end_dates) print("Group By Types:", group_by_types) print("Counts:", len(unique_values2), len(start_dates), len(end_dates), len(group_by_types)) print("***************************************************************") # 1. Make a copy of merged_df # 2. Sort data in ascending on the basis of endorsement_date_col and uid_col #uid_col = request.POST.get('uid') print("UID COLUMN 1",uid_col) original_count = len(merged_df) null_uids_df = merged_df[merged_df[uid_col].isnull()] merged_df.dropna(subset=[uid_col], inplace=True) final_count = len(merged_df) entries1 = original_count - final_count print("UIDS Removed due to NUll are: ",entries1) endorsement_date_col = request.POST.get('endoresement_date') null_end_df = merged_df[merged_df[endorsement_date_col].isnull()] null_count_endor = merged_df[endorsement_date_col].isnull().sum() def translate_format(input_format): # Improved translation to account for different separators like '-' and '/' format_translation = { 'yyyy': '%Y', # Replace 'yyyy' first to prevent conflict with 'yy' 'yy': '%y', # 'yy' should be replaced after 'yyyy' has been replaced 'mm': '%m', 'dd': '%d' } for key, value in format_translation.items(): input_format = input_format.replace(key, value) # Detect and handle separators in the input format dynamically for sep in ['-', '/', '.', ' ']: if sep in input_format: return input_format.replace(sep, '-') return input_format # Simulate getting the format from a request received_format = request.POST.get('date_format') translated_format = translate_format(received_format) # Convert dates using the translated format, handling errors try: original_count = len(merged_df) print("Original count:", original_count) # Apply 'try_parsing_date' to each value in the column merged_df[endorsement_date_col] = merged_df[endorsement_date_col].apply(lambda x: try_parsing_date(x)) # Optionally, you can still drop rows where dates could not be parsed and are NaT # If you prefer to keep these rows, you can comment out or remove the line below merged_df.dropna(subset=[endorsement_date_col], inplace=True) final_count = len(merged_df) print("Data after Conversion Endorsement Date") print(merged_df[endorsement_date_col]) print(f"Total entries: {original_count}") print(f"Total after removal: {final_count}") print("Check end entries After",merged_df[endorsement_date_col]) except Exception as e: # Catching a more general exception print(f"There was an issue with the date parsing: {e}") endor_datatype = merged_df[endorsement_date_col].dtype print("type of edorsement column ",endor_datatype) # merged_df[endorsement_date_col] = merged_df[endorsement_date_col].dt.strftime(date_format) copy_dataframe = merged_df.copy() print("UID COLUMN 2",uid_col) copy_dataframe.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True) print("UID COLUMN 3",uid_col) endor_datatype = copy_dataframe[endorsement_date_col].dtype print(f"The datatype of the '{endorsement_date_col}' column is: {endor_datatype}") radio_choice = request.POST.get('radio_button_choice', 'Last') # Default to 'Last' if no choice given selectedPreference = request.POST.get('selectedPreference') print("GOT THE SELECTED PREFERENCE",selectedPreference) # Handling duplicates in copy_dataframe if selectedPreference == "First": copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='first') else: # If choice is 'Last' or any other unexpected value copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='last') # 6. Only include uid_col, endorsement_date_col, other columns selected using additional other_col = request.POST.getlist('other') custom_selected_values = request.session.get('customSelectedValues', []) print("***********************************") print(custom_selected_values) print("***********************************") columns_to_display = [uid_col, endorsement_date_col] + custom_selected_values print("***********************************") print(columns_to_display) print("***********************************") negative_exposures = request.session.get('negative_exposures', []) # uid_col = request.POST.getlist('uid')[0] gross_premium_col = request.POST.getlist('gross_premium')[0] # uid_col = request.POST.get('uid') gross_premium_col = request.POST.get('gross_premium') print("actual column of gross",gross_premium_col) merged_df[gross_premium_col] = pd.to_numeric(merged_df[gross_premium_col], errors='coerce') merged_df[gross_premium_col] = merged_df[gross_premium_col].fillna(0) merged_df[gross_premium_col] = merged_df[gross_premium_col].astype(int) print("*****************Converted grois to numeric= type*******************") # Create the UID_DATAFRAME UID_DATAFRAME = merged_df.groupby(uid_col)[gross_premium_col].sum().reset_index() print("Papu2") cancellation_col = request.POST.get('cancellation') other_col = request.POST.getlist('other') print("names of cols") print(merged_df.columns) print("*****************Converted All colums*******************") # POSITIVE_ID = request.POST.get('POSITIVE_ID') def normalize_date(date_val): """ Attempts to convert the given date_val to a date object. If date_val is pd.NaT or leads to a conversion error, None is returned. """ if pd.isnull(date_val): # Check if date_val is NaT or NaN return None try: # Assuming date_val is already a datetime object, just extract the date part return date_val.date() except (ValueError, TypeError, AttributeError): # Handle cases where conversion to date fails or date_val doesn't have a date() method return None start_date_preference = request.POST.get('start_date_preference','last') first_start_date = 'first_start_date' # Example usage: def apply_date_logic(dates, logic): """Applies a specified logic to a series of dates.""" if logic == 'min': return dates.min() elif logic == 'max': return dates.max() elif logic == 'first': return dates.iloc[0] if not dates.empty else pd.NaT elif logic == 'last': return dates.iloc[-1] if not dates.empty else pd.NaT return pd.NaT def set_dates(row,group_by_type_start, group_by_type_end): """ Assigns start_date and end_date for a policy based on the row's data. Iterates through unique_values to find a match and assigns normalized dates from the respective columns. Additionally, applies specific logic to the start and end dates. """ column_select = request.POST.get('column_select') policy_type = row[column_select] # Adjust the column name to your DataFrame's column name start_date, end_date = pd.NaT, pd.NaT for index, value in enumerate(unique_values2): if policy_type == value: start_col = start_dates[index] end_col = end_dates[index] # Assuming the row has multiple date entries in an array-like format under each column start_date_series = pd.Series(row[start_col]) if start_col in row else pd.Series() end_date_series = pd.Series(row[end_col]) if end_col in row else pd.Series() # Apply the specified logic to these series start_date = apply_date_logic(start_date_series, group_by_type_start[index]) end_date = apply_date_logic(end_date_series, group_by_type_end[index]) #break # assuming one match per row, remove if multiple matches should be considered return pd.Series([start_date, end_date]) print("Data in merged") print(merged_df.columns) print("UID COLUMN IS ",uid_col) # Sort based on UID and ENDORSEMENT_DATE merged_df.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True) try: print("Starting date processing") group_by_type_start = request.POST.getlist('group_by_type_start_date[]') group_by_type_end = request.POST.getlist('group_by_type_end_date[]') print("Group by start dates:", group_by_type_start) # Debug print print("Group by end dates:", group_by_type_end) # Debug print # Applying set_dates without dropping duplicates print("Applying date settings...") merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = merged_df.apply( lambda row: set_dates(row, group_by_type_start, group_by_type_end), axis=1 ) print("Dates applied successfully.") # drop duplicates to keep only the last entry per UID if necessary print(f"Dropping duplicates based on {uid_col}...") updated_merged_df = merged_df.drop_duplicates(subset=uid_col, keep='last') print("Date processing completed successfully.") print("Resulting DataFrame head:", updated_merged_df.head()) # Show the first few rows of the updated DataFrame except Exception as e: print(f"Error during date processing: {e}") updated_merged_df = updated_merged_df[updated_merged_df[uid_col].notna()] # 7. Apply left join on copy_dataframe uid with updated_merged_df uid final_df = pd.merge(copy_dataframe, updated_merged_df[columns_to_display], on=uid_col, how='left') duplicated_entries = copy_dataframe[copy_dataframe.duplicated(subset=uid_col, keep=False)] show_sum_insured = request.POST.get('show_sum_insured') if show_sum_insured == 'Yes': SumInsured_DF = merged_df.copy() print(SumInsured_DF.shape) sum_insured_column = request.POST.get('column_name') print(f"Received sum_insured_column from POST request: {sum_insured_column}") sum_insured_choice = request.POST.get('sum_insured_choice', 'Total') print(f"Received sum_insured_choice from POST request: {sum_insured_choice}") if sum_insured_choice == "Incremental": SumInsured_DF[sum_insured_column] = pd.to_numeric(SumInsured_DF[sum_insured_column], errors='coerce') SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].fillna(0) SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].astype(int) SumInsured_DF_grouped = SumInsured_DF.groupby(uid_col)[sum_insured_column].sum().reset_index(name=f'{sum_insured_column}_SUM') sum_insured_sum_column = f"{sum_insured_column}_SUM" SumInsured_DF = SumInsured_DF_grouped # Assigning grouped df back to SumInsured_DF for consistency print(SumInsured_DF.shape) else: print(sum_insured_column) # Sort based on ENDORESEMENT_DATE in ascending order #endorsement_date_col = request.POST.get('endoresement_date') SumInsured_DF.sort_values(by=endorsement_date_col, ascending=True, inplace=True) # Get the date_preference from POST data ('first' or 'last') # Get the date_preference from POST data ('first' or 'last') date_preference = request.POST.get('sum_insured_timeframe') print("Value is ",date_preference) if date_preference not in ['First', 'Last']: date_preference = 'last' # Default to 'last' if the value is anything else or not provided # Drop duplicates but keep either the 'first' or 'last' based on date_preference keep = 'first' if date_preference == 'First' else 'last' SumInsured_DF = SumInsured_DF.drop_duplicates(subset=uid_col, keep=keep)[[uid_col, sum_insured_column]] # Renaming the column to reflect whether it's the 'FIRST' or 'LATEST' value sum_insured_sum_column = f"{sum_insured_column}_{'FIRST' if date_preference == 'First' else 'LATEST'}" SumInsured_DF.rename(columns={sum_insured_column: sum_insured_sum_column}, inplace=True) print(SumInsured_DF.shape) duplicated_entries = SumInsured_DF[SumInsured_DF.duplicated(subset=uid_col, keep=False)] if not duplicated_entries.empty: print(f"Duplicated entries found in copy_dataframe based on {uid_col}:") print(duplicated_entries) # If you're in a web context, you may replace the print statement with a logger or return a response return HttpResponse(f"Duplicated entries found in copy_dataframe based on {uid_col}.") final_df = pd.merge(final_df, SumInsured_DF, on=uid_col, how='left') if UID_DATAFRAME.shape[0] == final_df.shape[0]: # Print message to console print("Both UID_DATAFRAME and updated_merged_df have the same number of entries.") # Perform a left join to add GROSS_PREMIUM_LC column from UID_DATAFRAME to updated_merged_df updated_merged_df = pd.merge(updated_merged_df, UID_DATAFRAME, on=uid_col, how='left', suffixes=('', '_SUM')) print("CHECK 4") if (updated_merged_df['UID'] == 'P/300/2904/19/000025-3212').any(): subset_df = updated_merged_df.loc[updated_merged_df['UID'] == 'P/300/2904/19/000025-3212', ['POLICY_START_DATE','POLICY_END_DATE']] print(subset_df) else: print("No rows with UID equal to 1 found.") print("****************************") print(updated_merged_df.head()) print("***************************8") # final_df1 = pd.merge(final_df, updated_merged_df, on=uid_col, how='left') # Columns that you want from `updated_merged_df`. This is just an example; adjust it to your needs updated_merged_df_cols = list(updated_merged_df.columns.difference(final_df.columns)) updated_merged_df_cols.append(uid_col) # Make sure to include the uid column for merging final_df = pd.merge(final_df, updated_merged_df[updated_merged_df_cols], on=uid_col, how='left') original_count = len(final_df) print("final issssssssssss") print(final_df.head()) #Data frames having null entries null_start_date_df = final_df[final_df['POLICY_START_DATE'].isnull()] null_end_date_df = final_df[final_df['POLICY_START_DATE'].isnull()] gross_null = final_df[final_df[gross_premium_col+'_SUM'].isnull()] negative_gross_df = final_df[final_df[gross_premium_col+'_SUM'] < 0] # Remove entries with a null 'POLICY_START_DATE' from the original DataFrame final_df = final_df[final_df['POLICY_START_DATE'].notnull()] final_count = len(final_df) entries_removed_due_to_null_start_date = original_count - final_count #for end final_df = final_df[final_df['POLICY_END_DATE'].notnull()] final_count = len(final_df) entries_removed_due_to_null_end_date = original_count - final_count #invalid_date_df = final_df[final_df['POLICY_START_DATE'] > final_df['POLICY_END_DATE']] final_df = final_df[final_df['POLICY_START_DATE'] <= final_df['POLICY_END_DATE']] final_count = len(final_df) entries6 = original_count - final_count print("Enrties removed due to Date less than PSD PED ",entries6) print("UIDS Removed due to NUll are: ",entries1) print("Enrties removed due to Date conversion of edorsement ",entries2) print("Enrties removed due to Date conversion of Start Date ",entries3) print("Enrties removed due to Date conversion of End Date ",entries4) print("Enrties removed due to Date conversion of Cancellation ",entries5) print("Enrties removed due to Date LESS THAN PSD PED ",entries6) print("finallll2",final_df.head()) final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.normalize() final_df['POLICY_START_DATE'] = pd.to_datetime(final_df['POLICY_START_DATE']).dt.normalize() print("finallll3",final_df.head()) print("START DATE TYPE",final_df['POLICY_START_DATE'].dtype) print("END DATE TYPE",final_df['POLICY_END_DATE'].dtype) print("finallll",final_df.head()) final_df['UID_expsoure'] = ((final_df['POLICY_END_DATE'] - final_df['POLICY_START_DATE']).dt.days + 1) / 365.25 exposure_df = final_df[(final_df['UID_expsoure'] < 0) | (final_df['UID_expsoure'] > 1.002054)] expsoure_df_size = len(exposure_df) show_sum_insured = request.POST.get('show_sum_insured') if show_sum_insured == 'Yes': PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values else: PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values PolicyD = final_df[PolicyFrame] print("PolicyFrame before",PolicyD.head()) print("****************processsssssssssng rowsssssssssssssSzzzzzz*******************") new_rows = [] # Iterate over each row in the PolicyD DataFrame for _, row in PolicyD.iterrows(): # Extract the start and end dates start_date = row['POLICY_START_DATE'] end_date = row['POLICY_END_DATE'] # Get the range of years start_year = start_date.year end_year = end_date.year # Loop over each year in the range of the policy for year in range(start_year, end_year + 1): # Calculate the EffectiveStartDate and EffectiveEndDate for the current year year_start = pd.Timestamp(year, 1, 1) year_end = pd.Timestamp(year, 12, 31) # Skip years not in range if end_date < year_start or start_date > year_end: continue effective_start_date = max(start_date, year_start) effective_end_date = min(end_date, year_end) # Calculate Exposure in years Exposure = (effective_end_date - effective_start_date + pd.Timedelta(days=1)).days / 365.25 earned = (Exposure / row['UID_expsoure']) * row[gross_premium_col+'_SUM'] # Append the new row to the list new_rows.append({ 'UID': row[uid_col], 'EffectiveStartDate': effective_start_date, 'EffectiveEndDate': effective_end_date, 'Period': year, 'Exposure': Exposure, 'EarnedPremium': earned, }) print("*****************DONE*******************") # Create a new DataFrame with the split periods new_df = pd.DataFrame(new_rows) # Formatting the dates to string if needed (dd-mm-yyyy format) new_df['EffectiveStartDate'] = new_df['EffectiveStartDate'].dt.strftime('%d-%m-%Y') new_df['EffectiveEndDate'] = new_df['EffectiveEndDate'].dt.strftime('%d-%m-%Y') # Display the new DataFrame print("Desired") print(new_df.head()) # Merge PolicyD and new_df on 'UID' # Merge PolicyD and new_df on different UID column names new_df = new_df.merge(PolicyD, left_on='UID', right_on=uid_col, how='left') print("******************************new") print(new_df.head()) new_df['POLICY_START_DATE'] = new_df['POLICY_START_DATE'].dt.strftime('%d-%m-%Y') new_df['POLICY_END_DATE'] = new_df['POLICY_END_DATE'].dt.strftime('%d-%m-%Y') # for column in custom_selected_values: # if column not in new_df: # new_df[column] = column request.session['new_df'] = new_df.to_json(orient='split') # Format the dates in the desired format including the time final_df['POLICY_END_DATE'] = final_df['POLICY_END_DATE'].dt.strftime('%Y-%m-%d %H:%M:%S') # Print the result to verify #print(final_df['POLICY_END_DATE'].iloc[0]) final_df[endorsement_date_col] = final_df[endorsement_date_col].dt.strftime(translated_format) print(final_df['POLICY_START_DATE'].iloc[0]) final_df['POLICY_START_DATE'] = final_df['POLICY_START_DATE'].dt.strftime(translated_format) print(final_df['POLICY_START_DATE'].iloc[0]) print(final_df['POLICY_END_DATE'].iloc[0]) final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.strftime('%d-%m-%Y') print(final_df['POLICY_END_DATE'].iloc[0]) TExposure = final_df['UID_expsoure'] if show_sum_insured == 'Yes': columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values else: columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values filtered_df = final_df[columns_to_display] print("check filter",filtered_df.head()) if show_sum_insured == 'Yes': filtered_df = filtered_df.rename(columns={ uid_col: 'UID', sum_insured_sum_column: 'SumInsured', 'POLICY_START_DATE': 'PolicyStartDate', 'POLICY_END_DATE': 'PolicyEndDate', gross_premium_col+'_SUM': 'GrossPremium' }, inplace=False) else: filtered_df = filtered_df.rename(columns={ uid_col: 'UID', 'POLICY_START_DATE': 'PolicyStartDate', 'POLICY_END_DATE': 'PolicyEndDate', gross_premium_col+'_SUM': 'GrossPremium' }, inplace=False) print("Filtered") print(filtered_df.head()) uids = filtered_df['UID'] if show_sum_insured == 'Yes': sum_insured = filtered_df['SumInsured'] # else: # sum_insured = filtered_df['GrossPremium'] policy_start_dates = filtered_df['PolicyStartDate'] policy_end_dates = filtered_df['PolicyEndDate'] gross_premiums = filtered_df['GrossPremium'] UID_expsoure = filtered_df['UID_expsoure'] print(gross_premiums.head()) if (len(gross_premiums.shape) == 2): gross_premiums = gross_premiums.iloc[0] print(gross_premiums.head()) print("UID shape:", uids.shape) print("PolicyStartDate shape:", policy_start_dates.shape) print("PolicyEndDate shape:", policy_end_dates.shape) print("GrossPremium shape:", gross_premiums.shape) print("UID_exposure shape:", UID_expsoure.shape) if show_sum_insured == 'Yes': if (len(sum_insured.shape) == 2): sum_insured = sum_insured.iloc[0] print("SumInsured shape:", sum_insured.shape) PolicyDataframe = pd.DataFrame({ 'UID': uids, 'SumInsured': sum_insured, 'PolicyStartDate': policy_start_dates, 'PolicyEndDate': policy_end_dates, 'GrossPremium': gross_premiums, 'UID_expsoure':UID_expsoure, }) else: PolicyDataframe = pd.DataFrame({ 'UID': uids, 'PolicyStartDate': policy_start_dates, 'PolicyEndDate': policy_end_dates, 'GrossPremium': gross_premiums, 'UID_expsoure':UID_expsoure, }) for column in custom_selected_values: if column not in PolicyDataframe: PolicyDataframe[column] = column print(PolicyDataframe.head()) print("AFter update") # sum_insured_available = request.POST.get('sum_insured_available') # This will be either "True" or "False" # if sum_insured_available == "True": # else: # # The sum insured is not available, handle this case as needed if show_sum_insured == 'Yes': filtered_df = filtered_df.rename(columns={ sum_insured_column+'_SUM':'SumInsured', }, inplace=False) filtered_df = filtered_df.rename(columns={ gross_premium_col+'_SUM': 'GrossPremium', }, inplace=False) # Save the updated dataframe back to the session request.session['final_df'] = filtered_df.to_json(orient='split') # print(endorsement_date_col, start_date_col) sum = entries1+entries2+entries3+entries4+entries5+entries6 if entries1<=0: entries1=0 elif entries2<=0: entries2=0 elif entries3<=0: entries3=0 elif entries4<=0: entries4=0 elif entries5<=0: entries5=0 elif entries6<=0: entries6=0 elif entries_removed_due_to_null_start_date<=0: entries_removed_due_to_null_start_date=0 elif entries_removed_due_to_null_end_date<=0: entries_removed_due_to_null_end_date=0 print("negative gross",negative_gross_df.head()) print("NULL",null_uids_df.head()) print("Size is ",len(filtered_df)) print("AT THE END DF") print(null_end_df.head()) print("last",new_df.head()) print(expsoure_df_size) NULL_ENDOR_UIDS_len = len(null_end_df) NULL_END_UIDS_len= len(null_end_date_df) NULL_EXPO_UIDS_len= len(exposure_df) #NULL_INVALID_UIDS_len = len(invalid_date_df) NULL_NEGATIVE_UIDS_len = len(negative_gross_df) NULL_PREMIUM_UIDS_len = len(gross_null) NULL_START_UIDS_len = len(null_start_date_df) NULL_UIDS_UIDS_len = len(null_uids_df) request.session['NULL_START_UIDS'] = null_start_date_df.to_json(orient='split') request.session['NULL_END_UIDS'] = null_end_date_df.to_json(orient='split') #request.session['Invalid_date'] = invalid_date_df.to_json(orient='split') request.session['gross_null'] = gross_null.to_json(orient='split') request.session['negative_gross_df'] = negative_gross_df.to_json(orient='split') request.session['null_uids_df'] = null_uids_df.to_json(orient='split') request.session['null_end_df'] = null_end_df.to_json(orient='split') request.session['exposure_df'] = exposure_df.to_json(orient='split') results={ 'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False), 'dataframe1': new_df.head().to_html(classes='dataframe', index=False, escape=False), 'NULL_START_UIDS': null_start_date_df.to_html(classes='dataframe',index=False, escape=False), 'NULL_END_UIDS': null_end_date_df.to_html(classes='dataframe',index=False, escape=False), 'show_download_link': True, 'original_count': total_counts, 'removed_count': sum, 'final_count': final_count, 'UID_count': entries1, 'END_DATE_count': entries2, 'START_DATECOUNT':entries3, 'END_DATE_COUNT':entries4, 'CANCELLATION_DATE_COUNT':entries5, 'GREATER_COUNT':entries6, 'START_NULL_COUNT':entries_removed_due_to_null_start_date, 'END_NULL_COUNT':entries_removed_due_to_null_end_date, #'Invalid_date':invalid_date_df.head().to_html(classes='dataframe', index=False, escape=False), 'gross_null':gross_null.head().to_html(classes='dataframe', index=False, escape=False), 'negative_gross_df':negative_gross_df.head().to_html(classes='dataframe', index=False, escape=False), 'null_uids_df':null_uids_df.head().to_html(classes='dataframe', index=False, escape=False), 'null_end_df':null_end_df.head().to_html(classes='dataframe', index=False, escape=False), 'Null_endorsement_count':null_count_endor, 'exposure_df':exposure_df.head().to_html(classes='dataframe', index=False, escape=False), 'NULL_ENDOR_UIDS_len': NULL_ENDOR_UIDS_len, 'NULL_END_UIDS_len': NULL_END_UIDS_len, 'NULL_EXPO_UIDS_len': NULL_EXPO_UIDS_len, #'NULL_INVALID_UIDS_len': NULL_INVALID_UIDS_len, 'NULL_NEGATIVE_UIDS_len': NULL_NEGATIVE_UIDS_len, 'NULL_PREMIUM_UIDS_len': NULL_PREMIUM_UIDS_len, 'NULL_START_UIDS_len': NULL_START_UIDS_len, 'NULL_UIDS_UIDS_len': NULL_UIDS_UIDS_len } serializable_results = convert_numpy(results) # Store the serializable results in the session serializable_results = convert_numpy(results) # Store the serializable results in the session store_results_in_session(request, 'premium_results', serializable_results) # Redirect to the display view return redirect('display_results') else: # Find out which UIDs are causing the discrepancy diff_uids = set(UID_DATAFRAME[uid_col]) - set(updated_merged_df[uid_col]) print(f"UIDs present in UID_DATAFRAME but not in updated_merged_df: {diff_uids}") diff_uids = set(updated_merged_df[uid_col]) - set(UID_DATAFRAME[uid_col]) print(f"UIDs present in updated_merged_df but not in UID_DATAFRAME: {diff_uids}") # Return a message if the entries are not the same return HttpResponse("Dataframes don't have the same number of entries.") except KeyError as e: print(e) return HttpResponse(f"Error: Column {e} not found in merged dataframe.")
Leave a Comment