Untitled

mail@pastecode.io avatar
unknown
plain_text
5 months ago
37 kB
1
Indexable
def process_data(request):
    def try_parsing_date(date_str):
            """
            Attempts to parse the date string using multiple formats.
            Returns the date if parsing is successful; otherwise, returns pd.NaT.
            """
            for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'):  # Add or modify formats as needed
                try:
                    return pd.to_datetime(date_str, format=fmt)
                except (ValueError, TypeError):
                    continue
            return pd.NaT  # Return NaT if all parsing attempts fail
    
    print("entered in process")
    selected_values = request.session.get('selectedValues', [])
    print("Selected values are",selected_values)
    import pandas as pd 
    global entries1, entries2, entries3, entries4, entries5, entries6,filtered_df
    try:
        # Load the dataframe  from the session
        #merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

        # moiz added code for reading data from db start

        merged_data = MergedDataModel2.objects.first()

        if merged_data and merged_data.file_path:
            file_path = merged_data.file_path
            
            # Read the CSV file with the correct encoding
            try:
                merged_df = pd.read_csv(file_path, encoding='utf-8')  
            except UnicodeDecodeError:
                merged_df = pd.read_csv(file_path, encoding='ISO-8859-1') 
            
            print("Dataframe created successfully")
            print(merged_df.head())
        else:
            print("No file path found in the database")

        
        # moiz added code for reading data from db end

        if selected_values is not None and selected_values:
            uid_col = 'UID'
            merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
            print("Selected values",selected_values)
            print("column is",merged_df[uid_col])
            print("In condition")
        else:
            uid_col = request.POST.get('uid')
            print("UID COL IS",uid_col)

        date_format = request.POST.get('date_format')
        print("Format is",date_format)
        total_counts = len(merged_df)
        unique_values2 = request.session.get('unique_values', [])

        print("***********************************************************")

        print("Unique values are",unique_values2)
        start_dates = request.POST.getlist('start_date[]')
        end_dates = request.POST.getlist('end_date[]')

        #convert start and end date to date time
        for column_name in start_dates:
            if column_name in merged_df.columns:
                merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
                print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")

        print("END DATE COLUMNS BELOW")

        # Process each column listed in end_dates
        for column_name in end_dates:
            if column_name in merged_df.columns:
                merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
                print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")


        group_by_types = request.POST.getlist('group_by_type[]')
        # group_by_type_start_date= request.POST.getlist('group_by_type_start_date[]')
        # group_by_type_end_date= request.POST.getlist('group_by_type_end_date[]')
        print("Start Dates:", start_dates)
        print("End Dates:", end_dates)
        print("Group By Types:", group_by_types)
        print("Counts:", len(unique_values2), len(start_dates), len(end_dates), len(group_by_types))
        
        print("***************************************************************")



          # 1. Make a copy of merged_df
        

        # 2. Sort data in ascending on the basis of endorsement_date_col and uid_col
        #uid_col = request.POST.get('uid')

        print("UID COLUMN 1",uid_col)
        original_count = len(merged_df)
        null_uids_df = merged_df[merged_df[uid_col].isnull()]
        merged_df.dropna(subset=[uid_col], inplace=True)
        
        
        final_count = len(merged_df)
        entries1 = original_count - final_count
        print("UIDS Removed due to NUll are: ",entries1)


        endorsement_date_col = request.POST.get('endoresement_date')
        null_end_df = merged_df[merged_df[endorsement_date_col].isnull()]
        null_count_endor = merged_df[endorsement_date_col].isnull().sum()

        
        def translate_format(input_format):
            # Improved translation to account for different separators like '-' and '/'
            format_translation = {
                'yyyy': '%Y',  # Replace 'yyyy' first to prevent conflict with 'yy'
                'yy': '%y',    # 'yy' should be replaced after 'yyyy' has been replaced
                'mm': '%m',
                'dd': '%d'
            }
            for key, value in format_translation.items():
                input_format = input_format.replace(key, value)
            # Detect and handle separators in the input format dynamically
            for sep in ['-', '/', '.', ' ']:
                if sep in input_format:
                    return input_format.replace(sep, '-')
            return input_format
        
        


        # Simulate getting the format from a request
        received_format = request.POST.get('date_format')
        translated_format = translate_format(received_format)


        # Convert dates using the translated format, handling errors
        try:
            original_count = len(merged_df)
            print("Original count:", original_count)

            # Apply 'try_parsing_date' to each value in the column
            merged_df[endorsement_date_col] = merged_df[endorsement_date_col].apply(lambda x: try_parsing_date(x))

            # Optionally, you can still drop rows where dates could not be parsed and are NaT
            # If you prefer to keep these rows, you can comment out or remove the line below
            merged_df.dropna(subset=[endorsement_date_col], inplace=True)
            
            final_count = len(merged_df)
            print("Data after Conversion Endorsement Date")
            print(merged_df[endorsement_date_col])
            print(f"Total entries: {original_count}")
            print(f"Total after removal: {final_count}")
            print("Check end entries After",merged_df[endorsement_date_col])

        except Exception as e:  # Catching a more general exception
            print(f"There was an issue with the date parsing: {e}")



        endor_datatype = merged_df[endorsement_date_col].dtype
        print("type of edorsement column ",endor_datatype)

        # merged_df[endorsement_date_col] = merged_df[endorsement_date_col].dt.strftime(date_format)

        copy_dataframe = merged_df.copy()
        


        print("UID COLUMN 2",uid_col)
        
        copy_dataframe.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)

        print("UID COLUMN 3",uid_col)

        endor_datatype = copy_dataframe[endorsement_date_col].dtype
        print(f"The datatype of the '{endorsement_date_col}' column is: {endor_datatype}")

        radio_choice = request.POST.get('radio_button_choice', 'Last')  # Default to 'Last' if no choice given

        selectedPreference = request.POST.get('selectedPreference')
        print("GOT THE SELECTED PREFERENCE",selectedPreference)

        # Handling duplicates in copy_dataframe
        if selectedPreference == "First":
            copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='first')
        else:  # If choice is 'Last' or any other unexpected value
            copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='last')

        # 6. Only include uid_col, endorsement_date_col, other columns selected using additional
        
        other_col = request.POST.getlist('other')
        custom_selected_values = request.session.get('customSelectedValues', [])
        print("***********************************")
        print(custom_selected_values)
        print("***********************************")
        columns_to_display = [uid_col, endorsement_date_col] + custom_selected_values
        print("***********************************")
        print(columns_to_display)
        print("***********************************")
        negative_exposures = request.session.get('negative_exposures', [])

        # uid_col = request.POST.getlist('uid')[0]  
        gross_premium_col = request.POST.getlist('gross_premium')[0]

        # uid_col = request.POST.get('uid')
        gross_premium_col = request.POST.get('gross_premium')
        print("actual column of gross",gross_premium_col)


        merged_df[gross_premium_col] = pd.to_numeric(merged_df[gross_premium_col], errors='coerce')
        merged_df[gross_premium_col] = merged_df[gross_premium_col].fillna(0)
        merged_df[gross_premium_col] = merged_df[gross_premium_col].astype(int)

        print("*****************Converted grois to numeric=  type*******************")

        # Create the UID_DATAFRAME
        UID_DATAFRAME = merged_df.groupby(uid_col)[gross_premium_col].sum().reset_index()
        
        print("Papu2")
        
        cancellation_col = request.POST.get('cancellation')
     
        
        other_col = request.POST.getlist('other')
        print("names of cols")
        print(merged_df.columns)

        print("*****************Converted All colums*******************")
        # POSITIVE_ID = request.POST.get('POSITIVE_ID')

        def normalize_date(date_val):
            """
            Attempts to convert the given date_val to a date object.
            If date_val is pd.NaT or leads to a conversion error, None is returned.
            """
            if pd.isnull(date_val):  # Check if date_val is NaT or NaN
                return None
            try:
                # Assuming date_val is already a datetime object, just extract the date part
                return date_val.date()
            except (ValueError, TypeError, AttributeError):
                # Handle cases where conversion to date fails or date_val doesn't have a date() method
                return None
            
        start_date_preference = request.POST.get('start_date_preference','last')
        first_start_date = 'first_start_date'
        
        # Example usage:
        def apply_date_logic(dates, logic):
            """Applies a specified logic to a series of dates."""
            if logic == 'min':
                return dates.min()
            elif logic == 'max':
                return dates.max()
            elif logic == 'first':
                return dates.iloc[0] if not dates.empty else pd.NaT
            elif logic == 'last':
                return dates.iloc[-1] if not dates.empty else pd.NaT
            return pd.NaT

        def set_dates(row,group_by_type_start, group_by_type_end):
            """
            Assigns start_date and end_date for a policy based on the row's data.
            Iterates through unique_values to find a match and assigns normalized dates from the respective columns.
            Additionally, applies specific logic to the start and end dates.
            """
            column_select = request.POST.get('column_select')
            policy_type = row[column_select]  # Adjust the column name to your DataFrame's column name



            start_date, end_date = pd.NaT, pd.NaT

            for index, value in enumerate(unique_values2):
                if policy_type == value:
                    start_col = start_dates[index]
                    end_col = end_dates[index]

                    # Assuming the row has multiple date entries in an array-like format under each column
                    start_date_series = pd.Series(row[start_col]) if start_col in row else pd.Series()
                    end_date_series = pd.Series(row[end_col]) if end_col in row else pd.Series()

                    # Apply the specified logic to these series
                    start_date = apply_date_logic(start_date_series, group_by_type_start[index])
                    end_date = apply_date_logic(end_date_series, group_by_type_end[index])

                    #break  # assuming one match per row, remove if multiple matches should be considered

            return pd.Series([start_date, end_date])


        print("Data in merged")
        print(merged_df.columns)

        print("UID COLUMN IS ",uid_col)
        
        # Sort based on UID and ENDORSEMENT_DATE
        merged_df.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)

        try:
            print("Starting date processing")
            group_by_type_start = request.POST.getlist('group_by_type_start_date[]')
            group_by_type_end = request.POST.getlist('group_by_type_end_date[]')

            print("Group by start dates:", group_by_type_start)  # Debug print
            print("Group by end dates:", group_by_type_end)      # Debug print

            # Applying set_dates without dropping duplicates
            print("Applying date settings...")
            merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = merged_df.apply(
                lambda row: set_dates(row, group_by_type_start, group_by_type_end), axis=1
            )
            print("Dates applied successfully.")

            # drop duplicates to keep only the last entry per UID if necessary
            print(f"Dropping duplicates based on {uid_col}...")
            updated_merged_df = merged_df.drop_duplicates(subset=uid_col, keep='last')
            
            print("Date processing completed successfully.")
            print("Resulting DataFrame head:", updated_merged_df.head())  # Show the first few rows of the updated DataFrame

        except Exception as e:
            print(f"Error during date processing: {e}")

         
        updated_merged_df = updated_merged_df[updated_merged_df[uid_col].notna()]

        # 7. Apply left join on copy_dataframe uid with updated_merged_df uid
        final_df = pd.merge(copy_dataframe, updated_merged_df[columns_to_display], on=uid_col, how='left')



        duplicated_entries = copy_dataframe[copy_dataframe.duplicated(subset=uid_col, keep=False)]

        
        show_sum_insured = request.POST.get('show_sum_insured')
        
        if show_sum_insured == 'Yes':
            SumInsured_DF = merged_df.copy()

            print(SumInsured_DF.shape)


            sum_insured_column = request.POST.get('column_name')
            print(f"Received sum_insured_column from POST request: {sum_insured_column}")

            sum_insured_choice = request.POST.get('sum_insured_choice', 'Total')
            print(f"Received sum_insured_choice from POST request: {sum_insured_choice}")

            if sum_insured_choice == "Incremental":
                SumInsured_DF[sum_insured_column] = pd.to_numeric(SumInsured_DF[sum_insured_column], errors='coerce')
                SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].fillna(0)
                SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].astype(int)

                SumInsured_DF_grouped = SumInsured_DF.groupby(uid_col)[sum_insured_column].sum().reset_index(name=f'{sum_insured_column}_SUM')
                sum_insured_sum_column = f"{sum_insured_column}_SUM"
                SumInsured_DF = SumInsured_DF_grouped  # Assigning grouped df back to SumInsured_DF for consistency
                print(SumInsured_DF.shape)

            else: 
                print(sum_insured_column)
                # Sort based on ENDORESEMENT_DATE in ascending order
                #endorsement_date_col = request.POST.get('endoresement_date')
                SumInsured_DF.sort_values(by=endorsement_date_col, ascending=True, inplace=True)
                
                # Get the date_preference from POST data ('first' or 'last')
                # Get the date_preference from POST data ('first' or 'last')
                date_preference = request.POST.get('sum_insured_timeframe')
                print("Value is ",date_preference)
                if date_preference not in ['First', 'Last']:
                    date_preference = 'last'  # Default to 'last' if the value is anything else or not provided
                # Drop duplicates but keep either the 'first' or 'last' based on date_preference
                keep = 'first' if date_preference == 'First' else 'last'
                SumInsured_DF = SumInsured_DF.drop_duplicates(subset=uid_col, keep=keep)[[uid_col, sum_insured_column]]

                # Renaming the column to reflect whether it's the 'FIRST' or 'LATEST' value
                sum_insured_sum_column = f"{sum_insured_column}_{'FIRST' if date_preference == 'First' else 'LATEST'}"
                SumInsured_DF.rename(columns={sum_insured_column: sum_insured_sum_column}, inplace=True)
                print(SumInsured_DF.shape)



            
            duplicated_entries = SumInsured_DF[SumInsured_DF.duplicated(subset=uid_col, keep=False)]

            if not duplicated_entries.empty:
                print(f"Duplicated entries found in copy_dataframe based on {uid_col}:")
                print(duplicated_entries)
                # If you're in a web context, you may replace the print statement with a logger or return a response
                return HttpResponse(f"Duplicated entries found in copy_dataframe based on {uid_col}.")


            final_df = pd.merge(final_df, SumInsured_DF, on=uid_col, how='left')
        
            

        if UID_DATAFRAME.shape[0] == final_df.shape[0]:

            # Print message to console
            print("Both UID_DATAFRAME and updated_merged_df have the same number of entries.")
            
            # Perform a left join to add GROSS_PREMIUM_LC column from UID_DATAFRAME to updated_merged_df
            updated_merged_df = pd.merge(updated_merged_df, UID_DATAFRAME, on=uid_col, how='left', suffixes=('', '_SUM'))

            
            print("CHECK 4")
            if (updated_merged_df['UID'] == 'P/300/2904/19/000025-3212').any():
                subset_df = updated_merged_df.loc[updated_merged_df['UID'] == 'P/300/2904/19/000025-3212', ['POLICY_START_DATE','POLICY_END_DATE']]
                print(subset_df)
            else:
                print("No rows with UID equal to 1 found.")


            print("****************************")
            print(updated_merged_df.head())
            print("***************************8")
            # final_df1 = pd.merge(final_df, updated_merged_df, on=uid_col, how='left')

            # Columns that you want from `updated_merged_df`. This is just an example; adjust it to your needs
            updated_merged_df_cols = list(updated_merged_df.columns.difference(final_df.columns))
            updated_merged_df_cols.append(uid_col)  # Make sure to include the uid column for merging

            final_df = pd.merge(final_df, updated_merged_df[updated_merged_df_cols], on=uid_col, how='left')
            original_count = len(final_df)

            print("final issssssssssss")
            print(final_df.head())



            #Data frames having null entries
            null_start_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]
            null_end_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]

            gross_null = final_df[final_df[gross_premium_col+'_SUM'].isnull()]
            negative_gross_df = final_df[final_df[gross_premium_col+'_SUM'] < 0]

            # Remove entries with a null 'POLICY_START_DATE' from the original DataFrame
            final_df = final_df[final_df['POLICY_START_DATE'].notnull()]
            final_count = len(final_df)
            entries_removed_due_to_null_start_date = original_count - final_count

            #for end
            final_df = final_df[final_df['POLICY_END_DATE'].notnull()]
            final_count = len(final_df)
            entries_removed_due_to_null_end_date = original_count - final_count
            

            #invalid_date_df = final_df[final_df['POLICY_START_DATE'] > final_df['POLICY_END_DATE']]
            final_df = final_df[final_df['POLICY_START_DATE'] <= final_df['POLICY_END_DATE']]
            final_count = len(final_df)
            entries6 = original_count - final_count
            print("Enrties removed due to Date less than PSD PED ",entries6)

            print("UIDS Removed due to NUll are: ",entries1)
            print("Enrties removed due to Date conversion of edorsement ",entries2)
            print("Enrties removed due to Date conversion of Start Date ",entries3)
            print("Enrties removed due to Date conversion of End Date ",entries4)
            print("Enrties removed due to Date conversion of Cancellation ",entries5)
            print("Enrties removed due to Date LESS THAN PSD PED ",entries6)


            print("finallll2",final_df.head())


            final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.normalize()
            final_df['POLICY_START_DATE'] = pd.to_datetime(final_df['POLICY_START_DATE']).dt.normalize()

            print("finallll3",final_df.head())

            print("START DATE TYPE",final_df['POLICY_START_DATE'].dtype)
            print("END DATE TYPE",final_df['POLICY_END_DATE'].dtype)
            
            print("finallll",final_df.head())

            
            final_df['UID_expsoure'] = ((final_df['POLICY_END_DATE'] - final_df['POLICY_START_DATE']).dt.days + 1) / 365.25

            exposure_df = final_df[(final_df['UID_expsoure'] < 0) | (final_df['UID_expsoure'] > 1.002054)]
            expsoure_df_size = len(exposure_df)
          
            show_sum_insured = request.POST.get('show_sum_insured')

            if show_sum_insured == 'Yes':
                PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
            else:
                PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values

            PolicyD = final_df[PolicyFrame]

           
            print("PolicyFrame before",PolicyD.head())
            print("****************processsssssssssng rowsssssssssssssSzzzzzz*******************")
            new_rows = []

            # Iterate over each row in the PolicyD DataFrame
            for _, row in PolicyD.iterrows():
                # Extract the start and end dates
                start_date = row['POLICY_START_DATE']
                end_date = row['POLICY_END_DATE']
                
                # Get the range of years
                start_year = start_date.year
                end_year = end_date.year
                
                # Loop over each year in the range of the policy
                for year in range(start_year, end_year + 1):
                    # Calculate the EffectiveStartDate and EffectiveEndDate for the current year
                    year_start = pd.Timestamp(year, 1, 1)
                    year_end = pd.Timestamp(year, 12, 31)
                    
                    # Skip years not in range
                    if end_date < year_start or start_date > year_end:
                        continue
                    
                    effective_start_date = max(start_date, year_start)
                    effective_end_date = min(end_date, year_end)


                    
                    # Calculate Exposure in years
                    Exposure = (effective_end_date - effective_start_date + pd.Timedelta(days=1)).days / 365.25
                  
                    earned = (Exposure / row['UID_expsoure']) * row[gross_premium_col+'_SUM']
                    
                    # Append the new row to the list
                    new_rows.append({
                        'UID': row[uid_col],
                        'EffectiveStartDate': effective_start_date,
                        'EffectiveEndDate': effective_end_date,
                        'Period': year,
                        'Exposure': Exposure,
                        'EarnedPremium': earned,
                    })



            print("*****************DONE*******************")
            # Create a new DataFrame with the split periods
            new_df = pd.DataFrame(new_rows)

            # Formatting the dates to string if needed (dd-mm-yyyy format)
            new_df['EffectiveStartDate'] = new_df['EffectiveStartDate'].dt.strftime('%d-%m-%Y')
            new_df['EffectiveEndDate'] = new_df['EffectiveEndDate'].dt.strftime('%d-%m-%Y')

            

            # Display the new DataFrame
            print("Desired")
            print(new_df.head())

            # Merge PolicyD and new_df on 'UID'
            # Merge PolicyD and new_df on different UID column names
            new_df = new_df.merge(PolicyD, left_on='UID', right_on=uid_col, how='left')

            
            print("******************************new")
            print(new_df.head())
            new_df['POLICY_START_DATE'] = new_df['POLICY_START_DATE'].dt.strftime('%d-%m-%Y')
            new_df['POLICY_END_DATE'] = new_df['POLICY_END_DATE'].dt.strftime('%d-%m-%Y')

            # for column in custom_selected_values:
            #     if column not in new_df:
            #         new_df[column] = column

            request.session['new_df'] = new_df.to_json(orient='split')


            # Format the dates in the desired format including the time
            final_df['POLICY_END_DATE'] = final_df['POLICY_END_DATE'].dt.strftime('%Y-%m-%d %H:%M:%S')

            # Print the result to verify
            #print(final_df['POLICY_END_DATE'].iloc[0])


            final_df[endorsement_date_col] = final_df[endorsement_date_col].dt.strftime(translated_format)
            print(final_df['POLICY_START_DATE'].iloc[0])
            final_df['POLICY_START_DATE'] = final_df['POLICY_START_DATE'].dt.strftime(translated_format)
            print(final_df['POLICY_START_DATE'].iloc[0])
            print(final_df['POLICY_END_DATE'].iloc[0])
            final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.strftime('%d-%m-%Y')
            print(final_df['POLICY_END_DATE'].iloc[0])
           

            TExposure = final_df['UID_expsoure']
            if show_sum_insured == 'Yes':
                columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
            else:
                columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values
            filtered_df = final_df[columns_to_display]
            print("check filter",filtered_df.head())
            

            if show_sum_insured == 'Yes':
                filtered_df = filtered_df.rename(columns={
                uid_col: 'UID',
                sum_insured_sum_column: 'SumInsured',
                'POLICY_START_DATE': 'PolicyStartDate',
                'POLICY_END_DATE': 'PolicyEndDate',
                gross_premium_col+'_SUM': 'GrossPremium'
            }, inplace=False)
            else:
                filtered_df = filtered_df.rename(columns={
                uid_col: 'UID',
                'POLICY_START_DATE': 'PolicyStartDate',
                'POLICY_END_DATE': 'PolicyEndDate',
                gross_premium_col+'_SUM': 'GrossPremium'
            }, inplace=False)

            print("Filtered")
            print(filtered_df.head())
            
            uids = filtered_df['UID']
            if show_sum_insured == 'Yes':
                sum_insured = filtered_df['SumInsured']
            # else:
            #     sum_insured = filtered_df['GrossPremium']

            policy_start_dates = filtered_df['PolicyStartDate']
            policy_end_dates = filtered_df['PolicyEndDate']
            gross_premiums = filtered_df['GrossPremium']
            UID_expsoure = filtered_df['UID_expsoure']

            
            print(gross_premiums.head())
            if (len(gross_premiums.shape) == 2):
                gross_premiums = gross_premiums.iloc[0]
            print(gross_premiums.head())
            print("UID shape:", uids.shape)
            print("PolicyStartDate shape:", policy_start_dates.shape)
            print("PolicyEndDate shape:", policy_end_dates.shape)
            print("GrossPremium shape:", gross_premiums.shape)
            print("UID_exposure shape:", UID_expsoure.shape)
            

            if show_sum_insured == 'Yes':
                if (len(sum_insured.shape) == 2):
                    sum_insured = sum_insured.iloc[0]
                    print("SumInsured shape:", sum_insured.shape)

                PolicyDataframe = pd.DataFrame({
                'UID': uids,
                'SumInsured': sum_insured,
                'PolicyStartDate': policy_start_dates,
                'PolicyEndDate': policy_end_dates,
                'GrossPremium': gross_premiums,
                'UID_expsoure':UID_expsoure,
            })
            else:
                PolicyDataframe = pd.DataFrame({
                'UID': uids,
                'PolicyStartDate': policy_start_dates,
                'PolicyEndDate': policy_end_dates,
                'GrossPremium': gross_premiums,
                'UID_expsoure':UID_expsoure,
            })
                
            for column in custom_selected_values:
                if column not in PolicyDataframe:
                    PolicyDataframe[column] = column
               
            
            print(PolicyDataframe.head())
            print("AFter update")
            
            # sum_insured_available = request.POST.get('sum_insured_available')  # This will be either "True" or "False"
            # if sum_insured_available == "True":

            # else:
            #     # The sum insured is not available, handle this case as needed


            if show_sum_insured == 'Yes':
                filtered_df = filtered_df.rename(columns={
                sum_insured_column+'_SUM':'SumInsured',
            }, inplace=False)

            filtered_df = filtered_df.rename(columns={
                gross_premium_col+'_SUM': 'GrossPremium',
            }, inplace=False)
    
            # Save the updated dataframe back to the session
            request.session['final_df'] = filtered_df.to_json(orient='split')

            # print(endorsement_date_col, start_date_col)

            sum = entries1+entries2+entries3+entries4+entries5+entries6
            if entries1<=0:
                entries1=0
            elif entries2<=0:
                entries2=0
            elif entries3<=0:
                entries3=0
            elif entries4<=0:
                entries4=0
            elif entries5<=0:
                entries5=0
            elif entries6<=0:
                entries6=0
            elif entries_removed_due_to_null_start_date<=0:
                entries_removed_due_to_null_start_date=0
            elif entries_removed_due_to_null_end_date<=0:
                entries_removed_due_to_null_end_date=0


            print("negative gross",negative_gross_df.head())
            print("NULL",null_uids_df.head())
           
            print("Size is ",len(filtered_df))
            print("AT THE END DF")
            print(null_end_df.head())


            print("last",new_df.head())
            print(expsoure_df_size)
            
            
            NULL_ENDOR_UIDS_len = len(null_end_df)
            NULL_END_UIDS_len= len(null_end_date_df)
            NULL_EXPO_UIDS_len= len(exposure_df)
            #NULL_INVALID_UIDS_len = len(invalid_date_df)
            NULL_NEGATIVE_UIDS_len = len(negative_gross_df)
            NULL_PREMIUM_UIDS_len = len(gross_null)
            NULL_START_UIDS_len = len(null_start_date_df)
            NULL_UIDS_UIDS_len = len(null_uids_df)
            request.session['NULL_START_UIDS'] = null_start_date_df.to_json(orient='split')
            request.session['NULL_END_UIDS'] = null_end_date_df.to_json(orient='split')
            #request.session['Invalid_date'] = invalid_date_df.to_json(orient='split')
            request.session['gross_null'] = gross_null.to_json(orient='split')
            request.session['negative_gross_df'] = negative_gross_df.to_json(orient='split')
            request.session['null_uids_df'] = null_uids_df.to_json(orient='split')
            request.session['null_end_df'] = null_end_df.to_json(orient='split')
            request.session['exposure_df'] = exposure_df.to_json(orient='split')



            results={
                'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False),
                'dataframe1': new_df.head().to_html(classes='dataframe', index=False, escape=False),
                'NULL_START_UIDS': null_start_date_df.to_html(classes='dataframe',index=False, escape=False),
                'NULL_END_UIDS': null_end_date_df.to_html(classes='dataframe',index=False, escape=False),
                'show_download_link': True,
                'original_count': total_counts,
                'removed_count': sum,
                'final_count': final_count,
                'UID_count': entries1,
                'END_DATE_count': entries2,
                'START_DATECOUNT':entries3,
                'END_DATE_COUNT':entries4,
                'CANCELLATION_DATE_COUNT':entries5,
                'GREATER_COUNT':entries6,
                'START_NULL_COUNT':entries_removed_due_to_null_start_date,
                'END_NULL_COUNT':entries_removed_due_to_null_end_date,
                #'Invalid_date':invalid_date_df.head().to_html(classes='dataframe', index=False, escape=False),
                'gross_null':gross_null.head().to_html(classes='dataframe', index=False, escape=False),
                'negative_gross_df':negative_gross_df.head().to_html(classes='dataframe', index=False, escape=False),
                'null_uids_df':null_uids_df.head().to_html(classes='dataframe', index=False, escape=False),
                'null_end_df':null_end_df.head().to_html(classes='dataframe', index=False, escape=False),
                'Null_endorsement_count':null_count_endor,
                'exposure_df':exposure_df.head().to_html(classes='dataframe', index=False, escape=False),
                'NULL_ENDOR_UIDS_len': NULL_ENDOR_UIDS_len,
                'NULL_END_UIDS_len': NULL_END_UIDS_len,
                'NULL_EXPO_UIDS_len': NULL_EXPO_UIDS_len,
                #'NULL_INVALID_UIDS_len': NULL_INVALID_UIDS_len,
                'NULL_NEGATIVE_UIDS_len': NULL_NEGATIVE_UIDS_len,
                'NULL_PREMIUM_UIDS_len': NULL_PREMIUM_UIDS_len,
                'NULL_START_UIDS_len': NULL_START_UIDS_len,
                'NULL_UIDS_UIDS_len': NULL_UIDS_UIDS_len
            }
            serializable_results = convert_numpy(results)

        # Store the serializable results in the session
            serializable_results = convert_numpy(results)

        # Store the serializable results in the session
            store_results_in_session(request, 'premium_results', serializable_results)
        
            # Redirect to the display view
            return redirect('display_results')
        else:
            # Find out which UIDs are causing the discrepancy
            diff_uids = set(UID_DATAFRAME[uid_col]) - set(updated_merged_df[uid_col])
            print(f"UIDs present in UID_DATAFRAME but not in updated_merged_df: {diff_uids}")

            diff_uids = set(updated_merged_df[uid_col]) - set(UID_DATAFRAME[uid_col])
            print(f"UIDs present in updated_merged_df but not in UID_DATAFRAME: {diff_uids}")
            
            # Return a message if the entries are not the same
            return HttpResponse("Dataframes don't have the same number of entries.")

    except KeyError as e:
        print(e)
        return HttpResponse(f"Error: Column {e} not found in merged dataframe.")
Leave a Comment