Untitled

mail@pastecode.io avatar
unknown
plain_text
24 days ago
125 kB
2
Indexable
Never
from django.shortcuts import render
from django.shortcuts import render, redirect
from .forms import CSVFileUploadForm1
from django.http import JsonResponse
from django.views.decorators.csrf import csrf_exempt
from django.http import HttpResponse
from django.template import loader
from django.http import HttpResponse
from dateutil.parser import parse
import pandas as pd
import numpy as np
import json
import pyarrow.csv as pc
import pandas as pd
entries1 = 0
entries2 = 0
entries3 = 0
entries4 = 0
entries5 = 0
entries6 = 0
entries7 = 0
entries8 = 0

claim_entries1 = 0
claim_entries2 = 0
claim_entries3 = 0
claim_entries = 0

os_entries1 = 0
os_entries2 = 0
os_entries3 = 0


column = ''
def os_file_upload(request):
    message = ""
    columns = []
    is_merged = False
    file_details = []  # List to store file names and number of entries
    total_entries = 0  # Variable to store the total number of entries across all files

    if request.method == 'POST':
        form = CSVFileUploadForm1(request.POST, request.FILES)
        if form.is_valid():
            files = request.FILES.getlist('files')
            dataframes = []
            
            for file in files:
                df = pd.read_csv(file)
                num_entries = len(df)
                total_entries += num_entries  # Add to total entries
                file_details.append({'name': file.name, 'entries': num_entries})
                dataframes.append(df)

            merged_df = pd.concat(dataframes)
            # Convert the merged DataFrame to JSON and store it in the session
            request.session['merged_df'] = merged_df.to_json(orient='split')
            message = "Files Merged Successfully"
            is_merged = True
            # Store the column names in the session
            request.session['merged_columns'] = merged_df.columns.tolist()
            columns = merged_df.columns.tolist()

    else:
        form = CSVFileUploadForm1()

    return render(request, 'myapp/os_page.html', {
        'form': form,
        'message': message,
        'is_merged': is_merged,
        'columns': columns,
        'file_details': file_details,  # Pass the file details to the template
        'total_entries': total_entries  # Pass the total entries to the template
    })

# Your utility functions
def convert_numpy(obj):
    if isinstance(obj, np.generic):
        return obj.item()  # np.generic includes np.int64, np.float64 and more
    elif isinstance(obj, dict):
        return {k: convert_numpy(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy(v) for v in obj]
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    else:
        return obj

def store_results_in_session(request, key, results):
    #print(f"Storing data for key: {key}")  # Debug print
    request.session[key] = results

def get_results_from_session(request, key):
    data = request.session.get(key)
    #print(f"Retrieving data for key: {key}, Data: {data}")  # Debug print
    return data

def clear_results_from_session(request, key):
    if key in request.session:
        del request.session[key]



def process_os_data(request):
    print("hello world")
    # Check if the 'merged_df' key exists in the session
    if 'merged_df' in request.session:
        # Retrieve the merged DataFrame from the session
        merged_df_json = request.session['merged_df']
        merged_df = pd.read_json(merged_df_json, orient='split')
        print("Here is merged", merged_df.head())
    else:
        return HttpResponse("Merged DataFrame not found in session.", status=400)
    
    def try_parsing_date(date_str):
            """
            Attempts to parse the date string using multiple formats.
            Returns the date if parsing is successful; otherwise, returns pd.NaT.
            """
            for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'):  # Add or modify formats as needed
                try:
                    return pd.to_datetime(date_str, format=fmt)
                except (ValueError, TypeError):
                    continue
            return pd.NaT  # Return NaT if all parsing attempts fai
    
    def translate_format(input_format):
            format_translation = {
                'yyyy': '%Y',  # Replace 'yyyy' first to prevent conflict with 'yy'
                'mm': '%m',
                'dd': '%d',
                'yy': '%y'    # 'yy' should be replaced after 'yyyy' has been replaced
            }
            for key, value in format_translation.items():
                input_format = input_format.replace(key, value)
            return input_format
    # lossDate= request.POST.get('LOSS_DATE')
    # date_format = request.POST.get('date_format')
    



    # Process the DataFrame based on the user's input
    if request.method == 'POST':
        print("check1")
        # Determine if 'Yes' was selected for claimId availability
        selected_values = request.session.get('selectedValues', [])
        claimId_available = request.POST.get('claimId_available') == 'yes'
        if selected_values is not None and selected_values:
            uid_column = 'UID'
            merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
            print("Selected values",selected_values)
            print("column is",merged_df[uid_column])
        else:
            uid_column = request.POST.get('UID')
            print("UID COL IS",uid_column)

        # uid_column = request.POST.get('UID')
        loss_date_column = request.POST.get('LOSS_DATE')
        claim_type_column = request.POST.get('CLAIM_TYPE')
        claim_amount_column = request.POST.get('CLAIM_AMOUNT')
        AsAtDate = request.POST.get('AsAtDate')


        AsAtDate_Selection = request.POST.get('showSection')
        print("AsAtDate selection is",AsAtDate)
        selected_value = request.POST.get('selected_value') #selected value for AsAtDate
        print("column is ",column)  #Selected column for AsAtDate
        print("Value is",selected_value)

        if AsAtDate_Selection =='yes':
            merged_df = merged_df[merged_df[column] == selected_value]
            print("After AsAtDate removal is",merged_df[column])
            
        
        print("2 merged", merged_df.head())
        received_format = request.POST.get('date_format')
        
        null_lossDate1 = merged_df[loss_date_column].isnull()
        null_entries_df1 = merged_df[null_lossDate1]
        null_lossDate_df1 = pd.DataFrame(null_entries_df1)
        
        
        translated_format = translate_format(received_format)
        print("Translated format:", translated_format)
        merged_df[loss_date_column] = merged_df[loss_date_column].apply(lambda x: try_parsing_date(x))
        
        print("3 merged", merged_df.head())

        # merged_df[payment_column] = pd.to_datetime(
        #     merged_df[payment_column],format=translated_format, errors='coerce'
        #     )



        print("4 merged", merged_df.head())
        print("*****************************")

        print("**********************")
        print('TOTAL DF',merged_df.head())
        print("UID COMING",uid_column)

        print("UID",merged_df['UID'].head())
        print("LossDate",merged_df[loss_date_column].head())
        print("ClaimType",merged_df[claim_type_column].head())


        

        claim_id_column = request.POST.get('CLAIMID')
        if claimId_available:
            No_Claim_df = merged_df.groupby([uid_column,claim_id_column,claim_type_column,loss_date_column]).agg({
                claim_amount_column: 'sum',
            }).reset_index()
            No_Claim_df.columns = ['UID','ClaimId', 'ClaimType', 'LossDate', 'OsPaid']
        else:
            No_Claim_df= merged_df.groupby(['UID', loss_date_column,claim_type_column])[claim_amount_column].sum().reset_index()
            No_Claim_df.columns = ['UID', 'LossDate','ClaimType', 'OsPaid']
        
        df_display=No_Claim_df


        print("converted claims data",No_Claim_df)


        print("This is not_claim_df",No_Claim_df.head())
        print("check4")

        claims_data = request.session.get('No_Claim_df')

        if claims_data:
            if claimId_available:
                claims_data = pd.read_json(claims_data, orient='split')

                print("check 88")
                print("**************************************")
                print(claims_data.head())
                print("**************************************")
                claims_data.columns = ['UID','ClaimId', 'ClaimType', 'LossDate', 'ClaimPaid']
                claims_data['LossDate'] = pd.to_datetime(claims_data['LossDate'], unit='ms')
                print("check 90")
                print("**************************************")
                print(claims_data.head())
                print("**************************************")


                claims_data['UID'] = claims_data['UID'].astype(str)
                No_Claim_df['UID'] = No_Claim_df['UID'].astype(str)
                claims_data['ClaimId'] = claims_data['ClaimId'].astype(str)
                No_Claim_df['ClaimId'] = No_Claim_df['ClaimId'].astype(str)
                claims_data['LossDate'] = claims_data['LossDate'].astype(str)
                No_Claim_df['LossDate'] = No_Claim_df['LossDate'].astype(str)
                claims_data['ClaimType'] = claims_data['ClaimType'].astype(str)
                No_Claim_df['ClaimType'] = No_Claim_df['ClaimType'].astype(str)
            else:
                claims_data = pd.read_json(claims_data, orient='split')
                claims_data.columns = ['UID', 'LossDate','ClaimType', 'ClaimPaid']
                claims_data['UID'] = claims_data['UID'].astype(str)
                No_Claim_df['UID'] = No_Claim_df['UID'].astype(str)
                claims_data['LossDate'] = claims_data['LossDate'].astype(str)
                No_Claim_df['LossDate'] = No_Claim_df['LossDate'].astype(str)
                claims_data['ClaimType'] = claims_data['ClaimType'].astype(str)
                No_Claim_df['ClaimType'] = No_Claim_df['ClaimType'].astype(str)
        else:
            claims_data=pd.DataFrame()        

            print("Both data")
            print(claims_data)
            print(No_Claim_df)

        if not claims_data.empty:
            print("data of claims ",claims_data.head())
            print("data of Os ",No_Claim_df.head())
            #outter = pd.merge(claims_data, No_Claim_df, on=['UID','LossDate','ClaimType'], how='outer')
            # if not 'ClaimType' in claims_data:
            if 'ClaimType' in claims_data.columns:
                if claimId_available:
                    outter = pd.merge(claims_data, No_Claim_df, on=['UID','ClaimId', 'ClaimType', 'LossDate'], how='outer')
                else:
                    outter = pd.merge(claims_data, No_Claim_df, on=['UID', 'LossDate', 'ClaimType'], how='outer')
            else:
                No_Claim_df2= No_Claim_df.groupby(['UID', loss_date_column])[claim_amount_column].sum().reset_index()
                outter = pd.merge(claims_data, No_Claim_df2, on=['UID', 'LossDate'], how='outer')

            print("Outter data is ",outter.head())
            outter['ClaimPaid'].fillna(0, inplace=True)
            outter['OsPaid'].fillna(0, inplace=True)
            outter['ClaimPaid'] = pd.to_numeric(outter['ClaimPaid'], errors='coerce')
            outter['OsPaid'] = pd.to_numeric(outter['OsPaid'], errors='coerce')
            outter['ReportedClaim'] = outter['ClaimPaid'] + outter['OsPaid']
            negative_reported_claims = outter[outter['ReportedClaim'] <= 0]
            print("Reported Claims data is ",outter.head())
        else:
            outter=pd.DataFrame()
        
        
        outter1 = outter.copy() 
        check_df = outter1
        if not claims_data.empty:
            check_df['LossDate'] = check_df['LossDate'].astype(str)
            #check_df['LossDate'] = check_df['LossDate'].dt.strftime(translated_format)
        claim_amounts_df = No_Claim_df.copy()

        # Convert the ClaimAmount_SUM column to numeric, setting errors to NaN
        claim_amounts_df['OsPaid'] = pd.to_numeric(claim_amounts_df['OsPaid'], errors='coerce')
        nan_count_before = claim_amounts_df['OsPaid'].isna().sum()

        # Drop NaN values that resulted from the conversion
        claim_amounts_df.dropna(inplace=True)

        os_entries1=nan_count_before

        # Filter the DataFrame to only contain negative values
        negative_claims_df = claim_amounts_df[claim_amounts_df['OsPaid'] < 0]

        # Count the number of negative ClaimAmount_SUM entries
        negative_claims_count = negative_claims_df['OsPaid'].count()

        print(f"The count of negative OsPaid entries is: {negative_claims_count}")

        
        # try:
        #         original_count = len(merged_df)
        #         print(original_count)
        #         print(outter.shape)
        #         outter['LossDate'] = pd.to_datetime(
        #             outter['LossDate'],
        #             format=translated_format, errors='coerce'
        #             #errors='coerce'  # This will replace non-parsable dates with NaT
        #         )
        #         # Drop rows where dates could not be parsed
        #         print("Check end entries before",outter['LossDate'])
        #         #merged_df.dropna(subset=[loss_date_column], inplace=True)
        #         final_count = len(outter)
        #         entries2 = original_count - final_count
        #         # print("Enrties removed due to Date conversion of edorsement ",entries2)
        #         print(f"Total entries: {original_count}")
        #         # print(f"Removed entries: {removed_count}")
        #         print(f"Total after removal: {final_count}")
        #         print("Check end entries After",outter['LossDate'])
        # except ValueError as e:
        #     print(f"There was an issue with the date format: {e}")

        outter['LossDate'] = outter['LossDate'].apply(lambda x: try_parsing_date(x))
        original_count = len(merged_df)
        print(original_count)
        final_count = len(outter)
        entries2 = original_count - final_count
        # print("Enrties removed due to Date conversion of edorsement ",entries2)
        print(f"Total entries: {original_count}")
        # print(f"Removed entries: {removed_count}")
        print(f"Total after removal: {final_count}")
        print("Check end entries After",outter['LossDate'])


        
        outter['Period'] = outter['LossDate'].dt.year
        outter['claim_count'] = 1



        # No_Claim_df['period'] = No_Claim_df['LossDate'].dt.year
        # Assuming 'No_Claim_df' is your DataFrame
        print("No_Claim",No_Claim_df.head())
        # Store the DataFrame in the session for download




        # Group by 'UID' and 'Period', and then calculate the sum of 'claim_count' and 'ClaimAmount_SUM'
        # grouped_df = outter.groupby(['UID', 'Period']).agg({
        #     'claim_count': 'sum',
        #     'OSPaid': 'sum'
        # }).reset_index()

        # print("this is grouped_df",grouped_df.head())

        
        
        
        consol = outter
        # No_Claim_df.drop(['Period', 'claim_count'], axis=1, inplace=True)
        outter['LossDate'] = outter['LossDate'].dt.strftime(translated_format)

        # The grouped_df DataFrame now contains the sum of 'claim_count' and 'ClaimAmount_SUM' for each combination of 'UID' and 'Period'


        #new_df_json = request.session.get('new_df')
        new_df_json = request.session.get('mapping_data_final')
        if not new_df_json:  # Check if new_df_json is empty
            new_df_json = request.session.get('new_df')


        # print("new DF is 0",new_df.head())
        
        if new_df_json:
            new_df = pd.read_json(new_df_json, orient='split')
            print("New DF head",new_df.head())
            print("Consol DF head",consol.head())
            consol['UID'] = consol['UID'].astype(str)
            consol['Period'] = consol['Period'].astype(str)
            new_df['UID'] = new_df['UID'].astype(str)
            new_df['Period'] = new_df['Period'].astype(str)
        else:
            # Handle the case where new_df is not in the session
            new_df = pd.DataFrame()  # or handle this scenario appropriately




        if not new_df.empty:
            result_df = new_df.merge(consol, on=['UID', 'Period'], how='left') #new_df is premium and consol is claims
            result_df[consol.columns] = result_df[consol.columns].fillna(0)
        else:
            result_df = pd.DataFrame()  # or handle this scenario appropriately



        # Replace 'Column1', 'Column2', etc. with actual column names from new_df
        # result_df = result_df.dropna(subset=['UID', 'Period'])
        # print("Successful matches:\n", result_df)
            

        def convert_to_date(value):
            try:
                numeric_value = pd.to_numeric(value)
                return pd.to_datetime(numeric_value, unit='ms').strftime('%d-%m-%Y')
            except ValueError:
                return 'Invalid Date'

    # Assuming `check_df` is your DataFrame with the 'lossdate' column
        



    # Store the DataFrame in the session for download
        if not claims_data.empty:
            request.session['check_df'] = check_df.to_json(orient='split')
            # check_df['LossDate'] = pd.to_datetime(
            # check_df['LossDate'],format=translated_format, errors='coerce'
            # )
            # check_df['LossDate'] = check_df['LossDate'].dt.strftime(translated_format)
            #check_df['LossDate'] = check_df['LossDate'].apply(convert_to_date)
        request.session['result_df'] = result_df.to_json(orient='split')
        consol = result_df.head().to_html(classes='dataframe', index=False, escape=False)
        request.session['download_df'] = df_display.to_json(orient='split')

        #grouped_df_html = grouped_df.to_html(classes='dataframe', index=False, escape=False)



        result_df_html = result_df.to_html(classes='dataframe', index=False, escape=False)
        check_df1 = pd.read_json(request.session.get('check_df', '{}'), orient='split')
        result_df1 = pd.read_json(request.session.get('result_df', '{}'), orient='split')

        request.session['null_lossDate_df1'] = null_lossDate_df1.to_json(orient='split')
        request.session['negative_reported_claims'] = negative_reported_claims.to_json(orient='split')

        null_lossDate_df1_len = len(null_lossDate_df1)
        negative_reported_claims_len = len(negative_reported_claims)



        results = {
            'df_html': df_display.head().to_html(classes='dataframe', index=False, escape=False),
            'grouped_df_html': check_df.head().to_html(classes='dataframe', index=False, escape=False),
            'new_df_html': consol,
            'download_ready': True,
            # 'check_result': check_df1.to_dict('records') if isinstance(check_df1, pd.DataFrame) else check_df1,  # Convert DataFrame to list of dicts
            # 'result_result': result_df1.to_dict('records') if isinstance(result_df1, pd.DataFrame) else result_df1,  # Convert DataFrame to list of dicts
            #'BlankPDate':null_payment.head().to_html(classes='dataframe', index=False, escape=False),
            'null_lossDate_df1': null_lossDate_df1.head().to_html(classes='dataframe', index=False, escape=False),
            'negative_reported_claims':negative_reported_claims.head().to_html(classes='dataframe', index=False, escape=False),
            'null_lossDate_df1_len':null_lossDate_df1_len,
            'negative_reported_claims_len':negative_reported_claims_len,
        }
        
        serializable_results = convert_numpy(results)

        # Store the serializable results in the session
        store_results_in_session(request, 'os_results', serializable_results)

        # Redirect to the display view
        return redirect('display_os_results')

        #return render(request, 'myapp/osData.html', context)
    else:
        # Handle the case where method is not POST
        # For example, redirect to the home page or show an error message
        return HttpResponse("Invalid request.", status=400)


def display_os_results(request):
    results = get_results_from_session(request, 'os_results')
    if results is None:
        return HttpResponse("No results available. Please go through the processing step first.")
    return render(request, 'myapp/osData.html', results)




def claims_file_upload(request):
    message = ""
    columns = []
    is_merged = False
    file_details = []  # List to store file names and number of entries
    total_entries = 0  # Variable to store the total number of entries across all files

    if request.method == 'POST':
        form = CSVFileUploadForm1(request.POST, request.FILES)
        if form.is_valid():
            files = request.FILES.getlist('files')
            dataframes = []
            
            for file in files:
                df = pd.read_csv(file)
                num_entries = len(df)
                total_entries += num_entries  # Add to total entries
                file_details.append({'name': file.name, 'entries': num_entries})
                dataframes.append(df)

            merged_df = pd.concat(dataframes)
            # Convert the merged DataFrame to JSON and store it in the session
            request.session['merged_df'] = merged_df.to_json(orient='split')
            message = "Files Merged Successfully"
            is_merged = True
            # Store the column names in the session
            request.session['merged_columns'] = merged_df.columns.tolist()
            columns = merged_df.columns.tolist()

    else:
        form = CSVFileUploadForm1()

    return render(request, 'myapp/claims.html', {
        'form': form,
        'message': message,
        'is_merged': is_merged,
        'columns': columns,
        'file_details': file_details,  # Pass the file details to the template
        'total_entries': total_entries  # Pass the total entries to the template
    })




def process_claims_data(request):
    print("hello world")
    # Check if the 'merged_df' key exists in the session
    selected_values = request.session.get('selectedValues', [])
    print("Selected values are",selected_values)
    if 'merged_df' in request.session:
        # Retrieve the merged DataFrame from the session
        merged_df_json = request.session['merged_df']
        merged_df = pd.read_json(merged_df_json, orient='split')
        print("Here is merged", merged_df.head())
    else:
        return HttpResponse("Merged DataFrame not found in session.", status=400)
    

    print("aelected**********",selected_values)

    def translate_format(input_format):
            format_translation = {
                'yyyy': '%Y',  # Replace 'yyyy' first to prevent conflict with 'yy'
                'mm': '%m',
                'dd': '%d',
                'yy': '%y'    # 'yy' should be replaced after 'yyyy' has been replaced
            }
            for key, value in format_translation.items():
                input_format = input_format.replace(key, value)
            return input_format
    # lossDate= request.POST.get('LOSS_DATE')
    # date_format = request.POST.get('date_format')
    def try_parsing_date(date_str):
            """
            Attempts to parse the date string using multiple formats.
            Returns the date if parsing is successful; otherwise, returns pd.NaT.
            """
            for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'):  # Add or modify formats as needed
                try:
                    return pd.to_datetime(date_str, format=fmt)
                except (ValueError, TypeError):
                    continue
            return pd.NaT  # Return NaT if all parsing attempts fail
    



    # Process the DataFrame based on the user's input
    if request.method == 'POST':
        print("check1")
        # Determine if 'Yes' was selected for claimId availability
        total_entries=len(merged_df)
        claimId_available = request.POST.get('claimId_available') == 'yes'
        if selected_values is not None and selected_values:
            uid_column = 'UID'
            merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
            print("Selected values",selected_values)
            print("column is",merged_df[uid_column])
        else:
            uid_column = request.POST.get('UID')
            print("UID COL IS",uid_column)

        #uid_column = request.POST.get('UID')
        loss_date_column = request.POST.get('LOSS_DATE')
        claim_type_column = request.POST.get('CLAIM_TYPE')
        claim_amount_column = request.POST.get('CLAIM_AMOUNT')
        # print("Initial")
        # print("UID",uid_column)
        # print("Loss Date",loss_date_column)
        # print("Claim Type",claim_type_column)
        # print("Claim Amount",claim_amount_column)
        

        
        if claimId_available:
            print("check2")
            claim_id_column1 = request.POST.get('CLAIMID')
            # Group by ClaimId and ClaimType
            if selected_values is not None and selected_values:
                uid_column1 = 'UID'
                merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
                print("Selected values",selected_values)
                print("column is",merged_df['UID'])
            else:
                uid_column1 = request.POST.get('UID1')

                print("UID COL IS",uid_column1)
        

            #uid_column1 = request.POST.get('UID1')
            lossDate = request.POST.get('LOSS_DATE1')
            claim_type_column1 = request.POST.get('CLAIM_TYPE1')
            claim_amount_column1 = request.POST.get('CLAIM_AMOUNT1')


            received_format = request.POST.get('date_format')
            translated_format = translate_format(received_format)
            print("Translated format:", translated_format)
            print("Before")
            print("LossDate column is ",merged_df[lossDate].head()) 
            merged_df[lossDate] = merged_df[lossDate].apply(lambda x: try_parsing_date(x))

            print("Data After Translating Formatting",merged_df.head())


            null_lossDate = merged_df[lossDate].isnull()
            null_entries_df = merged_df[null_lossDate]
            null_lossDate_df = pd.DataFrame(null_entries_df) 
            print("After")
            print("LossDate column is ",merged_df[lossDate].head())   

            one_to_one = merged_df.groupby(claim_id_column1).filter(lambda x: x[lossDate].nunique() > 1)
            
    

            print("1 merged", merged_df.head())
            
            print("type is ",merged_df[lossDate].dtype)

            # Claim_df = merged_df.groupby([claim_id_column1, claim_type_column1]).agg({
            #     uid_column1: 'last',
            #     lossDate: 'max',
            #     claim_amount_column1: 'sum',
            # }).reset_index()
            print("UID column is ",merged_df[uid_column1].head())
            print("Claim ID column is ",merged_df[claim_id_column1].head())
            print("Claim Type column is ",merged_df[claim_type_column1].head())
            print("LossDate column is ",merged_df[lossDate].head())

            

            Claim_df = merged_df.groupby([uid_column1,claim_id_column1,claim_type_column1,lossDate]).agg({
                claim_amount_column1: 'sum',
            }).reset_index()

            Claim_df.columns = ['UID','ClaimId', 'ClaimType', 'LossDate', 'ClaimPaid']

            print("Data in the Claim_df ", Claim_df.head())
            #Claim_df['LossDate'] = Claim_df['LossDate'].dt.strftime(translated_format)

            request.session['No_Claim_df'] = Claim_df.to_json(orient='split')

            claim_amounts_df = Claim_df.copy()

            # Convert the ClaimAmount_SUM column to numeric, setting errors to NaN
            claim_amounts_df['ClaimPaid'] = pd.to_numeric(claim_amounts_df['ClaimPaid'], errors='coerce')

            nan_count_before = claim_amounts_df['ClaimPaid'].isna().sum()

            # Drop NaN values that resulted from the conversion
            claim_amounts_df.dropna(subset=['ClaimPaid'], inplace=True)
            
            claim_entries1=nan_count_before

            total_after = claim_amounts_df.shape[0]


            # Filter the DataFrame to only contain negative values in 'ClaimPaid'
            negative_claims_df = claim_amounts_df[claim_amounts_df['ClaimPaid'] < 0]

            # Count the number of negative ClaimAmount_SUM entries
            negative_claims_count = negative_claims_df['ClaimPaid'].count()

            print(f"The count of negative ClaimAmount_SUM entries is: {negative_claims_count}")

            Claim_df['LossDate'] = Claim_df['LossDate'].apply(lambda x: try_parsing_date(x))
            
            
            # try:
            #         original_count = len(merged_df)
            #         print(original_count)
            #         print(Claim_df.shape)
            #         Claim_df['LossDate'] = pd.to_datetime(
            #             Claim_df['LossDate'],
            #             format=translated_format, errors='coerce'
            #             #errors='coerce'  # This will replace non-parsable dates with NaT
            #         )
            #         # Drop rows where dates could not be parsed
            #         print("Check end entries before",Claim_df['LossDate'])
            #         #merged_df.dropna(subset=[loss_date_column], inplace=True)
            #         final_count = len(Claim_df)
            #         entries2 = original_count - final_count
            #         # print("Enrties removed due to Date conversion of edorsement ",entries2)
            #         print(f"Total entries: {original_count}")
            #         # print(f"Removed entries: {removed_count}")
            #         print(f"Total after removal: {final_count}")
            #         print("Check end entries After",Claim_df['LossDate'])
            # except ValueError as e:
            #     print(f"There was an issue with the date format: {e}")


            #df_display = Claim_df
            original_count = len(merged_df)
            print(original_count)
            final_count = len(Claim_df)
            entries2 = original_count - final_count
            # print("Enrties removed due to Date conversion of edorsement ",entries2)
            print(f"Total entries: {original_count}")
            # print(f"Removed entries: {removed_count}")
            print(f"Total after removal: {final_count}")
            print("Check end entries After",Claim_df['LossDate'])


            
            Claim_df['Period'] = Claim_df['LossDate'].dt.year
            Claim_df['claim_count'] = 1
            #Claim_df['period'] = No_Claim_df['LossDate'].dt.year


            

            print("*****************************")
            print("DF_DISPLAYYYYYYYYYY",Claim_df.head())
            print("*****************************")



            print("check 4")
            # Store the DataFrame in the session for download


            print("NaN in UID:", Claim_df['UID'].isna().sum())
            print("NaN in Period:", Claim_df['Period'].isna().sum())
            Claim_df['UID'] = Claim_df['UID'].astype(str)
            Claim_df['Period'] = Claim_df['Period'].astype(str)

            check_df = Claim_df.groupby(['UID', 'Period']).agg({
                'claim_count': 'sum',
                'ClaimPaid': 'sum'
            }).reset_index()

            consol = check_df

            print("*********************************")
            print("CHECK DF IS ",check_df.head())
            print("*************************************")



            Claim_df.drop(['Period', 'claim_count'], axis=1, inplace=True)
            #No_Claim_df['LossDate'] = No_Claim_df['LossDate'].dt.strftime(translated_format)
            Claim_df['LossDate'] = Claim_df['LossDate'].dt.strftime(translated_format)
            df_display = Claim_df
            request.session['download_df'] = df_display.to_json(orient='split')




            new_df_json = request.session.get('new_df')
            if new_df_json:
                new_df = pd.read_json(new_df_json, orient='split')
                new_df['UID'] = new_df['UID'].astype(str)
                new_df['Period'] = new_df['Period'].astype(str)
            else:
                # Handle the case where new_df is not in the session
                new_df = pd.DataFrame()  # or handle this scenario appropriately
            
            consol['UID'] = consol['UID'].astype(str)
            consol['Period'] = consol['Period'].astype(str)
            

            if not new_df.empty:
                result_df = new_df.merge(consol, on=['UID', 'Period'], how='left') #new_df is premium and consol is claims
                result_df[consol.columns] = result_df[consol.columns].fillna(0)
            else:
                result_df = consol  # or handle this scenario appropriately

            

            #request.session['download_df'] = Claim_df.to_json(orient='split')
            request.session['check_df'] = check_df.to_json(orient='split')
            request.session['result_df'] = result_df.to_json(orient='split')
            request.session['consol_df'] = result_df.to_json(orient='split')
            check_df = check_df.head().to_html(classes='dataframe', index=False, escape=False)
            consol = result_df.head().to_html(classes='dataframe', index=False, escape=False)
        else:
            one_to_one = pd.DataFrame()
            identifier_format = request.POST.get('identifier_format')
            print("*******************",identifier_format)
            print("2 merged", merged_df.head())
            received_format = request.POST.get('date_format')
            translated_format = translate_format(received_format)
            print("Translated format:", translated_format)
            merged_df[loss_date_column] = merged_df[loss_date_column].apply(lambda x: try_parsing_date(x))
            print("3 merged", merged_df.head())

            # merged_df[payment_column] = pd.to_datetime(
            #     merged_df[payment_column],format=translated_format, errors='coerce'
            #          )
            print("4 merged", merged_df.head())
            print("*****************************")
            print("**********************")
            print('TOTAL DF',merged_df.head())
            print("UID COMING",uid_column)

            print("UID",merged_df['UID'].head())
            print("LossDate",merged_df[loss_date_column].head())
            print("ClaimType",merged_df[claim_type_column].head())

            null_lossDate_count = merged_df[loss_date_column].isnull().sum()
            null_lossDate = merged_df[loss_date_column].isnull()
            null_entries_df = merged_df[null_lossDate]
            null_lossDate_df = pd.DataFrame(null_entries_df)


            negative_claims_df = merged_df[merged_df[claim_amount_column] < 0]



            if identifier_format == 'ULC':
                No_Claim_df= merged_df.groupby(['UID', loss_date_column,claim_type_column])[claim_amount_column].sum().reset_index()
                No_Claim_df.columns = ['UID', 'LossDate','ClaimType', 'ClaimPaid']
            else:
                No_Claim_df= merged_df.groupby(['UID', loss_date_column])[claim_amount_column].sum().reset_index()
                No_Claim_df.columns = ['UID', 'LossDate', 'ClaimPaid']


            request.session['No_Claim_df'] = No_Claim_df.to_json(orient='split')
            
            print("This is not_claim_df",No_Claim_df.head())
            print("check4")

            
            
            claim_amounts_df = No_Claim_df.copy()

            # Convert the ClaimAmount_SUM column to numeric, setting errors to NaN
            claim_amounts_df['ClaimPaid'] = pd.to_numeric(claim_amounts_df['ClaimPaid'], errors='coerce')

            # Drop NaN values that resulted from the conversion
            claim_amounts_df.dropna(inplace=True)

            nan_count_before = claim_amounts_df['ClaimPaid'].isna().sum()

            # Drop NaN values that resulted from the conversion
            claim_amounts_df.dropna(subset=['ClaimPaid'], inplace=True)
            
            claim_entries1=nan_count_before

            total_after = claim_amounts_df.shape[0]

            # Filter the DataFrame to only contain negative values
           

            # Count the number of negative ClaimAmount_SUM entries
            negative_claims_count = negative_claims_df['ClaimPaid'].count()

            print(f"The count of negative ClaimPaid entries is: {negative_claims_count}")
            No_Claim_df['LossDate'] = No_Claim_df['LossDate'].apply(lambda x: try_parsing_date(x))
            
            # try:
            #         original_count = len(merged_df)
            #         print(original_count)
            #         print(No_Claim_df.shape)
            #         No_Claim_df['LossDate'] = pd.to_datetime(
            #             No_Claim_df['LossDate'],
            #             format=translated_format, errors='coerce'
            #             #errors='coerce'  # This will replace non-parsable dates with NaT
            #         )
            #         # Drop rows where dates could not be parsed
            #         print("Check end entries before",No_Claim_df['LossDate'])
            #         #merged_df.dropna(subset=[loss_date_column], inplace=True)
            #         final_count = len(No_Claim_df)
            #         entries2 = original_count - final_count
            #         # print("Enrties removed due to Date conversion of edorsement ",entries2)
            #         print(f"Total entries: {original_count}")
            #         # print(f"Removed entries: {removed_count}")
            #         print(f"Total after removal: {final_count}")
            #         print("Check end entries After",No_Claim_df['LossDate'])
            # except ValueError as e:
            #     print(f"There was an issue with the date format: {e}")
            original_count = len(merged_df)
            print(original_count)
            final_count = len(No_Claim_df)
            entries2 = original_count - final_count
            # print("Enrties removed due to Date conversion of edorsement ",entries2)
            print(f"Total entries: {original_count}")
            # print(f"Removed entries: {removed_count}")
            print(f"Total after removal: {final_count}")
            print("Check end entries After",No_Claim_df['LossDate'])

            No_Claim_df['Period'] = No_Claim_df['LossDate'].dt.year
            No_Claim_df['claim_count'] = 1

            # No_Claim_df['period'] = No_Claim_df['LossDate'].dt.year
            # Assuming 'No_Claim_df' is your DataFrame

            print("No_Claim",No_Claim_df.head())
            
            # Store the DataFrame in the session for download


            # Group by 'UID' and 'Period', and then calculate the sum of 'claim_count' and 'ClaimAmount_SUM'
            grouped_df = No_Claim_df.groupby(['UID', 'Period']).agg({
                'claim_count': 'sum',
                'ClaimPaid': 'sum'
            }).reset_index()

            print("this is grouped_df",grouped_df.head())

            check_df = grouped_df
            consol = grouped_df
            No_Claim_df.drop(['Period', 'claim_count'], axis=1, inplace=True)
            No_Claim_df['LossDate'] = No_Claim_df['LossDate'].dt.strftime(translated_format)
            df_display = No_Claim_df

            # The grouped_df DataFrame now contains the sum of 'claim_count' and 'ClaimAmount_SUM' for each combination of 'UID' and 'Period'


            new_df_json = request.session.get('new_df')
            # print("new DF is 0",new_df.head())
           
            if new_df_json:
                new_df = pd.read_json(new_df_json, orient='split')
                consol['UID'] = consol['UID'].astype(str)
                consol['Period'] = consol['Period'].astype(str)
                new_df['UID'] = new_df['UID'].astype(str)
                new_df['Period'] = new_df['Period'].astype(str)
            else:
                # Handle the case where new_df is not in the session
                new_df = pd.DataFrame()  # or handle this scenario appropriately



            if not new_df.empty:
                result_df = new_df.merge(consol, on=['UID', 'Period'], how='left') #new_df is premium and consol is claims
                result_df[consol.columns] = result_df[consol.columns].fillna(0)
            else:
                result_df = pd.DataFrame()  # or handle this scenario appropriately
                print("result df s")



            # Replace 'Column1', 'Column2', etc. with actual column names from new_df
            # result_df = result_df.dropna(subset=['UID', 'Period'])
            # print("Successful matches:\n", result_df)



        # Store the DataFrame in the session for download
            request.session['check_df'] = check_df.to_json(orient='split')
            request.session['result_df'] = result_df.to_json(orient='split')
            request.session['NullLossDate'] = null_lossDate_df.to_json(orient='split')

            check_df = check_df.head().to_html(classes='dataframe', index=False, escape=False)
            consol = result_df.head().to_html(classes='dataframe', index=False, escape=False)
            request.session['download_df'] = df_display.to_json(orient='split')
            #grouped_df_html = grouped_df.to_html(classes='dataframe', index=False, escape=False)



        result_df_html = result_df.to_html(classes='dataframe', index=False, escape=False)
        check_df1 = pd.read_json(request.session.get('check_df', '{}'), orient='split')
        result_df1 = pd.read_json(request.session.get('result_df', '{}'), orient='split')


        # results = {
        #     'df_html': df_display.head().to_html(classes='dataframe', index=False, escape=False),
        #     'grouped_df_html': check_df,
        #     'new_df_html': consol,
        #     'download_ready': True,
        #     'check_result':check_df1,
        #     'result_result':result_df1,
            
        # }

        NullLossDate_len = len(null_lossDate_df)
        negative_claim_entries_len = len(negative_claims_df)

        one_to_one_len = len(one_to_one)

        request.session['NullLossDate'] = null_lossDate_df.to_json(orient='split')
        request.session['negative_claim_entries'] = negative_claims_df.to_json(orient='split')
        request.session['one_to_one'] = one_to_one.to_json(orient='split')




        results = {
        'df_html': df_display.head().to_html(classes='dataframe', index=False, escape=False),
        'grouped_df_html': check_df.to_html(classes='dataframe', index=False, escape=False) if isinstance(check_df, pd.DataFrame) else check_df,  # Assuming check_df is a DataFrame
        'new_df_html': consol.to_html(classes='dataframe', index=False, escape=False) if isinstance(consol, pd.DataFrame) else consol,  # Assuming consol is a DataFrame
        'download_ready': True,
        # 'check_result': check_df1.to_dict('records') if isinstance(check_df1, pd.DataFrame) else check_df1,  # Convert DataFrame to list of dicts
        # 'result_result': result_df1.to_dict('records') if isinstance(result_df1, pd.DataFrame) else result_df1,  # Convert DataFrame to list of dicts
        'NullLossDate': null_lossDate_df.head().to_html(classes='dataframe', index=False, escape=False),
        'negative_claim_entries':negative_claims_df.head().to_html(classes='dataframe', index=False, escape=False),
        'one_to_one':one_to_one.head().to_html(classes='dataframe', index=False, escape=False),
        #'BlankPDate':null_payment.head().to_html(classes='dataframe', index=False, escape=False),
        'NaN_Claim':claim_entries1,
        'Total_Entries1':total_entries,
        'Total_after1':total_after,
        'NullLossDate_len':NullLossDate_len,
        'negative_claim_entries_len':negative_claim_entries_len,
        'one_to_one_len':one_to_one_len,

    }
        serializable_results = convert_numpy(results)

        # Store the serializable results in the session
        store_results_in_session(request, 'claims_results', serializable_results)

        # Redirect to the display view
        
            # Redirect to the display view
        return redirect('display_claims_results')
        # Render the template with the context

        #return render(request, 'myapp/claimData.html', context)
    else:
        # Handle the case where method is not POST
        # For example, redirect to the home page or show an error message
        return HttpResponse("Invalid request.", status=400)
    



def display_claims_results(request):
    results = get_results_from_session(request, 'claims_results')
    if results is None:
        return HttpResponse("No results available. Please go through the processing step first.")
    return render(request, 'myapp/claimData.html', results)




def display_results(request):
    results = get_results_from_session(request, 'premium_results')
    if results is None:
        return HttpResponse("No results available. Please go through the processing step first.")
    
    return render(request, 'myapp/navbar.html', results)


import tempfile
import os

import sys
import pandas as pd
from django.shortcuts import render
# Assuming CSVFileUploadForm is defined elsewhere in your code

from django.shortcuts import render
from .forms import CSVFileUploadForm
import pandas as pd

import pyarrow.csv as pc
import pandas as pd
from django.shortcuts import render
from .forms import CSVFileUploadForm  # Ensure this is correctly imported from your forms module
from .models import MergedDataModel2



def flush_session(request):
    request.session.flush()
    return JsonResponse({'message': 'Session has been flushed successfully!'})

def merge_and_display(request):
    print("*****************Loading Data*******************")
    message = ""
    columns = []
    file_details = []  
    total_entries = 0 
    is_merged = False
    dataframes = []

    if request.method == 'POST':
        form = CSVFileUploadForm1(request.POST, request.FILES)
        if form.is_valid():
            files = request.FILES.getlist('files')
            dataframes = []

            for file in files:
                df = pd.read_csv(file)
                num_entries = len(df)
                total_entries += num_entries  # Add to total entries
                file_details.append({'name': file.name, 'entries': num_entries})
                dataframes.append(df)

            merged_df = pd.concat(dataframes)
            # Convert the merged DataFrame to JSON and store it in the session
            request.session['merged_df'] = merged_df.to_json(orient='split')
            message = "Files Merged Successfully"
            is_merged = True
            # Store the column names in the session
            request.session['merged_columns'] = merged_df.columns.tolist()
            columns = merged_df.columns.tolist()

            # Save the merged DataFrame to the database
            json_data = merged_df.to_json(orient='split')
            MergedDataModel2.objects.create(
                file_name='merged_file',  # Use a suitable file name or identifier
                file_path='csv_files/merged_file.csv',  # Optional: store the path if needed
                data=json_data
            )

    else:
        form = CSVFileUploadForm1()

    return render(request, 'myapp/upload.html', {
        'form': form,
        'message': message,
        'is_merged': is_merged,
        'columns': columns,
        'file_details': file_details,
        'total_entries': total_entries,
    })



def fetch_unique_dates(request):
    # Attempt to load the DataFrame from the session
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')
    print(merged_df.head())
    
    # Get the column name from the request
    column = request.GET.get('column', '') #Column is name of cancellation column
    if column:
        try:
            # Ensure the column exists in the DataFrame to avoid KeyError
            if column in merged_df.columns:
                unique_values = sorted(merged_df[column].dropna().unique().tolist())
                return JsonResponse({'unique_values': unique_values})
            else:
                return JsonResponse({'error': f'Column {column} not found in DataFrame'}, status=404)
        except Exception as e:
            # Log the error or print to the console
            print(f"Error processing column {column}: {e}")
            return JsonResponse({'error': 'Internal Server Error'}, status=500)
    else:
        return JsonResponse({'error': 'No column specified'}, status=400)


def select_column_view(request):
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')
    totalEntries= len(merged_df)
    columns = merged_df.columns.tolist()
    return render(request, 'myapp/select_column.html', {'columns': columns})


def get_columns(request):
    # Load the DataFrame from the session
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

    # Get the column names of the DataFrame
    columns = merged_df.columns.tolist()

    # Return the column names as a JSON response
    return JsonResponse({'columns': columns})



# def get_columns(request):
#     premium_csv = pd.read_csv(request.FILES.get('premium_file'))
#     excel_file = pd.ExcelFile(request.FILES.get('excel_file'))

#     print("Premium CSV Columns:", premium_csv.columns.tolist())  # This will show the column names
#     premium_columns = premium_csv.columns.tolist()

#     excel_columns = excel_file.sheet_names

#     return JsonResponse({'premium_columns': premium_columns, 'excel_columns': excel_columns})


def get_columns1(request):
    # Load the DataFrame from the session
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

    # Assume 'premium_columns' and 'excel_columns' are predefined or calculated
    premium_columns = [col for col in merged_df.columns if "premium" in col]  # Example condition
    excel_columns = [col for col in merged_df.columns if col not in premium_columns]

    # Return the column names as a JSON response
    return JsonResponse({'premium_columns': premium_columns, 'excel_columns': excel_columns})


def get_unique_values(request):
    global column
    # Load the dataframe from the session
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

    column = request.GET.get('column', '')
    unique_values = merged_df[column].unique().tolist()
    request.session['unique_values'] = unique_values
    print(request.POST) 
    return JsonResponse({'unique_values': unique_values})



def get_unique_values1(request):
    column_name = request.GET.get('column')
    if not column_name:
        return JsonResponse({'error': 'Column name is required'}, status=400)

    # Assuming you have your DataFrame stored in the session or otherwise accessible
    df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')
    if column_name not in df.columns:
        return JsonResponse({'error': 'Invalid column name'}, status=400)

    unique_values = df[column_name].value_counts().reset_index().values.tolist()
    data = [{'value': value[0], 'count': value[1]} for value in unique_values]

    return JsonResponse(data, safe=False)


def get_values(request):
    global column
    # Load the dataframe from the session
    merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

    column = request.GET.get('column', '')
    values = merged_df[column].tolist()
    print(request.POST) 
    return JsonResponse({'values': values})

def process_data(request):
    def try_parsing_date(date_str):
            """
            Attempts to parse the date string using multiple formats.
            Returns the date if parsing is successful; otherwise, returns pd.NaT.
            """
            for fmt in ('%d-%m-%Y', '%d/%m/%Y','%Y-%m-%d %H:%M:%S'):  # Add or modify formats as needed
                try:
                    return pd.to_datetime(date_str, format=fmt)
                except (ValueError, TypeError):
                    continue
            return pd.NaT  # Return NaT if all parsing attempts fail
    
    print("entered in process")
    selected_values = request.session.get('selectedValues', [])
    print("Selected values are",selected_values)
    import pandas as pd 
    global entries1, entries2, entries3, entries4, entries5, entries6,filtered_df
    try:
        # Load the dataframe  from the session
        merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')

        if selected_values is not None and selected_values:
            uid_col = 'UID'
            merged_df['UID'] = merged_df[selected_values].astype(str).agg('-'.join, axis=1)
            print("Selected values",selected_values)
            print("column is",merged_df[uid_col])
            print("In condition")
        else:
            uid_col = request.POST.get('uid')
            print("UID COL IS",uid_col)

        date_format = request.POST.get('date_format')
        print("Format is",date_format)
        total_counts = len(merged_df)
        unique_values2 = request.session.get('unique_values', [])

        print("***********************************************************")

        print("Unique values are",unique_values2)
        start_dates = request.POST.getlist('start_date[]')
        end_dates = request.POST.getlist('end_date[]')

        #convert start and end date to date time
        for column_name in start_dates:
            if column_name in merged_df.columns:
                merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
                print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")

        print("END DATE COLUMNS BELOW")

        # Process each column listed in end_dates
        for column_name in end_dates:
            if column_name in merged_df.columns:
                merged_df[column_name] = merged_df[column_name].apply(lambda x: try_parsing_date(x))
                print(f"After conversion, dtype of {column_name}: {merged_df[column_name].dtype}")


        group_by_types = request.POST.getlist('group_by_type[]')
        # group_by_type_start_date= request.POST.getlist('group_by_type_start_date[]')
        # group_by_type_end_date= request.POST.getlist('group_by_type_end_date[]')
        print("Start Dates:", start_dates)
        print("End Dates:", end_dates)
        print("Group By Types:", group_by_types)
        print("Counts:", len(unique_values2), len(start_dates), len(end_dates), len(group_by_types))
        
        print("***************************************************************")



          # 1. Make a copy of merged_df
        

        # 2. Sort data in ascending on the basis of endorsement_date_col and uid_col
        #uid_col = request.POST.get('uid')

        print("UID COLUMN 1",uid_col)
        original_count = len(merged_df)
        null_uids_df = merged_df[merged_df[uid_col].isnull()]
        merged_df.dropna(subset=[uid_col], inplace=True)
        
        
        final_count = len(merged_df)
        entries1 = original_count - final_count
        print("UIDS Removed due to NUll are: ",entries1)


        endorsement_date_col = request.POST.get('endoresement_date')
        null_end_df = merged_df[merged_df[endorsement_date_col].isnull()]
        null_count_endor = merged_df[endorsement_date_col].isnull().sum()

        # def translate_format(input_format):
        #     format_translation = {
        #         'yyyy': '%Y',  # Replace 'yyyy' first to prevent conflict with 'yy'
        #         'mm': '%m',
        #         'dd': '%d',
        #         'yy': '%y'    # 'yy' should be replaced after 'yyyy' has been replaced
        #     }
        #     for key, value in format_translation.items():
        #         input_format = input_format.replace(key, value)
        #     return input_format

        # # Assuming 'request' is your HttpRequest object from Django
        # received_format = request.POST.get('date_format')
        # translated_format = translate_format(received_format)
        # print("Translated format:", translated_format)
        # print("Data before Conversion")
        # print(merged_df[endorsement_date_col])

        # merged_df[endorsement_date_col] = pd.to_datetime(merged_df[endorsement_date_col],format='%d-%m-%Y')
       
        # try:
        #     original_count = len(merged_df)
        #     print(original_count)
        #     print(merged_df.shape)
        #     merged_df[endorsement_date_col] = pd.to_datetime(
        #         merged_df[endorsement_date_col],
        #         format=translated_format,
        #         #errors='coerce',  # This will replace non-parsable dates with NaT
        #     )
        #     # Drop rows where dates could not be parsed
        #     print("Check end entries before",merged_df[endorsement_date_col])
        #     merged_df.dropna(subset=[endorsement_date_col], inplace=True)
        #     final_count = len(merged_df)
        #     entries2 = original_count - final_count
        #     # print("Enrties removed due to Date conversion of edorsement ",entries2)
        #     print(f"Total entries: {original_count}")
        #     # print(f"Removed entries: {removed_count}")
        #     print(f"Total after removal: {final_count}")
        #     print("Check end entries After",merged_df[endorsement_date_col])
        #     print("*******************Converted *********************")
        
        # except ValueError as e:
        #     print(f"There was an issue with the date format: {e}")

        
                

        def translate_format(input_format):
            # Improved translation to account for different separators like '-' and '/'
            format_translation = {
                'yyyy': '%Y',  # Replace 'yyyy' first to prevent conflict with 'yy'
                'yy': '%y',    # 'yy' should be replaced after 'yyyy' has been replaced
                'mm': '%m',
                'dd': '%d'
            }
            for key, value in format_translation.items():
                input_format = input_format.replace(key, value)
            # Detect and handle separators in the input format dynamically
            for sep in ['-', '/', '.', ' ']:
                if sep in input_format:
                    return input_format.replace(sep, '-')
            return input_format
        
        


        # Simulate getting the format from a request
        received_format = request.POST.get('date_format')
        translated_format = translate_format(received_format)


        # Convert dates using the translated format, handling errors
        try:
            original_count = len(merged_df)
            print("Original count:", original_count)

            # Apply 'try_parsing_date' to each value in the column
            merged_df[endorsement_date_col] = merged_df[endorsement_date_col].apply(lambda x: try_parsing_date(x))

            # Optionally, you can still drop rows where dates could not be parsed and are NaT
            # If you prefer to keep these rows, you can comment out or remove the line below
            merged_df.dropna(subset=[endorsement_date_col], inplace=True)
            
            final_count = len(merged_df)
            print("Data after Conversion Endorsement Date")
            print(merged_df[endorsement_date_col])
            print(f"Total entries: {original_count}")
            print(f"Total after removal: {final_count}")
            print("Check end entries After",merged_df[endorsement_date_col])

        except Exception as e:  # Catching a more general exception
            print(f"There was an issue with the date parsing: {e}")



        endor_datatype = merged_df[endorsement_date_col].dtype
        print("type of edorsement column ",endor_datatype)

        # merged_df[endorsement_date_col] = merged_df[endorsement_date_col].dt.strftime(date_format)

        copy_dataframe = merged_df.copy()
        


        print("UID COLUMN 2",uid_col)
        
        copy_dataframe.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)

        print("UID COLUMN 3",uid_col)

        endor_datatype = copy_dataframe[endorsement_date_col].dtype
        print(f"The datatype of the '{endorsement_date_col}' column is: {endor_datatype}")

        radio_choice = request.POST.get('radio_button_choice', 'Last')  # Default to 'Last' if no choice given

        selectedPreference = request.POST.get('selectedPreference')
        print("GOT THE SELECTED PREFERENCE",selectedPreference)

        # Handling duplicates in copy_dataframe
        if selectedPreference == "First":
            copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='first')
        else:  # If choice is 'Last' or any other unexpected value
            copy_dataframe = copy_dataframe.drop_duplicates(subset=uid_col, keep='last')

        # 6. Only include uid_col, endorsement_date_col, other columns selected using additional
        
        other_col = request.POST.getlist('other')
        custom_selected_values = request.session.get('customSelectedValues', [])
        print("***********************************")
        print(custom_selected_values)
        print("***********************************")
        columns_to_display = [uid_col, endorsement_date_col] + custom_selected_values
        print("***********************************")
        print(columns_to_display)
        print("***********************************")
        negative_exposures = request.session.get('negative_exposures', [])

        # uid_col = request.POST.getlist('uid')[0]  
        gross_premium_col = request.POST.getlist('gross_premium')[0]

        # uid_col = request.POST.get('uid')
        gross_premium_col = request.POST.get('gross_premium')
        print("actual column of gross",gross_premium_col)


        merged_df[gross_premium_col] = pd.to_numeric(merged_df[gross_premium_col], errors='coerce')
        merged_df[gross_premium_col] = merged_df[gross_premium_col].fillna(0)
        merged_df[gross_premium_col] = merged_df[gross_premium_col].astype(int)

        print("*****************Converted grois to numeric=  type*******************")

        # Create the UID_DATAFRAME
        UID_DATAFRAME = merged_df.groupby(uid_col)[gross_premium_col].sum().reset_index()
        
        # endorsement_date_col = request.POST.get('endoresement_date')
        # start_date_col = request.POST.get('start_date')



        # try:
        #     original_count = len(merged_df)
        #     print("Original count:", original_count)
            
        #     # Use the translated format for conversion, with error handling
        #     merged_df[start_date_col] = merged_df[start_date_col].apply(lambda x: try_parsing_date(x))
            

        #     # Drop rows where dates could not be parsed
        #     merged_df.dropna(subset=[start_date_col], inplace=True)
        #     final_count = len(merged_df)
            
        #     print("Data after Conversion START DATE COLUMN")
        #     print(merged_df[start_date_col])
        #     print(f"Total entries: {original_count}")
        #     print(f"Total after removal: {final_count}")
        #     print("Check end entries After",merged_df[start_date_col])

        # except ValueError as e:
        #     print(f"There was an issue with the date format: {e}")
        print("Papu2")

                
        cancellation_col = request.POST.get('cancellation')
     

        # try:
        #     original_count = len(merged_df)
        #     print("Original count:", original_count)
            
        #     # Use the translated format for conversion, with error handling
        #     merged_df[cancellation_col] = merged_df[cancellation_col].apply(lambda x: try_parsing_date(x))

        #     # Drop rows where dates could not be parsed
        #     merged_df.dropna(subset=[cancellation_col], inplace=True)
        #     final_count = len(merged_df)

        #     print("Data after Conversion Cancellation column")
        #     print(merged_df[cancellation_col])
        #     print(f"Total entries: {original_count}")
        #     print(f"Total after removal: {final_count}")
        #     print("Check end entries After",merged_df[cancellation_col])

        # except ValueError as e:
        #     print(f"There was an issue with the date format: {e}")

        
        other_col = request.POST.getlist('other')
        print("names of cols")
        print(merged_df.columns)

        
        print("*****************Converted All colums*******************")
        # POSITIVE_ID = request.POST.get('POSITIVE_ID')


        # try:
        #     original_count = len(merged_df)
        #     print("Original count:", original_count)
            
        #     # Use the translated format for conversion, with error handling
        #     merged_df[POSITIVE_ID] = merged_df[POSITIVE_ID].apply(lambda x: try_parsing_date(x))

        #     # Drop rows where dates could not be parsed
        #     merged_df.dropna(subset=[POSITIVE_ID], inplace=True)
        #     final_count = len(merged_df)
        #     print("Data after Conversion END DATE column")
        #     print(merged_df[POSITIVE_ID])
        #     print(f"Total entries: {original_count}")
        #     print(f"Total after removal: {final_count}")
        #     print("Check end entries After",merged_df[POSITIVE_ID])

        # except ValueError as e:
        #     print(f"There was an issue with the date format: {e}")
        


        # def set_dates(row):
        #     start_date = normalize_date(row[start_date_col])
        #     end_date = None
        #     if row[column] not in negative_exposures:
        #         end_date = normalize_date(row[POSITIVE_ID])
        #     else:
        #         end_date = normalize_date(row[cancellation_col])
        #     return pd.Series([start_date, end_date])



        # try:
        #     print("Starting date processing")
        #     merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = merged_df.apply(set_dates, axis=1)
        # except ValueError as e:
        #     print(f"Error: {e}")
            

        def normalize_date(date_val):
            """
            Attempts to convert the given date_val to a date object.
            If date_val is pd.NaT or leads to a conversion error, None is returned.
            """
            if pd.isnull(date_val):  # Check if date_val is NaT or NaN
                return None
            try:
                # Assuming date_val is already a datetime object, just extract the date part
                return date_val.date()
            except (ValueError, TypeError, AttributeError):
                # Handle cases where conversion to date fails or date_val doesn't have a date() method
                return None
            
        start_date_preference = request.POST.get('start_date_preference','last')
        first_start_date = 'first_start_date'
        # def set_dates(row):
        #     """
        #     Sets start_date and end_date for a policy based on the row's data.
        #     Handles cases where the input dates might be NaT by returning None.
        #     """
        #     # if start_date_preference =='first' and row[column] in negative_exposures:
        #     #     start_date = normalize_date(row[first_start_date])
        #     # else:
        #     #     start_date = normalize_date(row[start_date_col])
        #     # end_date = None
        #     # if row[column] not in negative_exposures:
        #     #     end_date = normalize_date(row[POSITIVE_ID])
        #     # else:
        #     #     end_date = normalize_date(row[cancellation_col])
            

        #     return pd.Series([start_date, end_date])

        # Example usage:
        def apply_date_logic(dates, logic):
            """Applies a specified logic to a series of dates."""
            if logic == 'min':
                return dates.min()
            elif logic == 'max':
                return dates.max()
            elif logic == 'first':
                return dates.iloc[0] if not dates.empty else pd.NaT
            elif logic == 'last':
                return dates.iloc[-1] if not dates.empty else pd.NaT
            return pd.NaT

        def set_dates(row,group_by_type_start, group_by_type_end):
            """
            Assigns start_date and end_date for a policy based on the row's data.
            Iterates through unique_values to find a match and assigns normalized dates from the respective columns.
            Additionally, applies specific logic to the start and end dates.
            """
            column_select = request.POST.get('column_select')
            policy_type = row[column_select]  # Adjust the column name to your DataFrame's column name



            start_date, end_date = pd.NaT, pd.NaT

            for index, value in enumerate(unique_values2):
                if policy_type == value:
                    start_col = start_dates[index]
                    end_col = end_dates[index]

                    # Assuming the row has multiple date entries in an array-like format under each column
                    start_date_series = pd.Series(row[start_col]) if start_col in row else pd.Series()
                    end_date_series = pd.Series(row[end_col]) if end_col in row else pd.Series()

                    # Apply the specified logic to these series
                    start_date = apply_date_logic(start_date_series, group_by_type_start[index])
                    end_date = apply_date_logic(end_date_series, group_by_type_end[index])

                    #break  # assuming one match per row, remove if multiple matches should be considered

            return pd.Series([start_date, end_date])


        print("Data in merged")
        print(merged_df.columns)

        print("UID COLUMN IS ",uid_col)
        
        # Sort based on UID and ENDORSEMENT_DATE
        merged_df.sort_values(by=[uid_col, endorsement_date_col], ascending=[True, True], inplace=True)
        # Drop duplicates based on UID and keep the last (latest) entry
        #updated_merged_df = merged_df.drop_duplicates(subset=uid_col, keep='last')

        # # if start_date_preference=='first':
        # #     print("FIRST IS SELECTED************************************************************************")
        # #     updated_merged_df.loc[:, 'first_start_date'] = merged_df.groupby(uid_col)[start_date_col].transform('first')
        # #     try:
        # #         original_count = len(updated_merged_df)
        # #         print("Original count:", original_count)
                
        # #         # Use the translated format for conversion, with error handling
        # #         updated_merged_df[first_start_date] = updated_merged_df[first_start_date].apply(lambda x: try_parsing_date(x))

        # #         # Drop rows where dates could not be parsed
        # #         updated_merged_df.dropna(subset=[first_start_date], inplace=True)
        # #         final_count = len(updated_merged_df)
        # #         print("Data after Conversion END DATE column")
        # #         print(updated_merged_df[first_start_date])
        # #         print(f"Total entries: {original_count}")
        # #         print(f"Total after removal: {final_count}")
        # #         print("Check end entries After",updated_merged_df[first_start_date])
        # #     except ValueError as e:
        # #         print(f"There was an issue with the date format: {e}")
        


        
        #     print("CHECK 1")
        #     if (updated_merged_df['UID'] == 'P/300/2904/19/000025-3212').any():
        #         subset_df = updated_merged_df.loc[updated_merged_df['UID'] == 'P/300/2904/19/000025-3212', ['first_start_date',start_date_col]]
        #         print(subset_df)
        #     else:
        #         print("No rows with UID equal to 1 found.")

        # try:
        #     print("Starting date processing")
        #     # Fetch the logic types from POST request or define them here if static
        #     group_by_type_start = request.POST.getlist('group_by_type_start_date[]')
        #     group_by_type_end = request.POST.getlist('group_by_type_end_date[]')

        #     # Apply the set_dates function across the DataFrame
        #     updated_merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = updated_merged_df.apply(
        #         lambda row: set_dates(row, group_by_type_start, group_by_type_end), axis=1
        #     )
        #     print("Date processing completed successfully.")
        # except Exception as e:
        #     print(f"Error during date processing: {e}")
        
        try:
            print("Starting date processing")
            group_by_type_start = request.POST.getlist('group_by_type_start_date[]')
            group_by_type_end = request.POST.getlist('group_by_type_end_date[]')

            print("Group by start dates:", group_by_type_start)  # Debug print
            print("Group by end dates:", group_by_type_end)      # Debug print

            # Applying set_dates without dropping duplicates
            print("Applying date settings...")
            merged_df[['POLICY_START_DATE', 'POLICY_END_DATE']] = merged_df.apply(
                lambda row: set_dates(row, group_by_type_start, group_by_type_end), axis=1
            )
            print("Dates applied successfully.")

            # drop duplicates to keep only the last entry per UID if necessary
            print(f"Dropping duplicates based on {uid_col}...")
            updated_merged_df = merged_df.drop_duplicates(subset=uid_col, keep='last')
            
            print("Date processing completed successfully.")
            print("Resulting DataFrame head:", updated_merged_df.head())  # Show the first few rows of the updated DataFrame

        except Exception as e:
            print(f"Error during date processing: {e}")

         
        updated_merged_df = updated_merged_df[updated_merged_df[uid_col].notna()]

        # 7. Apply left join on copy_dataframe uid with updated_merged_df uid
        final_df = pd.merge(copy_dataframe, updated_merged_df[columns_to_display], on=uid_col, how='left')



        duplicated_entries = copy_dataframe[copy_dataframe.duplicated(subset=uid_col, keep=False)]

        
        show_sum_insured = request.POST.get('show_sum_insured')
        
        if show_sum_insured == 'Yes':
            SumInsured_DF = merged_df.copy()

            print(SumInsured_DF.shape)


            sum_insured_column = request.POST.get('column_name')
            print(f"Received sum_insured_column from POST request: {sum_insured_column}")

            sum_insured_choice = request.POST.get('sum_insured_choice', 'Total')
            print(f"Received sum_insured_choice from POST request: {sum_insured_choice}")

            if sum_insured_choice == "Incremental":
                SumInsured_DF[sum_insured_column] = pd.to_numeric(SumInsured_DF[sum_insured_column], errors='coerce')
                SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].fillna(0)
                SumInsured_DF[sum_insured_column] = SumInsured_DF[sum_insured_column].astype(int)

                SumInsured_DF_grouped = SumInsured_DF.groupby(uid_col)[sum_insured_column].sum().reset_index(name=f'{sum_insured_column}_SUM')
                sum_insured_sum_column = f"{sum_insured_column}_SUM"
                SumInsured_DF = SumInsured_DF_grouped  # Assigning grouped df back to SumInsured_DF for consistency
                print(SumInsured_DF.shape)

            else: 
                print(sum_insured_column)
                # Sort based on ENDORESEMENT_DATE in ascending order
                #endorsement_date_col = request.POST.get('endoresement_date')
                SumInsured_DF.sort_values(by=endorsement_date_col, ascending=True, inplace=True)
                
                # Get the date_preference from POST data ('first' or 'last')
                # Get the date_preference from POST data ('first' or 'last')
                date_preference = request.POST.get('sum_insured_timeframe')
                print("Value is ",date_preference)
                if date_preference not in ['First', 'Last']:
                    date_preference = 'last'  # Default to 'last' if the value is anything else or not provided
                # Drop duplicates but keep either the 'first' or 'last' based on date_preference
                keep = 'first' if date_preference == 'First' else 'last'
                SumInsured_DF = SumInsured_DF.drop_duplicates(subset=uid_col, keep=keep)[[uid_col, sum_insured_column]]

                # Renaming the column to reflect whether it's the 'FIRST' or 'LATEST' value
                sum_insured_sum_column = f"{sum_insured_column}_{'FIRST' if date_preference == 'First' else 'LATEST'}"
                SumInsured_DF.rename(columns={sum_insured_column: sum_insured_sum_column}, inplace=True)
                print(SumInsured_DF.shape)



            
            duplicated_entries = SumInsured_DF[SumInsured_DF.duplicated(subset=uid_col, keep=False)]

            if not duplicated_entries.empty:
                print(f"Duplicated entries found in copy_dataframe based on {uid_col}:")
                print(duplicated_entries)
                # If you're in a web context, you may replace the print statement with a logger or return a response
                return HttpResponse(f"Duplicated entries found in copy_dataframe based on {uid_col}.")


            final_df = pd.merge(final_df, SumInsured_DF, on=uid_col, how='left')
        
            

        if UID_DATAFRAME.shape[0] == final_df.shape[0]:

            # Print message to console
            print("Both UID_DATAFRAME and updated_merged_df have the same number of entries.")
            
            # Perform a left join to add GROSS_PREMIUM_LC column from UID_DATAFRAME to updated_merged_df
            updated_merged_df = pd.merge(updated_merged_df, UID_DATAFRAME, on=uid_col, how='left', suffixes=('', '_SUM'))

            
            print("CHECK 4")
            if (updated_merged_df['UID'] == 'P/300/2904/19/000025-3212').any():
                subset_df = updated_merged_df.loc[updated_merged_df['UID'] == 'P/300/2904/19/000025-3212', ['POLICY_START_DATE','POLICY_END_DATE']]
                print(subset_df)
            else:
                print("No rows with UID equal to 1 found.")


            print("****************************")
            print(updated_merged_df.head())
            print("***************************8")
            # final_df1 = pd.merge(final_df, updated_merged_df, on=uid_col, how='left')

            # Columns that you want from `updated_merged_df`. This is just an example; adjust it to your needs
            updated_merged_df_cols = list(updated_merged_df.columns.difference(final_df.columns))
            updated_merged_df_cols.append(uid_col)  # Make sure to include the uid column for merging

            final_df = pd.merge(final_df, updated_merged_df[updated_merged_df_cols], on=uid_col, how='left')
            original_count = len(final_df)

            print("final issssssssssss")
            print(final_df.head())



            #Data frames having null entries
            null_start_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]
            null_end_date_df = final_df[final_df['POLICY_START_DATE'].isnull()]

            gross_null = final_df[final_df[gross_premium_col+'_SUM'].isnull()]
            negative_gross_df = final_df[final_df[gross_premium_col+'_SUM'] < 0]





            # Remove entries with a null 'POLICY_START_DATE' from the original DataFrame
            final_df = final_df[final_df['POLICY_START_DATE'].notnull()]
            final_count = len(final_df)
            entries_removed_due_to_null_start_date = original_count - final_count

            #for end
            final_df = final_df[final_df['POLICY_END_DATE'].notnull()]
            final_count = len(final_df)
            entries_removed_due_to_null_end_date = original_count - final_count
            


            





            invalid_date_df = final_df[final_df['POLICY_START_DATE'] > final_df['POLICY_END_DATE']]
            final_df = final_df[final_df['POLICY_START_DATE'] <= final_df['POLICY_END_DATE']]
            final_count = len(final_df)
            entries6 = original_count - final_count
            print("Enrties removed due to Date less than PSD PED ",entries6)

            print("UIDS Removed due to NUll are: ",entries1)
            print("Enrties removed due to Date conversion of edorsement ",entries2)
            print("Enrties removed due to Date conversion of Start Date ",entries3)
            print("Enrties removed due to Date conversion of End Date ",entries4)
            print("Enrties removed due to Date conversion of Cancellation ",entries5)
            print("Enrties removed due to Date LESS THAN PSD PED ",entries6)


            print("finallll2",final_df.head())


            final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.normalize()
            final_df['POLICY_START_DATE'] = pd.to_datetime(final_df['POLICY_START_DATE']).dt.normalize()

            print("finallll3",final_df.head())

            print("START DATE TYPE",final_df['POLICY_START_DATE'].dtype)
            print("END DATE TYPE",final_df['POLICY_END_DATE'].dtype)
            
            print("finallll",final_df.head())

            
            final_df['UID_expsoure'] = ((final_df['POLICY_END_DATE'] - final_df['POLICY_START_DATE']).dt.days + 1) / 365.25

            exposure_df = final_df[(final_df['UID_expsoure'] < 0) | (final_df['UID_expsoure'] > 1.002054)]
            expsoure_df_size = len(exposure_df)
          
            show_sum_insured = request.POST.get('show_sum_insured')

            if show_sum_insured == 'Yes':
                PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
            else:
                PolicyFrame = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values

            PolicyD = final_df[PolicyFrame]

           
            print("PolicyFrame before",PolicyD.head())
            print("****************processsssssssssng rowsssssssssssssSzzzzzz*******************")
            new_rows = []

            # Iterate over each row in the PolicyD DataFrame
            for _, row in PolicyD.iterrows():
                # Extract the start and end dates
                start_date = row['POLICY_START_DATE']
                end_date = row['POLICY_END_DATE']
                
                # Get the range of years
                start_year = start_date.year
                end_year = end_date.year
                
                # Loop over each year in the range of the policy
                for year in range(start_year, end_year + 1):
                    # Calculate the EffectiveStartDate and EffectiveEndDate for the current year
                    year_start = pd.Timestamp(year, 1, 1)
                    year_end = pd.Timestamp(year, 12, 31)
                    
                    # Skip years not in range
                    if end_date < year_start or start_date > year_end:
                        continue
                    
                    effective_start_date = max(start_date, year_start)
                    effective_end_date = min(end_date, year_end)


                    
                    # Calculate Exposure in years
                    Exposure = (effective_end_date - effective_start_date + pd.Timedelta(days=1)).days / 365.25
                  
                    earned = (Exposure / row['UID_expsoure']) * row[gross_premium_col+'_SUM']
                    
                    # Append the new row to the list
                    new_rows.append({
                        'UID': row[uid_col],
                        'EffectiveStartDate': effective_start_date,
                        'EffectiveEndDate': effective_end_date,
                        'Period': year,
                        'Exposure': Exposure,
                        'EarnedPremium': earned,
                    })



            print("*****************DONE*******************")
            # Create a new DataFrame with the split periods
            new_df = pd.DataFrame(new_rows)

            # Formatting the dates to string if needed (dd-mm-yyyy format)
            new_df['EffectiveStartDate'] = new_df['EffectiveStartDate'].dt.strftime('%d-%m-%Y')
            new_df['EffectiveEndDate'] = new_df['EffectiveEndDate'].dt.strftime('%d-%m-%Y')

            

            # Display the new DataFrame
            print("Desired")
            print(new_df.head())

            # Merge PolicyD and new_df on 'UID'
            # Merge PolicyD and new_df on different UID column names
            new_df = new_df.merge(PolicyD, left_on='UID', right_on=uid_col, how='left')

            
            print("******************************new")
            print(new_df.head())
            new_df['POLICY_START_DATE'] = new_df['POLICY_START_DATE'].dt.strftime('%d-%m-%Y')
            new_df['POLICY_END_DATE'] = new_df['POLICY_END_DATE'].dt.strftime('%d-%m-%Y')

            # for column in custom_selected_values:
            #     if column not in new_df:
            #         new_df[column] = column

            request.session['new_df'] = new_df.to_json(orient='split')


            

                        











            # Format the dates in the desired format including the time
            final_df['POLICY_END_DATE'] = final_df['POLICY_END_DATE'].dt.strftime('%Y-%m-%d %H:%M:%S')

            # Print the result to verify
            #print(final_df['POLICY_END_DATE'].iloc[0])

            
            
            




            final_df[endorsement_date_col] = final_df[endorsement_date_col].dt.strftime(translated_format)
            print(final_df['POLICY_START_DATE'].iloc[0])
            final_df['POLICY_START_DATE'] = final_df['POLICY_START_DATE'].dt.strftime(translated_format)
            print(final_df['POLICY_START_DATE'].iloc[0])
            print(final_df['POLICY_END_DATE'].iloc[0])
            final_df['POLICY_END_DATE'] = pd.to_datetime(final_df['POLICY_END_DATE']).dt.strftime('%d-%m-%Y')
            print(final_df['POLICY_END_DATE'].iloc[0])
           

            




            TExposure = final_df['UID_expsoure']
            if show_sum_insured == 'Yes':
                columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM',sum_insured_sum_column,'UID_expsoure']+custom_selected_values
            else:
                columns_to_display = [uid_col, 'POLICY_START_DATE', 'POLICY_END_DATE', gross_premium_col+'_SUM','UID_expsoure']+custom_selected_values
            filtered_df = final_df[columns_to_display]
            print("check filter",filtered_df.head())
            

            if show_sum_insured == 'Yes':
                filtered_df = filtered_df.rename(columns={
                uid_col: 'UID',
                sum_insured_sum_column: 'SumInsured',
                'POLICY_START_DATE': 'PolicyStartDate',
                'POLICY_END_DATE': 'PolicyEndDate',
                gross_premium_col+'_SUM': 'GrossPremium'
            }, inplace=False)
            else:
                filtered_df = filtered_df.rename(columns={
                uid_col: 'UID',
                'POLICY_START_DATE': 'PolicyStartDate',
                'POLICY_END_DATE': 'PolicyEndDate',
                gross_premium_col+'_SUM': 'GrossPremium'
            }, inplace=False)

            print("Filtered")
            print(filtered_df.head())
            
            uids = filtered_df['UID']
            if show_sum_insured == 'Yes':
                sum_insured = filtered_df['SumInsured']
            # else:
            #     sum_insured = filtered_df['GrossPremium']

            policy_start_dates = filtered_df['PolicyStartDate']
            policy_end_dates = filtered_df['PolicyEndDate']
            gross_premiums = filtered_df['GrossPremium']
            UID_expsoure = filtered_df['UID_expsoure']

            
            print(gross_premiums.head())
            if (len(gross_premiums.shape) == 2):
                gross_premiums = gross_premiums.iloc[0]
            print(gross_premiums.head())
            print("UID shape:", uids.shape)
            print("PolicyStartDate shape:", policy_start_dates.shape)
            print("PolicyEndDate shape:", policy_end_dates.shape)
            print("GrossPremium shape:", gross_premiums.shape)
            print("UID_exposure shape:", UID_expsoure.shape)
            

            if show_sum_insured == 'Yes':
                if (len(sum_insured.shape) == 2):
                    sum_insured = sum_insured.iloc[0]
                    print("SumInsured shape:", sum_insured.shape)

                PolicyDataframe = pd.DataFrame({
                'UID': uids,
                'SumInsured': sum_insured,
                'PolicyStartDate': policy_start_dates,
                'PolicyEndDate': policy_end_dates,
                'GrossPremium': gross_premiums,
                'UID_expsoure':UID_expsoure,
            })
            else:
                PolicyDataframe = pd.DataFrame({
                'UID': uids,
                'PolicyStartDate': policy_start_dates,
                'PolicyEndDate': policy_end_dates,
                'GrossPremium': gross_premiums,
                'UID_expsoure':UID_expsoure,
            })
                
            for column in custom_selected_values:
                if column not in PolicyDataframe:
                    PolicyDataframe[column] = column
            
            
                
            
            print(PolicyDataframe.head())
            print("AFter update")
            
            # sum_insured_available = request.POST.get('sum_insured_available')  # This will be either "True" or "False"
            # if sum_insured_available == "True":

            # else:
            #     # The sum insured is not available, handle this case as needed


            if show_sum_insured == 'Yes':
                filtered_df = filtered_df.rename(columns={
                sum_insured_column+'_SUM':'SumInsured',
            }, inplace=False)

            filtered_df = filtered_df.rename(columns={
                gross_premium_col+'_SUM': 'GrossPremium',
            }, inplace=False)
    
            # Save the updated dataframe back to the session
            request.session['final_df'] = filtered_df.to_json(orient='split')

            # print(endorsement_date_col, start_date_col)

            sum = entries1+entries2+entries3+entries4+entries5+entries6
            if entries1<=0:
                entries1=0
            elif entries2<=0:
                entries2=0
            elif entries3<=0:
                entries3=0
            elif entries4<=0:
                entries4=0
            elif entries5<=0:
                entries5=0
            elif entries6<=0:
                entries6=0
            elif entries_removed_due_to_null_start_date<=0:
                entries_removed_due_to_null_start_date=0
            elif entries_removed_due_to_null_end_date<=0:
                entries_removed_due_to_null_end_date=0


            print("negative gross",negative_gross_df.head())
            print("NULL",null_uids_df.head())
           
            print("Size is ",len(filtered_df))
            print("AT THE END DF")
            print(null_end_df.head())


            print("last",new_df.head())
            print(expsoure_df_size)
            



        #     return render(request, 'myapp/navbar.html', {
        #     'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False),
        #     'dataframe1': new_df.head().to_html(classes='dataframe', index=False, escape=False),
        #     'NULL_START_UIDS': null_start_date_df.to_html(classes='dataframe',index=False, escape=False),
        #     'NULL_END_UIDS': null_end_date_df.to_html(classes='dataframe',index=False, escape=False),
        #     'show_download_link': True,
        #     'original_count': total_counts,
        #     'removed_count': sum,
        #     'final_count': final_count,
        #     'UID_count': entries1,
        #     'END_DATE_count': entries2,
        #     'START_DATECOUNT':entries3,
        #     'END_DATE_COUNT':entries4,
        #     'CANCELLATION_DATE_COUNT':entries5,
        #     'GREATER_COUNT':entries6,
        #     'START_NULL_COUNT':entries_removed_due_to_null_start_date,
        #     'END_NULL_COUNT':entries_removed_due_to_null_end_date,
        #     'Invalid_date':invalid_date_df.to_html(classes='dataframe', index=False, escape=False),
        #     'gross_null':gross_null.to_html(classes='dataframe', index=False, escape=False),
        #     'negative_gross_df':negative_gross_df.to_html(classes='dataframe', index=False, escape=False),
        #     'null_uids_df':null_uids_df.to_html(classes='dataframe', index=False, escape=False),
        #     'null_end_df':null_end_df.to_html(classes='dataframe', index=False, escape=False),
        #     'Null_endorsement_count':null_count_endor,
        # })
            
            NULL_ENDOR_UIDS_len = len(null_end_df)
            NULL_END_UIDS_len= len(null_end_date_df)
            NULL_EXPO_UIDS_len= len(exposure_df)
            NULL_INVALID_UIDS_len = len(invalid_date_df)
            NULL_NEGATIVE_UIDS_len = len(negative_gross_df)
            NULL_PREMIUM_UIDS_len = len(gross_null)
            NULL_START_UIDS_len = len(null_start_date_df)
            NULL_UIDS_UIDS_len = len(null_uids_df)
            request.session['NULL_START_UIDS'] = null_start_date_df.to_json(orient='split')
            request.session['NULL_END_UIDS'] = null_end_date_df.to_json(orient='split')
            request.session['Invalid_date'] = invalid_date_df.to_json(orient='split')
            request.session['gross_null'] = gross_null.to_json(orient='split')
            request.session['negative_gross_df'] = negative_gross_df.to_json(orient='split')
            request.session['null_uids_df'] = null_uids_df.to_json(orient='split')
            request.session['null_end_df'] = null_end_df.to_json(orient='split')
            request.session['exposure_df'] = exposure_df.to_json(orient='split')



            results={
                'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False),
                'dataframe1': new_df.head().to_html(classes='dataframe', index=False, escape=False),
                'NULL_START_UIDS': null_start_date_df.to_html(classes='dataframe',index=False, escape=False),
                'NULL_END_UIDS': null_end_date_df.to_html(classes='dataframe',index=False, escape=False),
                'show_download_link': True,
                'original_count': total_counts,
                'removed_count': sum,
                'final_count': final_count,
                'UID_count': entries1,
                'END_DATE_count': entries2,
                'START_DATECOUNT':entries3,
                'END_DATE_COUNT':entries4,
                'CANCELLATION_DATE_COUNT':entries5,
                'GREATER_COUNT':entries6,
                'START_NULL_COUNT':entries_removed_due_to_null_start_date,
                'END_NULL_COUNT':entries_removed_due_to_null_end_date,
                'Invalid_date':invalid_date_df.head().to_html(classes='dataframe', index=False, escape=False),
                'gross_null':gross_null.head().to_html(classes='dataframe', index=False, escape=False),
                'negative_gross_df':negative_gross_df.head().to_html(classes='dataframe', index=False, escape=False),
                'null_uids_df':null_uids_df.head().to_html(classes='dataframe', index=False, escape=False),
                'null_end_df':null_end_df.head().to_html(classes='dataframe', index=False, escape=False),
                'Null_endorsement_count':null_count_endor,
                'exposure_df':exposure_df.head().to_html(classes='dataframe', index=False, escape=False),
                'NULL_ENDOR_UIDS_len': NULL_ENDOR_UIDS_len,
                'NULL_END_UIDS_len': NULL_END_UIDS_len,
                'NULL_EXPO_UIDS_len': NULL_EXPO_UIDS_len,
                'NULL_INVALID_UIDS_len': NULL_INVALID_UIDS_len,
                'NULL_NEGATIVE_UIDS_len': NULL_NEGATIVE_UIDS_len,
                'NULL_PREMIUM_UIDS_len': NULL_PREMIUM_UIDS_len,
                'NULL_START_UIDS_len': NULL_START_UIDS_len,
                'NULL_UIDS_UIDS_len': NULL_UIDS_UIDS_len
            }
            serializable_results = convert_numpy(results)

        # Store the serializable results in the session
            serializable_results = convert_numpy(results)

        # Store the serializable results in the session
            store_results_in_session(request, 'premium_results', serializable_results)
        
            # Redirect to the display view
            return redirect('display_results')
        else:
            # Find out which UIDs are causing the discrepancy
            diff_uids = set(UID_DATAFRAME[uid_col]) - set(updated_merged_df[uid_col])
            print(f"UIDs present in UID_DATAFRAME but not in updated_merged_df: {diff_uids}")

            diff_uids = set(updated_merged_df[uid_col]) - set(UID_DATAFRAME[uid_col])
            print(f"UIDs present in updated_merged_df but not in UID_DATAFRAME: {diff_uids}")
            
            # Return a message if the entries are not the same
            return HttpResponse("Dataframes don't have the same number of entries.")

    except KeyError as e:
        print(e)
        return HttpResponse(f"Error: Column {e} not found in merged dataframe.")

def combined_results(request):
    result1 = get_results_from_session(request, 'os_results')
    result2 = get_results_from_session(request, 'claims_results')
    result3 = get_results_from_session(request, 'premium_results')
    combined = {
        'os_results':result1,
        'claims_results':result2,
        'premium_results':result3,
    }
    if result3 and 'dataframe' in result3:
        if result3['dataframe']:
            print("Premium Results df_html contains data.")
        else:
            print("Premium Results df_html is empty.")
    else:
        print("Premium Results does not exist or df_html attribute is not present.")
    return render(request, 'myapp/combined_results.html', combined)




import numpy as np




@csrf_exempt
def save_negative_exposures(request):
    if request.method == "POST":
        data = json.loads(request.body)
        request.session['negative_exposures'] = data.get('negative_exposures', [])
    return JsonResponse({'status': 'success'})


def handle_selected_values(request):
    if request.method == 'POST':
        data = json.loads(request.body)
        request.session['selectedValues'] = data.get('selectedValues', [])
        # Process the selected_values as needed...
        return JsonResponse({'status': 'success', 'message': 'Selected values received'})
    return JsonResponse({'status': 'error', 'message': 'Invalid request'}, status=400)

def handle_custom_selected_values(request):
    if request.method == 'POST':
        try:
            data = json.loads(request.body)
            custom_selected_values = data.get('customSelectedValues', [])
            request.session['customSelectedValues'] = custom_selected_values
            return JsonResponse({'status': 'success', 'message': 'Custom selected values received and processed'})
        except json.JSONDecodeError:
            return JsonResponse({'status': 'error', 'message': 'Invalid JSON'}, status=400)
        except Exception as e:
            return JsonResponse({'status': 'error', 'message': str(e)}, status=500)
    else:
        return JsonResponse({'status': 'error', 'message': 'Invalid request method'}, status=405)

def checks_left_view(request):
    context = {
        'dataframe': filtered_df.head().to_html(classes='dataframe', index=False, escape=False),
        }
    return render(request, 'myapp/checksLeft.html', context)


def download_csv(request):
    # Load the updated dataframe from the session
    updated_merged_df = pd.read_json(request.session.get('final_df', '{}'), orient='split')
    #updated_merged_df = pd.read_json(request.session.get('updated_merged_df', '{}'), orient='split')
    
    # Convert the dataframe to CSV
    csv_data = updated_merged_df.to_csv(index=False)
    
    # Create a response with the CSV data
    response = HttpResponse(csv_data, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="updated_merged_data.csv"'
    
    return response



def claims(request):
    try:
        template = loader.get_template('myapp/claims.html')
    except Exception as e:
        return HttpResponse(f"Error loading template: {e}", status=500)
    return HttpResponse(template.render({}, request))



# def download_reported_claims(request):
#     # Retrieve the DataFrame to download from the session
#     df_json = request.session.get('download_reported')
#     if df_json:
#         print(df_json)  # Debug: See the JSON string
#         try:
#             df = pd.read_json(df_json, orient='split')
#         except ValueError as e:
#             # Handle the error if the JSON structure is unexpected
#             print(e)
#             return HttpResponse("Error processing data to download.", status=500)

#         response = HttpResponse(content_type='text/csv')
#         response['Content-Disposition'] = 'attachment; filename="Reported_Claims.csv"'
#         df.to_csv(path_or_buf=response, index=False)
#         return response
#     else:
#         return HttpResponse("No data to download.", status=400)

    
def download_reported_claims(request):
    # Retrieve the DataFrame to download from the session
    df_json = request.session.get('check_df')
    if df_json:
        df = pd.read_json(df_json, orient='split')
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="Reported_Claims.csv"'
        df.to_csv(path_or_buf=response, index=False)
        return response
    else:
        return HttpResponse("No data to download.", status=400)

def download_processed_data(request):
    # Retrieve the DataFrame to download from the session
    df_json = request.session.get('download_df')
    if df_json:
        df = pd.read_json(df_json, orient='split')
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="processed_data.csv"'
        df.to_csv(path_or_buf=response, index=False)
        return response
    else:
        return HttpResponse("No data to download.", status=400)
    

def download_new_df_csv(request):
    # Load new_df from the session
    new_df = pd.read_json(request.session.get('new_df', '{}'), orient='split')
    # Convert the dataframe to CSV
    csv_data = new_df.to_csv(index=False)
    
    # Create a response with the CSV data
    response = HttpResponse(csv_data, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="new_data.csv"'
    
    return response

import logging

# Get an instance of a logger
logger = logging.getLogger(__name__)

from django.shortcuts import render

def premium_checks(request):
    return render(request, 'myapp/PremiumChecks.html')




def download_result_df_csv(request):
    # Load result_df from the session
    result_df = pd.read_json(request.session.get('result_df', '{}'), orient='split')
    
    # Convert the dataframe to CSV
    csv_data = result_df.to_csv(index=False)
    
    # Create a response with the CSV data
    response = HttpResponse(csv_data, content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="result_data.csv"'
    
    return response


def download_checks(request):
    # Extract the name of the DataFrame from the request
    dataframe_name = request.GET.get('name')  # Assuming passed as a GET parameter
    
    # Retrieve the DataFrame JSON string from the session
    df_json = request.session.get(dataframe_name, None)
    
    if not df_json:
        return HttpResponse("Requested data not found in session", status=404)
    
    # Convert JSON string back to DataFrame
    df = pd.read_json(df_json, orient='split')
    
    # Convert the DataFrame to CSV
    csv_data = df.to_csv(index=False)
    
    # Create a response with the CSV data
    response = HttpResponse(csv_data, content_type='text/csv')
    response['Content-Disposition'] = f'attachment; filename="{dataframe_name}.csv"'
    
    return response



# def upload_mapping(request):
#     #request.session.flush()
#     print("*****************Loading Data*******************")
#     message = ""
#     columns = []
#     file_details = []  
#     total_entries = 0 
#     is_merged = False
#     dataframes = []

#     if request.method == 'POST':
#         form = CSVFileUploadForm1(request.POST, request.FILES)
#         if form.is_valid():
#             files = request.FILES.getlist('files')
#             dataframes = []
            
#             for file in files:
#                 df = pd.read_csv(file)
#                 num_entries = len(df)
#                 total_entries += num_entries  # Add to total entries
#                 file_details.append({'name': file.name, 'entries': num_entries})
#                 dataframes.append(df)

#             merged_df = pd.concat(dataframes)
#             # Convert the merged DataFrame to JSON and store it in the session
#             request.session['merged_df'] = merged_df.to_json(orient='split')
#             message = "Files Merged Successfully"
#             is_merged = True
#             # Store the column names in the session
#             request.session['merged_columns'] = merged_df.columns.tolist()
#             columns = merged_df.columns.tolist()

#     else:
#         form = CSVFileUploadForm1()

#     return render(request, 'myapp/Mapping_upload.html', {
#         'form': form,
#         'message': message,
#         'is_merged': is_merged,
#         'columns': columns,
#         'file_details': file_details,  # Pass the file details to the template
#         'total_entries': total_entries, # Pass the total entries to the template
#     })



from io import BytesIO
from django.http import HttpResponse


# Mapping as header

# def download_excel(request):
#     import json
#     import pandas as pd
#     from io import BytesIO
#     from django.http import HttpResponse

#     # Parse JSON data from the request body
#     data = json.loads(request.body)
#     selected_columns = data.get('selected_columns', [])

#     # Log the selected columns to verify
#     print("Selected Columns:", selected_columns)

#     # Load DataFrame from session (make sure it's properly stored as JSON)
#     merged_df = pd.read_json(request.session.get('merged_df', '{}'), orient='split')
#     print("DataFrame Head:", merged_df.head())

#     # Create filtered data for selected columns
#     filtered_data = {column: merged_df[[column]].dropna() for column in selected_columns if column in merged_df.columns}
#     print("Filtered Data Keys:", filtered_data.keys())

#     # Create an Excel file in-memory
#     output = BytesIO()
#     with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
#         for column, data in filtered_data.items():
#             if not data.empty:
#                 # Create value counts with dynamic column names
#                 value_counts = data[column].value_counts().reset_index()
#                 value_counts.columns = [column, 'Count']  # Use the column name dynamically

#                 # Add a new empty "Mapping" column
#                 value_counts['Mapping'] = pd.Series([''] * len(value_counts))

#                 value_counts.to_excel(writer, sheet_name=column, index=False)
    
#     # Reset the position to the beginning of the stream
#     output.seek(0)

#     # Create a response
#     response = HttpResponse(output.getvalue(), content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
#     response['Content-Disposition'] = 'attachment; filename="selected_data.xlsx"'
#     return response


from django.shortcuts import render
import pandas as pd

def upload_mapping(request):
    print("*****************Loading Data*******************")
    message = ""
    columns = []
    file_details = []  
    total_entries = 0 
    is_merged = False
    dataframes = []

    # Check if data is available in session
    merged_df_json = request.session.get('new_df')
    if merged_df_json:
        merged_df = pd.read_json(merged_df_json, orient='split')
        total_entries = merged_df.shape[0]  # Get the total number of entries
        columns = merged_df.columns.tolist()  # Get the list of columns
        message = "Data retrieved from session."
        is_merged = True
    else:
        message = "No data found in session. Please upload data."

    return render(request, 'myapp/Mapping_upload.html', {
        'message': message,
        'is_merged': is_merged,
        'columns': columns,
        'total_entries': total_entries,  # Pass the total entries to the template
    })


def download_excel(request):
    import json
    import pandas as pd
    from io import BytesIO
    from django.http import HttpResponse

    # Parse JSON data from the request body
    data = json.loads(request.body)
    selected_columns = data.get('selected_columns', [])

    # Log the selected columns to verify
    print("Selected Columns:", selected_columns)

    # Load DataFrame from session (make sure it's properly stored as JSON)
    merged_df = pd.read_json(request.session.get('new_df', '{}'), orient='split')
    print("DataFrame Head:", merged_df.head())

    # Create filtered data for selected columns
    filtered_data = {column: merged_df[[column]].dropna() for column in selected_columns if column in merged_df.columns}
    print("Filtered Data Keys:", filtered_data.keys())

    # Create an Excel file in-memory
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        for column, data in filtered_data.items():
            if not data.empty:
                # Create value counts with dynamic column names
                value_counts = data[column].value_counts().reset_index()
                value_counts.columns = [column, 'Count']  # Use the column name dynamically

                # Add a new "Mapping{sheetName}" column dynamically named based on the sheet name
                mapping_column_name = f'Mapping_{column}'
                value_counts[mapping_column_name] = pd.Series([''] * len(value_counts))

                value_counts.to_excel(writer, sheet_name=column, index=False)

    # Reset the position to the beginning of the stream
    output.seek(0)

    # Create a response
    response = HttpResponse(output.getvalue(), content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
    response['Content-Disposition'] = 'attachment; filename="selected_data.xlsx"'
    return response



from .forms import ExcelUploadForm, CSVFileUploadForm
import pandas as pd

# def MappingData(request):
#     excel_form = ExcelUploadForm(request.POST or None, request.FILES or None)
#     csv_form = CSVFileUploadForm(request.POST or None, request.FILES or None)
#     message = ''

#     if request.method == 'POST':
#         if excel_form.is_valid() and 'excel_file' in request.FILES:
#             excel_file = excel_form.cleaned_data['excel_file']
#             excel_path = handle_uploaded_file(excel_file, 'excel')
#             # Here you could process the excel file directly or return the path to the user for confirmation
#             message = 'Excel file uploaded successfully! '

#         if csv_form.is_valid() and 'premium_file' in request.FILES:
#             csv_file = csv_form.cleaned_data['premium_file']
#             csv_path = handle_uploaded_file(csv_file, 'csv')
#             # Similarly, handle the CSV file directly here
#             message += 'CSV file uploaded successfully! '

#     context = {
#         'excel_form': excel_form,
#         'csv_form': csv_form,
#         'message': message
#     }
#     return render(request, 'myapp/MappingData.html', context)

def MappingData(request):
    excel_form = ExcelUploadForm(request.POST or None, request.FILES or None)
    message = ''

    if request.method == 'POST':
        if excel_form.is_valid() and 'excel_file' in request.FILES:
            excel_file = excel_form.cleaned_data['excel_file']
            excel_path = handle_uploaded_file(excel_file, 'excel')
            message = 'Excel file uploaded successfully!'

    context = {
        'excel_form': excel_form,
        'message': message
    }
    return render(request, 'myapp/MappingData.html', context)


import os

def fetch_column_data(request):
    excel_file = request.FILES.get('excel_file')
    if excel_file is None:
        return JsonResponse({'error': 'Excel file not provided'}, status=400)

    # Read premium data from session
    new_df_json = request.session.get('new_df')
    if new_df_json:
        premium_df = pd.read_json(new_df_json, orient='split')
        premium_columns = premium_df.columns.tolist()
    else:
        return JsonResponse({'error': 'Session does not contain premium data'}, status=500)

    try:
        excel = pd.ExcelFile(excel_file)
        excel_columns = excel.sheet_names
    except Exception as e:
        return JsonResponse({'error': f'Failed to read Excel file: {str(e)}'}, status=500)

    return JsonResponse({'premium_columns': premium_columns, 'excel_columns': excel_columns})


def handle_uploaded_file(file, file_type):
    # Define file path based on file type
    file_path = f'media/{file_type}_{file.name}'
    with open(file_path, 'wb+') as destination:
        for chunk in file.chunks():
            destination.write(chunk)
    return file_path





# def get_columns(request):
#     excel_columns = request.session.get('excel_columns', [])
#     premium_columns = request.session.get('premium_columns', [])
#     return JsonResponse({
#         'excel_columns': excel_columns,
#         'premium_columns': premium_columns
#     })

# def process_mapping_data(request):
#     if request.method == 'POST':
#         try:
#             excel_file = request.FILES.get('excel_file')
#             csv_file = request.FILES.get('premium_file')

#             if not excel_file or not csv_file:
#                 return JsonResponse({'error': 'Excel or CSV file not found in request'}, status=400)

#             mappings_data = request.POST.get('mappings')
#             if not mappings_data:
#                 return JsonResponse({'error': 'Mappings data not provided'}, status=400)

#             mappings = json.loads(mappings_data)
#             premium_df = pd.read_csv(csv_file)
#             excel = pd.ExcelFile(excel_file)
#             results = []

#             # Process all mappings and combine them into a single DataFrame for download
#             for mapping in mappings:
#                 sheet_df = pd.read_excel(excel, sheet_name=mapping['sheetName'])
#                 if 'Count' in sheet_df.columns:
#                     sheet_df.drop('Count', axis=1, inplace=True)

#                 result_df = pd.merge(
#                     sheet_df, premium_df, left_on=sheet_df.columns[0], right_on=mapping['columnName'], how='left'
#                 )
#                 results.append(result_df)

#             # Combine all results into one DataFrame if there are multiple mappings
#             if results:
#                 combined_df = pd.concat(results)

#                 # Convert DataFrame to CSV
#                 response = HttpResponse(content_type='text/csv')
#                 response['Content-Disposition'] = 'attachment; filename="updated_premium_file.csv"'

#                 combined_df.to_csv(path_or_buf=response, index=False)
#                 return response
#             else:
#                 return JsonResponse({'error': 'No data processed'}, status=400)

#         except Exception as e:
#             return JsonResponse({'error': str(e)}, status=500)
#     else:
#         return JsonResponse({'error': 'Invalid HTTP method'}, status=405)


# def process_mapping_data(request):
#     import json
#     import pandas as pd
#     from django.http import JsonResponse, HttpResponse

#     if request.method == 'POST':
#         try:
#             excel_file = request.FILES.get('excel_file')
#             csv_file = request.FILES.get('premium_file')

#             if not excel_file or not csv_file:
#                 return JsonResponse({'error': 'Excel or CSV file not found in request'}, status=400)

#             mappings_data = request.POST.get('mappings')
#             if not mappings_data:
#                 return JsonResponse({'error': 'Mappings data not provided'}, status=400)

#             mappings = json.loads(mappings_data)
#             premium_df = pd.read_csv(csv_file)
#             excel = pd.ExcelFile(excel_file)
#             results = []

#             # Process all mappings and combine them into a single DataFrame for download
#             for mapping in mappings:
#                 sheet_name = mapping['sheetName']
#                 sheet_df = pd.read_excel(excel, sheet_name=sheet_name)
#                 mapping_column_name = f'Mapping_{sheet_name}'  # dynamically construct the mapping column name

#                 # Ensure only necessary columns are processed
#                 if mapping_column_name in sheet_df.columns:
#                     sheet_df = sheet_df[[sheet_name, mapping_column_name]]
#                     # Perform the left join using the specified mapping
#                     result_df = pd.merge(
#                         premium_df, sheet_df, left_on=mapping['columnName'], right_on=sheet_name, how='left'
#                     )
#                     # Include this merged data in results
#                     results.append(result_df)
#                 else:
#                     # Log or handle the case where the expected mapping column does not exist
#                     print(f"Expected mapping column '{mapping_column_name}' not found in sheet '{sheet_name}'.")

#             # Combine all results into one DataFrame if there are multiple mappings
#             if results:
#                 combined_df = pd.concat(results, ignore_index=True)

#                 # Convert DataFrame to CSV
#                 response = HttpResponse(content_type='text/csv')
#                 response['Content-Disposition'] = 'attachment; filename="updated_premium_file.csv"'

#                 combined_df.to_csv(path_or_buf=response, index=False)
#                 return response
#             else:
#                 return JsonResponse({'error': 'No data processed'}, status=400)

#         except Exception as e:
#             return JsonResponse({'error': str(e)}, status=500)
#     else:
#         return JsonResponse({'error': 'Invalid HTTP method'}, status=405)
# mapping_data_final = pd.concat([df for df in filtered_data.values()], axis=1)

#     # Serialize the final DataFrame and store it in the session
#     request.session['mapping_data_final'] = mapping_data_final.to_json(orient='split')
#     request.session.modified = True

def process_mapping_data(request):
    import json
    from django.http import JsonResponse, HttpResponse

    if request.method == 'POST':
        excel_file = request.FILES.get('excel_file')
        if not excel_file:
            return JsonResponse({'error': 'Excel file not found in request'}, status=400)

        mappings_data = request.POST.get('mappings')
        if not mappings_data:
            return JsonResponse({'error': 'Mappings data not provided'}, status=400)

        # Retrieve premium DataFrame from session
        new_df_json = request.session.get('new_df')
        if new_df_json:
            premium_df = pd.read_json(new_df_json, orient='split')
            print("The premiu_df is")
            print(premium_df.head())
        else:
            return JsonResponse({'error': 'Session does not contain premium data'}, status=500)

        try:
            mappings = json.loads(mappings_data)
            excel = pd.ExcelFile(excel_file)

            # Same mapping process as before
            for mapping in mappings:
                sheet_name = mapping['sheetName']
                sheet_df = pd.read_excel(excel, sheet_name=sheet_name)
                mapping_column_name = f'Mapping_{sheet_name}'

                if mapping_column_name in sheet_df.columns:
                    sheet_df = sheet_df[[sheet_name, mapping_column_name]]
                    premium_df = pd.merge(
                        premium_df, sheet_df, left_on=mapping['columnName'], right_on=sheet_name, how='left', suffixes=('', f'_{sheet_name}')
                    )
            request.session['mapping_data_final'] = premium_df.to_json(orient='split')
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="updated_premium_file.csv"'
            premium_df.to_csv(path_or_buf=response, index=False)
            return response
        except Exception as e:
            return JsonResponse({'error': str(e)}, status=500)
    else:
        return JsonResponse({'error': 'Invalid HTTP method'}, status=405)
Leave a Comment