# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# a year ago
# 7.7 kB
# 1
# Indexable
import pandas as pd
from nltk.corpus import stopwords
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import digits
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import pdb

from fuzzywuzzy import fuzz


from feature_engineering import *
from param_config import config
from model_loading import loading_model

# Build the project loader and pull every persisted artefact into module
# globals once, at import time; the prediction functions below read them.
models = loading_model()

(
    tfidf_matrix,
    tf_count_matrix,
    tfidf_vector,
    count_vector,
    df_act,
    Matching_data,
    embedding_dict,
    df_act_context,
) = models.load_models()
#print("Initial TFIDF MAtrix : ",tfidf_matrix)
#print(" Initial TF Count Matrix ",tf_count_matrix)
#print("Inital TFIDF Vector",tfidf_vector)
#print("Initial Count Vector ",count_vector)
#print("DF ACT",df_act.head())
#print("Initial Embedding Dict",embedding_dict)
#print("DF ACT Context ",df_act_context.head())


def event_prediction(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
    """Return the combined recommendations for one ticket.

    Runs ``event_prediction_tfidf`` and then ``event_prediction_context`` on
    the same six ticket fields and concatenates their result lists (TF-IDF
    results first).

    Returns:
        The concatenated recommendation list, or ``[]`` if any step raised.
        The fallback is deliberately best-effort, but the failure is logged
        with its traceback instead of being silently discarded.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    ticket_fields = (
        input_ticket_category,
        input_ticket_type,
        input_ticket_item,
        input_ticket_summary,
        input_ticket_desc,
        input_ticket_severity,
    )

    try:
        user_recommendation_list_tfidf = event_prediction_tfidf(*ticket_fields)
        print("TFIDF Prediction Done")
        user_recommendation_list_context = event_prediction_context(*ticket_fields)
        print("Contexual Prediction Done")

        # Combine the recommendations from both methods
        return user_recommendation_list_tfidf + user_recommendation_list_context
    except Exception:
        # `except Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate; everything else degrades to "no results".
        logging.getLogger(__name__).exception(
            "event_prediction failed; returning no recommendations"
        )
        return []

    

def event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
    """Recommend entries for a ticket using TF-IDF and term-count similarity.

    The six ticket fields are joined into one string, preprocessed, and
    scored against the module-level TF-IDF and term-count matrices. Both
    result sets are weighted by 0.5, concatenated, sorted by weighted score
    and kept only if they pass either ``config.fuzzy_threshold`` (fuzzy
    score against ``concatenated_string``) or ``config.tf_threshold``
    (weighted score). At most ``config.max_reccom`` rows are returned.

    Returns:
        A list of ``config.target_column`` values, best match first; ``[]``
        when nothing matches.
    """
    # Join all six ticket fields into a single string for preprocessing.
    data_to_be_processed = ' '.join(
        str(field)
        for field in (
            input_ticket_category,
            input_ticket_type,
            input_ticket_item,
            input_ticket_summary,
            input_ticket_desc,
            input_ticket_severity,
        )
    )

    # NOTE(review): input_data_preprocessing and fuzz_score are not defined
    # in this file; presumably they come from `feature_engineering` - confirm.
    input_processed_text = input_data_preprocessing(data_to_be_processed)
    print("Input processed Text : ", input_processed_text)

    # TF-IDF prediction (the second return value, the query row, is unused here)
    tfidf_pred, _ = input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act)

    # Term-count prediction
    tf_count_pred, _ = input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act)

    # assign() builds new frames, so the evaluators' outputs (which may be
    # slices of df_act) are never written to in place.
    tfidf_pred = tfidf_pred.assign(score_new=tfidf_pred['score'] * 0.5, flag='tfidf')
    tf_count_pred = tf_count_pred.assign(score_new=tf_count_pred['score'] * 0.5, flag='tf_count')

    # NOTE(review): a ticket matched by both methods appears twice here; the
    # 0.5 weights suggest the scores may have been meant to be summed - confirm.
    overall_result = pd.concat([tfidf_pred, tf_count_pred])
    print("Overall Result : ", overall_result)

    # Nothing matched: return early. Previously this path fell through to the
    # final `return` with the result variable never assigned.
    if overall_result.empty:
        return []

    overall_result = overall_result.sort_values(by='score_new', ascending=False)
    print("Sorted Overall Result : ", overall_result)

    overall_result['fuzz_valid_score'] = overall_result.apply(
        lambda row: fuzz_score(input_processed_text, row['concatenated_string']),
        axis=1,
    )

    # Keep rows that pass either the fuzzy threshold or the weighted-score threshold.
    passes_fuzzy = overall_result['fuzz_valid_score'] > config.fuzzy_threshold
    passes_score = overall_result['score_new'] >= config.tf_threshold
    overall_result = overall_result[passes_fuzzy | passes_score].head(config.max_reccom)
    print("Overall Result : ", overall_result)

    # An empty frame yields an empty list, so no separate "no results" branch is needed.
    return overall_result[config.target_column].tolist()


def input_evalution(input_processed_text, df_train_mtrx, tfidf_vector, df_act, score_threshold=0.50):
    """Score a preprocessed query against the TF-IDF training matrix.

    Args:
        input_processed_text: Preprocessed query text.
        df_train_mtrx: Training matrix as a DataFrame; its index values are
            used as ticket ids.
        tfidf_vector: Fitted vectorizer exposing ``transform`` and
            ``get_feature_names_out`` (or the older ``get_feature_names``).
        df_act: DataFrame with a ``ticket_id`` column to pick matches from.
        score_threshold: Cosine similarity a training row must exceed to be
            returned. Defaults to the previously hard-coded 0.50.

    Returns:
        ``(df_eval, df_tst)``: ``df_eval`` is a copy of the matching
        ``df_act`` rows with a ``score`` column; ``df_tst`` is the one-row
        query matrix indexed ``'test123'``.
    """
    print("Into Input Evaluation function")
    print("Text : ", input_processed_text)
    print("TFIDF Vector : ", tfidf_vector)

    input_tfidf = tfidf_vector.transform([input_processed_text])
    print(input_tfidf)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the new
    # accessor and fall back only for old installations.
    feature_names = getattr(tfidf_vector, 'get_feature_names_out', None) or tfidf_vector.get_feature_names
    df_tst = pd.DataFrame(input_tfidf.toarray(),
                          columns=feature_names(),
                          index=['test123'])
    print("Df Test Input Evaluation : ", df_tst)

    # Compare the training rows with the query row directly. The query row is
    # no longer appended to the training matrix (DataFrame.append was removed
    # in pandas 2.0, and the appended row was filtered out again anyway).
    scr = cosine_similarity(df_train_mtrx, df_tst)
    print("Cosine Similarity : ", scr)

    # Key scores by ticket id so they are attached to df_act rows by id, not
    # by position; positional assignment breaks if the two orders differ.
    scores = pd.Series(scr.ravel(), index=df_train_mtrx.index)
    matched = scores[scores > score_threshold]
    # Collapse repeated ids to their best score so the lookup is unambiguous.
    matched = matched.groupby(level=0, sort=False).max()

    # Copy before adding a column to avoid writing into a slice of df_act.
    df_eval = df_act[df_act['ticket_id'].isin(matched.index)].copy()
    df_eval['score'] = df_eval['ticket_id'].map(matched)

    return df_eval, df_tst


def input_evalution_count(text, df_train_mtrx, count_vector, df_act, score_threshold=0.50):
    """Score a preprocessed query against the term-count training matrix.

    Args:
        text: Preprocessed query text.
        df_train_mtrx: Training matrix as a DataFrame; its index values are
            used as ticket ids.
        count_vector: Fitted vectorizer exposing ``transform`` and
            ``get_feature_names_out`` (or the older ``get_feature_names``).
        df_act: DataFrame with a ``ticket_id`` column to pick matches from.
        score_threshold: Cosine similarity a training row must exceed to be
            returned. Defaults to the previously hard-coded 0.50.

    Returns:
        ``(df_eval, df_tst)``: ``df_eval`` is a copy of the matching
        ``df_act`` rows with a ``score`` column; ``df_tst`` is the one-row
        query matrix indexed ``'test123'``.
    """
    print("Into Input Evaluation Count function")
    input_count = count_vector.transform([text])

    # get_feature_names() was removed in scikit-learn 1.2; prefer the new
    # accessor and fall back only for old installations.
    feature_names = getattr(count_vector, 'get_feature_names_out', None) or count_vector.get_feature_names
    df_tst = pd.DataFrame(input_count.toarray(),
                          columns=feature_names(),
                          index=['test123'])
    print("DF Test in evaluation count : ", df_tst)

    # Compare the training rows with the query row directly. The query row is
    # no longer appended to the training matrix (DataFrame.append was removed
    # in pandas 2.0, and the appended row was filtered out again anyway).
    scr = cosine_similarity(df_train_mtrx, df_tst)
    print("Cosine Similarity inside Input evaluation : ", scr)

    # Key scores by ticket id so they are attached to df_act rows by id, not
    # by position; positional assignment breaks if the two orders differ.
    scores = pd.Series(scr.ravel(), index=df_train_mtrx.index)
    matched = scores[scores > score_threshold]
    # Collapse repeated ids to their best score so the lookup is unambiguous.
    matched = matched.groupby(level=0, sort=False).max()
    print("Score : ", matched.tolist())

    # Copy before adding a column to avoid writing into a slice of df_act.
    df_eval = df_act[df_act['ticket_id'].isin(matched.index)].copy()
    df_eval['score'] = df_eval['ticket_id'].map(matched)

    return df_eval, df_tst


##Testing this -

# Manual smoke test. Guarded so that importing this module does not run a
# prediction and print its result as an import side effect; it runs only when
# the file is executed as a script.
if __name__ == "__main__":
    # Sample input data
    input_ticket_category = 'Process'
    input_ticket_type = 'HRO - Payroll'
    input_ticket_item = 'Benefits and Payments'
    input_ticket_summary = 'Incorrect Result'
    input_ticket_desc = 'Dear Sir, As per the attached screen shots...'
    input_ticket_severity = '4 - Default'

    # Call the event_prediction function
    user_recommendations = event_prediction(
        input_ticket_category,
        input_ticket_type,
        input_ticket_item,
        input_ticket_summary,
        input_ticket_desc,
        input_ticket_severity,
    )

    # Print the user recommendations
    print("User Recommendations:", user_recommendations)

# Add debug information to check if the functions are being called
#print("Debug Info - input_evalution:", input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act))
#print("Debug Info - input_evalution_count:", input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act))