import pandas as pd
from nltk.corpus import stopwords
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import digits
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import pdb
from fuzzywuzzy import fuzz
from feature_engineering import *
from param_config import config
from model_loading import loading_model

# Load the pre-trained artefacts once at import time
models = loading_model()
tfidf_matrix, tf_count_matrix, tfidf_vector, count_vector, df_act, Matching_data, embedding_dict, df_act_context = models.load_models()

#print("Initial TFIDF Matrix : ", tfidf_matrix)
#print("Initial TF Count Matrix : ", tf_count_matrix)
#print("Initial TFIDF Vector : ", tfidf_vector)
#print("Initial Count Vector : ", count_vector)
#print("DF ACT : ", df_act.head())
#print("Initial Embedding Dict : ", embedding_dict)
#print("DF ACT Context : ", df_act_context.head())


def event_prediction(input_ticket_category, input_ticket_type, input_ticket_item,
                     input_ticket_summary, input_ticket_desc, input_ticket_severity):
    try:
        user_recommendation_list_tfidf = event_prediction_tfidf(
            input_ticket_category, input_ticket_type, input_ticket_item,
            input_ticket_summary, input_ticket_desc, input_ticket_severity)
        print("TFIDF Prediction Done")

        user_recommendation_list_context = event_prediction_context(
            input_ticket_category, input_ticket_type, input_ticket_item,
            input_ticket_summary, input_ticket_desc, input_ticket_severity)
        print("Contextual Prediction Done")

        # Combine the recommendations from both methods
        user_recommendation_list = user_recommendation_list_tfidf + user_recommendation_list_context
        return user_recommendation_list
    except Exception:
        # On any failure fall back to an empty recommendation list
        user_recommendation_list = []
        return user_recommendation_list


def event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item,
                           input_ticket_summary, input_ticket_desc, input_ticket_severity):
    #pdb.set_trace()
    global tfidf_matrix, tf_count_matrix, tfidf_vector, count_vector, df_act

    ## First join the six input fields, then call input_data_preprocessing
    data_to_be_processed = (str(input_ticket_category) + ' ' + str(input_ticket_type) + ' ' +
                            str(input_ticket_item) + ' ' + str(input_ticket_summary) + ' ' +
                            str(input_ticket_desc) + ' ' + str(input_ticket_severity))

    ## Input data preprocessing
    input_processed_text = input_data_preprocessing(data_to_be_processed)
    print("Input processed Text : ", input_processed_text)
    #pdb.set_trace()

    ## TFIDF prediction
    tfidf_pred, input_tfidfmatrx = input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act)
    ## TF count prediction
    tf_count_pred, input_tfcountmatrx = input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act)
    #pdb.set_trace()

    # Weight both similarity scores equally and tag their origin
    tfidf_pred['score_new'] = tfidf_pred['score'] * 0.5
    tf_count_pred['score_new'] = tf_count_pred['score'] * 0.5
    tfidf_pred['flag'] = 'tfidf'
    tf_count_pred['flag'] = 'tf_count'

    overall_result = pd.concat([tfidf_pred, tf_count_pred])
    print("Overall Result : ", overall_result)

    if len(overall_result) > 0:
        overall_result = overall_result.sort_values(by='score_new', ascending=False)
        print("Sorted Overall Result : ", overall_result)

        # Fuzzy-match validation of each candidate against the processed input
        overall_result['fuzz_valid_score'] = overall_result.apply(
            lambda row: fuzz_score(input_processed_text, row['concatenated_string']), axis=1)

        # Keep candidates that pass either the fuzzy threshold or the TF score threshold
        overall_result = overall_result[(overall_result['fuzz_valid_score'] > config.fuzzy_threshold) |
                                        (overall_result['score_new'] >= config.tf_threshold)]
        overall_result = overall_result.head(config.max_reccom)
        print("Overall Result : ", overall_result)

    # Check if recommendations are found
    if not overall_result.empty:
        user_recommendation_list = overall_result[config.target_column].tolist()
    else:
        # No recommendations found, return an empty list
        user_recommendation_list = []
    return user_recommendation_list


def input_evalution(input_processed_text, df_train_mtrx, tfidf_vector, df_act):
    print("Into Input Evaluation function")
    text = input_processed_text
    print("Text : ", text)
    print("TFIDF Vector : ", tfidf_vector)
    #print("DF Train Matrix : ", df_train_mtrx)

    input_tfidf = tfidf_vector.transform([text])
    print(input_tfidf)
    x = input_tfidf.todense()
    # get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out() is its replacement
    df_tst = pd.DataFrame(x, columns=tfidf_vector.get_feature_names_out(), index=['test123'])
    print("Df Test Input Evaluation : ", df_tst)

    ## Appending df_tst to df_train (DataFrame.append was removed in pandas 2.0)
    df_train_mtrx = pd.concat([df_train_mtrx, df_tst])
    #print("DF Train Matrix after appending : ", df_train_mtrx)

    ## Calculating cosine similarity of every ticket against the input row
    scr = cosine_similarity(df_train_mtrx, df_tst)
    print("Cosine Similarity : ", scr)

    df_chk = pd.DataFrame()
    df_chk['ticket_id'] = df_train_mtrx.index
    df_chk['score'] = scr.ravel()  # flatten the (n, 1) similarity matrix into a 1-D column

    score = df_chk[(df_chk['score'] > 0.50) & (df_chk['ticket_id'] != 'test123')]['score'].tolist()
    df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score'] > 0.50]['ticket_id'])].copy()
    df_eval['score'] = score
    return df_eval, df_tst


def input_evalution_count(text, df_train_mtrx, count_vector, df_act):
    print("Into Input Evaluation Count function")

    input_count = count_vector.transform([text])
    x = input_count.todense()
    df_tst = pd.DataFrame(x, columns=count_vector.get_feature_names_out(), index=['test123'])
    print("DF Test in evaluation count : ", df_tst)

    # Appending the input row to the train matrix (DataFrame.append was removed in pandas 2.0)
    df_train_mtrx = pd.concat([df_train_mtrx, df_tst])
    #print("DF Train Matrix after appending : ", df_train_mtrx)

    ## Calculating cosine similarity
    scr = cosine_similarity(df_train_mtrx, df_tst)
    print("Cosine Similarity inside Input evaluation : ", scr)

    df_chk = pd.DataFrame()
    df_chk['ticket_id'] = df_train_mtrx.index
    df_chk['score'] = scr.ravel()

    score = df_chk[(df_chk['score'] > 0.50) & (df_chk['ticket_id'] != 'test123')]['score'].tolist()
    print("Score : ", score)
    df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score'] > 0.50]['ticket_id'])].copy()
    df_eval['score'] = score
    return df_eval, df_tst


## Testing this -
# Sample input data
input_ticket_category = 'Process'
input_ticket_type = 'HRO - Payroll'
input_ticket_item = 'Benefits and Payments'
input_ticket_summary = 'Incorrect Result'
input_ticket_desc = 'Dear Sir, As per the attached screen shots...'
input_ticket_severity = '4 - Default'

# Call the event_prediction function
user_recommendations = event_prediction(
    input_ticket_category,
    input_ticket_type,
    input_ticket_item,
    input_ticket_summary,
    input_ticket_desc,
    input_ticket_severity
)

# Print the user recommendations
print("User Recommendations:", user_recommendations)

# Add debug information to check if the functions are being called
#print("Debug Info - input_evalution:", input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act))
#print("Debug Info - input_evalution_count:", input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act))