import pandas as pd
from nltk.corpus import stopwords
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import digits
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import pdb
from fuzzywuzzy import fuzz
from feature_engineering import *
from param_config import config
from model_loading import loading_model
models = loading_model()
tfidf_matrix,tf_count_matrix,tfidf_vector,count_vector,df_act,Matching_data,embedding_dict,df_act_context = models.load_models()
#print("Initial TFIDF MAtrix : ",tfidf_matrix)
#print(" Initial TF Count Matrix ",tf_count_matrix)
#print("Inital TFIDF Vector",tfidf_vector)
#print("Initial Count Vector ",count_vector)
#print("DF ACT",df_act.head())
#print("Initial Embedding Dict",embedding_dict)
#print("DF ACT Context ",df_act_context.head())
def event_prediction(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
try:
user_recommendation_list_tfidf = event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity)
print("TFIDF Prediction Done")
user_recommendation_list_context = event_prediction_context(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity)
print("Contexual Prediction Done")
# Combine the recommendations from both methods
user_recommendation_list = user_recommendation_list_tfidf + user_recommendation_list_context
return user_recommendation_list
except:
user_recommendation_list = []
return user_recommendation_list
def event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
#pdb.set_trace()
global tfidf_matrix,tf_count_matrix,tfidf_vector,count_vector,df_act
## First join 5 parameters andd then call input_data_preprocessing
data_to_be_processed=str(input_ticket_category) +' ' + str(input_ticket_type) +' ' +str(input_ticket_item) + ' ' + str(input_ticket_summary) + ' ' +str(input_ticket_desc) + ' ' + str(input_ticket_severity)
## Input Data Preprocessing
input_processed_text = input_data_preprocessing(data_to_be_processed) ## 5 different fields
print("Input processed Text : ",input_processed_text)
#pdb.set_trace()
##TFIDF Prediction
tfidf_pred,input_tfidfmatrx = input_evalution(input_processed_text,tfidf_matrix,tfidf_vector,df_act)
##TF_count Prediction
tf_count_pred,input_tfcountmatrx = input_evalution_count(input_processed_text,tf_count_matrix,count_vector,df_act)
#pdb.set_trace()
tfidf_pred['score_new'] = tfidf_pred['score']*0.5
tf_count_pred['score_new'] = tf_count_pred['score']*0.5
tfidf_pred['flag'] = 'tfidf'
tf_count_pred['flag'] = 'tf_count'
overall_result = pd.concat([tfidf_pred,tf_count_pred])
print("Overall Result : ",overall_result)
if len(overall_result)>0:
overall_result = overall_result.sort_values(by='score_new',ascending=False)
print("Sorted Overall Result : ",overall_result)
overall_result['fuzz_valid_score'] = overall_result.apply(lambda row: fuzz_score(input_processed_text, row['concatenated_string']), axis=1)
# Continue with your filtering and sorting logic
overall_result = overall_result[(overall_result['fuzz_valid_score'] > config.fuzzy_threshold) | (overall_result['score_new'] >= config.tf_threshold)]
overall_result = overall_result.head(config.max_reccom)
print("Overall Result : ",overall_result)
# Check if recommendations are found
if not overall_result.empty:
# Concatenate your final result lists
user_recommendation_list = overall_result[config.target_column].tolist()
else:
# No recommendations found, return empty lists or a message indicating no recommendations
user_recommendation_list = []
return user_recommendation_list
def input_evalution(input_processed_text, df_train_mtrx,tfidf_vector,df_act):
print("Into Input Evaluation function")
text=input_processed_text
print("Text : ",text)
tfidf_vector=tfidf_vector
print("TFIDF Vector : ",tfidf_vector)
df_train_mtrx=df_train_mtrx
#print("DF Train Matrix : ",df_train_mtrx)
input_tfidf=tfidf_vector.transform([text])
print(input_tfidf)
x=input_tfidf.todense()
df_tst = pd.DataFrame(x,
columns=tfidf_vector.get_feature_names(),
index=['test123'])
print("Df Test Input Evaluation : ",df_tst)
## Appending df_tst to df_train
df_train_mtrx = df_train_mtrx.append(df_tst)
#print("DF Train Matrix after appending : ",df_train_mtrx)
## Calculating Cosine Similarity
scr=cosine_similarity(df_train_mtrx, df_tst)
print("Cosine Similarity : ",scr)
df_chk = pd.DataFrame()
df_chk['ticket_id']=df_train_mtrx.index
df_chk['score']=scr
score = df_chk[(df_chk['score']>0.50) & (df_chk['ticket_id']!='test123')]['score'].tolist()
df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score']>0.50]['ticket_id'])]
df_eval['score'] = score
return df_eval,df_tst
def input_evalution_count(text, df_train_mtrx,count_vector,df_act):
print("Into Input Evaluation Count function")
input_count=count_vector.transform([text])
x=input_count.todense()
df_tst = pd.DataFrame(x,
columns=count_vector.get_feature_names(),
index=['test123'])
print("DF Test in evaluation count : ",df_tst)
# Appending input data to train dataset
df_train_mtrx = df_train_mtrx.append(df_tst.head())
#print("DF Train Matrix after appending : ",df_train_mtrx)
## Calculating cosine similarity
scr=cosine_similarity(df_train_mtrx, df_tst)
print("Cosine Similarity inside Input evaluation : ",scr)
df_chk = pd.DataFrame()
df_chk['ticket_id']=df_train_mtrx.index
df_chk['score']=scr
score = df_chk[(df_chk['score']>0.50) & (df_chk['ticket_id']!='test123')]['score'].tolist()
print("Score : ",score)
df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score']>0.50]['ticket_id'])]
df_eval['score'] = score
return df_eval,df_tst
##Testing this -
# Sample input data
input_ticket_category = 'Process'
input_ticket_type = 'HRO - Payroll'
input_ticket_item = 'Benefits and Payments'
input_ticket_summary = 'Incorrect Result'
input_ticket_desc = 'Dear Sir, As per the attached screen shots...'
input_ticket_severity = '4 - Default'
# Call the event_prediction function
user_recommendations = event_prediction(
input_ticket_category,
input_ticket_type,
input_ticket_item,
input_ticket_summary,
input_ticket_desc,
input_ticket_severity
)
# Print the user recommendations
print("User Recommendations:", user_recommendations)
# Add debug information to check if the functions are being called
#print("Debug Info - input_evalution:", input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act))
#print("Debug Info - input_evalution_count:", input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act))
And the output we getting is -
loading models Matrix ................
loading model Config................
loading Actual Data...................
loading unique noun in train data with vector value for context search ................
Input processed Text : process hro - payroll benefits payments incorrect result dear sir, per attached screen shots - default
Into Input Evaluation function
Text : process hro - payroll benefits payments incorrect result dear sir, per attached screen shots - default
TFIDF Vector : TfidfVectorizer(stop_words='english')
(0, 6601) 0.3458743793621518
(0, 6534) 0.3281113991319641
(0, 6282) 0.18200065470566787
(0, 6005) 0.333358520603423
(0, 5358) 0.22843450766813428
(0, 5005) 0.21924256415838775
(0, 4999) 0.339224303354272
(0, 3300) 0.20477584279934954
(0, 3060) 0.3281113991319641
(0, 1810) 0.3458743793621518
(0, 1767) 0.17768942962976833
(0, 780) 0.2911617377934605
(0, 564) 0.16604491675165825
User Recommendations: []
It is not even printing the scr -score (Cosine Similarity) and giving empty recommendation, not sure if it is even going the function.
Can you show any loopholes if any in code.