import pandas as pd
from nltk.corpus import stopwords
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import digits
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import pdb
from fuzzywuzzy import fuzz
from feature_engineering import *
from param_config import config
from model_loading import loading_model
# Load all pre-trained artifacts once at import time so every prediction call
# reuses the same in-memory objects: the TF-IDF and term-count matrices, the
# fitted vectorizers, the ticket data frames, the matching data, and the
# embedding dictionary used by the contextual model.
models = loading_model()
tfidf_matrix,tf_count_matrix,tfidf_vector,count_vector,df_act,Matching_data,embedding_dict,df_act_context = models.load_models()
def event_prediction(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
    """Return combined event recommendations for a ticket.

    Runs both the TF-IDF-based and the context-embedding-based predictors on
    the six ticket fields and concatenates their recommendation lists
    (TF-IDF results first).

    Returns:
        list: recommended event titles; empty list if either predictor fails.
    """
    try:
        user_recommendation_list_tfidf = event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity)
        print("TFIDF Prediction Done")
        user_recommendation_list_context = event_prediction_context(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity)
        print("Contexual Prediction Done")
        # Combine the recommendations from both methods
        return user_recommendation_list_tfidf + user_recommendation_list_context
    except Exception as exc:
        # Was a bare `except:` that silently swallowed everything, including
        # SystemExit/KeyboardInterrupt. Catch Exception only, and report the
        # failure before degrading to an empty recommendation list so errors
        # are no longer invisible in production logs.
        print("event_prediction failed: {!r}".format(exc))
        return []
def event_prediction_tfidf(input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc, input_ticket_severity):
    """Recommend event titles for a ticket using TF-IDF + count-vector scoring.

    The six ticket fields are joined into one text blob, preprocessed, and
    scored against the historical ticket corpus twice (TF-IDF cosine and
    term-count cosine). The two result sets are weighted 50/50, fuzzy-string
    validated, thresholded via `config`, and the top `config.max_reccom`
    titles are returned.

    Returns:
        list: values of `config.target_column` for the retained matches;
        empty list when nothing scores above the thresholds.
    """
    global tfidf_matrix, tf_count_matrix, tfidf_vector, count_vector, df_act
    # Join all six parameters into a single space-separated string, then
    # run the shared preprocessing pipeline on it.
    data_to_be_processed = str(input_ticket_category) + ' ' + str(input_ticket_type) + ' ' + str(input_ticket_item) + ' ' + str(input_ticket_summary) + ' ' + str(input_ticket_desc) + ' ' + str(input_ticket_severity)
    input_processed_text = input_data_preprocessing(data_to_be_processed)
    print("Input processed Text : ", input_processed_text)
    # Score the input against the corpus with both vector spaces.
    tfidf_pred, input_tfidfmatrx = input_evalution(input_processed_text, tfidf_matrix, tfidf_vector, df_act)
    tf_count_pred, input_tfcountmatrx = input_evalution_count(input_processed_text, tf_count_matrix, count_vector, df_act)
    # Equal 50/50 weighting of the two scoring schemes; `flag` records which
    # scheme produced each row.
    tfidf_pred['score_new'] = tfidf_pred['score'] * 0.5
    tf_count_pred['score_new'] = tf_count_pred['score'] * 0.5
    tfidf_pred['flag'] = 'tfidf'
    tf_count_pred['flag'] = 'tf_count'
    overall_result = pd.concat([tfidf_pred, tf_count_pred])
    print("Overall Result : ", overall_result)
    # BUG FIX: previously `event_title_list` was only assigned inside the
    # branches below, so an empty `overall_result` could raise NameError at
    # the final return (masked upstream by a bare except). Initialize the
    # result before branching.
    event_title_list = []
    if len(overall_result) > 0:
        overall_result = overall_result.sort_values(by='score_new', ascending=False)
        print("Sorted Overall Result : ", overall_result)
        # Fuzzy-match validation against each candidate's concatenated text.
        overall_result['fuzz_valid_score'] = overall_result.apply(
            lambda row: fuzz_score(input_processed_text, row['concatenated_string']), axis=1)
        # Keep rows that pass either the fuzzy threshold or the TF threshold.
        overall_result = overall_result[
            (overall_result['fuzz_valid_score'] > config.fuzzy_threshold)
            | (overall_result['score_new'] >= config.tf_threshold)]
        overall_result = overall_result.head(config.max_reccom)
        if not overall_result.empty:
            event_title_list = overall_result[config.target_column].tolist()
    return event_title_list
def input_evalution(text, df_train_mtrx, tfidf_vector, df_act):
    """Score historical tickets against *text* via TF-IDF cosine similarity.

    Args:
        text: preprocessed input string.
        df_train_mtrx: DataFrame of TF-IDF vectors for historical tickets,
            indexed by ticket_id.
        tfidf_vector: fitted TfidfVectorizer.
        df_act: DataFrame of ticket metadata with a 'ticket_id' column.

    Returns:
        tuple(DataFrame, DataFrame): (rows of df_act whose cosine similarity
        to *text* exceeds 0.50, with a 'score' column added; the single-row
        TF-IDF frame for the input, indexed 'test123').
    """
    input_tfidf = tfidf_vector.transform([text])
    # get_feature_names() was removed in scikit-learn 1.2; prefer
    # get_feature_names_out() and fall back only on very old versions.
    if hasattr(tfidf_vector, 'get_feature_names_out'):
        feature_names = tfidf_vector.get_feature_names_out()
    else:
        feature_names = tfidf_vector.get_feature_names()
    df_tst = pd.DataFrame(np.asarray(input_tfidf.todense()),
                          columns=feature_names,
                          index=['test123'])
    print("Df Test Input Evaluation : ", df_tst)
    # DataFrame.append was removed in pandas 2.0 -> use pd.concat instead.
    # Rebinding the local leaves the caller's matrix untouched.
    df_train_mtrx = pd.concat([df_train_mtrx, df_tst])
    print("DF Train Matrix after appending : ", df_train_mtrx)
    # Cosine similarity of every row (including the input itself) vs the input.
    scr = cosine_similarity(df_train_mtrx, df_tst)
    df_chk = pd.DataFrame()
    df_chk['ticket_id'] = df_train_mtrx.index
    # cosine_similarity returns shape (n, 1); flatten to a 1-D column so the
    # assignment is unambiguous across pandas versions.
    df_chk['score'] = scr.ravel()
    # Keep matches above the similarity cutoff; exclude the synthetic
    # 'test123' self-match from the score list (its ticket_id is never in
    # df_act, so the metadata filter drops it implicitly).
    score = df_chk[(df_chk['score'] > 0.50) & (df_chk['ticket_id'] != 'test123')]['score'].tolist()
    # .copy() avoids SettingWithCopyWarning / accidental mutation of the
    # shared df_act when the score column is attached below.
    df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score'] > 0.50]['ticket_id'])].copy()
    df_eval['score'] = score
    return df_eval, df_tst
def input_evalution_count(text, df_train_mtrx, count_vector, df_act):
    """Score historical tickets against *text* via term-count cosine similarity.

    Mirrors `input_evalution` but uses the CountVectorizer space.

    Args:
        text: preprocessed input string.
        df_train_mtrx: DataFrame of term-count vectors for historical tickets,
            indexed by ticket_id.
        count_vector: fitted CountVectorizer.
        df_act: DataFrame of ticket metadata with a 'ticket_id' column.

    Returns:
        tuple(DataFrame, DataFrame): (rows of df_act whose cosine similarity
        to *text* exceeds 0.50, with a 'score' column added; the single-row
        count frame for the input, indexed 'test123').
    """
    print("Into Input Evaluation Count function")
    input_count = count_vector.transform([text])
    # get_feature_names() was removed in scikit-learn 1.2; prefer
    # get_feature_names_out() and fall back only on very old versions.
    if hasattr(count_vector, 'get_feature_names_out'):
        feature_names = count_vector.get_feature_names_out()
    else:
        feature_names = count_vector.get_feature_names()
    df_tst = pd.DataFrame(np.asarray(input_count.todense()),
                          columns=feature_names,
                          index=['test123'])
    print("DF Test in evaluation count : ", df_tst)
    # DataFrame.append was removed in pandas 2.0 -> use pd.concat instead.
    # (The original appended `df_tst.head()` — a typo that was harmless only
    # because df_tst has a single row; append the frame itself.)
    df_train_mtrx = pd.concat([df_train_mtrx, df_tst])
    print("DF Train Matrix after appending : ", df_train_mtrx)
    scr = cosine_similarity(df_train_mtrx, df_tst)
    print("Cosine Similarity inside Input evaluation : ", scr)
    df_chk = pd.DataFrame()
    df_chk['ticket_id'] = df_train_mtrx.index
    # cosine_similarity returns shape (n, 1); flatten to a 1-D column.
    df_chk['score'] = scr.ravel()
    # Keep matches above the similarity cutoff; the synthetic 'test123'
    # self-match is excluded from the score list and never appears in df_act.
    score = df_chk[(df_chk['score'] > 0.50) & (df_chk['ticket_id'] != 'test123')]['score'].tolist()
    # .copy() avoids SettingWithCopyWarning / accidental mutation of df_act.
    df_eval = df_act[df_act['ticket_id'].isin(df_chk[df_chk['score'] > 0.50]['ticket_id'])].copy()
    df_eval['score'] = score
    return df_eval, df_tst