Untitled

mail@pastecode.io avatar
unknown
plain_text
21 days ago
8.2 kB
1
Indexable
Never
import pandas as pd
from nltk.corpus import stopwords
import string
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from string import digits
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import pdb
import re

from fuzzywuzzy import fuzz


from feature_engineering import *
from param_config import config
from model_loading import loading_model

# Build the loader once at import time and keep the loaded artefacts as
# module-level names; the recommendation functions in this module read
# them as globals.
models = loading_model()

(
    tfidf_matrix,
    tf_count_matrix,
    tfidf_vector,
    count_vector,
    df_act,
    Matching_data,
    embedding_dict,
    df_act_context,
) = models.load_models()


def extract_combined_info(text):
    """Pair runs of numeric tokens with the capitalised text that follows.

    The text is split on whitespace and scanned left to right. Tokens for
    which ``str.isnumeric`` is true are collected. When a token starting with
    an upper-case character is reached while numbers are pending, one entry
    is emitted: the pending numbers followed by a single string made of that
    token and every token after it. The pending numbers are then cleared and
    the scan continues.

    Args:
        text: Whitespace-separated string to scan.

    Returns:
        A list of entries. Each entry is a list holding the numeric tokens
        (as strings) followed by the trailing name string. The list is empty
        when no numeric token is followed by a capitalised token.
    """
    words = text.split()
    info = []
    pending_numbers = []

    # enumerate() supplies the token's real position. Looking the position up
    # with words.index(word) returned the FIRST occurrence of the token, so a
    # name that also appeared earlier in the text produced a tail that
    # started too early (and made the scan quadratic).
    for position, word in enumerate(words):
        if word.isnumeric():
            pending_numbers.append(word)
        elif word[0].isupper() and pending_numbers:
            trailing_name = ' '.join(words[position:])
            info.append(pending_numbers + [trailing_name])
            pending_numbers = []

    return info

def process_text(text):
    """Return ``text`` with a leading run of digits stripped off.

    Only digits at the very start of the string, together with any
    whitespace directly after them, are dropped. Text that does not start
    with a digit is returned unchanged.
    """
    leading_number = re.match(r'^\d+\s*', text)
    if leading_number is None:
        return text
    # Keep everything after the matched prefix.
    return text[leading_number.end():]

def extract_role_name(text):
    """Collect the text sitting between a number and a capitalised word.

    For every non-overlapping match of "digits, whitespace, some word
    characters/whitespace, whitespace, upper-case ASCII letter", the shortest
    possible middle part is captured.

    Returns:
        A list with one captured string per match, in order of appearance;
        empty when nothing matches.
    """
    role_pattern = re.compile(r'\d+\s+([\w\s]+?)\s+[A-Z]')
    return [hit.group(1) for hit in role_pattern.finditer(text)]

def user_recommendation(input_tenant_id,input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc):
    """Return parsed user recommendations for a ticket (best effort).

    The six ticket fields are forwarded to ``user_recommendation_tfidf``.
    Each recommendation string it returns is then:

    1. stripped of its leading number with ``process_text``;
    2. parsed with ``extract_combined_info``; strings that yield no entry
       are skipped;
    3. extended with the first match of ``extract_role_name``, or ``None``
       when there is no match.

    Args:
        input_tenant_id: Tenant identifier received from the API.
        input_ticket_category: Ticket category.
        input_ticket_type: Ticket type.
        input_ticket_item: Ticket item.
        input_ticket_summary: Ticket summary text.
        input_ticket_desc: Ticket description text.

    Returns:
        A list with one entry per usable recommendation. Each entry is the
        first entry produced by ``extract_combined_info`` (numeric tokens
        followed by the trailing name string) with the role name, or
        ``None``, appended. An empty list is returned when anything in the
        pipeline fails.
    """
    try:
        print("Input Tenant ID from API : ",input_tenant_id)
        user_recommendation_list_tfidf = user_recommendation_tfidf(input_tenant_id,input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc)
        print("TFIDF Prediction Done", user_recommendation_list_tfidf)

        processed_recommendations_list = [process_text(text) for text in user_recommendation_list_tfidf]

        user_recommendation_info = []
        for res in processed_recommendations_list:
            info = extract_combined_info(res)
            if not info:
                # Nothing parseable in this recommendation string.
                continue
            role_name = extract_role_name(res)
            info[0].append(role_name[0] if role_name else None)
            user_recommendation_info.append(info[0])

        return user_recommendation_info

    except Exception as exc:
        # Deliberate best effort: callers get an empty list instead of an
        # error. Only Exception is caught (a bare ``except`` also swallowed
        # KeyboardInterrupt/SystemExit), and the failure is reported instead
        # of being dropped silently.
        print("User recommendation failed : ", repr(exc))
        return []


    

def user_recommendation_tfidf(input_tenant_id,input_ticket_category, input_ticket_type, input_ticket_item, input_ticket_summary, input_ticket_desc):
    """Rank historical tickets against the input and return their targets.

    The six inputs are joined into one string and passed through
    ``input_data_preprocessing``. The tenant id is re-read from the processed
    text as its first all-digit token. The text is then scored against the
    module-level TF-IDF and count matrices (``input_evalution`` and
    ``input_evalution_count``). Each score is weighted by 0.5, the two
    result sets are concatenated, sorted by weighted score (descending) and
    truncated to ``config.max_reccom`` rows.

    Args:
        input_tenant_id: Tenant identifier; placed first in the joined text.
        input_ticket_category: Ticket category.
        input_ticket_type: Ticket type.
        input_ticket_item: Ticket item.
        input_ticket_summary: Ticket summary text.
        input_ticket_desc: Ticket description text.

    Returns:
        The values of column ``config.target_column`` for the top rows, as a
        list. An empty list when neither model produced a match.

    Raises:
        IndexError: If the processed text contains no all-digit token, so no
            tenant id can be read from it.
    """
    ## First join the 6 parameters and then call input_data_preprocessing
    data_to_be_processed = ' '.join(
        str(field)
        for field in (
            input_tenant_id,
            input_ticket_category,
            input_ticket_type,
            input_ticket_item,
            input_ticket_summary,
            input_ticket_desc,
        )
    )

    ## Input Data Preprocessing
    input_processed_text = input_data_preprocessing(data_to_be_processed)

    print("Input processed Text : ",input_processed_text)

    print("Getting Tenant ID from input_processed_text")
    # The tenant id is taken from the processed text, not from the argument:
    # it is the first token made only of digits.
    tenant_id = [int(i) for i in input_processed_text.split() if i.isdigit()][0]
    print("Tenant ID from Input processed text : ",tenant_id)

    ##TFIDF Prediction
    tfidf_pred,input_tfidfmatrx = input_evalution(input_processed_text,tenant_id,tfidf_matrix,tfidf_vector,df_act)

    ##TF_count Prediction
    tf_count_pred,input_tfcountmatrx = input_evalution_count(input_processed_text,tenant_id,tf_count_matrix,count_vector,df_act)

    # assign() returns new frames, so the frames handed back by the
    # evaluation helpers are not modified in place.
    tfidf_pred = tfidf_pred.assign(score_new=tfidf_pred['score']*0.5, flag='tfidf')
    tf_count_pred = tf_count_pred.assign(score_new=tf_count_pred['score']*0.5, flag='tf_count')

    overall_result = pd.concat([tfidf_pred,tf_count_pred])

    # Without this guard the result list was never bound when nothing
    # matched, and the final ``return`` raised UnboundLocalError.
    if len(overall_result) == 0:
        return []

    # NOTE(review): a ticket matched by both models appears twice here; the
    # 0.5 weights suggest the scores may have been meant to be summed per
    # ticket - confirm before changing.
    overall_result = overall_result.sort_values(by='score_new',ascending=False)
    overall_result = overall_result.head(config.max_reccom)

    user_recommendation_list = overall_result[config.target_column].tolist()
    print("USer recommendation List from event_prediction_tfidf function : ",user_recommendation_list)

    return user_recommendation_list



def _evaluate_against_matrix(text, tenant_id, df_train_mtrx, vectorizer, df_act, threshold, check_label):
    """Score ``text`` against one tenant's rows of a training matrix.

    Shared implementation of ``input_evalution`` and
    ``input_evalution_count``; the two differ only in the matrix/vectorizer
    pair they pass in and in their log labels.

    Args:
        text: Pre-processed input text.
        tenant_id: Tenant id; ``str(tenant_id)`` must be a column of
            ``df_train_mtrx``.
        df_train_mtrx: DataFrame of training vectors whose index holds the
            ticket ids.
        vectorizer: Fitted object whose ``transform([text])`` result supports
            ``todense()``.
        df_act: DataFrame with a ``ticket_id`` column.
        threshold: Only similarities strictly above this value are kept.
        check_label: Text printed in front of the score preview.

    Returns:
        ``(df_eval, df_tst)``: a copy of the matching ``df_act`` rows with a
        ``score`` column, and the dense input vector as a one-row DataFrame.

    Raises:
        KeyError: If ``df_train_mtrx`` has no column ``str(tenant_id)``.
    """
    # Keep only the rows whose tenant column is positive.
    df_train_mtrx_filtered = df_train_mtrx[df_train_mtrx[str(tenant_id)] > 0]

    ## Replacing NaN values in matrix with 0. (Previously only an unused
    ## np.isnan mask was computed, so the NaNs were never replaced.)
    df_train_mtrx_filtered = df_train_mtrx_filtered.fillna(0)

    x = vectorizer.transform([text]).todense()
    print("X : ",x)
    df_tst = pd.DataFrame(x)

    if len(df_train_mtrx_filtered) == 0:
        # No training rows for this tenant: report "no matches" instead of
        # handing zero samples to cosine_similarity.
        df_eval = df_act.iloc[:0].copy()
        df_eval['score'] = np.empty(0, dtype=float)
        return df_eval, df_tst

    ## Calculating cosine similarity (one value per filtered training row)
    scr = np.asarray(cosine_similarity(df_train_mtrx_filtered, df_tst)).ravel()
    df_chk = pd.DataFrame()
    df_chk['ticket_id'] = df_train_mtrx_filtered.index
    df_chk['score'] = scr
    print(check_label, df_chk.head())

    # Keep rows whose score is above the threshold.
    matches = df_chk[df_chk['score'] > threshold]
    print("Score : ", matches['score'].tolist())
    print("Indexes : ",matches.index)

    # Attach scores by ticket id. Assigning the score list positionally
    # relied on df_act and the matrix sharing the same row order and having
    # no duplicate or missing ticket ids. If a ticket id occurs several
    # times in the matrix, its best score is used.
    best_score = matches.groupby('ticket_id')['score'].max()

    # .copy() so the score column is written to an independent frame rather
    # than to a slice of df_act.
    df_eval = df_act[df_act['ticket_id'].isin(matches['ticket_id'])].copy()
    df_eval['score'] = df_eval['ticket_id'].map(best_score).astype(float)

    return df_eval, df_tst


def input_evalution(input_processed_text,tenant_id, df_train_mtrx,tfidf_vector,df_act, threshold=0.50):
    """Find tickets similar to the input using the TF-IDF representation.

    Args:
        input_processed_text: Pre-processed input text.
        tenant_id: Tenant id used to select rows of ``df_train_mtrx``.
        df_train_mtrx: TF-IDF training matrix as a DataFrame indexed by
            ticket id, with a column named ``str(tenant_id)``.
        tfidf_vector: Fitted vectorizer used to transform the input text.
        df_act: DataFrame with a ``ticket_id`` column.
        threshold: Minimum (exclusive) cosine similarity; defaults to 0.50.

    Returns:
        ``(df_eval, df_tst)``: the matching ``df_act`` rows with a ``score``
        column, and the dense input vector as a one-row DataFrame.
    """
    print("Into Input Evaluation function")
    print("Text : ",input_processed_text)
    print("Tenant ID Inside INput Evaluation : ", tenant_id)
    return _evaluate_against_matrix(
        input_processed_text, tenant_id, df_train_mtrx, tfidf_vector, df_act,
        threshold, "DF CHeck Input Evaluation: ",
    )


def input_evalution_count(text, tenant_id,df_train_mtrx,count_vector,df_act, threshold=0.50):
    """Find tickets similar to the input using the count representation.

    Args:
        text: Pre-processed input text.
        tenant_id: Tenant id used to select rows of ``df_train_mtrx``.
        df_train_mtrx: Count training matrix as a DataFrame indexed by
            ticket id, with a column named ``str(tenant_id)``.
        count_vector: Fitted vectorizer used to transform the input text.
        df_act: DataFrame with a ``ticket_id`` column.
        threshold: Minimum (exclusive) cosine similarity; defaults to 0.50.

    Returns:
        ``(df_eval, df_tst)``: the matching ``df_act`` rows with a ``score``
        column, and the dense input vector as a one-row DataFrame.
    """
    print("Into Input Evaluation Count function")
    print("Text : ",text)
    print("Tenant ID inside INput EValuation Count fn : ",tenant_id)
    return _evaluate_against_matrix(
        text, tenant_id, df_train_mtrx, count_vector, df_act,
        threshold, "DF CHeck Input Evaluation Count: ",
    )