Untitled
unknown
plain_text
a year ago
3.7 kB
2
Indexable
Never
from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# NOTE(review): wildcard import kept in its original position because other
# code may rely on the names it injects; prefer explicit imports.
from feature_engineering import *
import pdb

# Columns that make up the feature vector used for the similarity search.
# They must be present in both the historical frame and the input row.
_FEATURE_COLUMNS = [
    'ticket_category',
    'ticket_type',
    'ticket_item',
    'ticket_summary',
    'ticket_severity',
    'resolution_sla_violated',
    'reopen_count',
    'owner_user_id',
    'role_name_encoded',
    'ticket_resolution_time',
]

# Metric names accepted by ``get_top_5_person_who_resolved``.
_SUPPORTED_METRICS = ('cosine', 'euclidean', 'manhattan')

# Number of nearest tickets inspected for recommendations.
_TOP_K = 5


def get_top_5_person_who_resolved(df, row, distance_metric='cosine'):
    """Recommend resolvers taken from the tickets most similar to ``row``.

    The feature vector of ``row`` (the columns in ``_FEATURE_COLUMNS``) is
    compared against the same columns of every row in ``df``; the
    ``person_who_resolved`` values of the five closest rows are returned.

    The previous implementation could never return: after finding the
    neighbours it referenced an undefined ``ticket_data`` frame and called
    itself for every row of it. That tail has been removed and the
    neighbours found for ``row`` are returned directly.

    Args:
        df: DataFrame of historical tickets containing ``_FEATURE_COLUMNS``
            and ``person_who_resolved``. The feature columns are assumed to
            be numerically encoded already -- TODO confirm against caller.
        row: Mapping-like object (e.g. a pandas Series or a dict) exposing
            every column in ``_FEATURE_COLUMNS``.
        distance_metric: One of ``'cosine'``, ``'euclidean'`` or
            ``'manhattan'``.

    Returns:
        ``{"recommendations": [...]}`` where the list holds the distinct
        ``person_who_resolved`` values of the nearest tickets, closest
        first. The list is empty when ``df`` has no rows.

    Raises:
        ValueError: If ``distance_metric`` is not a supported metric.
    """
    if distance_metric not in _SUPPORTED_METRICS:
        raise ValueError('Invalid distance metric')

    # Nothing to compare against; pairwise_distances rejects empty input.
    if df.empty:
        return {"recommendations": []}

    # Per-column lookup works for both a Series and a plain dict.
    input_vector = np.array([row[column] for column in _FEATURE_COLUMNS])

    distances = pairwise_distances(
        input_vector.reshape(1, -1),
        df[_FEATURE_COLUMNS],
        metric=distance_metric,
    )[0]

    # Stable sort keeps the order of ``df`` among equally distant tickets,
    # which makes the result deterministic.
    closest_indices = np.argsort(distances, kind='stable')[:_TOP_K]
    closest_person_who_resolved = df.iloc[closest_indices]['person_who_resolved']

    # De-duplicate while preserving the nearest-first ranking; a plain
    # ``set`` would return the names in arbitrary order.
    recommendations = list(dict.fromkeys(closest_person_who_resolved.tolist()))

    return {"recommendations": recommendations}