Untitled
unknown
plain_text
2 years ago
2.8 kB
3
Indexable
from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Link the X vector with index
index = data.index.values

# Columns that form the feature vector compared between tickets. Kept in one
# place so the query vector and the reference matrix always use the same
# columns in the same order.
_FEATURE_COLUMNS = [
    'ticket_category',
    'ticket_type',
    'ticket_item',
    'ticket_summary',
    'ticket_severity',
    'resolution_sla_violated',
    'reopen_count',
    'role_id',
    'ticket_resolution_time',
]

# Distance metrics accepted by get_top_5_person_who_resolved.
_SUPPORTED_METRICS = ('cosine', 'euclidean', 'manhattan')


def get_top_5_person_who_resolved(row, distance_metric='cosine'):
    """Return the resolvers of the five tickets in ``data`` closest to ``row``.

    The feature columns of ``row`` are compared against the same columns of
    every row in the module-level ``data`` frame, and the
    ``person_who_resolved`` values of the five smallest distances are
    returned, nearest first.

    Args:
        row: A row (e.g. a ``pandas.Series``) indexable by the feature
            column names.
        distance_metric: One of ``'cosine'``, ``'euclidean'`` or
            ``'manhattan'``.

    Returns:
        A list of up to five ``person_who_resolved`` values, ordered from the
        closest ticket to the farthest. The list may contain duplicates.

    Raises:
        ValueError: If ``distance_metric`` is not a supported metric.

    NOTE(review): the feature columns are presumably already numerically
    encoded (``LabelEncoder`` is imported above) -- ``pairwise_distances``
    needs numeric input; confirm against the code that builds ``data``.
    NOTE(review): when ``row`` itself comes from ``data`` its own ticket is
    at distance 0, so its own resolver will normally be among the results;
    confirm that this is intended.
    """
    # Validate up front so an unsupported metric fails before any work.
    if distance_metric not in _SUPPORTED_METRICS:
        raise ValueError('Invalid distance metric')

    # Calculate the pairwise distances between the input vector and X.
    input_vector_x = np.array(list(row[_FEATURE_COLUMNS]))
    distances = pairwise_distances(
        input_vector_x.reshape(1, -1),
        data[_FEATURE_COLUMNS],
        metric=distance_metric,
    )[0]

    # Positions of the five closest tickets. A stable sort makes ties
    # deterministic: equally distant tickets keep their order in ``data``.
    closest_indices = np.argsort(distances, kind='stable')[:5]

    # Get the person_who_resolved values for the closest tickets.
    closest_person_who_resolved = data.iloc[closest_indices]['person_who_resolved']
    return closest_person_who_resolved.tolist()


# Apply the function to each row to get the top 5 person_who_resolved based
# on the other features.
data['top_5_person_who_resolved'] = data.apply(get_top_5_person_who_resolved, axis=1)

# Remove duplicate values in each list. dict.fromkeys keeps the first
# occurrence of each value, so the nearest-first ranking is preserved
# (a set would return the names in arbitrary order).
unique_values = data['top_5_person_who_resolved'].apply(lambda x: list(dict.fromkeys(x)))

# Take only the first 5 unique values.
data['unique_top_5_person_who_resolved'] = unique_values.apply(lambda x: x[:5])

# Display the updated dataframe.
print(data.head())
Editor is loading...