# Untitled
# unknown
# plain_text
# 2 years ago
# 4.6 kB
# 5
# Indexable
from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Keep the row labels of `data` (read via `.index.values`) so that positions
# in the X vector can be linked back to them. `data` and X are defined outside
# the visible part of this file.
# NOTE(review): `index` is not referenced again in the visible code; confirm
# it is still needed.
index = data.index.values
# Feature columns compared between the query ticket and every ticket in `data`.
_FEATURE_COLUMNS = [
    'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
    'ticket_severity', 'resolution_sla_violated', 'reopen_count',
    'role_id', 'ticket_resolution_time',
]

# Distance metrics accepted by get_top_5_person_who_resolved.
_SUPPORTED_METRICS = ('cosine', 'euclidean', 'manhattan')


def get_top_5_person_who_resolved(row, distance_metric='cosine'):
    """Return the resolvers of the 5 tickets in ``data`` closest to ``row``.

    The feature columns of ``row`` are compared against the same columns of
    every row of the module-level ``data`` frame with
    ``sklearn.metrics.pairwise_distances``.

    Args:
        row: A ticket record (for example the row passed in by
            ``DataFrame.apply(..., axis=1)``). It must provide every feature
            column plus ``person_who_resolved``.
            NOTE(review): the feature values are presumably numeric/encoded
            already, since they are fed straight into a distance computation.
        distance_metric: One of ``'cosine'``, ``'euclidean'`` or
            ``'manhattan'``.

    Returns:
        tuple: ``(recommended, actual)`` where ``recommended`` is the list of
        ``person_who_resolved`` values of the (at most) 5 closest tickets,
        ordered by increasing distance, and ``actual`` is the
        ``person_who_resolved`` value of ``row`` itself.

    Raises:
        ValueError: If ``distance_metric`` is not a supported metric.
    """
    if distance_metric not in _SUPPORTED_METRICS:
        raise ValueError('Invalid distance metric')

    # One query vector of shape (1, n_features) against all tickets in `data`.
    query_vector = np.array(list(row[_FEATURE_COLUMNS])).reshape(1, -1)
    distances = pairwise_distances(
        query_vector, data[_FEATURE_COLUMNS], metric=distance_metric
    )[0]

    # NOTE(review): when `row` is itself one of the rows of `data`, it is at
    # distance 0 from itself and will normally show up among its own
    # neighbours; confirm this is intended before trusting evaluation scores.
    closest_indices = np.argsort(distances)[:5]

    # argsort yields positions, hence the positional `.iloc` lookup.
    closest_person_who_resolved = data.iloc[closest_indices]['person_who_resolved']

    return closest_person_who_resolved.tolist(), row['person_who_resolved']
# Run the nearest-ticket lookup for every row; each result is a
# (recommended resolvers, actual resolver) pair.
per_ticket_results = data.apply(get_top_5_person_who_resolved, axis=1)
# Split the pairs into two parallel sequences and store each as a column.
recommended_column, actual_column = zip(*per_ticket_results)
data['recommendations'] = recommended_column
data['actual_person_who_resolved'] = actual_column
data.head()
# Evaluation Metrics
def calculate_map(actual, predicted):
    """
    Calculate the average-precision value of one ticket's recommendations.

    Each ticket has exactly one relevant user (``actual``), so the value is
    the reciprocal of the 1-based position at which ``actual`` first appears
    in ``predicted``. Averaging this value over all tickets (done by the
    caller) gives the Mean Average Precision (MAP).

    Args:
        actual (str or int): Actual user who resolved the ticket.
        predicted (iterable): Recommended users, best recommendation first.

    Returns:
        float: ``1 / rank`` of the first occurrence of ``actual`` in
        ``predicted``, or 0.0 when ``actual`` was not recommended.
    """
    # Materialise once so tuples, arrays and Series behave like a plain list.
    ranked = list(predicted)
    if actual not in ranked:
        return 0.0
    return 1.0 / (ranked.index(actual) + 1)
def calculate_topk_accuracy(actual, predicted, k):
    """
    Calculate Top-k Accuracy for one ticket's recommendations.

    Args:
        actual (str or int): Actual user who resolved the ticket.
        predicted (iterable): Recommended users, best recommendation first.
        k (int): Number of leading recommendations to consider.

    Returns:
        float: 1.0 if ``actual`` is among the first ``k`` entries of
        ``predicted``, otherwise 0.0.
    """
    # Materialise once so the membership test always checks the recommended
    # values themselves (e.g. `in` on a pandas Series would test its index).
    topk_predictions = list(predicted)[:k]
    return 1.0 if actual in topk_predictions else 0.0
# Score every ticket with calculate_map, then average across tickets
per_ticket_map = data.apply(
    lambda ticket: calculate_map(
        ticket['actual_person_who_resolved'], ticket['recommendations']
    ),
    axis=1,
)
overall_map_score = per_ticket_map.mean()
# Score every ticket with calculate_topk_accuracy (k=5), then average
per_ticket_hit = data.apply(
    lambda ticket: calculate_topk_accuracy(
        ticket['actual_person_who_resolved'], ticket['recommendations'], k=5
    ),
    axis=1,
)
overall_topk_accuracy = per_ticket_hit.mean()
print("Overall MAP score:", overall_map_score)
print("Overall Top-k Accuracy score:", overall_topk_accuracy)
# Getting unique values
# dict.fromkeys drops repeated resolvers while keeping first-occurrence order,
# so the order of each recommendation list is preserved. list(set(x)) would
# return the names in arbitrary order, making "first 5" meaningless.
unique_values = data['recommendations'].apply(lambda x: list(dict.fromkeys(x)))
# Keep at most the first 5 unique values of each list
data['unique_top_5_person_who_resolved'] = unique_values.apply(lambda x: x[:5])
# Display the updated dataframe
print(data.head())