Untitled
unknown
plain_text
2 years ago
3.1 kB
3
Indexable
import warnings

from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the preprocessed data into a pandas dataframe.
# NOTE(review): `data` is not defined in this file view; it must already exist
# (e.g. from an earlier notebook cell) before this script runs.
df = data.copy()

# Link the X vector with index
index = df.index.values

# Columns that make up the feature vector compared between tickets.
FEATURE_COLUMNS = [
    'ticket_category',
    'ticket_type',
    'ticket_item',
    'ticket_summary',
    'ticket_severity',
    'resolution_sla_violated',
    'reopen_count',
    'role_id',
    'ticket_resolution_time',
]

# Distance metrics accepted by get_top_5_person_who_resolved.
SUPPORTED_DISTANCE_METRICS = ('cosine', 'euclidean', 'manhattan')


def get_top_5_person_who_resolved(row: pd.Series, distance_metric: str = 'cosine') -> list:
    """Return `person_who_resolved` for the 5 tickets in `df` closest to `row`.

    Closeness is the pairwise distance between the row's feature vector and
    the feature vectors of every ticket in the module-level `df`, both built
    from FEATURE_COLUMNS.

    Args:
        row: A ticket row containing every column in FEATURE_COLUMNS.
        distance_metric: One of 'cosine', 'euclidean' or 'manhattan'.

    Returns:
        A list of up to 5 `person_who_resolved` values, closest ticket first.
        Values may repeat when the same person resolved several neighbours.

    Raises:
        ValueError: If `distance_metric` is not a supported metric.
    """
    if distance_metric not in SUPPORTED_DISTANCE_METRICS:
        raise ValueError('Invalid distance metric')

    # Calculate the pairwise distances between the input vector and X.
    input_vector_x = np.array(list(row[FEATURE_COLUMNS]))
    distances = pairwise_distances(
        input_vector_x.reshape(1, -1),
        df[FEATURE_COLUMNS],
        metric=distance_metric,
    )[0]

    # NOTE(review): when `row` is itself a row of `df` (as in the df.apply call
    # below) the ticket is compared against itself too, so it will normally
    # appear among its own neighbours. Confirm whether that is intended.
    # A stable sort keeps equally distant tickets in dataframe order, making
    # the result deterministic.
    closest_indices = np.argsort(distances, kind='stable')[:5]

    # Get the person_who_resolved values for the closest tickets.
    closest_person_who_resolved = df.iloc[closest_indices]['person_who_resolved']
    return closest_person_who_resolved.tolist()


def _decode_person_ids(encoded_ids, encoder):
    """Map one list of encoded ids back to labels using a fitted encoder."""
    return encoder.inverse_transform(np.asarray(encoded_ids)).tolist()


# Apply the function to each row to get the top 5 person_who_resolved based on
# other features.
df['top_5_person_who_resolved'] = df.apply(get_top_5_person_who_resolved, axis=1)

# Remove duplicate values in each list. dict.fromkeys keeps first-seen order,
# so the closest-first ranking survives de-duplication (a set would not).
unique_values = df['top_5_person_who_resolved'].apply(lambda x: list(dict.fromkeys(x)))

# Take only the first 5 unique values.
df['unique_top_5_person_who_resolved'] = unique_values.apply(lambda x: x[:5])

# Decoding needs the encoder that originally encoded `person_who_resolved`.
# A brand-new LabelEncoder() has no learned classes and inverse_transform on it
# raises NotFittedError, so reuse an already-fitted `label_encoder` from the
# session when one exists and only fall back to a fresh instance otherwise.
_existing_encoder = globals().get('label_encoder')
label_encoder = (
    _existing_encoder if hasattr(_existing_encoder, 'classes_') else LabelEncoder()
)

if hasattr(label_encoder, 'classes_'):
    # inverse_transform expects a 1-D array of labels, so decode list by list.
    decoded_column = df['unique_top_5_person_who_resolved'].apply(
        lambda ids: _decode_person_ids(ids, label_encoder)
    )
else:
    warnings.warn(
        'label_encoder is not fitted; person_who_resolved values are left encoded',
        stacklevel=2,
    )
    decoded_column = df['unique_top_5_person_who_resolved']

# Display the updated dataframe
print(df.head())
Editor is loading...