# Untitled

from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Work on a copy of `data` so the columns added to `df` further down are not
# written onto `data` itself (assumes `data` is a pandas DataFrame).
# NOTE(review): `data` is not defined in this file -- presumably the
# preprocessed tickets DataFrame produced by an earlier step; confirm.
df = data.copy()

# Keep the row labels of df as an array.
# NOTE(review): `index` is not referenced anywhere else in the visible code.
index = df.index.values

# Feature columns compared between tickets when computing distances.
_FEATURE_COLUMNS = [
    'ticket_category', 'ticket_type', 'ticket_item', 'ticket_summary',
    'ticket_severity', 'resolution_sla_violated', 'reopen_count',
    'role_id', 'ticket_resolution_time',
]

# Distance metrics accepted by get_top_5_person_who_resolved.
_SUPPORTED_METRICS = ('cosine', 'euclidean', 'manhattan')


def get_top_5_person_who_resolved(row, distance_metric='cosine'):
    """Return ``person_who_resolved`` for the five tickets in ``df`` nearest to ``row``.

    The distance is computed over ``_FEATURE_COLUMNS`` between ``row`` and
    every row of the module-level ``df``.

    Args:
        row: Mapping/Series providing every column in ``_FEATURE_COLUMNS``.
            Assumes the values are already numeric (e.g. label-encoded), as
            required by ``pairwise_distances`` -- TODO confirm upstream.
        distance_metric: One of ``'cosine'``, ``'euclidean'`` or
            ``'manhattan'``.

    Returns:
        A list with the ``person_who_resolved`` values of the (up to) five
        closest rows of ``df``, nearest first. When ``row`` is itself a row
        of ``df`` it is normally its own nearest neighbour and therefore
        appears in the result.

    Raises:
        ValueError: If ``distance_metric`` is not a supported metric.
    """
    # Fail fast on an unsupported metric before doing any numeric work.
    if distance_metric not in _SUPPORTED_METRICS:
        raise ValueError('Invalid distance metric')

    # Build the query vector from the same columns, in the same order, as the
    # matrix it is compared against.
    input_vector_x = np.array(list(row[_FEATURE_COLUMNS]))

    # pairwise_distances returns shape (1, len(df)); take the single row.
    distances = pairwise_distances(
        input_vector_x.reshape(1, -1),
        df[_FEATURE_COLUMNS],
        metric=distance_metric,
    )[0]

    # Positions of the five smallest distances, nearest first.
    closest_indices = np.argsort(distances)[:5]

    # argsort yields positional indices, hence iloc rather than loc.
    return df.iloc[closest_indices]['person_who_resolved'].tolist()


# Apply the function to each row to get the top 5 person_who_resolved based on other features
df['top_5_person_who_resolved'] = df.apply(get_top_5_person_who_resolved, axis=1)

## Getting unique values
# dict.fromkeys drops duplicates while keeping first-seen order, so each list
# stays in the order produced by get_top_5_person_who_resolved (nearest
# first). list(set(x)) would return the resolvers in arbitrary order.
unique_values = df['top_5_person_who_resolved'].apply(lambda x: list(dict.fromkeys(x)))
df['unique_top_5_person_who_resolved'] = unique_values.apply(lambda x: x[:5])  # Keep at most 5 unique values

## Decoding person_who_resolved
# NOTE(review): `label_enc` is not defined in this file -- presumably a
# LabelEncoder fitted on person_who_resolved in an earlier step; confirm.
# NOTE(review): decoded_column is not written back to df, so the print below
# does not show the decoded values.
decoded_column = label_enc.inverse_transform(df['person_who_resolved'])

# Display the first rows of the dataframe
print(df.head())
# Editor is loading...