Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
9.9 kB
1
Indexable
Never
from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from feature_engineering import *
import pdb

def get_top_5_person_who_resolved(df, row, data ,distance_metric='cosine'):
    ##Fetching the ticket data details from API
    #pdb.set_trace()
    ticket_data= data
    print("Ticket Details are :",ticket_data)
    
    
    
    # Concatenate the input data into a single string
    input_data = ' '.join([str(row['ticket_category']), str(row['ticket_type']), str(row['ticket_item']),str(row['ticket_summary']),
                          str(row['ticket_severity']),str(row['resolution_sla_violated']),str(row['reopen_count']),
                          str(row['owner_user_id']),str(row['role_name_encoded']),str(row['ticket_resolution_time'])])

    # Calculate the pairwise distances between the input vector and X
    input_vector_x = np.array(list(row[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']]))
    if distance_metric == 'cosine':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='cosine')[0]
    elif distance_metric == 'euclidean':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='euclidean')[0]
    elif distance_metric == 'manhattan':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='manhattan')[0]
    else:
        raise ValueError('Invalid distance metric')
    
    # Get the indices of the top 5 closest tickets
    closest_indices = np.argsort(distances)[:5]
    
    # Get the person_who_resolved, owner_user_id, and role_name values for the closest tickets
    closest_person_who_resolved = df.iloc[closest_indices]['person_who_resolved']
    closest_owner_user_id = df.iloc[closest_indices]['owner_user_id']
    closest_role_name_encoded = df.iloc[closest_indices]['role_name_encoded']
    closest_role_name_decoded = df.iloc[closest_indices]['role_name_decoded']
    
    # Get the actual person_who_resolved, owner_user_id, and role_name value for the input ticket
    actual_person_who_resolved = row['person_who_resolved']
    actual_owner_user_id = row['owner_user_id']
    actual_role_name_encoded = row['role_name_encoded']
    actual_role_name_decoded = row['role_name_decoded']
    
    # Apply the function to the input data to get the recommendations
    ticket_data['recommendations'], ticket_data['actual_person_who_resolved'] = zip(*ticket_data.apply(lambda row: get_top_5_person_who_resolved(df, row), axis=1))

    # Remove duplicate values from recommendations
    ticket_data['recommendations'] = ticket_data['recommendations'].apply(lambda x: list(set(x)))

    # Return the recommendations as a list
    recommendations = ticket_data['recommendations'].tolist()
    return {"recommendations": recommendations}

    I am making a changes in the code like below-

from sklearn.metrics import pairwise_distances
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from feature_engineering import *
import pdb

def get_top_5_person_who_resolved(df, row, data ,distance_metric='cosine'):
    ##Fetching the ticket data details from API
    #pdb.set_trace()
    ticket_data= data
    print("Ticket Details are :",ticket_data)
    
    X=df.drop(columns=['person_who_resolved'])
    
    # Concatenate the input data into a single string
    input_data = ' '.join([str(row['ticket_category']), str(row['ticket_type']), str(row['ticket_item']),str(row['ticket_summary']),
                          str(row['ticket_severity']),str(row['resolution_sla_violated']),str(row['reopen_count']),
                          str(row['owner_user_id']),str(row['role_name_encoded']),str(row['ticket_resolution_time'])])

    # Calculate the pairwise distances between the input vector and X
    input_vector_x = np.array(list(row[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']]))
    if distance_metric == 'cosine':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='cosine')[0]
    elif distance_metric == 'euclidean':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='euclidean')[0]
    elif distance_metric == 'manhattan':
        distances = pairwise_distances(input_vector_x.reshape(1, -1), df[['ticket_category', 'ticket_type', 'ticket_item','ticket_summary',
       'ticket_severity', 'resolution_sla_violated', 'reopen_count', 'owner_user_id','role_name_encoded','ticket_resolution_time']], metric='manhattan')[0]
    else:
        raise ValueError('Invalid distance metric')
    
    # Get the indices of the top 5 closest tickets
    closest_indices = np.argsort(distances)[:5]
    
    # Get the person_who_resolved, owner_user_id, and role_name values for the closest tickets
    closest_person_who_resolved = df.iloc[closest_indices]['person_who_resolved']
    closest_owner_user_id = df.iloc[closest_indices]['owner_user_id']
    closest_role_name_encoded = df.iloc[closest_indices]['role_name_encoded']
    closest_role_name_decoded = df.iloc[closest_indices]['role_name_decoded']
    
    # Get the actual person_who_resolved, owner_user_id, and role_name value for the input ticket
    actual_person_who_resolved = row['person_who_resolved']
    actual_owner_user_id = row['owner_user_id']
    actual_role_name_encoded = row['role_name_encoded']
    actual_role_name_decoded = row['role_name_decoded']
    
    # Apply the function to the input data to get the recommendations
    ticket_data['recommendations'], ticket_data['actual_person_who_resolved'] = zip(*ticket_data.apply(lambda row: get_top_5_person_who_resolved(X, row), axis=1))

    # Remove duplicate values from recommendations
    ticket_data['recommendations'] = ticket_data['recommendations'].apply(lambda x: list(set(x)))

    # Return the recommendations as a list
    recommendations = ticket_data['recommendations'].tolist()
    return {"recommendations": recommendations}

    

Will this code work still? 

Because we are passing through FAST API the below code-

from fastapi import FastAPI, Request
from feature_engineering import feature_engineering
from model_training_building import get_top_5_person_who_resolved
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import pdb



app = FastAPI()

@app.post("/run_recommendation/")
async def run_recommendation(request: Request):
    pdb.set_trace()
    
    if request.method ==['POST']:
        param_vals = await request.json()  # Retrieve the JSON data from the request body
        ticket_data = param_vals['ticket_data']  

        data = pd.DataFrame(ticket_data)
        print("Data", data)
        
        ## Making a function call for data pre-processing
        processed_data= feature_engineering(data)
        
        print("Processed Data : ",processed_data)
        
        df=processed_data
        label_enc = LabelEncoder()

        df['role_name_encoded'] = label_enc.fit_transform(df['role_name'])
        df['role_name_decoded'] = label_enc.inverse_transform(df['role_name_encoded'])
        
        # Link the X vector with index
        index = df.index.values
        
        recommendations=get_top_5_person_who_resolved(df, row, data ,distance_metric='cosine')
        
        print("Recommended users for the sample ticket:")
        for i, rec in enumerate(recommendations[0]):
            print(f"Recommendation {i+1}: User {rec[0]}, Owner User ID {rec[1]}, Role Name {rec[2]}")
            
        return recommendations
        
if __name__ == "__main__":
    app.run(host='100.87.2.56', port=8895, threaded=True)

        
ANd this is the URL we are passing through for API testing where we are passing all parameters in a dictionary named ticket_data-

http://100.87.12.56:8895/run_recommendation/?ticket_data={'ticket_category':'Process','ticket_type':'HRO - Payroll','ticket_item':'Benefits and Payments','ticket_summary':'Incorrect Result','ticket_severity':'4 -Default','resolution_sla_violated':False,'reopen_count':0,'owner_user_id':104,'role_name':'L2 Support','created_date':'2020-08-06 10:35:33','ticket_resolution_date':'2020-08-06 17:07:04','person_who_resolved'=' '}

Can you show the changes in original code as we dont want to include person_who_resolved as it is the target column and we need to predict/recommend .