Untitled

mail@pastecode.io avatarunknown
plain_text
2 months ago
2.1 kB
1
Indexable
Never
def _rows_to_text(frame):
    """Join every row of *frame* into one whitespace-separated string.

    TfidfVectorizer expects an iterable of text documents. Iterating a
    DataFrame directly yields its column labels, so each row has to be
    flattened into a single document first. Missing values are skipped.
    """
    return [
        ' '.join(str(value) for value in values if pd.notna(value))
        for values in frame.itertuples(index=False, name=None)
    ]


def get_top_5_person_who_resolved(pred_data, *, threshold=0.5, top_n=5):
    """Return the resolvers of the historical tickets most similar to *pred_data*.

    Historical tickets are loaded from the two ServicePlus CSV exports, both
    the history and *pred_data* are passed through ``feature_engineering``,
    and every row is turned into a TF-IDF vector. Historical rows whose
    cosine similarity to the new data exceeds *threshold* are ranked from
    most to least similar.

    NOTE(review): assumes ``feature_engineering`` returns a pandas DataFrame
    and that, for the historical data, it contains the columns
    ``person_who_resolved``, ``owner_user_id`` and ``role_name`` -- confirm
    against its definition.

    Args:
        pred_data: Raw ticket data for the new ticket(s), in whatever form
            ``feature_engineering`` accepts.
        threshold: Minimum cosine similarity (exclusive) for a historical
            ticket to count as a match.
        top_n: Maximum number of distinct resolvers to return.

    Returns:
        A DataFrame with the columns ``person_who_resolved``,
        ``owner_user_id`` and ``role_name``, ordered by decreasing
        similarity, one row per distinct ``person_who_resolved``, at most
        *top_n* rows. Empty when nothing clears the threshold.
    """
    target_columns = ['person_who_resolved', 'owner_user_id', 'role_name']
    ticket_csv_paths = [
        '/Analytics/venv/Jup/CAPE_ServicePlus_UC/ServicePlusIncidentData_Post_01-01-2019_Till_07-07-2019.csv',
        '/Analytics/venv/Jup/CAPE_ServicePlus_UC/ServicePlusTicket_Data_Till-2019-01-01.csv',
    ]

    print("pred_data: ", pred_data)
    row = feature_engineering(pred_data)
    print("row: ", row)

    # Historical tickets act as the reference set for the similarity search.
    ticket_data = pd.concat(
        (pd.read_csv(path) for path in ticket_csv_paths),
        ignore_index=True,
    )
    df = feature_engineering(ticket_data)

    # Features are everything except the columns we want to predict.
    train_data = df.drop(columns=target_columns)
    output_df = df[target_columns]

    # Restrict the new data to the training feature columns so both sides
    # are vectorized from the same fields (and any target columns present in
    # the new data do not leak into the comparison).
    new_data = row.reindex(columns=train_data.columns)

    train_docs = _rows_to_text(train_data)
    new_docs = _rows_to_text(new_data)
    if not new_docs:
        # Nothing to compare against: return an empty, correctly-shaped frame.
        return output_df.iloc[0:0]

    # Fit on the historical documents, then project the new documents into
    # the same vocabulary.
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(train_docs)
    X_new = vectorizer.transform(new_docs)

    # Shape: (number of new rows, number of historical rows).
    similarity_matrix = cosine_similarity(X_new, X)

    # Score each historical row by its best match against any new row, then
    # keep the positions above the threshold, most similar first.
    best_scores = similarity_matrix.max(axis=0)
    candidates = np.flatnonzero(best_scores > threshold)
    ranking = np.argsort(-best_scores[candidates], kind='stable')
    similar_indices = candidates[ranking]

    # One row per resolver (the most similar ticket wins), capped at top_n.
    predicted_output_data = (
        output_df.iloc[similar_indices]
        .drop_duplicates(subset='person_who_resolved', keep='first')
        .head(top_n)
    )

    print("Similarity Matrix:")
    print(similarity_matrix)
    print("\nSimilar Data Indices with Cosine Similarity > 0.5:", similar_indices)
    print("\nPredicted Output Data:")
    print(predicted_output_data)

    return predicted_output_data