Untitled
unknown
plain_text
a year ago
1.2 kB
1
Indexable
Never
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Columns that are looked up for the best match; they are excluded from the
# text the similarity is computed on.
OUTPUT_COLUMNS = ['person_who_resolved', 'owner_user_id', 'role_name']


def _to_documents(data):
    """Return one whitespace-joined text document per row of *data*.

    TfidfVectorizer expects an iterable of strings. Iterating a DataFrame
    yields its column labels and iterating a Series yields individual cells,
    so tabular input is flattened explicitly here: every row becomes a single
    string made of its cell values. Output columns are dropped when present
    and missing values become empty strings.
    """
    if isinstance(data, str):
        return [data]
    if isinstance(data, pd.Series):
        # A single record: reshape to a one-row frame so it is treated as
        # one document rather than one document per cell.
        data = data.to_frame().T
    if isinstance(data, pd.DataFrame):
        features = data.drop(columns=OUTPUT_COLUMNS, errors='ignore')
        features = features.astype(object).fillna('').astype(str)
        return [
            ' '.join(cells)
            for cells in features.itertuples(index=False, name=None)
        ]
    # Anything else is assumed to already be an iterable of strings.
    return [str(item) for item in data]


# Sample training data with text features.
# NOTE(review): `df` is defined outside this snippet; it is used here as a
# pandas DataFrame that contains the three output columns.
train_data = df.drop(columns=OUTPUT_COLUMNS)
output_df = df[OUTPUT_COLUMNS]

# New data for similarity calculation.
# NOTE(review): `row` is defined outside this snippet; presumably a single
# record (Series) or a DataFrame shaped like `df` -- confirm with the caller.
new_data = row

# Create TF-IDF vectorizer and fit on training data (one document per row).
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(_to_documents(train_data))

# Transform new data using the same vectorizer.
X_new = vectorizer.transform(_to_documents(new_data))

# Calculate cosine similarity between new data and training data.
similarity_matrix = cosine_similarity(X_new, X)

# Find the most similar training data indices for each new data point.
most_similar_indices = np.argmax(similarity_matrix, axis=1)

# Get the corresponding output rows for the new data. The indices are
# positions in X (row order of train_data), so the lookup is positional
# (`iloc`) and does not depend on df's index labels.
corresponding_output_tfidf = output_df.iloc[most_similar_indices]

print("Similarity Matrix:")
print(similarity_matrix)
print("\nMost Similar Indices:", most_similar_indices)
print("\nCorresponding Output TF-IDF:")
print(corresponding_output_tfidf)