# Untitled
#
# Paste-site header residue (pastecode.io, mail@pastecode.io avatar):
# unknown / plain_text / a year ago / 1.2 kB / 1 / Indexable / Never
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# For each new record, find the most textually similar training record
# (TF-IDF + cosine similarity) and report the resolver/owner/role stored for it.
#
# NOTE(review): `df` and `row` are not defined in this snippet. The code below
# assumes `df` is a pandas DataFrame and `row` is either one row of it (a
# Series indexed by column name) or a DataFrame of new rows -- confirm at the
# call site.

# Columns that hold the answer to look up rather than text to match on.
output_columns = ['person_who_resolved', 'owner_user_id', 'role_name']


def _rows_to_documents(frame):
    """Return one whitespace-joined string per row of *frame*.

    TfidfVectorizer expects an iterable of strings. Iterating a DataFrame
    yields its column labels, not its rows, so every row is flattened into a
    single text document first. Missing cells are skipped so that they do not
    add a spurious "nan" token to the vocabulary.
    """
    present = frame.notna().to_numpy()
    cells = frame.to_numpy(dtype=object)
    return [
        ' '.join(str(value) for value, keep in zip(values, mask) if keep)
        for values, mask in zip(cells, present)
    ]


# Training data: every column except the outputs is treated as a text feature.
train_data = df.drop(columns=output_columns)

output_df = df[output_columns]

# New data for similarity calculation
new_data = row

# Promote a single Series row to a one-row frame, then align it to the
# training columns so output columns are ignored and absent ones stay empty.
new_frame = new_data.to_frame().T if new_data.ndim == 1 else new_data
new_frame = new_frame.reindex(columns=train_data.columns)

# Create TF-IDF vectorizer and fit on the training documents
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(_rows_to_documents(train_data))

# Transform new data using the same vectorizer (same vocabulary and weights)
X_new = vectorizer.transform(_rows_to_documents(new_frame))

# Calculate cosine similarity between new data and training data;
# one matrix row per new record, one column per training record.
similarity_matrix = cosine_similarity(X_new, X)

# Find the most similar training data index (positional) for each new record
most_similar_indices = np.argmax(similarity_matrix, axis=1)

# Look up the output columns of the best-matching training rows. The indices
# are positional, hence `.iloc`. The variable keeps its original name for any
# code that reads it later, although it holds output rows, not TF-IDF vectors.
corresponding_output_tfidf = output_df.iloc[most_similar_indices]

print("Similarity Matrix:")
print(similarity_matrix)
print("\nMost Similar Indices:", most_similar_indices)
print("\nCorresponding Output Rows:")
print(corresponding_output_tfidf)