Untitled
unknown
plain_text
a month ago
896 B
1
Indexable
Never
"""Compare two UTF-8 text files using TF-IDF cosine similarity."""

import sys

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Files compared when no paths are supplied on the command line.
DEFAULT_FILE_PATH1 = "C:\\Users\\Admin\\Desktop\\AdPython\\crawl\\dấu\\demo-title.txt"
DEFAULT_FILE_PATH2 = "C:\\Users\\Admin\\Desktop\\AdPython\\crawl\\dấu\\ten_tep_tin_nguoc_lai.txt"


def read_file(file_path: str) -> str:
    """Return the full contents of *file_path*, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


def similarity_ratio(file_path1: str, file_path2: str) -> float:
    """Return the TF-IDF cosine similarity of two text files as a percentage.

    Both files are read as UTF-8, vectorized together with a default
    ``TfidfVectorizer`` and compared with ``cosine_similarity``.

    Args:
        file_path1: Path of the first text file.
        file_path2: Path of the second text file.

    Returns:
        The cosine similarity multiplied by 100. ``0.0`` is returned when
        neither file contains any token the vectorizer can extract (for
        example two empty files), because there is nothing to compare.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If either file is not valid UTF-8.
    """
    content1 = read_file(file_path1)
    content2 = read_file(file_path2)

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([content1, content2])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no
        # document yields a single token; treat that as "no similarity"
        # instead of crashing on empty or punctuation-only files.
        return 0.0

    # cosine_similarity returns a 1x1 matrix for two single-row inputs.
    similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
    return float(similarity * 100)


def main() -> None:
    """Print the similarity of two files.

    Accepts either no command-line arguments (the default paths are used)
    or exactly two arguments: the paths of the files to compare.
    """
    args = sys.argv[1:]
    if not args:
        file_path1, file_path2 = DEFAULT_FILE_PATH1, DEFAULT_FILE_PATH2
    elif len(args) == 2:
        file_path1, file_path2 = args
    else:
        sys.exit(f"usage: {sys.argv[0]} [FILE1 FILE2]")

    ratio = similarity_ratio(file_path1, file_path2)
    print(f"Cosine Similarity Ratio: {ratio}%")


# Guarded so that importing this module does not touch the filesystem.
if __name__ == "__main__":
    main()