# Untitled
# unknown
# plain_text
# 2 years ago
# 896 B
# 10
# Indexable
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def read_file(file_path, encoding: str = "utf-8") -> str:
    """Return the entire contents of a text file as one string.

    Args:
        file_path: Path of the file to read; passed straight to ``open``.
        encoding: Text encoding used to decode the file. Defaults to
            UTF-8, which is what the original implementation hard-coded.

    Returns:
        The decoded file contents.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the bytes are not valid for ``encoding``.
    """
    # The context manager closes the handle even if decoding fails.
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()
def similarity_ratio(file_path1, file_path2):
    """Return the TF-IDF cosine similarity of two text files as a percentage.

    Both files are read with ``read_file``, vectorized together by a
    default-configured ``TfidfVectorizer`` (so the vocabulary and IDF
    weights come from these two documents only), and the cosine similarity
    of the two resulting vectors is multiplied by 100.

    Args:
        file_path1: Path of the first text file.
        file_path2: Path of the second text file.

    Returns:
        The cosine similarity between the two documents, scaled by 100.

    Raises:
        ValueError: Propagated from scikit-learn when the vectorizer finds
            no tokens at all in the two files (for example, both are empty).
    """
    content1 = read_file(file_path1)
    content2 = read_file(file_path2)

    # Fit on both documents at once so they share a single vocabulary.
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([content1, content2])

    # cosine_similarity returns a 1x1 matrix for two single-row inputs;
    # [0][0] extracts the scalar.
    similarity = cosine_similarity(vectors[0], vectors[1])[0][0]
    similarity_percentage = similarity * 100
    return similarity_percentage
# Hard-coded locations of the two text files to compare.
file_path1 = "C:\\Users\\Admin\\Desktop\\AdPython\\crawl\\dấu\\demo-title.txt"
file_path2 = "C:\\Users\\Admin\\Desktop\\AdPython\\crawl\\dấu\\ten_tep_tin_nguoc_lai.txt"

# Compute the similarity percentage and report it on stdout.
ratio = similarity_ratio(file_path1, file_path2)
print(f"Cosine Similarity Ratio: {ratio}%")