Untitled
unknown
plain_text
2 years ago
978 B
9
Indexable
def _fit_to_frame(vectorizer, documents):
    """Fit *vectorizer* on *documents* and return the dense matrix as a DataFrame.

    The vectorizer is fitted in place. Columns of the returned frame are the
    learned vocabulary terms; rows follow the order of *documents* and get a
    fresh default integer index.
    """
    # fit_transform() already fits, so a separate fit() beforehand would
    # only repeat the same work.
    sparse_matrix = vectorizer.fit_transform(documents)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back to the old name on older releases.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names

    # toarray() gives a plain ndarray rather than the discouraged np.matrix
    # that todense() returns; the resulting DataFrame is the same.
    return pd.DataFrame(sparse_matrix.toarray(), columns=get_names())


def train_data(cleaned_data, column):
    """Build TF-IDF and raw-count document-term tables for one text column.

    Args:
        cleaned_data: Object indexable by column name (e.g. a pandas
            DataFrame); ``cleaned_data[column]`` is passed to the
            vectorizers as the collection of documents.
        column: Name of the column holding the text.

    Returns:
        A 4-tuple ``(data_train_tfidf, data_train_count, tfidf_vectorizer,
        count_vectorizer)``: the two dense document-term DataFrames followed
        by the fitted vectorizers that produced them.
    """
    documents = cleaned_data[column]

    # Both vectorizers use default settings (no stop-word removal). Earlier
    # code created stop_words='english' instances but overwrote them right
    # away, so the defaults are what was actually in effect.
    tfidf_vectorizer = TfidfVectorizer()
    data_train_tfidf = _fit_to_frame(tfidf_vectorizer, documents)

    count_vectorizer = CountVectorizer()
    data_train_count = _fit_to_frame(count_vectorizer, documents)

    return data_train_tfidf, data_train_count, tfidf_vectorizer, count_vectorizer