Untitled
plain_text
a month ago
978 B
1
Indexable
Never
def _feature_names(vectorizer):
    """Return the fitted vocabulary terms in column order."""
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer the replacement but keep working on older releases.
    getter = getattr(vectorizer, "get_feature_names_out", None)
    if getter is None:
        getter = vectorizer.get_feature_names
    return getter()


def _fit_to_frame(vectorizer, texts):
    """Fit *vectorizer* on *texts* and return the dense document-term DataFrame."""
    # fit_transform() both learns the vocabulary and builds the matrix, so a
    # separate fit() call beforehand would only repeat the work.
    sparse_matrix = vectorizer.fit_transform(texts)
    # toarray() yields a plain ndarray (todense() returns numpy.matrix).
    # NOTE: this materialises the full matrix in memory.
    return pd.DataFrame(
        sparse_matrix.toarray(),
        columns=_feature_names(vectorizer),
    )


def train_data(cleaned_data, column):
    """Build TF-IDF and raw-count document-term matrices for a text column.

    Both vectorizers are created with default settings (no stop-word
    filtering) and fitted on ``cleaned_data[column]``.

    Args:
        cleaned_data: Container indexable by ``column`` that yields the text
            documents (presumably a pandas DataFrame -- confirm with caller).
        column: Key of the text column to vectorize.

    Returns:
        A 4-tuple ``(data_train_tfidf, data_train_count, tfidf_vectorizer,
        count_vectorizer)``: the two dense DataFrames (one row per document,
        one column per vocabulary term, default integer index) followed by
        the fitted vectorizers so the same vocabulary can be reused later.
    """
    texts = cleaned_data[column]

    tfidf_vectorizer = TfidfVectorizer()
    data_train_tfidf = _fit_to_frame(tfidf_vectorizer, texts)

    count_vectorizer = CountVectorizer()
    data_train_count = _fit_to_frame(count_vectorizer, texts)

    return data_train_tfidf, data_train_count, tfidf_vectorizer, count_vectorizer