Untitled
plain_text
a month ago
1.0 kB
1
Indexable
Never
def _vectorize_to_frame(vectorizer, texts, index):
    """Fit *vectorizer* on *texts* and return the document-term matrix as a DataFrame."""
    # fit_transform() learns the vocabulary and encodes the documents in one
    # pass; calling fit() first would only repeat the same work.
    sparse_matrix = vectorizer.fit_transform(texts)

    # get_feature_names() was removed in scikit-learn 1.2. Prefer the
    # replacement and fall back only on releases that predate it.
    get_names = getattr(vectorizer, "get_feature_names_out", None)
    if get_names is None:
        get_names = vectorizer.get_feature_names

    # toarray() returns a plain ndarray rather than the legacy np.matrix
    # produced by todense(); the values are the same.
    return pd.DataFrame(sparse_matrix.toarray(), columns=get_names(), index=index)


def train_data(df, column, *, stop_words=None):
    """Build TF-IDF and raw-count document-term frames for one text column.

    Args:
        df: DataFrame holding the documents. Its ``event_id`` column is used
            as the row index of both returned frames.
        column: Name of the column in ``df`` containing the text to vectorize.
        stop_words: Forwarded to both vectorizers. The default ``None`` keeps
            every token, which is what this function has always effectively
            done (an earlier ``stop_words='english'`` instance was created
            but immediately replaced). Pass ``'english'`` to filter them.

    Returns:
        A 4-tuple ``(df_train_tfidf, df_train_count, tfidf_vectorizer,
        count_vectorizer)``: the two dense document-term DataFrames (one
        column per vocabulary term) followed by the fitted vectorizers.
    """
    texts = df[column]
    index = df["event_id"]

    tfidf_vectorizer = TfidfVectorizer(stop_words=stop_words)
    df_train_tfidf = _vectorize_to_frame(tfidf_vectorizer, texts, index)

    count_vectorizer = CountVectorizer(stop_words=stop_words)
    df_train_count = _vectorize_to_frame(count_vectorizer, texts, index)

    return df_train_tfidf, df_train_count, tfidf_vectorizer, count_vectorizer