Untitled
unknown
plain_text
2 years ago
1.0 kB
10
Indexable
def _vectorize_to_frame(vectorizer, documents, index):
    """Fit ``vectorizer`` on ``documents`` and return the dense term matrix.

    Args:
        vectorizer: An unfitted vectorizer exposing ``fit_transform`` and
            either ``get_feature_names_out`` or ``get_feature_names``.
            It is fitted in place.
        documents: The text documents to learn the vocabulary from.
        index: Row labels for the resulting DataFrame.

    Returns:
        A DataFrame with one row per document and one column per
        vocabulary term.
    """
    # fit_transform learns the vocabulary and encodes the documents in a
    # single pass; a separate fit() beforehand would only repeat the work.
    sparse_matrix = vectorizer.fit_transform(documents)

    # get_feature_names() was removed in scikit-learn 1.2. Prefer its
    # replacement, but keep the old name as a fallback for older installs.
    if hasattr(vectorizer, "get_feature_names_out"):
        feature_names = vectorizer.get_feature_names_out()
    else:
        feature_names = vectorizer.get_feature_names()

    # toarray() gives a plain ndarray rather than the legacy numpy.matrix
    # that todense() returns; the DataFrame contents are the same.
    return pd.DataFrame(
        sparse_matrix.toarray(),
        columns=feature_names,
        index=index,
    )


def train_data(df, column, *, stop_words=None):
    """Fit TF-IDF and raw-count vectorizers on one text column of ``df``.

    Args:
        df: DataFrame holding the documents. Its ``event_id`` column is
            used as the index of both returned frames.
        column: Name of the column in ``df`` that contains the text.
        stop_words: Passed as the ``stop_words`` argument of both
            vectorizers. The default ``None`` applies no stop-word
            filtering, which is what this function did before the
            parameter existed.

    Returns:
        A 4-tuple ``(df_train_tfidf, df_train_count, tfidf_vectorizer,
        count_vectorizer)``: the dense TF-IDF document-term frame, the
        dense count document-term frame, and the two fitted vectorizers.
    """
    documents = df[column]

    tfidf_vectorizer = TfidfVectorizer(stop_words=stop_words)
    df_train_tfidf = _vectorize_to_frame(
        tfidf_vectorizer, documents, df.event_id
    )

    count_vectorizer = CountVectorizer(stop_words=stop_words)
    df_train_count = _vectorize_to_frame(
        count_vectorizer, documents, df.event_id
    )

    return df_train_tfidf, df_train_count, tfidf_vectorizer, count_vectorizer