Untitled
unknown
python
2 years ago
573 B
5
Indexable
# Build a TF-IDF feature table for the lemmatized tweet training set and
# attach the original text and the `positive` column to it.
import pandas as pd
import nltk
from nltk.corpus import stopwords as nltk_stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import optuna

# Both CSVs are read relative to the current working directory.
train = pd.read_csv('tweets_lemm_train.csv')
test = pd.read_csv('tweets_lemm_test.csv')

# Fetch the NLTK stop-word corpus (network access on first run) and keep the
# Russian list as a set for de-duplication / fast membership checks.
nltk.download('stopwords')
stopwords = set(nltk_stopwords.words('russian'))

# scikit-learn documents `stop_words` as 'english', a list, or None; recent
# releases validate the type and reject a set, so pass a (sorted, hence
# deterministic) list instead of the set itself.
count_tf_idf = TfidfVectorizer(stop_words=sorted(stopwords))

# fit_transform returns a SciPy sparse matrix: one row per tweet, one column
# per vocabulary term.
tf_idf = count_tf_idf.fit_transform(train['lemm_text'])

# The plain pd.DataFrame() constructor does not expand a SciPy sparse matrix
# into one column per feature; DataFrame.sparse.from_spmatrix is the supported
# constructor and keeps the data sparse. Reusing train.index guarantees the
# join below lines the rows up with their source tweets. Feature columns keep
# integer labels (0..n_features-1), so they cannot collide with the joined
# column names.
aaa = pd.DataFrame.sparse.from_spmatrix(tf_idf, index=train.index).join(
    train[['lemm_text', 'positive']]
)

# Bare expression: displays the frame when run as the last line of a
# notebook cell; a no-op when run as a plain script.
aaa
Editor is loading...
Leave a Comment