# Paste-site metadata: "Untitled", author unknown, python, 2 years ago, 573 B, 10 views, indexable.
import pandas as pd
import nltk
from nltk.corpus import stopwords as nltk_stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import optuna
# Build a TF-IDF feature table for the lemmatized tweet training set.
#
# Reads the train/test CSVs from the working directory, vectorizes the
# 'lemm_text' column of the training data with Russian stop words removed,
# and joins the resulting features with the source text and the 'positive'
# column.

train = pd.read_csv('tweets_lemm_train.csv')
test = pd.read_csv('tweets_lemm_test.csv')

# Make sure the NLTK stop-word corpus is available before reading it.
nltk.download('stopwords')
stopwords = set(nltk_stopwords.words('russian'))

# scikit-learn documents `stop_words` as 'english', a list, or None; a set is
# rejected by releases that validate constructor parameters, so pass a list.
# Sorting only makes the list order deterministic.
count_tf_idf = TfidfVectorizer(stop_words=sorted(stopwords))

# fit_transform returns a SciPy sparse matrix (documents x vocabulary terms).
tf_idf = count_tf_idf.fit_transform(train['lemm_text'])

# The plain DataFrame constructor does not unpack a SciPy sparse matrix into
# per-term numeric columns; use the sparse accessor instead. Columns are
# labelled with the fitted vocabulary and the index is aligned with `train`
# so the join below matches rows one-to-one.
tf_idf_frame = pd.DataFrame.sparse.from_spmatrix(
    tf_idf,
    index=train.index,
    columns=count_tf_idf.get_feature_names_out(),
)

# `lsuffix` only kicks in if a vocabulary term happens to be spelled exactly
# like one of the joined columns ('lemm_text' / 'positive'); without it such
# a collision would make join() raise.
aaa = tf_idf_frame.join(train[['lemm_text', 'positive']], lsuffix='_tfidf')

# Bare expression: displays the frame when run as the last line of a
# notebook cell; it has no effect in a plain script.
aaa
# (paste-site footer: "Leave a Comment")