Untitled
user_9363972
python
a year ago
2.6 kB
12
Indexable
"""Train a TF-IDF + logistic-regression fake-news classifier and expose predict().

The script reads ``data/True.csv`` and ``data/Fake.csv``, labels the rows
(1 = real, 0 = fake), plots article counts per year and subject, trains on the
first 200 rows of each file, pickles the fitted model and vectorizer, and
finally classifies one sample sentence.
"""
import pickle
import re

import nltk
import pandas as pd
import seaborn as sns
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

nltk.download("stopwords")

tfidf_v = TfidfVectorizer()
classifier = LogisticRegression()
ps = PorterStemmer()

# Load the stop-word list once. Calling stopwords.words('english') per word
# rebuilds the list for every token and then scans it linearly.
_STOP_WORDS = frozenset(stopwords.words('english'))
_NON_LETTERS = re.compile('[^a-zA-Z]')


def _clean_text(raw):
    """Return *raw* normalised for the vectorizer.

    Non-letters become spaces, the text is lower-cased and split on
    whitespace, English stop words are dropped, and the remaining words are
    Porter-stemmed and re-joined with single spaces.  Training and inference
    both go through this helper so they always see identically prepared text.
    """
    words = _NON_LETTERS.sub(' ', raw).lower().split()
    return ' '.join(ps.stem(word) for word in words if word not in _STOP_WORDS)


# ---------------------------------------------------------------- data loading
true = pd.read_csv('data/True.csv')
fake = pd.read_csv('data/Fake.csv')

# Bare expressions: these only display something in a notebook/REPL cell and
# have no visible effect when the file is run as a script.
true.isnull().sum()
fake.isnull().sum()

true['label'] = 1
fake['label'] = 0

# ----------------------------------------------------------------- exploration
frames = [true, fake]
df = pd.concat(frames, ignore_index=True)

# Unparseable dates become NaT instead of raising.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['Year'] = df['date'].dt.year
df['Month'] = df['date'].dt.month

df.subject.value_counts()
sns.countplot(data=df, x="Year", hue="subject")

# -------------------------------------------------------------------- training
# Only the first 200 rows of each file are used for the model.
frames = [true.head(200), fake.head(200)]
df = pd.concat(frames)
df = df.dropna()

# concat above keeps each frame's own row labels, so labels repeat; resetting
# gives a unique positional index (the old labels stay in an 'index' column).
df2 = df.copy()
df2.reset_index(inplace=True)

corpus = [_clean_text(document) for document in df2['text']]

X = tfidf_v.fit_transform(corpus).toarray()
y = df2['label']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
classifier.fit(X_train, y_train)

# ------------------------------------------------------------------ evaluation
pred = classifier.predict(X_test)
score = metrics.accuracy_score(y_test, pred)

disp = ConfusionMatrixDisplay.from_estimator(classifier, X_test, y_test)
disp.ax_.set_title("Confusion Matrix")

# ----------------------------------------------------------------- persistence
# Context managers make sure every handle is flushed and closed.
with open("model2.pkl", "wb") as model_file:
    pickle.dump(classifier, model_file)
with open("tfidfvect2.pkl", "wb") as vect_file:
    pickle.dump(tfidf_v, vect_file)

# NOTE: pickle.load can execute arbitrary code; only load files this script
# (or another trusted source) produced.
with open('model2.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('tfidfvect2.pkl', 'rb') as vect_file:
    tfidfvect = pickle.load(vect_file)


def predict(text: str) -> str:
    """Classify *text* with the pickled model.

    The text is cleaned exactly like the training corpus, vectorized with the
    reloaded TF-IDF vectorizer, and passed to the reloaded classifier.

    Returns:
        ``'FAKE'`` when the model predicts label 0, otherwise ``'REAL'``.
    """
    review_vect = tfidfvect.transform([_clean_text(text)]).toarray()
    # predict() returns an array with one entry per input row; take the only one.
    label = model.predict(review_vect)[0]
    return 'FAKE' if label == 0 else 'REAL'


text = "Trump is the former president"
prediction = predict(text)
print(prediction)
Editor is loading...
Leave a Comment