Untitled
user_9363972
python
a year ago
2.6 kB
16
Indexable
import pandas as pd
import seaborn as sns
import re
import nltk
import pickle
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay
# Fetch the NLTK stop-word corpus used by the text-cleaning steps in this script.
nltk.download("stopwords")
# Module-level vectorizer and classifier; both are fitted further down the script.
tfidf_v = TfidfVectorizer()
classifier = LogisticRegression()
# Load the two source datasets (paths are relative to the working directory).
true = pd.read_csv('data/True.csv')
fake = pd.read_csv('data/Fake.csv')
# NOTE(review): these two expressions compute per-column null counts but the
# result is discarded when run as a script -- presumably notebook leftovers.
true.isnull().sum()
fake.isnull().sum()
# Label convention: 1 = row came from True.csv, 0 = row came from Fake.csv.
true['label'] = 1
fake['label'] = 0
# Stack both datasets into one frame with a fresh 0..n-1 index.
frames = [true,fake]
df = pd.concat(frames, ignore_index=True)
# errors='coerce' turns unparseable date strings into NaT instead of raising.
df['date'] = pd.to_datetime(df['date'], errors = 'coerce')
df['Year'] = df['date'].dt.year
df['Month'] = df['date'].dt.month
# NOTE(review): result discarded here as well (see note above).
df.subject.value_counts()
# Bar chart of row counts per year, split by the "subject" column.
sns.countplot(data= df, x="Year", hue="subject")
# Train on a small sample only: the first 200 rows of each dataset
# (presumably to keep preprocessing and fitting fast -- confirm before
# using the resulting model for anything beyond a demo).
frames = [true.head(200), fake.head(200)]
df = pd.concat(frames)
df = df.dropna()
# Copy and renumber rows 0..n-1; the previous index is kept as an "index" column.
df2 = df.copy()
df2.reset_index(inplace=True)

ps = PorterStemmer()
# Build the stop-word set once. stopwords.words() returns a list, so calling
# it for every token means reloading the list and scanning it linearly.
stop_words = set(stopwords.words('english'))
non_letters = re.compile('[^a-zA-Z]')

# One cleaned, stemmed, space-joined string per document.
corpus = []
for raw_text in df2['text']:
    # Replace everything except ASCII letters with spaces, lowercase, tokenize.
    words = non_letters.sub(' ', raw_text).lower().split()
    # Drop stop words, stem what remains, and re-join into a single string.
    corpus.append(
        ' '.join(ps.stem(word) for word in words if word not in stop_words)
    )

# Fit the TF-IDF vocabulary on the cleaned corpus and densify the matrix.
X = tfidf_v.fit_transform(corpus).toarray()
y = df2['label']
# Hold out 20% of the rows for evaluation; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
classifier.fit(X_train, y_train)

# Evaluate on the held-out rows.
pred = classifier.predict(X_test)
score = metrics.accuracy_score(y_test, pred)
disp = ConfusionMatrixDisplay.from_estimator(classifier, X_test, y_test)
disp.ax_.set_title("Confusion Matrix")

# Persist the fitted model and vectorizer. The context managers guarantee the
# files are flushed and closed before they are reopened for reading below.
with open("model2.pkl", "wb") as model_file:
    pickle.dump(classifier, model_file)
with open("tfidfvect2.pkl", "wb") as vect_file:
    pickle.dump(tfidf_v, vect_file)

# Reload the artifacts that predict() uses.
# NOTE: pickle.load executes arbitrary code from the file; this is only safe
# because the files were just written by this same script.
with open('model2.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('tfidfvect2.pkl', 'rb') as vect_file:
    tfidfvect = pickle.load(vect_file)
def predict(text: str) -> str:
    """Classify a piece of text as ``'FAKE'`` or ``'REAL'``.

    The text is cleaned the same way as the training corpus: non-letters
    are replaced by spaces, the text is lowercased and split on whitespace,
    English stop words are removed and the remaining words are stemmed.
    The result is vectorized and passed to the classifier.

    Relies on the module-level ``ps`` (stemmer), ``tfidfvect`` (fitted
    vectorizer) and ``model`` (fitted classifier).

    Args:
        text: Raw text to classify.

    Returns:
        ``'FAKE'`` if the model predicts label 0, otherwise ``'REAL'``.
    """
    # Load the stop-word list once per call rather than once per token.
    stop_words = set(stopwords.words('english'))
    words = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    review = ' '.join(ps.stem(word) for word in words if word not in stop_words)
    review_vect = tfidfvect.transform([review]).toarray()
    # predict() returns one label per input row; exactly one row was passed.
    label = model.predict(review_vect)[0]
    return 'FAKE' if label == 0 else 'REAL'
# Example run: classify one hard-coded sentence and print the resulting label.
text = "Trump is the former president"
prediction = predict(text)
print(prediction)
Editor is loading...
Leave a Comment