Untitled
user_8080122
plain_text
2 years ago
953 B
3
Indexable
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
#Statsmodels
import statsmodels.api as sm
#Scikit learn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Previously we imported LinearRegression from scikit-learn; now we import LogisticRegression.
# Logistic-regression analysis of the purchase data: inspect the frame, encode
# gender numerically, plot the correlation heatmap, fit a Logit model for
# "Purchased" and report the coefficients as odds ratios.
#
# NOTE(review): df1 is not created anywhere in the visible part of this file;
# it is presumably loaded earlier (e.g. in a previous notebook cell) - confirm.

# Per-column flag: True where the column contains at least one missing value.
# Wrapped in print() so the result is also visible when run as a plain script.
print(df1.isnull().any())

# Encode gender as a 0/1 indicator (Male -> 0, Female -> 1). Series.map is
# used instead of Series.replace so the result is a numeric column without
# relying on implicit object->int downcasting; any label other than
# "Male"/"Female" becomes NaN rather than staying a string.
df1["Gender_dummy"] = df1["Gender"].map({"Male": 0, "Female": 1})

# Working frame without the original text column.
df2 = df1.drop(columns="Gender")

# Summary statistics of the working frame. This call has to come after df2 is
# created; placed before the assignment it raises a NameError.
print(df2.describe())

# Pairwise correlations (DataFrame.corr defaults to Pearson), drawn as an
# annotated heatmap.
correlations = df2.corr()
sns.heatmap(correlations, annot=True).set(
    title="Heatmap of Consumption Data - Pearson Correlations"
)
plt.show()  # render the figure when executed outside a notebook

# Response and predictors; add_constant appends the intercept column that
# sm.Logit does not add on its own.
y = df2["Purchased"]
X = df2[["Gender_dummy", "Age", "AnnualSalary"]]
X = sm.add_constant(X)

# sm.Logit takes the endog/exog arrays directly. The former `data=df2`
# argument belongs to the formula interface and was only an unknown keyword
# here, so it has been dropped.
log_reg = sm.Logit(y, X).fit()
print(log_reg.summary())

# Exponentiate the fitted log-odds coefficients to obtain odds ratios.
odds_ratios = pd.DataFrame({"OR": np.exp(log_reg.params)})
print(odds_ratios)
Editor is loading...