Untitled
user_8080122
plain_text
2 years ago
953 B
2
Indexable
# Logistic regression of "Purchased" on gender, age and annual salary,
# followed by the odds ratios of the fitted coefficients.
#
# NOTE(review): `df1` is not created anywhere in the visible code; it is
# assumed to be a pandas DataFrame loaded earlier with at least the columns
# "Gender", "Age", "AnnualSalary" and "Purchased" -- confirm against the
# data-loading step.
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

# Statsmodels
import statsmodels.api as sm

# Scikit learn
from sklearn.model_selection import train_test_split
# Before we imported LinearRegression from scikit learn, now we import
# LogisticRegression
from sklearn.linear_model import LogisticRegression

# Per-column check for missing values. As a bare expression this is only
# displayed in an interactive session / notebook cell.
df1.isnull().any()

# Encode gender numerically (Male -> 0, Female -> 1) and drop the original
# text column so that every remaining column can enter the correlation matrix.
df1["Gender_dummy"] = df1["Gender"].replace({"Male": 0, "Female": 1})
df2 = df1.drop("Gender", axis=1)

# Summary statistics of the encoded frame. This call originally ran before
# `df2` was assigned, which raises NameError in a fresh interpreter.
df2.describe()

# Pairwise correlations (DataFrame.corr defaults to Pearson), shown as an
# annotated heatmap.
correlations = df2.corr()
sns.heatmap(correlations, annot=True).set(
    title="Heatmap of Consumption Data - Pearson Correlations"
)

# Response and predictors; add_constant adds the intercept column.
y = df2["Purchased"]
X = df2[["Gender_dummy", "Age", "AnnualSalary"]]
X = sm.add_constant(X)

# sm.Logit takes the response and design matrix directly. The stray
# `data=df2` keyword (only meaningful for the formula API) has been removed.
log_reg = sm.Logit(y, X).fit()
log_reg.summary()

# Exponentiate the fitted coefficients to express them as odds ratios.
odds_ratios = pd.DataFrame({"OR": log_reg.params})
odds_ratios = np.exp(odds_ratios)
print(odds_ratios)
Editor is loading...