Untitled

 avatar
unknown
plain_text
a year ago
1.4 kB
5
Indexable
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.nonparametric.smoothers_lowess import lowess  # Add this import

# Fetch the iris measurements through seaborn's dataset loader.
iris = sns.load_dataset('iris')

# Keep the grouping factor, the response and the two petal predictors.
data = iris.loc[:, ['species', 'sepal_length', 'petal_length', 'petal_width']]

# Regress sepal length on species plus both petal measurements (OLS),
# building the model from the formula first and fitting it second.
model = ols(
    'sepal_length ~ species + petal_length + petal_width',
    data=data,
).fit()

# Build the ANOVA table for the fitted model and show it.
anova_table = anova_lm(model)
print(anova_table)

# Check for multicollinearity using the Variance Inflation Factor (VIF).
#
# variance_inflation_factor regresses one column of the matrix it is given on
# all the remaining columns and does not add an intercept on its own.  A
# constant column is therefore appended to the design matrix; without it the
# auxiliary regressions are forced through the origin and the resulting VIFs
# are overstated for predictors that are not centred on zero.
X = data[['petal_length', 'petal_width']]
vif_design = X.assign(const=1.0)  # constant goes last, so X's column indices are unchanged

vif_data = pd.DataFrame()
vif_data['Feature'] = X.columns
# Report only the real predictors; the VIF of the constant itself is not of interest.
vif_data['VIF'] = [
    variance_inflation_factor(vif_design.values, idx)
    for idx in range(X.shape[1])
]
print("\nVariance Inflation Factor (VIF):\n", vif_data)

# Plot residuals vs. fitted values to eyeball non-linearity / heteroscedasticity.
plt.figure(figsize=(8, 6))
# x and y must be passed by keyword: in current seaborn the first positional
# parameter of residplot is `data`, and x/y are keyword-only.  `lowess` is a
# boolean switch that asks seaborn to overlay a LOWESS smoother; it does not
# take the statsmodels lowess function itself.
sns.residplot(
    x=model.fittedvalues,
    y=model.resid,
    lowess=True,
    line_kws={'color': 'red'},
)
plt.title('Residuals vs Fitted')
plt.xlabel('Fitted values')
plt.ylabel('Residuals')
plt.show()
Editor is loading...
Leave a Comment