Untitled

 avatar
unknown
plain_text
a year ago
1.2 kB
4
Indexable
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns
import matplotlib.pyplot as plt

# Script overview: fit an OLS model of sepal length on species and petal
# measurements for the iris dataset, print its ANOVA table, report VIFs for
# the numeric predictors, and draw a residuals-vs-fitted diagnostic plot.

# Load the iris dataset
iris = sns.load_dataset('iris')

# Create a new DataFrame with only the relevant columns
data = iris[['species', 'sepal_length', 'petal_length', 'petal_width']]

# Fit ordinary least squares (OLS) model
model = ols('sepal_length ~ species + petal_length + petal_width', data=data).fit()

# Perform ANOVA on the model
anova_table = anova_lm(model)

# Print the ANOVA table
print(anova_table)

# Check for multicollinearity using Variance Inflation Factor (VIF)
X = data[['petal_length', 'petal_width']]
# variance_inflation_factor regresses one column on all the others exactly as
# given; it does not add an intercept. Append a constant column so the
# auxiliary regressions match the fitted model (which has an intercept), and
# report VIFs only for the real features, not for the constant itself.
X_design = X.assign(const=1.0)
vif_data = pd.DataFrame()
vif_data['Feature'] = X.columns
vif_data['VIF'] = [
    variance_inflation_factor(X_design.values, i) for i in range(X.shape[1])
]
print("\nVariance Inflation Factor (VIF):\n", vif_data)

# Plot residuals vs. fitted values
plt.figure(figsize=(8, 6))
# x and y are passed by keyword: recent seaborn releases make them
# keyword-only, so the positional form raises TypeError.
sns.residplot(
    x=model.fittedvalues,
    y=model.resid,
    lowess=True,
    line_kws={'color': 'red'},
)
plt.title('Residuals vs Fitted')
plt.xlabel('Fitted values')
plt.ylabel('Residuals')
plt.show()
Editor is loading...
Leave a Comment