Untitled
unknown
plain_text
a year ago
1.2 kB
4
Indexable
# Fit an OLS model of sepal length on the iris dataset and run basic
# diagnostics: an ANOVA table, variance inflation factors for the numeric
# predictors, and a residuals-vs-fitted plot.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load the iris dataset
iris = sns.load_dataset('iris')

# Create a new DataFrame with only the relevant columns
data = iris[['species', 'sepal_length', 'petal_length', 'petal_width']]

# Fit ordinary least squares (OLS) model; the formula interface treats the
# string-valued `species` column as categorical and adds an intercept.
model = ols('sepal_length ~ species + petal_length + petal_width', data=data).fit()

# Perform ANOVA on the model (anova_lm defaults to typ=1, i.e. sequential
# sums of squares, so the term order in the formula matters).
anova_table = anova_lm(model)

# Print the ANOVA table
print(anova_table)

# Check for multicollinearity using Variance Inflation Factor (VIF)
X = data[['petal_length', 'petal_width']]

# variance_inflation_factor regresses each column on the remaining columns
# and does NOT add an intercept on its own. Without a constant column the
# result is the uncentered VIF, which is inflated for predictors whose mean
# is far from zero. Prepend a column of ones and report VIF only for the
# real predictors (column 0 is the intercept).
design_matrix = np.column_stack([np.ones(len(X)), X.to_numpy()])
vif_data = pd.DataFrame()
vif_data['Feature'] = X.columns
vif_data['VIF'] = [
    variance_inflation_factor(design_matrix, i)
    for i in range(1, design_matrix.shape[1])
]
print("\nVariance Inflation Factor (VIF):\n", vif_data)

# Plot residuals vs. fitted values
plt.figure(figsize=(8, 6))
# x and y are passed by keyword: recent seaborn releases make them
# keyword-only (the first positional parameter is `data`), so the positional
# form raises a TypeError there. The keyword form works on older releases too.
sns.residplot(
    x=model.fittedvalues,
    y=model.resid,
    lowess=True,
    line_kws={'color': 'red'},
)
plt.title('Residuals vs Fitted')
plt.xlabel('Fitted values')
plt.ylabel('Residuals')
plt.show()
Editor is loading...
Leave a Comment