Untitled
unknown
plain_text
a year ago
1.4 kB
5
Indexable
"""Fit an OLS model on the iris data, then print ANOVA and VIF diagnostics.

The script regresses ``sepal_length`` on ``species``, ``petal_length`` and
``petal_width``, prints the ANOVA table for the fitted model, prints the
variance inflation factor of the two numeric predictors, and finally shows a
residuals-vs-fitted plot with a LOWESS trend line.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.formula.api import ols
from statsmodels.nonparametric.smoothers_lowess import lowess
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Load the iris dataset
iris = sns.load_dataset('iris')

# Create a new DataFrame with only the relevant columns
data = iris[['species', 'sepal_length', 'petal_length', 'petal_width']]

# Fit ordinary least squares (OLS) model
model = ols('sepal_length ~ species + petal_length + petal_width', data=data).fit()

# Perform ANOVA on the model
anova_table = anova_lm(model)

# Print the ANOVA table
print(anova_table)

# Check for multicollinearity using Variance Inflation Factor (VIF)
X = data[['petal_length', 'petal_width']]

# variance_inflation_factor regresses each column on the others exactly as
# given; without an intercept column the auxiliary regressions are forced
# through the origin and the resulting VIFs are inflated.  Append a constant
# (last, so the first X.shape[1] positions still line up with X.columns) and
# report VIFs for the real features only.
vif_design = X.assign(const=1.0)

vif_data = pd.DataFrame()
vif_data['Feature'] = X.columns
vif_data['VIF'] = [
    variance_inflation_factor(vif_design.values, i) for i in range(X.shape[1])
]
print("\nVariance Inflation Factor (VIF):\n", vif_data)

# Plot residuals vs. fitted values
plt.figure(figsize=(8, 6))
# x/y must be passed by keyword (positional use is rejected by current
# seaborn), and ``lowess`` is a boolean switch, not the smoother function.
sns.residplot(
    x=model.fittedvalues,
    y=model.resid,
    lowess=True,
    line_kws={'color': 'red'},
)
plt.title('Residuals vs Fitted')
plt.xlabel('Fitted values')
plt.ylabel('Residuals')
plt.show()
Editor is loading...
Leave a Comment