Untitled
unknown
plain_text
3 months ago
4.8 kB
18
Indexable
# ============================================
# Step 1: Import Libraries
# ============================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
# ============================================
# Step 2: Load the Dataset
# ============================================
# Wrap the scikit-learn diabetes data in pandas objects: X holds the
# feature columns (named after the dataset's feature names) and y holds
# the regression target.
diabetes = load_diabetes()
X = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
y = pd.Series(data=diabetes.target, name="target")

# Report the dimensions of both objects, then preview the feature table.
for _caption, _obj in (("Shape of X:", X), ("Shape of y:", y)):
    print(_caption, _obj.shape)
print("\nFirst 5 rows of dataset:", X.head(), sep="\n")
# ============================================
# Step 3: Exploratory Data Analysis (EDA)
# ============================================
# Descriptive statistics for every feature column.
print("\nSummary Statistics:", X.describe(), sep="\n")

# Correlation Heatmap: annotated pairwise feature correlations.
plt.figure(figsize=(10, 8))
sns.heatmap(X.corr(), annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap of Diabetes Features")
plt.show()

# Scatter plots for selected features: one panel per feature, each
# plotted against the target, laid out side by side in a single row.
features = ['bmi', 'bp', 's5']
fig, axes = plt.subplots(1, len(features), figsize=(12, 5))
for ax, col in zip(axes, features):
    ax.scatter(X[col], y, alpha=0.6)
    ax.set_xlabel(col)
    ax.set_ylabel("Target")
    ax.set_title(f"{col} vs Target")
fig.tight_layout()
plt.show()
# ============================================
# Step 4: Correlation Analysis & Feature Dropping
# ============================================
# Features whose absolute correlation with another feature reaches this
# value are treated as redundant.
CORR_THRESHOLD = 0.8

# Absolute pairwise correlations: only the strength matters here, not the
# sign.
corr_matrix = X.corr().abs()

# Keep only the strict upper triangle (k=1 excludes the diagonal) so each
# feature pair is examined once; the masked-out cells become NaN.
upper = corr_matrix.where(
    np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
)

# A column is dropped when it correlates at or above the threshold with
# any column that precedes it. NaN cells compare as False, so they never
# trigger a drop.
# NOTE(review): this selection uses every row of X, i.e. it runs before
# the train/test split in Step 5 -- confirm that is intended.
to_drop = upper.columns[(upper >= CORR_THRESHOLD).any(axis=0)].tolist()
print("\nHighly correlated columns to drop:", to_drop)

X_reduced = X.drop(columns=to_drop)
print("Shape after feature reduction:", X_reduced.shape)
# ============================================
# Step 5: Train-Test Split
# ============================================
# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the partition the same from run to run.
_split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(
    X_reduced, y, **_split_options
)
# ============================================
# Step 6: Model Training
# ============================================
# Candidate regressors, keyed by the label used in the printed report and
# in the comparison table.
models = {
    "Linear Regression": LinearRegression(),
    "Lasso": Lasso(alpha=0.1),
    "Ridge": Ridge(alpha=1.0),
}

# Fit each model on the training split, score it on the held-out split,
# and record both metrics under the model's label.
results = {}
for name, estimator in models.items():
    predictions = estimator.fit(X_train, y_train).predict(X_test)
    scores = {
        "MSE": mean_squared_error(y_test, predictions),
        "R2": r2_score(y_test, predictions),
    }
    results[name] = scores

    print(f"\n{name}")
    print("MSE:", scores["MSE"])
    print("R2 Score:", scores["R2"])
# ============================================
# Step 7: Result Comparison
# ============================================
# One row per model, one column per metric.
results_df = pd.DataFrame(results).T
print("\nModel Comparison:")
print(results_df)

# Optional: Plot comparison
# Each metric gets its own axes. MSE is expressed in squared target units
# while R2 is unitless and at most 1, so drawing both against one shared
# y-axis can flatten the R2 bars until they are unreadable.
fig, axes = plt.subplots(
    1, len(results_df.columns), figsize=(10, 5), squeeze=False
)
for ax, metric in zip(axes[0], results_df.columns):
    results_df[metric].plot(kind='bar', ax=ax, rot=0)
    ax.set_title(metric)
    ax.set_ylabel("Score")
    ax.grid(axis='y')
fig.suptitle("Model Performance Comparison")
fig.tight_layout()
plt.show()
}
)
)Editor is loading...
Leave a Comment