# Untitled

# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.cluster import KMeans
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt

# Load the loan dataset from the current working directory.
data = pd.read_csv('Dataset loan.csv')

# Impute missing values. Forward fill copies the previous row's value down,
# but it cannot fill a gap in the first row(s) because nothing precedes them.
# The backward fill closes those leading gaps so that no NaN reaches the
# integer cast below or the estimators' fit() calls.
data.ffill(inplace=True)
data.bfill(inplace=True)

# Replace '3+' with 3 in Dependents column and convert to numeric
data['Dependents'] = data['Dependents'].replace('3+', 3).astype(int)

# Encode categorical variables as integer codes. Each fitted encoder is kept
# in `label_encoders` so the codes can be mapped back to the original labels.
label_encoders = {}
categorical_columns = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
# StandardScaler cannot handle text, so any other non-numeric feature column
# is encoded the same way. This adds nothing when every remaining column is
# already numeric.
# NOTE(review): the CSV schema is not visible here - confirm which extra
# columns (if any) this picks up and whether they belong in the features.
categorical_columns += [
    column
    for column in data.columns
    if column not in categorical_columns
    and column not in ('Loan_ID', 'LoanAmount')
    and not pd.api.types.is_numeric_dtype(data[column])
]
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Define features and target variable
X = data.drop(['Loan_ID', 'LoanAmount'], axis=1)  # Features (identifier and target removed)
y = data['LoanAmount']  # Target variable

# Split BEFORE scaling so the scaler's mean/variance are learned from the
# training rows only; fitting it on the full dataset would leak test-set
# statistics into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows, then apply the same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix on the training-derived scale, for code further down
# that works on all rows (clustering, the classification split). Splitting
# X_scaled with the same test_size and random_state yields the same row
# partition as above.
X_scaled = scaler.transform(X)

# Support Vector Regression
# Train an RBF-kernel SVR on the training split; fit() returns the fitted
# estimator, so construction and training are chained. Other kernels such as
# 'linear' or 'poly' can be tried here.
svr_model = SVR(kernel='rbf').fit(X_train, y_train)
svr_predictions = svr_model.predict(X_test)

# Table of actual vs. predicted loan amounts for the test rows.
svr_predictions_df = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': svr_predictions,
    }
)
print("Support Vector Regression Predictions:", svr_predictions_df, sep="\n")

# K-Nearest Neighbors Regression
# Each prediction is derived from the 5 nearest training samples.
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)

# Table of actual vs. predicted loan amounts for the test rows.
knn_predictions_df = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': knn_predictions,
    }
)
print("\nK-Nearest Neighbors Predictions:", knn_predictions_df, sep="\n")

# K-Means Clustering (exploratory only; the clusters are not used for prediction)
# Partition all scaled rows into 3 clusters with a fixed seed.
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_labels = kmeans.fit(X_scaled).labels_

# Attach each row's cluster id to the original frame and preview the result.
data['Cluster'] = cluster_labels
cluster_preview = data[['Loan_ID', 'Cluster']].head()
print("\nK-Means Clustering Results:", cluster_preview, sep="\n")

# Logistic Regression
# Logistic regression is a classifier, so for demonstration the continuous
# LoanAmount target is binarised: 1 when above the median, otherwise 0.
loan_amount_median = y.median()
y_class = y.gt(loan_amount_median).astype(int)

# Same split parameters as the regression split (20% test, seed 42).
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_scaled,
    y_class,
    test_size=0.2,
    random_state=42,
)

logistic_model = LogisticRegression().fit(X_train_class, y_train_class)
logistic_predictions = logistic_model.predict(X_test_class)

# Table of actual vs. predicted class labels for the test rows.
logistic_predictions_df = pd.DataFrame(
    {
        'Actual': y_test_class,
        'Predicted': logistic_predictions,
    }
)
print("\nLogistic Regression Predictions:", logistic_predictions_df, sep="\n")

# Linear Regression
# Ordinary least-squares fit on the same training split as the other regressors.
linear_model = LinearRegression().fit(X_train, y_train)
linear_predictions = linear_model.predict(X_test)

# Table of actual vs. predicted loan amounts for the test rows.
linear_predictions_df = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': linear_predictions,
    }
)
print("\nLinear Regression Predictions:", linear_predictions_df, sep="\n")

# Evaluation Metrics
# Report test-set MSE and R^2 for each of the three regressors. The logistic
# model is not included here.
print("\nEvaluation Metrics:")

# One entry per regressor: (MSE label, R^2 label, test-set predictions).
regression_reports = (
    ("SVR Mean Squared Error:", "SVR R^2 Score:", svr_predictions),
    ("KNN Mean Squared Error:", "KNN R^2 Score:", knn_predictions),
    ("Linear Regression Mean Squared Error:", "Linear Regression R^2 Score:", linear_predictions),
)
for mse_label, r2_label, predicted in regression_reports:
    print(mse_label, mean_squared_error(y_test, predicted))
    print(r2_label, r2_score(y_test, predicted))

# Visualization of Predictions
# Three side-by-side scatter plots of actual vs. predicted loan amount, one
# per regressor. The dashed red diagonal marks where predicted == actual.
plt.figure(figsize=(15, 5))

# End points of the diagonal, spanning the full range of the target.
diagonal = [y.min(), y.max()]

# One entry per panel: (title, test-set predictions, marker colour).
panels = (
    ('SVR: Actual vs Predicted', svr_predictions, 'blue'),
    ('KNN: Actual vs Predicted', knn_predictions, 'green'),
    ('Linear Regression: Actual vs Predicted', linear_predictions, 'orange'),
)
for position, (title, predicted, colour) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.scatter(y_test, predicted, color=colour, alpha=0.5)
    plt.plot(diagonal, diagonal, 'r--')  # Diagonal line
    plt.title(title)
    plt.xlabel('Actual Loan Amount')
    plt.ylabel('Predicted Loan Amount')

plt.tight_layout()
plt.show()
# Editor is loading...