Untitled
# Loan-amount modelling demo.
#
# Reads 'Dataset loan.csv', imputes and label-encodes it, then:
#   * fits SVR, K-Nearest Neighbors and linear regression on LoanAmount,
#   * clusters the scaled feature rows with K-Means (demonstration only),
#   * fits a logistic regression on an above/below-median split of LoanAmount.
# Predictions and MSE / R^2 scores are printed, and actual-vs-predicted
# scatter plots are shown for the three regressors.

# Import necessary libraries
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVR

# Load your dataset
data = pd.read_csv('Dataset loan.csv')

# Handle missing values if any. Forward fill cannot fill a gap in the first
# row(s) of a column, so a backward fill follows to cover those; otherwise the
# integer cast and the model fits below would fail on the leftover NaNs.
data = data.ffill().bfill()

# Replace '3+' with 3 in Dependents column and convert to numeric
data['Dependents'] = data['Dependents'].replace('3+', 3).astype(int)

# Encode categorical variables
categorical_columns = ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area']
# NOTE(review): the CSV schema is not visible from this script. Any other
# non-numeric feature column would make StandardScaler raise, so such columns
# are encoded as well. With no extra text columns this adds nothing.
categorical_columns += [
    column
    for column in data.select_dtypes(exclude='number').columns
    if column not in categorical_columns and column not in ('Loan_ID', 'LoanAmount')
]

label_encoders = {}
for column in categorical_columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le  # kept so the integer codes can be mapped back

# Define features and target variable
X = data.drop(['Loan_ID', 'LoanAmount'], axis=1)  # Features
y = data['LoanAmount']  # Target variable

# Binary target for the logistic-regression demo: 1 when the loan amount is
# above the median, 0 otherwise.
y_class = (y > y.median()).astype(int)

# Split once so the regression and classification targets share the same rows.
(
    X_train_raw,
    X_test_raw,
    y_train,
    y_test,
    y_train_class,
    y_test_class,
) = train_test_split(X, y, y_class, test_size=0.2, random_state=42)

# Scale the features. The scaler is fitted on the training rows only, so the
# test rows' mean/variance do not leak into the models being evaluated.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# The classification demo uses the same scaled rows as the regressors.
X_train_class, X_test_class = X_train, X_test

# K-Means is unsupervised and is fitted on every row, so it gets its own
# scaling of the full feature table.
X_scaled = StandardScaler().fit_transform(X)

# Support Vector Regression
svr_model = SVR(kernel='rbf')  # You can also try 'linear', 'poly', etc.
svr_model.fit(X_train, y_train)
svr_predictions = svr_model.predict(X_test)

# Display SVR predictions
svr_predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': svr_predictions})
print("Support Vector Regression Predictions:")
print(svr_predictions_df)

# K-Nearest Neighbors Regression
knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)
knn_predictions = knn_model.predict(X_test)

# Display KNN predictions
knn_predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': knn_predictions})
print("\nK-Nearest Neighbors Predictions:")
print(knn_predictions_df)

# K-Means Clustering (for demonstration, not for prediction)
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_scaled)  # Fit on scaled features
data['Cluster'] = kmeans.labels_  # Add cluster labels to the original data
print("\nK-Means Clustering Results:")
print(data[['Loan_ID', 'Cluster']].head())  # First few rows with cluster labels

# Logistic Regression (Note: This is typically for classification, not
# regression). It is trained on the binary above-median target built earlier.
logistic_model = LogisticRegression()
logistic_model.fit(X_train_class, y_train_class)
logistic_predictions = logistic_model.predict(X_test_class)

# Display Logistic Regression predictions
logistic_predictions_df = pd.DataFrame({'Actual': y_test_class, 'Predicted': logistic_predictions})
print("\nLogistic Regression Predictions:")
print(logistic_predictions_df)

# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
linear_predictions = linear_model.predict(X_test)

# Display Linear Regression predictions
linear_predictions_df = pd.DataFrame({'Actual': y_test, 'Predicted': linear_predictions})
print("\nLinear Regression Predictions:")
print(linear_predictions_df)

# (label, test-set predictions, scatter colour) for each regressor; drives both
# the metric report and the plots so the two stay in sync.
regression_results = [
    ('SVR', svr_predictions, 'blue'),
    ('KNN', knn_predictions, 'green'),
    ('Linear Regression', linear_predictions, 'orange'),
]

# Evaluation Metrics
print("\nEvaluation Metrics:")
for name, predictions, _ in regression_results:
    print(f"{name} Mean Squared Error:", mean_squared_error(y_test, predictions))
    print(f"{name} R^2 Score:", r2_score(y_test, predictions))

# Visualization of Predictions
plt.figure(figsize=(15, 5))
diagonal = [y.min(), y.max()]  # Endpoints of the "perfect prediction" line
for position, (name, predictions, color) in enumerate(regression_results, start=1):
    plt.subplot(1, len(regression_results), position)
    plt.scatter(y_test, predictions, color=color, alpha=0.5)
    plt.plot(diagonal, diagonal, 'r--')  # Diagonal line
    plt.title(f'{name}: Actual vs Predicted')
    plt.xlabel('Actual Loan Amount')
    plt.ylabel('Predicted Loan Amount')

plt.tight_layout()
plt.show()
Leave a Comment