COMPAS Dataset - Model Training
# Train and compare two recidivism classifiers (logistic regression and a
# single-hidden-layer MLP) on the COMPAS two-year dataset, then print the
# accuracy, AUC-ROC and per-class classification report for each model.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Load COMPAS dataset
compas_data = pd.read_csv('compas-scores-two-years.csv')

# Data Preprocessing
# NOTE(review): the published ProPublica CSV stores the charge degree as a
# categorical `c_charge_degree` column ('F' = felony, 'M' = misdemeanor), not
# as a pre-encoded `c_charge_degree_F` indicator -- confirm against your copy.
# Derive the indicator when it is missing so the feature selection below does
# not raise KeyError; a CSV that already has the column is used unchanged.
if 'c_charge_degree_F' not in compas_data.columns:
    compas_data['c_charge_degree_F'] = (
        compas_data['c_charge_degree'] == 'F'
    ).astype(int)

# Selecting relevant features and target variable
features = [
    'age',
    'priors_count',
    'juv_fel_count',
    'juv_misd_count',
    'juv_other_count',
    'c_charge_degree_F',
]
# get_dummies only expands object/categorical columns; numeric ones pass through.
X = pd.get_dummies(compas_data[features], drop_first=True)
y = compas_data['is_recid']  # Target variable: Recidivism

# Train-test split (70/30, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardizing the features.
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Logistic Regression Model
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

# Neural Network Model
mlp = MLPClassifier(hidden_layer_sizes=(50,), max_iter=1000, random_state=42)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

# Evaluation
# Accuracy
accuracy_log_reg = accuracy_score(y_test, y_pred_log_reg)
accuracy_mlp = accuracy_score(y_test, y_pred_mlp)

# AUC-ROC, scored on the predicted probability of the second class (column 1).
roc_log_reg = roc_auc_score(y_test, log_reg.predict_proba(X_test)[:, 1])
roc_mlp = roc_auc_score(y_test, mlp.predict_proba(X_test)[:, 1])

# Per-class precision / recall / F1.
# These are not false-positive / false-negative rates directly, but the two
# are related: FNR = 1 - recall('Recidivism') and
# FPR = 1 - recall('No Recidivism').
# The figures cover the whole test set; no per-group (e.g. by race or sex)
# breakdown is computed here, so this is not a group-fairness comparison.
# Assumes `is_recid` takes exactly the two values 0 and 1, in that order, for
# `target_names` to line up -- TODO confirm for your copy of the data.
report_log_reg = classification_report(
    y_test, y_pred_log_reg, target_names=['No Recidivism', 'Recidivism']
)
report_mlp = classification_report(
    y_test, y_pred_mlp, target_names=['No Recidivism', 'Recidivism']
)

# Output results
print(f"Logistic Regression - Accuracy: {accuracy_log_reg}, AUC-ROC: {roc_log_reg}")
print(f"Neural Network - Accuracy: {accuracy_mlp}, AUC-ROC: {roc_mlp}")
print("\nLogistic Regression Classification Report:\n", report_log_reg)
print("\nNeural Network Classification Report:\n", report_mlp)
Leave a Comment