# q1
"""Train a linear SVM to predict stroke and report precision/recall/F1.

Pipeline: load CSV -> impute BMI -> label-encode categoricals ->
split -> scale -> fit SVM -> compute metrics from the confusion matrix.
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix

# Load the dataset
file_path = '/mnt/data/healthcare-dataset-stroke-data.csv'
data = pd.read_csv(file_path)

# Drop the row identifier -- it carries no predictive signal.
data = data.drop(['id'], axis=1)

# Handle missing values by filling with the column mean (simple strategy).
data['bmi'] = data['bmi'].fillna(data['bmi'].mean())

# Encode categorical variables; keep the fitted encoders so labels can be
# inverse-transformed later if needed.
label_encoders = {}
for column in ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Features and target
X = data.drop('stroke', axis=1)
y = data['stroke']

# Split BEFORE scaling so the scaler never sees test data.
# (Bug fix: the original called scaler.fit_transform on all of X prior to
# the split, leaking test-set statistics into the training pipeline.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training fold only; apply the same transform to both.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# class_weight='balanced' compensates for the rare positive (stroke) class.
model = SVC(kernel='linear', probability=True, random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

# Make predictions on the held-out fold.
y_pred = model.predict(X_test)

# Confusion matrix: rows = true class, columns = predicted class.
cm = confusion_matrix(y_test, y_pred)

# Calculate precision, recall, and F1-score manually, guarding every
# denominator against zero (possible if a class is never predicted).
tp = cm[1, 1]  # True Positives
fp = cm[0, 1]  # False Positives
fn = cm[1, 0]  # False Negatives
tn = cm[0, 0]  # True Negatives

precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Print results
print("Confusion Matrix:")
print(cm)
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1_score}")
# Leave a Comment