# q1
# unknown
# plain_text
# a year ago
# 1.9 kB
# 14
# Indexable
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
# Load the dataset
file_path = '/mnt/data/healthcare-dataset-stroke-data.csv'
data = pd.read_csv(file_path)

# Data preprocessing
# Drop irrelevant columns
data = data.drop(['id'], axis=1)

# Encode categorical variables as integer codes, keeping each fitted encoder
# so the codes can be mapped back to the original category names later.
# The encoders only learn the category vocabulary (no statistics of the
# target or of numeric features), so fitting them before the split is safe
# and avoids "unseen label" errors at transform time.
label_encoders = {}
for column in ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Features and target
X = data.drop('stroke', axis=1)
y = data['stroke']

# Split data into training and testing sets BEFORE computing any statistics
# (imputation mean, scaling mean/variance). Computing them on the full
# dataset would leak information from the test rows into training and make
# the reported metrics optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Handle missing values by filling with the column mean (for simplicity).
# The mean is taken from the training rows only and reused for the test rows.
bmi_mean = X_train['bmi'].mean()
X_train = X_train.fillna({'bmi': bmi_mean})
X_test = X_test.fillna({'bmi': bmi_mean})

# Standardize features: fit the scaler on the training set only, then apply
# the same learned transformation to the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Build a linear-kernel SVM and fit it on the training split in one step
# (fit() returns the estimator itself). class_weight='balanced' reweights
# the classes inversely to their frequency in y_train.
# NOTE(review): probability=True enables predict_proba but adds an internal
# cross-validation pass during fit; nothing in the visible script reads
# probabilities -- confirm whether it is still needed.
model = SVC(
    kernel='linear',
    probability=True,
    random_state=42,
    class_weight='balanced',
).fit(X_train, y_train)

# Predict class labels for the held-out test rows
y_pred = model.predict(X_test)
# Confusion Matrix
# Pin the label order so the matrix is always 2x2, even if one class never
# appears in y_test/y_pred; without `labels` the matrix could collapse to
# 1x1 and the indexing below would raise IndexError.
# Assumes the target is coded 0 (negative) / 1 (positive) -- TODO confirm.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Calculate precision, recall, and F1-score manually.
# Rows are actual classes, columns are predicted classes, so the flattened
# 2x2 matrix reads: TN, FP, FN, TP.
tn, fp, fn, tp = cm.ravel()

# Each ratio falls back to 0 when its denominator is zero (e.g. the model
# predicted no positives at all) instead of dividing by zero.
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
# Report the confusion matrix and the derived metrics, one item per line
print(
    "Confusion Matrix:",
    cm,
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1-Score: {f1_score}",
    sep="\n",
)
# Editor is loading...
# Leave a Comment