# Step 1: Import necessary libraries and set up paths
import os
import glob
import pandas as pd
import polars as pl
from sklearn.metrics import roc_auc_score

# Define key directories
ROOT_DIR = "/home/sagemaker-user/rafi"
MEDS_DIR = f"{ROOT_DIR}/mimic-3.1-meds_rafi-aws"
TENSOR_DIR = f"{ROOT_DIR}/triplet_tensors"
TASK_NAME = "mortality/in_hospital/first_24h"
TASKS_DIR = f"{MEDS_DIR}/tasks"

# Path to the trained model directory
TRAIN_OUTPUT_DIR = f"{ROOT_DIR}/results/triplet_mtr/{TASK_NAME}/supervised/train"

# Step 2: Find the latest training checkpoint
# This command gets the path to the most recent training directory and adds the checkpoint path
!meds-torch-latest-dir path={TRAIN_OUTPUT_DIR}
TRAIN_CKPT_PATH = !echo "$(meds-torch-latest-dir path={TRAIN_OUTPUT_DIR})/checkpoints/best_model.ckpt"
TRAIN_CKPT_PATH = TRAIN_CKPT_PATH[0]
print(f"Using model checkpoint: {TRAIN_CKPT_PATH}")

# Step 3: Create a directory for prediction outputs
PREDICT_OUTPUT_DIR = f"{ROOT_DIR}/results/predictions/{TASK_NAME}"
!mkdir -p {PREDICT_OUTPUT_DIR}
print(f"Predictions will be stored in: {PREDICT_OUTPUT_DIR}")

# Step 4: Run inference using the trained model
# The command runs the prediction on the test dataset
predict_cmd = f"""
meds-torch-predict \\
    experiment=triplet_mtr \\
    ckpt_path={TRAIN_CKPT_PATH} \\
    paths.data_dir={TENSOR_DIR} \\
    paths.meds_cohort_dir={MEDS_DIR} \\
    paths.output_dir={PREDICT_OUTPUT_DIR} \\
    data.task_name={TASK_NAME} \\
    data.task_root_dir={TASKS_DIR} \\
    data.do_include_subject_id=True \\
    data.do_include_prediction_time=True \\
    hydra.searchpath=[pkg://meds_torch.configs,{ROOT_DIR}/meds-torch/MIMICIV_INDUCTIVE_EXPERIMENTS/configs/meds-torch-configs]
"""
print("Running prediction command:")
print(predict_cmd)
!{predict_cmd}

# Step 5: Find and load the prediction results
PREDICT_OUTPUT_FP = !echo $(meds-torch-latest-dir path={PREDICT_OUTPUT_DIR})/
PREDICT_OUTPUT_FP = PREDICT_OUTPUT_FP[0] + "/predict.parquet"
print(f"Loading predictions from: {PREDICT_OUTPUT_FP}")

# Step 6: Analyze the prediction results
predictions_df = pl.read_parquet(PREDICT_OUTPUT_FP)
print("Prediction data preview:")
print(predictions_df.head())

# Step 7: Calculate AUC score to evaluate model performance
auc_score = roc_auc_score(predictions_df['boolean_value'], predictions_df['predicted_boolean_probability'])
print(f"ROC AUC Score: {auc_score:.4f}")

# Step 8: Analyze performance in more detail
# Show confusion matrix statistics
true_positives = ((predictions_df['boolean_value'] == True) & (predictions_df['predicted_boolean'] == True)).sum()
false_positives = ((predictions_df['boolean_value'] == False) & (predictions_df['predicted_boolean'] == True)).sum()
true_negatives = ((predictions_df['boolean_value'] == False) & (predictions_df['predicted_boolean'] == False)).sum()
false_negatives = ((predictions_df['boolean_value'] == True) & (predictions_df['predicted_boolean'] == False)).sum()

print("\nConfusion Matrix Statistics:")
print(f"True Positives: {true_positives}")
print(f"False Positives: {false_positives}")
print(f"True Negatives: {true_negatives}")
print(f"False Negatives: {false_negatives}")

# Step 9: Calculate additional metrics
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

print("\nAdditional Metrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")