# Untitled
#
# avatar
# unknown
# python
# a month ago
# 1.9 kB
# 5
# Indexable
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.model_selection import StratifiedKFold
from sksurv.datasets import load_veterans_lung_cancer
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.metrics import concordance_index_censored
from sksurv.util import Surv

# 1️⃣ data ---------------------------------------------------------------------
# NOTE(review): `df_cleaned2` is not defined in this snippet; it must already
# exist in the running session (e.g. an earlier notebook cell) and contain the
# columns "time" and "event" plus the feature columns.
# The earlier call to load_veterans_lung_cancer() was removed: its X, y and
# event were overwritten immediately by the assignments below.
X = df_cleaned2.drop(["time", "event"], axis=1)
event = df_cleaned2["event"]             # 1 = event, 0 = censored; used for stratification

# scikit-survival estimators and metrics expect the target as a structured
# array with a boolean event field, not a DataFrame.
y = Surv.from_dataframe("event", "time", df_cleaned2)


def scale_5pct(X):
    """Scale the feature matrix ``X`` (placeholder).

    The implementation is elided in the source (``return ...`` yields
    ``Ellipsis``) and must be filled in before the script can run end to end.

    NOTE(review): the function is called separately on the training and the
    test fold. If the intended scaling derives statistics from the data,
    compute them on the training fold only and reuse them for the test fold.
    """
    return ...


# 2️⃣ stratified CV -----------------------------------------------------------
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cindex_scores = []

for train_idx, test_idx in cv.split(X, event):
    # cv.split yields positional indices, so rows must be selected with .iloc;
    # plain X[train_idx] would be interpreted as a column lookup.
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Imputation: means are learned from the training fold only and then
    # applied to the held-out fold.
    imputer = SimpleImputer(strategy="mean")
    # The imputer returns a plain ndarray, so column names and the row index
    # are restored from the input frames.
    # NOTE(review): a column that is entirely missing in a training fold is
    # dropped by SimpleImputer by default, which would break this re-labelling.
    X_train_imputed = pd.DataFrame(
        imputer.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index,
    )
    X_test_imputed = pd.DataFrame(
        imputer.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )

    # Scaling
    X_train_scaled = scale_5pct(X_train_imputed)
    X_test_scaled = scale_5pct(X_test_imputed)

    model = CoxPHSurvivalAnalysis().fit(X_train_scaled, y_train)   # fit on training fold
    risk = model.predict(X_test_scaled)                            # evaluate on val fold

    # concordance_index_censored returns a 5-tuple
    # (cindex, concordant, discordant, tied_risk, tied_time); keep the index only.
    cidx = concordance_index_censored(
        y_test["event"], y_test["time"], risk
    )[0]
    cindex_scores.append(cidx)

cindex_scores = np.asarray(cindex_scores)
print("C-index per fold:", cindex_scores.round(3))
print("Mean C-index:    ", cindex_scores.mean().round(3))
# Editor is loading...
# Leave a Comment