Untitled

 avatar
unknown
plain_text
13 days ago
14 kB
4
Indexable
```{r pih-onset-plot}
ggplot(pih_onset, aes(x = onset_s / 60)) +
  geom_histogram(binwidth = 1, boundary = 0) +
  geom_vline(xintercept = median(pih_onset$onset_s / 60),
             linetype = "dashed", colour = "red") +
  annotate("text",
           x = median(pih_onset$onset_s / 60) + 0.3,
           y = Inf, vjust = 1.5, hjust = 0,
           label = sprintf("Median: %.1f min", median(pih_onset$onset_s / 60)),
           colour = "red", size = 3.5) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) +
  labs(
    title = "Time of first hypotension onset after induction",
    x     = "Time after induction (minutes)",
    y     = "Number of patients"
  )
```

```{r hrv-cases-build}
# Build the case list for HRV analysis from the final case_data_withPPF
hrv_cases <- tibble(
  caseid    = as.integer(names(case_data_withPPF)),
  ane_start = sapply(case_data_withPPF, \(x) as.numeric(x$PPF_start))
) |>
  filter(!is.na(ane_start)) |>
  left_join(
    pih_resultater |> mutate(caseid = as.integer(caseid)),
    by = "caseid"
  )

cat("Cases til HRV-analyse:", nrow(hrv_cases), "\n")
cat("Med PIH:", sum(hrv_cases$pih, na.rm = TRUE),
    "  Uden PIH:", sum(!hrv_cases$pih, na.rm = TRUE), "\n")
```

```{r hrv-helpers}
# ── R-peak detection ─────────────────────────────────────────────────────────
# Local adaptive threshold per 10-second chunk. Avoids surgical artifact
# (cautery, defibrillation) inflating a global threshold above true R-peaks.
detect_r_peaks <- function(ecg_signal, fs = 500) {
  n          <- length(ecg_signal)
  chunk_size <- fs * 10
  min_dist   <- floor(fs * 0.3)  # 300 ms refractory (max ~200 bpm)

  all_peaks <- c()
  for (start in seq(1, n, by = chunk_size)) {
    end   <- min(start + chunk_size - 1, n)
    chunk <- ecg_signal[start:end]
    local_thr <- 0.4 * quantile(chunk, 0.99, na.rm = TRUE)

    above <- which(chunk > local_thr)
    if (length(above) < 2) next

    groups      <- cumsum(c(1, diff(above) > min_dist / 2))
    local_peaks <- as.integer(tapply(above, groups, function(i) i[which.max(chunk[i])]))
    all_peaks   <- c(all_peaks, local_peaks + start - 1)
  }

  all_peaks <- sort(unique(all_peaks))
  if (length(all_peaks) > 1) {
    keep <- rep(TRUE, length(all_peaks))
    for (i in 2:length(all_peaks)) {
      if (all_peaks[i] - all_peaks[i - 1] < min_dist) keep[i] <- FALSE
    }
    all_peaks <- all_peaks[keep]
  }
  all_peaks
}

# ── Artifact correction ──────────────────────────────────────────────────────
# Step 1: remove physiologically implausible RR (HR 30–200 bpm).
# Step 2: Malik criterion — remove beats where RR deviates >20% from
#          a 5-beat running median (flags ectopic beats and missed detections).
artifact_correct_rr <- function(times, rr) {
  valid <- rr > 0.3 & rr < 2.0
  times <- times[valid]; rr <- rr[valid]
  if (length(rr) < 5) return(tibble(Time = times, RR = rr))

  k <- min(5, length(rr))
  if (k %% 2 == 0) k <- k - 1
  if (k >= 3) {
    local_med <- runmed(rr, k = k)
    keep <- abs(rr - local_med) / local_med < 0.20
    times <- times[keep]; rr <- rr[keep]
  }
  tibble(Time = times, RR = rr)
}

# ── Time-domain HRV ──────────────────────────────────────────────────────────
# RMSSD and SDNN reported in ms; HR in bpm.
compute_time_domain <- function(rr_s) {
  rr_ms <- rr_s * 1000
  tibble(
    RMSSD = sqrt(mean(diff(rr_ms)^2)),
    SDNN  = sd(rr_ms),
    HR    = 60 / mean(rr_s)
  )
}

# ── Frequency-domain HRV ─────────────────────────────────────────────────────
# Interpolates RR intervals to a 4 Hz evenly sampled signal, then estimates
# power spectral density via a smoothed periodogram (modified Daniell kernel).
#   LF band: 0.04–0.15 Hz  |  HF band: 0.15–0.40 Hz
#
# WHY LF/HF IS NOT COMPUTED IN SHORT (60-SECOND) WINDOWS:
# Reliable LF estimation requires multiple complete cycles at the lowest LF
# frequency (0.04 Hz = 25-second period). At minimum ~2 minutes of stationary
# data are recommended. The 60-second peri-induction windows (a) contain too
# few LF cycles for stable spectral estimation, and (b) are physiologically
# non-stationary (rapidly changing autonomic state during induction), making
# any LF or LF/HF estimate unreliable and uninterpretable.
compute_freq_domain <- function(rr_times, rr_values, fs = 4) {
  if (length(rr_values) < 20 || diff(range(rr_times)) < 60) {
    return(tibble(LF = NA_real_, HF = NA_real_, LF_HF = NA_real_))
  }

  t_even    <- seq(min(rr_times), max(rr_times), by = 1 / fs)
  rr_interp <- approx(rr_times, rr_values, xout = t_even, rule = 2)$y
  rr_interp <- rr_interp - mean(rr_interp)

  n <- length(rr_interp)
  if (n < 128) return(tibble(LF = NA_real_, HF = NA_real_, LF_HF = NA_real_))

  sp      <- spec.pgram(ts(rr_interp, frequency = fs),
                        spans = c(7, 7), taper = 0.1, plot = FALSE)
  freq    <- sp$freq
  power   <- sp$spec
  df_freq <- freq[2] - freq[1]

  lf <- sum(power[freq >= 0.04 & freq <= 0.15]) * df_freq
  hf <- sum(power[freq >= 0.15 & freq <= 0.40]) * df_freq
  tibble(LF = lf, HF = hf, LF_HF = lf / hf)
}

# ── Per-case HRV computation ─────────────────────────────────────────────────
compute_case_hrv <- function(cid, ane_start) {
  tryCatch({
    ecg_df <- VitalDBR::load_case(tname = "SNUADC/ECG_II", caseid = cid)
    if (is.null(ecg_df) || nrow(ecg_df) == 0) return(NULL)

    ecg_col <- names(ecg_df)[2]

    ecg_win <- ecg_df |>
      filter(Time >= max(0, ane_start - 310), Time <= ane_start + 130)
    if (nrow(ecg_win) < 1000) return(NULL)

    R_idx <- detect_r_peaks(ecg_win[[ecg_col]], fs = 500)
    if (length(R_idx) < 10) return(NULL)
    r_times   <- ecg_win$Time[R_idx]
    rr_values <- diff(r_times)
    rr_times  <- r_times[-1]

    clean <- artifact_correct_rr(rr_times, rr_values)
    if (nrow(clean) < 10) return(NULL)

    # ── Baseline: -300 to -60 s relative to induction ──
    bl_start <- max(0, ane_start - 300)
    bl_end   <- ane_start - 60
    bl       <- clean |> filter(Time >= bl_start, Time < bl_end)
    if (nrow(bl) < 10) return(NULL)

    bl_td <- compute_time_domain(bl$RR)
    bl_fd <- compute_freq_domain(bl$Time, bl$RR)

    baseline_row <- tibble(
      caseid       = cid,
      window_type  = "baseline",
      window_start = -300,
      window_end   = -60,
      RMSSD = bl_td$RMSSD, SDNN = bl_td$SDNN, HR = bl_td$HR,
      LF = bl_fd$LF, HF = bl_fd$HF, LF_HF = bl_fd$LF_HF,
      delta_RMSSD = 0, delta_SDNN = 0, delta_HR = 0
    )

    # ── Peri-induction: sliding 60-s windows, 30-s step ──
    peri_rows <- map(seq(-60, 60, by = 30), function(ws) {
      we     <- ws + 60
      win_rr <- clean |> filter(Time >= ane_start + ws, Time < ane_start + we)
      if (nrow(win_rr) < 5) return(NULL)

      td <- compute_time_domain(win_rr$RR)
      tibble(
        caseid       = cid,
        window_type  = "peri-induction",
        window_start = ws,
        window_end   = we,
        RMSSD = td$RMSSD, SDNN = td$SDNN, HR = td$HR,
        LF = NA_real_, HF = NA_real_, LF_HF = NA_real_,
        delta_RMSSD = td$RMSSD - bl_td$RMSSD,
        delta_SDNN  = td$SDNN  - bl_td$SDNN,
        delta_HR    = td$HR    - bl_td$HR
      )
    }) |> list_rbind()

    bind_rows(baseline_row, peri_rows)
  }, error = function(e) NULL)
}
```

```{r hrv-pilot, cache=TRUE}
library(tidyverse)
# ── Pilot run: 50 random cases ────────────────────────────────────────────────
# Run this first to confirm the pipeline works before the full batch.
set.seed(4821)
pilot_cases <- hrv_cases |> slice_sample(n = 50)

pilot_results <- map2(
  pilot_cases$caseid,
  pilot_cases$ane_start,
  \(cid, ane) {
    res <- compute_case_hrv(cid, ane)
    for (j in 3:127) try(close(getConnection(j)), silent = TRUE)
    res
  },
  .progress = TRUE
) |>
  list_rbind() |>
  left_join(
    pih_resultater |> mutate(caseid = as.integer(caseid)) |> select(caseid, pih),
    by = "caseid"
  )

n_pilot <- n_distinct(pilot_results$caseid)
cat(sprintf("Pilot: %d/%d cases returned HRV data (%d windows)\n",
            n_pilot, nrow(pilot_cases), nrow(pilot_results)))
cat(sprintf("PIH: %d  |  No PIH: %d\n",
            n_distinct(pilot_results$caseid[pilot_results$pih]),
            n_distinct(pilot_results$caseid[!pilot_results$pih])))

pilot_results |>
  filter(window_type == "baseline") |>
  group_by(pih) |>
  summarise(
    n            = n(),
    median_RMSSD = round(median(RMSSD, na.rm = TRUE), 1),
    median_SDNN  = round(median(SDNN,  na.rm = TRUE), 1),
    median_HR    = round(median(HR,    na.rm = TRUE), 1),
    .groups = "drop"
  ) |>
  print()
```

```{r hrv-compute}
# ── Full batch HRV computation ────────────────────────────────────────────────
# Processes all cases in hrv_cases in batches of 20.
# Prints progress after each batch. Expect this to take several hours.
# Results are saved as both RDS (reload without re-running) and CSV.
batch_size  <- 20
n_hrv       <- nrow(hrv_cases)
n_batches   <- ceiling(n_hrv / batch_size)
hrv_batches <- vector("list", n_batches)

for (b in seq_len(n_batches)) {
  idx <- ((b - 1) * batch_size + 1):min(b * batch_size, n_hrv)
  hrv_batches[[b]] <- map2(
    hrv_cases$caseid[idx],
    hrv_cases$ane_start[idx],
    compute_case_hrv,
    .progress = TRUE
  ) |> list_rbind()
  for (j in 3:127) try(close(getConnection(j)), silent = TRUE)
  gc()
  cat(sprintf("HRV batch %d/%d done  (%d/%d cases)\n",
              b, n_batches, min(b * batch_size, n_hrv), n_hrv))
}

hrv_features <- list_rbind(hrv_batches) |>
  left_join(
    pih_resultater |> mutate(caseid = as.integer(caseid)),
    by = "caseid"
  )

# Save as RDS so you never have to re-run this
saveRDS(hrv_features, "~/Desktop/hrv_features.rds")
write_csv(hrv_features, "~/Desktop/hrv_features.csv")
cat(sprintf("Saved: %d rows, %d patients\n",
            nrow(hrv_features), n_distinct(hrv_features$caseid)))
```

```{r hrv-load}
# ── Load saved HRV results (use this instead of re-running hrv-compute) ───────
hrv_features <- readRDS("~/Desktop/hrv_features.rds")
cat(sprintf("Loaded: %d rows, %d patients\n",
            nrow(hrv_features), n_distinct(hrv_features$caseid)))
```

```{r hrv-summary}
for (i in 3:127) try(close(getConnection(i)), silent = TRUE)

n_patients <- n_distinct(hrv_features$caseid)
cat(sprintf("HRV computed for %d patients (%d total windows)\n\n", n_patients, nrow(hrv_features)))

# Baseline summary
hrv_features |>
  filter(window_type == "baseline") |>
  summarise(
    n = n(),
    across(c(RMSSD, SDNN, HR, LF, HF, LF_HF),
           list(median = ~median(., na.rm = TRUE), iqr = ~IQR(., na.rm = TRUE)),
           .names = "{.col}_{.fn}")
  ) |>
  pivot_longer(everything(), names_to = "metric", values_to = "value") |>
  knitr::kable(digits = 2, caption = "Baseline HRV summary (median [IQR])")
```
```{r hrv-peri-plot}
# Peri-induction HRV trajectories split by hypotension status
peri_summary <- hrv_features |>
  filter(window_type == "peri-induction") |>
  mutate(group = if_else(pih, "Hypotension", "No hypotension")) |>
  group_by(window_start, group) |>
  summarise(
    across(c(delta_RMSSD, delta_SDNN, delta_HR),
           list(median = ~median(., na.rm = TRUE),
                q25 = ~quantile(., 0.25, na.rm = TRUE),
                q75 = ~quantile(., 0.75, na.rm = TRUE)),
           .names = "{.col}_{.fn}"),
    n = n(),
    .groups = "drop"
  ) |>
  mutate(window_mid = window_start + 30)

peri_long <- peri_summary |>
  pivot_longer(
    cols = matches("^delta_.*(median|q25|q75)$"),
    names_to = c("metric", ".value"),
    names_pattern = "^(delta_\\w+)_(median|q25|q75)$"
  )

n_pih    <- n_distinct(hrv_features$caseid[hrv_features$pih])
n_no_pih <- n_distinct(hrv_features$caseid[!hrv_features$pih])

ggplot(peri_long, aes(x = window_mid, y = median, colour = group, fill = group)) +
  geom_ribbon(aes(ymin = q25, ymax = q75), alpha = 0.15, colour = NA) +
  geom_line(linewidth = 0.8) +
  geom_point(size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_vline(xintercept = 0, linetype = "dashed", colour = "grey50") +
  facet_wrap(~metric, scales = "free_y",
             labeller = as_labeller(c(
               delta_RMSSD = "\u0394RMSSD (ms)",
               delta_SDNN  = "\u0394SDNN (ms)",
               delta_HR    = "\u0394HR (bpm)"
             ))) +
  scale_colour_manual(values = c("Hypotension" = "#d62728", "No hypotension" = "#1f77b4")) +
  scale_fill_manual(  values = c("Hypotension" = "#d62728", "No hypotension" = "#1f77b4")) +
  labs(
    title    = "Change from baseline HRV around induction",
    subtitle = sprintf("Hypotension n=%d  |  No hypotension n=%d  |  shaded = IQR",
                       n_pih, n_no_pih),
    x        = "Time relative to induction (s, window midpoint)",
    colour   = NULL,
    fill     = NULL,
    y        = NULL
  )
```
Editor is loading...
Leave a Comment