Untitled

mail@pastecode.io avatar
unknown
r
5 months ago
4.6 kB
8
Indexable
# Loaded files onto environment, encoding is UTF-16LE, separated by tab
library(dplyr)
library(doBy)
library(lubridate)
library(ggplot2)
library(ggforce)
library(tidyverse)
library(lme4)
library(nlme)

age <- function(birth, base = Sys.Date()){
  i <- interval(birth, base)
  p <- as.period(i)
  year(p)
}

setupFacetPages <- function(df, xAxisDfVariable, yAxisDfVariable) {
  tempDf <- df %>%
    ggplot(df, mapping = aes(x = xAxisDfVariable, y = yAxisDfVariable)) +
    geom_text(aes(label = df$slopeValue), x = 200, y = 5, stat = 'unique') +
    facet_wrap_paginate(~df$usubjid, ncol = 4, nrow = 4) + #building facet grid here
    geom_smooth(method = "lm") +
    geom_line() +
    geom_point() +
    scale_y_continuous(limits = c(0, 120), breaks = seq(0, 120, 20)) +
    labs(x = "Difference In Days", y = "SCNT1")

  for (i in 1:n_pages(tempDf)) {
    df_print <- tempDf + facet_wrap_paginate(~df$usubjid, ncol = 4, nrow = 4, page = i)
    print(df_print)
  }
}

# merging demographics and BL_enroll file as they are unequal (151 rows vs 150 rows)
tempDfBL <- BL_enrollhd_cognitive %>% inner_join(enrollhd_demog %>% distinct(usubjid), by = 'usubjid')
tempDfBL <- cbind(tempDfBL, age = age(enrollhd_demog$dm1__brthdtc, tempDfBL$visdat))

# now add cag data as new column to tempDFbl
tempDfBL <- unique(left_join(tempDfBL, select(enrollhd_cag, usubjid, allele2l), by = "usubjid"))
#calculating CAP score and adding it to data frame
tempDfBL <- tempDfBL %>%
  mutate(capScore = age * ((allele2l - 30)/6.49))

#adding age variable from demographic to follow up data frame
tempDfFU <- FU_enrollhd_cognitive %>%
  mutate(age = age(enrollhd_demog$dm1__brthdtc[match(usubjid, enrollhd_demog$usubjid)], tempDfFU$visdat))

#now adding cag length to follow up data frame
tempDfFU <- unique(left_join(tempDfFU, select(enrollhd_cag, usubjid, allele2l), by = "usubjid"))
tempDfFU <- tempDfFU %>%
  mutate(capScore = tempDfBL$capScore[match(usubjid, tempDfBL$usubjid)])

mergedDf <- rbind(tempDfBL, tempDfFU)
# order data by usubjid
ordered_fu <- orderBy(~usubjid, data = mergedDf)

# find cumulative date difference and save as diffInDays column
modifiedDf <- ordered_fu %>%
  mutate(visdat = as.POSIXct.Date(visdat, format = "%Y-%m-%d")) %>%
  arrange(visdat) %>%
  group_by(usubjid) %>%
  mutate(
    diffInDays = difftime(visdat, lag(visdat, 1, default = visdat[1]), unit = "days") %>%
      as.numeric() %>%
      cumsum()
  ) %>%
  filter(!is.na(allele2l) | !is.na(capScore)) %>%
  filter(scnt1 != 9997) %>%
  mutate(
    slopeValue = format(coef(lm(scnt1 ~ diffInDays))[2], digits = 4)
    # using f tests to check if ANCOVA model is statistically significant -> general linear f test
    # co-variant cannot have an interaction with y ~ x (can be used as diffInDays does not interact with age)
  )
 # if 9997 has been entered then it means that value was expected but not applicable, remove it before plotting and set to 0

modifiedDf <- orderBy(~usubjid, data = modifiedDf)

capPositive <- modifiedDf[modifiedDf$capScore >= 100,]
capNegative <- modifiedDf[modifiedDf$capScore < 100,]


cap1 <- modifiedDf[modifiedDf$capScore > 102 & modifiedDf$capScore < 107,]
cap2 <- modifiedDf[modifiedDf$capScore > 107 & modifiedDf$capScore < 111,]
cap3 <- modifiedDf[modifiedDf$capScore > 111 & modifiedDf$capScore < 117,]
cap4 <- modifiedDf[modifiedDf$capScore > 117 & modifiedDf$capScore < 125,]
cap5 <- modifiedDf[modifiedDf$capScore > 125 & modifiedDf$capScore < 300,]

modelCap1 = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = cap1)
modelCap2 = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = cap2)
modelCap3 = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = cap3, control = lmeControl(opt = 'optim'))
modelCap4 = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = cap4)
modelCap5 = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = cap5)


fullModel = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = capPositive)
# having to use this formula to avoid pseudoreplications as observations are linked to each other

fullModelNegativeCap = lme(scnt1 ~ diffInDays, random = (~1+diffInDays|usubjid), data = capNegative)

anova(fullModel)


print(ggplot(data = modifiedDf, aes(x = diffInDays, y = scnt1, group = usubjid)) +
        geom_point()) + geom_line() + geom_smooth(aes(colour = capScore >= 100), method = "lm", se = F)

print(ggplot(data = capPositive, aes(x = diffInDays, y = scnt1, group = usubjid)) + geom_point() + geom_line() + geom_smooth(method = "lm", se = F)
)

setupFacetPages(modifiedDf, modifiedDf$diffInDays, modifiedDf$scnt1)
Leave a Comment