Get the Data
demog <- read_csv("./proact_demographics.csv")
## Rows: 11675 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Ethnicity, Race_Other_Specify, Sex
## dbl (10): subject_id, Demographics_Delta, Age, Date_of_Birth, Race_Americ_In...
## lgl (1): Race_Hawaiian_Pacific_Islander
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
alsfrs <- read_csv("./proact_alsfrs.csv")
## Rows: 67612 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Mode_of_Administration, ALSFRS_Responded_By
## dbl (18): subject_id, Q1_Speech, Q2_Salivation, Q3_Swallowing, Q4_Handwritin...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
deathData <- read_csv("./proact_deathdata.csv")
## Rows: 4875 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Subject_Died
## dbl (2): subject_id, Death_Days
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
treatment <- read_csv("./proact_treatment.csv")
## Rows: 9994 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Study_Arm
## dbl (2): subject_id, Treatment_Group_Delta
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Merge the Data
deathData %>% group_by(subject_id) %>% summarise(n=n()) %>% filter(n > 1)
## # A tibble: 1 × 2
## subject_id n
## <dbl> <int>
## 1 442984 2
deathData <- deathData %>% group_by(subject_id) %>% arrange(subject_id, .by_group = TRUE) %>% mutate(max_days = max(Death_Days)) %>% filter(Death_Days == max_days | is.na(Death_Days)) %>% select(-max_days)
deathData %>% filter(subject_id == 442984)
## # A tibble: 1 × 3
## # Groups: subject_id [1]
## subject_id Subject_Died Death_Days
## <dbl> <chr> <dbl>
## 1 442984 Yes 95
dat <- alsfrs %>%
left_join(deathData, by="subject_id") %>%
left_join(demog, by="subject_id") %>%
left_join(treatment, by="subject_id")
Get Time Data
tmp <- dat %>%
select(subject_id, ALSFRS_Delta, Subject_Died,
Death_Days, Demographics_Delta,
Treatment_Group_Delta) %>%
group_by(subject_id) %>%
arrange(subject_id, ALSFRS_Delta, .group_by=TRUE)
summary(deathData$Death_Days, na.rm=TRUE)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0 161.0 289.0 306.4 416.5 2033.0 1443
Quick Look at Survival
p_load(survival, survminer)
survdiff(formula = Surv(Death_Days, Subject_Died=="Yes") ~ Sex, data = dat)
## Call:
## survdiff(formula = Surv(Death_Days, Subject_Died == "Yes") ~
## Sex, data = dat)
##
## n=14648, 52964 observations deleted due to missingness.
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Sex=Female 6000 6000 6174 4.92 8.63
## Sex=Male 8648 8648 8474 3.58 8.63
##
## Chisq= 8.6 on 1 degrees of freedom, p= 0.003
plot(survfit(Surv(Death_Days, Subject_Died=="Yes") ~ Sex, data = dat),
xlab = "Days",
ylab = "Overall survival probability")
cox.sex.age <- coxph(Surv(Death_Days, Subject_Died=="Yes") ~ Sex * Age, data = dat)
summary(cox.sex.age)
## Call:
## coxph(formula = Surv(Death_Days, Subject_Died == "Yes") ~ Sex *
## Age, data = dat)
##
## n= 12594, number of events= 12594
## (55018 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## SexMale 0.530425 1.699655 0.117985 4.496 6.93e-06 ***
## Age 0.019532 1.019724 0.001561 12.513 < 2e-16 ***
## SexMale:Age -0.006728 0.993295 0.001921 -3.503 0.00046 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## SexMale 1.6997 0.5884 1.3488 2.142
## Age 1.0197 0.9807 1.0166 1.023
## SexMale:Age 0.9933 1.0068 0.9896 0.997
##
## Concordance= 0.539 (se = 0.003 )
## Likelihood ratio test= 304.4 on 3 df, p=<2e-16
## Wald test = 293.9 on 3 df, p=<2e-16
## Score (logrank) test = 294.9 on 3 df, p=<2e-16
anova(cox.sex.age)
## Analysis of Deviance Table
## Cox model: response is Surv(Death_Days, Subject_Died == "Yes")
## Terms added sequentially (first to last)
##
## loglik Chisq Df Pr(>|Chi|)
## NULL -106311
## Sex -106303 15.780 1 7.115e-05 ***
## Age -106165 276.313 1 < 2.2e-16 ***
## Sex:Age -106159 12.332 1 0.0004452 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sex_df <- with(dat,
data.frame(Sex = c("Female", "Male"),
Age = rep(mean(Age, na.rm = TRUE), 2)
)
)
sex_df
## Sex Age
## 1 Female 55.32813
## 2 Male 55.32813
fit <- survfit(cox.sex.age, newdata = sex_df)
ggsurvplot(fit, data=dat, conf.int = TRUE, legend.labs=c("Female", "Male"),
ggtheme = theme_minimal())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the survminer package.
## Please report the issue at <]8;;https://github.com/kassambara/survminer/issueshttps://github.com/kassambara/survminer/issues]8;;>.