Get the Data

  demog <- read_csv("./proact_demographics.csv")
## Rows: 11675 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Ethnicity, Race_Other_Specify, Sex
## dbl (10): subject_id, Demographics_Delta, Age, Date_of_Birth, Race_Americ_In...
## lgl  (1): Race_Hawaiian_Pacific_Islander
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  alsfrs <- read_csv("./proact_alsfrs.csv")
## Rows: 67612 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Mode_of_Administration, ALSFRS_Responded_By
## dbl (18): subject_id, Q1_Speech, Q2_Salivation, Q3_Swallowing, Q4_Handwritin...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  deathData <- read_csv("./proact_deathdata.csv")
## Rows: 4875 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Subject_Died
## dbl (2): subject_id, Death_Days
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  treatment <- read_csv("./proact_treatment.csv")
## Rows: 9994 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Study_Arm
## dbl (2): subject_id, Treatment_Group_Delta
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Merge the Data

  deathData %>% group_by(subject_id) %>% summarise(n=n()) %>% filter(n > 1)
## # A tibble: 1 × 2
##   subject_id     n
##        <dbl> <int>
## 1     442984     2
  deathData <- deathData %>% group_by(subject_id) %>% arrange(subject_id, .by_group = TRUE) %>% mutate(max_days = max(Death_Days)) %>% filter(Death_Days == max_days | is.na(Death_Days)) %>% select(-max_days)
  deathData %>% filter(subject_id == 442984)
## # A tibble: 1 × 3
## # Groups:   subject_id [1]
##   subject_id Subject_Died Death_Days
##        <dbl> <chr>             <dbl>
## 1     442984 Yes                  95
  dat <- alsfrs %>% 
           left_join(deathData, by="subject_id") %>%
           left_join(demog, by="subject_id") %>%
           left_join(treatment, by="subject_id")

Get Time Data

  tmp <- dat %>%
    select(subject_id, ALSFRS_Delta, Subject_Died, 
           Death_Days, Demographics_Delta,
           Treatment_Group_Delta) %>% 
    group_by(subject_id) %>%
    arrange(subject_id, ALSFRS_Delta, .group_by=TRUE)

    summary(deathData$Death_Days, na.rm=TRUE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0   161.0   289.0   306.4   416.5  2033.0    1443

Quick Look at Survival

  p_load(survival, survminer)
  survdiff(formula = Surv(Death_Days, Subject_Died=="Yes") ~ Sex, data = dat)
## Call:
## survdiff(formula = Surv(Death_Days, Subject_Died == "Yes") ~ 
##     Sex, data = dat)
## 
## n=14648, 52964 observations deleted due to missingness.
## 
##               N Observed Expected (O-E)^2/E (O-E)^2/V
## Sex=Female 6000     6000     6174      4.92      8.63
## Sex=Male   8648     8648     8474      3.58      8.63
## 
##  Chisq= 8.6  on 1 degrees of freedom, p= 0.003
  plot(survfit(Surv(Death_Days, Subject_Died=="Yes") ~ Sex, data = dat), 
     xlab = "Days", 
     ylab = "Overall survival probability")
  
  cox.sex.age <- coxph(Surv(Death_Days, Subject_Died=="Yes") ~ Sex * Age, data = dat)
  summary(cox.sex.age)
## Call:
## coxph(formula = Surv(Death_Days, Subject_Died == "Yes") ~ Sex * 
##     Age, data = dat)
## 
##   n= 12594, number of events= 12594 
##    (55018 observations deleted due to missingness)
## 
##                  coef exp(coef)  se(coef)      z Pr(>|z|)    
## SexMale      0.530425  1.699655  0.117985  4.496 6.93e-06 ***
## Age          0.019532  1.019724  0.001561 12.513  < 2e-16 ***
## SexMale:Age -0.006728  0.993295  0.001921 -3.503  0.00046 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## SexMale        1.6997     0.5884    1.3488     2.142
## Age            1.0197     0.9807    1.0166     1.023
## SexMale:Age    0.9933     1.0068    0.9896     0.997
## 
## Concordance= 0.539  (se = 0.003 )
## Likelihood ratio test= 304.4  on 3 df,   p=<2e-16
## Wald test            = 293.9  on 3 df,   p=<2e-16
## Score (logrank) test = 294.9  on 3 df,   p=<2e-16
  anova(cox.sex.age)
## Analysis of Deviance Table
##  Cox model: response is Surv(Death_Days, Subject_Died == "Yes")
## Terms added sequentially (first to last)
## 
##          loglik   Chisq Df Pr(>|Chi|)    
## NULL    -106311                          
## Sex     -106303  15.780  1  7.115e-05 ***
## Age     -106165 276.313  1  < 2.2e-16 ***
## Sex:Age -106159  12.332  1  0.0004452 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  sex_df <- with(dat,
               data.frame(Sex = c("Female", "Male"), 
                          Age = rep(mean(Age, na.rm = TRUE), 2)
                          )
               )
  sex_df
##      Sex      Age
## 1 Female 55.32813
## 2   Male 55.32813
  fit <- survfit(cox.sex.age, newdata = sex_df)
  ggsurvplot(fit, data=dat, conf.int = TRUE, legend.labs=c("Female", "Male"),
             ggtheme = theme_minimal())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the survminer package.
##   Please report the issue at <]8;;https://github.com/kassambara/survminer/issueshttps://github.com/kassambara/survminer/issues]8;;>.