The following code reads the titanic data that we will use in our examples.
titanic = read.csv("http://facweb1.redlands.edu/fac/jim_bentley/downloads/math111/titanic.csv")
# titanic = read.csv("E:/Web16/Downloads/Math111/titanic.csv")
titanic$AGE=factor(titanic$AGE,labels=c("Child","Adult"))
titanic$CLASS=factor(titanic$CLASS,labels=c("0","1","2","3"))
titanic$SEX=factor(titanic$SEX, labels=c("Female","Male"))
titanic$SURVIVED=factor(titanic$SURVIVED,labels=c("No","Yes"))
We can now check to see if the data frame has been created by entering:
ls()
## [1] "titanic"
Additional functions necessary for validation and graphical analysis of the quality of logistic models can be found in Frank Harrell’s {} package. Harrell provides some functions to make pretty output in {}.
## load a few packages
p_load(Hmisc)
p_load(xtable)
p_load(lattice)
p_load(rms) ### Modern R replacement for Design package
The models fitted here are the equivalent of those fitted in the SAS documentation.
A model to test for the difference in odds of survival as determined by class may be fitted using the lrm function.
dd = datadist(titanic)
options(datadist="dd")
titanic.lrm.class=lrm(SURVIVED~CLASS, x=TRUE, y=TRUE, data=titanic)
titanic.lrm.class
## Logistic Regression Model
##
## lrm(formula = SURVIVED ~ CLASS, data = titanic, x = TRUE, y = TRUE)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 2201 LR chi2 180.90 R2 0.110 C 0.642
## No 1490 d.f. 3 g 0.545 Dxy 0.283
## Yes 711 Pr(> chi2) <0.0001 gr 1.725 gamma 0.386
## max |deriv| 6e-12 gp 0.124 tau-a 0.124
## Brier 0.200
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -1.1552 0.0788 -14.67 <0.0001
## CLASS=1 1.6643 0.1390 11.97 <0.0001
## CLASS=2 0.8078 0.1438 5.62 <0.0001
## CLASS=3 0.0678 0.1171 0.58 0.5624
##
anova(titanic.lrm.class)
## Wald Statistics Response: SURVIVED
##
## Factor Chi-Square d.f. P
## CLASS 173.23 3 <.0001
## TOTAL 173.23 3 <.0001
Note that the (log) odds of survival do not differ for classes 0 (viewed as baseline) and 3. However, classes 1 and 2 differ from 0 (and thus 3) as well as from each other. This can most easily be seen using the odds ratios.
summary(titanic.lrm.class,CLASS="0")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 1:0 1 2 NA 1.664300 0.13902 1.39190 1.93680
## Odds Ratio 1 2 NA 5.282200 NA 4.02240 6.93660
## CLASS - 2:0 1 3 NA 0.807850 0.14375 0.52610 1.08960
## Odds Ratio 1 3 NA 2.243100 NA 1.69230 2.97310
## CLASS - 3:0 1 4 NA 0.067846 0.11711 -0.16169 0.29738
## Odds Ratio 1 4 NA 1.070200 NA 0.85071 1.34630
summary(titanic.lrm.class,CLASS="3")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 0:3 4 1 NA -0.067846 0.11711 -0.29738 0.16169
## Odds Ratio 4 1 NA 0.934400 NA 0.74276 1.17550
## CLASS - 1:3 4 2 NA 1.596500 0.14365 1.31500 1.87800
## Odds Ratio 4 2 NA 4.935700 NA 3.72460 6.54070
## CLASS - 2:3 4 3 NA 0.740000 0.14824 0.44946 1.03050
## Odds Ratio 4 3 NA 2.095900 NA 1.56750 2.80260
summary(titanic.lrm.class,CLASS="1")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 0:1 2 1 NA -1.66430 0.13902 -1.93680 -1.39190
## Odds Ratio 2 1 NA 0.18931 NA 0.14416 0.24861
## CLASS - 2:1 2 3 NA -0.85649 0.16609 -1.18200 -0.53097
## Odds Ratio 2 3 NA 0.42465 NA 0.30666 0.58804
## CLASS - 3:1 2 4 NA -1.59650 0.14365 -1.87800 -1.31500
## Odds Ratio 2 4 NA 0.20260 NA 0.15289 0.26849
summary(titanic.lrm.class,CLASS="2")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 0:2 3 1 NA -0.80785 0.14375 -1.08960 -0.52610
## Odds Ratio 3 1 NA 0.44582 NA 0.33635 0.59091
## CLASS - 1:2 3 2 NA 0.85649 0.16609 0.53097 1.18200
## Odds Ratio 3 2 NA 2.35490 NA 1.70060 3.26100
## CLASS - 3:2 3 4 NA -0.74000 0.14824 -1.03050 -0.44946
## Odds Ratio 3 4 NA 0.47711 NA 0.35681 0.63797
While the odds for class 3 relative to class 0 are essentially 1:1, class 1 has a 5.28:1 odds of survival and class 2 has a 2.24:1 odds of survival relative to class 0.
The probability of survival for the different classes may be plotted.
print(plot(Predict(titanic.lrm.class, fun=plogis), ylab="Probability of Survival"))
A nomogram may be helpful at this point.
nom = nomogram(titanic.lrm.class, fun=plogis)
print(plot(nom))
## NULL
A model to test for the difference in odds of survival as determined by age and sex may be fitted using the lmr function.
dd = datadist(titanic)
options(datadist="dd")
titanic.lrm.agesex=lrm(SURVIVED~AGE*SEX, x=TRUE, y=TRUE, data=titanic)
titanic.lrm.agesex
## Logistic Regression Model
##
## lrm(formula = SURVIVED ~ AGE * SEX, data = titanic, x = TRUE,
## y = TRUE)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 2201 LR chi2 456.68 R2 0.262 C 0.713
## No 1490 d.f. 3 g 0.841 Dxy 0.427
## Yes 711 Pr(> chi2) <0.0001 gr 2.320 gamma 0.787
## max |deriv| 1e-10 gp 0.187 tau-a 0.187
## Brier 0.171
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.4990 0.3075 1.62 0.1046
## AGE=Adult 0.5654 0.3269 1.73 0.0837
## SEX=Male -0.6870 0.3970 -1.73 0.0835
## AGE=Adult * SEX=Male -1.7465 0.4167 -4.19 <0.0001
##
anova(titanic.lrm.agesex)
## Wald Statistics Response: SURVIVED
##
## Factor Chi-Square d.f. P
## AGE (Factor+Higher Order Factors) 23.88 2 <.0001
## All Interactions 17.57 1 <.0001
## SEX (Factor+Higher Order Factors) 371.97 2 <.0001
## All Interactions 17.57 1 <.0001
## AGE * SEX (Factor+Higher Order Factors) 17.57 1 <.0001
## TOTAL 391.59 3 <.0001
The odds associated with the model are:
summary(titanic.lrm.agesex, AGE="Adult", SEX="Male")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE - Child:Adult 2 1 NA 1.1811 0.25839 0.67465 1.6875
## Odds Ratio 2 1 NA 3.2579 NA 1.96330 5.4060
## SEX - Female:Male 2 1 NA 2.4335 0.12669 2.18520 2.6818
## Odds Ratio 2 1 NA 11.3990 NA 8.89270 14.6120
##
## Adjusted to: AGE=Adult SEX=Male
summary(titanic.lrm.agesex, AGE="Child", SEX="Female")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE - Adult:Child 1 2 NA 0.56540 0.32692 -0.075348 1.20620
## Odds Ratio 1 2 NA 1.76020 NA 0.927420 3.34060
## SEX - Male:Female 1 2 NA -0.68704 0.39698 -1.465100 0.09102
## Odds Ratio 1 2 NA 0.50306 NA 0.231050 1.09530
##
## Adjusted to: AGE=Child SEX=Female
The probability of survival for the different combinations of sex and age group may be plotted.
Predict(titanic.lrm.agesex, fun=plogis, AGE=c("Child","Adult"), SEX=c("Female","Male"))
## AGE SEX yhat lower upper
## 1 Child Female 0.6222222 0.4741134 0.7505638
## 2 Adult Female 0.7435294 0.6998704 0.7828084
## 3 Child Male 0.4531250 0.3362143 0.5754460
## 4 Adult Male 0.2027594 0.1841419 0.2227454
##
## Response variable (y):
##
## Limits are 0.95 confidence limits
print(plot(Predict(titanic.lrm.agesex, fun=plogis, AGE=c("Child","Adult"), SEX=c("Female","Male"))))
A nomogram may be helpful at this point.
nom = nomogram(titanic.lrm.agesex, fun=plogis)
print(plot(nom))
## NULL
A model to test for the difference in odds of survival as determined by class, age and sex may be fitted using the lmr function.
dd = datadist(titanic)
options(datadist="dd")
titanic.lrm.classagesex=lrm(SURVIVED~CLASS*SEX+AGE*SEX, data=titanic, x=TRUE, y=TRUE)
titanic.lrm.classagesex
## Logistic Regression Model
##
## lrm(formula = SURVIVED ~ CLASS * SEX + AGE * SEX, data = titanic,
## x = TRUE, y = TRUE)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 2201 LR chi2 634.70 R2 0.350 C 0.766
## No 1490 d.f. 9 g 1.341 Dxy 0.532
## Yes 711 Pr(> chi2) <0.0001 gr 3.823 gamma 0.638
## max |deriv| 5e-10 gp 0.233 tau-a 0.233
## Brier 0.157
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 2.0775 0.7171 2.90 0.0038
## CLASS=1 1.6642 0.8003 2.08 0.0376
## CLASS=2 0.0497 0.6874 0.07 0.9424
## CLASS=3 -2.0894 0.6381 -3.27 0.0011
## SEX=Male -1.7888 0.7728 -2.31 0.0206
## AGE=Adult -0.1803 0.3618 -0.50 0.6182
## CLASS=1 * SEX=Male -1.1033 0.8199 -1.35 0.1784
## CLASS=2 * SEX=Male -0.7647 0.7271 -1.05 0.2929
## CLASS=3 * SEX=Male 1.5623 0.6562 2.38 0.0173
## SEX=Male * AGE=Adult -1.3581 0.4551 -2.98 0.0028
##
anova(titanic.lrm.classagesex)
## Wald Statistics Response: SURVIVED
##
## Factor Chi-Square d.f. P
## CLASS (Factor+Higher Order Factors) 124.28 6 <.0001
## All Interactions 48.25 3 <.0001
## SEX (Factor+Higher Order Factors) 254.38 5 <.0001
## All Interactions 63.47 4 <.0001
## AGE (Factor+Higher Order Factors) 31.30 2 <.0001
## All Interactions 8.91 1 0.0028
## CLASS * SEX (Factor+Higher Order Factors) 48.25 3 <.0001
## SEX * AGE (Factor+Higher Order Factors) 8.91 1 0.0028
## TOTAL INTERACTION 63.47 4 <.0001
## TOTAL 311.38 9 <.0001
The odds associated with the model are
summary(titanic.lrm.classagesex, CLASS="0", AGE="Adult", SEX="Male")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 1:0 1 2 NA 0.56086 0.17821 0.21158 0.91014
## Odds Ratio 1 2 NA 1.75220 NA 1.23560 2.48470
## CLASS - 2:0 1 3 NA -0.71504 0.23692 -1.17940 -0.25068
## Odds Ratio 1 3 NA 0.48917 NA 0.30747 0.77827
## CLASS - 3:0 1 4 NA -0.52708 0.15308 -0.82711 -0.22705
## Odds Ratio 1 4 NA 0.59033 NA 0.43731 0.79688
## SEX - Female:Male 2 1 NA 3.14690 0.62453 1.92290 4.37100
## Odds Ratio 2 1 NA 23.26400 NA 6.84040 79.11900
## AGE - Child:Adult 2 1 NA 1.53850 0.27608 0.99735 2.07960
## Odds Ratio 2 1 NA 4.65740 NA 2.71110 8.00100
##
## Adjusted to: CLASS=0 SEX=Male AGE=Adult
summary(titanic.lrm.classagesex, CLASS="3", AGE="Child", SEX="Female")
## Effects Response : SURVIVED
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## CLASS - 0:3 4 1 NA 2.08940 0.63814 0.83867 3.34010
## Odds Ratio 4 1 NA 8.08010 NA 2.31330 28.22300
## CLASS - 1:3 4 2 NA 3.75360 0.52986 2.71510 4.79210
## Odds Ratio 4 2 NA 42.67500 NA 15.10600 120.56000
## CLASS - 2:3 4 3 NA 2.13910 0.32956 1.49320 2.78500
## Odds Ratio 4 3 NA 8.49190 NA 4.45120 16.20100
## SEX - Male:Female 1 2 NA -0.22646 0.42412 -1.05770 0.60480
## Odds Ratio 1 2 NA 0.79735 NA 0.34725 1.83090
## AGE - Adult:Child 1 2 NA -0.18035 0.36179 -0.88945 0.52876
## Odds Ratio 1 2 NA 0.83498 NA 0.41088 1.69680
##
## Adjusted to: CLASS=3 SEX=Female AGE=Child
The probability of survival for the different combinations of sex and age group may be plotted.
Predict(titanic.lrm.classagesex, fun=plogis, CLASS=c("0","1","2","3"), AGE=c("Child","Adult"), SEX=c("Female","Male"))
## CLASS AGE SEX yhat lower upper
## 1 0 Child Female 0.8886935 0.66194638 0.9701987
## 2 1 Child Female 0.9768348 0.92575376 0.9930367
## 3 2 Child Female 0.8935160 0.78056016 0.9519104
## 4 3 Child Female 0.4970148 0.33827964 0.6563539
## 5 0 Adult Female 0.8695652 0.66454828 0.9573283
## 6 1 Adult Female 0.9723831 0.92874210 0.9895961
## 7 2 Adult Female 0.8750999 0.79597143 0.9263784
## 8 3 Adult Female 0.4520760 0.37865906 0.5276396
## 9 0 Child Male 0.5716729 0.43150500 0.7012113
## 10 1 Child Male 0.7004700 0.55927805 0.8116614
## 11 2 Child Male 0.3949969 0.25626440 0.5529915
## 12 3 Child Male 0.4406809 0.32190559 0.5666583
## 13 0 Adult Male 0.2227378 0.19619893 0.2517422
## 14 1 Adult Male 0.3342723 0.26910345 0.4064468
## 15 2 Adult Male 0.1229466 0.08312897 0.1781310
## 16 3 Adult Male 0.1446912 0.11604927 0.1789709
##
## Response variable (y):
##
## Limits are 0.95 confidence limits
print(plot(Predict(titanic.lrm.classagesex, fun=plogis, CLASS=c("0","1","2","3"),SEX=c("Female","Male"), AGE=c("Child","Adult")), pch=c(2,1),col=c(1,2),layout=c(1,2)))
A nomogram may be helpful at this point.
nom = nomogram(titanic.lrm.classagesex, fun=plogis)
print(plot(nom))
## NULL