1 Library

library(epiDisplay) # dataset & nice output table
## Loading required package: foreign
## Loading required package: survival
## Loading required package: MASS
## Loading required package: nnet
library(gtsummary)  # descriptive table
## 
## Attaching package: 'gtsummary'
## The following object is masked from 'package:MASS':
## 
##     select
library(survival)   # clogit
library(broom)      # tidy
library(mfp)        # frac poly
library(gofcat)     # model fit
## 
## Attaching package: 'gofcat'
## The following object is masked from 'package:survival':
## 
##     retinopathy

2 Datasets

Now we use two built-in datasets

?VC1to1  # Datasets on a matched case-control study of esophageal cancer
data(VC1to1)
cancer = VC1to1
str(cancer)
## 'data.frame':    52 obs. of  5 variables:
##  $ matset : num  1 1 2 2 3 3 4 4 5 5 ...
##  $ case   : num  1 0 1 0 1 0 1 0 1 0 ...
##  $ smoking: num  1 1 1 1 1 1 1 1 0 1 ...
##  $ rubber : num  0 0 0 1 1 1 0 1 0 0 ...
##  $ alcohol: num  0 0 1 0 0 0 0 1 1 0 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "24 Jul 2005 23:36"
##  - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int [1:5] 102 102 102 102 102
##  - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "version")= int 7
##  - attr(*, "label.table")=List of 5
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
summary(cancer)
##      matset          case        smoking           rubber      
##  Min.   : 1.0   Min.   :0.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 7.0   1st Qu.:0.0   1st Qu.:1.0000   1st Qu.:0.0000  
##  Median :13.5   Median :0.5   Median :1.0000   Median :0.0000  
##  Mean   :13.5   Mean   :0.5   Mean   :0.8077   Mean   :0.3269  
##  3rd Qu.:20.0   3rd Qu.:1.0   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :26.0   Max.   :1.0   Max.   :1.0000   Max.   :1.0000  
##     alcohol      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :1.0000  
##  Mean   :0.5192  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
head(cancer, 10)  # 1-1 matching
##    matset case smoking rubber alcohol
## 1       1    1       1      0       0
## 2       1    0       1      0       0
## 3       2    1       1      0       1
## 4       2    0       1      1       0
## 5       3    1       1      1       0
## 6       3    0       1      1       0
## 7       4    1       1      0       0
## 8       4    0       1      1       1
## 9       5    1       0      0       1
## 10      5    0       1      0       0
?infert  # Infertility after Spontaneous and Induced Abortion
abort = infert
str(abort)
## 'data.frame':    248 obs. of  8 variables:
##  $ education     : Factor w/ 3 levels "0-5yrs","6-11yrs",..: 1 1 1 1 2 2 2 2 2 2 ...
##  $ age           : num  26 42 39 34 35 36 23 32 21 28 ...
##  $ parity        : num  6 1 6 4 3 4 1 2 1 2 ...
##  $ induced       : num  1 1 2 2 1 2 0 0 0 0 ...
##  $ case          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ spontaneous   : num  2 0 0 0 1 1 0 0 1 0 ...
##  $ stratum       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ pooled.stratum: num  3 1 4 2 32 36 6 22 5 19 ...
summary(abort)
##    education        age            parity         induced      
##  0-5yrs : 12   Min.   :21.00   Min.   :1.000   Min.   :0.0000  
##  6-11yrs:120   1st Qu.:28.00   1st Qu.:1.000   1st Qu.:0.0000  
##  12+ yrs:116   Median :31.00   Median :2.000   Median :0.0000  
##                Mean   :31.50   Mean   :2.093   Mean   :0.5726  
##                3rd Qu.:35.25   3rd Qu.:3.000   3rd Qu.:1.0000  
##                Max.   :44.00   Max.   :6.000   Max.   :2.0000  
##       case         spontaneous        stratum      pooled.stratum 
##  Min.   :0.0000   Min.   :0.0000   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:21.00   1st Qu.:19.00  
##  Median :0.0000   Median :0.0000   Median :42.00   Median :36.00  
##  Mean   :0.3347   Mean   :0.5766   Mean   :41.87   Mean   :33.58  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:62.25   3rd Qu.:48.25  
##  Max.   :1.0000   Max.   :2.0000   Max.   :83.00   Max.   :63.00
abort = abort[order(abort$stratum), ]
head(abort, 10)  # 1-2 matching
##     education age parity induced case spontaneous stratum pooled.stratum
## 1      0-5yrs  26      6       1    1           2       1              3
## 84     0-5yrs  26      6       2    0           0       1              3
## 166    0-5yrs  26      6       2    0           0       1              3
## 2      0-5yrs  42      1       1    1           0       2              1
## 85     0-5yrs  42      1       0    0           0       2              1
## 167    0-5yrs  42      1       0    0           0       2              1
## 3      0-5yrs  39      6       2    1           0       3              4
## 86     0-5yrs  39      6       2    0           0       3              4
## 168    0-5yrs  39      6       2    0           0       3              4
## 4      0-5yrs  34      4       2    1           0       4              2

3 1-1 Matching

3.1 Descriptive

str(cancer)
## 'data.frame':    52 obs. of  5 variables:
##  $ matset : num  1 1 2 2 3 3 4 4 5 5 ...
##  $ case   : num  1 0 1 0 1 0 1 0 1 0 ...
##  $ smoking: num  1 1 1 1 1 1 1 1 0 1 ...
##  $ rubber : num  0 0 0 1 1 1 0 1 0 0 ...
##  $ alcohol: num  0 0 1 0 0 0 0 1 1 0 ...
##  - attr(*, "datalabel")= chr ""
##  - attr(*, "time.stamp")= chr "24 Jul 2005 23:36"
##  - attr(*, "formats")= chr [1:5] "%9.0g" "%9.0g" "%9.0g" "%9.0g" ...
##  - attr(*, "types")= int [1:5] 102 102 102 102 102
##  - attr(*, "val.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "var.labels")= chr [1:5] "" "" "" "" ...
##  - attr(*, "version")= int 7
##  - attr(*, "label.table")=List of 5
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
##   ..$ : NULL
tbl_summary(subset(cancer, select = c(case, smoking, rubber, alcohol)), by = case,
            statistic = list(all_categorical() ~ "{n} ({p}%)"))
Characteristic 0, N = 261 1, N = 261
smoking 21 (81%) 21 (81%)
rubber 9 (35%) 8 (31%)
alcohol 10 (38%) 17 (65%)
1 n (%)

3.2 Variable selection

3.2.1 Univariable

clog_smo = clogit(case ~ smoking + strata(matset), data = cancer)
summary(clog_smo)
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ smoking + strata(matset), 
##     data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##           coef exp(coef) se(coef) z Pr(>|z|)
## smoking 0.0000    1.0000   0.6325 0        1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## smoking         1          1    0.2895     3.454
## 
## Concordance= 0.5  (se = 0 )
## Likelihood ratio test= 0  on 1 df,   p=1
## Wald test            = 0  on 1 df,   p=1
## Score (logrank) test = 0  on 1 df,   p=1
clog_rub = clogit(case ~ rubber + strata(matset), data = cancer)
summary(clog_rub)
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ rubber + strata(matset), 
##     data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##           coef exp(coef) se(coef)      z Pr(>|z|)
## rubber -0.2231    0.8000   0.6708 -0.333    0.739
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## rubber       0.8       1.25    0.2148     2.979
## 
## Concordance= 0.519  (se = 0.081 )
## Likelihood ratio test= 0.11  on 1 df,   p=0.7
## Wald test            = 0.11  on 1 df,   p=0.7
## Score (logrank) test = 0.11  on 1 df,   p=0.7
clog_alc = clogit(case ~ alcohol + strata(matset), data = cancer)
summary(clog_alc)
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ alcohol + strata(matset), 
##     data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##           coef exp(coef) se(coef)     z Pr(>|z|)  
## alcohol 1.5041    4.5000   0.7817 1.924   0.0544 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## alcohol       4.5     0.2222    0.9723     20.83
## 
## Concordance= 0.635  (se = 0.082 )
## Likelihood ratio test= 4.82  on 1 df,   p=0.03
## Wald test            = 3.7  on 1 df,   p=0.05
## Score (logrank) test = 4.45  on 1 df,   p=0.03

If not analyzed by clogit, will get n (pairs) + p coefficients, try for alcohol

blogs_alc = glm(case ~ alcohol + factor(matset), data = cancer, family = "binomial")
summary(blogs_alc)
## 
## Call:
## glm(formula = case ~ alcohol + factor(matset), family = "binomial", 
##     data = cancer)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)   
## (Intercept)       1.353e-15  1.414e+00   0.000  1.00000   
## alcohol           3.008e+00  1.106e+00   2.721  0.00651 **
## factor(matset)2  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)3  -5.406e-15  2.000e+00   0.000  1.00000   
## factor(matset)4  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)5  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)6  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)7  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)8  -1.379e-15  2.000e+00   0.000  1.00000   
## factor(matset)9  -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)10 -1.500e-15  2.000e+00   0.000  1.00000   
## factor(matset)11 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)12 -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)13 -2.135e-15  2.000e+00   0.000  1.00000   
## factor(matset)14 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)15 -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)16 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)17 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)18 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)19 -1.147e-15  2.000e+00   0.000  1.00000   
## factor(matset)20 -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)21 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)22 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)23 -3.008e+00  2.285e+00  -1.316  0.18806   
## factor(matset)24 -1.504e+00  2.380e+00  -0.632  0.52749   
## factor(matset)25 -1.713e-15  2.000e+00   0.000  1.00000   
## factor(matset)26 -1.504e+00  2.380e+00  -0.632  0.52749   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 72.087  on 51  degrees of freedom
## Residual deviance: 62.451  on 25  degrees of freedom
## AIC: 116.45
## 
## Number of Fisher Scoring iterations: 4

If analyzed by blogit w/out stratum for alcohol, note the difference in estimated coefficient

blog_alc = glm(case ~ alcohol, data = cancer, family = "binomial")
summary(blog_alc)
## 
## Call:
## glm(formula = case ~ alcohol, family = "binomial", data = cancer)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -0.5754     0.4167  -1.381   0.1673  
## alcohol       1.1060     0.5766   1.918   0.0551 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 72.087  on 51  degrees of freedom
## Residual deviance: 68.265  on 50  degrees of freedom
## AIC: 72.265
## 
## Number of Fisher Scoring iterations: 4

3.2.2 Multivariable

clog = clogit(case ~ smoking + rubber + alcohol + strata(matset), data = cancer)
summary(clog)
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ smoking + rubber + 
##     alcohol + strata(matset), data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##             coef exp(coef) se(coef)      z Pr(>|z|)  
## smoking  0.04321   1.04416  0.86916  0.050   0.9604  
## rubber  -0.68078   0.50622  0.94518 -0.720   0.4714  
## alcohol  1.66701   5.29629  0.82878  2.011   0.0443 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## smoking    1.0442     0.9577    0.1901     5.736
## rubber     0.5062     1.9754    0.0794     3.228
## alcohol    5.2963     0.1888    1.0435    26.880
## 
## Concordance= 0.654  (se = 0.107 )
## Likelihood ratio test= 5.55  on 3 df,   p=0.1
## Wald test            = 4.11  on 3 df,   p=0.3
## Score (logrank) test = 5.05  on 3 df,   p=0.2
clog1 = clogit(case ~ rubber + alcohol + strata(matset), data = cancer)
summary(clog1)
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ rubber + alcohol + 
##     strata(matset), data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##            coef exp(coef) se(coef)      z Pr(>|z|)  
## rubber  -0.6545    0.5197   0.7824 -0.837   0.4029  
## alcohol  1.6674    5.2982   0.8275  2.015   0.0439 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## rubber     0.5197     1.9241    0.1122     2.408
## alcohol    5.2982     0.1887    1.0466    26.822
## 
## Concordance= 0.654  (se = 0.1 )
## Likelihood ratio test= 5.55  on 2 df,   p=0.06
## Wald test            = 4.12  on 2 df,   p=0.1
## Score (logrank) test = 5.05  on 2 df,   p=0.08
anova(clog1, clog, test = "lrt") # w/out & w smoking
## Analysis of Deviance Table
##  Cox model: response is  Surv(rep(1, 52L), case)
##  Model 1: ~ rubber + alcohol + strata(matset)
##  Model 2: ~ smoking + rubber + alcohol + strata(matset)
##    loglik  Chisq Df Pr(>|Chi|)
## 1 -15.247                     
## 2 -15.245 0.0025  1     0.9603
anova(clog_alc, clog1, test = "lrt")  # w/out & w rubber
## Analysis of Deviance Table
##  Cox model: response is  Surv(rep(1, 52L), case)
##  Model 1: ~ alcohol + strata(matset)
##  Model 2: ~ rubber + alcohol + strata(matset)
##    loglik  Chisq Df Pr(>|Chi|)
## 1 -15.613                     
## 2 -15.247 0.7323  1     0.3921
anova(clog_rub, clog1, test = "lrt")  # w/out & w alcohol
## Analysis of Deviance Table
##  Cox model: response is  Surv(rep(1, 52L), case)
##  Model 1: ~ rubber + strata(matset)
##  Model 2: ~ rubber + alcohol + strata(matset)
##    loglik  Chisq Df Pr(>|Chi|)  
## 1 -17.966                       
## 2 -15.247 5.4392  1    0.01969 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

3.3 Variable assessment

3.3.1 Discordant pairs

with(cancer, matchTab(case, smoking, matset))
## 
## Exposure status: smoking = 1 
##  
## Total number of match sets in the tabulation = 26 
##  
## Number of controls = 1 
##                     No. of controls exposed
## No. of cases exposed  0  1
##                    0  0  5
##                    1  5 16
## 
## Odds ratio by Mantel-Haenszel method = 1 
##  
## Odds ratio by maximum likelihood estimate (MLE) method = 1 
##  95%CI= 0.29 , 3.454 
## 
with(cancer, matchTab(case, rubber, matset))
## 
## Exposure status: rubber = 1 
##  
## Total number of match sets in the tabulation = 26 
##  
## Number of controls = 1 
##                     No. of controls exposed
## No. of cases exposed  0  1
##                    0 13  5
##                    1  4  4
## 
## Odds ratio by Mantel-Haenszel method = 0.8 
##  
## Odds ratio by maximum likelihood estimate (MLE) method = 0.8 
##  95%CI= 0.215 , 2.979 
## 
with(cancer, matchTab(case, alcohol, matset))
## 
## Exposure status: alcohol = 1 
##  
## Total number of match sets in the tabulation = 26 
##  
## Number of controls = 1 
##                     No. of controls exposed
## No. of cases exposed 0 1
##                    0 7 2
##                    1 9 8
## 
## Odds ratio by Mantel-Haenszel method = 4.5 
##  
## Odds ratio by maximum likelihood estimate (MLE) method = 4.5 
##  95%CI= 0.972 , 20.827 
## 

3.4 Interaction

Specify interaction based between rubber * alcohol:

clogx = clogit(case ~ rubber * alcohol + strata(matset), data = cancer)
summary(clogx)  # Interaction not significant
## Call:
## coxph(formula = Surv(rep(1, 52L), case) ~ rubber * alcohol + 
##     strata(matset), data = cancer, method = "exact")
## 
##   n= 52, number of events= 26 
## 
##                   coef exp(coef) se(coef)      z Pr(>|z|)  
## rubber         -0.9829    0.3742   1.1606 -0.847   0.3971  
## alcohol         1.4989    4.4768   0.9030  1.660   0.0969 .
## rubber:alcohol  0.6341    1.8854   1.5519  0.409   0.6828  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                exp(coef) exp(-coef) lower .95 upper .95
## rubber            0.3742     2.6723   0.03847      3.64
## alcohol           4.4768     0.2234   0.76272     26.28
## rubber:alcohol    1.8854     0.5304   0.09003     39.48
## 
## Concordance= 0.654  (se = 0.1 )
## Likelihood ratio test= 5.72  on 3 df,   p=0.1
## Wald test            = 4.24  on 3 df,   p=0.2
## Score (logrank) test = 5.19  on 3 df,   p=0.2

3.5 Model fit assessment

Not possible for clogit ## Presentation and interpretation Present in table

clogistic.display(clog1)
## Conditional logistic regression predicting case : 1 vs 0 
## 
##  
##                  crude OR(95%CI)   adj. OR(95%CI)    P(Wald's test) P(LR-test)
## rubber: 1 vs 0   0.8 (0.21,2.98)   0.52 (0.11,2.41)  0.403          0.392     
##                                                                               
## alcohol: 1 vs 0  4.5 (0.97,20.83)  5.3 (1.05,26.82)  0.044          0.02      
##                                                                               
## No. of observations =  52
# Details
tidy(clog1, conf.int = T)
## # A tibble: 2 × 7
##   term    estimate std.error statistic p.value conf.low conf.high
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 rubber    -0.654     0.782    -0.837  0.403   -2.19       0.879
## 2 alcohol    1.67      0.827     2.01   0.0439   0.0455     3.29
tidy(clog1, conf.int = T, exponentiate = T)
## # A tibble: 2 × 7
##   term    estimate std.error statistic p.value conf.low conf.high
##   <chr>      <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 rubber     0.520     0.782    -0.837  0.403     0.112      2.41
## 2 alcohol    5.30      0.827     2.01   0.0439    1.05      26.8
knitr::kable(tidy(clog1, conf.int = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
rubber -0.654 0.782 -0.837 0.403 -2.188 0.879
alcohol 1.667 0.827 2.015 0.044 0.046 3.289
knitr::kable(tidy(clog1, conf.int = T, exponentiate = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
rubber 0.520 0.782 -0.837 0.403 0.112 2.408
alcohol 5.298 0.827 2.015 0.044 1.047 26.822

4 1-M Matching

4.1 Descriptive

str(abort)
## 'data.frame':    248 obs. of  8 variables:
##  $ education     : Factor w/ 3 levels "0-5yrs","6-11yrs",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : num  26 26 26 42 42 42 39 39 39 34 ...
##  $ parity        : num  6 6 6 1 1 1 6 6 6 4 ...
##  $ induced       : num  1 2 2 1 0 0 2 2 2 2 ...
##  $ case          : num  1 0 0 1 0 0 1 0 0 1 ...
##  $ spontaneous   : num  2 0 0 0 0 0 0 0 0 0 ...
##  $ stratum       : int  1 1 1 2 2 2 3 3 3 4 ...
##  $ pooled.stratum: num  3 3 3 1 1 1 4 4 4 2 ...
tbl_summary(subset(abort, select = c(case, spontaneous, induced)), by = case,
            statistic = list(all_categorical() ~ "{n} ({p}%)"))
Characteristic 0, N = 1651 1, N = 831
spontaneous
    0 113 (68%) 28 (34%)
    1 40 (24%) 31 (37%)
    2 12 (7.3%) 24 (29%)
induced
    0 96 (58%) 47 (57%)
    1 45 (27%) 23 (28%)
    2 24 (15%) 13 (16%)
1 n (%)

4.2 Variable selection

4.2.1 Univariable

clogm_spo = clogit(case ~ spontaneous + strata(stratum), data = abort)
summary(clogm_spo)
## Call:
## coxph(formula = Surv(rep(1, 248L), case) ~ spontaneous + strata(stratum), 
##     data = abort, method = "exact")
## 
##   n= 248, number of events= 83 
## 
##               coef exp(coef) se(coef)     z Pr(>|z|)    
## spontaneous 1.1768    3.2441   0.2315 5.083 3.71e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## spontaneous     3.244     0.3083     2.061     5.107
## 
## Concordance= 0.685  (se = 0.046 )
## Likelihood ratio test= 33.76  on 1 df,   p=6e-09
## Wald test            = 25.84  on 1 df,   p=4e-07
## Score (logrank) test = 34.13  on 1 df,   p=5e-09
clogm_ind = clogit(case ~ induced + strata(stratum), data = abort)
summary(clogm_ind)
## Call:
## coxph(formula = Surv(rep(1, 248L), case) ~ induced + strata(stratum), 
##     data = abort, method = "exact")
## 
##   n= 248, number of events= 83 
## 
##            coef exp(coef) se(coef)     z Pr(>|z|)
## induced 0.07374   1.07653  0.20969 0.352    0.725
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## induced     1.077     0.9289    0.7137     1.624
## 
## Concordance= 0.497  (se = 0.047 )
## Likelihood ratio test= 0.12  on 1 df,   p=0.7
## Wald test            = 0.12  on 1 df,   p=0.7
## Score (logrank) test = 0.12  on 1 df,   p=0.7

4.2.2 Multivariable

clogm = clogit(case ~ spontaneous + induced + strata(stratum), data = abort)
summary(clogm)
## Call:
## coxph(formula = Surv(rep(1, 248L), case) ~ spontaneous + induced + 
##     strata(stratum), data = abort, method = "exact")
## 
##   n= 248, number of events= 83 
## 
##               coef exp(coef) se(coef)     z Pr(>|z|)    
## spontaneous 1.9859    7.2854   0.3524 5.635 1.75e-08 ***
## induced     1.4090    4.0919   0.3607 3.906 9.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##             exp(coef) exp(-coef) lower .95 upper .95
## spontaneous     7.285     0.1373     3.651    14.536
## induced         4.092     0.2444     2.018     8.298
## 
## Concordance= 0.776  (se = 0.044 )
## Likelihood ratio test= 53.15  on 2 df,   p=3e-12
## Wald test            = 31.84  on 2 df,   p=1e-07
## Score (logrank) test = 48.44  on 2 df,   p=3e-11
anova(clogm_ind, clogm)  # inclusion of spontaneous
## Analysis of Deviance Table
##  Cox model: response is  Surv(rep(1, 248L), case)
##  Model 1: ~ induced + strata(stratum)
##  Model 2: ~ spontaneous + induced + strata(stratum)
##    loglik  Chisq Df Pr(>|Chi|)    
## 1 -90.718                         
## 2 -64.202 53.031  1  3.283e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(clogm_spo, clogm)  # inclusion of induced, contrast with p-value for wald test
## Analysis of Deviance Table
##  Cox model: response is  Surv(rep(1, 248L), case)
##  Model 1: ~ spontaneous + strata(stratum)
##  Model 2: ~ spontaneous + induced + strata(stratum)
##    loglik  Chisq Df Pr(>|Chi|)    
## 1 -73.898                         
## 2 -64.202 19.392  1  1.065e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3 Variable assessment

4.3.1 Linearity in logit

Assume spontaneous & induced are numerical for practice purpose

# formula, copy & paste from summary(clogm)
mfp(Surv(rep(1, 248L), case) ~ fp(spontaneous, df = 4, select = 0.05) + 
      fp(induced, df = 4, select = 0.05) + strata(stratum), family = cox, data = abort)
## Call:
## mfp(formula = Surv(rep(1, 248L), case) ~ fp(spontaneous, df = 4, 
##     select = 0.05) + fp(induced, df = 4, select = 0.05) + strata(stratum), 
##     data = abort, family = cox)
## 
## 
## Deviance table:
##           Resid. Dev
## Null model    181.5587
## Linear model  128.4045
## Final model   128.4045
## 
## Fractional polynomials:
##             df.initial select alpha df.final power1 power2
## spontaneous          4   0.05  0.05        1      1      .
## induced              4   0.05  0.05        1      1      .
## 
## 
## Transformations of covariates:
##                          formula
## spontaneous I((spontaneous+1)^1)
## induced         I((induced+1)^1)
## 
## Re-Scaling:
## Non-positive values in some of the covariates. No re-scaling was performed.
## 
##                coef exp(coef) se(coef)     z        p
## spontaneous.1 1.986     7.285   0.3524 5.635 1.75e-08
## induced.1     1.409     4.092   0.3607 3.906 9.38e-05
## 
## Likelihood ratio test=53.15  on 2 df, p=2.869e-12 n= 248
# frac poly output -- linear

4.4 Interaction

Specify interaction based between spontaneous * induced:

clogmx = clogit(case ~ spontaneous * induced + strata(stratum), data = abort)
summary(clogmx)  # Interaction not significant
## Call:
## coxph(formula = Surv(rep(1, 248L), case) ~ spontaneous * induced + 
##     strata(stratum), data = abort, method = "exact")
## 
##   n= 248, number of events= 83 
## 
##                       coef exp(coef) se(coef)     z Pr(>|z|)    
## spontaneous         1.9517    7.0408   0.3768 5.180 2.21e-07 ***
## induced             1.3749    3.9548   0.3852 3.569 0.000358 ***
## spontaneous:induced 0.1068    1.1127   0.4435 0.241 0.809765    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                     exp(coef) exp(-coef) lower .95 upper .95
## spontaneous             7.041     0.1420    3.3646    14.734
## induced                 3.955     0.2529    1.8588     8.414
## spontaneous:induced     1.113     0.8987    0.4665     2.654
## 
## Concordance= 0.776  (se = 0.044 )
## Likelihood ratio test= 53.21  on 3 df,   p=2e-11
## Wald test            = 31.92  on 3 df,   p=5e-07
## Score (logrank) test = 48.71  on 3 df,   p=2e-10

4.5 Model fit assessment

Not possible for clogit Diagnostics also not available ## Presentation and interpretation Present in table

clogistic.display(clogm)
## Conditional logistic regression predicting case : 1 vs 0 
## 
##  
##                          crude OR(95%CI)   adj. OR(95%CI)     P(Wald's test)
## spontaneous (cont. var.) 3.24 (2.06,5.11)  7.29 (3.65,14.54)  < 0.001       
##                                                                             
## induced (cont. var.)     1.08 (0.71,1.62)  4.09 (2.02,8.3)    < 0.001       
##                                                                             
##                          P(LR-test)
## spontaneous (cont. var.) < 0.001   
##                                    
## induced (cont. var.)     < 0.001   
##                                    
## No. of observations =  248
# Details
tidy(clogm, conf.int = T)
## # A tibble: 2 × 7
##   term        estimate std.error statistic      p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>        <dbl>    <dbl>     <dbl>
## 1 spontaneous     1.99     0.352      5.63 0.0000000175    1.30       2.68
## 2 induced         1.41     0.361      3.91 0.0000938       0.702      2.12
tidy(clogm, conf.int = T, exponentiate = T)
## # A tibble: 2 × 7
##   term        estimate std.error statistic      p.value conf.low conf.high
##   <chr>          <dbl>     <dbl>     <dbl>        <dbl>    <dbl>     <dbl>
## 1 spontaneous     7.29     0.352      5.63 0.0000000175     3.65     14.5 
## 2 induced         4.09     0.361      3.91 0.0000938        2.02      8.30
knitr::kable(tidy(clogm, conf.int = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
spontaneous 1.986 0.352 5.635 0 1.295 2.677
induced 1.409 0.361 3.906 0 0.702 2.116
knitr::kable(tidy(clogm, conf.int = T, exponentiate = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
spontaneous 7.285 0.352 5.635 0 3.651 14.536
induced 4.092 0.361 3.906 0 2.018 8.298