1 Library

library(gtsummary)  # descriptive table
library(ordinal)    # clm, fit proportional odds model
library(broom)      # tidy
library(gofcat)     # model fit

2 Dataset

ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
## 'data.frame':    4083 obs. of  4 variables:
##  $ fbs    : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
##  $ totchol: num  5.43 5.13 5.55 4.01 5.21 ...
##  $ hpt    : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
##  $ weight : num  40 54.6 37 48.4 44.5 ...
summary(ms)
##      fbs          totchol        hpt           weight      
##  DM    : 594   Min.   : 0.180   no :3601   Min.   : 30.00  
##  IFG   : 364   1st Qu.: 4.970   yes: 482   1st Qu.: 53.80  
##  Normal:3125   Median : 5.700              Median : 62.30  
##                Mean   : 5.791              Mean   : 63.93  
##                3rd Qu.: 6.525              3rd Qu.: 72.00  
##                Max.   :23.140              Max.   :187.80
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM"), ordered = T)  # order the reference category
str(ms$fbs)
##  Ord.factor w/ 3 levels "Normal"<"IFG"<..: 1 1 1 1 1 1 1 1 1 1 ...
levels(ms$fbs)
## [1] "Normal" "IFG"    "DM"

3 Descriptive

summary(ms)
##      fbs          totchol        hpt           weight      
##  Normal:3125   Min.   : 0.180   no :3601   Min.   : 30.00  
##  IFG   : 364   1st Qu.: 4.970   yes: 482   1st Qu.: 53.80  
##  DM    : 594   Median : 5.700              Median : 62.30  
##                Mean   : 5.791              Mean   : 63.93  
##                3rd Qu.: 6.525              3rd Qu.: 72.00  
##                Max.   :23.140              Max.   :187.80
tbl_summary(ms, by = fbs,
            statistic = list(all_continuous() ~ "{mean} ({sd})", 
                             all_categorical() ~ "{n} ({p}%)"))
Characteristic Normal, N = 3,1251 IFG, N = 3641 DM, N = 5941
totchol 5.69 (1.24) 6.08 (1.37) 6.16 (1.31)
hpt 281 (9.0%) 75 (21%) 126 (21%)
weight 63 (14) 68 (14) 68 (15)
1 Mean (SD); n (%)

4 Variable selection

4.1 Univariable

log_chol = clm(fbs ~ totchol, data = ms)
summary(log_chol)
## formula: fbs ~ totchol
## data:    ms
## 
##  link  threshold nobs logLik   AIC     niter max.grad cond.H 
##  logit flexible  4083 -2817.64 5641.28 5(1)  5.09e-07 1.8e+03
## 
## Coefficients:
##         Estimate Std. Error z value Pr(>|z|)    
## totchol  0.26227    0.02875   9.124   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## Normal|IFG   2.7288     0.1763   15.48
## IFG|DM       3.3281     0.1794   18.55
log_hpt = clm(fbs ~ hpt, data = ms)
summary(log_hpt)
## formula: fbs ~ hpt
## data:    ms
## 
##  link  threshold nobs logLik   AIC     niter max.grad cond.H 
##  logit flexible  4083 -2816.60 5639.19 5(1)  1.44e-08 2.5e+01
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)    
## hptyes  0.95306    0.09802   9.723   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## Normal|IFG  1.32036    0.04086   32.31
## IFG|DM      1.92115    0.04836   39.73
log_wt = clm(fbs ~ weight, data = ms)
summary(log_wt)
## formula: fbs ~ weight
## data:    ms
## 
##  link  threshold nobs logLik   AIC     niter max.grad cond.H 
##  logit flexible  4083 -2817.00 5640.01 6(1)  1.40e-12 2.0e+05
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)    
## weight 0.023197   0.002486    9.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## Normal|IFG   2.6944     0.1691   15.93
## IFG|DM       3.2935     0.1722   19.12

4.2 Multivariable

mlog = clm(fbs ~ totchol + hpt + weight, data = ms)
summary(mlog)
## formula: fbs ~ totchol + hpt + weight
## data:    ms
## 
##  link  threshold nobs logLik   AIC     niter max.grad cond.H 
##  logit flexible  4083 -2740.27 5490.55 6(1)  2.93e-12 4.2e+05
## 
## Coefficients:
##         Estimate Std. Error z value Pr(>|z|)    
## totchol 0.257688   0.029182   8.830   <2e-16 ***
## hptyes  0.848765   0.099833   8.502   <2e-16 ***
## weight  0.021852   0.002544   8.591   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## Normal|IFG   4.2516     0.2503   16.98
## IFG|DM       4.8722     0.2535   19.22

5 Interaction

Specify interaction based on clinical opinion, let’s say between hpt * weight:

mlogx = clm(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
summary(mlogx)  # Interaction not significant
## formula: fbs ~ totchol + hpt + weight + hpt * weight
## data:    ms
## 
##  link  threshold nobs logLik   AIC     niter max.grad cond.H 
##  logit flexible  4083 -2739.90 5491.79 6(1)  1.44e-11 8.6e+05
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## totchol        0.256810   0.029201   8.794  < 2e-16 ***
## hptyes         1.257361   0.479443   2.623  0.00873 ** 
## weight         0.022821   0.002778   8.215  < 2e-16 ***
## hptyes:weight -0.005987   0.006880  -0.870  0.38419    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##            Estimate Std. Error z value
## Normal|IFG   4.3103     0.2596   16.60
## IFG|DM       4.9309     0.2627   18.77

6 Model fit assessment

hosmerlem(mlog, tables = T)  # ordinal version of Hosmer-Lemeshow test
## 
## Hosmer-Lemeshow Test:
## 
##                     Chi-sq  df  pr(>chi)  
## ordinal(Hosmerlem)  32.609  17   0.01263 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## H0: No lack of fit dictated
## 
## rho: 100% 
## 
## 
## Observed Freqs:
## 
##  group total Normal IFG  DM
##      1   408    375  12  21
##      2   408    357  19  32
##      3   408    353  25  30
##      4   408    313  38  57
##      5   408    325  37  46
##      6   408    318  33  57
##      7   408    315  24  69
##      8   408    286  51  71
##      9   408    255  56  97
##     10   411    228  69 114
## 
## Expected Freqs:
## 
##  group     OS   Normal     IFG       DM
##      1 1.1711 363.4518 19.2985  25.2497
##      2 1.2211 350.7204 24.3407  32.9389
##      3 1.2559 341.9670 27.6564  38.3766
##      4 1.2867 334.2806 30.4688  43.2506
##      5 1.3183 326.4476 33.2387  48.3138
##      6 1.3531 317.9045 36.1438  53.9517
##      7 1.3948 307.7534 39.4339  60.8128
##      8 1.4525 293.8628 43.6362  70.5010
##      9 1.5461 271.8254 49.5247  86.6499
##     10 1.7858 220.1254 58.7670 132.1076
lipsitz(mlog)  # Lipsitz Test for Categorical Response Models
## 
## Lipsitz Test:
## 
##                       LR   df   pr(>chi)   
## ordinal(lipsitz)  23.311    9   0.005533 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## H0: No lack of fit dictated
# Both show poor fit!
Rsquared(mlog, measure = "nagelkerke")
## 
## Nagelkerke's R2: 
## 
## 0.07595

7 Proportional odds assumption

brant.test(mlog)
## 
## Brant Test:
##            chi-sq   df   pr(>chi)
## Omnibus     5.030    3       0.17
## totchol     0.803    1       0.37
## hptyes      2.105    1       0.15
## weight      1.830    1       0.18
## 
## H0: Proportional odds assumption holds

Compare with mlogit model to see why proportional odds assumption holds, check ORs

tidy(nnet::multinom(fbs ~ totchol + hpt + weight, data = ms), exponentiate = T)
## # weights:  15 (8 variable)
## initial  value 4485.633975 
## iter  10 value 2739.040075
## final  value 2738.198554 
## converged
## # A tibble: 8 × 6
##   y.level term        estimate std.error statistic  p.value
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 IFG     (Intercept)  0.00602   0.366      -14.0  2.85e-44
## 2 IFG     totchol      1.27      0.0424       5.65 1.63e- 8
## 3 IFG     hptyes       2.38      0.146        5.96 2.56e- 9
## 4 IFG     weight       1.02      0.00371      5.93 3.07e- 9
## 5 DM      (Intercept)  0.00739   0.304      -16.2  1.02e-58
## 6 DM      totchol      1.32      0.0351       7.91 2.61e-15
## 7 DM      hptyes       2.46      0.120        7.47 7.89e-14
## 8 DM      weight       1.02      0.00307      7.38 1.62e-13

8 Presentation and interpretation

Present in table

# Details
tidy(mlog, conf.int = T)
## # A tibble: 5 × 8
##   term       estimate std.error statistic  p.value conf.low conf.high coef.type
##   <chr>         <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    
## 1 Normal|IFG   4.25     0.250       17.0  1.07e-64  NA        NA      intercept
## 2 IFG|DM       4.87     0.253       19.2  2.42e-82  NA        NA      intercept
## 3 totchol      0.258    0.0292       8.83 1.04e-18   0.201     0.315  location 
## 4 hptyes       0.849    0.0998       8.50 1.87e-17   0.652     1.04   location 
## 5 weight       0.0219   0.00254      8.59 8.62e-18   0.0169    0.0268 location
tidy(mlog, conf.int = T, exponentiate = T)
## # A tibble: 5 × 8
##   term       estimate std.error statistic  p.value conf.low conf.high coef.type
##   <chr>         <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl> <chr>    
## 1 Normal|IFG    70.2    0.250       17.0  1.07e-64    NA        NA    intercept
## 2 IFG|DM       131.     0.253       19.2  2.42e-82    NA        NA    intercept
## 3 totchol        1.29   0.0292       8.83 1.04e-18     1.22      1.37 location 
## 4 hptyes         2.34   0.0998       8.50 1.87e-17     1.92      2.84 location 
## 5 weight         1.02   0.00254      8.59 8.62e-18     1.02      1.03 location
knitr::kable(tidy(mlog, conf.int = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high coef.type
Normal|IFG 4.252 0.250 16.985 0 NA NA intercept
IFG|DM 4.872 0.253 19.222 0 NA NA intercept
totchol 0.258 0.029 8.830 0 0.201 0.315 location
hptyes 0.849 0.100 8.502 0 0.652 1.043 location
weight 0.022 0.003 8.591 0 0.017 0.027 location
knitr::kable(tidy(mlog, conf.int = T, exponentiate = T), format = "simple", digits = 3)
term estimate std.error statistic p.value conf.low conf.high coef.type
Normal|IFG 70.218 0.250 16.985 0 NA NA intercept
IFG|DM 130.608 0.253 19.222 0 NA NA intercept
totchol 1.294 0.029 8.830 0 1.222 1.370 location
hptyes 2.337 0.100 8.502 0 1.919 2.839 location
weight 1.022 0.003 8.591 0 1.017 1.027 location