library(gtsummary) # descriptive table
library(ordinal) # clm, fit proportional odds model
library(broom) # tidy
library(gofcat) # model fit
ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
## 'data.frame': 4083 obs. of 4 variables:
## $ fbs : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
## $ totchol: num 5.43 5.13 5.55 4.01 5.21 ...
## $ hpt : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ weight : num 40 54.6 37 48.4 44.5 ...
summary(ms)
## fbs totchol hpt weight
## DM : 594 Min. : 0.180 no :3601 Min. : 30.00
## IFG : 364 1st Qu.: 4.970 yes: 482 1st Qu.: 53.80
## Normal:3125 Median : 5.700 Median : 62.30
## Mean : 5.791 Mean : 63.93
## 3rd Qu.: 6.525 3rd Qu.: 72.00
## Max. :23.140 Max. :187.80
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM"), ordered = T) # order the reference category
str(ms$fbs)
## Ord.factor w/ 3 levels "Normal"<"IFG"<..: 1 1 1 1 1 1 1 1 1 1 ...
levels(ms$fbs)
## [1] "Normal" "IFG" "DM"
summary(ms)
## fbs totchol hpt weight
## Normal:3125 Min. : 0.180 no :3601 Min. : 30.00
## IFG : 364 1st Qu.: 4.970 yes: 482 1st Qu.: 53.80
## DM : 594 Median : 5.700 Median : 62.30
## Mean : 5.791 Mean : 63.93
## 3rd Qu.: 6.525 3rd Qu.: 72.00
## Max. :23.140 Max. :187.80
tbl_summary(ms, by = fbs,
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"))
Characteristic | Normal, N = 3,1251 | IFG, N = 3641 | DM, N = 5941 |
---|---|---|---|
totchol | 5.69 (1.24) | 6.08 (1.37) | 6.16 (1.31) |
hpt | 281 (9.0%) | 75 (21%) | 126 (21%) |
weight | 63 (14) | 68 (14) | 68 (15) |
1 Mean (SD); n (%) |
log_chol = clm(fbs ~ totchol, data = ms)
summary(log_chol)
## formula: fbs ~ totchol
## data: ms
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4083 -2817.64 5641.28 5(1) 5.09e-07 1.8e+03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## totchol 0.26227 0.02875 9.124 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Normal|IFG 2.7288 0.1763 15.48
## IFG|DM 3.3281 0.1794 18.55
log_hpt = clm(fbs ~ hpt, data = ms)
summary(log_hpt)
## formula: fbs ~ hpt
## data: ms
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4083 -2816.60 5639.19 5(1) 1.44e-08 2.5e+01
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## hptyes 0.95306 0.09802 9.723 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Normal|IFG 1.32036 0.04086 32.31
## IFG|DM 1.92115 0.04836 39.73
log_wt = clm(fbs ~ weight, data = ms)
summary(log_wt)
## formula: fbs ~ weight
## data: ms
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4083 -2817.00 5640.01 6(1) 1.40e-12 2.0e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## weight 0.023197 0.002486 9.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Normal|IFG 2.6944 0.1691 15.93
## IFG|DM 3.2935 0.1722 19.12
mlog = clm(fbs ~ totchol + hpt + weight, data = ms)
summary(mlog)
## formula: fbs ~ totchol + hpt + weight
## data: ms
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4083 -2740.27 5490.55 6(1) 2.93e-12 4.2e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## totchol 0.257688 0.029182 8.830 <2e-16 ***
## hptyes 0.848765 0.099833 8.502 <2e-16 ***
## weight 0.021852 0.002544 8.591 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Normal|IFG 4.2516 0.2503 16.98
## IFG|DM 4.8722 0.2535 19.22
Specify interaction based on clinical opinion, let’s say between
hpt * weight
:
mlogx = clm(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
summary(mlogx) # Interaction not significant
## formula: fbs ~ totchol + hpt + weight + hpt * weight
## data: ms
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4083 -2739.90 5491.79 6(1) 1.44e-11 8.6e+05
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## totchol 0.256810 0.029201 8.794 < 2e-16 ***
## hptyes 1.257361 0.479443 2.623 0.00873 **
## weight 0.022821 0.002778 8.215 < 2e-16 ***
## hptyes:weight -0.005987 0.006880 -0.870 0.38419
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Normal|IFG 4.3103 0.2596 16.60
## IFG|DM 4.9309 0.2627 18.77
hosmerlem(mlog, tables = T) # ordinal version of Hosmer-Lemeshow test
##
## Hosmer-Lemeshow Test:
##
## Chi-sq df pr(>chi)
## ordinal(Hosmerlem) 32.609 17 0.01263 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## H0: No lack of fit dictated
##
## rho: 100%
##
##
## Observed Freqs:
##
## group total Normal IFG DM
## 1 408 375 12 21
## 2 408 357 19 32
## 3 408 353 25 30
## 4 408 313 38 57
## 5 408 325 37 46
## 6 408 318 33 57
## 7 408 315 24 69
## 8 408 286 51 71
## 9 408 255 56 97
## 10 411 228 69 114
##
## Expected Freqs:
##
## group OS Normal IFG DM
## 1 1.1711 363.4518 19.2985 25.2497
## 2 1.2211 350.7204 24.3407 32.9389
## 3 1.2559 341.9670 27.6564 38.3766
## 4 1.2867 334.2806 30.4688 43.2506
## 5 1.3183 326.4476 33.2387 48.3138
## 6 1.3531 317.9045 36.1438 53.9517
## 7 1.3948 307.7534 39.4339 60.8128
## 8 1.4525 293.8628 43.6362 70.5010
## 9 1.5461 271.8254 49.5247 86.6499
## 10 1.7858 220.1254 58.7670 132.1076
lipsitz(mlog) # Lipsitz Test for Categorical Response Models
##
## Lipsitz Test:
##
## LR df pr(>chi)
## ordinal(lipsitz) 23.311 9 0.005533 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## H0: No lack of fit dictated
# Both show poor fit!
Rsquared(mlog, measure = "nagelkerke")
##
## Nagelkerke's R2:
##
## 0.07595
brant.test(mlog)
##
## Brant Test:
## chi-sq df pr(>chi)
## Omnibus 5.030 3 0.17
## totchol 0.803 1 0.37
## hptyes 2.105 1 0.15
## weight 1.830 1 0.18
##
## H0: Proportional odds assumption holds
Compare with mlogit model to see why proportional odds assumption holds, check ORs
tidy(nnet::multinom(fbs ~ totchol + hpt + weight, data = ms), exponentiate = T)
## # weights: 15 (8 variable)
## initial value 4485.633975
## iter 10 value 2739.040075
## final value 2738.198554
## converged
## # A tibble: 8 × 6
## y.level term estimate std.error statistic p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 IFG (Intercept) 0.00602 0.366 -14.0 2.85e-44
## 2 IFG totchol 1.27 0.0424 5.65 1.63e- 8
## 3 IFG hptyes 2.38 0.146 5.96 2.56e- 9
## 4 IFG weight 1.02 0.00371 5.93 3.07e- 9
## 5 DM (Intercept) 0.00739 0.304 -16.2 1.02e-58
## 6 DM totchol 1.32 0.0351 7.91 2.61e-15
## 7 DM hptyes 2.46 0.120 7.47 7.89e-14
## 8 DM weight 1.02 0.00307 7.38 1.62e-13
Present in table
# Details
tidy(mlog, conf.int = T)
## # A tibble: 5 × 8
## term estimate std.error statistic p.value conf.low conf.high coef.type
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Normal|IFG 4.25 0.250 17.0 1.07e-64 NA NA intercept
## 2 IFG|DM 4.87 0.253 19.2 2.42e-82 NA NA intercept
## 3 totchol 0.258 0.0292 8.83 1.04e-18 0.201 0.315 location
## 4 hptyes 0.849 0.0998 8.50 1.87e-17 0.652 1.04 location
## 5 weight 0.0219 0.00254 8.59 8.62e-18 0.0169 0.0268 location
tidy(mlog, conf.int = T, exponentiate = T)
## # A tibble: 5 × 8
## term estimate std.error statistic p.value conf.low conf.high coef.type
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Normal|IFG 70.2 0.250 17.0 1.07e-64 NA NA intercept
## 2 IFG|DM 131. 0.253 19.2 2.42e-82 NA NA intercept
## 3 totchol 1.29 0.0292 8.83 1.04e-18 1.22 1.37 location
## 4 hptyes 2.34 0.0998 8.50 1.87e-17 1.92 2.84 location
## 5 weight 1.02 0.00254 8.59 8.62e-18 1.02 1.03 location
knitr::kable(tidy(mlog, conf.int = T), format = "simple", digits = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high | coef.type |
---|---|---|---|---|---|---|---|
Normal|IFG | 4.252 | 0.250 | 16.985 | 0 | NA | NA | intercept |
IFG|DM | 4.872 | 0.253 | 19.222 | 0 | NA | NA | intercept |
totchol | 0.258 | 0.029 | 8.830 | 0 | 0.201 | 0.315 | location |
hptyes | 0.849 | 0.100 | 8.502 | 0 | 0.652 | 1.043 | location |
weight | 0.022 | 0.003 | 8.591 | 0 | 0.017 | 0.027 | location |
knitr::kable(tidy(mlog, conf.int = T, exponentiate = T), format = "simple", digits = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high | coef.type |
---|---|---|---|---|---|---|---|
Normal|IFG | 70.218 | 0.250 | 16.985 | 0 | NA | NA | intercept |
IFG|DM | 130.608 | 0.253 | 19.222 | 0 | NA | NA | intercept |
totchol | 1.294 | 0.029 | 8.830 | 0 | 1.222 | 1.370 | location |
hptyes | 2.337 | 0.100 | 8.502 | 0 | 1.919 | 2.839 | location |
weight | 1.022 | 0.003 | 8.591 | 0 | 1.017 | 1.027 | location |