library(summarytools) # descriptive table (descr, freq, stby)
library(gtsummary) # descriptive table (tbl_summary)
library(ordinal) # clm, fit proportional odds model
library(broom) # tidy, to extract results from models
library(knitr) # for nice display of results
library(gofcat) # model fit
ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
'data.frame': 4083 obs. of 4 variables:
$ fbs : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
$ totchol: num 5.43 5.13 5.55 4.01 5.21 ...
$ hpt : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
$ weight : num 40 54.6 37 48.4 44.5 ...
summary(ms)
fbs totchol hpt weight
DM : 594 Min. : 0.180 no :3601 Min. : 30.00
IFG : 364 1st Qu.: 4.970 yes: 482 1st Qu.: 53.80
Normal:3125 Median : 5.700 Median : 62.30
Mean : 5.791 Mean : 63.93
3rd Qu.: 6.525 3rd Qu.: 72.00
Max. :23.140 Max. :187.80
ms
levels(ms$fbs) # Levels: DM IFG Normal, DM as reference
[1] "DM" "IFG" "Normal"
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM"), ordered = T) # ordinal, Normal as reference category
str(ms$fbs)
Ord.factor w/ 3 levels "Normal"<"IFG"<..: 1 1 1 1 1 1 1 1 1 1 ...
levels(ms$fbs) # Normal as reference
[1] "Normal" "IFG" "DM"
descr(ms)
Non-numerical variable(s) ignored: fbs, hpt
Descriptive Statistics
ms
N: 4083
totchol weight
----------------- --------- ---------
Mean 5.79 63.93
Std.Dev 1.28 14.30
Min 0.18 30.00
Q1 4.97 53.80
Median 5.70 62.30
Q3 6.53 72.00
Max 23.14 187.80
MAD 1.14 13.49
IQR 1.56 18.20
CV 0.22 0.22
Skewness 1.16 0.98
SE.Skewness 0.04 0.04
Kurtosis 10.20 2.97
N.Valid 4083.00 4083.00
Pct.Valid 100.00 100.00
with(ms, stby(totchol, fbs, descr))
Descriptive Statistics
totchol by fbs
Data Frame: ms
N: 3125
Normal IFG DM
----------------- --------- -------- --------
Mean 5.69 6.08 6.16
Std.Dev 1.24 1.37 1.31
Min 0.18 2.33 2.32
Q1 4.89 5.29 5.29
Median 5.62 5.94 6.03
Q3 6.41 6.72 6.96
Max 23.14 14.91 12.29
MAD 1.13 1.07 1.19
IQR 1.52 1.43 1.67
CV 0.22 0.22 0.21
Skewness 1.27 1.40 0.55
SE.Skewness 0.04 0.13 0.10
Kurtosis 13.58 6.20 1.26
N.Valid 3125.00 364.00 594.00
Pct.Valid 100.00 100.00 100.00
with(ms, stby(weight, fbs, descr))
Descriptive Statistics
weight by fbs
Data Frame: ms
N: 3125
Normal IFG DM
----------------- --------- -------- --------
Mean 62.74 67.71 67.86
Std.Dev 13.96 14.50 14.85
Min 30.00 38.90 30.80
Q1 53.00 56.80 58.00
Median 61.00 65.85 66.00
Q3 71.00 76.75 75.70
Max 187.80 121.00 147.50
MAD 13.34 14.60 12.82
IQR 18.00 19.77 17.60
CV 0.22 0.21 0.22
Skewness 1.03 0.58 1.04
SE.Skewness 0.04 0.13 0.10
Kurtosis 3.58 0.37 2.55
N.Valid 3125.00 364.00 594.00
Pct.Valid 100.00 100.00 100.00
freq(ms$fbs); freq(ms$hpt)
Frequencies
ms$fbs
Type: Ordered Factor
Freq % Valid % Valid Cum. % Total % Total Cum.
------------ ------ --------- -------------- --------- --------------
Normal 3125 76.54 76.54 76.54 76.54
IFG 364 8.92 85.45 8.92 85.45
DM 594 14.55 100.00 14.55 100.00
<NA> 0 0.00 100.00
Total 4083 100.00 100.00 100.00 100.00
Frequencies
ms$hpt
Type: Factor
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 3601 88.19 88.19 88.19 88.19
yes 482 11.81 100.00 11.81 100.00
<NA> 0 0.00 100.00
Total 4083 100.00 100.00 100.00 100.00
with(ms, stby(hpt, fbs, freq))
Frequencies
ms$hpt
Type: Factor
Group: fbs = Normal
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 2844 91.01 91.01 91.01 91.01
yes 281 8.99 100.00 8.99 100.00
<NA> 0 0.00 100.00
Total 3125 100.00 100.00 100.00 100.00
Group: fbs = IFG
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 289 79.40 79.40 79.40 79.40
yes 75 20.60 100.00 20.60 100.00
<NA> 0 0.00 100.00
Total 364 100.00 100.00 100.00 100.00
Group: fbs = DM
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 468 78.79 78.79 78.79 78.79
yes 126 21.21 100.00 21.21 100.00
<NA> 0 0.00 100.00
Total 594 100.00 100.00 100.00 100.00
tbl_summary(ms, by = fbs,
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"),
digits = list(all_continuous() ~ c(2,2),
all_categorical() ~ c(0,1)))
Characteristic | Normal, N = 3,1251 | IFG, N = 3641 | DM, N = 5941 |
---|---|---|---|
totchol | 5.69 (1.24) | 6.08 (1.37) | 6.16 (1.31) |
hpt | 281 (9.0%) | 75 (20.6%) | 126 (21.2%) |
weight | 62.74 (13.96) | 67.71 (14.50) | 67.86 (14.85) |
1 Mean (SD); n (%) |
log_chol = clm(fbs ~ totchol, data = ms)
summary(log_chol)
formula: fbs ~ totchol
data: ms
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 4083 -2817.64 5641.28 5(1) 5.09e-07 1.8e+03
Coefficients:
Estimate Std. Error z value Pr(>|z|)
totchol 0.26227 0.02875 9.124 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
Normal|IFG 2.7288 0.1763 15.48
IFG|DM 3.3281 0.1794 18.55
log_hpt = clm(fbs ~ hpt, data = ms)
summary(log_hpt)
formula: fbs ~ hpt
data: ms
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 4083 -2816.60 5639.19 5(1) 1.44e-08 2.5e+01
Coefficients:
Estimate Std. Error z value Pr(>|z|)
hptyes 0.95306 0.09802 9.723 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
Normal|IFG 1.32036 0.04086 32.31
IFG|DM 1.92115 0.04836 39.73
log_wt = clm(fbs ~ weight, data = ms)
summary(log_wt)
formula: fbs ~ weight
data: ms
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 4083 -2817.00 5640.01 6(1) 7.25e-13 2.0e+05
Coefficients:
Estimate Std. Error z value Pr(>|z|)
weight 0.023197 0.002486 9.33 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
Normal|IFG 2.6944 0.1691 15.93
IFG|DM 3.2935 0.1722 19.12
mlog = clm(fbs ~ totchol + hpt + weight, data = ms)
summary(mlog)
formula: fbs ~ totchol + hpt + weight
data: ms
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 4083 -2740.27 5490.55 6(1) 1.04e-11 4.2e+05
Coefficients:
Estimate Std. Error z value Pr(>|z|)
totchol 0.257688 0.029182 8.830 <2e-16 ***
hptyes 0.848765 0.099833 8.502 <2e-16 ***
weight 0.021852 0.002544 8.591 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
Normal|IFG 4.2516 0.2503 16.98
IFG|DM 4.8722 0.2535 19.22
tidy(mlog, conf.int = T)
Let’s see p-values by LR test:
anova(clm(fbs ~ hpt + weight, data = ms), mlog) # D0 - D1, model without - model with variable
anova(clm(fbs ~ totchol + weight, data = ms), mlog)
anova(clm(fbs ~ totchol + hpt, data = ms), mlog)
\(\rightarrow\) Preliminary main effects model
Linearity in logit – numerical variable, from separate binary logistic models
Other numerical issues
\(\rightarrow\) Main effects model
Specify interaction based on clinical opinion, let’s say between
hpt * weight
:
mlogx = clm(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
summary(mlogx) # Interaction not significant
formula: fbs ~ totchol + hpt + weight + hpt * weight
data: ms
link threshold nobs logLik AIC niter max.grad cond.H
logit flexible 4083 -2739.90 5491.79 6(1) 2.58e-12 8.6e+05
Coefficients:
Estimate Std. Error z value Pr(>|z|)
totchol 0.256810 0.029201 8.794 < 2e-16 ***
hptyes 1.257361 0.479443 2.623 0.00873 **
weight 0.022821 0.002778 8.215 < 2e-16 ***
hptyes:weight -0.005987 0.006880 -0.870 0.38419
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Threshold coefficients:
Estimate Std. Error z value
Normal|IFG 4.3103 0.2596 16.60
IFG|DM 4.9309 0.2627 18.77
\(\rightarrow\) Preliminary final model
brant.test(mlog)
Brant Test:
chi-sq df pr(>chi)
Omnibus 5.030 3 0.17
totchol 0.803 1 0.37
hptyes 2.105 1 0.15
weight 1.830 1 0.18
H0: Proportional odds assumption holds
Compare with mlogit model to see why proportional odds assumption holds, check ORs
tidy(nnet::multinom(fbs ~ totchol + hpt + weight, data = ms), exponentiate = T)
# weights: 15 (8 variable)
initial value 4485.633975
iter 10 value 2739.040075
final value 2738.198554
converged
tidy(mlog, exp = T)
hosmerlem(mlog, tables = T) # ordinal version of Hosmer-Lemeshow test
Hosmer-Lemeshow Test:
Chi-sq df pr(>chi)
ordinal(Hosmerlem) 32.609 17 0.01263 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0: No lack of fit dictated
rho: 100%
Observed Freqs:
group total Normal IFG DM
1 408 375 12 21
2 408 357 19 32
3 408 353 25 30
4 408 313 38 57
5 408 325 37 46
6 408 318 33 57
7 408 315 24 69
8 408 286 51 71
9 408 255 56 97
10 411 228 69 114
Expected Freqs:
group OS Normal IFG DM
1 1.1711 363.4518 19.2985 25.2497
2 1.2211 350.7204 24.3407 32.9389
3 1.2559 341.9670 27.6564 38.3766
4 1.2867 334.2806 30.4688 43.2506
5 1.3183 326.4476 33.2387 48.3138
6 1.3531 317.9045 36.1438 53.9517
7 1.3948 307.7534 39.4339 60.8128
8 1.4525 293.8628 43.6362 70.5010
9 1.5461 271.8254 49.5247 86.6499
10 1.7858 220.1254 58.7670 132.1076
lipsitz(mlog) # Lipsitz Test for Categorical Response Models
Lipsitz Test:
LR df pr(>chi)
ordinal(lipsitz) 23.311 9 0.005533 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0: No lack of fit dictated
# Both show poor fit!
Rsquared(mlog, measure = "nagelkerke")
Nagelkerke's R2:
0.07595
+
Regression diagnostics – from separate binary logistic
models
\(\rightarrow\) Final model
Present in table
tbl_regression(mlog, exp = T)
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
totchol | 1.29 | 1.22, 1.37 | <0.001 |
hpt | |||
no | — | — | |
yes | 2.34 | 1.92, 2.84 | <0.001 |
weight | 1.02 | 1.02, 1.03 | <0.001 |
1 OR = Odds Ratio, CI = Confidence Interval |
# Details
tidy(mlog, conf.int = T) |> kable(digits = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high | coef.type |
---|---|---|---|---|---|---|---|
Normal|IFG | 4.252 | 0.250 | 16.985 | 0 | NA | NA | intercept |
IFG|DM | 4.872 | 0.253 | 19.222 | 0 | NA | NA | intercept |
totchol | 0.258 | 0.029 | 8.830 | 0 | 0.201 | 0.315 | location |
hptyes | 0.849 | 0.100 | 8.502 | 0 | 0.652 | 1.043 | location |
weight | 0.022 | 0.003 | 8.591 | 0 | 0.017 | 0.027 | location |
tidy(mlog, conf.int = T, exponentiate = T) |> kable(digits = 3)
term | estimate | std.error | statistic | p.value | conf.low | conf.high | coef.type |
---|---|---|---|---|---|---|---|
Normal|IFG | 70.218 | 0.250 | 16.985 | 0 | NA | NA | intercept |
IFG|DM | 130.608 | 0.253 | 19.222 | 0 | NA | NA | intercept |
totchol | 1.294 | 0.029 | 8.830 | 0 | 1.222 | 1.370 | location |
hptyes | 2.337 | 0.100 | 8.502 | 0 | 1.919 | 2.839 | location |
weight | 1.022 | 0.003 | 8.591 | 0 | 1.017 | 1.027 | location |