library(summarytools) # descriptive table (descr, freq, stby)
library(gtsummary) # descriptive table (tbl_summary)
library(nnet) # multinom
library(broom) # tidy, to extract results from models
library(knitr) # for nice display of results
library(gofcat) # model fit
ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
'data.frame': 4083 obs. of 4 variables:
$ fbs : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
$ totchol: num 5.43 5.13 5.55 4.01 5.21 ...
$ hpt : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
$ weight : num 40 54.6 37 48.4 44.5 ...
summary(ms)
fbs totchol hpt weight
DM : 594 Min. : 0.180 no :3601 Min. : 30.00
IFG : 364 1st Qu.: 4.970 yes: 482 1st Qu.: 53.80
Normal:3125 Median : 5.700 Median : 62.30
Mean : 5.791 Mean : 63.93
3rd Qu.: 6.525 3rd Qu.: 72.00
Max. :23.140 Max. :187.80
ms
levels(ms$fbs) # Levels: DM IFG Normal, DM as reference
[1] "DM" "IFG" "Normal"
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM")) # re-order, Normal as reference category
levels(ms$fbs) # Normal as reference
[1] "Normal" "IFG" "DM"
descr(ms)
Non-numerical variable(s) ignored: fbs, hpt
Descriptive Statistics
ms
N: 4083
totchol weight
----------------- --------- ---------
Mean 5.79 63.93
Std.Dev 1.28 14.30
Min 0.18 30.00
Q1 4.97 53.80
Median 5.70 62.30
Q3 6.53 72.00
Max 23.14 187.80
MAD 1.14 13.49
IQR 1.56 18.20
CV 0.22 0.22
Skewness 1.16 0.98
SE.Skewness 0.04 0.04
Kurtosis 10.20 2.97
N.Valid 4083.00 4083.00
Pct.Valid 100.00 100.00
with(ms, stby(totchol, fbs, descr))
Descriptive Statistics
totchol by fbs
Data Frame: ms
N: 3125
Normal IFG DM
----------------- --------- -------- --------
Mean 5.69 6.08 6.16
Std.Dev 1.24 1.37 1.31
Min 0.18 2.33 2.32
Q1 4.89 5.29 5.29
Median 5.62 5.94 6.03
Q3 6.41 6.72 6.96
Max 23.14 14.91 12.29
MAD 1.13 1.07 1.19
IQR 1.52 1.43 1.67
CV 0.22 0.22 0.21
Skewness 1.27 1.40 0.55
SE.Skewness 0.04 0.13 0.10
Kurtosis 13.58 6.20 1.26
N.Valid 3125.00 364.00 594.00
Pct.Valid 100.00 100.00 100.00
with(ms, stby(weight, fbs, descr))
Descriptive Statistics
weight by fbs
Data Frame: ms
N: 3125
Normal IFG DM
----------------- --------- -------- --------
Mean 62.74 67.71 67.86
Std.Dev 13.96 14.50 14.85
Min 30.00 38.90 30.80
Q1 53.00 56.80 58.00
Median 61.00 65.85 66.00
Q3 71.00 76.75 75.70
Max 187.80 121.00 147.50
MAD 13.34 14.60 12.82
IQR 18.00 19.77 17.60
CV 0.22 0.21 0.22
Skewness 1.03 0.58 1.04
SE.Skewness 0.04 0.13 0.10
Kurtosis 3.58 0.37 2.55
N.Valid 3125.00 364.00 594.00
Pct.Valid 100.00 100.00 100.00
freq(ms)
Variable(s) ignored: totchol
Frequencies
ms$fbs
Type: Factor
Freq % Valid % Valid Cum. % Total % Total Cum.
------------ ------ --------- -------------- --------- --------------
Normal 3125 76.54 76.54 76.54 76.54
IFG 364 8.92 85.45 8.92 85.45
DM 594 14.55 100.00 14.55 100.00
<NA> 0 0.00 100.00
Total 4083 100.00 100.00 100.00 100.00
ms$hpt
Type: Factor
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 3601 88.19 88.19 88.19 88.19
yes 482 11.81 100.00 11.81 100.00
<NA> 0 0.00 100.00
Total 4083 100.00 100.00 100.00 100.00
with(ms, stby(hpt, fbs, freq))
Frequencies
ms$hpt
Type: Factor
Group: fbs = Normal
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 2844 91.01 91.01 91.01 91.01
yes 281 8.99 100.00 8.99 100.00
<NA> 0 0.00 100.00
Total 3125 100.00 100.00 100.00 100.00
Group: fbs = IFG
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 289 79.40 79.40 79.40 79.40
yes 75 20.60 100.00 20.60 100.00
<NA> 0 0.00 100.00
Total 364 100.00 100.00 100.00 100.00
Group: fbs = DM
Freq % Valid % Valid Cum. % Total % Total Cum.
----------- ------ --------- -------------- --------- --------------
no 468 78.79 78.79 78.79 78.79
yes 126 21.21 100.00 21.21 100.00
<NA> 0 0.00 100.00
Total 594 100.00 100.00 100.00 100.00
tbl_summary(ms, by = fbs,
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{n} ({p}%)"),
digits = list(all_continuous() ~ c(2,2),
all_categorical() ~ c(0,1)))
Characteristic | Normal, N = 3,1251 | IFG, N = 3641 | DM, N = 5941 |
---|---|---|---|
totchol | 5.69 (1.24) | 6.08 (1.37) | 6.16 (1.31) |
hpt | 281 (9.0%) | 75 (20.6%) | 126 (21.2%) |
weight | 62.74 (13.96) | 67.71 (14.50) | 67.86 (14.85) |
1 Mean (SD); n (%) |
log_chol = multinom(fbs ~ totchol, data = ms)
# weights: 9 (4 variable)
initial value 4485.633975
final value 2816.937753
converged
summary(log_chol)
Call:
multinom(formula = fbs ~ totchol, data = ms)
Coefficients:
(Intercept) totchol
IFG -3.599479 0.2466938
DM -3.335919 0.2836485
Std. Errors:
(Intercept) totchol
IFG 0.2576232 0.04176180
DM 0.2130532 0.03445814
Residual Deviance: 5633.876
AIC: 5641.876
tidy(log_chol, conf.int = T) # summary for multinom does not display p-value, need to use tidy
levels(ms$fbs) # remember Normal as reference
[1] "Normal" "IFG" "DM"
log_hpt = multinom(fbs ~ hpt, data = ms)
# weights: 9 (4 variable)
initial value 4485.633975
iter 10 value 2815.359919
iter 10 value 2815.359919
final value 2815.359919
converged
summary(log_hpt)
Call:
multinom(formula = fbs ~ hpt, data = ms)
Coefficients:
(Intercept) hptyes
IFG -2.286534 0.9656721
DM -1.804488 1.0023935
Std. Errors:
(Intercept) hptyes
IFG 0.06173986 0.1438879
DM 0.04988338 0.1182527
Residual Deviance: 5630.72
AIC: 5638.72
tidy(log_hpt, conf.int = T)
log_wt = multinom(fbs ~ weight, data = ms)
# weights: 9 (4 variable)
initial value 4485.633975
final value 2816.273988
converged
summary(log_wt)
Call:
multinom(formula = fbs ~ weight, data = ms)
Coefficients:
(Intercept) weight
IFG -3.671757 0.02337818
DM -3.221548 0.02396112
Std. Errors:
(Intercept) weight
IFG 0.2491246 0.003626756
DM 0.2044349 0.002989522
Residual Deviance: 5632.548
AIC: 5640.548
tidy(log_wt, conf.int = T)
mlog = multinom(fbs ~ totchol + hpt + weight, data = ms)
# weights: 15 (8 variable)
initial value 4485.633975
iter 10 value 2739.040075
final value 2738.198554
converged
summary(mlog)
Call:
multinom(formula = fbs ~ totchol + hpt + weight, data = ms)
Coefficients:
(Intercept) totchol hptyes weight
IFG -5.112735 0.2393748 0.8672457 0.02199345
DM -4.907901 0.2772382 0.9000338 0.02267585
Std. Errors:
(Intercept) totchol hptyes weight
IFG 0.3663222 0.04239126 0.1455646 0.003710277
DM 0.3037717 0.03505572 0.1204522 0.003073929
Residual Deviance: 5476.397
AIC: 5492.397
tidy(mlog, conf.int = T)
Let’s see p-values by LR test:
anova(multinom(fbs ~ hpt + weight, data = ms), mlog) # D0 - D1, model without - model with variable
# weights: 12 (6 variable)
initial value 4485.633975
iter 10 value 2778.035100
final value 2778.033543
converged
anova(multinom(fbs ~ totchol + weight, data = ms), mlog)
# weights: 12 (6 variable)
initial value 4485.633975
iter 10 value 2773.388803
final value 2773.387904
converged
anova(multinom(fbs ~ totchol + hpt, data = ms), mlog)
# weights: 12 (6 variable)
initial value 4485.633975
iter 10 value 2775.475233
final value 2775.351291
converged
\(\rightarrow\) Preliminary main effects model
Linearity in logit – numerical variable, from separate binary logistic models
Other numerical issues
\(\rightarrow\) Main effects model
Specify interaction based on clinical opinion, let’s say between
hpt * weight
:
mlogx = multinom(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
# weights: 18 (10 variable)
initial value 4485.633975
iter 10 value 2739.643635
final value 2737.858433
converged
summary(mlogx)
Call:
multinom(formula = fbs ~ totchol + hpt + weight + hpt * weight,
data = ms)
Coefficients:
(Intercept) totchol hptyes weight hptyes:weight
IFG -5.086868 0.239684 0.7531227 0.02158916 0.00156111
DM -4.970760 0.276339 1.3181710 0.02369976 -0.00611555
Std. Errors:
(Intercept) totchol hptyes weight hptyes:weight
IFG 0.3803843 0.04238647 0.6988769 0.004082641 0.009925563
DM 0.3149209 0.03507623 0.5826223 0.003348587 0.008399271
Residual Deviance: 5475.717
AIC: 5495.717
tidy(mlogx, conf.int = T) # Interaction not significant for IFG & DM
\(\rightarrow\) Preliminary final model
hosmerlem(mlog, tables = T) # multinomial version of Hosmer-Lemeshow test
Hosmer-Lemeshow Test:
Chi-sq df pr(>chi)
multinomial(Hosmerlem) 26.386 16 0.04884 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
H0: No lack of fit dictated
rho: 100%
Observed Freqs:
group total Normal IFG DM
1 408 373 14 21
2 408 359 17 32
3 408 353 25 30
4 408 313 38 57
5 408 325 37 46
6 408 318 33 57
7 408 316 24 68
8 408 285 51 72
9 408 257 55 96
10 411 226 70 115
Expected Freqs:
group OS Normal IFG DM
1 0.1073 364.2194 17.5758 26.2048
2 0.1387 351.4004 22.3269 34.2726
3 0.1605 342.5349 25.5955 39.8697
4 0.1796 334.7189 28.4429 44.8382
5 0.1992 326.7408 31.3057 49.9535
6 0.2205 318.0191 34.4627 55.5181
7 0.2461 307.5801 38.1956 62.2243
8 0.2812 293.2620 43.3012 71.4367
9 0.3377 270.2066 51.6100 86.1834
10 0.4737 216.3193 71.1943 123.4864
Rsquared(mlog, measure = "nagelkerke")
Nagelkerke's R2:
0.07722
+
Regression diagnostics – from separate binary logistic
models
\(\rightarrow\) Final model
Present in table
epiDisplay::mlogit.display(mlog)
Outcome =fbs; Referent group = Normal
IFG DM
Coeff./SE RRR(95%CI) Coeff./SE RRR(95%CI)
(Intercept) -5.11/0.366*** - -4.91/0.304*** -
totchol 0.24/0.042*** 1.27(1.17,1.38) 0.28/0.035*** 1.32(1.23,1.41)
hptyes 0.87/0.146*** 2.38(1.79,3.17) 0.9/0.12*** 2.46(1.94,3.11)
weight 0.02/0.004*** 1.02(1.01,1.03) 0.02/0.003*** 1.02(1.02,1.03)
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual Deviance: 5476.4
AIC = 5492.4
tbl_regression(mlog, exp = T)
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
Characteristic | OR1 | 95% CI1 | p-value |
---|---|---|---|
IFG | |||
totchol | 1.27 | 1.17, 1.38 | <0.001 |
hpt | |||
no | — | — | |
yes | 2.38 | 1.79, 3.17 | <0.001 |
weight | 1.02 | 1.01, 1.03 | <0.001 |
DM | |||
totchol | 1.32 | 1.23, 1.41 | <0.001 |
hpt | |||
no | — | — | |
yes | 2.46 | 1.94, 3.11 | <0.001 |
weight | 1.02 | 1.02, 1.03 | <0.001 |
1 OR = Odds Ratio, CI = Confidence Interval |
# Details
tidy(mlog, conf.int = T) |> kable(digits = 3)
y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|---|
IFG | (Intercept) | -5.113 | 0.366 | -13.957 | 0 | -5.831 | -4.395 |
IFG | totchol | 0.239 | 0.042 | 5.647 | 0 | 0.156 | 0.322 |
IFG | hptyes | 0.867 | 0.146 | 5.958 | 0 | 0.582 | 1.153 |
IFG | weight | 0.022 | 0.004 | 5.928 | 0 | 0.015 | 0.029 |
DM | (Intercept) | -4.908 | 0.304 | -16.157 | 0 | -5.503 | -4.313 |
DM | totchol | 0.277 | 0.035 | 7.909 | 0 | 0.209 | 0.346 |
DM | hptyes | 0.900 | 0.120 | 7.472 | 0 | 0.664 | 1.136 |
DM | weight | 0.023 | 0.003 | 7.377 | 0 | 0.017 | 0.029 |
tidy(mlog, conf.int = T, exponentiate = T) |> kable(digits = 3)
y.level | term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|---|
IFG | (Intercept) | 0.006 | 0.366 | -13.957 | 0 | 0.003 | 0.012 |
IFG | totchol | 1.270 | 0.042 | 5.647 | 0 | 1.169 | 1.381 |
IFG | hptyes | 2.380 | 0.146 | 5.958 | 0 | 1.790 | 3.166 |
IFG | weight | 1.022 | 0.004 | 5.928 | 0 | 1.015 | 1.030 |
DM | (Intercept) | 0.007 | 0.304 | -16.157 | 0 | 0.004 | 0.013 |
DM | totchol | 1.319 | 0.035 | 7.909 | 0 | 1.232 | 1.413 |
DM | hptyes | 2.460 | 0.120 | 7.472 | 0 | 1.942 | 3.115 |
DM | weight | 1.023 | 0.003 | 7.377 | 0 | 1.017 | 1.029 |