1 Library

library(gtsummary)  # descriptive table
library(nnet)       # multinom
library(broom)      # tidy
library(gofcat)     # model fit

2 Dataset

ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
## 'data.frame':    4083 obs. of  4 variables:
##  $ fbs    : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
##  $ totchol: num  5.43 5.13 5.55 4.01 5.21 ...
##  $ hpt    : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
##  $ weight : num  40 54.6 37 48.4 44.5 ...
summary(ms)
##      fbs          totchol        hpt           weight      
##  DM    : 594   Min.   : 0.180   no :3601   Min.   : 30.00  
##  IFG   : 364   1st Qu.: 4.970   yes: 482   1st Qu.: 53.80  
##  Normal:3125   Median : 5.700              Median : 62.30  
##                Mean   : 5.791              Mean   : 63.93  
##                3rd Qu.: 6.525              3rd Qu.: 72.00  
##                Max.   :23.140              Max.   :187.80
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM"))  # order the reference category

3 Descriptive

summary(ms)
##      fbs          totchol        hpt           weight      
##  Normal:3125   Min.   : 0.180   no :3601   Min.   : 30.00  
##  IFG   : 364   1st Qu.: 4.970   yes: 482   1st Qu.: 53.80  
##  DM    : 594   Median : 5.700              Median : 62.30  
##                Mean   : 5.791              Mean   : 63.93  
##                3rd Qu.: 6.525              3rd Qu.: 72.00  
##                Max.   :23.140              Max.   :187.80
tbl_summary(ms, by = fbs,
            statistic = list(all_continuous() ~ "{mean} ({sd})", 
                             all_categorical() ~ "{n} ({p}%)"))
Characteristic Normal, N = 3,1251 IFG, N = 3641 DM, N = 5941
totchol 5.69 (1.24) 6.08 (1.37) 6.16 (1.31)
hpt 281 (9.0%) 75 (21%) 126 (21%)
weight 63 (14) 68 (14) 68 (15)
1 Mean (SD); n (%)

4 Variable selection

4.1 Univariable

log_chol = multinom(fbs ~ totchol, data = ms)
## # weights:  9 (4 variable)
## initial  value 4485.633975 
## final  value 2816.937753 
## converged
summary(log_chol)
## Call:
## multinom(formula = fbs ~ totchol, data = ms)
## 
## Coefficients:
##     (Intercept)   totchol
## IFG   -3.599479 0.2466938
## DM    -3.335919 0.2836485
## 
## Std. Errors:
##     (Intercept)    totchol
## IFG   0.2576232 0.04176180
## DM    0.2130532 0.03445814
## 
## Residual Deviance: 5633.876 
## AIC: 5641.876
tidy(log_chol, conf.int = T)
## # A tibble: 4 × 8
##   y.level term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)   -3.60     0.258     -14.0  2.31e-44   -4.10     -3.09 
## 2 IFG     totchol        0.247    0.0418      5.91 3.48e- 9    0.165     0.329
## 3 DM      (Intercept)   -3.34     0.213     -15.7  2.94e-55   -3.75     -2.92 
## 4 DM      totchol        0.284    0.0345      8.23 1.85e-16    0.216     0.351
levels(ms$fbs)  # 1 = Normal as reference
## [1] "Normal" "IFG"    "DM"
log_hpt = multinom(fbs ~ hpt, data = ms)
## # weights:  9 (4 variable)
## initial  value 4485.633975 
## iter  10 value 2815.359919
## iter  10 value 2815.359919
## final  value 2815.359919 
## converged
summary(log_hpt)
## Call:
## multinom(formula = fbs ~ hpt, data = ms)
## 
## Coefficients:
##     (Intercept)    hptyes
## IFG   -2.286534 0.9656721
## DM    -1.804488 1.0023935
## 
## Std. Errors:
##     (Intercept)    hptyes
## IFG  0.06173986 0.1438879
## DM   0.04988338 0.1182527
## 
## Residual Deviance: 5630.72 
## AIC: 5638.72
tidy(log_hpt, conf.int = T)
## # A tibble: 4 × 8
##   y.level term        estimate std.error statistic   p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)   -2.29     0.0617    -37.0  3.14e-300   -2.41      -2.17
## 2 IFG     hptyes         0.966    0.144       6.71 1.93e- 11    0.684      1.25
## 3 DM      (Intercept)   -1.80     0.0499    -36.2  1.55e-286   -1.90      -1.71
## 4 DM      hptyes         1.00     0.118       8.48 2.32e- 17    0.771      1.23
log_wt = multinom(fbs ~ weight, data = ms)
## # weights:  9 (4 variable)
## initial  value 4485.633975 
## final  value 2816.273988 
## converged
summary(log_wt)
## Call:
## multinom(formula = fbs ~ weight, data = ms)
## 
## Coefficients:
##     (Intercept)     weight
## IFG   -3.671757 0.02337818
## DM    -3.221548 0.02396112
## 
## Std. Errors:
##     (Intercept)      weight
## IFG   0.2491246 0.003626756
## DM    0.2044349 0.002989522
## 
## Residual Deviance: 5632.548 
## AIC: 5640.548
tidy(log_wt, conf.int = T)
## # A tibble: 4 × 8
##   y.level term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)  -3.67     0.249      -14.7  3.64e-49  -4.16     -3.18  
## 2 IFG     weight        0.0234   0.00363      6.45 1.15e-10   0.0163    0.0305
## 3 DM      (Intercept)  -3.22     0.204      -15.8  6.02e-56  -3.62     -2.82  
## 4 DM      weight        0.0240   0.00299      8.02 1.10e-15   0.0181    0.0298

4.2 Multivariable

mlog = multinom(fbs ~ totchol + hpt + weight, data = ms)
## # weights:  15 (8 variable)
## initial  value 4485.633975 
## iter  10 value 2739.040075
## final  value 2738.198554 
## converged
summary(mlog)
## Call:
## multinom(formula = fbs ~ totchol + hpt + weight, data = ms)
## 
## Coefficients:
##     (Intercept)   totchol    hptyes     weight
## IFG   -5.112735 0.2393748 0.8672457 0.02199345
## DM    -4.907901 0.2772382 0.9000338 0.02267585
## 
## Std. Errors:
##     (Intercept)    totchol    hptyes      weight
## IFG   0.3663222 0.04239126 0.1455646 0.003710277
## DM    0.3037717 0.03505572 0.1204522 0.003073929
## 
## Residual Deviance: 5476.397 
## AIC: 5492.397
tidy(mlog, conf.int = T)
## # A tibble: 8 × 8
##   y.level term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)  -5.11     0.366      -14.0  2.85e-44  -5.83     -4.39  
## 2 IFG     totchol       0.239    0.0424       5.65 1.63e- 8   0.156     0.322 
## 3 IFG     hptyes        0.867    0.146        5.96 2.56e- 9   0.582     1.15  
## 4 IFG     weight        0.0220   0.00371      5.93 3.07e- 9   0.0147    0.0293
## 5 DM      (Intercept)  -4.91     0.304      -16.2  1.02e-58  -5.50     -4.31  
## 6 DM      totchol       0.277    0.0351       7.91 2.61e-15   0.209     0.346 
## 7 DM      hptyes        0.900    0.120        7.47 7.89e-14   0.664     1.14  
## 8 DM      weight        0.0227   0.00307      7.38 1.62e-13   0.0167    0.0287

5 Interaction

Specify interaction based on clinical opinion, let’s say between hpt * weight:

mlogx = multinom(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
## # weights:  18 (10 variable)
## initial  value 4485.633975 
## iter  10 value 2739.643635
## final  value 2737.858433 
## converged
summary(mlogx)
## Call:
## multinom(formula = fbs ~ totchol + hpt + weight + hpt * weight, 
##     data = ms)
## 
## Coefficients:
##     (Intercept)  totchol    hptyes     weight hptyes:weight
## IFG   -5.086868 0.239684 0.7531227 0.02158916    0.00156111
## DM    -4.970760 0.276339 1.3181710 0.02369976   -0.00611555
## 
## Std. Errors:
##     (Intercept)    totchol    hptyes      weight hptyes:weight
## IFG   0.3803843 0.04238647 0.6988769 0.004082641   0.009925563
## DM    0.3149209 0.03507623 0.5826223 0.003348587   0.008399271
## 
## Residual Deviance: 5475.717 
## AIC: 5495.717
tidy(mlogx, conf.int = T)
## # A tibble: 10 × 8
##    y.level term         estimate std.error statistic  p.value conf.low conf.high
##    <chr>   <chr>           <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
##  1 IFG     (Intercept)  -5.09      0.380     -13.4   8.70e-41  -5.83     -4.34  
##  2 IFG     totchol       0.240     0.0424      5.65  1.56e- 8   0.157     0.323 
##  3 IFG     hptyes        0.753     0.699       1.08  2.81e- 1  -0.617     2.12  
##  4 IFG     weight        0.0216    0.00408     5.29  1.24e- 7   0.0136    0.0296
##  5 IFG     hptyes:weig…  0.00156   0.00993     0.157 8.75e- 1  -0.0179    0.0210
##  6 DM      (Intercept)  -4.97      0.315     -15.8   4.00e-56  -5.59     -4.35  
##  7 DM      totchol       0.276     0.0351      7.88  3.32e-15   0.208     0.345 
##  8 DM      hptyes        1.32      0.583       2.26  2.37e- 2   0.176     2.46  
##  9 DM      weight        0.0237    0.00335     7.08  1.47e-12   0.0171    0.0303
## 10 DM      hptyes:weig… -0.00612   0.00840    -0.728 4.67e- 1  -0.0226    0.0103
knitr::kable(tidy(mlogx, conf.int = T), digits = 3)  # Interaction not significant for IFG & DM
y.level term estimate std.error statistic p.value conf.low conf.high
IFG (Intercept) -5.087 0.380 -13.373 0.000 -5.832 -4.341
IFG totchol 0.240 0.042 5.655 0.000 0.157 0.323
IFG hptyes 0.753 0.699 1.078 0.281 -0.617 2.123
IFG weight 0.022 0.004 5.288 0.000 0.014 0.030
IFG hptyes:weight 0.002 0.010 0.157 0.875 -0.018 0.021
DM (Intercept) -4.971 0.315 -15.784 0.000 -5.588 -4.354
DM totchol 0.276 0.035 7.878 0.000 0.208 0.345
DM hptyes 1.318 0.583 2.262 0.024 0.176 2.460
DM weight 0.024 0.003 7.078 0.000 0.017 0.030
DM hptyes:weight -0.006 0.008 -0.728 0.467 -0.023 0.010

6 Model fit assessment

hosmerlem(mlog, tables = T)  # multinomial version of Hosmer-Lemeshow test
## 
## Hosmer-Lemeshow Test:
## 
##                         Chi-sq  df  pr(>chi)  
## multinomial(Hosmerlem)  26.386  16   0.04884 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## H0: No lack of fit dictated
## 
## rho: 100% 
## 
## 
## Observed Freqs:
## 
##  group total Normal IFG  DM
##      1   408    373  14  21
##      2   408    359  17  32
##      3   408    353  25  30
##      4   408    313  38  57
##      5   408    325  37  46
##      6   408    318  33  57
##      7   408    316  24  68
##      8   408    285  51  72
##      9   408    257  55  96
##     10   411    226  70 115
## 
## Expected Freqs:
## 
##  group     OS   Normal     IFG       DM
##      1 0.1073 364.2194 17.5758  26.2048
##      2 0.1387 351.4004 22.3269  34.2726
##      3 0.1605 342.5349 25.5955  39.8697
##      4 0.1796 334.7189 28.4429  44.8382
##      5 0.1992 326.7408 31.3057  49.9535
##      6 0.2205 318.0191 34.4627  55.5181
##      7 0.2461 307.5801 38.1956  62.2243
##      8 0.2812 293.2620 43.3012  71.4367
##      9 0.3377 270.2066 51.6100  86.1834
##     10 0.4737 216.3193 71.1943 123.4864
Rsquared(mlog, measure = "nagelkerke")
## 
## Nagelkerke's R2: 
## 
## 0.07722

7 Presentation and interpretation

Present in table

epiDisplay::mlogit.display(mlog)
## 
## Outcome =fbs; Referent group = Normal 
##             IFG                            DM                            
##             Coeff./SE      RRR(95%CI)      Coeff./SE      RRR(95%CI)     
## (Intercept) -5.11/0.366*** -               -4.91/0.304*** -              
## totchol     0.24/0.042***  1.27(1.17,1.38) 0.28/0.035***  1.32(1.23,1.41)
## hptyes      0.87/0.146***  2.38(1.79,3.17) 0.9/0.12***    2.46(1.94,3.11)
## weight      0.02/0.004***  1.02(1.01,1.03) 0.02/0.003***  1.02(1.02,1.03)
## 
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1  
## 
## Residual Deviance: 5476.4 
## AIC = 5492.4
# Details
tidy(mlog, conf.int = T)
## # A tibble: 8 × 8
##   y.level term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)  -5.11     0.366      -14.0  2.85e-44  -5.83     -4.39  
## 2 IFG     totchol       0.239    0.0424       5.65 1.63e- 8   0.156     0.322 
## 3 IFG     hptyes        0.867    0.146        5.96 2.56e- 9   0.582     1.15  
## 4 IFG     weight        0.0220   0.00371      5.93 3.07e- 9   0.0147    0.0293
## 5 DM      (Intercept)  -4.91     0.304      -16.2  1.02e-58  -5.50     -4.31  
## 6 DM      totchol       0.277    0.0351       7.91 2.61e-15   0.209     0.346 
## 7 DM      hptyes        0.900    0.120        7.47 7.89e-14   0.664     1.14  
## 8 DM      weight        0.0227   0.00307      7.38 1.62e-13   0.0167    0.0287
tidy(mlog, conf.int = T, exponentiate = T)
## # A tibble: 8 × 8
##   y.level term        estimate std.error statistic  p.value conf.low conf.high
##   <chr>   <chr>          <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 IFG     (Intercept)  0.00602   0.366      -14.0  2.85e-44  0.00294    0.0123
## 2 IFG     totchol      1.27      0.0424       5.65 1.63e- 8  1.17       1.38  
## 3 IFG     hptyes       2.38      0.146        5.96 2.56e- 9  1.79       3.17  
## 4 IFG     weight       1.02      0.00371      5.93 3.07e- 9  1.01       1.03  
## 5 DM      (Intercept)  0.00739   0.304      -16.2  1.02e-58  0.00407    0.0134
## 6 DM      totchol      1.32      0.0351       7.91 2.61e-15  1.23       1.41  
## 7 DM      hptyes       2.46      0.120        7.47 7.89e-14  1.94       3.11  
## 8 DM      weight       1.02      0.00307      7.38 1.62e-13  1.02       1.03
knitr::kable(tidy(mlog, conf.int = T), format = "simple", digits = 3)
y.level term estimate std.error statistic p.value conf.low conf.high
IFG (Intercept) -5.113 0.366 -13.957 0 -5.831 -4.395
IFG totchol 0.239 0.042 5.647 0 0.156 0.322
IFG hptyes 0.867 0.146 5.958 0 0.582 1.153
IFG weight 0.022 0.004 5.928 0 0.015 0.029
DM (Intercept) -4.908 0.304 -16.157 0 -5.503 -4.313
DM totchol 0.277 0.035 7.909 0 0.209 0.346
DM hptyes 0.900 0.120 7.472 0 0.664 1.136
DM weight 0.023 0.003 7.377 0 0.017 0.029
knitr::kable(tidy(mlog, conf.int = T, exponentiate = T), format = "simple", digits = 3)
y.level term estimate std.error statistic p.value conf.low conf.high
IFG (Intercept) 0.006 0.366 -13.957 0 0.003 0.012
IFG totchol 1.270 0.042 5.647 0 1.169 1.381
IFG hptyes 2.380 0.146 5.958 0 1.790 3.166
IFG weight 1.022 0.004 5.928 0 1.015 1.030
DM (Intercept) 0.007 0.304 -16.157 0 0.004 0.013
DM totchol 1.319 0.035 7.909 0 1.232 1.413
DM hptyes 2.460 0.120 7.472 0 1.942 3.115
DM weight 1.023 0.003 7.377 0 1.017 1.029