1 Library

library(summarytools)  # descriptive table (descr, freq, stby)
library(gtsummary)     # descriptive table (tbl_summary)
library(nnet)          # multinom
library(broom)         # tidy, to extract results from models
library(knitr)         # for nice display of results
library(gofcat)        # model fit

2 Dataset

ms = read.csv("ms.csv", stringsAsFactors = T)
str(ms)
'data.frame':   4083 obs. of  4 variables:
 $ fbs    : Factor w/ 3 levels "DM","IFG","Normal": 3 3 3 3 3 3 3 3 3 3 ...
 $ totchol: num  5.43 5.13 5.55 4.01 5.21 ...
 $ hpt    : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
 $ weight : num  40 54.6 37 48.4 44.5 ...
summary(ms)
     fbs          totchol        hpt           weight      
 DM    : 594   Min.   : 0.180   no :3601   Min.   : 30.00  
 IFG   : 364   1st Qu.: 4.970   yes: 482   1st Qu.: 53.80  
 Normal:3125   Median : 5.700              Median : 62.30  
               Mean   : 5.791              Mean   : 63.93  
               3rd Qu.: 6.525              3rd Qu.: 72.00  
               Max.   :23.140              Max.   :187.80  
ms

2.1 Categories of outcome

levels(ms$fbs)  # Levels: DM IFG Normal, DM as reference
[1] "DM"     "IFG"    "Normal"
ms$fbs = factor(ms$fbs, levels = c("Normal", "IFG", "DM"))  # re-order, Normal as reference category
levels(ms$fbs)  # Normal as reference
[1] "Normal" "IFG"    "DM"    

3 Descriptive

3.1 Numerical

descr(ms)
Non-numerical variable(s) ignored: fbs, hpt
Descriptive Statistics  
ms  
N: 4083  

                    totchol    weight
----------------- --------- ---------
             Mean      5.79     63.93
          Std.Dev      1.28     14.30
              Min      0.18     30.00
               Q1      4.97     53.80
           Median      5.70     62.30
               Q3      6.53     72.00
              Max     23.14    187.80
              MAD      1.14     13.49
              IQR      1.56     18.20
               CV      0.22      0.22
         Skewness      1.16      0.98
      SE.Skewness      0.04      0.04
         Kurtosis     10.20      2.97
          N.Valid   4083.00   4083.00
        Pct.Valid    100.00    100.00
with(ms, stby(totchol, fbs, descr))
Descriptive Statistics  
totchol by fbs  
Data Frame: ms  
N: 3125  

                     Normal      IFG       DM
----------------- --------- -------- --------
             Mean      5.69     6.08     6.16
          Std.Dev      1.24     1.37     1.31
              Min      0.18     2.33     2.32
               Q1      4.89     5.29     5.29
           Median      5.62     5.94     6.03
               Q3      6.41     6.72     6.96
              Max     23.14    14.91    12.29
              MAD      1.13     1.07     1.19
              IQR      1.52     1.43     1.67
               CV      0.22     0.22     0.21
         Skewness      1.27     1.40     0.55
      SE.Skewness      0.04     0.13     0.10
         Kurtosis     13.58     6.20     1.26
          N.Valid   3125.00   364.00   594.00
        Pct.Valid    100.00   100.00   100.00
with(ms, stby(weight, fbs, descr))
Descriptive Statistics  
weight by fbs  
Data Frame: ms  
N: 3125  

                     Normal      IFG       DM
----------------- --------- -------- --------
             Mean     62.74    67.71    67.86
          Std.Dev     13.96    14.50    14.85
              Min     30.00    38.90    30.80
               Q1     53.00    56.80    58.00
           Median     61.00    65.85    66.00
               Q3     71.00    76.75    75.70
              Max    187.80   121.00   147.50
              MAD     13.34    14.60    12.82
              IQR     18.00    19.77    17.60
               CV      0.22     0.21     0.22
         Skewness      1.03     0.58     1.04
      SE.Skewness      0.04     0.13     0.10
         Kurtosis      3.58     0.37     2.55
          N.Valid   3125.00   364.00   594.00
        Pct.Valid    100.00   100.00   100.00

3.2 Categorical

freq(ms)
Variable(s) ignored: totchol
Frequencies  
ms$fbs  
Type: Factor  

               Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
------------ ------ --------- -------------- --------- --------------
      Normal   3125     76.54          76.54     76.54          76.54
         IFG    364      8.92          85.45      8.92          85.45
          DM    594     14.55         100.00     14.55         100.00
        <NA>      0                               0.00         100.00
       Total   4083    100.00         100.00    100.00         100.00

ms$hpt  
Type: Factor  

              Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
----------- ------ --------- -------------- --------- --------------
         no   3601     88.19          88.19     88.19          88.19
        yes    482     11.81         100.00     11.81         100.00
       <NA>      0                               0.00         100.00
      Total   4083    100.00         100.00    100.00         100.00
with(ms, stby(hpt, fbs, freq))
Frequencies  
ms$hpt  
Type: Factor  
Group: fbs = Normal  

              Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
----------- ------ --------- -------------- --------- --------------
         no   2844     91.01          91.01     91.01          91.01
        yes    281      8.99         100.00      8.99         100.00
       <NA>      0                               0.00         100.00
      Total   3125    100.00         100.00    100.00         100.00

Group: fbs = IFG  

              Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
----------- ------ --------- -------------- --------- --------------
         no    289     79.40          79.40     79.40          79.40
        yes     75     20.60         100.00     20.60         100.00
       <NA>      0                               0.00         100.00
      Total    364    100.00         100.00    100.00         100.00

Group: fbs = DM  

              Freq   % Valid   % Valid Cum.   % Total   % Total Cum.
----------- ------ --------- -------------- --------- --------------
         no    468     78.79          78.79     78.79          78.79
        yes    126     21.21         100.00     21.21         100.00
       <NA>      0                               0.00         100.00
      Total    594    100.00         100.00    100.00         100.00

3.3 Overall

tbl_summary(ms, by = fbs,
            statistic = list(all_continuous() ~ "{mean} ({sd})", 
                             all_categorical() ~ "{n} ({p}%)"),
            digits = list(all_continuous() ~ c(2,2), 
                          all_categorical() ~ c(0,1)))
Characteristic Normal, N = 3,1251 IFG, N = 3641 DM, N = 5941
totchol 5.69 (1.24) 6.08 (1.37) 6.16 (1.31)
hpt 281 (9.0%) 75 (20.6%) 126 (21.2%)
weight 62.74 (13.96) 67.71 (14.50) 67.86 (14.85)
1 Mean (SD); n (%)

4 Variable selection

4.1 Univariable

log_chol = multinom(fbs ~ totchol, data = ms)
# weights:  9 (4 variable)
initial  value 4485.633975 
final  value 2816.937753 
converged
summary(log_chol)
Call:
multinom(formula = fbs ~ totchol, data = ms)

Coefficients:
    (Intercept)   totchol
IFG   -3.599479 0.2466938
DM    -3.335919 0.2836485

Std. Errors:
    (Intercept)    totchol
IFG   0.2576232 0.04176180
DM    0.2130532 0.03445814

Residual Deviance: 5633.876 
AIC: 5641.876 
tidy(log_chol, conf.int = T)  # summary for multinom does not display p-value, need to use tidy
levels(ms$fbs)  # remember Normal as reference
[1] "Normal" "IFG"    "DM"    
log_hpt = multinom(fbs ~ hpt, data = ms)
# weights:  9 (4 variable)
initial  value 4485.633975 
iter  10 value 2815.359919
iter  10 value 2815.359919
final  value 2815.359919 
converged
summary(log_hpt)
Call:
multinom(formula = fbs ~ hpt, data = ms)

Coefficients:
    (Intercept)    hptyes
IFG   -2.286534 0.9656721
DM    -1.804488 1.0023935

Std. Errors:
    (Intercept)    hptyes
IFG  0.06173986 0.1438879
DM   0.04988338 0.1182527

Residual Deviance: 5630.72 
AIC: 5638.72 
tidy(log_hpt, conf.int = T)
log_wt = multinom(fbs ~ weight, data = ms)
# weights:  9 (4 variable)
initial  value 4485.633975 
final  value 2816.273988 
converged
summary(log_wt)
Call:
multinom(formula = fbs ~ weight, data = ms)

Coefficients:
    (Intercept)     weight
IFG   -3.671757 0.02337818
DM    -3.221548 0.02396112

Std. Errors:
    (Intercept)      weight
IFG   0.2491246 0.003626756
DM    0.2044349 0.002989522

Residual Deviance: 5632.548 
AIC: 5640.548 
tidy(log_wt, conf.int = T)

4.2 Multivariable

mlog = multinom(fbs ~ totchol + hpt + weight, data = ms)
# weights:  15 (8 variable)
initial  value 4485.633975 
iter  10 value 2739.040075
final  value 2738.198554 
converged
summary(mlog)
Call:
multinom(formula = fbs ~ totchol + hpt + weight, data = ms)

Coefficients:
    (Intercept)   totchol    hptyes     weight
IFG   -5.112735 0.2393748 0.8672457 0.02199345
DM    -4.907901 0.2772382 0.9000338 0.02267585

Std. Errors:
    (Intercept)    totchol    hptyes      weight
IFG   0.3663222 0.04239126 0.1455646 0.003710277
DM    0.3037717 0.03505572 0.1204522 0.003073929

Residual Deviance: 5476.397 
AIC: 5492.397 
tidy(mlog, conf.int = T)

Let’s see p-values by LR test:

anova(multinom(fbs ~ hpt + weight, data = ms), mlog)  # D0 - D1, model without - model with variable
# weights:  12 (6 variable)
initial  value 4485.633975 
iter  10 value 2778.035100
final  value 2778.033543 
converged
anova(multinom(fbs ~ totchol + weight, data = ms), mlog)
# weights:  12 (6 variable)
initial  value 4485.633975 
iter  10 value 2773.388803
final  value 2773.387904 
converged
anova(multinom(fbs ~ totchol + hpt, data = ms), mlog)
# weights:  12 (6 variable)
initial  value 4485.633975 
iter  10 value 2775.475233
final  value 2775.351291 
converged

\(\rightarrow\) Preliminary main effects model

5 Variable assessment

  • Linearity in logit – numerical variable, from separate binary logistic models

  • Other numerical issues

    • Small cell counts
    • Multicollinearity

\(\rightarrow\) Main effects model

6 Interaction

Specify interaction based on clinical opinion, let’s say between hpt * weight:

mlogx = multinom(fbs ~ totchol + hpt + weight + hpt * weight, data = ms)
# weights:  18 (10 variable)
initial  value 4485.633975 
iter  10 value 2739.643635
final  value 2737.858433 
converged
summary(mlogx)
Call:
multinom(formula = fbs ~ totchol + hpt + weight + hpt * weight, 
    data = ms)

Coefficients:
    (Intercept)  totchol    hptyes     weight hptyes:weight
IFG   -5.086868 0.239684 0.7531227 0.02158916    0.00156111
DM    -4.970760 0.276339 1.3181710 0.02369976   -0.00611555

Std. Errors:
    (Intercept)    totchol    hptyes      weight hptyes:weight
IFG   0.3803843 0.04238647 0.6988769 0.004082641   0.009925563
DM    0.3149209 0.03507623 0.5826223 0.003348587   0.008399271

Residual Deviance: 5475.717 
AIC: 5495.717 
tidy(mlogx, conf.int = T)  # Interaction not significant for IFG & DM

\(\rightarrow\) Preliminary final model

7 Model fit assessment

hosmerlem(mlog, tables = T)  # multinomial version of Hosmer-Lemeshow test

Hosmer-Lemeshow Test:

                        Chi-sq  df  pr(>chi)  
multinomial(Hosmerlem)  26.386  16   0.04884 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

H0: No lack of fit dictated

rho: 100% 


Observed Freqs:

 group total Normal IFG  DM
     1   408    373  14  21
     2   408    359  17  32
     3   408    353  25  30
     4   408    313  38  57
     5   408    325  37  46
     6   408    318  33  57
     7   408    316  24  68
     8   408    285  51  72
     9   408    257  55  96
    10   411    226  70 115

Expected Freqs:

 group     OS   Normal     IFG       DM
     1 0.1073 364.2194 17.5758  26.2048
     2 0.1387 351.4004 22.3269  34.2726
     3 0.1605 342.5349 25.5955  39.8697
     4 0.1796 334.7189 28.4429  44.8382
     5 0.1992 326.7408 31.3057  49.9535
     6 0.2205 318.0191 34.4627  55.5181
     7 0.2461 307.5801 38.1956  62.2243
     8 0.2812 293.2620 43.3012  71.4367
     9 0.3377 270.2066 51.6100  86.1834
    10 0.4737 216.3193 71.1943 123.4864
Rsquared(mlog, measure = "nagelkerke")

Nagelkerke's R2: 

0.07722 

+ Regression diagnostics – from separate binary logistic models

\(\rightarrow\) Final model

8 Presentation and interpretation

Present in table

epiDisplay::mlogit.display(mlog)

Outcome =fbs; Referent group = Normal 
            IFG                            DM                            
            Coeff./SE      RRR(95%CI)      Coeff./SE      RRR(95%CI)     
(Intercept) -5.11/0.366*** -               -4.91/0.304*** -              
totchol     0.24/0.042***  1.27(1.17,1.38) 0.28/0.035***  1.32(1.23,1.41)
hptyes      0.87/0.146***  2.38(1.79,3.17) 0.9/0.12***    2.46(1.94,3.11)
weight      0.02/0.004***  1.02(1.01,1.03) 0.02/0.003***  1.02(1.02,1.03)

Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1  

Residual Deviance: 5476.4 
AIC = 5492.4 
tbl_regression(mlog, exp = T)
ℹ Multinomial models have a different underlying structure than the models
gtsummary was designed for. Other gtsummary functions designed to work with
tbl_regression objects may yield unexpected results.
Characteristic OR1 95% CI1 p-value
IFG
totchol 1.27 1.17, 1.38 <0.001
hpt


    no
    yes 2.38 1.79, 3.17 <0.001
weight 1.02 1.01, 1.03 <0.001
DM
totchol 1.32 1.23, 1.41 <0.001
hpt


    no
    yes 2.46 1.94, 3.11 <0.001
weight 1.02 1.02, 1.03 <0.001
1 OR = Odds Ratio, CI = Confidence Interval
# Details
tidy(mlog, conf.int = T) |> kable(digits = 3)
y.level term estimate std.error statistic p.value conf.low conf.high
IFG (Intercept) -5.113 0.366 -13.957 0 -5.831 -4.395
IFG totchol 0.239 0.042 5.647 0 0.156 0.322
IFG hptyes 0.867 0.146 5.958 0 0.582 1.153
IFG weight 0.022 0.004 5.928 0 0.015 0.029
DM (Intercept) -4.908 0.304 -16.157 0 -5.503 -4.313
DM totchol 0.277 0.035 7.909 0 0.209 0.346
DM hptyes 0.900 0.120 7.472 0 0.664 1.136
DM weight 0.023 0.003 7.377 0 0.017 0.029
tidy(mlog, conf.int = T, exponentiate = T) |> kable(digits = 3)
y.level term estimate std.error statistic p.value conf.low conf.high
IFG (Intercept) 0.006 0.366 -13.957 0 0.003 0.012
IFG totchol 1.270 0.042 5.647 0 1.169 1.381
IFG hptyes 2.380 0.146 5.958 0 1.790 3.166
IFG weight 1.022 0.004 5.928 0 1.015 1.030
DM (Intercept) 0.007 0.304 -16.157 0 0.004 0.013
DM totchol 1.319 0.035 7.909 0 1.232 1.413
DM hptyes 2.460 0.120 7.472 0 1.942 3.115
DM weight 1.023 0.003 7.377 0 1.017 1.029