Data set

library(PVBcorrect)
# Open the help page for the example data set, then inspect its structure.
help("cad_pvb")
str(cad_pvb)
## 'data.frame':    2688 obs. of  5 variables:
##  $ X1: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X2: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X3: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ T : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ D : int  0 0 0 0 0 0 0 1 0 0 ...

Cross-tabulation tables

# Cross-tabulate test result against disease status, including the
# unverified column and the row totals.
view_table(
  data = cad_pvb,
  test = "T",
  disease = "D",
  show_unverified = TRUE,
  show_total = TRUE
)
##      Disease
## Test   yes   no unverified Total
##   yes  195  232        996  1423
##   no     5   39       1221  1265

Uncorrected, Complete case analysis (CCA)

Perform Complete Case Analysis, CCA, uncorrected for PVB.

# Complete case analysis (no PVB correction); ci = TRUE so that the
# results table includes the confidence limits.
cca_out <- acc_cca(
  data = cad_pvb,
  test = "T",
  disease = "D",
  ci = TRUE
)
cca_out$acc_results
## Estimates of accuracy measures
## Uncorrected for PVB: Complete Case Analysis
## 
##           Est         SE     LowCI     UppCI
## Sn  0.9750000 0.01103970 0.9533626 0.9966374
## Sp  0.1439114 0.02132173 0.1021216 0.1857013
## PPV 0.4566745 0.02410569 0.4094282 0.5039207
## NPV 0.8863636 0.04784519 0.7925888 0.9801385

Begg & Greenes’ Method (BG)

PVB correction by Begg and Greenes’ method with an asymptotic normal CI. This method does not allow inclusion of covariates.

# Begg and Greenes' correction, no covariate, with confidence limits.
bg_out <- acc_bg(
  data = cad_pvb,
  test = "T",
  disease = "D",
  ci = TRUE
)
bg_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Begg and Greenes' Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8188629 0.06320034 0.6949925 0.9427333
## Sp  0.5918754 0.01928759 0.5540724 0.6296783
## PPV 0.4566745 0.02410569 0.4094282 0.5039207
## NPV 0.8863636 0.04784519 0.7925888 0.9801385

Extended Begg & Greenes’ Method (EBG)

Perform PVB correction by Begg and Greenes’ method, as extended by Alonzo & Pepe (2005). Uses a bootstrapped CI. Allows inclusion of covariates.

# Extended Begg and Greenes' correction without covariates; seednum is
# fixed so the bootstrap results can be reproduced.
ebg_out <- acc_ebg(
  data = cad_pvb,
  test = "T",
  disease = "D",
  ci = TRUE,
  seednum = 12345
)
ebg_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Extended Begg and Greenes' Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8188629 0.06438696 0.6860317 0.9360441
## Sp  0.5918754 0.01759285 0.5587741 0.6285913
## PPV 0.4566745 0.02421467 0.4063396 0.5059415
## NPV 0.8863636 0.04925696 0.8005051 0.9949495

seednum sets the random seed so that the bootstrap results can be replicated. It can be set to any number; here, for simplicity, it is set to 12345.

Add X3 as the covariate:

# Extended Begg and Greenes' correction with X3 as the covariate.
ebgx_out <- acc_ebg(
  data = cad_pvb,
  test = "T",
  disease = "D",
  covariate = "X3",
  ci = TRUE,
  seednum = 12345
)
ebgx_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Extended Begg and Greenes' Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8314954 0.06077694 0.7092233 0.9457330
## Sp  0.5894661 0.01597580 0.5586440 0.6218741
## PPV 0.4434687 0.02343506 0.3963214 0.4904032
## NPV 0.8989051 0.04388566 0.8223543 0.9911776

Multiple imputation (MI)

Perform PVB correction by multiple imputation. Allows inclusion of covariates.

By default uses logistic regression for imputation.

# Multiple imputation with the default imputation method, no covariate.
mi_out <- acc_mi(
  data = cad_pvb,
  test = "T",
  disease = "D",
  ci = TRUE,
  seednum = 12345
)
mi_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Multiple Imputation Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8031807 0.06451495 0.6753082 0.9310532
## Sp  0.5874592 0.02199350 0.5440683 0.6308500
## PPV 0.4578988 0.02400632 0.4105655 0.5052321
## NPV 0.8699605 0.05498024 0.7609444 0.9789765

Imputation by predictive mean matching (PMM):

# Multiple imputation using method = "pmm", no covariate.
mipmm_out <- acc_mi(
  data = cad_pvb,
  test = "T",
  disease = "D",
  ci = TRUE,
  method = "pmm",
  seednum = 12345
)
mipmm_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Multiple Imputation Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8416093 0.18239366 0.4797334 1.2034852
## Sp  0.5949661 0.08828018 0.4198682 0.7700639
## PPV 0.4538229 0.16433228 0.1277984 0.7798474
## NPV 0.8839842 0.14061515 0.6049962 1.1629722

Add X3 as the covariate:

# Multiple imputation (default method) with X3 as the covariate.
mix_out <- acc_mi(
  data = cad_pvb,
  test = "T",
  disease = "D",
  covariate = "X3",
  ci = TRUE,
  seednum = 12345
)
mix_out$acc_results
## Estimates of accuracy measures
## Corrected for PVB: Multiple Imputation Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.8168359 0.06368300 0.6906143 0.9430575
## Sp  0.5858694 0.02178298 0.5428942 0.6288446
## PPV 0.4455025 0.02444560 0.3972929 0.4937120
## NPV 0.8839209 0.05384509 0.7771515 0.9906904

EM-based logistic regression method (EM)

Perform PVB correction by the EM-based logistic regression method, which handles the MNAR (missing not at random) assumption. Uses a bootstrapped CI. Allows inclusion of covariates.

Without covariate.

With a large number of bootstrap replicates, this will take a very long time to finish! A small R is used here only to test the code; set R = 999 or higher later.

# EM-based correction without covariates. R = 9 keeps the bootstrap short
# for this demonstration; show_t = TRUE prints the EM iteration progress.
em_out <- acc_em(
  data = cad_pvb,
  test = "T",
  disease = "D",
  mnar = TRUE,
  show_t = TRUE,
  R = 9,
  ci = TRUE,
  seednum = 12345
)
em_out$acc_results
## === Current EM iteration t = 100  ===
## [ EM converged at t = 162 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 0 
## =========================
## 
## === Current EM iteration t = 100  ===
## === Current EM iteration t = 200  ===
## [ EM converged at t = 259 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 1 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 125 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 2 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 143 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 3 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 136 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 4 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 146 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 5 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 161 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 6 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 135 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 7 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 149 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 8 
## =========================
## 
## === Current EM iteration t = 100  ===
## [ EM converged at t = 156 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 9 
## =========================
## 
## [ Total Boot Iteration = 9 ]
## 
## Estimates of accuracy measures
## Corrected for PVB: EM-based Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.7126219 0.03875535 0.6388044 0.7548431
## Sp  0.6444116 0.03753282 0.5754812 0.6887143
## PPV 0.6552816 0.03800190 0.5872707 0.7030126
## NPV 0.7027397 0.03883311 0.6274167 0.7449176

Set show_t = FALSE to hide iteration details. Note that the standard error and CI are not accurate with a very small R. Repeat with a higher R, i.e. R = 999 or higher.

Add X3 as the covariate.

With a large number of bootstrap replicates, this will take a very long time to finish! A small R is used here only to test the code; set R = 999 or higher later.

# EM-based correction with X3 as the covariate. t_max is raised to 50000
# and R = 3 keeps the bootstrap short for this demonstration.
emx_out <- acc_em(
  data = cad_pvb,
  test = "T",
  disease = "D",
  covariate = "X3",
  mnar = TRUE,
  show_t = FALSE,
  t_max = 50000,
  R = 3,
  ci = TRUE,
  seednum = 12345
)
emx_out$acc_results
## [ EM converged at t = 1330 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 0 
## =========================
## 
## [ EM converged at t = 121 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 1 
## =========================
## 
## [ EM converged at t = 3601 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 2 
## =========================
## 
## [ EM converged at t = 128 when all changes < 1e-04 ]
## 
## Finished Boot Iteration = 3 
## =========================
## 
## [ Total Boot Iteration = 3 ]
## 
## Estimates of accuracy measures
## Corrected for PVB: EM-based Method
## 
##           Est         SE     LowCI     UppCI
## Sn  0.7287735 0.08058657 0.6286046 0.7781265
## Sp  0.6429360 0.02370844 0.6402086 0.6874929
## PPV 0.6382103 0.10241081 0.6154878 0.8041084
## NPV 0.7328115 0.12416499 0.5759180 0.8020656

With the addition of a covariate, t_max (the maximum number of iterations for the EM algorithm) needs to be set to a high value.