cholest.sav dataset,library(foreign)
cholest = read.spss("cholest.sav", to.data.frame = T)
str(cholest)
#> 'data.frame': 80 obs. of 5 variables:
#> $ chol : num 6.5 6.6 6.8 6.8 6.9 7 7 7.2 7.2 7.2 ...
#> $ age : num 38 35 39 36 31 38 33 36 40 34 ...
#> $ exercise: num 6 5 6 5 4 4 5 5 4 6 ...
#> $ sex : Factor w/ 2 levels "female","male": 2 2 2 2 2 2 2 2 2 2 ...
#> $ categ : Factor w/ 3 levels "Grp A","Grp B",..: 1 1 1 1 1 1 1 1 1 1 ...
#> - attr(*, "variable.labels")= Named chr [1:5] "cholesterol in mmol/L" "age in year" "duration of exercise (hours/week)" "" ...
#> ..- attr(*, "names")= chr [1:5] "chol" "age" "exercise" "sex" ...
#> - attr(*, "codepage")= int 65001
head(cholest)
#> chol age exercise sex categ
#> 1 6.5 38 6 male Grp A
#> 2 6.6 35 5 male Grp A
#> 3 6.8 39 6 male Grp A
#> 4 6.8 36 5 male Grp A
#> 5 6.9 31 4 male Grp A
#> 6 7.0 38 4 male Grp A
Explore the data. Obtain the basic descriptive statistics. Mean and SD,
by(cholest$chol, cholest$sex, mean)
#> cholest$sex: female
#> [1] 8.9275
#> ------------------------------------------------------------
#> cholest$sex: male
#> [1] 7.5325
by(cholest$chol, cholest$sex, sd)
#> cholest$sex: female
#> [1] 0.4551627
#> ------------------------------------------------------------
#> cholest$sex: male
#> [1] 0.4687066
and the number of subjects per group,
table(cholest$sex)
#>
#> female male
#> 40 40
library(lattice)
histogram(~ chol | sex, data = cholest, layout = c(1, 2))
bwplot(chol ~ sex, data = cholest)
var.test(chol ~ sex, data = cholest) # equal*
#>
#> F test to compare two variances
#>
#> data: chol by sex
#> F = 0.94304, num df = 39, denom df = 39, p-value = 0.8556
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#> 0.4987744 1.7830278
#> sample estimates:
#> ratio of variances
#> 0.9430422
*Choose: - Equal variance = Standard Two Sample t-test. - Unequal variance = Welch Two Sample t-test. 4. Perform independent t-test,
t.test(chol ~ sex, data = cholest) # significant difference
#>
#> Welch Two Sample t-test
#>
#> data: chol by sex
#> t = 13.504, df = 77.933, p-value < 2.2e-16
#> alternative hypothesis: true difference in means between group female and group male is not equal to 0
#> 95 percent confidence interval:
#> 1.189337 1.600663
#> sample estimates:
#> mean in group female mean in group male
#> 8.9275 7.5325
The function default is Welch Two Sample t-test (takes car the unequal variance). You can also obtain the standard t-test (equal variance assumed),
t.test(chol ~ sex, data = cholest, var.equal = TRUE)
#>
#> Two Sample t-test
#>
#> data: chol by sex
#> t = 13.504, df = 78, p-value < 2.2e-16
#> alternative hypothesis: true difference in means between group female and group male is not equal to 0
#> 95 percent confidence interval:
#> 1.18934 1.60066
#> sample estimates:
#> mean in group female mean in group male
#> 8.9275 7.5325
sbp.csv dataset,sbp = read.csv("sbp.csv")
str(sbp)
#> 'data.frame': 11 obs. of 2 variables:
#> $ S1: int 110 120 120 130 100 120 135 100 140 130 ...
#> $ S2: int 100 120 130 130 100 130 140 100 140 130 ...
sbp
#> S1 S2
#> 1 110 100
#> 2 120 120
#> 3 120 130
#> 4 130 130
#> 5 100 100
#> 6 120 130
#> 7 135 140
#> 8 100 100
#> 9 140 140
#> 10 130 130
#> 11 130 130
Explore the data. Obtain the basic descriptive statistics. Mean and SD,
mean(sbp$S1); sd(sbp$S1)
#> [1] 121.3636
#> [1] 13.43334
mean(sbp$S2); sd(sbp$S2)
#> [1] 122.7273
#> [1] 15.5505
mean(sbp$S2 - sbp$S1); sd(sbp$S2 - sbp$S1)
#> [1] 1.363636
#> [1] 5.518564
and the number of subjects,
lengths(sbp)
#> S1 S2
#> 11 11
histogram(~ (S2 - S1), data = sbp) # not perfectly normal
bwplot(~ (S2 - S1), data = sbp)
t.test(sbp$S1, sbp$S2, paired = TRUE) # no significant difference
#>
#> Paired t-test
#>
#> data: sbp$S1 and sbp$S2
#> t = -0.81954, df = 10, p-value = 0.4316
#> alternative hypothesis: true mean difference is not equal to 0
#> 95 percent confidence interval:
#> -5.071058 2.343785
#> sample estimates:
#> mean difference
#> -1.363636
by(cholest$chol, cholest$categ, mean)
#> cholest$categ: Grp A
#> [1] 7.248
#> ------------------------------------------------------------
#> cholest$categ: Grp B
#> [1] 8.293939
#> ------------------------------------------------------------
#> cholest$categ: Grp C
#> [1] 9.25
by(cholest$chol, cholest$categ, sd)
#> cholest$categ: Grp A
#> [1] 0.3355592
#> ------------------------------------------------------------
#> cholest$categ: Grp B
#> [1] 0.3091717
#> ------------------------------------------------------------
#> cholest$categ: Grp C
#> [1] 0.3569047
and the number of subjects per group,
table(cholest$categ)
#>
#> Grp A Grp B Grp C
#> 25 33 22
histogram(~ chol | categ, data = cholest, layout = c(1, 3))
bwplot(chol ~ categ, data = cholest)
However, we will mainly rely on residuals for the normality assessment. 3. Check the equality of variance assumption,
bartlett.test(chol ~ categ, data = cholest)
#>
#> Bartlett test of homogeneity of variances
#>
#> data: chol by categ
#> Bartlett's K-squared = 0.53515, df = 2, p-value = 0.7652
aov_chol = aov(chol ~ categ, data = cholest)
summary(aov_chol) # significant difference between three groups
#> Df Sum Sq Mean Sq F value Pr(>F)
#> categ 2 47.13 23.57 215.1 <2e-16 ***
#> Residuals 77 8.44 0.11
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Notice here we save the output of aov() into
aov_chol first. This allows further extraction of full
output from aov_chol ANOVA object. 5. Post-hoc test, to
look for significant group pairs,
pairwise.t.test(cholest$chol, cholest$categ, p.adj = "bonferroni")
#>
#> Pairwise comparisons using t tests with pooled SD
#>
#> data: cholest$chol and cholest$categ
#>
#> Grp A Grp B
#> Grp B <2e-16 -
#> Grp C <2e-16 5e-16
#>
#> P value adjustment method: bonferroni
all pairs significant difference Here, it works as if we do multiple
independent t-tests. We adjust for multiple comparison by
Bonferroni correction. 6. Check the normality of the
residuals, Save the residuals as residual_chol. We
also need to use as.numeric() to extract proper numerical
data from aov_chol ANOVA object, and save it again to
residuals_chol
residuals_chol = residuals(aov_chol)
residuals_chol = as.numeric(residuals_chol)
Then, check the normality,
histogram(~ residuals_chol) # normal
bwplot(~ residuals_chol)