###############################################################################
#                                                                             #
#   Reproducible files to the paper 'Pitfalls of hypothesis tests             #
#   and model selection on bootstrap samples: causes and consequences in      #
#   biometrical applications'  (2014)                                         #
#   by Janitza, Binder and Boulesteix                                         #
#                                                                             #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                     #
#                                                                             #
#   File for reproducing results for Z-test shown in Figures 7 and 8          #
#                                                                             #
###############################################################################

# function to perform the study to investigate the variability of p-values

# input parameters: 
#   - niter: number of iterations
#   - n: number of observations
#   - mean: set value of 0 for performing a test under H0 and to a different value for a test under H1

# output: (niter x 3) matrix
#         1st column contains p-values for original samples, 
#         2nd column contains p-values for bootstrap samples, 
#         3rd column contains p-values for subsamples

perform.z.test <- function(niter = 100, n = 1000, mean = 0){
  
  p.value.orig <- vector()
  p.value.boot <- vector()
  p.value.sub  <- vector()
  
  for(i in 1:niter){
    
    set.seed(i)
    
    # generate data
    x <- rnorm(n, mean, 1) 
    
    # perform test for original sample    
    p.value.orig[i] <- 2 * (1 - pnorm(abs(sqrt(n) * mean(x)))) 
    
    # perform test for bootstrap sample    
    x.boot <- x[sample(1:n, replace = TRUE, size = n)]
    p.value.boot[i] <- 2 * (1 - pnorm(abs(sqrt(n) * mean(x.boot)))) 
    
    # perform test for subsample
    x.sub <- x[sample(1:n, replace = FALSE, size = floor(0.632 * n))]
    p.value.sub[i] <- 2 * (1 - pnorm(abs(sqrt(floor(0.632 * n)) * mean(x.sub)))) 
  }
  
  p.values <- data.frame(original  = p.value.orig,
                         bootstrap = p.value.boot,
                         subsample = p.value.sub)
}



# compute p-values from Z-test under null hypothesis (this may take a few minutes ...)

p.values.z.test.H0 <- perform.z.test(niter = 5000, mean = 0)  
save(p.values.z.test.H0, file = "p_values_z_test_H0.Rda") # save result in the current working directory


# compute p-values from Z-test under alternative hypothesis (this may take a few minutes ...)

p.values.z.test.H1 <- perform.z.test(niter = 5000, mean = 0.08) 
save(p.values.z.test.H1, file = "p_values_z_test_H1.Rda") # save result in the current working directory


# compute variability in p-values

# sd(-log(p))
sd.logp.z.test.H0 <- apply(p.values.z.test.H0, 2, function(x) sd(-log10(x))) 
sd.logp.z.test.H1 <- apply(p.values.z.test.H1, 2, function(x) sd(-log10(x)))
 
# sd(p)
sd.z.test.H0 <- apply(p.values.z.test.H0, 2, sd)
sd.z.test.H1 <- apply(p.values.z.test.H1, 2, sd)



# create Figure 7: bootstrapped p-value distribution for Z-test (save e.g. as pdf of size 5.2 x 7)

par(mfrow = c(2, 2), mar = c(4.2, 5, 3, 0.5), oma = c(1, 0.2, 1, 0), cex.lab = 1.4)

boxplot(p.values.z.test.H0[,"original"], p.values.z.test.H0[,"bootstrap"],  xaxt = "n", ylab = "p-value", main = "Null hypothesis", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
lablist <- c("original", "bootstrap")
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.z.test.H0[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(p.values.z.test.H1[,"original"], p.values.z.test.H1[,"bootstrap"], xaxt = "n", ylab = "", main = "Alternative hypothesis ", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.z.test.H1[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.z.test.H0[,"original"]), -log10(p.values.z.test.H0[,"bootstrap"]), xaxt = "n", ylab = expression(paste(plain('-log')["10"], plain("(p-value)"))), main = "Null hypothesis", outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-1,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.z.test.H0[,"original"]), -log10(p.values.z.test.H0[,"bootstrap"])), 2), labels = paste("sd =", round(sd.logp.z.test.H0[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.z.test.H1[,"original"]), -log10(p.values.z.test.H1[,"bootstrap"]), xaxt = "n", ylab = "", main = "Alternative hypothesis", outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-2,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.z.test.H1[,"original"]), -log10(p.values.z.test.H1[,"bootstrap"])), 2), labels = paste("sd =", round(sd.logp.z.test.H1[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

title("Z-test", outer = TRUE, cex.main = 1.2)



# create Figure 8: subsampled p-value distribution for Z-test (save e.g. as pdf of size 5.2 x 7)

par(mfrow = c(2, 2), mar = c(4.8, 5, 3, 0.5), oma = c(1, 0.2, 1, 0), cex.lab = 1.4)

boxplot(p.values.z.test.H0[,"original"], p.values.z.test.H0[,"subsample"],  xaxt = "n", ylab = "p-value", main = "Null hypothesis", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
lablist <- c("original", "subsample")
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.z.test.H0[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(p.values.z.test.H1[,"original"], p.values.z.test.H1[,"subsample"], xaxt = "n", ylab = "", main = "Alternative hypothesis ", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.z.test.H1[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.z.test.H0[,"original"]), -log10(p.values.z.test.H0[,"subsample"]), xaxt = "n", main = "Null hypothesis",  outcex = 0.1, cex.main = 0.8, ylab = expression(paste(plain('-log')["10"], plain("(p-value)"))))
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.6,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.z.test.H0[,"original"]), -log10(p.values.z.test.H0[,"subsample"])), 2), labels = paste("sd =", round(sd.logp.z.test.H0[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.z.test.H1[,"original"]), -log10(p.values.z.test.H1[,"subsample"]), xaxt = "n", main = "Alternative hypothesis",  outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-1.1,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.z.test.H1[,"original"]), -log10(p.values.z.test.H1[,"subsample"])), 2), labels = paste("sd =", round(sd.logp.z.test.H1[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

title("Z-test", outer = TRUE, cex.main = 1.2)

