###############################################################################
#                                                                             #
#   Reproducible files to the paper 'Pitfalls of hypothesis tests             #
#   and model selection on bootstrap samples: causes and consequences in      #
#   biometrical applications'  (2014)                                         #
#   by Janitza, Binder and Boulesteix                                         #
#                                                                             #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                     #
#                                                                             #
#   File for reproducing Figure A1                                            #
#                                                                             #
###############################################################################

# uncomment the following line and set the working directory
# setwd("...") 

# function to perform the study to obtain test statistics for original samples, bootstrap samples and subsamples if H0 is true in 'real world'

# input parameters: 
#   - niter: number of iterations
#   - n: number of observations

# output: (niter x 2) matrix
#         1st column contains values for test statistic in original samples, 
#         2nd column contains values for bootstrap samples

perform.z.test <- function(niter = 100, n = 1000){
  
  teststat.orig <- vector()
  teststat.boot <- vector()
  
  for(i in 1:niter){
    
    set.seed(i)
    
    # generate data
    x <- rnorm(n, 0, 1) # under H0 
    
    # perform test for original sample
    teststat.orig[i] <- sqrt(n) * mean(x)
    
    # perform test for bootstrap sample    
    x.boot <- x[sample(1:n, replace = TRUE, size = n)]
    teststat.boot[i] <- sqrt(n) * mean(x.boot)
    
  }
  
  teststat <- data.frame(original  = teststat.orig,
                         bootstrap = teststat.boot)
}



# compute test statistics from Z-test under null hypothesis (this may take a while ...)

teststat.z.test.H0 <- perform.z.test(niter = 500000, n = 1000)  
save(teststat.z.test.H0, file = "teststat_z_test_H0.Rda") #  save result in the current working directory


# create Figure A1: plot of densities for Z and Z* (is saved in the current working directory) 

graphics.off()
pdf(height = 4, width = 4.5, file = "testatistic_Ztest.pdf")

plot(density(teststat.z.test.H0[,1]), type = "l", xlim = c(-6, 6), xlab = "Test statistic", main = "Empirical density functions for Z-test", cex.lab = 1.1, cex.main = 1.2, lwd = 2)
lines( seq(-6, 6, length = 100000), dnorm(seq(-6, 6, length = 100000)), lty = 1, col = "red", lwd = 0.8) 
lines(density(teststat.z.test.H0[,2]), lty = 2, lwd = 2)
legend("topright", col = c("black", "black", "red"), lty = c(1, 2, 1), legend = c("Z", "Z*", "N(0, 1)"), lwd = c(2, 2, 1))

graphics.off()
