###############################################################################
#                                                                             #
#   Reproducible files to the paper 'Pitfalls of hypothesis tests             #
#   and model selection on bootstrap samples: causes and consequences in      #
#   biometrical applications'  (2014)                                         #
#   by Janitza, Binder and Boulesteix                                         #
#                                                                             #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                     #
#                                                                             #
#   File for reproducing results for LR-test shown in Figures 7 and 8         #
#                                                                             #
###############################################################################

# function to perform the study to investigate the variability of p-values

# input parameters: 
#   - niter: number of iterations
#   - n: number of observations
#   - df: degrees of freedom the test should have
#   - nullhypo: indicates if to perform test under the null hypothesis (TRUE) or under the alternative (FALSE)

# output: (niter x 3) matrix
#         1st column contains p-values for original samples, 
#         2nd column contains p-values for bootstrap samples, 
#         3rd column contains p-values for subsamples 

perform.study <- function(niter = 100, n = 1000, df = 1, nullhypo = TRUE){
  
  p.value.orig <- vector()
  p.value.boot <- vector()
  p.value.sub  <- vector()
  
  for(i in 1:niter){
    
    set.seed(i)  
    
    # generate data
    x    <- matrix(rnorm(n = df * n, 0, 1), nrow = n)
    beta <- rep(0, df)
    if (!nullhypo) beta <- rep(0.02, df) # if test should be performed under the alternative hypothesis set effects unequal zero
    
    data <- data.frame(y = x %*% beta + rnorm(n, 0, 1),
                       x = x)
    
    # perform test for original sample
    p.value.orig[i] <- obtain.pvalues(dat = data)
    
    # perform test for bootstrap sample    
    data.boot <- data[sample(1:n, replace = TRUE, size = n),]
    p.value.boot[i] <- obtain.pvalues(dat = data.boot)
    
    # perform test for subsample
    data.sub <- data[sample(1:n, replace = FALSE, size = floor(0.632 * n)),]
    p.value.sub[i] <- obtain.pvalues(dat = data.sub)

  }
  
  p.values <- data.frame(original  = p.value.orig,
                         bootstrap = p.value.boot,
                         subsample = p.value.sub)
}


# function to compute p-value from LR-test (is called by function perform.study())

obtain.pvalues <- function(dat){
  
  model_full    <- lm(y ~ ., data = dat)
  model_reduced <- lm(y ~ 1, data = dat)
  
  p.value <- as.numeric(unlist(anova(model_full, model_reduced)["Pr(>F)"]))[2]
  
}

# compute p-values from LR-test under null hypothesis (this may take a few minutes ...)

p.values.LR.test.H0 <- perform.study(niter = 5000, df = 10, nullhypo = TRUE)  # test with 10 degrees of freedom
save(p.values.LR.test.H0, file = "p_values_LR_test_H0.Rda") #  save result in the current working directory


# compute p-values from LR-test under alternative hypothesis (this may take a few minutes ...)

p.values.LR.test.H1 <- perform.study(niter = 5000, df = 10, nullhypo = FALSE)  # test with 10 degrees of freedom
save(p.values.LR.test.H1, file = "p_values_LR_test_H1.Rda") # save result in the current working directory


# compute variability in p-values

# sd(-log10(p))
sd.logp.LR.test.H0 <- apply(p.values.LR.test.H0, 2, function(x) sd(-log10(x)))
sd.logp.LR.test.H1 <- apply(p.values.LR.test.H1, 2, function(x) sd(-log10(x)))

# sd(p)
sd.LR.test.H0 <- apply(p.values.LR.test.H0, 2, sd)
sd.LR.test.H1 <- apply(p.values.LR.test.H1, 2, sd)




# create Figure 7: bootstrapped p-value distribution for LR-test (save e.g. as pdf of size 5.2 x 7)

par(mfrow = c(2, 2), mar = c(4.2, 4, 3, 0.5), oma = c(1, 0.2, 1, 0))

boxplot(p.values.LR.test.H0[,"original"], p.values.LR.test.H0[,"bootstrap"],  xaxt = "n", ylab = "", main = "Null hypothesis", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
lablist <- c("original", "bootstrap")
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.LR.test.H0[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(p.values.LR.test.H1[,"original"], p.values.LR.test.H1[,"bootstrap"], xaxt = "n", ylab = "", main = "Alternative hypothesis ", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.LR.test.H1[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.LR.test.H0[,"original"]), -log10(p.values.LR.test.H0[,"bootstrap"]), xaxt = "n", main = "Null hypothesis", outcex = 0.1, cex.main = 0.8, ylab = "")
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-1.2,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.LR.test.H0[,"original"]), -log10(p.values.LR.test.H0[,"bootstrap"])), 2), labels = paste("sd =", round(sd.logp.LR.test.H0[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.LR.test.H1[,"original"]), -log10(p.values.LR.test.H1[,"bootstrap"]), xaxt = "n", main = "Alternative hypothesis", outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-1.7,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.LR.test.H1[,"original"]), -log10(p.values.LR.test.H1[,"bootstrap"])), 2), labels = paste("sd =", round(sd.logp.LR.test.H1[c("original", "bootstrap")], digits = 2)), pos = 1, cex = 0.6)

title("LR-test (df = 10)", outer = TRUE, cex.main = 1.2)



# create Figure 7: subsampled p-value distribution for LR-test (save e.g. as pdf of size 5.2 x 7)

par(mfrow = c(2, 2), mar = c(4.8, 4, 3, 0.5), oma = c(1, 0.2, 1, 0))

boxplot(p.values.LR.test.H0[,"original"], p.values.LR.test.H0[,"subsample"],  xaxt = "n", main = "Null hypothesis", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
lablist <- c("original", "subsample")
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.LR.test.H0[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(p.values.LR.test.H1[,"original"], p.values.LR.test.H1[,"subsample"], xaxt = "n", ylab = "", main = "Alternative hypothesis ", ylim = c(0, 1), outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.14,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(c(1, 1), labels = paste("sd =", round(sd.LR.test.H1[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.LR.test.H0[,"original"]), -log10(p.values.LR.test.H0[,"subsample"]), xaxt = "n", main = "Null hypothesis", outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.6,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.LR.test.H0[,"original"]), -log10(p.values.LR.test.H0[,"subsample"])), 2), labels = paste("sd =", round(sd.logp.LR.test.H0[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

boxplot(-log10(p.values.LR.test.H1[,"original"]), -log10(p.values.LR.test.H1[,"subsample"]), xaxt = "n", main = "Alternative hypothesis", outcex = 0.1, cex.main = 0.8)
axis(1, at = c(1,2), labels = FALSE)
text(c(0.8, 1.8), par("usr")[3]-0.9,  labels = lablist, srt = 45, pos = 1, xpd = TRUE, cex = 1)
text(rep(max(-log10(p.values.LR.test.H1[,"original"]), -log10(p.values.LR.test.H1[,"subsample"])), 2), labels = paste("sd =", round(sd.logp.LR.test.H1[c("original", "subsample")], digits = 2)), pos = 1, cex = 0.6)

title("LR-test (df = 10)", outer = TRUE, cex.main = 1.2)


