###############################################################################
#                                                                             #
#   Reproducible files to the paper 'Pitfalls of hypothesis tests             #
#   and model selection on bootstrap samples: causes and consequences in      #
#   biometrical applications'  (2014)                                         #
#   by Janitza, Binder and Boulesteix                                         #
#                                                                             #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                     #
#                                                                             #
#   File for reproducing Figures 9, 10, A2 and Table A3 (Part 1)              #
#                                                                             #
###############################################################################

# NOTE: running this code requires the NHANES data to be loaded first (see below)

# uncomment the following line and set the working directory where the objects shall be stored
# setwd("...") 

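# load the NHANES data here; the code below expects a data frame named `data`
# that contains the column `CRP` (response) plus the covariate columns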
#load("NHANES_data.Rda")


# function to investigate bootstrapped AICs from models including exactly one covariate

# input parameters: 
#   - index: sample index, determining which observations are used for AIC computation 

# output: vector containing 28 AIC values for univariate model including one of the covariates and the CRP level 

boot <- function(index){
  
  varnames <- names(data)[names(data) != "CRP"] # obtain the names of all covariates
  AICs <- sapply(varnames, function(z) AIC(lm(data$CRP[index] ~ data[index,z])))
}



# AIC for original sample

orig_AIC <- boot(1:nrow(data))


# AIC for bootstrap samples

bootstrapped_AIC <- sapply(1:10000, function(z){
  set.seed(z)
  ind <- sample(1:nrow(data), size = nrow(data), replace = TRUE)
  boot(index = ind)})


# AIC for subsamples

subsampled_AIC <- sapply(1:10000, function(z){
  set.seed(z)
  ind <- sample(1:nrow(data), size = floor(0.632 * nrow(data)), replace = FALSE)
  boot(index = ind)})



# store objects in a list and save this list object

NHANES_AIC <- list(orig_AIC = orig_AIC, bootstrapped_AIC = bootstrapped_AIC, subsampled_AIC = subsampled_AIC)
save(NHANES_AIC, file = "NHANES_AIC.Rda")


