###############################################################################
#                                                                             #
#   Reproducible files to the paper 'Pitfalls of hypothesis tests             #
#   and model selection on bootstrap samples: causes and consequences in      #
#   biometrical applications'  (2014)                                         #
#   by Janitza, Binder and Boulesteix                                         #
#                                                                             #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                     #
#                                                                             #
#   File for reproducing Figures 2 - 6, Tables 2, A2 (Part 2)                 #
#                                                                             #
###############################################################################


# uncomment the following line and set the working directory where the R objects are stored
# setwd("...") 


# load relevant R objects
load("NHANES_unmodified_p_values.Rda")
load("NHANES_10modified_pvalues.Rda")
load("NHANES_1000modified_sig.Rda")


#########################################
#
#  NUMBERS PRESENTED IN THE PAPER 
#
#########################################

# number of significant associations in the unmodified NHANES sample
sum(NHANES_unmodified_p_values$orig_pval <= 0.05) # = 17

# mean number of significant associations in unmodified NHANES bootstrap samples
mean(rowSums(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, function(z) z <= 0.05))) # = 18.4 (values always rounded to one digit)

# mean number of significant associations in unmodified NHANES subsamples
mean(rowSums(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, function(z) z <= 0.05))) # = 14.7



# number of significant associations in the modified NHANES sample
mean(sapply(1:1000, function(d) NHANES_1000modified_sig[[d]]$orig_sig)) # = 1.36

# mean number of significant associations in modified NHANES bootstrap samples
mean(sapply(1:1000, function(d) 
  sum(as.numeric(names(NHANES_1000modified_sig[[d]]$bootstrapped_sig)) *  as.numeric(NHANES_1000modified_sig[[d]]$bootstrapped_sig)
      /sum(as.numeric(NHANES_1000modified_sig[[d]]$bootstrapped_sig))))) # = 6.12

# mean number of significant associations in modified NHANES subsamples
mean(sapply(1:1000, function(d) 
  sum(as.numeric(names(NHANES_1000modified_sig[[d]]$subsampled_sig)) *  as.numeric(NHANES_1000modified_sig[[d]]$subsampled_sig)
      /sum(as.numeric(NHANES_1000modified_sig[[d]]$subsampled_sig))))) # = 1.40


#########################################
#
#  FIGURES 2 - 5
#
#########################################

# create Figure 2: Bootstrapped p-values versus original p-values (uncomment lines to save the figure in the current working directory)

#graphics.off()
#pdf(file = "NHANES_pvalues.pdf", height = 4, width = 7)

par(mfrow = c(1, 2))

plot(cbind(NHANES_unmodified_p_values$orig_pval, apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)), xlab = "p-value (original sample)", main = "NHANES data",
     ylab = "Bootstrapped p-value (median)", ylim = c(0, 1), xlim = c(0, 1), pch = 16, cex = 0.5)
abline(c(0, 0), c(1, 1))


plot(cbind(as.numeric(sapply(1:10, function(z) NHANES_10modified_pvalues[[z]]$orig_pval)), 
           as.numeric(sapply(1:10, function(d) NHANES_10modified_pvalues[[d]]$bootstrapped_p_values))), xlab = "p-value (original sample)", main = "NHANES data with\n permuted response",, 
     ylab = "Bootstrapped p-value (median)", ylim = c(0, 1), xlim = c(0, 1), pch = 16, cex = 0.5)
abline(c(0, 0), c(1, 1))

#graphics.off()



# create Figure 3: Relative frequency of bootstrap samples with specified number of significant results when univariately testing the association between CRP level and 28 covariates 
# (uncomment lines to save the figure in the current working directory)

#graphics.off()
#pdf(file = "NHANES_pvalues_no_sig.pdf", height = 4, width = 10)

par(mfrow = c(1, 2))

barplot(table(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 2, function(z)  sum(z < 0.05)))/10000,  
        ylab = "Relative frequency", xlab = "Number of significant associations", 
        ylim = c(0, 0.2), main = "NHANES data", col = rep(c("gray90", "darkgray", "gray90"), c(6, 1, 9)))


barplot(sapply(as.character(0:21), function(a) 
  sum(sapply(1:1000, function(z) as.numeric(NHANES_1000modified_sig[[z]]$bootstrapped_sig[a])), na.rm = TRUE))/
    sum(sapply(1:1000, function(z) sum(NHANES_1000modified_sig[[z]]$bootstrapped_sig))),
  ylab = "Relative frequency", xlab = "Number of significant associations", 
  ylim = c(0, 0.2), main = "NHANES data\n with permuted response", col = rep(c("gray90", "darkgray", "gray90"), c(1, 1, 20)))

#graphics.off()


       

# create Figure 4: Subsampled p-values versus original p-values (uncomment lines to save the figure in the current working directory)

#graphics.off()
#pdf(file = "NHANES_pvalues_subsample.pdf", height = 4, width = 7)

par(mfrow = c(1, 2))

plot(cbind(NHANES_unmodified_p_values$orig_pval, apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)), xlab = "p-value (original sample)", main = "NHANES data",
     ylab = "Subsampled p-value (median)", ylim = c(0, 1), xlim = c(0, 1), pch = 16, cex = 0.5)
abline(c(0, 0), c(1, 1))

plot(cbind(as.numeric(sapply(1:10, function(z) NHANES_10modified_pvalues[[z]]$orig_pval)), 
           as.numeric(sapply(1:10, function(d) NHANES_10modified_pvalues[[d]]$subsampled_p_values))), xlab = "p-value (original sample)", main = "NHANES data with\n permuted response",, 
     ylab = "Subsampled p-value (median)", ylim = c(0, 1), xlim = c(0, 1), pch = 16, cex = 0.5)
abline(c(0, 0), c(1, 1))

#graphics.off()


# create Figure 5: Relative frequency of subsamples with specified number of significant results when univariately testing the association between CRP level and 28 covariates 
# (uncomment lines to save the figure in the current working directory)

#graphics.off()
#pdf(file = "NHANES_pvalues_no_sig_subsample.pdf", height = 4, width = 10)

par(mfrow = c(1, 2))

barplot(table(apply(NHANES_unmodified_p_values$subsampled_p_values, 2, function(z)  sum(z < 0.05)))/10000,  
        ylab = "Relative frequency", xlab = "Number of significant associations", 
        ylim = c(0, 0.25), main = "NHANES data", col = rep(c("gray90", "darkgray", "gray90"), c(9, 1, 4)))

barplot(sapply(as.character(0:12), function(a) 
  sum(sapply(1:1000, function(z) as.numeric(NHANES_1000modified_sig[[z]]$subsampled_sig[a])), na.rm = TRUE))/
    sum(sapply(1:1000, function(z) sum(NHANES_1000modified_sig[[z]]$subsampled_sig))),
  ylim = c(0, 0.35), ylab = "Relative frequency", xlab = "Number of significant associations", 
  main = "NHANES data\n with permuted response", col = rep(c("gray90", "darkgray", "gray90"), c(1, 1, 11)))

#graphics.off()




#########################################
#
#  FIGURE 6 
#
#########################################


# create Figure 6: Variable ranking by p-values and median bootstrapped p-value (uncomment lines to save the figure in the current working directory)

#graphics.off()
#pdf(file = "NHANES_pval_ranking.pdf", height = 6.5, width = 12)

varscale <- c("m = 2", "m = 5", "m = 4", "m = 5", "m = 6", "metric", "metric", "metric", "m = 5", "m = 2", "m = 4", "m = 5", "m = 5", "m = 5", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "metric", "m = 2", "m = 5", "metric", "metric", "metric", "m = 12", "metric")

par(mfrow = c(1, 2), mar = c(5, 12, 2.5, 0.2))

plot(c(min(NHANES_unmodified_p_values$orig_pval), max(NHANES_unmodified_p_values$orig_pval)), c(1, length(NHANES_unmodified_p_values$orig_pval)), type = "n", xlab = "p-value (original sample)", yaxt = "n", ylab = "", main = "Original", xlim = c(0, 0.55))
grid(ny = c(length(NHANES_unmodified_p_values$orig_pval)+1), nx = 0)
points(sort(NHANES_unmodified_p_values$orig_pval), c(length(NHANES_unmodified_p_values$orig_pval):1), cex = 0.5, col = "black", pch = 16)
axis(2, labels = paste(names(sort(NHANES_unmodified_p_values$orig_pval)), " (", 
                       varscale[order(NHANES_unmodified_p_values$orig_pval)], ")", sep = ""), at = length(NHANES_unmodified_p_values$orig_pval):1, las = 2)

apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)
plot(c(min(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)), max(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))), c(1, length(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))), type = "n", xlab = "Bootstrapped p-value (median)", yaxt = "n", ylab = "", main = "Bootstrap", xlim = c(0, 0.55))
grid(ny = c(length(NHANES_unmodified_p_values$orig_pval)+1), nx = 0)
points(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)), c(length(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)):1), cex = 0.5, col = "black", pch = 16)
axis(2, labels = paste(names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))), " (", 
                       varscale[order(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))], ")", sep = ""), at = length(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)):1, las = 2)

#graphics.off()


#graphics.off()
#pdf(file = "NHANES_pval_ranking_subsample.pdf", height = 6.5, width = 6)

varscale <- c("m = 2", "m = 5", "m = 4", "m = 5", "m = 6", "metric", "metric", "metric", "m = 5", "m = 2", "m = 4", "m = 5", "m = 5", "m = 5", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "metric", "m = 2", "m = 5", "metric", "metric", "metric", "m = 12", "metric")

par(mfrow = c(1, 1), mar = c(5, 12, 2.5, 0.2))

apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)
plot(c(min(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)), max(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))), c(1, length(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))), type = "n", xlab = "Subsampled p-value (median)", yaxt = "n", ylab = "", main = "Subsample", xlim = c(0, 0.55))
grid(ny = c(length(NHANES_unmodified_p_values$orig_pval)+1), nx = 0)
points(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)), c(length(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)):1), cex = 0.5, col = "black", pch = 16)
axis(2, labels = paste(names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))), " (", 
                       varscale[order(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))], ")", sep = ""), at = length(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)):1, las = 2)

#graphics.off()



#########################################
#
#  TABLES 2 and A2
#
#########################################

varscale <- c("m = 2", "m = 5", "m = 4", "m = 5", "m = 6", "metric", "metric", "metric", "m = 5", "m = 2", "m = 4", "m = 5", "m = 5", "m = 5", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "metric", "m = 2", "m = 5", "metric", "metric", "metric", "m = 12", "metric")


# compute numbers in Table 2: Variable ranking for the unmodified NHANES data

kat2 <- names(sort(NHANES_unmodified_p_values$orig_pval)[varscale[order(NHANES_unmodified_p_values$orig_pval)] %in% c("m = 2", "metric")])
kat4 <- names(sort(NHANES_unmodified_p_values$orig_pval)[varscale[order(NHANES_unmodified_p_values$orig_pval)]  == "m = 4"])
kat5 <- names(sort(NHANES_unmodified_p_values$orig_pval)[varscale[order(NHANES_unmodified_p_values$orig_pval)]  == "m = 5"])
kat6 <- names(sort(NHANES_unmodified_p_values$orig_pval)[varscale[order(NHANES_unmodified_p_values$orig_pval)]  == "m = 6"])
kat12 <- names(sort(NHANES_unmodified_p_values$orig_pval)[varscale[order(NHANES_unmodified_p_values$orig_pval)] == "m = 12"])

# show part of table for metric and binary variables
(kat2 <- data.frame(
  Original       = sapply(kat2, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)]))),
  Bootstrap      = sapply(kat2, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))))),
  Bootstrap_diff = (sapply(kat2, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat2, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)))))),
  Subsample      = sapply(kat2, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))))),
  Subsample_diff = (sapply(kat2, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat2, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)))))))
)

# show part of table for 4-category variables
(kat4 <- data.frame(
  Original       = sapply(kat4, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)]))),
  Bootstrap      = sapply(kat4, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))))),
  Bootstrap_diff = (sapply(kat4, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat4, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)))))),
  Subsample      = sapply(kat4, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))))),
  Subsample_diff = (sapply(kat4, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat4, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)))))))
)

# show part of table for 5-category variables
(kat5 <- data.frame(
  Original       = sapply(kat5, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)]))),
  Bootstrap      = sapply(kat5, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))))),
  Bootstrap_diff = (sapply(kat5, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat5, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)))))),
  Subsample      = sapply(kat5, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))))),
  Subsample_diff = (sapply(kat5, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat5, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)))))))
)


# show part of table for 6-category variables
(kat6 <- data.frame(
  Original       = sapply(kat6, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)]))),
  Bootstrap      = sapply(kat6, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))))),
  Bootstrap_diff = (sapply(kat6, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat6, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)))))),
  Subsample      = sapply(kat6, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))))),
  Subsample_diff = (sapply(kat6, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat6, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)))))))
)


# show part of table for 12-category variables
(kat12 <- data.frame(
  Original       = sapply(kat12, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)]))),
  Bootstrap      = sapply(kat12, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median))))),
  Bootstrap_diff = (sapply(kat12, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat12, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$bootstrapped_p_values, 1, median)))))),
  Subsample      = sapply(kat12, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median))))),
  Subsample_diff = (sapply(kat12, function(z) which(z == names(NHANES_unmodified_p_values$orig_pval[order(NHANES_unmodified_p_values$orig_pval)])))) - (sapply(kat12, function(z) which(z == names(sort(apply(NHANES_unmodified_p_values$subsampled_p_values, 1, median)))))))
)


#library(xtable)
#xtable(rbind(kat2, kat4, kat5, kat6, kat12))




# compute numbers in Table A2: Variable ranking for the first modified NHANES data

i <- 1 # set number to 1, 2, ..., or 10 to report results for the 10th permuted datasets (in the paper we show results for the first one as an example)

kat2 <- rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)][varscale[order(NHANES_10modified_pvalues[[i]]$orig_pval)] %in% c("m = 2", "metric")]
kat4 <- rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)][varscale[order(NHANES_10modified_pvalues[[i]]$orig_pval)] == "m = 4"]
kat5 <- rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)][varscale[order(NHANES_10modified_pvalues[[i]]$orig_pval)] == "m = 5"]
kat6 <- rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)][varscale[order(NHANES_10modified_pvalues[[i]]$orig_pval)] == "m = 6"]
kat12 <- rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)][varscale[order(NHANES_10modified_pvalues[[i]]$orig_pval)] == "m = 12"]

# show part of table for metric and binary variables
(kat2 <- data.frame(
  Original       = sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)])),
  Bootstrap      = sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)])),
  Bootstrap_diff = (sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)]))),
  Subsample      = sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)])),
  Subsample_diff = (sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat2, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)]))))
)


# show part of table for 4-category variables
(kat4 <- data.frame(
  Original       = sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)])),
  Bootstrap      = sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)])),
  Bootstrap_diff = (sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)]))),
  Subsample      = sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)])),
  Subsample_diff = (sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat4, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)]))))
)


# show part of table for 5-category variables
(kat5 <- data.frame(
  Original       = sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)])),
  Bootstrap      = sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)])),
  Bootstrap_diff = (sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)]))),
  Subsample      = sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)])),
  Subsample_diff = (sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat5, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)]))))
)


# show part of table for 6-category variables
(kat6 <- data.frame(
  Original       = sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)])),
  Bootstrap      = sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)])),
  Bootstrap_diff = (sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)]))),
  Subsample      = sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)])),
  Subsample_diff = (sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat6, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)]))))
)


# show part of table for 12-category variables
(kat12 <- data.frame(
  Original       = sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)])),
  Bootstrap      = sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)])),
  Bootstrap_diff = (sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$bootstrapped_p_values)]))),
  Subsample      = sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)])),
  Subsample_diff = (sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$orig_pval)]))) - (sapply(kat12, function(z) which(z == rownames(NHANES_10modified_pvalues[[i]])[order(NHANES_10modified_pvalues[[i]]$subsampled_p_values)]))))
)


#library(xtable)
#xtable(rbind(kat2, kat4, kat5, kat6, kat12))



