# Overall title written into the outer margin of the current figure.
mtext("Null Case Study", outer = TRUE, cex = 1.5, line = -1)
# Inspect the first element of the result list.
OOB_nnoise10_10_10[[1]]
# Pull the varimp.ER component out of every list element; the following
# lines try different ways of flattening the result (unlist / as.numeric).
sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER)
unlist(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER))
as.numeric(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER))
# Side-by-side boxplots of all pooled varimp.ER values of the two result lists.
# NOTE(review): the y-axis is labelled "OOB error" although varimp.ER is
# plotted; the "n = 20"/"n = 200" labels presumably follow from the _10_10 and
# _100_100 suffixes (observations per class) -- confirm both.
boxplot(as.numeric(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER)),
as.numeric(sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$varimp.ER)),
ylab = "OOB error", main = "p = 10", names = c("n = 20", "n = 200"))
# Dashed horizontal reference line at 0.
abline(h = 0, lty = 2)
# Same comparison as before, but the varimp.ER values are first averaged with
# rowMeans() over the list elements (assumes sapply() returns a matrix with one
# column per list element -- TODO confirm).
# NOTE(review): y-axis label still reads "OOB error" for varimp.ER values.
boxplot(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER)),
rowMeans(sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$varimp.ER)),
ylab = "OOB error", main = "p = 10", names = c("n = 20", "n = 200"))
# Print the row means for the nnoise10 list, then draw them as bar plots for
# the nnoise10, nnoise100 and nnoise1000 result lists.
rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER))
barplot(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.ER)))
barplot(rowMeans(sapply(1:length(OOB_nnoise100_10_10), function(z) OOB_nnoise100_10_10[[z]]$varimp.ER)))
barplot(rowMeans(sapply(1:length(OOB_nnoise1000_10_10), function(z) OOB_nnoise1000_10_10[[z]]$varimp.ER)))
# varimp.ER of the nnoise1000 result lists, compared between the _10_10 and
# the _100_100 runs.
# Fixes relative to the earlier drafts of these calls:
#   * the index sequence is now taken from the list that is actually indexed
#     (previously 1:length() of the nnoise10 lists was used to index the
#     nnoise1000 lists, which silently depends on both having equal length);
#   * the mistyped object name OOB_nnoise10_1000_10 is replaced by
#     OOB_nnoise1000_10_10 (the name used by the immediately following retry);
#   * the panel title now says p = 1000, matching the plotted objects.
# NOTE(review): the y-axis label "OOB error" is kept as-is although varimp.ER
# is plotted -- confirm the intended label.

# Values averaged over list elements (one mean per row of the sapply() result).
boxplot(rowMeans(sapply(seq_along(OOB_nnoise1000_10_10), function(z) OOB_nnoise1000_10_10[[z]]$varimp.ER)),
        rowMeans(sapply(seq_along(OOB_nnoise1000_100_100), function(z) OOB_nnoise1000_100_100[[z]]$varimp.ER)),
        ylab = "OOB error", main = "p = 1000", names = c("n = 20", "n = 200"))
# All individual values pooled.
boxplot(as.numeric(sapply(seq_along(OOB_nnoise1000_10_10), function(z) OOB_nnoise1000_10_10[[z]]$varimp.ER)),
        as.numeric(sapply(seq_along(OOB_nnoise1000_100_100), function(z) OOB_nnoise1000_100_100[[z]]$varimp.ER)),
        ylab = "OOB error", main = "p = 1000", names = c("n = 20", "n = 200"))
# Inspect the first result element again (to look at its components).
OOB_nnoise10_10_10[[1]]
# Attempts at bar plots of the class-wise importances (columns 1 and 2 of
# varimp.class).
# NOTE(review): in the first two attempts the second vector is passed as the
# second positional argument of barplot(), which base R documents as `width`,
# not as a second set of bars; the c(...) version further down concatenates the
# two vectors into one `height` vector instead.
# NOTE(review): column 1 is taken from the _10_10 list but column 2 from the
# _100_100 list -- confirm this mix is intended.
barplot(as.numeric(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])),
as.numeric(sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$varimp.class[,2]))
)
# Column 1 of varimp.class for every list element.
sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])
barplot(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$varimp.class[,2])))
# Row means of column 1 over the list elements.
rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1]))
# Both sets of row means concatenated into a single vector of bar heights.
barplot(c(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$varimp.class[,2]))))
# Load the saved power-case results for the nnoise10 setting. The objects
# OOBpower_nnoise10_10_10 / OOBpower_nnoise10_100_100 used below are presumably
# created by these load() calls -- confirm against the .Rda contents.
# Fixes: a draft line with an unbalanced closing parenthesis (a parse error)
# and a duplicated load() of the same file were removed.
load("Z:/OOBpower_nnoise10_10_10.Rda")
load("Z:/OOBpower_nnoise10_100_100.Rda")
# Column 2 of varimp.class, averaged over the list elements.
rowMeans(sapply(seq_along(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,2]))
# Two-panel figure of the OOB.ER component of the power-case result lists,
# with room in the outer top margin for a common title.
par(mfrow = c(1,2), oma = c(0, 0, 1, 0))
# Left panel: nnoise10 lists (_10_10 vs _100_100).
boxplot(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$OOB.ER),
sapply(1:length(OOBpower_nnoise10_100_100), function(z) OOBpower_nnoise10_100_100[[z]]$OOB.ER),
ylab = "OOB error", main = "p = 10", names = c("n = 20", "n = 200"))
# Dashed reference line at an error rate of 0.5.
abline(h = 0.5, lty = 2)
# Right panel: nnoise1000 lists.
# NOTE(review): no load() for the OOBpower_nnoise1000_* objects is visible
# before this point -- they must already exist in the session.
boxplot(sapply(1:length(OOBpower_nnoise1000_10_10), function(z) OOBpower_nnoise1000_10_10[[z]]$OOB.ER),
sapply(1:length(OOBpower_nnoise1000_100_100), function(z) OOBpower_nnoise1000_100_100[[z]]$OOB.ER),
ylab = "OOB error", main = "p = 1000", names = c("n = 20", "n = 200"))
abline(h = 0.5, lty = 2)
# Common title in the outer margin.
mtext("Power Case Study", outer = TRUE, cex = 1.5, line = -1)
# OOB error (OOB.ER) of the nnoise10 runs: OOB_* lists in the left panel,
# OOBpower_* lists in the right panel.
# NOTE(review): both panels carry the same title "p = 10", so the figure does
# not say which panel is which -- confirm the intended titles.
par(mfrow = c(1,2), oma = c(0, 0, 1, 0))
boxplot(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$OOB.ER),
sapply(1:length(OOB_nnoise10_100_100), function(z) OOB_nnoise10_100_100[[z]]$OOB.ER),
ylab = "OOB error", main = "p = 10", names = c("n = 20", "n = 200"))
# Dashed reference line at an error rate of 0.5.
abline(h = 0.5, lty = 2)
boxplot(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$OOB.ER),
sapply(1:length(OOBpower_nnoise10_100_100), function(z) OOBpower_nnoise10_100_100[[z]]$OOB.ER),
ylab = "OOB error", main = "p = 10", names = c("n = 20", "n = 200"))
abline(h = 0.5, lty = 2)
# Bar plots of the mean class-wise importances: the row means of column 1 of
# varimp.class followed by the row means of column 2, concatenated into one
# vector of bar heights.
# NOTE(review): this first call takes column 1 from the _10_10 list and
# column 2 from the _100_100 list; the calls after it use one list for both.
barplot(c(rowMeans(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOBpower_nnoise10_100_100), function(z) OOBpower_nnoise10_100_100[[z]]$varimp.class[,2]))))
# Power-case list, both columns from the same object.
barplot(c(rowMeans(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,2]))))
# Same plot for the OOB_* list (the call was entered twice).
barplot(c(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,2]))))
barplot(c(rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOB_nnoise10_10_10), function(z) OOB_nnoise10_10_10[[z]]$varimp.class[,2]))))
# And once more for the power-case list.
barplot(c(rowMeans(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,1])),
rowMeans(sapply(1:length(OOBpower_nnoise10_10_10), function(z) OOBpower_nnoise10_10_10[[z]]$varimp.class[,2]))))
# Mean class-wise importances (columns 1 and 2 of varimp.class) for the
# nnoise100 and nnoise1000 power-case lists.
# Fixes relative to the earlier drafts of these calls:
#   * the list that supplies the index sequence is now also the list that is
#     indexed (previously the nnoise10 list was indexed, so the nnoise10 plot
#     was redrawn under a different name);
#   * the nnoise1000 results are loaded before they are first used.
# NOTE(review): OOBpower_nnoise100_10_10 is not loaded anywhere in the visible
# part of this script -- it must already exist in the session.
barplot(c(rowMeans(sapply(seq_along(OOBpower_nnoise100_10_10), function(z) OOBpower_nnoise100_10_10[[z]]$varimp.class[,1])),
          rowMeans(sapply(seq_along(OOBpower_nnoise100_10_10), function(z) OOBpower_nnoise100_10_10[[z]]$varimp.class[,2]))))
load("Z:/OOBpower_nnoise1000_10_10.Rda")
barplot(c(rowMeans(sapply(seq_along(OOBpower_nnoise1000_10_10), function(z) OOBpower_nnoise1000_10_10[[z]]$varimp.class[,1])),
          rowMeans(sapply(seq_along(OOBpower_nnoise1000_10_10), function(z) OOBpower_nnoise1000_10_10[[z]]$varimp.class[,2]))))
# Only the first 10 row means of each column, to keep the plot readable.
barplot(c(rowMeans(sapply(seq_along(OOBpower_nnoise1000_10_10), function(z) OOBpower_nnoise1000_10_10[[z]]$varimp.class[,1]))[1:10],
          rowMeans(sapply(seq_along(OOBpower_nnoise1000_10_10), function(z) OOBpower_nnoise1000_10_10[[z]]$varimp.class[,2]))[1:10]))
# Simulate one null-case data set: a two-class response and `nnoise` standard
# normal predictors that are generated independently of the response.
nobs2 <- 10
nobs1 <- nobs2
nnoise <- 10
# Training data: nobs1 observations of class 1 followed by nobs2 of class 2.
noise <- matrix(rnorm((nobs1 + nobs2) * nnoise), ncol = nnoise)
traindata <- data.frame(
  y = factor(rep(c(1, 2), times = c(nobs1, nobs2))),
  x = noise
)
# Test data: 5000 observations per class, drawn the same way.
noise <- matrix(rnorm((5000 + 5000) * nnoise), ncol = nnoise)
testdata <- data.frame(
  y = factor(rep(c(1, 2), times = c(5000, 5000))),
  x = noise
)
# Working copy used by the interactive experiments that follow.
xdata <- traindata
get_VIMs <- function(xdata, mtry){
  # Fit a 1000-tree random forest of y on all other columns of xdata and
  # return its misclassification rate. Despite the name, no variable
  # importances are returned.
  #   xdata: data frame holding the class labels in column y.
  #   mtry:  handed on to randomForest() as its mtry argument.
  rf_fit <- randomForest(y ~ ., data = xdata,
                         ntree = 1000, replace = FALSE, mtry = mtry)
  # predict() without newdata: per the randomForest documentation this gives
  # the out-of-bag predictions.
  oob_class <- predict(rf_fit)
  wrong <- oob_class != xdata$y
  mean(wrong)
}
# Candidate mtry values to evaluate.
mtrygrid <- c(1, 5, 10, 50, 100, 150, 200, 250, 300)
# One get_VIMs() result per mtry value.
# NOTE(review): get_VIMs() calls randomForest(), but the package is only
# attached on the next line -- this call fails unless it was attached earlier
# in the session.
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
library(randomForest)
get_VIMs <- function(xdata, mtry){
  # Misclassification rate of a random forest grown on xdata.
  #   xdata: data frame; column y is the response, all other columns are
  #          used as predictors (formula y ~ .).
  #   mtry:  value forwarded to randomForest(mtry = ).
  # Returns a single number: the share of predictions that differ from y.
  # (The name is historical -- no variable importances are computed.)
  fitted_forest <- randomForest(
    y ~ .,
    data    = xdata,
    mtry    = mtry,
    ntree   = 1000,
    replace = FALSE
  )
  # No newdata is supplied, so (per the randomForest documentation) these are
  # out-of-bag predictions.
  predicted <- predict(fitted_forest)
  is_error <- predicted != xdata$y
  mean(is_error)
}
# Evaluate get_VIMs() over the mtry grid, first with the grid as a numeric
# vector, then as a list (sapply() iterates over the elements either way).
mtrygrid <- c(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
mtrygrid <- list(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
# Print the grid.
mtrygrid
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
# Run the body of get_VIMs() by hand for mtry = 1 on xdata; this leaves
# `forest` and `pred` in the global environment.
mtry <- 1
forest <- randomForest(y ~ ., data = xdata,
mtry         = mtry,
replace      = FALSE,
ntree        = 1000)
pred <- predict(forest)
# Share of predictions that differ from the observed class.
mean(pred != xdata$y)
get_VIMs <- function(xdata, mtry){
  # Grow a random forest (1000 trees, replace = FALSE) for y ~ . on xdata with
  # the requested mtry, and report how often its predictions miss xdata$y.
  # Note that the result is an error rate, not a variable importance measure.
  model <- randomForest(y ~ ., data = xdata, replace = FALSE,
                        mtry = mtry, ntree = 1000)
  # predict() is called without newdata; according to the randomForest
  # documentation that returns the out-of-bag predictions.
  guess <- predict(model)
  mean(guess != xdata$y)
}
##################################################################
# Call function get_VIMs() for each mtry value in the grid       #
##################################################################
mtrygrid <- list(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
# Print the results.
res
# Switch to 1000 noise predictors.
# NOTE(review): the call below still uses the traindata built with the previous
# nnoise value; the data are only regenerated further down.
nnoise = 1000
mtrygrid <- list(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
# Create the predictors: nnoise columns of standard normal noise (no effect
# predictors are generated here), for nobs1 + nobs2 training observations ...
noise <- matrix(rnorm(nnoise * (sum(c(nobs1, nobs2)))), ncol = nnoise)
traindata <- data.frame(y = as.factor(c(rep(1, nobs1), rep(2, nobs2))),
x = noise)
# ... and for 5000 + 5000 test observations.
noise <- matrix(rnorm(nnoise * (sum(c(5000, 5000)))), ncol = nnoise)
testdata <- data.frame(y = as.factor(c(rep(1, 5000), rep(2, 5000))),
x = noise)
##################################################################
# Function returning the random forest prediction error for a    #
# given dataset and mtry value                                   #
##################################################################
get_VIMs <- function(xdata, mtry){
  # xdata: data frame with response column y; every other column enters the
  #        forest as a predictor.
  # mtry:  passed through to randomForest().
  # Value: proportion of predictions that do not equal xdata$y.
  rf <- randomForest(y ~ ., data = xdata, mtry = mtry,
                     ntree = 1000, replace = FALSE)
  # Predictions for the training observations themselves; with newdata omitted
  # the randomForest documentation states these are out-of-bag predictions.
  oob_pred <- predict(rf)
  misclassified <- oob_pred != xdata$y
  mean(misclassified)
}
##################################################################
# Call function get_VIMs() for each mtry value in the grid       #
##################################################################
mtrygrid <- list(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
# Calculate predictions by hand from the `forest` object in the global
# environment: once without newdata and once for testdata.
# NOTE(review): `forest` here is whatever was last fitted interactively, not a
# forest fitted inside get_VIMs() -- confirm it matches the current data.
pred.OOB <- predict(forest)
pred.testdata <- predict(forest, newdata = testdata)
get_VIMs <- function(xdata, mtry){
  # Fit a random forest on xdata and return its OOB and test-set error rates.
  #
  #   xdata: training data frame; column y is the class label, all other
  #          columns are used as predictors (formula y ~ .).
  #   mtry:  forwarded to randomForest() as mtry.
  #
  # The test set is NOT an argument: `testdata` is looked up in the enclosing
  # environment and must be a data frame with the same predictor columns and
  # a label column y.
  #
  # Returns a numeric vector of length 2:
  #   [1] error rate of the predictions for the training observations
  #       (predict() without newdata -- out-of-bag predictions according to
  #       the randomForest documentation),
  #   [2] error rate of the predictions for testdata.
  # (The name is historical -- no variable importances are computed.)

  # Grow random forest on subsamples drawn without replacement
  forest <- randomForest(y ~ ., data = xdata,
                         mtry    = mtry,
                         replace = FALSE,
                         ntree   = 1000)
  # Calculate preds
  pred.OOB <- predict(forest)
  pred.testdata <- predict(forest, newdata = testdata)
  # Bug fix: the test predictions must be compared with the test labels.
  # Previously they were compared with xdata$y, i.e. the (much shorter)
  # training labels recycled along the test set, which is not a test error.
  c(mean(pred.OOB != xdata$y), mean(pred.testdata != testdata$y))
}
# Print the previous results, then recompute them with the two-value
# get_VIMs(): sapply() now yields a matrix with one column per mtry value and
# two rows (first value and second value returned by get_VIMs()).
res
mtrygrid <- list(1, 5, 10, 50, 100, 150, 200, 250, 300)
res <- sapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z))
res
# Quick looks at the result matrix: first column, the whole matrix, and then
# each row plotted against the position (1..9) in the mtry grid.
plot(res[,1])
plot(res)
plot(1:9, res[1,])
plot(1:9, res[2,])
# 100 replications of the null case study with seeds 1..100.
# NOTE(review): null_case_study() is only defined further down in this script;
# this call fails unless the function already exists in the session.
res <- lapply(1:100, function(z) null_case_study(seed = z, nnoise = 10, nobs1 = 50, nobs2 = 50))
null_case_study <- function(seed, nnoise = 1000, nobs1 = 100, nobs2 = 100){
  # One replication of the null case study: two classes, predictors that are
  # pure noise, random forests fitted over a grid of mtry values.
  #
  #   seed:   passed to set.seed() so the replication is reproducible.
  #   nnoise: number of standard normal noise predictors.
  #   nobs1:  number of training observations in class 1.
  #   nobs2:  number of training observations in class 2.
  #
  # Returns a 2 x 9 numeric matrix with one column per mtry value
  # (1, 5, 10, 50, 100, 150, 200, 250, 300): row 1 is the error rate of the
  # predictions for the training observations (out-of-bag predictions
  # according to the randomForest documentation), row 2 the error rate on a
  # simulated test set with 5000 observations per class.
  # NOTE(review): grid values larger than nnoise exceed the number of
  # predictors; this is presumably the source of the warnings seen when
  # running with nnoise = 10 -- confirm against the randomForest docs.
  set.seed(seed)

  # Create the predictors: nnoise noise columns, none related to the class.
  noise <- matrix(rnorm(nnoise * (nobs1 + nobs2)), ncol = nnoise)
  traindata <- data.frame(y = as.factor(c(rep(1, nobs1), rep(2, nobs2))),
                          x = noise)
  noise <- matrix(rnorm(nnoise * (5000 + 5000)), ncol = nnoise)
  testdata <- data.frame(y = as.factor(c(rep(1, 5000), rep(2, 5000))),
                         x = noise)

  ##################################################################
  # Function returning OOB and test error for a given mtry value   #
  ##################################################################
  get_VIMs <- function(xdata, mtry){
    # Grow random forest on subsamples drawn without replacement
    forest <- randomForest(y ~ ., data = xdata,
                           mtry    = mtry,
                           replace = FALSE,
                           ntree   = 1000)
    # Calculate preds
    pred.OOB <- predict(forest)
    pred.testdata <- predict(forest, newdata = testdata)
    # Bug fix: compare the test predictions with the test labels. They were
    # previously compared with xdata$y, i.e. the training labels recycled
    # along the 10000 test observations.
    c(mean(pred.OOB != xdata$y), mean(pred.testdata != testdata$y))
  }

  ##################################################################
  # Call function get_VIMs() for each mtry value                   #
  ##################################################################
  mtrygrid <- c(1, 5, 10, 50, 100, 150, 200, 250, 300)
  # vapply() pins the result to a 2-row numeric matrix.
  vapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z), numeric(2))
}
# 100 replications (seeds 1..100) with 10 noise predictors and 50 observations
# per class; the result is a list with one null_case_study() return value per
# seed.
res <- lapply(1:100, function(z) null_case_study(seed = z, nnoise = 10, nobs1 = 50, nobs2 = 50))
# Show the warnings raised by the run above.
warnings()
null_case_study <- function(seed, nnoise = 1000, nobs1 = 100, nobs2 = 100){
  # Runs a single, seeded replication of the null case study and returns the
  # prediction errors of random forests over a grid of mtry values.
  #
  # Arguments
  #   seed:   random seed (set.seed() is called first thing).
  #   nnoise: how many standard normal noise predictors to simulate.
  #   nobs1:  training observations with class label 1.
  #   nobs2:  training observations with class label 2.
  #
  # Value
  #   A numeric matrix with 2 rows and 9 columns. Columns follow the mtry
  #   grid 1, 5, 10, 50, 100, 150, 200, 250, 300. Row 1 holds the error rate
  #   of the predictions made for the training observations (out-of-bag
  #   predictions according to the randomForest documentation); row 2 holds
  #   the error rate on a simulated test set of 5000 observations per class.
  # NOTE(review): when nnoise is smaller than the largest grid value, mtry
  # exceeds the number of predictors -- presumably what triggers warnings for
  # nnoise = 10; confirm against the randomForest docs.
  set.seed(seed)

  # Simulate training and test data; all predictors are noise.
  noise <- matrix(rnorm(nnoise * (nobs1 + nobs2)), ncol = nnoise)
  traindata <- data.frame(y = as.factor(c(rep(1, nobs1), rep(2, nobs2))),
                          x = noise)
  noise <- matrix(rnorm(nnoise * (5000 + 5000)), ncol = nnoise)
  testdata <- data.frame(y = as.factor(c(rep(1, 5000), rep(2, 5000))),
                         x = noise)

  ##################################################################
  # Function returning OOB and test error for a given mtry value   #
  ##################################################################
  get_VIMs <- function(xdata, mtry){
    # Grow random forest on subsamples drawn without replacement
    forest <- randomForest(y ~ ., data = xdata,
                           mtry    = mtry,
                           replace = FALSE,
                           ntree   = 1000)
    # Calculate preds
    pred.OOB <- predict(forest)
    pred.testdata <- predict(forest, newdata = testdata)
    # Bug fix: the test error is computed against testdata$y. The earlier
    # version used xdata$y, which recycles the training labels over the test
    # set and therefore does not measure test error.
    c(mean(pred.OOB != xdata$y), mean(pred.testdata != testdata$y))
  }

  ##################################################################
  # Call function get_VIMs() for each mtry value                   #
  ##################################################################
  mtrygrid <- c(1, 5, 10, 50, 100, 150, 200, 250, 300)
  # vapply() guarantees a 2-row numeric matrix (one column per mtry value).
  vapply(mtrygrid, function(z) get_VIMs(xdata = traindata, mtry = z), numeric(2))
}
# 100 replications (seeds 1..100) with 1000 noise predictors: first with 50,
# then with 10 observations per class. The second assignment overwrites the
# result of the first.
res <- lapply(1:100, function(z) null_case_study(seed = z, nnoise = 1000, nobs1 = 50, nobs2 = 50))
res <- lapply(1:100, function(z) null_case_study(seed = z, nnoise = 1000, nobs1 = 10, nobs2 = 10))
# Scratch calculation.
2^26
# Work inside the folder that holds the saved R objects; the relative file
# names used below (load(), pdf()) resolve against this directory.
setwd("Z:/cmmgrp/Silke/Bootstrap_SJ_HB_ALB/reproducible_files/R_Objects/")
# Presumably creates the NHANES_AIC object used below -- confirm against the
# .Rda contents.
load("NHANES_AIC.Rda")
# inspect the covariate names and attribute corresponding number of parameters that need to be estimated
names(NHANES_AIC[[1]])
# One "k = ..." label per covariate, in the order of the names printed above
# (28 entries); used to annotate the axis labels of the plots below.
varscale <- c("k = 1", "k = 4", "k = 3", "k = 4", "k = 5", "k = 1", "k = 1", "k = 1", "k = 4", "k = 1", "k = 3", "k = 4", "k = 4", "k = 4", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 4", "k = 1", "k = 1", "k = 1", "k = 11", "k = 1")
# Two panels side by side; wide left margin for the covariate labels.
par(mfrow = c(1, 2), mar = c(5, 12, 0.5, 0.2))
# Left panel: covariates ranked by their AIC in the original sample.
# Empty plot frame spanning the AIC range (x) and one row per covariate (y).
plot(c(min(NHANES_AIC$orig_AIC), max(NHANES_AIC$orig_AIC)), c(1, length(NHANES_AIC$orig_AIC)), type="n", xlab="AIC (original sample)", yaxt="n", ylab = "")
# Horizontal grid lines only.
grid(ny = c(length(NHANES_AIC$orig_AIC)+1), nx = 0)
# Smallest AIC is drawn in the top row.
points(sort(NHANES_AIC$orig_AIC), c(length(NHANES_AIC$orig_AIC):1), cex = 0.5, col = "black", pch = 16)
# Row labels: covariate name followed by its "k = ..." label in parentheses.
axis(2, labels = paste(names(sort(NHANES_AIC$orig_AIC)), " (", varscale[order(NHANES_AIC$orig_AIC)], ")", sep = ""), at = length(NHANES_AIC$orig_AIC):1, las = 2)
# Right panel: the same display for the row means of bootstrapped_AIC
# (assumes one row per covariate -- TODO confirm).
plot(c(min(rowMeans(NHANES_AIC$bootstrapped_AIC)), max(rowMeans(NHANES_AIC$bootstrapped_AIC))), c(1, length(rowMeans(NHANES_AIC$bootstrapped_AIC))),
type = "n", xlab = "Bootstrapped AIC (averaged value)", yaxt = "n", ylab = "")
grid(ny = c(length(NHANES_AIC$orig_AIC)+1), nx = 0)
points(sort(rowMeans(NHANES_AIC$bootstrapped_AIC)), c(length(rowMeans(NHANES_AIC$bootstrapped_AIC)):1), cex = 0.5, col = "black", pch = 16)
axis(2, labels = paste(names(sort(rowMeans(NHANES_AIC$bootstrapped_AIC))), " (", varscale[order(rowMeans(NHANES_AIC$bootstrapped_AIC))], ")", sep = ""), at = length(rowMeans(NHANES_AIC$bootstrapped_AIC)):1, las = 2)
# Covariates ranked by the row means of subsampled_AIC (assumes one row per
# covariate -- TODO confirm): shown once on screen and then written to
# NHANES_AIC_ranking_subsample.pdf.
# Fixes relative to the earlier drafts of this figure:
#   * the x-axis label said "Bootstrapped AIC" although the subsampled values
#     are plotted; it now reads "Subsampled AIC (averaged value)" everywhere;
#   * the PDF is written once (it was written twice to the same file, first
#     with the wrong label).

# On-screen preview.
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
# Empty frame: x spans the range of the mean AICs, y has one row per covariate.
plot(range(rowMeans(NHANES_AIC$subsampled_AIC)), c(1, length(rowMeans(NHANES_AIC$subsampled_AIC))),
     type = "n", xlab = "Subsampled AIC (averaged value)", yaxt = "n", ylab = "")
# Horizontal grid lines only.
grid(ny = c(length(NHANES_AIC$orig_AIC)+1), nx = 0)
# Smallest mean AIC in the top row.
points(sort(rowMeans(NHANES_AIC$subsampled_AIC)), c(length(rowMeans(NHANES_AIC$subsampled_AIC)):1), cex = 0.5, col = "black", pch = 16)
# Row labels: covariate name plus its "k = ..." label.
axis(2, labels = paste0(names(sort(rowMeans(NHANES_AIC$subsampled_AIC))), " (", varscale[order(rowMeans(NHANES_AIC$subsampled_AIC))], ")"), at = length(rowMeans(NHANES_AIC$subsampled_AIC)):1, las = 2)

# Same figure written to file; graphics.off() closes the preview device first
# and the PDF device afterwards.
graphics.off()
pdf(file = "NHANES_AIC_ranking_subsample.pdf", height = 6, width = 6)
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
plot(range(rowMeans(NHANES_AIC$subsampled_AIC)), c(1, length(rowMeans(NHANES_AIC$subsampled_AIC))),
     type = "n", xlab = "Subsampled AIC (averaged value)", yaxt = "n", ylab = "")
grid(ny = c(length(NHANES_AIC$orig_AIC)+1), nx = 0)
points(sort(rowMeans(NHANES_AIC$subsampled_AIC)), c(length(rowMeans(NHANES_AIC$subsampled_AIC)):1), cex = 0.5, col = "black", pch = 16)
axis(2, labels = paste0(names(sort(rowMeans(NHANES_AIC$subsampled_AIC))), " (", varscale[order(rowMeans(NHANES_AIC$subsampled_AIC))], ")"), at = length(rowMeans(NHANES_AIC$subsampled_AIC)):1, las = 2)
graphics.off()
# Print the "k = ..." labels (one per covariate).
varscale

# Rank table for a set of covariates.
#   vars: character vector of covariate names.
# Returns a data frame with one row per covariate (row names = covariate
# names) holding its rank (1 = smallest AIC) in the original sample, among the
# mean bootstrapped AICs and among the mean subsampled AICs, plus the rank
# differences "original minus bootstrap" and "original minus subsample".
# Reads NHANES_AIC from the enclosing environment. The mean AICs are computed
# with rowMeans(), as in the ranking plots of this script.
rank_table <- function(vars) {
  # Position of each covariate in the increasingly sorted score vector;
  # vapply() insists on exactly one match per name.
  rank_in <- function(scores) {
    ranked <- names(sort(scores))
    vapply(vars, function(v) which(v == ranked), integer(1))
  }
  rank_orig <- rank_in(NHANES_AIC$orig_AIC)
  rank_boot <- rank_in(rowMeans(NHANES_AIC$bootstrapped_AIC))
  rank_sub <- rank_in(rowMeans(NHANES_AIC$subsampled_AIC))
  data.frame(
    Original       = rank_orig,
    Bootstrap      = rank_boot,
    Bootstrap_diff = rank_orig - rank_boot,
    Subsample      = rank_sub,
    Subsample_diff = rank_orig - rank_sub
  )
}

# Covariate names grouped by their "k = ..." label, each group ordered by the
# AIC in the original sample. The katN names are kept from the earlier drafts;
# N is presumably the number of categories (k + 1) -- TODO confirm.
kat2 <- names(sort(NHANES_AIC$orig_AIC)[varscale[order(NHANES_AIC$orig_AIC)] == "k = 1"])
kat4 <- names(sort(NHANES_AIC$orig_AIC)[varscale[order(NHANES_AIC$orig_AIC)] == "k = 3"])
kat5 <- names(sort(NHANES_AIC$orig_AIC)[varscale[order(NHANES_AIC$orig_AIC)] == "k = 4"])
kat6 <- names(sort(NHANES_AIC$orig_AIC)[varscale[order(NHANES_AIC$orig_AIC)] == "k = 5"])
kat12 <- names(sort(NHANES_AIC$orig_AIC)[varscale[order(NHANES_AIC$orig_AIC)] == "k = 11"])

# Each katN is replaced by its rank table; the outer parentheses print it.
# show part of table for metric and binary variables (k = 1)
(kat2 <- rank_table(kat2))
# show part of table for variables with k = 3
(kat4 <- rank_table(kat4))
# show part of table for variables with k = 4
(kat5 <- rank_table(kat5))
# show part of table for variables with k = 5
(kat6 <- rank_table(kat6))
# show part of table for variables with k = 11
(kat12 <- rank_table(kat12))

# Stack the group tables and print them as a LaTeX table.
library(xtable)
xtable(rbind(kat2, kat4, kat5, kat6, kat12))
# Difference between the AIC in the original sample and the mean bootstrapped
# AIC, one horizontal bar per covariate, sorted by that difference.
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
# Empty frame: x spans the range of the differences, y has one row per covariate.
plot(c(min(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC)), max(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC))), c(1, length(NHANES_AIC$orig_AIC)),
type = "n", xlab = "Difference in AIC (original sample) and averaged bootstrapped AIC", yaxt = "n", ylab = "")
# Row i: line from 0 to the i-th smallest difference.
for(i in 1:length(NHANES_AIC$orig_AIC)){
points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC))[i]), c(i, i), type = "l")
}
# Row labels: covariate name plus its "k = ..." label, in the same sort order.
axis(2, labels = paste(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC))), " (",
varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC))], ")", sep = ""), at = 1:length(NHANES_AIC$orig_AIC), las = 2)
# Difference between the AIC in the original sample and the mean subsampled
# AIC, one horizontal bar per covariate, sorted by that difference.
# Fix: the x-axis label said "bootstrapped" although the subsampled AICs are
# used; it now matches the plotted quantity.
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
# Empty frame: x spans the range of the differences, y has one row per covariate.
plot(range(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC)), c(1, length(NHANES_AIC$orig_AIC)),
     type = "n", xlab = "Difference in AIC (original sample) and averaged subsampled AIC", yaxt = "n", ylab = "")
# Row i: line from 0 to the i-th smallest difference.
for(i in seq_along(NHANES_AIC$orig_AIC)){
  points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))[i]), c(i, i), type = "l")
}
# Row labels: covariate name plus its label from varscale, in the same order.
axis(2, labels = paste0(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), " (",
     varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))], ")"), at = seq_along(NHANES_AIC$orig_AIC), las = 2)
# Load the NHANES data set: first attempt with a path relative to the working
# directory, second attempt with the absolute path.
# NOTE(review): the relative path only resolves if the working directory is
# the drive root, which the setwd() earlier in this script does not establish
# -- the first call presumably failed; confirm and drop it.
load("cmmgrp/Silke/Bootstrap_SJ_HB_ALB/reproducible_files/NHANES_data/NHANES.Rda")
load("Z:/cmmgrp/Silke/Bootstrap_SJ_HB_ALB/reproducible_files/NHANES_data/NHANES.Rda")
# Quick look at a few columns of `data` (presumably the object created by the
# load() above -- TODO confirm): boxplots for BMI, WBCcount and Cholesterol,
# frequency tables for sleepTrouble, race, ToothCond and income.
boxplot(data$BMI)
boxplot(data$WBCcount)
table(data$sleepTrouble)
table(data$race)
table(data$ToothCond)
table(data$income)
boxplot(data$Cholesterol)
boxplot(data$Cholesterol)
boxplot(data$BMI)
# On-screen version of the subsample "bias" figure: difference between the AIC
# in the original sample and the mean subsampled AIC, one horizontal bar per
# covariate, sorted by that difference.
# Fix: the x-axis label said "bootstrapped" although the subsampled AICs are
# plotted; it now reads "subsampled".
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
# Empty frame: x spans the range of the differences, y has one row per covariate.
plot(range(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC)), c(1, length(NHANES_AIC$orig_AIC)),
     type = "n", xlab = "Difference in AIC (original sample) and averaged subsampled AIC", yaxt = "n", ylab = "")
# Row i: line from 0 to the i-th smallest difference.
for(i in seq_along(NHANES_AIC$orig_AIC)){
  points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))[i]), c(i, i), type = "l")
}
# Row labels: covariate name plus its label from varscale, in the same order.
axis(2, labels = paste0(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), " (",
     varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))], ")"), at = seq_along(NHANES_AIC$orig_AIC), las = 2)
# Write the subsample "bias" figure to NHANES_AIC_Bias_subsample.pdf (file
# name is relative to the working directory). graphics.off() closes any open
# device before the PDF is opened and closes the PDF device at the end.
graphics.off()
pdf(file = "NHANES_AIC_Bias_subsample.pdf", height = 6, width = 8.5)
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
# Empty frame: x spans the range of (original AIC - mean subsampled AIC),
# y has one row per covariate.
plot(c(min(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC)), max(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), c(1, length(NHANES_AIC$orig_AIC)),
type = "n", xlab = "Difference in AIC (original sample) and averaged subsampled AIC", yaxt = "n", ylab = "")
# Row i: line from 0 to the i-th smallest difference.
for(i in 1:length(NHANES_AIC$orig_AIC)){
points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))[i]), c(i, i), type = "l")
}
# Row labels: covariate name plus its label from varscale, in the same order.
axis(2, labels = paste(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), " (",
varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))], ")", sep = ""), at = 1:length(NHANES_AIC$orig_AIC), las = 2)
graphics.off()
# The subsample "bias" figure drawn on screen; the PDF device calls are
# commented out.
#graphics.off()
#pdf(file = "NHANES_AIC_Bias_subsample.pdf", height = 6, width = 8.5)
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
plot(c(min(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC)), max(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), c(1, length(NHANES_AIC$orig_AIC)),
type = "n", xlab = "Difference in AIC (original sample) and averaged subsampled AIC", yaxt = "n", ylab = "")
# Row i: line from 0 to the i-th smallest difference.
for(i in 1:length(NHANES_AIC$orig_AIC)){
points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))[i]), c(i, i), type = "l")
}
axis(2, labels = paste(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), " (",
varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))], ")", sep = ""), at = 1:length(NHANES_AIC$orig_AIC), las = 2)
#graphics.off()
# Alternative axis annotation: "m = ..." labels and "metric" instead of the
# "k = ..." labels (same length and order as before). This overwrites varscale
# for everything that follows.
varscale <- c("m = 2", "m = 5", "m = 4", "m = 5", "m = 6", "metric", "metric", "metric", "m = 5", "m = 2", "m = 4", "m = 5", "m = 5", "m = 5", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "m = 2", "metric", "m = 2", "m = 5", "metric", "metric", "metric", "m = 12", "metric")
# Redraw the same figure with the new labels.
#graphics.off()
#pdf(file = "NHANES_AIC_Bias_subsample.pdf", height = 6, width = 8.5)
par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))
plot(c(min(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC)), max(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), c(1, length(NHANES_AIC$orig_AIC)),
type = "n", xlab = "Difference in AIC (original sample) and averaged subsampled AIC", yaxt = "n", ylab = "")
for(i in 1:length(NHANES_AIC$orig_AIC)){
points(c(0, sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))[i]), c(i, i), type = "l")
}
axis(2, labels = paste(names(sort(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))), " (",
varscale[order(NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))], ")", sep = ""), at = 1:length(NHANES_AIC$orig_AIC), las = 2)
#graphics.off()
# Scatter plots of the AIC difference (original sample minus mean resampled
# AIC) against the AIC in the original sample; successive drafts add axis
# labels, titles and filled plotting symbols.
plot( NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC))
plot( NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC))
# Bootstrap (left) and subsample (right) side by side, with labels.
par(mfrow = c(1, 2))
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged bootstrapped AIC", main = "Bootstrap")
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged subsampled AIC", main = "Subsample")
par(mfrow = c(1, 2))
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged bootstrapped AIC", main = "Bootstrap")
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged subsampled AIC", main = "Subsample")
# Same pair with filled circles (pch = 16).
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged bootstrapped AIC", main = "Bootstrap", pch = 16)
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged subsampled AIC", main = "Subsample", pch = 16)
# Write the two-panel scatter plot (bootstrap left, subsample right) to
# NHANES_AICs.pdf. The file is written three times in a row; each pass
# overwrites the previous one and only the axis labels / symbol size change.
# Pass 1: long y-axis labels.
graphics.off()
pdf(file = "NHANES_AICs.pdf", height = 4, width = 7)
par(mfrow = c(1, 2))
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged bootstrapped AIC", main = "Bootstrap", pch = 16)
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original sample) and averaged subsampled AIC", main = "Subsample", pch = 16)
graphics.off()
# Pass 2: short y-axis label and smaller symbols (cex = 0.5).
graphics.off()
pdf(file = "NHANES_AICs.pdf", height = 4, width = 7)
par(mfrow = c(1, 2))
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AICs", main = "Bootstrap", pch = 16, cex = 0.5)
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AICs", main = "Subsample", pch = 16, cex = 0.5)
graphics.off()
# Pass 3 (the version left on disk): y-axis labels state the direction of the
# difference.
graphics.off()
pdf(file = "NHANES_AICs.pdf", height = 4, width = 7)
par(mfrow = c(1, 2))
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$bootstrapped_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original - bootstrap)", main = "Bootstrap", pch = 16, cex = 0.5)
plot(NHANES_AIC$orig_AIC, NHANES_AIC$orig_AIC-rowMeans(NHANES_AIC$subsampled_AIC), xlab = "AIC (original sample)", ylab = "Difference in AIC (original - subsample)", main = "Subsample", pch = 16, cex = 0.5)
graphics.off()
# Horizontal needle chart of the gap between the original-sample AIC and the
# row means of the subsampled AICs. Gaps are sorted ascending; each one is drawn
# as a line from 0 to its value and labelled on the left axis with the
# element's name followed by its varscale entry in parentheses.
# NOTE(review): varscale is only assigned further down in this file, so it
# must already exist in the workspace when this block runs.
# Everything runs inside local() so the temporaries do not leak into the
# workspace.
local({
  # Compute the gap and its sorted version once instead of once per line drawn.
  aic_gap <- NHANES_AIC$orig_AIC - rowMeans(NHANES_AIC$subsampled_AIC)
  gap_sorted <- sort(aic_gap)
  rows <- seq_along(NHANES_AIC$orig_AIC)

  # Wide left margin (12 lines) leaves room for the horizontal axis labels.
  par(mfrow = c(1, 1), mar = c(5, 12, 0.5, 0.2))

  # Empty frame: x spans the observed gaps, y has one slot per element.
  plot(range(aic_gap), c(1, length(NHANES_AIC$orig_AIC)),
       type = "n", yaxt = "n", ylab = "",
       xlab = "Difference in AIC (original sample) and averaged subsampled AIC")

  # One horizontal segment per element, from 0 to its sorted gap.
  segments(x0 = 0, y0 = rows, x1 = gap_sorted[rows], y1 = rows)

  # order(aic_gap) puts the varscale entries in the same order as gap_sorted.
  axis(2, at = rows, las = 2,
       labels = paste0(names(gap_sorted), " (", varscale[order(aic_gap)], ")"))
})
# Scratch exploration: fit a linear model of CRP on a single covariate and look
# at its log-likelihood on the full data and on random subsets of the rows.
# Results are shown through top-level auto-printing; nothing is stored.
# NOTE(review): log(2 * pi) is presumably a check of the constant in the
# Gaussian log-likelihood -- confirm.
log(2 * pi)
varnames <- names(data)[names(data) != "CRP"] # obtain the names of all covariates
# Use the first covariate as the worked example and print its name.
z <- varnames[1]
z
# Fit on all rows first.
index <- 1:nrow(data)
lm(data$CRP[index] ~ data[index,z])
# Opens the help page for lm (interactive use only).
?lm
logLik(lm(data$CRP[index] ~ data[index,z]))
# Three independent draws of 63.2% of the rows, sampled without replacement
# (sample()'s default), each followed by the log-likelihood of the refit.
# NOTE(review): 0.632 is presumably the expected share of distinct rows in a
# bootstrap sample (1 - 1/e) -- confirm.
index <- sample(1:nrow(data), size = 0.632*nrow(data))
logLik(lm(data$CRP[index] ~ data[index,z]))
index <- sample(1:nrow(data), size = 0.632*nrow(data))
logLik(lm(data$CRP[index] ~ data[index,z]))
index <- sample(1:nrow(data), size = 0.632*nrow(data))
logLik(lm(data$CRP[index] ~ data[index,z]))
# Same again with a 90% subsample for comparison.
index <- sample(1:nrow(data), size = 0.9*nrow(data))
logLik(lm(data$CRP[index] ~ data[index,z]))
# One label per element of NHANES_AIC$orig_AIC, aligned by position (28 entries).
# NOTE(review): k presumably counts the model parameters of a covariate
# (categories - 1), which would explain the kat<k + 1> names below -- confirm.
varscale <- c(
  "k = 1", "k = 4", "k = 3", "k = 4", "k = 5", "k = 1", "k = 1",
  "k = 1", "k = 4", "k = 1", "k = 3", "k = 4", "k = 4", "k = 4",
  "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1", "k = 1",
  "k = 1", "k = 4", "k = 1", "k = 1", "k = 1", "k = 11", "k = 1"
)

# Names of the elements carrying each label, listed by ascending original AIC.
# varscale is permuted with order() so it lines up with the sorted AIC vector.
kat2  <- names(sort(NHANES_AIC$orig_AIC))[varscale[order(NHANES_AIC$orig_AIC)] == "k = 1"]
kat4  <- names(sort(NHANES_AIC$orig_AIC))[varscale[order(NHANES_AIC$orig_AIC)] == "k = 3"]
kat5  <- names(sort(NHANES_AIC$orig_AIC))[varscale[order(NHANES_AIC$orig_AIC)] == "k = 4"]
kat6  <- names(sort(NHANES_AIC$orig_AIC))[varscale[order(NHANES_AIC$orig_AIC)] == "k = 5"]
kat12 <- names(sort(NHANES_AIC$orig_AIC))[varscale[order(NHANES_AIC$orig_AIC)] == "k = 11"]
# Rank tables, one per variable group. For every variable name in a group the
# table gives its rank (1 = smallest AIC) by original-sample AIC, by mean
# bootstrapped AIC and by mean subsampled AIC, plus the rank shifts computed
# as original rank minus resampled rank. Each kat* name vector is replaced by
# its table, and the table is printed.

# Build the rank table for the variable names in `vars`.
#   vars: character vector of names occurring in names(aic$orig_AIC).
#   aic:  list with orig_AIC (named vector) and bootstrapped_AIC /
#         subsampled_AIC (one row per variable); defaults to NHANES_AIC.
# Returns a data.frame with one row per entry of `vars` (row names = vars).
# Ranks are looked up with match(), so an empty group yields a 0-row table and
# an unknown name yields NA instead of breaking the data.frame construction.
aic_rank_table <- function(vars, aic = NHANES_AIC) {
  # Orderings are computed once per table instead of once per variable.
  orig_order <- names(aic$orig_AIC)[order(aic$orig_AIC)]
  boot_order <- names(sort(apply(aic$bootstrapped_AIC, 1, mean)))
  sub_order  <- names(sort(apply(aic$subsampled_AIC, 1, mean)))

  rank_orig <- match(vars, orig_order)
  rank_boot <- match(vars, boot_order)
  rank_sub  <- match(vars, sub_order)

  data.frame(
    Original       = rank_orig,
    Bootstrap      = rank_boot,
    Bootstrap_diff = rank_orig - rank_boot,
    Subsample      = rank_sub,
    Subsample_diff = rank_orig - rank_sub,
    row.names      = vars
  )
}

# show part of table for metric and binary variables (label "k = 1")
(kat2 <- aic_rank_table(kat2))
# show part of table for 4-category variables (label "k = 3")
(kat4 <- aic_rank_table(kat4))
# show part of table for 5-category variables (label "k = 4")
(kat5 <- aic_rank_table(kat5))
# show part of table for 6-category variables (label "k = 5")
(kat6 <- aic_rank_table(kat6))
# show part of table for 12-category variables (label "k = 11")
(kat12 <- aic_rank_table(kat12))
# Stack the five rank tables (kat2, kat4, kat5, kat6, kat12, in that order)
# into one data frame and print it through xtable.
library(xtable)
xtable(do.call(rbind, list(kat2, kat4, kat5, kat6, kat12)))
