#################################################################################
#                                                                               #
#   Reproducible files to the paper 'A computationally fast variable            #
#   importance test for random forests for high-dimensional data' (2015)        #
#   by Janitza, Celik and Boulesteix                                            #
#                                                                               #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                       #
#                                                                               #
#   File for creating Figures 5, B.10                                           #
#                                                                               #
#################################################################################

# load relevant R package 
library(vita)

# source the functions used for plotting the results 
source("functions_plot.R")

# load relevant R objects: every file in ../results/ whose name matches
# "studyI_" is load()ed into the current workspace.
# The directory is listed once and full paths are taken from that listing,
# so the set of files cannot change between lookup and loading.
result_files <- list.files("../results", pattern = "studyI_", full.names = TRUE)
if (length(result_files) == 0) {
  warning("no files matching 'studyI_' found in ../results/", call. = FALSE)
}
for (result_file in result_files) load(result_file)


######################################################################################################################################
#
#                                                       F I G U R E   5
#
######################################################################################################################################

# new approach
#
# Writes Fig5upperleft.pdf with two side-by-side panels
# (left: mtry = sqrt(p), right: mtry = p/5).
# Each box summarises, over list elements 1:500 of one studyI_cv2_* object,
# the proportion of NTA() p-values (computed from $cv_varim) below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "Fig5upperleft.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_prostate[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_breast[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_leukemia[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_colon[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_cns[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(p))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_prostate[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_breast[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_leukemia[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_colon[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_cns[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = p/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "New Approach", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# naive approach
#
# Writes Fig5upperright.pdf with two side-by-side panels
# (left: mtry = sqrt(p), right: mtry = p/5).
# Each box summarises, over list elements 1:500 of one studyI_classical_*
# object, the proportion of compute_pvalue() results below 0.05.
# compute_pvalue() is not defined in this file (presumably it comes from
# functions_plot.R). The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "Fig5upperright.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_prostate[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_breast[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_leukemia[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_colon[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_cns[[z]]) < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(p))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_prostate[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_breast[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_leukemia[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_colon[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_cns[[z]]) < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = p/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Naive Approach", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# Altmann's nonparametric approach
#
# Writes Fig5lowerleft.pdf with two side-by-side panels
# (left: mtry = sqrt(p), right: mtry = p/5).
# Each box summarises, over list elements 1:200 of one studyI_pimp_* object,
# the proportion of values in the $nonparam component that are below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "Fig5lowerleft.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:200, function(z) mean(studyI_pimp_smallmtry_prostate[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_breast[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_leukemia[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_colon[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_cns[[z]]$nonparam < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(p))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:200, function(z) mean(studyI_pimp_largemtry_prostate[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_breast[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_leukemia[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_colon[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_cns[[z]]$nonparam < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = p/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Altmann (non-param.)", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# Altmann's parametric approach
#
# Writes Fig5lowerright.pdf with two side-by-side panels
# (left: mtry = sqrt(p), right: mtry = p/5).
# Each box summarises, over list elements 1:200 of one studyI_pimp_* object,
# the proportion of values in the $param component that are below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "Fig5lowerright.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:200, function(z) mean(studyI_pimp_smallmtry_prostate[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_breast[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_leukemia[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_colon[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_cns[[z]]$param < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(p))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:200, function(z) mean(studyI_pimp_largemtry_prostate[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_breast[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_leukemia[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_colon[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_cns[[z]]$param < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = p/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Altmann (param.)", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


######################################################################################################################################
#
#                                                       F I G U R E   B.10
#
######################################################################################################################################

# new approach
#
# Writes FigB10upperleft.pdf with two side-by-side panels
# (left: mtry = sqrt(100), right: mtry = 100/5).
# Each box summarises, over list elements 1:500 of one studyI_cv2_*_p100_*
# object, the proportion of NTA() p-values (computed from $cv_varim) below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "FigB10upperleft.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_p100_prostate[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_p100_breast[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_p100_leukemia[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_p100_colon[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_smallmtry_p100_cns[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(100))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_p100_prostate[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_p100_breast[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_p100_leukemia[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_p100_colon[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(NTA(studyI_cv2_largemtry_p100_cns[[z]]$cv_varim)$pvalue < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = 100/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "New Approach", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# naive approach
#
# Writes FigB10upperright.pdf with two side-by-side panels
# (left: mtry = sqrt(100), right: mtry = 100/5).
# Each box summarises, over list elements 1:500 of one
# studyI_classical_*_p100_* object, the proportion of compute_pvalue() results
# below 0.05. compute_pvalue() is not defined in this file (presumably it comes
# from functions_plot.R). The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "FigB10upperright.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_p100_prostate[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_p100_breast[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_p100_leukemia[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_p100_colon[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_smallmtry_p100_cns[[z]]) < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(100))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_p100_prostate[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_p100_breast[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_p100_leukemia[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_p100_colon[[z]]) < 0.05), numeric(1)),
        vapply(1:500, function(z) mean(compute_pvalue(studyI_classical_largemtry_p100_cns[[z]]) < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = 100/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Naive Approach", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# Altmann's nonparametric approach
#
# Writes FigB10lowerleft.pdf with two side-by-side panels
# (left: mtry = sqrt(100), right: mtry = 100/5).
# Each box summarises, over list elements 1:200 of one studyI_pimp_*_p100_*
# object, the proportion of values in the $nonparam component below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "FigB10lowerleft.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_prostate[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_breast[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_leukemia[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_colon[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_cns[[z]]$nonparam < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(100))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_prostate[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_breast[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_leukemia[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_colon[[z]]$nonparam < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_cns[[z]]$nonparam < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = 100/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Altmann (non-param.)", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()


# Altmann's parametric approach
#
# Writes FigB10lowerright.pdf with two side-by-side panels
# (left: mtry = sqrt(100), right: mtry = 100/5).
# Each box summarises, over list elements 1:200 of one studyI_pimp_*_p100_*
# object, the proportion of values in the $param component below 0.05.
# The dotted horizontal line marks the 0.05 level.

graphics.off()
pdf(file = "FigB10lowerright.pdf", height = 5, width = 6)

# large bottom margin leaves room for the vertical (las = 2) data set names
par(mfrow = c(1, 2), mar = c(8, 4, 2, 0.1), oma = c(0, 0, 2, 0), bty = "l")

# left panel; `ylab` is supplied exactly once
boxplot(vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_prostate[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_breast[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_leukemia[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_colon[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_smallmtry_p100_cns[[z]]$param < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = ", sqrt(100))),
        ylab = "Type I error", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)

# right panel; no y-axis label because it shares the left panel's scale
boxplot(vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_prostate[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_breast[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_leukemia[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_colon[[z]]$param < 0.05), numeric(1)),
        vapply(1:200, function(z) mean(studyI_pimp_largemtry_p100_cns[[z]]$param < 0.05), numeric(1)),
        ylim = c(0, 0.15), main = expression(paste("mtry = 100/5")),
        ylab = "", las = 2, col = "gray95",
        names = c("Prostate Cancer", "Breast Cancer", "Leukemia", "Colon Cancer", "Embryonal Tumor"))

abline(h = 0.05, col = "black", lty = 3)
# common title in the outer margin above both panels
mtext(side = 3, "Altmann (param.)", outer = TRUE, line = 0.5, cex = 1.4)

# close the PDF device so the file is written out
graphics.off()
