#################################################################################
#                                                                               #
#   Reproducible files to the paper 'A computationally fast variable            #
#   importance test for random forests for high-dimensional data' (2015)        #
#   by Janitza, Celik and Boulesteix                                            #
#                                                                               #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                       #
#                                                                               #
#   File for performing studies for the embryonal tumor data                    #
#   (large predictor space)                                                     #
#                                                                               #
#################################################################################

# NOTE: running this code requires installation of R package snowfall. You may possibly want to decrease / increase the number of
#       cores. For sequential mode, set parallel to FALSE.

require(snowfall)

sfInit(parallel = TRUE, cpus = 100, type = "MPI")

sfLibrary(randomForest)
sfLibrary(vita)

sfSource("functions_compute.R")

######################################################################################################################################
#
#                                                       L O A D   D A T A
#
######################################################################################################################################

## embryonal tumor data

x <- read.table("centralNervousSystem_outcome.data", sep = ",")
y <- as.factor(as.numeric(x[,7130]))

x <- x[,-7130]

sfExport("x")
sfExport("y")

######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I                
#
######################################################################################################################################

# new testing approach

studyI_cv2_smallmtry_cns <- sfLapply(1:500, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "CV", mtry = ceiling(sqrt(ncol(x)))))
save(studyI_cv2_smallmtry_cns, file = "../results/studyI_cv2_smallmtry_cns.Rda")

studyI_cv2_largemtry_cns <- sfLapply(1:500, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "CV", mtry = ceiling(ncol(x)/5)))
save(studyI_cv2_largemtry_cns, file = "../results/studyI_cv2_largemtry_cns.Rda")


# naive testing approach

studyI_classical_smallmtry_cns <- sfLapply(1:500, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "classical", mtry = ceiling(sqrt(ncol(x)))))
save(studyI_classical_smallmtry_cns, file = "../results/studyI_classical_smallmtry_cns.Rda")

studyI_classical_largemtry_cns <- sfLapply(1:500, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "classical", mtry = ceiling(ncol(x)/5)))
save(studyI_classical_largemtry_cns, file = "../results/studyI_classical_largemtry_cns.Rda")


# approach of Altmann et al. (2010) 

studyI_pimp_smallmtry_cns <- sfLapply(1:200, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "PIMP", mtry = ceiling(sqrt(ncol(x)))))
save(studyI_pimp_smallmtry_cns, file = "../results/studyI_pimp_smallmtry_cns.Rda")

studyI_pimp_largemtry_cns <- sfLapply(1:200, function(z) 
  null_study(seed = z, x = x, y = y, cv = 2, subset = FALSE, VIM = "PIMP", mtry = ceiling(ncol(x)/5)))
save(studyI_pimp_largemtry_cns, file = "../results/studyI_pimp_largemtry_cns.Rda")


######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I I                
#
######################################################################################################################################

# new testing approach

studyII_cv2_smallmtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "CV", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_cv2_smallmtry_largeeffects_cns, file = "../results/studyII_cv2_smallmtry_largeeffects_cns.Rda")

studyII_cv2_largemtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "CV", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_cv2_largemtry_largeeffects_cns, file = "../results/studyII_cv2_largemtry_largeeffects_cns.Rda")


# naive testing approach

studyII_classical_smallmtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "classical", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_classical_smallmtry_largeeffects_cns, file = "../results/studyII_classical_smallmtry_largeeffects_cns.Rda")

studyII_classical_largemtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "classical", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_classical_largemtry_largeeffects_cns, file = "../results/studyII_classical_largemtry_largeeffects_cns.Rda")


# approach of Altmann et al. (2010) 

studyII_pimp_smallmtry_largeeffects_cns <- sfLapply(1:200, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "PIMP", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_pimp_smallmtry_largeeffects_cns, file = "../results/studyII_pimp_smallmtry_largeeffects_cns.Rda")

studyII_pimp_largemtry_largeeffects_cns <- sfLapply(1:200, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = FALSE, subset = FALSE, VIM = "PIMP", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyII_pimp_largemtry_largeeffects_cns, file = "../results/studyII_pimp_largemtry_largeeffects_cns.Rda")


######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I I I               
#
######################################################################################################################################

# new testing approach

studyIII_cv2_smallmtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "CV", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_cv2_smallmtry_largeeffects_cns, file = "../results/studyIII_cv2_smallmtry_largeeffects_cns.Rda")

studyIII_cv2_largemtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "CV", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_cv2_largemtry_largeeffects_cns, file = "../results/studyIII_cv2_largemtry_largeeffects_cns.Rda")


# naive testing approach

studyIII_classical_smallmtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "classical", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_classical_smallmtry_largeeffects_cns, file = "../results/studyIII_classical_smallmtry_largeeffects_cns.Rda")

studyIII_classical_largemtry_largeeffects_cns <- sfLapply(1:500, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "classical", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_classical_largemtry_largeeffects_cns, file = "../results/studyIII_classical_largemtry_largeeffects_cns.Rda")


# approach of Altmann et al. (2010) 

studyIII_pimp_smallmtry_largeeffects_cns <- sfLapply(1:200, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "PIMP", nsignal = 100, mtry = ceiling(sqrt(ncol(x))), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_pimp_smallmtry_largeeffects_cns, file = "../results/studyIII_pimp_smallmtry_largeeffects_cns.Rda")

studyIII_pimp_largemtry_largeeffects_cns <- sfLapply(1:200, function(z) 
  power_study(seed = z, x = x, cv = 2, independence = TRUE, subset = FALSE, VIM = "PIMP", nsignal = 100, mtry = ceiling(ncol(x)/5), effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)))
save(studyIII_pimp_largemtry_largeeffects_cns, file = "../results/studyIII_pimp_largemtry_largeeffects_cns.Rda")


sfStop()
