#################################################################################
#                                                                               #
#   Reproducible files for the paper 'A computationally fast variable           #
#   importance test for random forests for high-dimensional data' (2015)        #
#   by Janitza, Celik and Boulesteix                                            #
#                                                                               #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                       #
#                                                                               #
#   File for performing studies for the breast data (small predictor space)     #
#                                                                               #
#################################################################################

# NOTE: running this code requires the R package snowfall to be installed. Adjust the number of cores (cpus) to your system
#       as needed. For sequential mode, set parallel to FALSE.

# Attach snowfall with library() so that a missing installation stops the
# script right here with a clear error; require() would only warn, return
# FALSE and let the script run on until the first snowfall call fails.
library(snowfall)

# All results of this script are written to "../results/". Make sure that
# directory exists before any (long-running) computation is started, so that
# a finished study is not lost because save() cannot write its file.
# This is a no-op if the directory is already there.
dir.create("../results", showWarnings = FALSE, recursive = TRUE)

# Start the snowfall cluster with 100 MPI workers. Adjust `cpus` to the
# available hardware, or set `parallel = FALSE` to run sequentially.
sfInit(parallel = TRUE, cpus = 100, type = "MPI")

# Load the packages needed by the study functions on the cluster nodes.
sfLibrary(randomForest)
sfLibrary(vita)

# Source the helper file on the cluster nodes; null_study() and power_study()
# used below are presumably defined there (not visible in this file).
sfSource("functions_compute.R")

######################################################################################################################################
#
#                                                       L O A D   D A T A
#
######################################################################################################################################

## Breast cancer data

# Class labels: read the label file and flatten it into a single factor.
y <- as.factor(unlist(read.table("breast.2.class.class.txt")))

# Predictors: the first column of the file supplies the row names and is then
# dropped; the remaining columns are transposed into a matrix.
# NOTE(review): presumably this makes rows = observations and
# columns = predictors -- confirm against the data file.
x <- read.table("breast.2.class.data.txt")
row.names(x) <- x[[1]]
x <- t(x[, -1])

# Make both objects available on the cluster nodes; the anonymous functions
# passed to sfLapply() below look up `x` and `y` there.
sfExport("x", "y")

######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I                
#
######################################################################################################################################

# Every run below calls null_study() once per seed on the cluster and stores
# the resulting list in "../results/". All runs use cv = 2, subset = TRUE and
# subsetsize = 100; they differ only in VIM, mtry and the number of seeds.
# "smallmtry" is mtry = 10, "largemtry" is mtry = 100 / 5 (= 20).

# ---- new testing approach (VIM = "CV"), seeds 1..500 ----

studyI_cv2_smallmtry_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "CV", mtry = 10
    )
  }
)
save(
  studyI_cv2_smallmtry_p100_breast,
  file = "../results/studyI_cv2_smallmtry_p100_breast.Rda"
)

studyI_cv2_largemtry_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "CV", mtry = 100 / 5
    )
  }
)
save(
  studyI_cv2_largemtry_p100_breast,
  file = "../results/studyI_cv2_largemtry_p100_breast.Rda"
)


# ---- naive testing approach (VIM = "classical"), seeds 1..500 ----

studyI_classical_smallmtry_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "classical", mtry = 10
    )
  }
)
save(
  studyI_classical_smallmtry_p100_breast,
  file = "../results/studyI_classical_smallmtry_p100_breast.Rda"
)

studyI_classical_largemtry_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "classical", mtry = 100 / 5
    )
  }
)
save(
  studyI_classical_largemtry_p100_breast,
  file = "../results/studyI_classical_largemtry_p100_breast.Rda"
)


# ---- approach of Altmann et al. (2010) (VIM = "PIMP"), seeds 1..200 ----

studyI_pimp_smallmtry_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "PIMP", mtry = 10
    )
  }
)
save(
  studyI_pimp_smallmtry_p100_breast,
  file = "../results/studyI_pimp_smallmtry_p100_breast.Rda"
)

studyI_pimp_largemtry_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    null_study(
      seed = rep_seed, x = x, y = y,
      cv = 2, subset = TRUE, subsetsize = 100,
      VIM = "PIMP", mtry = 100 / 5
    )
  }
)
save(
  studyI_pimp_largemtry_p100_breast,
  file = "../results/studyI_pimp_largemtry_p100_breast.Rda"
)


######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I I                
#
######################################################################################################################################

# Every run below calls power_study() once per seed on the cluster with
# independence = FALSE and stores the resulting list in "../results/".
# All runs share cv = 2, subset = TRUE, subsetsize = 100, nsignal = 20 and the
# same effectset; they differ only in VIM, mtry and the number of seeds.
# "smallmtry" is mtry = 10, "largemtry" is mtry = 100 / 5 (= 20).
# The effectset is written out inside each worker function on purpose: the
# function body is evaluated on the cluster nodes, where only exported objects
# are available.

# ---- new testing approach (VIM = "CV"), seeds 1..500 ----

studyII_cv2_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "CV", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_cv2_smallmtry_largeeffects_p100_breast,
  file = "../results/studyII_cv2_smallmtry_largeeffects_p100_breast.Rda"
)

studyII_cv2_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "CV", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_cv2_largemtry_largeeffects_p100_breast,
  file = "../results/studyII_cv2_largemtry_largeeffects_p100_breast.Rda"
)


# ---- naive testing approach (VIM = "classical"), seeds 1..500 ----

studyII_classical_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "classical", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_classical_smallmtry_largeeffects_p100_breast,
  file = "../results/studyII_classical_smallmtry_largeeffects_p100_breast.Rda"
)

studyII_classical_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "classical", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_classical_largemtry_largeeffects_p100_breast,
  file = "../results/studyII_classical_largemtry_largeeffects_p100_breast.Rda"
)


# ---- approach of Altmann et al. (2010) (VIM = "PIMP"), seeds 1..200 ----

studyII_pimp_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "PIMP", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_pimp_smallmtry_largeeffects_p100_breast,
  file = "../results/studyII_pimp_smallmtry_largeeffects_p100_breast.Rda"
)

studyII_pimp_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = FALSE,
      subset = TRUE, subsetsize = 100,
      VIM = "PIMP", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyII_pimp_largemtry_largeeffects_p100_breast,
  file = "../results/studyII_pimp_largemtry_largeeffects_p100_breast.Rda"
)


######################################################################################################################################
#
#                                                    P E R F O R M   S T U D Y   I I I               
#
######################################################################################################################################

# Every run below calls power_study() once per seed on the cluster with
# independence = TRUE and stores the resulting list in "../results/".
# All runs share cv = 2, subset = TRUE, subsetsize = 100, nsignal = 20 and the
# same effectset; they differ only in VIM, mtry and the number of seeds.
# "smallmtry" is mtry = 10, "largemtry" is mtry = 100 / 5 (= 20).
# The effectset is written out inside each worker function on purpose: the
# function body is evaluated on the cluster nodes, where only exported objects
# are available.

# ---- new testing approach (VIM = "CV"), seeds 1..500 ----

studyIII_cv2_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "CV", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_cv2_smallmtry_largeeffects_p100_breast,
  file = "../results/studyIII_cv2_smallmtry_largeeffects_p100_breast.Rda"
)

studyIII_cv2_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "CV", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_cv2_largemtry_largeeffects_p100_breast,
  file = "../results/studyIII_cv2_largemtry_largeeffects_p100_breast.Rda"
)


# ---- naive testing approach (VIM = "classical"), seeds 1..500 ----

studyIII_classical_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "classical", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_classical_smallmtry_largeeffects_p100_breast,
  file = "../results/studyIII_classical_smallmtry_largeeffects_p100_breast.Rda"
)

studyIII_classical_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(500),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "classical", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_classical_largemtry_largeeffects_p100_breast,
  file = "../results/studyIII_classical_largemtry_largeeffects_p100_breast.Rda"
)


# ---- approach of Altmann et al. (2010) (VIM = "PIMP"), seeds 1..200 ----

studyIII_pimp_smallmtry_largeeffects_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "PIMP", nsignal = 20, mtry = 10,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_pimp_smallmtry_largeeffects_p100_breast,
  file = "../results/studyIII_pimp_smallmtry_largeeffects_p100_breast.Rda"
)

studyIII_pimp_largemtry_largeeffects_p100_breast <- sfLapply(
  seq_len(200),
  function(rep_seed) {
    power_study(
      seed = rep_seed, x = x, cv = 2, independence = TRUE,
      subset = TRUE, subsetsize = 100,
      VIM = "PIMP", nsignal = 20, mtry = 100 / 5,
      effectset = c(-0.5, -1, -2, -3, 0.5, 1, 2, 3)
    )
  }
)
save(
  studyIII_pimp_largemtry_largeeffects_p100_breast,
  file = "../results/studyIII_pimp_largemtry_largeeffects_p100_breast.Rda"
)


# Shut down the snowfall cluster that was started with sfInit() above.
sfStop()
