#################################################################################
#                                                                               #
#   Reproducible files to the paper 'A computationally fast variable            #
#   importance test for random forests for high-dimensional data' (2015)        #
#   by Janitza, Celik and Boulesteix                                            #
#                                                                               #
#   Contact: S. Janitza <janitza@ibe.med.uni-muenchen.de>                       #
#                                                                               #
#   File for performing studies with different values for k                     #
#                                                                               #
#################################################################################

# NOTE: running this code requires installation of R package snowfall. You may possibly want to decrease / increase the number of
#       cores. For sequential mode, set parallel to FALSE.

# Attach snowfall with library(): a missing installation then stops the script
# right here with a clear error, whereas require() would merely return FALSE
# and let the script fail later at the first sf*() call.
library(snowfall)

# Start the snowfall cluster (100 CPUs via MPI). Adjust `cpus` to the machine
# at hand, or set parallel = FALSE to run everything sequentially.
sfInit(parallel = TRUE, cpus = 100, type = "MPI")

# Packages loaded on every worker.
sfLibrary(randomForest)
sfLibrary(vita)

# Helper functions sourced on every worker; null_study(), used by all studies
# in this script, is expected to be defined in this file.
sfSource("functions_compute.R")

######################################################################################################################################
#
#                                                 P R O S T A T E    C A N C E R    D A T A
#
######################################################################################################################################

# load data 

# Class labels: read the label table, flatten it to a vector and turn it into
# a factor.
y <- as.factor(unlist(read.table("prostate.class.txt")))

# Predictor data: the first column supplies the row names and is then dropped;
# the remaining table is transposed.
x <- read.table("prostate.data.txt")
rownames(x) <- x[[1]]
x <- t(x[, -1])

# Make predictors and response available on all workers.
sfExport("x", "y")


# Studies with small mtry (prostate cancer data).
# Every study maps null_study() over the seeds 1..500 on the snowfall workers
# (x and y are the objects exported above) and saves the resulting list to
# ../results/. cv is varied over 3, 5 and 10.

# -- subset = FALSE, mtry = ceiling(sqrt(ncol(x))) --

studyI_cv3_smallmtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv3_smallmtry_prostate,
     file = "../results/studyI_cv3_smallmtry_prostate.Rda")

studyI_cv5_smallmtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv5_smallmtry_prostate,
     file = "../results/studyI_cv5_smallmtry_prostate.Rda")

studyI_cv10_smallmtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv10_smallmtry_prostate,
     file = "../results/studyI_cv10_smallmtry_prostate.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 10 --
# (how the subset is drawn is decided inside null_study())

studyI_cv3_smallmtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv3_smallmtry_p100_prostate,
     file = "../results/studyI_cv3_smallmtry_p100_prostate.Rda")

studyI_cv5_smallmtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv5_smallmtry_p100_prostate,
     file = "../results/studyI_cv5_smallmtry_p100_prostate.Rda")

studyI_cv10_smallmtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv10_smallmtry_p100_prostate,
     file = "../results/studyI_cv10_smallmtry_p100_prostate.Rda")


# Studies with large mtry (prostate cancer data).
# Same layout as the small-mtry studies: 500 seeded null_study() runs per
# setting, cv in {3, 5, 10}, results saved to ../results/.

# -- subset = FALSE, mtry = ceiling(ncol(x) / 5) --

studyI_cv3_largemtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv3_largemtry_prostate,
     file = "../results/studyI_cv3_largemtry_prostate.Rda")

studyI_cv5_largemtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv5_largemtry_prostate,
     file = "../results/studyI_cv5_largemtry_prostate.Rda")

studyI_cv10_largemtry_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv10_largemtry_prostate,
     file = "../results/studyI_cv10_largemtry_prostate.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 100 / 5 --

studyI_cv3_largemtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv3_largemtry_p100_prostate,
     file = "../results/studyI_cv3_largemtry_p100_prostate.Rda")

studyI_cv5_largemtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv5_largemtry_p100_prostate,
     file = "../results/studyI_cv5_largemtry_p100_prostate.Rda")

studyI_cv10_largemtry_p100_prostate <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv10_largemtry_p100_prostate,
     file = "../results/studyI_cv10_largemtry_p100_prostate.Rda")


# Remove the master-side copies of the prostate data before the next data set
# is loaded.
rm(x, y)

######################################################################################################################################
#
#                                                       L E U K E M I A    D A T A
#
######################################################################################################################################

# load data 

# The leukemia data come from the golubEsets package (object Golub_Merge).
library(golubEsets)
data(Golub_Merge)

# Response: the "ALL.AML" phenotype column, converted to numeric and then
# stored as a factor.
y <- as.factor(as.numeric(pData(Golub_Merge)[["ALL.AML"]]))

# Predictors: transposed expression matrix.
x <- t(exprs(Golub_Merge))

# Make predictors and response available on all workers.
sfExport("x", "y")


# Studies with small mtry (leukemia data).
# Every study maps null_study() over the seeds 1..500 on the snowfall workers
# (x and y are the objects exported above) and saves the resulting list to
# ../results/. cv is varied over 3, 5 and 10.

# -- subset = FALSE, mtry = ceiling(sqrt(ncol(x))) --

studyI_cv3_smallmtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv3_smallmtry_leukemia,
     file = "../results/studyI_cv3_smallmtry_leukemia.Rda")

studyI_cv5_smallmtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv5_smallmtry_leukemia,
     file = "../results/studyI_cv5_smallmtry_leukemia.Rda")

studyI_cv10_smallmtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv10_smallmtry_leukemia,
     file = "../results/studyI_cv10_smallmtry_leukemia.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 10 --
# (how the subset is drawn is decided inside null_study())

studyI_cv3_smallmtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv3_smallmtry_p100_leukemia,
     file = "../results/studyI_cv3_smallmtry_p100_leukemia.Rda")

studyI_cv5_smallmtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv5_smallmtry_p100_leukemia,
     file = "../results/studyI_cv5_smallmtry_p100_leukemia.Rda")

studyI_cv10_smallmtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv10_smallmtry_p100_leukemia,
     file = "../results/studyI_cv10_smallmtry_p100_leukemia.Rda")


# Studies with large mtry (leukemia data).
# Same layout as the small-mtry studies: 500 seeded null_study() runs per
# setting, cv in {3, 5, 10}, results saved to ../results/.

# -- subset = FALSE, mtry = ceiling(ncol(x) / 5) --

studyI_cv3_largemtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv3_largemtry_leukemia,
     file = "../results/studyI_cv3_largemtry_leukemia.Rda")

studyI_cv5_largemtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv5_largemtry_leukemia,
     file = "../results/studyI_cv5_largemtry_leukemia.Rda")

studyI_cv10_largemtry_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv10_largemtry_leukemia,
     file = "../results/studyI_cv10_largemtry_leukemia.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 100 / 5 --

studyI_cv3_largemtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv3_largemtry_p100_leukemia,
     file = "../results/studyI_cv3_largemtry_p100_leukemia.Rda")

studyI_cv5_largemtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv5_largemtry_p100_leukemia,
     file = "../results/studyI_cv5_largemtry_p100_leukemia.Rda")

studyI_cv10_largemtry_p100_leukemia <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv10_largemtry_p100_leukemia,
     file = "../results/studyI_cv10_largemtry_p100_leukemia.Rda")


# Remove the master-side copies of the leukemia data before the next data set
# is loaded.
rm(x, y)


######################################################################################################################################
#
#                                                  B R E A S T    C A N C E R    D A T A
#
######################################################################################################################################

# load data 

# Class labels: read the label table, flatten it to a vector and turn it into
# a factor.
y <- as.factor(unlist(read.table("breast.2.class.class.txt")))

# Predictor data: the first column supplies the row names and is then dropped;
# the remaining table is transposed.
x <- read.table("breast.2.class.data.txt")
rownames(x) <- x[[1]]
x <- t(x[, -1])

# Make predictors and response available on all workers.
sfExport("x", "y")


# Studies with small mtry (breast cancer data).
# Every study maps null_study() over the seeds 1..500 on the snowfall workers
# (x and y are the objects exported above) and saves the resulting list to
# ../results/. cv is varied over 3, 5 and 10.

# -- subset = FALSE, mtry = ceiling(sqrt(ncol(x))) --

studyI_cv3_smallmtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv3_smallmtry_breast,
     file = "../results/studyI_cv3_smallmtry_breast.Rda")

studyI_cv5_smallmtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv5_smallmtry_breast,
     file = "../results/studyI_cv5_smallmtry_breast.Rda")

studyI_cv10_smallmtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv10_smallmtry_breast,
     file = "../results/studyI_cv10_smallmtry_breast.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 10 --
# (how the subset is drawn is decided inside null_study())

studyI_cv3_smallmtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv3_smallmtry_p100_breast,
     file = "../results/studyI_cv3_smallmtry_p100_breast.Rda")

studyI_cv5_smallmtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv5_smallmtry_p100_breast,
     file = "../results/studyI_cv5_smallmtry_p100_breast.Rda")

studyI_cv10_smallmtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv10_smallmtry_p100_breast,
     file = "../results/studyI_cv10_smallmtry_p100_breast.Rda")


# Studies with large mtry (breast cancer data).
# Same layout as the small-mtry studies: 500 seeded null_study() runs per
# setting, cv in {3, 5, 10}, results saved to ../results/.

# -- subset = FALSE, mtry = ceiling(ncol(x) / 5) --

studyI_cv3_largemtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv3_largemtry_breast,
     file = "../results/studyI_cv3_largemtry_breast.Rda")

studyI_cv5_largemtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv5_largemtry_breast,
     file = "../results/studyI_cv5_largemtry_breast.Rda")

studyI_cv10_largemtry_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv10_largemtry_breast,
     file = "../results/studyI_cv10_largemtry_breast.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 100 / 5 --

studyI_cv3_largemtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv3_largemtry_p100_breast,
     file = "../results/studyI_cv3_largemtry_p100_breast.Rda")

studyI_cv5_largemtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv5_largemtry_p100_breast,
     file = "../results/studyI_cv5_largemtry_p100_breast.Rda")

studyI_cv10_largemtry_p100_breast <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv10_largemtry_p100_breast,
     file = "../results/studyI_cv10_largemtry_p100_breast.Rda")


# Remove the master-side copies of the breast cancer data before the next data
# set is loaded.
rm(x, y)


######################################################################################################################################
#
#                                                   C O L O N    C A N C E R    D A T A
#
######################################################################################################################################

# load data 

# Class labels. The conversion below goes through as.numeric(), which only
# yields usable class codes if text labels arrive as factors. Since R 4.0.0
# read.table() keeps text columns as character by default, and as.numeric() on
# character labels would produce NA (with a warning). stringsAsFactors = TRUE
# is therefore requested explicitly, restoring the pre-4.0 behaviour; if the
# label file is purely numeric the argument has no effect.
y <- read.table("colon.class.txt", stringsAsFactors = TRUE)
y <- as.factor(as.numeric(unlist(y)))

# Predictor data: the first column supplies the row names and is then dropped;
# the remaining table is transposed.
x <- read.table("colon.data.txt")
rownames(x) <- x[, 1]
x <- t(x[, -1])

# Make predictors and response available on all workers.
sfExport("x")
sfExport("y")


# Studies with small mtry (colon cancer data).
# Every study maps null_study() over the seeds 1..500 on the snowfall workers
# (x and y are the objects exported above) and saves the resulting list to
# ../results/. cv is varied over 3, 5 and 10.

# -- subset = FALSE, mtry = ceiling(sqrt(ncol(x))) --

studyI_cv3_smallmtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv3_smallmtry_colon,
     file = "../results/studyI_cv3_smallmtry_colon.Rda")

studyI_cv5_smallmtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv5_smallmtry_colon,
     file = "../results/studyI_cv5_smallmtry_colon.Rda")

studyI_cv10_smallmtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv10_smallmtry_colon,
     file = "../results/studyI_cv10_smallmtry_colon.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 10 --
# (how the subset is drawn is decided inside null_study())

studyI_cv3_smallmtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv3_smallmtry_p100_colon,
     file = "../results/studyI_cv3_smallmtry_p100_colon.Rda")

studyI_cv5_smallmtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv5_smallmtry_p100_colon,
     file = "../results/studyI_cv5_smallmtry_p100_colon.Rda")

studyI_cv10_smallmtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv10_smallmtry_p100_colon,
     file = "../results/studyI_cv10_smallmtry_p100_colon.Rda")


# Studies with large mtry (colon cancer data).
# Same layout as the small-mtry studies: 500 seeded null_study() runs per
# setting, cv in {3, 5, 10}, results saved to ../results/.

# -- subset = FALSE, mtry = ceiling(ncol(x) / 5) --

studyI_cv3_largemtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv3_largemtry_colon,
     file = "../results/studyI_cv3_largemtry_colon.Rda")

studyI_cv5_largemtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv5_largemtry_colon,
     file = "../results/studyI_cv5_largemtry_colon.Rda")

studyI_cv10_largemtry_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv10_largemtry_colon,
     file = "../results/studyI_cv10_largemtry_colon.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 100 / 5 --

studyI_cv3_largemtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv3_largemtry_p100_colon,
     file = "../results/studyI_cv3_largemtry_p100_colon.Rda")

studyI_cv5_largemtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv5_largemtry_p100_colon,
     file = "../results/studyI_cv5_largemtry_p100_colon.Rda")

studyI_cv10_largemtry_p100_colon <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv10_largemtry_p100_colon,
     file = "../results/studyI_cv10_largemtry_p100_colon.Rda")


# Remove the master-side copies of the colon cancer data before the next data
# set is loaded.
rm(x, y)

######################################################################################################################################
#
#                                                 E M B R Y O N A L   T U M O R   D A T A
#
######################################################################################################################################

# load data 

# Read the comma-separated data file; column 7130 holds the outcome and the
# remaining columns are used as predictors.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 read.table()
# keeps text columns as character by default, and as.numeric() on a character
# outcome column would produce NA (with a warning) instead of the integer
# factor codes the conversion below relies on. For numeric columns the
# argument has no effect.
x <- read.table("centralNervousSystem_outcome.data", sep = ",",
                stringsAsFactors = TRUE)

# Response: outcome column converted to numeric and stored as a factor.
y <- as.factor(as.numeric(x[, 7130]))

# Predictors: everything except the outcome column.
x <- x[, -7130]

# Make predictors and response available on all workers.
sfExport("x")
sfExport("y")


# Studies with small mtry (embryonal tumor / CNS data).
# Every study maps null_study() over the seeds 1..500 on the snowfall workers
# (x and y are the objects exported above) and saves the resulting list to
# ../results/. cv is varied over 3, 5 and 10.

# -- subset = FALSE, mtry = ceiling(sqrt(ncol(x))) --

studyI_cv3_smallmtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv3_smallmtry_cns,
     file = "../results/studyI_cv3_smallmtry_cns.Rda")

studyI_cv5_smallmtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv5_smallmtry_cns,
     file = "../results/studyI_cv5_smallmtry_cns.Rda")

studyI_cv10_smallmtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(sqrt(ncol(x))))
  })
save(studyI_cv10_smallmtry_cns,
     file = "../results/studyI_cv10_smallmtry_cns.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 10 --
# (how the subset is drawn is decided inside null_study())

studyI_cv3_smallmtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv3_smallmtry_p100_cns,
     file = "../results/studyI_cv3_smallmtry_p100_cns.Rda")

studyI_cv5_smallmtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv5_smallmtry_p100_cns,
     file = "../results/studyI_cv5_smallmtry_p100_cns.Rda")

studyI_cv10_smallmtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 10)
  })
save(studyI_cv10_smallmtry_p100_cns,
     file = "../results/studyI_cv10_smallmtry_p100_cns.Rda")


# Studies with large mtry (embryonal tumor / CNS data).
# Same layout as the small-mtry studies: 500 seeded null_study() runs per
# setting, cv in {3, 5, 10}, results saved to ../results/.

# -- subset = FALSE, mtry = ceiling(ncol(x) / 5) --

studyI_cv3_largemtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv3_largemtry_cns,
     file = "../results/studyI_cv3_largemtry_cns.Rda")

studyI_cv5_largemtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv5_largemtry_cns,
     file = "../results/studyI_cv5_largemtry_cns.Rda")

studyI_cv10_largemtry_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = FALSE,
               VIM = "CV", mtry = ceiling(ncol(x) / 5))
  })
save(studyI_cv10_largemtry_cns,
     file = "../results/studyI_cv10_largemtry_cns.Rda")

# -- subset = TRUE with subsetsize = 100, mtry = 100 / 5 --

studyI_cv3_largemtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 3, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv3_largemtry_p100_cns,
     file = "../results/studyI_cv3_largemtry_p100_cns.Rda")

studyI_cv5_largemtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 5, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv5_largemtry_p100_cns,
     file = "../results/studyI_cv5_largemtry_p100_cns.Rda")

studyI_cv10_largemtry_p100_cns <-
  sfLapply(seq_len(500), function(seed_id) {
    null_study(seed = seed_id, x = x, y = y, cv = 10, subset = TRUE,
               subsetsize = 100, VIM = "CV", mtry = 100 / 5)
  })
save(studyI_cv10_largemtry_p100_cns,
     file = "../results/studyI_cv10_largemtry_p100_cns.Rda")


# Remove the master-side copies of the data.
rm(x, y)


# All studies are done: shut the snowfall cluster down.
sfStop()
