##########
# 1. LDA #
##########

# Start from an empty workspace: every (non-hidden) object currently in the
# calling environment is removed before anything else runs.
rm(list = ls())

# ----------------------------------------------------------------------
# Working directory.
# Set `wd` to the folder that contains the reproducible code. This is the
# only value that has to be edited to obtain the figures presented in the
# article.
# ----------------------------------------------------------------------
wd <- "~/reproducibleCode"

# load libraries        
library(CMA)
require(Matrix)
require(mvtnorm)
require(SHIP)
library(gdata)
library(corpcor)
library(lattice)
library(entropy)
library(fdrtool)
library(sda)

# source necessary functions
# The paths below are relative, so the working directory is switched to `wd`
# first. Because the working directory is not restored afterwards, any file
# written later under a relative name also ends up in `wd`.
setwd(wd)
# Project helper scripts, evaluated in this order.
# NOTE(review): presumably these define the helpers used further down
# (e.g. simulate() and the rldaCMA classifier) -- confirm against the files.
source("SimulatedData/fonc.R")
source("SimulatedData/simulate.R")
source("LDA/SHIPmod.R")
source("LDA/twoclassRLDA.R")

# Parameters of the simulation as specified in the article
niter <- 100   # number of simulated data sets per parameter setting
ntest <- 500   # added to each class size when the data are simulated
nn <- list(c(25, 25))   # candidate per-class sample sizes
# One `aks` vector per scenario; each is passed unchanged to simulate().
# NOTE(review): presumably per-group correlation levels (high vs. low, as in
# the figure titles) -- confirm against simulate().
aks <- list(c(0.97, 0.9, 0.95), c(0.2, 0.3, 0.1))

# Every combination of an `nn` entry with an `aks` entry is one setting.
# `tmpind` holds the index pairs; `params` is an unnamed list with one
# list(nn = , aks = ) element per row of `tmpind`.
tmpind <- expand.grid(seq_along(nn), seq_along(aks))
params <- lapply(seq_len(nrow(tmpind)), function(k) {
  list(nn = nn[[tmpind[k, 1]]], aks = aks[[tmpind[k, 2]]])
})

# Placeholders res1, res2, ... (one per setting), later overwritten by the
# simulation results. They have five columns, one per classifier compared in
# the main loop, so that their shape matches the final results and the
# column indexing used for the figure even if a setting was never run.
for (j in seq_along(params)) {
  assign(paste("res", j, sep = ""), matrix(NA, niter, 5))
}

# Sizes passed to simulate() as `pks`; `p` is their total and is used as the
# number of simulated variables ("genes").
pks <- c(300, 200, 500)
p <- sum(pks)
# Number of selected genes handed to classification(); capped at `p`.
nbgene <- min(30, p)

# Main simulation: for every parameter setting in `params`, simulate `niter`
# data sets, fit five classifier variants on one stratified train/test split
# each, and store the misclassification rates in res1, res2, ... (one
# niter x 5 matrix per setting; columns follow the order of `list.GM100`).
#
# NOTE(review): no RNG seed is set in this block although rnorm() and
# sample() are used, so results will differ between runs unless a seed is set
# elsewhere.

# NOTE(review): `res` is never filled or read in the visible code; it is kept
# only so that the saved workspace still contains it.
res <- matrix(NA, niter, 3)

for (j in seq_along(params)) {
  restemp <- matrix(NA, niter, 5)
  # min = 0 (rather than 1) keeps the bar valid when niter == 1, since
  # txtProgressBar() requires max > min.
  pb <- txtProgressBar(min = 0, max = niter, style = 3)
  for (i in seq_len(niter)) {
    # Class means: class 1 is centred at zero, class 2 is shifted by small
    # Gaussian perturbations on every variable.
    mu1 <- rep(0, p)
    mu2 <- mu1 + 0.15 * rnorm(p)

    # Simulate with `ntest` extra observations per class on top of the
    # per-class sizes of the current setting.
    o <- simulate(mus = list(mu1, mu2), nn = params[[j]]$nn + ntest, pks = pks,
                  aks = params[[j]]$aks, noise = FALSE, invcov = FALSE)
    # NOTE(review): `data1` is not used again in the visible code.
    data1 <- X <- o$X
    Y <- o$y
    l2 <- o$genegroups
    names(l2) <- colnames(X) <- paste("Gene", 1:p)
    # Randomly permuted group labels (same gene names), used for the "_p"
    # variants below.
    l2permut <- sample(l2)
    names(l2permut) <- names(l2)

    # One stratified MCCV split whose learning set has sum(nn) observations.
    learnset <- GenerateLearningsets(y = Y, method = "MCCV",
                                     ntrain = sum(params[[j]]$nn),
                                     niter = 1, strat = TRUE)
    # Gene ranking by t-test on the learning set; shared by all classifiers.
    geneselect <- GeneSelection(X = X, y = Y, learningsets = learnset,
                                method = "t.test", trace = FALSE)

    # The five variants differ only in `type` and in the gene-group labels
    # supplied (true labels `l2` vs. permuted labels `l2permut`).
    cGM.TD100 <- classification(X = X, y = Y, learningsets = learnset,
                                type = "D",
                                genesel = geneselect, nbgene = nbgene,
                                classifier = rldaCMA, trace = FALSE)
    cGM.TG100 <- classification(X = X, y = Y, learningsets = learnset,
                                type = "G", genesINpaths = l2,
                                genesel = geneselect, nbgene = nbgene,
                                classifier = rldaCMA, trace = FALSE)
    cGM.TG100_p <- classification(X = X, y = Y, learningsets = learnset,
                                  type = "G", genesINpaths = l2permut,
                                  genesel = geneselect, nbgene = nbgene,
                                  classifier = rldaCMA, trace = FALSE)
    cGM.TGstar100 <- classification(X = X, y = Y, learningsets = learnset,
                                    type = "Gstar", genesINpaths = l2,
                                    genesel = geneselect, nbgene = nbgene,
                                    classifier = rldaCMA, trace = FALSE)
    cGM.TGstar100_p <- classification(X = X, y = Y, learningsets = learnset,
                                      type = "Gstar", genesINpaths = l2permut,
                                      genesel = geneselect, nbgene = nbgene,
                                      classifier = rldaCMA, trace = FALSE)

    list.GM100 <- list(cGM.TD100, cGM.TG100, cGM.TG100_p,
                       cGM.TGstar100, cGM.TGstar100_p)
    # One misclassification value per classifier, flattened into row i.
    restemp[i, ] <- unlist(compare(list.GM100, measure = c("misclassification"),
                                   plot = FALSE))

    # Update the bar once iteration i is actually finished.
    setTxtProgressBar(pb, i)
  }
  # Publish the results of setting j as the global res<j>.
  assign(paste("res", j, sep = ""), restemp)
  close(pb)
}


# Figure 1: side-by-side notched boxplots of the test error rates stored in
# the first three columns of res1 (left panel) and res2 (right panel),
# written to "figure1.pdf" in the current working directory.
pdf("figure1.pdf", width = 10, height = 5)
layout(matrix(1:2, nrow = 1))
par(cex = 1.2)
# Both panels share every setting except the data and the title.
invisible(Map(
  function(errors, panel_title) {
    boxplot(errors, names = c("D", "G", "G (p)"), main = panel_title,
            ylab = "Test error rate", ylim = c(0, 0.5), notch = TRUE)
  },
  list(res1[, 1:3], res2[, 1:3]),
  c("High correlations", "Low  correlations")
))
dev.off()

# Save the whole workspace under a time-stamped name of the form
# "SaveLDA_<timestamp>_.RData", where every ":" of the timestamp is replaced
# by "_". No intermediate variable is created, so the saved image contains
# nothing extra.
save.image(file = paste("SaveLDA",
                        gsub(":", "_", as.character(Sys.time()), fixed = TRUE),
                        ".RData",
                        sep = "_"))

