# This function performs the batch effect removal.
# It takes the whole number 'z' corresponding to the z-th line of 'batchremovalgrid',
# which contains informations stating which batch effect removal method to apply to which real dataset.

performBatchremovalRealData <- function(z) {

  require("bapred")

  load(paste("./FAbatchPaper/Datasets/ProcessedData/", batchremovalgrid$dataset[z], ".Rda", sep=""))

  set.seed(batchremovalgrid$seed[z])

  Xbr <- ba(x=X, y=y, batch=batch, method = batchremovalgrid$batchremmethod[z])$xadj

  batchessuited <- apply(table(batch, y), 1, function(x) min(x)>1)
    
   # Leave out variables with constant values within batches:
	
   sdb = as.list(rep(0,length(levels(batch))))
   for (i in 1:length(levels(batch))) {
      sdb[[i]] = apply(X[batch==levels(batch)[i],],2,sd)
   }

   badvariables <- sort(unique(unlist(lapply(sdb, function(x) which(x==0)))))
   goodvariables <- setdiff(1:ncol(X), badvariables)

   save(Xbr, goodvariables, file=paste("./FAbatchPaper/InterimResults/datasetbr", z, ".Rda", sep=""))
 
}		
		
		
	
	
		
# This function applies the different metrics to the real datasets (after batch effect removal).
# It takes the whole number 'z', which corresponds to the z-th line of 'metricgrid',
# which contains informations stating which metric to apply to which real dataset 
# (after batch effect removal).

evaluateMetricRealData <- function(z) {		

  require("bapred")

  load(paste("./FAbatchPaper/Datasets/ProcessedData/", batchremovalgrid$dataset[batchremovalgrid$datasetbrind==metricgrid$datasetbrind[z]], ".Rda", sep=""))
  load(paste("./FAbatchPaper/InterimResults/datasetbr", metricgrid$datasetbrind[z], ".Rda", sep=""))		

  set.seed(batchremovalgrid$seed[batchremovalgrid$datasetbrind==metricgrid$datasetbrind[z]])

  Xgoodvariables <- X[,goodvariables]
  
  if(metricgrid$metric[z]!="diffexpr")
    metrval <- bametric(xba=Xbr, batch=batch, y=y, x=X, metric = metricgrid$metric[z])
  else
    metrval <- bametric(x=Xgoodvariables, batch=batch, y=y, metric = "diffexpr", method = batchremovalgrid$batchremmethod[batchremovalgrid$datasetbrind==metricgrid$datasetbrind[z]])

  return(list(dataset=batchremovalgrid$dataset[batchremovalgrid$datasetbrind==metricgrid$datasetbrind[z]],
    batchremmethod=batchremovalgrid$batchremmethod[batchremovalgrid$datasetbrind==metricgrid$datasetbrind[z]],
    metric=metricgrid$metric[z],
    metrval=metrval))
 
}




# This function performs the addon batch effect removal and cross-batch prediction 
# in the cross-batch prediction study.
# It takes the whole number 'z', which corresponds to the z-th line of 'scenariogrid',
# which contains informations on which batch effect removal method to use and which
# batch to use as training batch.

performCrossBatchPrediction <- function(z) {

  require("bapred")
  
set.seed(scenariogrid$seed[z])

load("./FAbatchPaper/Datasets/ProcessedData/IUGRTranscr.Rda")
X <- X[,apply(X[batch==1,], 2, sd)!=0]

if(scenariogrid$batchlearnind[z]==1) {

Xtrain <- X[batch %in% 1,]
ytrain <- y[batch %in% 1]
batchtrain <- as.factor(as.numeric(as.factor(as.numeric(batch[batch %in% 1]))))

Xtest <- X[batch %in% 2,]
ytest <- y[batch %in% 2]
batchtest <- as.factor(as.numeric(as.factor(as.numeric(batch[batch %in% 2]))))

}


if(scenariogrid$batchlearnind[z]==2) {

Xtrain <- X[batch %in% 2,]
ytrain <- y[batch %in% 2]
batchtrain <- as.factor(as.numeric(as.factor(as.numeric(batch[batch %in% 2]))))

Xtest <- X[batch %in% 1,]
ytest <- y[batch %in% 1]
batchtest <- as.factor(as.numeric(as.factor(as.numeric(batch[batch %in% 1]))))

}

rm(X); gc()


if(!(scenariogrid$batchremmethod[z] %in% c("fsvafast", "fsvaexact"))) {
params <- ba(x=Xtrain, y=ytrain, batch=batchtrain, method = scenariogrid$batchremmethod[z])
}
else {
if(scenariogrid$batchremmethod[z]=="fsvafast")
  params <- ba(x=Xtrain, y=ytrain, batch=batchtrain, method = "sva", algorithm = "fast")
if(scenariogrid$batchremmethod[z]=="fsvaexact")
  params <- ba(x=Xtrain, y=ytrain, batch=batchtrain, method = "sva", algorithm = "exact")
}

Xtrain <- params$xadj

Xtest <- baaddon(params, x=Xtest, batch=batchtest)

errormcc <- performValidation(Xtrain, ytrain, Xtest, ytest)


error <- errormcc$error
mcc <- errormcc$mcc

return(list(method=scenariogrid$batchremmethod[z], 
  batchlearnind=scenariogrid$batchlearnind[z], 
  error=error, mcc=mcc))

}




# This function is used by 'performCrossBatchPrediction', see above.
# It does the following using the data after addon batch effect
# removal:
# 1) On the training batch fit LDA using PLS components 
# as covariates, tuning the number of components by 3-fold CV;
# 2) On the test batch estimate the MCC-value of the prediction rule
# fitted in 1).
#
# Input parameters:
# Xtrain - covariate matrix in the training batch (after batch effect removal)
# ytrain - target variable in the training batch
# Xtest - covariate matrix in the test batch after addon-batch effect removal
# ytest - target variable in the test batch

performValidation <- function(Xtrain, ytrain, Xtest, ytest) {

  require("CMA")
  comp.grid <- 1:10

  compbestvec <- 0
  for(i in 1:10) {
    tunecv <- tune(Xtrain, ytrain, classifier = pls_ldaCMA, grids = list(comp = comp.grid))
    compbestvec[i] <- comp.grid[which.min(tunecv@tuneres[[1]])]
  }
  compbest <- floor(median(compbestvec))

  plsldaobject <- pls_ldaCMA(rbind(Xtrain, Xtest), c(ytrain, ytest), 
    learnind=1:length(ytrain), comp = compbest)

  ytesthat <- plsldaobject@yhat + 1

  confmatrix <- table(ytesthat, ytest)

  if((nrow(confmatrix)==1) & (ncol(confmatrix)==1))
    confmatrix <- rbind(cbind(confmatrix, 0), c(0, 0))
  else {
    if(nrow(confmatrix)==1)
      confmatrix <- rbind(confmatrix, c(0,0))
    if(ncol(confmatrix)==1)
      confmatrix <- cbind(confmatrix, c(0,0))
  }
	  
  mcc <- (confmatrix[1,1]*confmatrix[2,2] - confmatrix[1,2]*confmatrix[2,1])/
    sqrt((confmatrix[1,1] + confmatrix[1,2])*(confmatrix[1,1] + confmatrix[2,1])*
      (confmatrix[2,2] + confmatrix[1,2])*(confmatrix[2,2] + confmatrix[1,2]))
  
  error <- mean(ytest!=ytesthat)

  return(list(mcc=mcc, error=error))
  
}
