################################################################################
# For simplicity's sake, this file contains all required packages and          #
# functions needed to carry out the method "rldaCMA". They can be loaded by    #
# using source("initialization.r").                                            #
################################################################################

###########################################################################
## 1. Load the required packages for all variants of LDA 'via the SH(IP)' #
###########################################################################

library(Biobase)
library(tools)
library(gdata)
library(AnnotationDbi)
library(DBI)
library(RSQLite)
library(CMA)
library(class)
library(corpcor)
library(SHIP)


################################################################################
# 2. Load the method 'rldaCMA'                                                 #
################################################################################
rldaCMA <- function(X, y, learnind,type,genesINpaths,models=FALSE){

nrowx   <- nrow(X)
lengthy <- length(y)
if(nrowx != lengthy)
stop("The number of rows of 'X' must agree with the length of y \n")
if(missing(learnind)) learnind <- 1:nrowx
if(length(learnind) > nrowx)
stop("The length of 'learnind' must be smaller than the number of observations. \n")
y         <- as.factor(y)
levels(y) <- 1:nlevels(y)
if(nlevels(y) > 2) mode <- "multiclass"
else mode <- "binary"
y <- as.numeric(y)-1
Ylearn <- y[learnind]
Xlearn <- X[learnind,]
Xtest  <- X[-learnind,,drop=FALSE]
if(nrow(Xtest) == 0){ Xtest <- Xlearn ; Ytest <- Ylearn } else
{Ytest <- y[-learnind]}


pred  <- rlda.iter(Xlearn,Xtest,Ylearn,type,genesINpaths)

new("cloutput", yhat=as.numeric(pred), y=Ytest, learnind = learnind, method = "rldaCMA", mode=mode)
}



rlda.iter <- function(Xlearn,Xtest,Ylearn,type,genesINpaths)  {

             if (type=="TargetG" || type=="TargetG*" || type=="TargetGpos" ||
                 type=="targetGcortest") {                                      # check for possibly incorrect inputs;
                 if (is.na(names(genesINpaths[1]))==TRUE){                      # only the first element is considered since
                     stop("The gene list mustn't be empty for this type!")}     # a condition with length>1 leads to a warning;
             }                                                                  # consideration of the first element suffices
             genes  <- genesINpaths[colnames(Xlearn)]

             prior <- numeric(length(unique(Ylearn)))
             for (i in 1:length(unique(Ylearn))) {
             prior[i] <- sum(Ylearn==sort(unique(Ylearn))[[i]])/nrow(Xlearn)
              }
             if (sum(prior)!=1) {
             stop("The sum of prior probabilities must be 1!")}

             learn <- data.frame(cbind(Ylearn,Xlearn))
             learn.class <- split(learn,learn$Ylearn)
             mean.class  <- matrix(nrow=length(unique(Ylearn)),ncol=ncol(Xlearn))
             for (i in 1:length(unique(Ylearn))) {
             for (j in 1:ncol(Xlearn)) {

             mean.class[i,j] <- mean(learn.class[[i]][,j+1])
              }
             }

             sigma.weight <- matrix(nrow=ncol(Xlearn),ncol=ncol(Xlearn),data=0)


             if(type=="standard") {
             for (i in 1:length(learn.class)) {
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*cov(learn.class[[i]][,-1])
              }
             }

             if(type=="TargetD") {
             for (i in 1:length(learn.class)) {
             target.class <- targetD(learn.class[[i]][,-1],NULL)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }

             if(type=="TargetG") {
             for (i in 1:length(learn.class)) {
             target.class <- targetG(learn.class[[i]][,-1],genes)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }

             if(type=="TargetG*") {
             for (i in 1:length(learn.class)) {
             target.class <- targetGstar(learn.class[[i]][,-1],genes)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }

             if(type=="TargetF") {
             for (i in 1:length(learn.class)) {
             target.class <- targetF(learn.class[[i]][,-1],NULL)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }

             if(type=="TargetGpos") {
             for (i in 1:length(learn.class)) {
             target.class <- targetGpos(learn.class[[i]][,-1],genes)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }
             
             if(type=="TargetCor") {
             for (i in 1:length(learn.class)) {
             target.class <- targetCor(learn.class[[i]][,-1],genes)
             sigma.weight <- sigma.weight + (nrow(learn.class[[i]])-1)*shrink.estim(learn.class[[i]][,-1],
             target.class)[[1]]
              }
             }

             sigma <- (1/(nrow(Xlearn)-length(learn.class)))*sigma.weight
             sigmainv <- pseudoinverse(sigma)


             discr <- matrix(nrow=length(unique(Ylearn)),ncol=nrow(Xtest))
             for (i in 1:length(unique(Ylearn))) {
             for (j in 1: nrow(Xtest)) {
             discr[i,j] <- (as.numeric(Xtest[j,]) %*% sigmainv) %*% mean.class[i,]-
                            0.5* (mean.class[i,] %*% sigmainv) %*% mean.class[i,]+
                            log(prior[i])
               }

             }


             pred <- numeric(nrow(Xtest))
             for (j in 1: nrow(Xtest)) {
             pred[j] <- sort(unique(Ylearn))[[which.max(discr[,j])]]
             }

             return(pred)

}

################################################################################
# 3. Load the required functions for the "fishing procedures" rlda.TG1 -       #
#    rlda.TG7.                                                                 #
################################################################################

# fishing procedure 1:
just.varingroup  <- function(genes,x){
                    colnames(x)[sapply(genes[colnames(x)],function(ll) !any(is.na(ll)))]
                    }

# fishing procedure 2:
fish.proc.two <- function(genes,x){
                 annotx     <- genes[colnames(x)]
                 out        <- colnames(x)[sapply(annotx,length) == 1]
                 names(out) <- unlist(genes[out])
                 return(out)
                 }

# fishing procedure 3:
whichMIN   <- function(vec) {
              ifelse( sum(min(vec)==vec)==1, which.min(vec),                    # select the extremum of a vector; when
              sample(which(vec==min(vec)),1) )                                  # several components are equal to an extremum,
              }                                                                 # then one of these components is randomly chosen

fish.proc.three <- function(genes,x,genesINpath){

                   annotx <- genes[colnames(x)]
                   out    <- colnames(x)
                   l      <- sapply(annotx,length) == 1
                   names(out)[l]  <- unlist(annotx[l])
                   multiplelist   <- annotx[!l]
                   names(out)[!l] <- sapply(multiplelist, function(ll) ll[whichMIN(sapply(genesINpath[ll],length))])
                   return(out)
                   }

# fishing procedure 4:
whichMAX <- function(vec) {
            ifelse( sum(max(vec)==vec)==1, which.max(vec), sample(which(vec==max(vec)),1) )
            }

fish.proc.four   <- function(genes,x,genesINpath){

                    annotx <- genes[colnames(x)]
                    out    <- colnames(x)
                    l      <- sapply(annotx,length) == 1
                    names(out)[l]  <- unlist(annotx[l])
                    multiplelist   <- annotx[!l]
                    names(out)[!l] <- sapply(multiplelist, function(ll) ll[whichMAX(sapply(genesINpath[ll],length))] )
                    return(out)
                    }
