univariateSelection<-function(data,nSel,nCov=0,nFav=0,offset=NULL)
{
  # data: contains the data, it needs to have time in the first column and status (censored/not censored) in the second
  # nSel: number of covariates to select (tuning parameter)
  # nCov: number of mandatory covariates
  # nFav: number of favored covariates (should be before the unfavored in data)
  # offset: possible offset
  if(!is.numeric(nSel)) stop('nSel must be numeric')
  if(!is.numeric(nFav)) stop('nFav must be numeric')
  if(!is.numeric(nCov)) stop('nCov must be numeric')
  if(nFav>0) if(length(nSel)!=2) stop('wrong dimension of nSel') # when there are favored covariates, we need a 2 dimensional tuning parameter
  if(is.null(offset)) offset<-rep(0,dim(data)[1])
  origNames<-colnames(data)[1:2]
  colnames(data)[1:2]<-c('time','status')
  nCov<-nCov+2 # add the dependent variables (time and status) to the mandatory ones
  computePvalue<-function(ind,data,nCov,offset) # function to compute p-value as measure of association predictor/outcome
  {
    data<-data[,c(ind,1:nCov)]
    if(nCov>2) suppressWarnings(modCov<-coxph(Surv(time,status)~offset(offset)+.,data=data[,-1])) # 'univariate' Cox regression model
    suppressWarnings(mod<-coxph(Surv(time,status)~offset(offset)+.,data=data)) # 'univariate' Cox regression model
    ifelse(nCov==2,out<-1-pchisq(-2*(mod$loglik[1]-mod$loglik[2]),length(mod$coefficients)),
           out<-1-pchisq(-2*(modCov$loglik[2]-mod$loglik[2]),(length(mod$coefficients)-nCov+2))) # likelihood ratio test
    out 
  }
  
  ind<-c((nCov+1):dim(data)[2])
  p.val<-sapply(ind,computePvalue,data=data,nCov=nCov,offset=offset) # compute the p-value for each 'univariate' model
  if(nFav>1) # in case of favored variables
  {
    p.clin<-p.val[1:nFav]
    p.gene<-p.val[-c(1:nFav)]
    mandatory<-data[,c(1:nCov)]
    favored<-data[,c((nCov+1):(nCov+nFav))]
    favored<-favored[,order(p.clin)] # update: ordering variables in order to speed cv.tuningParam
    others<-data[,-c(1:(nCov+nFav))]
    others<-others[,order(p.gene)] # update: ordering variables in order to speed cv.tuningParam
    if(nSel[1]==0) out<-cbind(mandatory,others[,c(1:nSel[2])])
    if(nSel[2]==0) out<-cbind(mandatory,favored[,c(1:nSel[1])])
    if(nSel[1]!=0&nSel[2]!=0) out<-cbind(mandatory,favored[,c(1:nSel[1])],others[,c(1:nSel[2])])
    if(nSel[2]==1) colnames(out)[dim(out)[2]]<-colnames(others)[1]
    if(nSel[1]==1) colnames(out)[(nCov+1)]<-colnames(favored)[1]
  }
  if(nFav==1) # in case of only one favored variable
  {
    p.clin<-p.val[1]
    p.gene<-p.val[-1]
    mandatory<-data[,c(1:nCov)]
    favored<-data[,(nCov+1)]
    others<-data[,-c(1:(nCov+nFav))]
    others<-others[,order(p.gene)] # update: ordering variables in order to speed cv.tuningParam
    if(nSel[1]==0)
    {
      out<-cbind(mandatory,others[,c(1:nSel[2])])
      if (nSel[2]==1) colnames(out)<-c(colnames(mandatory),colnames(others)[1])
    }
    if(nSel[2]==0)
    {
      out<-cbind(mandatory,favored)
      colnames(out)<-colnames(data)[1:(nCov+1)]
    }
    if(nSel[1]!=0&nSel[2]!=0)
    {
      out<-cbind(mandatory,favored[,c(1:nSel[1])],others[,c(1:nSel[2])])
      colnames(out)<-c(colnames(data)[1:(nCov+1)],colnames(others)[(1:nSel[2])])
    }
  }
  if(nFav<1) # in case of no favored variables
  {
    p.val<-c(rep(0,nCov),rank(p.val))
    data<-data[,order(p.val)] # update: ordering variables in order to speed cv.tuningParam
    out<-data[,c(1:(nCov+nSel))]
    if(nSel==1) colnames(out)[(nCov+1)]<-colnames(data)[(nCov+1)]
  }
  colnames(out)[1:2]<-origNames
  out
}


forwardSelection<-function(data,nSel,nCov=0,nFav=0,offset=NULL)
{
  # nSel: number of covariates to keep
  # nCov: number of mandatory covariates
  # nFav: number of favoring covariates
  p<-dim(data)[2]
  origNames<-colnames(data)[1:2]
  colnames(data)[1:2]<-c('time','status')
  if(nFav>0) if(length(nSel)!=2) stop('wrong dimension of nSel')
  if(is.null(offset)) offset<-rep(0,dim(data)[1])
  modOld<-suppressWarnings(coxph(Surv(time,status)~offset(offset)+.,data=data[,c(1:(2+nCov))]))
  if(nCov==0) llikOld<-modOld$loglik
  else llikOld<-modOld$loglik[2]
  nCov<-nCov+2 # add the dependent variable (time and censoring) to the mandatory ones
  
  # univariate models to select the first covariates
  compute_llik<-function(ind,data,nCov,offset,llikOld)
  {
    data<-as.data.frame(data[,c(ind,1:nCov)])
    mod<-suppressWarnings(coxph(Surv(time,status)~offset(offset)+.,data=data))
    c(1-pchisq(-2*(llikOld-mod$loglik[2]),(length(mod$coefficients)-nCov+2)),mod$loglik[2]) # likelihood ratio test
  } 
  count<-c(0,0)
  fullFav<-0
  while((sum(nSel)-sum(count))>0)
  {    
    if (length(nSel)==2) # only for favoring, when the number of clinical or molecular predictors is reached, look only to the others
    {
      if (count[2]==nSel[2]) p<-nCov+sum(count)+nFav
      if (count[1]==nSel[1]) fullFav<-nFav
    }
    ind<-c((nCov+sum(count)+fullFav+1):p)
    llik<-sapply(ind,compute_llik,data=data,nCov=nCov+sum(count),offset=offset,llikOld=llikOld) # compute the loglik for each univariate model
    selected<-which.min(llik[1,]) # maximum increase in loglik, since loglik_0 is the same
    llikOld<-llik[2,selected]
    tmp<-colnames(data)[selected+nCov+sum(count)+fullFav]
    if ((selected+fullFav)<=nFav) # if a clinical predictor is selected, then put it between the last clinical and before the first molecular
    {
      data<-cbind(data[,1:(nCov+count[1])],data[,(selected+nCov+sum(count))],data[,-c(1:(nCov+count[1]),selected+nCov+sum(count))]) # add to the significative variables the one with biggest loglik
      colnames(data)[nCov+count[1]+1]<-tmp
      nFav<-nFav-1
      count[1]<-count[1]+1
    }
    else # if a molecular predictor is selected, just append it after the last molecular
    {
      data<-cbind(data[,1:(nCov+sum(count))],data[,(selected+nCov+fullFav+sum(count))],data[,-c(1:(nCov+sum(count)),selected+nCov+fullFav+sum(count))]) # add to the significative variables the one with biggest loglik
      colnames(data)[nCov+sum(count)+1]<-tmp
      count[2]<-count[2]+1
    }
  }
  colnames(data)[1:2]<-origNames
  data[,1:(nCov+sum(nSel))]
}


cv.tuningParam<-function(response,data.clin,data.genes,maximum=25,foldCV=10,strategy=c(1,2,3,4.1,4.2),cpus=1,method=c('univariate','forward','univariateAdj','forwardAdj'),criteria=c('brier','likelihoodCV'),cens.model='marginal',maxtime='maximum',seed=NULL,...)
{
  # response: survival outcome (either a matrix with two columns or a Surv object)
  # data.clin: clinical data (coded with dummies variables)
  # data.genes: gene expression data (realization of continuous varables)
  # maximum: maximum number of covariates to consider
  # foldCV: number of folds to use in cross-validation
  # strategy: strategy used for combine clinical and molecular data (see Boulesteix & Sauerbrei, 2011)
  # cpus: number of cpus to use, 1 (default) for sequantial computing (if criteria is 'brier', it is better to set cpus=foldCV)
  # method: either univariate, forward selection, univariate with adj., or stepfi
  # maxtime: upper bound for computing integrated Brier score
  # cens.model: censoring model (see package pec)
  # seed: to reproduce results in parallel computing
  # ...: possible specification for parallel computing (see sfInit)  
  if(cpus>1)
  {
    library(snowfall)
    contr<-try(sfInit(parallel=T,cpus=cpus,...))
    if(contr)
    {
      if(!is.null(seed)&length(seed)!=cpus) stop('seed must be a vector of ',cpus,' elements!')
      sfLibrary(package=survival)
      if(criteria=='brier') sfLibrary(package=pec)
    }
    else
    {
      cpus<-1
      warning('Inizialization of the cpus failed, computation will be in one cpu')
    }
  }
  
  if(is.Surv(response)) response<-cbind(response[,1],response[,2])
  ifelse(dim(response)[2]!=2,stop('wrong dimensions for response'),colnames(response)<-c('time','status'))
  n<-dim(data.clin)[1]
  if(dim(response)[1]!=n | dim(data.genes)[1]!=n) stop('response, clinical and molecular datasets must contain the same number of observations')
  if(foldCV<0) stop('wrong specification of CV folds')
  ifelse(foldCV>=(n-2),index<-c(1:n),index<-sample(c(rep(c(1:foldCV),floor(n/foldCV)),sample(1:foldCV,size=n-foldCV*floor(n/foldCV))))) # split sample for cross-validation
  if(criteria=='brier'&is.character(maxtime)) maxtime<-switch(maxtime,
                                                              'equal'=min(sapply(1:foldCV,function(j) max(response[index==j,1]))),
                                                              'maximum'=NULL)
  if((criteria!='brier')&(criteria!='likelihoodCV')) stop('criteria should be <brier> or <likelihoodCV>')

  strategy.1<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index,...)
  { # naive: use clinical and molecular information in the same way
    brierScore<-function(j,nGenes,response,data.clin,data.genes,index,nCov,xSelection,cens.model,maxtime,seed)
    {
      if(!is.null(seed)) set.seed(seed[j])
      data<-xSelection(data.frame(response[index!=j,],data.clin[index!=j,],data.genes[index!=j,]),nSel=nGenes,nCov=nCov) # select relevant variables (based on cv-training set)
      newdata<-data.frame(response[index==j,],data.clin[index==j,],data.genes[index==j,])[,names(data)] # cv-validation set (only with relevant covariates)
      selectModel<-function(nGenes,data,nCov)
      {
        data<-as.data.frame(data[,1:(nCov+nGenes+2)])
        coxph(Surv(time,status)~.,data=data)
      }
      models<-lapply(nGenes:1,selectModel,data=data,nCov=nCov) # fit a model using different number of covariates
      predEC<-pec::pec(models,formula=models[[1]]$formula,data=newdata,cens.model=cens.model,start=0,reference=F)
      if(is.null(maxtime)) maxtime<-sort(response[index==j,1],decreasing=TRUE)
      out<-pec::ibs(predEC,times=maxtime[1]) # compute IBS (if times is not specified, use the largest one which gives no problem)
      while(sum(is.na(out))>0)
      {
        if(length(maxtime)==1) stop('wrong specification of maxtime in IBS computation')
        maxtime<-maxtime[-1]
        out<-pec::ibs(predEC,times=maxtime[1])
      }
      out
    }
    llikCV<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    {
      data<-xSelection(data.frame(response,data.clin,data.genes),nSel=nGenes,nCov=nCov) # select relevant variables (based on cv-training set)
      cross_j<-function(j,data,xSelection,index)
      {
        mod<-coxph(Surv(time,status)~.,data=data[index!=j,])
        coxph(Surv(time,status)~.,data=data,init=mod$coeff,iter=0)$loglik[2]-mod$loglik[2]
      }
      # repeat the process for each cv-split
      sum(sapply(c(1:foldCV),cross_j,data=data,xSelection=xSelection,index=index))
    }
    if(criteria=='brier')
    {
      cpus<-list(...)$cpus
      cens.model<-list(...)$cens.model
      maxtime<-list(...)$maxtime
      seed<-list(...)$seed
      if(cpus==1) stra<-lapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime,seed=seed)
      else stra<-sfLapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime,seed=seed)
    }
    if(criteria=='likelihoodCV') stra<-llikCV(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    stra
  }
  
  strategy.2<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index,...)
  { # residuals: fit a clinical model and use the linear predictor as an offset
    brierScore<-function(j,nGenes,response,data.clin,data.genes,index,xSelection,cens.model,maxtime)
    {
      mod.clin<-coxph(Surv(response[index!=j,1],response[index!=j,2])~.,data=as.data.frame(data.clin[index!=j,])) # fit clinical model to compute the offset
      data<-xSelection(data.frame(response[index!=j,],data.genes[index!=j,]),nSel=nGenes,offset=mod.clin$linear.predictors) # select relevant variables (based on cv-training set)
      newdata<-data.frame(response[index==j,],data.genes[index==j,names(data)[-c(1:2)]],predict(mod.clin,newdata=data.frame(response[index==j,],data.clin[index==j,]))) # cv-validation set (only with relevant covariates)
      names(newdata)<-c(names(data),'offset')
      selectModel<-function(nGenes,data,offset)
      {
        data<-as.data.frame(data[,1:(nGenes+2)])
        mod<-coxph(Surv(time,status)~offset(offset)+.,data=data)
      }
      models<-lapply(nGenes:1,selectModel,data=data,offset=mod.clin$linear.predictors) # fit a model using different number of covariates
      predEC<-pec::pec(models,data=newdata,formula=models[[1]]$formula,cens.model=cens.model,start=0,reference=F)
      if(is.null(maxtime)) maxtime<-sort(response[index==j,1],decreasing=TRUE)
      out<-pec::ibs(predEC,times=maxtime[1]) # compute IBS (if times is not specified, use the largest one which gives no problem)
      while(!is.numeric(out)|sum(is.na(out))>0)
      {
        if(length(maxtime)==1) stop('wrong specification of maxtime in IBS computation')
        maxtime<-maxtime[-1]
        out<-pec::ibs(predEC,times=maxtime[1])
      }
    out
    }
    llikCV<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    {
      mod.clin<-coxph(Surv(response[,1],response[,2])~.,data=as.data.frame(data.clin)) # fit clinical model to compute the offset
      data<-xSelection(data.frame(response,data.genes),nSel=nGenes,offset=mod.clin$linear.predictors) # select relevant variables (based on cv-training set)
      cross_j<-function(j,data,data.clin,clin.coeff,xSelection,index)
      {
        mod<-coxph(Surv(time,status)~offset(mod.clin$linear.predictors[index!=j])+.,data=data[index!=j,])
        coxph(Surv(time,status)~offset(mod.clin$linear.predictors)+.,data=data,init=mod$coeff,iter=0)$loglik[2]-mod$loglik[2]
      }
      # repeat the process for each cv-split
      sum(sapply(c(1:foldCV),cross_j,index=index,data=data,xSelection=xSelection,data.clin=data.clin,clin.coeff=mod.clin$coeff))
    }
    if(criteria=='brier')
    {
      cpus<-list(...)$cpus
      cens.model<-list(...)$cens.model
      maxtime<-list(...)$maxtime
      # repeat the process for each cv-split
      if(cpus==1) stra<-lapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
      else stra<-sfLapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
    }
    if(criteria=='likelihoodCV') stra<-llikCV(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    stra
  }
  
  strategy.3<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index,...)
  { # favoring: favor somehow the clinical predictors
    brierScore<-function(j,nGenes,response,data.clin,data.genes,index,nFav,xSelection,cens.model,maxtime) # nGenes now must be bivariate, containing both the number of clinical and the number of molecular predictors to be selected
    {
      data<-xSelection(data.frame(response[index!=j,],data.clin[index!=j,],data.genes[index!=j,]),nSel=c(nFav,nGenes),nFav=nFav) # select relevant variables (based on cv-training set): all clinical are kept, ordered by importance
      fullFormula<-coxph(Surv(time,status)~.,data=data)$formula # need only for package pec
      newdata<-data.frame(response[index==j,],data.clin[index==j,],data.genes[index==j,])[,names(data)] # cv-validation set (only with relevant covariates)
      selectModel<-function(nGenes,data)
      {
        ifelse(nGenes[2]!=0,data<-as.data.frame(data[,c(1:(2+nGenes[1]),(nFav+3):(nFav+2+nGenes[2]))]),data<-as.data.frame(data[,c(1:(2+nGenes[1]))]))
        coxph(Surv(time,status)~.,data=data)
      }
      pairs<-list() # contains all the possible combination of (number of clinical predictors selected, number of molecular predictors selected)
      count<-1
      for(a in 1:nGenes)
        for(i in 0:min(nFav,a))
        {
          pairs[[count]]<-c(i,(a-i)) 
          count<-count+1
        }
      models<-lapply(pairs,selectModel,data=data) # fit a model using different numbers of covariates (clinical and molecular)
      predEC<-pec::pec(models,formula=fullFormula,data=newdata,cens.model=cens.model,start=0,reference=F)
      if(is.null(maxtime)) maxtime<-sort(response[index==j,1],decreasing=TRUE)
      out<-pec::ibs(predEC,times=maxtime[1]) # compute IBS (if times is not specified, use the largest one which gives no problem)
      while(sum(is.na(out))>0)
      {
        if(length(maxtime)==1) stop('wrong specification of maxtime in IBS computation')
        maxtime<-maxtime[-1]
        out<-pec::ibs(predEC,times=maxtime[1])
      }
      out
    }
    llikCV<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    {
      data<-xSelection(data.frame(response,data.clin,data.genes),nSel=nGenes,nFav=dim(data.clin)[2]) # select relevant variables (based on cv-training set): nGenes must be 2-dimensional
      cross_j<-function(j,index,data,xSelection)
      {
        mod<-coxph(Surv(time,status)~.,data=data[index!=j,])
        coxph(Surv(time,status)~.,data=data,init=mod$coeff,iter=0)$loglik[2]-mod$loglik[2]
      }
      # repeat the process for each cv-split
      sum(sapply(c(1:foldCV),cross_j,index=index,data=data,xSelection=xSelection))
    }
    if(criteria=='brier')
    {
      cpus<-list(...)$cpus
      cens.model<-list(...)$cens.model
      maxtime<-list(...)$maxtime
      # repeat the process for each cv-split
      if(cpus==1) stra<-lapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nFav=dim(data.clin)[2],xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
      else stra<-sfLapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nFav=dim(data.clin)[2],xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
    }
    if(criteria=='likelihoodCV') stra<-llikCV(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    stra
  }
  
  strategy.41<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index,...)
  { # molecular score: summarize the information form molecular predictors in a score (using the first principal component)
    brierScore<-function(j,nGenes,response,data.clin,data.genes,index,nCov,xSelection,cens.model,maxtime)
    {
      data<-xSelection(data.frame(response[index!=j,],data.genes[index!=j,]),nSel=nGenes,nCov=nCov) # select relevant molecular variables (based on cv-training set)
      selectModel<-function(nGenes,data,nCov,data.clin)
      { # gives both the model and the score (if only one gene is selected, score contains the name of the selected gene)
        data<-data[,1:(nCov+nGenes+2)]
        if(dim(data)[2]>(nCov+3))
        {
          score<-prcomp(data[,-c(1:(nCov+2))],scale=TRUE)
          mol.score<-predict(score)[,1]
        }
        else
        {
          score<-names(data)[nCov+3]
          mol.score<-data[,(nCov+3)]
        }
        x<-data.frame(response[index!=j,],data.clin[index!=j,],mol.score) # cv-training data (clinical predictors and all the possible scores (made by different number of selected genes))
        colnames(x)<-c('time','status',names(data.clin),paste('mol.score.',nGenes,sep=''))
        mod<-coxph(Surv(time,status)~.,data=x)
        list(mod=mod,score=score)
      }
      models<-list()
      newscores<-NULL
      for(i in nGenes:1)
      { # fit a model using different number of covariates and compute the values of the score in validation set
        tmp<-selectModel(i,data=data,nCov=nCov,data.clin=data.clin)
        models[[i]]<-tmp$mod
        ifelse(is.character(tmp$score),newscore<-data.genes[index==j,tmp$score],newscore<-predict(tmp$score,newdata=data.genes[index==j,names(tmp$score$center)])[,1])
        newscores<-cbind(newscores,newscore)
      }
      newdata<-data.frame(response[index==j,],data.clin[index==j,],newscores) # cv-validation data (clinical predictors and all the possible scores (derived from different number of selected genes))
      colnames(newdata)<-c('time','status',names(data.clin),paste('mol.score.',1:nGenes,sep=''))
      suppressWarnings(fullFormula<-coxph(Surv(time,status)~.,data=newdata)$formula) # compute a formula, it is useful only for pec function, does not matter if the model does not converge
      predEC<-pec::pec(models,formula=fullFormula,data=newdata,cens.model=cens.model,start=0,reference=F)
      if(is.null(maxtime)) maxtime<-sort(response[index==j,1],decreasing=TRUE)
      out<-pec::ibs(predEC,times=maxtime[1]) # compute IBS (if times is not specified, use the largest one which gives no problem)
      while(sum(is.na(out))>0)
      {
        if(length(maxtime)==1) stop('wrong specification of maxtime in IBS computation')
        maxtime<-maxtime[-1]
        out<-pec::ibs(predEC,times=maxtime[1])
      }
      out
    }
    llikCV<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    {
      data<-xSelection(data.frame(response,data.genes),nSel=nGenes,nCov=nCov) # select relevant molecular variables (based on cv-training set)
      if(dim(data)[2]>(nCov+3))
      {
        score<-prcomp(data[,-c(1:(nCov+2))],scale=TRUE)
        mol.score<-predict(score)[,1]
      }
      else
      {
        score<-names(data)[nCov+3]
        mol.score<-data[,(nCov+3)]
      }
      data<-data.frame(response,data.clin,mol.score)
      colnames(data)<-c('time','status',colnames(data.clin),'mol.score')
      cross_j<-function(j,data,xSelection,index)
      {
        mod<-coxph(Surv(time,status)~.,data=data[index!=j,])
        coxph(Surv(time,status)~.,data=data,init=mod$coeff,iter=0)$loglik[2]-mod$loglik[2]
      }
      # repeat the process for each cv-split
      sum(sapply(c(1:foldCV),cross_j,index=index,data=data,xSelection=xSelection))  
    }
    if(criteria=='brier')
    {
      cpus<-list(...)$cpus
      cens.model<-list(...)$cens.model
      maxtime<-list(...)$maxtime
      if(cpus==1) stra<-lapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
      else stra<-sfLapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
    }
    if(criteria=='likelihoodCV') stra<-llikCV(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    stra
  }
  
  strategy.42<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index,...)
  {# clinical and molecular score: summarize the information form clinical and molecular predictors in two score (using the clinical model linear predictor and the first principal component, respectively)
    brierScore<-function(j,nGenes,response,data.clin,data.genes,index,nCov,xSelection,cens.model,maxtime)
    {
      data<-xSelection(data.frame(response[index!=j,],data.genes[index!=j,]),nSel=nGenes,nCov=nCov) # select relevant molecular variables (based on cv-training set)
      mod.clin<-coxph(Surv(response[index!=j,1],response[index!=j,2])~.,data=data.clin[index!=j,])
      selectModel<-function(nGenes,data,nCov,clin.score)
      { # gives both the model and the molecular score (if only one gene is selected, score contains the name of the selected gene)
        data<-data[,1:(nCov+nGenes+2)]
        if(dim(data)[2]>(nCov+3))
        {
          score<-prcomp(data[,-c(1:(nCov+2))],scale=TRUE)
          mol.score<-predict(score)[,1]
        }
        else
        {
          score<-names(data)[nCov+3]
          mol.score<-data[,(nCov+3)]
        }
        x<-data.frame(response[index!=j,],clin.score,mol.score) # cv-training data (clinical score and all the possible molecular scores (derived from different number of selected genes))
        colnames(x)<-c('time','status','clin.score',paste('mol.score.',nGenes,sep=''))
        mod<-coxph(Surv(time,status)~.,data=x)
        list(mod=mod,score=score)
      }
      models<-list()
      newscores<-NULL
      for(i in nGenes:1)
      {
        tmp<-selectModel(i,data=data,nCov=nCov,clin.score=mod.clin$linear.predictors)
        models[[i]]<-tmp$mod
        ifelse(is.character(tmp$score),newscore<-data.genes[index==j,tmp$score],newscore<-predict(tmp$score,newdata=data.genes[index==j,names(tmp$score$center)])[,1])
        newscores<-cbind(newscores,newscore)
      }
      newdata<-data.frame(response[index==j,],predict(mod.clin,newdata=data.clin[index==j,]),newscores) # cv-validation data (clinical predictors and all the possible scores (derived from different number of selected genes))
      colnames(newdata)<-c('time','status','clin.score',paste('mol.score.',1:nGenes,sep=''))
      suppressWarnings(fullFormula<-coxph(Surv(time,status)~.,data=newdata)$formula) # compute a formula, it is useful only for pec function, does not matter if the model does not converge
      predEC<-pec::pec(models,formula=fullFormula,data=newdata,cens.model=cens.model,start=0,reference=F)
      if(is.null(maxtime)) maxtime<-sort(response[index==j,1],decreasing=TRUE)
      out<-pec::ibs(predEC,times=maxtime[1]) # compute IBS (if times is not specified, use the largest one which gives no problem)
      while(sum(is.na(out))>0)
      {
        if(length(maxtime)==1) stop('wrong specification of maxtime in IBS computation')
        maxtime<-maxtime[-1]
        out<-pec::ibs(predEC,times=maxtime[1])
      }
      out
    }
    llikCV<-function(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    {
      data<-xSelection(data.frame(response,data.genes),nSel=nGenes,nCov=nCov) # select relevant molecular variables (based on cv-training set)
      mod.clin<-coxph(Surv(response[,1],response[,2])~.,data=as.data.frame(data.clin))
      if(dim(data)[2]>(nCov+3))
      {
        score<-prcomp(data[,-c(1:(nCov+2))],scale=TRUE)
        mol.score<-predict(score)[,1]
      }
      else
      {
        score<-names(data)[nCov+3]
        mol.score<-data[,(nCov+3)]
      }
      data<-data.frame(response,mod.clin$linear.predictors,mol.score) # cv-training data (clinical score and all the possible molecular scores (derived from different number of selected genes))
      colnames(data)<-c('time','status','clin.score','mol.score')
      cross_j<-function(j,data,xSelection,index)
      {
        mod<-coxph(Surv(time,status)~.,data=data[index!=j,])
        coxph(Surv(time,status)~.,data=data,init=mod$coeff,iter=0)$loglik[2]-mod$loglik[2]
      }
      # repeat the process for each cv-split
      sum(sapply(1:length(response[,1]),cross_j,index=index,data=data,xSelection=xSelection))  
    }
    if(criteria=='brier')
    {
      cpus<-list(...)$cpus
      cens.model<-list(...)$cens.model
      maxtime<-list(...)$maxtime
      # repeat the process for each cv-split
      if(cpus==1) stra<-lapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
      else stra<-sfLapply(c(1:foldCV),brierScore,response=response,nGenes=nGenes,data.clin=data.clin,data.genes=data.genes,index=index,nCov=nCov,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime)
    }
    if(criteria=='likelihoodCV') stra<-llikCV(nGenes,response,data.clin,data.genes,nCov,xSelection,foldCV,index)
    stra
  }
  
  # choose the method between univariate and stepwise selection
  xSelection<-switch(method,
                     'univariate'={
                       nCov<-0
                       univariateSelection},
                     'univariateAdj'={
                       nCov<-dim(data.clin)[2]
                       univariateSelection},
                     'forward'={
                       nCov<-0
                       forwardSelection},
                     'forwardAdj'={
                       nCov<-dim(data.clin)[2]
                       forwardSelection})
  if(is.null(xSelection)) stop('method not supported, choose between <univariate>, <forward>, <univariateAdj> and <forwardAdj>')
  
  # choose the strategy
  {
    if(nCov==0) strategyFun<-switch(as.character(strategy),
                                    '1'=strategy.1,
                                    '2'=strategy.2,
                                    '3'=strategy.3,
                                    '4.1'=strategy.41,
                                    '4.2'=strategy.42)
    else strategyFun<-switch(as.character(strategy),
                             '1'=stop('strategy 1 requires nCov=0'),
                             '2'=strategy.1, # with nCov>0 strategy 2 works with the same function of strategy 1
                             '3'=stop('strategy 3 requires nCov=0'),
                             '4.1'=strategy.41,
                             '4.2'=strategy.42)
  }
  if(is.null(strategyFun)) stop('strategy can assume value 1, 2, 3, 4.1 or 4.2')

  if(as.character(strategy)=='3')
  {
    count<-1
    candidates<-list()
    nFav<-dim(data.clin)[2]
    for(a in 1:maximum)
      for(i in 0:min(nFav,a)) 
      {
        candidates[[count]]<-c(i,(a-i))
        count<-count+1
      }
  }
  else candidates<-c(maximum:1)

  if(criteria=='brier') tot<-apply(matrix(unlist(strategyFun(maximum,response=response,data.clin=data.clin,data.genes=data.genes,index=index,foldCV=foldCV,nCov=nCov,cpus=cpus,xSelection=xSelection,cens.model=cens.model,maxtime=maxtime,criteria='brier')),ncol=foldCV,byrow=F),1,mean)
  if(criteria=='likelihoodCV')
  {
    if(cpus==1) tot<-lapply(candidates,strategyFun,response=response,data.clin=data.clin,data.genes=data.genes,nCov=nCov,xSelection=xSelection,criteria='likelihoodCV',foldCV=foldCV,index=index)
    else
    {
      eseg<-function(candM,strategyFun,response,data.clin,data.genes,nCov,xSelection,criteria,foldCV,index) lapply(candM,strategyFun,response=response,data.clin=data.clin,data.genes=data.genes,nCov=nCov,xSelection=xSelection,criteria=criteria,foldCV=foldCV,index=index)
      candList<-sfClusterSplit(candidates)
      tot<-sfLapply(candList,eseg,strategyFun=strategyFun,response=response,data.clin=data.clin,data.genes=data.genes,nCov=nCov,xSelection=xSelection,criteria='likelihoodCV',foldCV=foldCV,index=index)
    }
    tot<--unlist(tot)
  }
    
  if(cpus>1) sfStop()
  if(as.character(strategy)=='3') final<-as.vector(candidates[[which.min(tot)]])
  else final<-maximum+1-which.min(tot)
  final
}


