# File for reproducing the analyses presented in the paper
# "Complexity selection and CV in lasso and sparse PLS with high-dimensional data"
# by A.-L. Boulesteix, A. Richter and C. Bernau

# Functions


lassoCV<-function(X,y,niter=20,Kvec=c(3,4,5,6,7,8,9,10))
{
K<-length(Kvec)
lambda<-matrix(0,niter,K)
number<-matrix(0,niter,K)
whichselected<-array(0,dim=c(niter,K,500))

for (i in 1:niter)
 {
 print(paste("i=",i))
 set.seed(i)
 for (k in 1:length(Kvec))
  { 
  print(paste("k=",k)) 
  resultk<-cv.glmnet(x=X,y=y,nfolds=Kvec[k])   
  
  if (i==1&k==1)
   {
   cverror<-array(0,dim=c(niter,K,length(resultk$lambda)))
   numbervec<-resultk$nzero
   lambdavec<-resultk$lambda
   }
  selected<-max(which(resultk$cvm==min(resultk$cvm)))
  print(selected)
  lambda[i,k]<-resultk$lambda[selected]
  cverror[i,k,]<-resultk$cvm
  
  wsik<-which(coef(resultk$glmnet.fit)[-1,selected]!=0)
  if (length(wsik)>0)
   {
   whichselected[i,k,1:length(wsik)]<-wsik
   number[i,k]<-length(wsik)
   }
  }
 }
return(list(lambdaopt=lambda,lambdacand=lambdavec,numberopt=number,numbercand=numbervec,cverror=cverror,whichselected=whichselected))
}

######
######

lassoCV2<-function(X,y,niter=20,exclude=0.2)
{
lambda<-numeric(niter)
number<-numeric(niter)
whichselected<-matrix(0,niter,500)
n<-length(y)
ni<-round(n*exclude)

for (i in 1:niter)
 {
 print(paste("i=",i))
 set.seed(i)
 sampi<-sample(n,ni)
 Xi<-X[-sampi,]
 yi<-y[-sampi]
 resulti<-cv.glmnet(x=Xi,y=yi,nfolds=(n-ni))   
 
 selected<-max(which(resulti$cvm==min(resulti$cvm)))
 
 lambda[i]<-resulti$lambda[selected]
  
 wsi<-which(coef(resulti$glmnet.fit)[,selected]!=0)
 
 if (length(wsi)>0)
  {
  whichselected[i,1:length(wsi)]<-wsi
 number[i]<-length(wsi)
  }

 }
return(list(lambdaopt=lambda,numberopt=number,whichselected=whichselected))
}


##############
##############



splsCV <- function( X ,y , starti = 1, niter = 5, lc = seq(1,8,1), eta = seq(0.1,0.9,0.1), 
                    Kvec = seq(3,10,1))
{
  Kfold    <- length(Kvec)
  cv.eta   <- matrix(0,niter,Kfold)
  cv.lc    <- matrix(0,niter,Kfold)
  cverror  <- array(0, dim = c(niter,Kfold,length(lc)*length(eta)))
  number   <- matrix(0,niter,Kfold)
  whichselected <- array(0, dim = c(niter,Kfold,500)) # limited to max. 500!
         
for (i in 1:niter)
 {
 print(paste("i = ",i,'starti = ',starti))
 set.seed(starti)
                    
  for (k in 1:length(Kvec))
    { 
    print(paste("k = ",k)) 
    resultk <- cv.spls(x = X,y = y,fold = Kvec[k],eta = eta,K = lc,plot.it = FALSE)   
      
      cv.eta[i,k]       <- resultk$eta.opt
      cv.lc[i,k]        <- resultk$K.opt
      cverror[i,k,]     <- resultk$mspemat
      
      result.spls.fit   <- spls(x = X, y = y, K = resultk$K.opt, eta = resultk$eta.opt)
      coef.spls         <- coef(result.spls.fit)                              
      
      wsik <- which(coef.spls[-1,]!=0)
      {if (length(wsik)>500)
        {
        whichselected[i,k,1:500]<-wsik[1:500]        
        }
        else whichselected[i,k,1:length(wsik)]<-wsik}
      number[i,k]<-length(wsik)
    }
 starti = starti + 1
 }
return(list(etaopt = cv.eta, lc.opt = cv.lc, numberopt = number,
            cverror = cverror,whichselected = whichselected))
}

######
######

splsCV2 <- function( X ,y , niter = 20, lc = seq(1,8,1), eta = seq(0.1,0.9,0.1),
                     exclude = 0.2)
{

  cv.eta   <- numeric(niter)
  cv.lc    <- numeric(niter)
  number   <- numeric(niter)
  whichselected <- matrix(0, niter, 500) 
  n        <- length(y)
  ni       <- round(n*exclude)

for (i in 1:niter)
 {
 print(paste("i = ",i))
 set.seed(i)
 sampi <- sample(n,ni)
 Xi <- X[-sampi,]
 yi <- y[-sampi]
 
      resulti         <- cv.spls( x = Xi,y = yi, fold = (n - ni), eta = eta, K = lc,
                                    plot.it = FALSE)

      cv.eta[i]       <- resulti$eta.opt
      cv.lc[i]        <- resulti$K.opt

      result.spls.fit <- spls(x = Xi, y = yi, K = resulti$K.opt, eta = resulti$eta.opt)
      coef.spls       <- coef(result.spls.fit)

      wsik <- which(coef.spls[-1,]!=0)
      {if (length(wsik)>500)             # limited to max. 500!
        {
        whichselected[i,1:500]<-wsik[1:500]
        }
        else whichselected[i,1:length(wsik)]<-wsik}
      number[i]<-length(wsik)
    }

return(list(etaopt = cv.eta, lc.opt = cv.lc, numberopt = number,
            whichselected = whichselected))
}



###############
###############


cviter<-function(result,ncv=10)
{
niter<-dim(result$numberopt)[1]
niternew<-niter/ncv
if (niternew!=round(niternew))
 {
 error("niter must be a multiple of ncv")
 }
numberopt<-matrix(0,niternew,dim(result$numberopt)[2])

indexi<-1:ncv
for (i in 1:niternew)
 {
 print(i)
 cverrori<-result$cverror[indexi,,]
 cverrori<-apply(cverrori,FUN=mean,MARGIN=c(2,3))
 whichselected<-apply(cverrori,FUN=which.min,MARGIN=1)
 numberopt[i,]<-result$numbercand[whichselected]
 indexi<-indexi+ncv
 }
list(numberopt=numberopt)

}
   