\name{cv.tuningParam}
\alias{cv.tuningParam}
\title{Cross validation based method to select the best number of predictors in a Cox model}
\description{cross-validation procedure to select the best number of predictors to include in a Cox model.}
\usage{
cv.tuningParam(response, data.clin, data.genes, maximum = 25, foldCV = 10,
               strategy = c(1, 2, 3, 4.1, 4.2), cpus = 1,
               method = c("univariate", "forward", "univariateAdj", "forwardAdj"),
               criteria = c("brier", "likelihoodCV"), cens.model = "marginal",
               maxtime = "maximum", seed=NULL, ...)
}
\arguments{
  \item{response}{survival outcome (either a matrix with two columns or a Surv object).}
  \item{data.clin}{clinical data.}
  \item{data.genes}{high-throughput molecular data.}
  \item{maximum}{maximum number of predictors to consider in the cross-validation procedure.}
  \item{foldCV}{number of folds to use in the cross-validation procedure.}
  \item{strategy}{strategy used to combine clinical and molecular data. It can assume values '1', '2', '3', '4.1' or '4.2' (see De Bin et al., 201?, for further details).}
  \item{cpus}{number of cpus to use, 1 (default) for sequential computing (if criteria is "brier", we recommend to set cpus=foldCV).}
  \item{method}{either univariate (\code{univariate}), forward (\code{forward}), univariate with adjustment (\code{univariateAdj}) or forward with adjustment (\code{forwardAdj}) selection (see De Bin et al., 201?).}
  \item{criteria}{criteria to evaluate the performance of the prediction model in the cross-validation procedure: it can be either \code{brier} to measure the prediction ability via the integrated Brier score or \code{likelihoodCV} for evaluating the cross-validated log-likelihood for each candidate number of predictors.}
  \item{cens.model}{method to estimate the inverse probability of censoring weights in the computation of integrated Brier score. Possible values are \code{marginal}, \code{cox}, \code{nonpar} and \code{aalen}, see package \pkg{pec} for further details. It is relevant only if \code{criteria="brier"}.}
  \item{maxtime}{upper bound for computing integrated Brier score. It is relevant only if \code{criteria="brier"}. It can be either a number or a special string: \code{maximum} to compute the integrated Brier score until the maximum value of each cross-validation fold (default), \code{equal} to compute the integrated Brier score until the same value in every cross-validation fold (the minimum among the maximum times of each cross-validation fold).}
  \item{seed}{vector of length equal to the number of cpus to use, indicating the seed to use in each process (for reproducibility).}
  \item{...}{supplementary arguments for parallel computing. See package \pkg{snowfall} for further details.}
}
\details{
The argument \code{strategy} indicates which combining strategy you would like to use for the model. In particular:

- in \code{strategy='1'} clinical and molecular predictors are used in the same way in the fitting of the model;

- in \code{strategy='2'} a model with molecular predictors is fitted on the residuals of the clinical model;

- in \code{strategy='3'} the clinical predictors are somehow favored with respect to the molecular ones;

- in \code{strategy='4.1'} a molecular score is derived summarizing the information from the molecular data and added to a clinical model;

- in \code{strategy='4.2'} also the clinical information is summarized in a single score.

See De Bin et al. (201?) for further details.
}
\value{
a number indicating the number of predictors to include in the Cox model. It is a single number if \code{strategy} is \code{1}, \code{2}, \code{4.1}, or \code{4.2}; a vector of the form \code{c(a,b)} if \code{strategy=3}, where 'a' denotes the number of clinical predictors and 'b' the number of molecular predictors to consider.
}
\references{
De Bin et al. (201?)
}
\author{Riccardo De Bin}
\seealso{\code{\link{forwardSelection}}, \code{\link{univariateSelection}}}
\examples{
# Simulate survival data with q clinical and p molecular covariates.
# Ten covariates are informative: the first 3 clinical ones (coefficient 10)
# and the last 7 molecular ones (coefficient 5).
n <- 350; q <- 10; p <- 300
beta <- c(rep(10, 3), rep(0, q - 3), rep(0, p - 7), rep(5, 7))
x <- matrix(rnorm(n * (p + q)), n, p + q)
real.time <- -(log(runif(n))) / (10 * exp(drop(x \%*\% beta)))
cens.time <- rexp(n, rate = 1/10)
status <- ifelse(real.time <= cens.time, 1, 0)
obs.time <- ifelse(real.time <= cens.time, real.time, cens.time)
data <- data.frame(obs.time, status, x)
colnames(data) <- c('time', 'status', paste('Z', 1:q, sep = ''), paste('X', 1:p, sep = ''))

# Column layout of data: 1:2 response, then q clinical columns, then p
# molecular columns. Derive the indices from q and p so that all molecular
# predictors (including the informative ones, stored last) are passed on.
clin.cols <- 3:(q + 2)
gene.cols <- (q + 3):(q + p + 2)

# number of relevant predictors
# (sequential computation, it can take some time...)
cv.tuningParam(response = data[, 1:2], data.clin = data[, clin.cols],
               data.genes = data[, gene.cols],
               maximum = 12, foldCV = 5, strategy = 1, cpus = 1,
               method = "univariate", criteria = "likelihoodCV")
}