# Analysis of neuroblastoma dataset by Oberthuer et al. (2008) # ################################################################ library(combPreds,lib='../../R/myrlibrary/') ################# # read the data # ################# tmp<-read.table('webclinical.txt',header=T,sep='\t') # data collected from the web tmp<-tmp[seq(1,751,by=2),] tmp<-tmp[,-c(2,3,4,6,7)] tmp[,1]<-substr(tmp[,1],1,4) tmp[,3]<-substr(tmp[,3],1,4) tmp[,4]<-substr(tmp[,4],1,4) tmp[,5]<-substr(tmp[,5],1,5) for (i in 1:376) { if (is.na(suppressWarnings(as.numeric(tmp[i,1])))) tmp[i,1]<-substr(tmp[i,1],1,3) if (is.na(suppressWarnings(as.numeric(tmp[i,1])))) tmp[i,1]<-substr(tmp[i,1],1,2) if (is.na(suppressWarnings(as.numeric(tmp[i,1])))) tmp[i,1]<-substr(tmp[i,1],1,1) if (is.na(suppressWarnings(as.numeric(tmp[i,3])))) tmp[i,3]<-substr(tmp[i,3],1,3) if (is.na(suppressWarnings(as.numeric(tmp[i,3])))) tmp[i,3]<-substr(tmp[i,3],1,2) if (is.na(suppressWarnings(as.numeric(tmp[i,3])))) tmp[i,3]<-substr(tmp[i,3],1,1) if (is.na(suppressWarnings(as.numeric(tmp[i,4])))) tmp[i,4]<-substr(tmp[i,4],1,3) if (is.na(suppressWarnings(as.numeric(tmp[i,4])))) tmp[i,4]<-substr(tmp[i,4],1,2) if (is.na(suppressWarnings(as.numeric(tmp[i,4])))) tmp[i,4]<-substr(tmp[i,4],1,1) ifelse(tmp[i,5]=='alive',tmp[i,5]<-0,tmp[i,5]<-1) } tmp[,1]<-as.numeric(tmp[,1]) tmp[,3]<-as.numeric(tmp[,3]) tmp[,4]<-as.numeric(tmp[,4]) medianAge<-median(tmp$Age) boev<-read.table('NB2004_clin_and_genes_neuroblastoma_noTies.txt',header=T,sep='\t') # data provided by H. M. Boevelstad clinicalData<-cbind(boev[,1:3],rep(NA,dim(boev)[1])) supp<-tmp[c(288:329,331,332,335,338,339,341,343:353,355:369,371:376),4] clinicalData[283:362,4]<-as.numeric(supp>medianAge) # the last observations are in order for (i in 1:282) for (j in 1:287) if ((trunc(boev[i,1]*365)==tmp[j,4])&boev[i,2]==tmp[j,5]) clinicalData[i,4]<-as.numeric(tmp[j,1]>medianAge) # survival time and censoring status are unique key. Only possible issue: observation 247. colnames(clinicalData)<-c('time','status','risk','age') molecularData<-boev[,-(1:3)] colnames(molecularData)<-paste('X',1:9978,sep='') rm(tmp,boev,i,j) ######################################################## # arbitrary split between training and validation sets # ######################################################## set.seed(461) index.split<-sample(c(rep(0,240),rep(1,122))) trainData.clin<-clinicalData[index.split==0,] trainData.genes<-molecularData[index.split==0,] validData.clin<-clinicalData[index.split==1,] validData.genes<-molecularData[index.split==1,] sdGenes<-apply(trainData.genes,2,sd) meanGenes<-apply(trainData.genes,2,mean) centrGenes<-scale(trainData.genes,scale=F) studGenes01<-scale(trainData.genes) clinDataDummies<-trainData.clin[,-c(1,2)] tdataDummies<-cbind(clinDataDummies,trainData.genes) tdataDummies.c<-cbind(clinDataDummies,centrGenes) tdataDummies.01<-cbind(clinDataDummies,studGenes01) studGenesV01<-centrGenesV<-validData.genes for (i in 1:dim(trainData.genes)[2]) { centrGenesV[,i]<-validData.genes[,i]-meanGenes[i] studGenesV01[,i]<-(validData.genes[,i]-meanGenes[i])/sdGenes[i] } clinDataDummiesV<-validData.clin[,-c(1,2)] vdataDummies<-cbind(clinDataDummiesV,validData.genes) vdataDummies.c<-cbind(clinDataDummiesV,centrGenesV) vdataDummies.01<-cbind(clinDataDummiesV,studGenesV01) save(trainData.genes,validData.genes,trainData.clin,validData.clin, clinDataDummies,tdataDummies,tdataDummies.c,tdataDummies.01,studGenes01,centrGenes, clinDataDummiesV,vdataDummies,vdataDummies.c,vdataDummies.01,studGenesV01,centrGenesV, file='dataNBS_preproc.Rdata') #################### # tuning parameter # #################### tunPar<-list() nGenesCV<-list() # -------- # # CoxBoost # # -------- # # strategy 1: set.seed(614) tmp<-optimCoxBoostPenalty(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),standardize=FALSE,parallel=FALSE) tunPar$cb1.pen<-tmp$penalty tmp<-cv.CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),standardize=FALSE,penalty=tunPar$cb1.pen,maxstepno=2e2) tunPar$cb1.mstop<-tmp$optimal.step # strategy 3: set.seed(7469) tmp<-optimCoxBoostPenalty(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),unpen.index=c(1:2),standardize=FALSE,parallel=FALSE) tunPar$cb3.pen<-tmp$penalty tmp<-cv.CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),unpen.index=c(1:2),standardize=FALSE,penalty=tunPar$cb3.pen,maxstepno=2e2) tunPar$cb3.mstop<-tmp$optimal.step # strategy 4: set.seed(943) tmp<-optimCoxBoostPenalty(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),standardize=FALSE,parallel=FALSE) tunPar$cb4.pen<-tmp$penalty tmp<-cv.CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),standardize=FALSE,penalty=tunPar$cb4.pen,maxstepno=2e2) tunPar$cb4.mstop<-tmp$optimal.step save(tunPar,file='tunParCB.Rdata') # strategy 2: library(snowfall) sfInit(par=T,cp=180,type='MPI') sfLibrary(combPreds,lib='../../R/myrlibrary/') sfSource('../CoxBoostMod.R') criteria='likelihoodCV' foldCV<-10 penalties<-seq(500,4000,by=500) n<-dim(clinDataDummies)[1] set.seed(64) ifelse(foldCV>=(n-2),index<-c(1:n),index<-sample(c(rep(c(1:foldCV),floor(n/foldCV)),sample(1:foldCV,n-foldCV*floor(n/foldCV))))) step<-function(i,time,cens,studGenes01,index,penalty,clinDataDummies,criteria,seed=NULL) { if(!is.null(seed)) set.seed(seed[i]) times<-5 brierScore<-function(j,i,time,cens,studGenes01,index,penalty,clinDataDummies,times=times) { mod.clin.dummies<-coxph(Surv(time,cens)~.,cbind(time,cens,clinDataDummies)[index!=j,]) mod<-CoxBoostMod(time[index!=j],cens[index!=j],as.matrix(studGenes01[index!=j,]),penalty=penalty,stepno=i,standardize=FALSE,offset=mod.clin.dummies$linear.predictors) dataTmp<-cbind(time,cens,studGenes01[,mod$coefficients[mod$stepno+1,]!=0])[index!=j,] names(dataTmp)[-c(1,2)]<-names(studGenes01)[mod$coefficients[mod$stepno+1,]!=0] model<-coxph(Surv(time,cens)~offset(mod.clin.dummies$linear.predictors)+.,data=as.data.frame(dataTmp),init=mod$coefficients[mod$stepno+1,mod$coefficients[mod$stepno+1,]!=0],iter=0) computeIBS(model,cbind(time,cens)[index!=j,],studGenes01[index!=j,],cbind(time,cens)[index==j,],studGenes01[index==j,],offset=mod.clin.dummies$linear.predictors,offset.new=predict(mod.clin.dummies,newdata=cbind(time,cens,clinDataDummies)[index==j,]),times=times) } cvllik<-function(j,i,time,cens,studGenes01,index,penalty,clinDataDummies) { mod.clin.dummies<-coxph(Surv(time,cens)~.,data=data.frame(time,cens,clinDataDummies)) mod<-CoxBoostMod(time[index!=j],cens[index!=j],as.matrix(studGenes01[index!=j,]),penalty=penalty,stepno=i,standardize=FALSE,offset=mod.clin.dummies$linear.predictors[index!=j]) dataTmp<-data.frame(time[index==j],cens[index==j],studGenes01[index==j,mod$coefficients[mod$stepno+1,]!=0]) names(dataTmp)<-c('time','cens',mod$xnames[mod$coefficients[mod$stepno+1,]!=0]) model<-coxph(Surv(time,cens)~offset(mod.clin.dummies$linear.predictors[index==j])+.,data=dataTmp,init=mod$coefficients[mod$stepno+1,mod$coefficients[mod$stepno+1,]!=0],iter=0) -model$loglik[2] } if (criteria=='brier') res<-mean(sapply(c(1:10),brierScore,i=i,time=time,cens=cens,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies,times=times)) if (criteria=='likelihoodCV') res<-mean(sapply(c(1:10),cvllik,i=i,time=time,cens=cens,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies)) res } res<-list() seed<-91:270 for (a in 1:length(penalties)) res[[a]]<-sfSapply(seq(21,200,by=1),step,time=trainData.clin$time,cens=trainData.clin$status,studGenes01=studGenes01,index=index,penalty=penalties[i],clinDataDummies=clinDataDummies,criteria=criteria,seed=seed) mstop<-lapply(res,which.min) pen<-which.min(lapply(res,min)) tunPar$cb2.pen<-penalties[pen] tunPar$cb2.mstop<-20+mstop[[pen]] save(tunPar,file='tunParCB.Rdata') sfStop() # -------------------- # # Univariate selection # # -------------------- # seed<-43:67 nGenesCV$ur1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=1,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-397:421 nGenesCV$ur2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-76:100 nGenesCV$ur3<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=3,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-876:900 nGenesCV$ur4.1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-669:693 nGenesCV$ur4.2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # ------------------------------------ # # Univariate selection with adjustment # # ------------------------------------ # seed<-486:510 nGenesCV$tcr2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-877:901 nGenesCV$tcr41<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) seed<-584:608 nGenesCV$tcr42<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='univariate',criteria='likelihoodCV',seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # ----------------- # # forward selection # # ----------------- # seed<-1:25 nGenesCV$sw1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=1,cpus=25,type='MPI',method='forward',criteria='likelihoodCV',seed=seed) seed<-431:455 nGenesCV$sw2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='forward',criteria='likelihoodCV',seed=seed) seed<-761:785 nGenesCV$sw3<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=3,cpus=25,type='MPI',method='forward',criteria='likelihoodCV',seed=seed) seed<-3217:3241 nGenesCV$sw4.1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='forward',criteria='likelihoodCV',seed=seed) seed<-265:289 nGenesCV$sw4.2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='forward',criteria='likelihoodCV',seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # --------------------------------- # # forward selection with adjustment # # --------------------------------- # seed<-184:208 nGenesCV$swtc2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='forwardAdj',criteria='likelihoodCV',seed=seed) seed<-769:793 nGenesCV$swtc41<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='forwardAdj',criteria='likelihoodCV',seed=seed) seed<-99:123 nGenesCV$swtc42<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),clinDataDummies,trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='forwardAdj',criteria='likelihoodCV',seed=seed) save(nGenesCV,file='nGenesCV.Rdata') ###################### # training the model # ###################### set.seed(61) # ++++++++++++++++++ # # Strategy 1 (naive) # # ++++++++++++++++++ # # univatiate selection # # -------------------- # data1.u<-univariateSelection(data=cbind(trainData.clin,trainData.genes),nSel=nGenesCV$ur1) mod1.u<-coxph(Surv(time,status)~.,data=data1.u) # boosting # # -------- # # CoxBoost mod1.CB<-CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),penalty=tunPar$cb1.pen,stepno=tunPar$cb1.mstop,standardize=FALSE) # mboost mod1.mB<-glmboost(as.matrix(tdataDummies.c),Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod1.mB), type = "kfold") set.seed(23) cv.mB <- cvrisk(mod1.mB, folds=cv10f, papply = lapply) # LASSO # # ----- # set.seed(547) cv.mL<-cv.glmnet(x=as.matrix(tdataDummies.01),y=Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) mL<-glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) # forward selection # # ------------------ # data1.sw<-forwardSelection(cbind(trainData.clin,trainData.genes),nSel=nGenesCV$sw1) mod1.sw<-coxph(Surv(time,status)~.,data=data1.sw) # ++++++++++++++++++++++ # # strategy 2 (residuals) # # ++++++++++++++++++++++ # # compute linear predictor for the offset mod.clin<-coxph(Surv(time,status)~risk+age,data=trainData.clin) lin.pred<-mod.clin$linear.predictors # univatiate selection # # -------------------- # data2.u<-univariateSelection(data=cbind(trainData.clin[,c(1,2)],trainData.genes),offset=lin.pred,nSel=nGenesCV$ur2) mod2.u<-coxph(Surv(time,status)~offset(lin.pred)+.,data=data2.u) # univatiate selection TC # # ----------------------- # data2.tc<-univariateSelection(data=cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$tcr2) mod2.tc<-coxph(Surv(time,status)~.,data=data2.tc) # boosting # # -------- # # with CoxBoost source('../CoxBoostMod.R') mod2.CB<-CoxBoostMod(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),penalty=tunPar$cb2.pen,stepno=tunPar$cb2.mstop,standardize=FALSE,offset=lin.pred) # with mboost mod2.mB<-glmboost(as.matrix(centrGenes),Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),offset=lin.pred,control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod2.mB), type = "kfold") set.seed(747) cv2.mB <- cvrisk(mod2.mB, folds=cv10f, papply = lapply) # LASSO # # ----- # set.seed(345) cv2.mL<-cv.glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),offset=lin.pred,family='cox',standardize=FALSE) mL2<-glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),offset=lin.pred,family='cox',standardize=FALSE) # forward selection # # ------------------ # data2.sw<-forwardSelection(data=cbind(trainData.clin[,c(1,2)],trainData.genes),offset=lin.pred,nSel=nGenesCV$sw2) mod2.sw<-coxph(Surv(time,status)~offset(lin.pred)+.,data=data2.sw) # forward selection TC # # ----------------------- # data2.swtc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$swtc2) mod2.swtc<-coxph(Surv(time,status)~.,data=data2.swtc) ######################### # strategy 3 (favoring) # ######################### # univatiate selection # # -------------------- # data3.u<-univariateSelection(cbind(trainData.clin,trainData.genes),nFav=2,nSel=nGenesCV$ur3) mod3.u<-coxph(Surv(time,status)~.,data=data3.u) # boosting # # -------- # # we can use only CoxBoost mod3.CB<-CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),unpen.index=c(1:2),penalty=tunPar$cb3.pen,stepno=tunPar$cb3.mstop,standardize=FALSE) # LASSO # # ----- # set.seed(28) cv3.mL<-cv.glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),penalty.factor=c(rep(0,2),rep(1,9978)),family='cox',standardize=FALSE) mL3<-glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),penalty.factor=c(rep(0,2),rep(1,9978)),family='cox',standardize=FALSE) # forward selection # # ------------------ # data3.sw<-forwardSelection(cbind(trainData.clin,trainData.genes),nFav=2,nSel=nGenesCV$sw3) mod3.sw<-coxph(Surv(time,status)~.,data=data3.sw) # ++++++++++++++++++++++++++++++++++ # # strategy 4 (dimensional reduction) # # ++++++++++++++++++++++++++++++++++ # # 4.1 (only molecular score) # # ++++++++++++++++++++++++++ # # univatiate selection # # -------------------- # data41.u<-univariateSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$ur4.1) ifelse(dim(data41.u)[2]>3,score41.u<-prcomp(studGenes01[,names(data41.u)[-c(1:2)]]),score41.u<-names(data41.u)[3]) ifelse(dim(data41.u)[2]>3,mol.score41.u<-score41.u$x[,1],mol.score41.u<-data41.u[,3]) mod41.u<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.u)) # univatiate selection TC # # ----------------------- # data41.tc<-univariateSelection(cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$tcr41) ifelse(dim(data41.tc)[2]>5,score41.tc<-prcomp(studGenes01[,names(data41.tc)[-c(1:4)]]),score41.tc<-names(data41.tc)[5]) ifelse(dim(data41.tc)[2]>5,mol.score41.tc<-score41.tc$x[,1],mol.score41.tc<-data41.tc[,5]) mod41.tc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.tc)) # boosting # # -------- # # using CoxBoost mod4.CB<-CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),penalty=tunPar$cb4.pen,stepno=tunPar$cb4.mstop,standardize=FALSE) score41.CB<-mod4.CB$coefficients[tunPar$cb4.mstop+1,mod4.CB$coefficients[tunPar$cb4.mstop+1,]!=0] mol.score41.CB<-mod4.CB$linear.predictor[tunPar$cb4.mstop+1,] mod41.CB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.CB)) # using mboost mod4.mB<-glmboost(as.matrix(centrGenes),Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod4.mB), type = "kfold") set.seed(438) cv4.mB <- cvrisk(mod4.mB, folds=cv10f, papply = lapply) score41.mB<-coef(mod4.mB[mstop(cv4.mB)]) mol.score41.mB<-fitted(mod4.mB[mstop(cv4.mB)]) mod41.mB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.mB)) # LASSO # # ----- # set.seed(69) cv41.mL<-cv.glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) mL41<-glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) iR41<-which(mL41$lambda==cv41.mL$lambda.min) score41.mL<-mL41$beta[mL41$beta[,iR41]!=0,iR41] mol.score41.mL<-predict(mL41,newx=as.matrix(studGenes01),s=cv41.mL$lambda.min) colnames(mol.score41.mL)<-'mol.score41.mL' mod41.mL<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.mL)) # forward selection # # ------------------ # data41.sw<-forwardSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$sw4.1) ifelse(dim(data41.sw)[2]>3,score41.sw<-prcomp(studGenes01[,names(data41.sw)[-c(1:2)]]),score41.sw<-names(data41.sw)[3]) ifelse(dim(data41.sw)[2]>3,mol.score41.sw<-score41.sw$x[,1],mol.score41.sw<-data41.sw[,3]) mod41.sw<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.sw)) # forward selection TC # # --------------------- # data41.swtc<-forwardSelection(cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$swtc41) ifelse(dim(data41.swtc)[2]>5,score41.swtc<-prcomp(tdataDummies.01[,names(data41.swtc)[-c(1:4)]]),score41.swtc<-names(data41.swtc)[5]) ifelse(dim(data41.swtc)[2]>5,mol.score41.swtc<-score41.swtc$x[,1],mol.score41.swtc<-data41.swtc[,5]) mod41.swtc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score41.swtc)) # +++++++++++++++++++++++++++++++++++++++ # # 4.2 (both clinical and molecular score) # # +++++++++++++++++++++++++++++++++++++++ # score.clin<-lin.pred # univatiate selection # # -------------------- # if(nGenesCV$ur4.1!=nGenesCV$ur4.2) { data42.u<-univariateSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$ur4.2) ifelse(dim(data42.u)[2]>3,score42.u<-prcomp(studGenes01[,names(data42.u)[-c(1:2)]]),score42.u<-names(data42.u)[3]) ifelse(dim(data42.u)[2]>3,mol.score42.u<-score42.u$x[,1],mol.score42.u<-data42.u[,3]) } if(nGenesCV$ur4.1==nGenesCV$ur4.2) { data42.u<-data41.u score42.u<-score41.u mol.score42.u<-mol.score41.u } mod42.u<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score42.u)) # univatiate selection TC # # ----------------------- # if(nGenesCV$tcr41!=nGenesCV$tcr42) { data42.tc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$tcr42) ifelse(dim(data42.tc)[2]>5,score42.tc<-prcomp(studGenes01[,names(data42.tc)[-c(1:4)]]),score42.tc<-names(data42.tc)[5]) ifelse(dim(data42.tc)[2]>5,mol.score42.tc<-score42.tc$x[,1],mol.score42.tc<-data42.tc[,5]) } if(nGenesCV$tcr41==nGenesCV$tcr42) { data42.tc<-data41.tc score42.tc<-score41.tc mol.score42.tc<-mol.score41.tc } mod42.tc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score42.tc)) # boosting # # -------- # # using CoxBoost mod42.CB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score41.CB)) # using mboost mod42.mB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score41.mB)) # LASSO # # ----- # mod42.mL<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score41.mL)) # forward selection # # ------------------ # if(nGenesCV$sw4.1!=nGenesCV$sw4.2) { data42.sw<-forwardSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$sw4.2) ifelse(dim(data42.sw)[2]>3,score42.sw<-prcomp(studGenes01[,names(data42.sw)[-c(1:2)]]),score42.sw<-names(data42.sw)[3]) ifelse(dim(data42.sw)[2]>3,mol.score41.sw<-score42.sw$x[,1],mol.score41.sw<-data42.sw[,3]) } if(nGenesCV$sw4.1==nGenesCV$sw4.2) { data42.sw<-data41.sw score42.sw<-score41.sw mol.score42.sw<-mol.score41.sw } mod42.sw<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score42.sw)) # forward selection TC # # --------------------- # if(nGenesCV$swtc41!=nGenesCV$swtc42) { data42.swtc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=2,nSel=nGenesCV$swtc42) ifelse(dim(data42.swtc)[2]>5,score42.swtc<-prcomp(studGenes01[,names(data42.swtc)[-c(1:4)]]),score42.swtc<-names(data42.swtc)[5]) ifelse(dim(data42.swtc)[2]>5,mol.score42.swtc<-score42.swtc$x[,1],mol.score42.swtc<-data42.swtc[,5]) } if(nGenesCV$swtc41==nGenesCV$swtc42) { data42.swtc<-data41.swtc score42.swtc<-score41.swtc mol.score42.swtc<-mol.score41.swtc } mod42.swtc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score42.swtc)) save(data1.u,mod1.u,mod1.CB,mod1.mB,cv.mB,cv.mL,mL,data1.sw,mod1.sw, data2.u,mod2.u,mod2.CB,mod2.mB,cv2.mB,cv2.mL,mL2,data2.tc,mod2.tc,data2.sw,mod2.sw,data2.swtc,mod2.swtc,mod.clin, data3.u,mod3.u,mod3.CB,cv3.mL,mL3,data3.sw,mod3.sw, data41.u,mod41.u,data42.u,mod42.u,score41.u,score42.u, mod4.CB,mod41.CB,mod4.mB,cv4.mB,mod41.mB,cv41.mL,mL41,mod41.mL,score41.CB,score41.mB,score41.mL, data41.tc,mod41.tc,data42.tc,mod42.tc,score41.tc,score42.tc, data41.sw,mod41.sw,data42.sw,mod42.sw,score41.sw,score42.sw, data41.swtc,mod41.swtc,data42.swtc,mod42.swtc,score41.swtc,score42.swtc, mod42.CB,mod42.mB,mod42.mL, file='allMod_NBS.Rdata') ##################################### # evaluate models in validation set # ##################################### y<-Surv(trainData.clin$time,trainData.clin$status) y.new<-Surv(validData.clin$time,validData.clin$status) # ----------------------- # # prediction error curves # # ----------------------- # prec<-list() prec$modClin<-computePEC(mod.clin,y,clinDataDummies,y.new,clinDataDummiesV) # strategy 1 # # ---------- # prec$mod1.u<-computePEC(mod1.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes)) prec$mod1.CB<-computePEC(mod1.CB,y,tdataDummies.01,y.new,vdataDummies.01) prec$mod1.mB<-computePEC(mod1.mB,y,tdataDummies.c,y.new,vdataDummies.c,cvPar=cv.mB) prec$mod1.lasso<-computePEC(mL,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv.mL) prec$mod1.sw<-computePEC(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 2 # # ---------- # prec$mod2.u<-computePEC(mod2.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.tc<-computePEC(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) prec$mod2.CB<-computePEC(mod2.CB,y,studGenes01,y.new,studGenesV01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.mB<-computePEC(mod2.mB,y,centrGenes,y.new,centrGenesV,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, prec$mod2.lasso<-computePEC(mL2,y,studGenes01,y.new,studGenesV01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)), prec$mod2.lasso<-prec$modClin) prec$mod2.sw<-computePEC(mod2.sw,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.swtc<-computePEC(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 3 # # ---------- # prec$mod3.u<-computePEC(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) prec$mod3.CB<-computePEC(mod3.CB,y,tdataDummies.01,y.new,vdataDummies.01) prec$mod3.lasso<-computePEC(mL3,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv3.mL) prec$mod3.sw<-computePEC(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenesV01[,score41.u],mol.score.new<-as.matrix(studGenesV01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.u','score.clin') prec$mod41.u<-computePEC(mod41.u,y,x,y.new,x.new) ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenesV01[,score42.u],mol.score.new<-as.matrix(studGenesV01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.u','score.clin') prec$mod42.u<-computePEC(mod42.u,y,x,y.new,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenesV01[,score41.tc],mol.score.new<-as.matrix(studGenesV01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.tc','score.clin') prec$mod41.tc<-computePEC(mod41.tc,y,x,y.new,x.new) ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenesV01[,score42.tc],mol.score.new<-as.matrix(studGenesV01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.tc','score.clin') prec$mod42.tc<-computePEC(mod42.tc,y,x,y.new,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,t(predict(mod4.CB,newdata=studGenesV01)),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.CB','score.clin') prec$mod41.CB<-computePEC(mod41.CB,y,x,y.new,x.new) prec$mod42.CB<-computePEC(mod42.CB,y,x,y.new,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenesV),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.mB','score.clin') prec$mod41.mB<-computePEC(mod41.mB,y,x,y.new,x.new) prec$mod42.mB<-computePEC(mod42.mB,y,x,y.new,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mL41,newx=studGenesV01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.mL','score.clin') prec$mod41.lasso<-computePEC(mod41.mL,y,x,y.new,x.new) prec$mod42.lasso<-computePEC(mod42.mL,y,x,y.new,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenesV01[,score41.sw],mol.score.new<-as.matrix(studGenesV01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.sw','score.clin') prec$mod41.sw<-computePEC(mod41.sw,y,x,y.new,x.new) ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenesV01[,score42.sw],mol.score.new<-as.matrix(studGenesV01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.sw','score.clin') prec$mod42.sw<-computePEC(mod42.sw,y,x,y.new,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenesV01[,score41.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.swtc','score.clin') prec$mod41.swtc<-computePEC(mod41.swtc,y,x,y.new,x.new) ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenesV01[,score42.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.swtc','score.clin') prec$mod42.swtc<-computePEC(mod42.swtc,y,x,y.new,x.new) # ---------------------- # # Integrated Brier score # # ---------------------- # ibs5<-lapply(prec,ibs,times=5) # ----------------- # # concordance-index # # ----------------- # cScore<-list() cScore$mod.clin<-computeCindex(mod.clin,y,clinDataDummies,y.new,clinDataDummiesV) # strategy 1 # # ---------- # cScore$mod1.u<-computeCindex(mod1.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes)) cScore$mod1.cb<-computeCindex(mod1.CB,y,tdataDummies.01,y.new,vdataDummies.01) cScore$mod1.mb<-computeCindex(mod1.mB,y,tdataDummies.c,y.new,vdataDummies.c,cvPar=cv.mB) cScore$mod1.lasso<-computeCindex(mL,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv.mL) cScore$mod1.sw<-computeCindex(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 2 # # ---------- # cScore$mod2.u<-computeCindex(mod2.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) cScore$mod2.tc<-computeCindex(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) cScore$mod2.CB<-computeCindex(mod2.CB,y,studGenes01,y.new,studGenesV01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) cScore$mod2.mB<-computeCindex(mod2.mB,y,centrGenes,y.new,centrGenesV,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, cScore$mod2.lasso<-computeCindex(mL2,y,studGenes01,y.new,studGenesV01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))), cScore$mod2.lasso<-cScore$mod.clin) cScore$mod2.sw<-computeCindex(mod2.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) cScore$mod2.swtc<-computeCindex(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 3 # # ---------- # cScore$mod3.u<-computeCindex(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) cScore$mod3.CB<-computeCindex(mod3.CB,y,tdataDummies.01,y.new,vdataDummies.01) cScore$mod3.lasso<-computeCindex(mL3,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv3.mL) cScore$mod3.sw<-computeCindex(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenesV01[,score41.u],mol.score.new<-as.matrix(studGenesV01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.u','score.clin') cScore$mod41.u<-computeCindex(mod41.u,y,x,y.new,x.new) ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenesV01[,score42.u],mol.score.new<-as.matrix(studGenesV01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.u','score.clin') cScore$mod42.u<-computeCindex(mod42.u,y,x,y.new,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenesV01[,score41.tc],mol.score.new<-as.matrix(studGenesV01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.tc','score.clin') cScore$mod41.tc<-computeCindex(mod41.tc,y,x,y.new,x.new) ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenesV01[,score42.tc],mol.score.new<-as.matrix(studGenesV01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.tc','score.clin') cScore$mod42.tc<-computeCindex(mod42.tc,y,x,y.new,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,t(predict(mod4.CB,newdata=studGenesV01)),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.CB','score.clin') cScore$mod41.CB<-computeCindex(mod41.CB,y,x,y.new,x.new) cScore$mod42.CB<-computeCindex(mod42.CB,y,x,y.new,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenesV),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.mB','score.clin') cScore$mod41.mB<-computeCindex(mod41.mB,y,x,y.new,x.new) cScore$mod42.mB<-computeCindex(mod42.mB,y,x,y.new,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mL41,newx=studGenesV01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.mL','score.clin') cScore$mod41.lasso<-computeCindex(mod41.mL,y,x,y.new,x.new) cScore$mod42.lasso<-computeCindex(mod42.mL,y,x,y.new,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenesV01[,score41.sw],mol.score.new<-as.matrix(studGenesV01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.sw','score.clin') cScore$mod41.sw<-computeCindex(mod41.sw,y,x,y.new,x.new) ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenesV01[,score42.sw],mol.score.new<-as.matrix(studGenesV01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.sw','score.clin') cScore$mod42.sw<-computeCindex(mod42.sw,y,x,y.new,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenesV01[,score41.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score41.swtc','score.clin') cScore$mod41.swtc<-computeCindex(mod41.swtc,y,x,y.new,x.new) ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenesV01[,score42.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score42.swtc','score.clin') cScore$mod42.swtc<-computeCindex(mod42.swtc,y,x,y.new,x.new) save(ibs5,cScore,file='evalIndeces.Rdata') ###################### # produce the tables # ###################### library(xtable) # -------------- # # clinical model # # -------------- # sink('table4.txt') summary(mod.clin) sink() # ---- # # ibs5 # # ---- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') null<-ibs5$mod1.u[1] clin<-ibs5$modClin[2] out[1,1]<-ibs5$mod1.u[2] out[4,1]<-ibs5$mod1.CB[2] out[5,1]<-ibs5$mod1.mB[2] out[3,1]<-ibs5$mod1.lasso[2] out[2,1]<-ibs5$mod1.sw[2] out[1,2]<-ibs5$mod2.u[2] out[6,2]<-ibs5$mod2.tc[2] out[4,2]<-ibs5$mod2.CB[2] out[5,2]<-ibs5$mod2.mB[2] out[3,2]<-ibs5$mod2.lasso[2] out[2,2]<-ibs5$mod2.sw[2] out[7,2]<-ibs5$mod2.swtc[2] out[1,3]<-ibs5$mod3.u[2] out[4,3]<-ibs5$mod3.CB[2] out[3,3]<-ibs5$mod3.lasso[2] out[2,3]<-ibs5$mod3.sw[2] out[1,4]<-ibs5$mod41.u[2] out[6,4]<-ibs5$mod41.tc[2] out[4,4]<-ibs5$mod41.CB[2] out[5,4]<-ibs5$mod41.mB[2] out[3,4]<-ibs5$mod41.lasso[2] out[2,4]<-ibs5$mod41.sw[2] out[7,4]<-ibs5$mod41.swtc[2] out[1,5]<-ibs5$mod42.u[2] out[6,5]<-ibs5$mod42.tc[2] out[4,5]<-ibs5$mod42.CB[2] out[5,5]<-ibs5$mod42.mB[2] out[3,5]<-ibs5$mod42.lasso[2] out[2,5]<-ibs5$mod42.sw[2] out[7,5]<-ibs5$mod42.swtc[2] pdf('plotIBS5_NBS.pdf') barplot(out,beside=T,ylim=c(0,0.1),legend.text=T,ylab='integrated Brier score', main='overall predictive ability',args.legend=c(x=12,y=0.1,cex=0.7)) abline(h=0.077,lty=2) dev.off() # ------- # # C-index # # ------- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') clin<-as.numeric(cScore$mod.clin) out[1,1]<-as.numeric(cScore$mod1.u) out[4,1]<-as.numeric(cScore$mod1.cb) out[5,1]<-as.numeric(cScore$mod1.mb) out[3,1]<-as.numeric(cScore$mod1.lasso) out[2,1]<-as.numeric(cScore$mod1.sw) out[1,2]<-as.numeric(cScore$mod2.u) out[6,2]<-as.numeric(cScore$mod2.tc) out[4,2]<-as.numeric(cScore$mod2.CB) out[5,2]<-as.numeric(cScore$mod2.mB) out[3,2]<-as.numeric(cScore$mod2.lasso) out[2,2]<-as.numeric(cScore$mod2.sw) out[7,2]<-as.numeric(cScore$mod2.swtc) out[1,3]<-as.numeric(cScore$mod3.u) out[4,3]<-as.numeric(cScore$mod3.CB) out[3,3]<-as.numeric(cScore$mod3.lasso) out[2,3]<-as.numeric(cScore$mod3.sw) out[1,4]<-as.numeric(cScore$mod41.u) out[6,4]<-as.numeric(cScore$mod41.tc) out[4,4]<-as.numeric(cScore$mod41.CB) out[5,4]<-as.numeric(cScore$mod41.mB) out[3,4]<-as.numeric(cScore$mod41.lasso) out[2,4]<-as.numeric(cScore$mod41.sw) out[7,4]<-as.numeric(cScore$mod41.swtc) out[1,5]<-as.numeric(cScore$mod42.u) out[6,5]<-as.numeric(cScore$mod42.tc) out[4,5]<-as.numeric(cScore$mod42.CB) out[5,5]<-as.numeric(cScore$mod42.mB) out[3,5]<-as.numeric(cScore$mod42.lasso) out[2,5]<-as.numeric(cScore$mod42.sw) out[7,5]<-as.numeric(cScore$mod42.swtc) pdf('plotCindex_NBS.pdf') barplot(out,beside=T,ylim=c(0,1),legend.text=T,ylab='C-index', main='discriminative ability',args.legend=c(x=32,y=1.05,ncol=3,cex=0.7)) abline(h=clin,lty=2) dev.off()