# Analysis of breast cancer datasets by Hatzis et al. (2011) # ############################################################## library(combPreds,lib='../../../R/myrlibrary/') ################# # read the data # ################# source("http://bioconductor.org/biocLite.R") biocLite("GEOquery") require(GEOquery) GSE25055<-getGEO("GSE25055", GSEMatrix = TRUE) GSE25065<-getGEO("GSE25065", GSEMatrix = TRUE) trainData.genes<-t(exprs(GSE25055[[1]])) validData.genes<-t(exprs(GSE25065[[1]])) tDc<-pData(phenoData(GSE25055[[1]]))[,10:32] vDc<-pData(phenoData(GSE25065[[1]]))[,10:32] trainData.clin<-as.data.frame(matrix(0,dim(tDc))) trainData.clin$ID<-substr(tDc$characteristics_ch1,12,1e2) trainData.clin$src<-as.factor(substr(tDc$characteristics_ch1.1,10,1e2)) trainData.clin$age<-as.numeric(substr(tDc$characteristics_ch1.2,12,1e2)) trainData.clin$er<-substr(tDc$characteristics_ch1.3,16,1e2) trainData.clin$er[trainData.clin$er=='NA']<-NA trainData.clin$er<-as.factor(trainData.clin$er) trainData.clin$pr<-substr(tDc$characteristics_ch1.4,16,1e2) trainData.clin$pr[trainData.clin$pr=='NA']<-NA trainData.clin$pr<-as.factor(trainData.clin$pr) trainData.clin$her2<-substr(tDc$characteristics_ch1.5,14,1e2) trainData.clin$her2[trainData.clin$her2=='NA']<-NA trainData.clin$her2<-as.factor(trainData.clin$her2) trainData.clin$er_woI<-as.factor(substr(tDc$characteristics_ch1.6,39,1e2)) trainData.clin$t_stage<-as.factor(substr(tDc$characteristics_ch1.7,19,1e2)) trainData.clin$nodal_status<-as.factor(substr(tDc$characteristics_ch1.8,24,1e2)) trainData.clin$ajcc_stage<-as.factor(substr(tDc$characteristics_ch1.9,22,1e2)) trainData.clin$grade<-substr(tDc$characteristics_ch1.10,8,1e2) trainData.clin$grade[trainData.clin$grade=='4=Indeterminate']<-NA trainData.clin$grade[trainData.clin$grade=='NA']<-NA trainData.clin$grade<-as.factor(trainData.clin$grade) trainData.clin$pcr_rd<-substr(tDc$characteristics_ch1.11,29,1e2) trainData.clin$pcr_rd[trainData.clin$pcr_rd=='NA']<-NA trainData.clin$pcr_rd<-as.factor(trainData.clin$pcr_rd) trainData.clin$rcb_class<-substr(tDc$characteristics_ch1.12,32,1e2) trainData.clin$rcb_class[trainData.clin$rcb_class=='NA']<-NA trainData.clin$rcb_class<-as.factor(trainData.clin$rcb_class) trainData.clin$drfs_cens<-as.numeric(substr(tDc$characteristics_ch1.13,26,1e2)) # 0 censored, 1 event trainData.clin$drfs_time<-as.numeric(substr(tDc$characteristics_ch1.14,23,1e2)) # in years trainData.clin$esr1_status<-as.factor(substr(tDc$characteristics_ch1.15,14,1e2)) trainData.clin$erbb2_status<-as.factor(substr(tDc$characteristics_ch1.16,15,1e2)) trainData.clin$set_class<-as.factor(substr(tDc$characteristics_ch1.17,12,1e2)) trainData.clin$chemosensitivity_prediction<-as.factor(substr(tDc$characteristics_ch1.18,30,1e2)) trainData.clin$ggi_class<-as.factor(substr(tDc$characteristics_ch1.19,12,1e2)) trainData.clin$pam50_class<-as.factor(substr(tDc$characteristics_ch1.20,14,1e2)) trainData.clin$dlda30_prediction<-as.factor(substr(tDc$characteristics_ch1.21,20,1e2)) trainData.clin$rcb_prediction<-as.factor(substr(tDc$characteristics_ch1.22,21,1e2)) trainData.clin<-trainData.clin[,-1] validData.clin<-as.data.frame(matrix(0,dim(vDc))) validData.clin$ID<-substr(vDc$characteristics_ch1,12,1e2) validData.clin$src<-as.factor(substr(vDc$characteristics_ch1.1,10,1e2)) validData.clin$age<-as.numeric(substr(vDc$characteristics_ch1.2,12,1e2)) validData.clin$er<-substr(vDc$characteristics_ch1.3,16,1e2) validData.clin$er[validData.clin$er=='NA']<-NA validData.clin$er<-as.factor(validData.clin$er) validData.clin$pr<-substr(vDc$characteristics_ch1.4,16,1e2) validData.clin$pr[validData.clin$pr=='NA']<-NA validData.clin$pr<-as.factor(validData.clin$pr) validData.clin$her2<-substr(vDc$characteristics_ch1.5,14,1e2) validData.clin$her2[validData.clin$her2=='NA']<-NA validData.clin$her2<-as.factor(validData.clin$her2) validData.clin$t_stage<-as.factor(substr(vDc$characteristics_ch1.6,19,1e2)) validData.clin$nodal_status<-as.factor(substr(vDc$characteristics_ch1.7,24,1e2)) validData.clin$ajcc_stage<-as.factor(substr(vDc$characteristics_ch1.8,22,1e2)) validData.clin$grade<-substr(vDc$characteristics_ch1.9,8,1e2) validData.clin$grade[validData.clin$grade=='23']<-NA validData.clin$grade[validData.clin$grade=='NA']<-NA validData.clin$grade<-as.factor(validData.clin$grade) validData.clin$pcr_rd<-substr(vDc$characteristics_ch1.10,29,1e2) validData.clin$pcr_rd[validData.clin$pcr_rd=='NA']<-NA validData.clin$pcr_rd<-as.factor(validData.clin$pcr_rd) validData.clin$rcb_class<-substr(vDc$characteristics_ch1.11,32,1e2) validData.clin$rcb_class[validData.clin$rcb_class=='NA']<-NA validData.clin$rcb_class<-as.factor(validData.clin$rcb_class) validData.clin$drfs_cens<-as.numeric(substr(vDc$characteristics_ch1.12,26,1e2)) # 0 censored, 1 event validData.clin$drfs_time<-as.numeric(substr(vDc$characteristics_ch1.13,23,1e2)) # in years validData.clin$taxane<-as.factor(substr(vDc$characteristics_ch1.14,14,1e2)) validData.clin$esr1_status<-as.factor(substr(vDc$characteristics_ch1.15,14,1e2)) validData.clin$erbb2_status<-as.factor(substr(vDc$characteristics_ch1.16,15,1e2)) validData.clin$set_class<-as.factor(substr(vDc$characteristics_ch1.17,12,1e2)) validData.clin$chemosensitivity_prediction<-as.factor(substr(vDc$characteristics_ch1.18,30,1e2)) validData.clin$ggi_class<-as.factor(substr(vDc$characteristics_ch1.19,12,1e2)) validData.clin$pam50_class<-as.factor(substr(vDc$characteristics_ch1.20,14,1e2)) validData.clin$dlda30_prediction<-as.factor(substr(vDc$characteristics_ch1.21,20,1e2)) validData.clin$rcb_prediction<-as.factor(substr(vDc$characteristics_ch1.22,21,1e2)) validData.clin<-validData.clin[,-1] save(trainData.genes,validData.genes,trainData.clin,validData.clin,file='dataHatzis.Rdata') ####################### # preprocess the data # ####################### # ------------ # # training set # # ------------ # # change names to variables in order to be able to manage them colnames(trainData.genes)<-paste('x',colnames(trainData.genes),sep='') colnames(trainData.genes)<-sapply(colnames(trainData.genes),gsub,pattern='-',replacement='_') colnames(trainData.genes)<-sapply(colnames(trainData.genes),gsub,pattern='/',replacement='_') # dichotomize age using 40 as threshold (as suggested by Saurbrei, 1999) trainData.clin$age<-ifelse(trainData.clin$age<=40,'L40','M40') trainData.clin$age<-as.factor(trainData.clin$age) # consider grade 'indeterminate' as NA trainData.clin$pr[trainData.clin$pr=='I']<-NA trainData.clin$pr<-as.character(trainData.clin$pr) trainData.clin$pr<-as.factor(trainData.clin$pr) # since T0 (t stage) has only 3 occourence, let T0 and T1 collapse in an unique group T01 trainData.clin$t_stage<-as.character(trainData.clin$t_stage) trainData.clin$t_stage[trainData.clin$t_stage=='T0']<-'T01' trainData.clin$t_stage[trainData.clin$t_stage=='T1']<-'T01' trainData.clin$t_stage<-as.factor(trainData.clin$t_stage) # add epsilon to observation 10 in the training dataset because time = 0 trainData.clin$drfs_time<-trainData.clin$drfs_time + 1e-6 # data.frame with dummies for clinical covariates n<-dim(trainData.clin)[1] ageL40<-ageM40<-erN<-erP<-prN<-prP<-T01<-T2<-T3<-T4<-N0<-N1<-N2<-N3<-G1<-G2<-G3<-rep(0,n) ageM40[trainData.clin$age=='M40']<-1 erP[trainData.clin$er_woI=='P']<-1 prP[trainData.clin$pr=='P']<-1 prP[is.na(trainData.clin$pr)]<-prN[is.na(trainData.clin$pr)]<-NA T4[trainData.clin$t_stage=='T4']<-1 T3[trainData.clin$t_stage=='T3']<-1 T2[trainData.clin$t_stage=='T2']<-1 N3[trainData.clin$nodal_status=='N3']<-1 N2[trainData.clin$nodal_status=='N2']<-1 N1[trainData.clin$nodal_status=='N1']<-1 G3[trainData.clin$grade=='3']<-1 G2[trainData.clin$grade=='2']<-1 G1[is.na(trainData.clin$grade)]<-G2[is.na(trainData.clin$grade)]<-G3[is.na(trainData.clin$grade)]<-NA clinDataDummies<-NULL clinDataDummies$ageL40<-ageL40 clinDataDummies$ageM40<-ageM40 clinDataDummies$erN<-erN clinDataDummies$erP<-erP clinDataDummies$prN<-prN clinDataDummies$prP<-prP clinDataDummies$T01<-T01 clinDataDummies$T2<-T2 clinDataDummies$T3<-T3 clinDataDummies$T4<-T4 clinDataDummies$N0<-N0 clinDataDummies$N1<-N1 clinDataDummies$N2<-N2 clinDataDummies$N3<-N3 clinDataDummies$G1<-G1 clinDataDummies$G2<-G2 clinDataDummies$G3<-G3 clinDataDummies<-as.data.frame(clinDataDummies) rm(ageL40,ageM40,erN,erP,prN,prP,T01,T2,T3,T4,N0,N1,N2,N3,G1,G2,G3) # rows not containing missing values noNA<-apply(is.na(trainData.clin[,c(15,14,3,5,7,8,9,11)]),1,sum)==0 # use only row without NA for better comparison trainData.genes<-trainData.genes[noNA,] trainData.clin<-trainData.clin[noNA,] clinDataDummies<-clinDataDummies[noNA,] # studentize molecular variables meanGenes<-apply(trainData.genes,2,mean) sdGenes<-apply(trainData.genes,2,sd) studGenes01<-scale(trainData.genes) centrGenes<-scale(trainData.genes,scale=F) tdataDummies<-cbind(clinDataDummies[,-c(1,3,5,7,11,15)],trainData.genes) tdataDummies.01<-cbind(clinDataDummies[,-c(1,3,5,7,11,15)],studGenes01) tdataDummies.c<-cbind(clinDataDummies[,-c(1,3,5,7,11,15)],centrGenes) # -------------- # # validation set # # -------------- # # change names to genes in order to be able to manage them colnames(validData.genes)<-paste('x',colnames(validData.genes),sep='') colnames(validData.genes)<-sapply(colnames(validData.genes),gsub,pattern='-',replacement='_') colnames(validData.genes)<-sapply(colnames(validData.genes),gsub,pattern='/',replacement='_') colnames(validData.clin)[4]<-'er_woI' # dichotomize age using 40 as threshold (see Sauerbrei et al., 1999) validData.clin$age<-ifelse(validData.clin$age<=40,'L40','M40') validData.clin$age<-as.factor(validData.clin$age) # since T0 (t stage) has only 3 occourence, let T0 and T1 collapse in an unique group T01 validData.clin$t_stage<-as.character(validData.clin$t_stage) validData.clin$t_stage[validData.clin$t_stage=='T0']<-'T01' validData.clin$t_stage[validData.clin$t_stage=='T1']<-'T01' validData.clin$t_stage<-as.factor(validData.clin$t_stage) # data.frame with dummies for clinical covariates n<-dim(validData.clin)[1] ageL40<-ageM40<-erN<-erP<-prN<-prP<-T01<-T2<-T3<-T4<-N0<-N1<-N2<-N3<-G1<-G2<-G3<-rep(0,n) ageM40[validData.clin$age=='M40']<-1 erP[validData.clin$er_woI=='P']<-1 prP[validData.clin$pr=='P']<-1 prP[is.na(validData.clin$pr)]<-prN[is.na(validData.clin$pr)]<-NA T4[validData.clin$t_stage=='T4']<-1 T3[validData.clin$t_stage=='T3']<-1 T2[validData.clin$t_stage=='T2']<-1 N3[validData.clin$nodal_status=='N3']<-1 N2[validData.clin$nodal_status=='N2']<-1 N1[validData.clin$nodal_status=='N1']<-1 G3[validData.clin$grade=='3']<-1 G2[validData.clin$grade=='2']<-1 G1[is.na(validData.clin$grade)]<-G2[is.na(validData.clin$grade)]<-G3[is.na(validData.clin$grade)]<-NA clinDataDummiesV<-NULL clinDataDummiesV$ageL40<-ageL40 clinDataDummiesV$ageM40<-ageM40 clinDataDummiesV$erN<-erN clinDataDummiesV$erP<-erP clinDataDummiesV$prN<-prN clinDataDummiesV$prP<-prP clinDataDummiesV$T01<-T01 clinDataDummiesV$T2<-T2 clinDataDummiesV$T3<-T3 clinDataDummiesV$T4<-T4 clinDataDummiesV$N0<-N0 clinDataDummiesV$N1<-N1 clinDataDummiesV$N2<-N2 clinDataDummiesV$N3<-N3 clinDataDummiesV$G1<-G1 clinDataDummiesV$G2<-G2 clinDataDummiesV$G3<-G3 clinDataDummiesV<-as.data.frame(clinDataDummiesV) rm(ageL40,ageM40,erN,erP,prN,prP,T01,T2,T3,T4,N0,N1,N2,N3,G1,G2,G3) # rows not containing missing values noNA<-apply(is.na(validData.clin[,c(14,13,3,4,5,7,8,10)]),1,sum)==0 # use only row without NA for better comparison validData.genes<-validData.genes[noNA,] validData.clin<-validData.clin[noNA,] clinDataDummiesV<-clinDataDummiesV[noNA,] # studentize molecular variables studGenesV01<-centrGenesV<-validData.genes for (i in 1:length(sdGenes)) { studGenesV01[,i]<-(validData.genes[,i]-meanGenes[i])/sdGenes[i] centrGenesV[,i]<-validData.genes[,i]-meanGenes[i] } vdataDummies<-cbind(clinDataDummiesV[,-c(1,3,5,7,11,15)],validData.genes) vdataDummies.01<-cbind(clinDataDummiesV[,-c(1,3,5,7,11,15)],studGenesV01) vdataDummies.c<-cbind(clinDataDummiesV[,-c(1,3,5,7,11,15)],centrGenesV) # order and delete columns trainData.clin<-trainData.clin[,c(15,14,3,5,7,8,9,11)] names(trainData.clin)<-c('time','status','age','pr','er','t_stage','nodal_status','grade') validData.clin<-validData.clin[,c(14,13,3,5,4,7,8,10)] names(validData.clin)<-c('time','status','age','pr','er','t_stage','nodal_status','grade') clinDataDummies<-clinDataDummies[,-c(1,3,5,7,11,15)] names(clinDataDummies)<-c("ageM40","erP","prP","t_stageT2","t_stageT3","t_stageT4","nodal_statusN1","nodal_statusN2","nodal_statusN3","grade2","grade3") clinDataDummiesV<-clinDataDummiesV[,-c(1,3,5,7,11,15)] names(clinDataDummiesV)<-c("ageM40","erP","prP","t_stageT2","t_stageT3","t_stageT4","nodal_statusN1","nodal_statusN2","nodal_statusN3","grade2","grade3") save(trainData.genes,validData.genes,trainData.clin,validData.clin, clinDataDummies,tdataDummies,tdataDummies.c,tdataDummies.01,studGenes01,centrGenes, clinDataDummiesV,vdataDummies,vdataDummies.c,vdataDummies.01,studGenesV01,centrGenesV, file='dataHatzis_preproc.Rdata') #################### # tuning parameter # #################### tunPar<-list() nGenesCV<-list() # -------- # # CoxBoost # # -------- # # strategy 1: set.seed(3641) tmp<-optimCoxBoostPenalty(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),standardize=FALSE,parallel=F) # parallel computation can cause some problem, we use sequential computation tunPar$cb1.pen<-tmp$penalty tmp<-cv.CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),standardize=FALSE,penalty=tunPar$cb1.pen,maxstepno=2e2) tunPar$cb1.mstop<-tmp$optimal.step # strategy 4: set.seed(6084) tmp<-optimCoxBoostPenalty(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),standardize=FALSE) # parallel computation can cause some problem, we use sequential computation tunPar$cb4.pen<-tmp$penalty tmp<-cv.CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),standardize=FALSE,penalty=tunPar$cb4.pen,maxstepno=2e2) tunPar$cb4.mstop<-tmp$optimal.step save(tunPar,file='tunParCB.Rdata') # strategy 2: library(snowfall) sfInit(par=T,cpus=180,type='MPI') sfSource('CoxBoostMod.R') sfLibrary(combPreds,lib='../../../R/myrlibrary/') criteria<-'likelihoodCV' foldCV<-10 penalties<-seq(500,4000,by=500) n<-length(trainData.clin$time) set.seed(11) ifelse(foldCV>=(n-2),index<-c(1:n),index<-sample(c(rep(c(1:foldCV),floor(n/foldCV)),sample(1:foldCV,n-foldCV*floor(n/foldCV))))) step<-function(i,time,cens,studGenes01,index,penalty,clinDataDummies,criteria='brier',seed=NULL) { if(!is.null(seed)) set.seed(seed[i]) brierScore<-function(j,i,time,cens,studGenes01,index,penalty,clinDataDummies) { mod.clin.dummies<-coxph(Surv(time,cens)~.,cbind(time,cens,clinDataDummies)[index!=j,]) mod<-CoxBoostMod(time[index!=j],cens[index!=j],as.matrix(studGenes01[index!=j,]),penalty=penalty,stepno=i,standardize=FALSE,offset=mod.clin.dummies$linear.predictors) dataTmp<-data.frame(time[index!=j],cens[index!=j],studGenes01[index!=j,mod$coefficients[mod$stepno+1,]!=0]) names(dataTmp)<-c('time','cens',colnames(studGenes01)[mod$coefficients[mod$stepno+1,]!=0]) model<-coxph(Surv(time,cens)~offset(mod.clin.dummies$linear.predictors)+.,data=dataTmp,init=mod$coefficients[mod$stepno+1,mod$coefficients[mod$stepno+1,]!=0],iter=0) computeIBS(model,cbind(time,cens)[index!=j,],studGenes01[index!=j,],cbind(time,cens)[index==j,],studGenes01[index==j,],offset=mod.clin.dummies$linear.predictors,offset.new=predict(mod.clin.dummies,newdata=cbind(time,cens,clinDataDummies)[index==j,]),maxtime=5) } cvllik<-function(j,i,time,cens,studGenes01,index,penalty,clinDataDummies) { mod.clin.dummies<-coxph(Surv(time,cens)~.,data=data.frame(time,cens,clinDataDummies)) mod<-CoxBoostMod(time[index!=j],cens[index!=j],as.matrix(studGenes01[index!=j,]),penalty=penalty,stepno=i,standardize=FALSE,offset=mod.clin.dummies$linear.predictors[index!=j]) dataTmp<-data.frame(time[index==j],cens[index==j],studGenes01[index==j,mod$coefficients[mod$stepno+1,]!=0]) names(dataTmp)<-c('time','cens',mod$xnames[mod$coefficients[mod$stepno+1,]!=0]) model<-coxph(Surv(time,cens)~offset(mod.clin.dummies$linear.predictors[index==j])+.,data=dataTmp,init=mod$coefficients[mod$stepno+1,mod$coefficients[mod$stepno+1,]!=0],iter=0) -model$loglik[2] } if (criteria=='brier') res<-mean(sapply(c(1:10),brierScore,i=i,time=time,cens=cens,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies)) if (criteria=='likelihoodCV') res<-mean(sapply(c(1:10),cvllik,i=i,time=time,cens=cens,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies)) res } res<-list() seed<-673:852 for (a in 1:length(penalties)) { res[[a]]<-sfSapply(seq(21,200,by=1),step,time=trainData.clin$time,cens=trainData.clin$status,studGenes01=studGenes01,index=index,penalty=penalties[a],clinDataDummies=clinDataDummies,criteria=criteria,seed=seed) print(a) } mstop<-lapply(res,which.min) pen<-which.min(lapply(res,min)) tunPar$cb2.mstop<-20+mstop[[pen]] tunPar$cb2.pen<-penalties[pen] save(tunPar,file='tunParCB_cv.Rdata') # strategy 3: step3<-function(i,time,status,studGenes01,index,penalty,clinDataDummies,unpen.index,criteria,seed=NULL) { if(!is.null(seed)) set.seed(seed[i]) brierScore<-function(j,i,time,status,studGenes01,index,penalty,clinDataDummies,unpen.index) { mod<-CoxBoost(time[index!=j],status[index!=j],as.matrix(cbind(clinDataDummies,studGenes01)[index!=j,]),unpen.index=unpen.index,penalty=penalty,stepno=i,standardize=FALSE) computeIBS(mod,cbind(time,status)[index!=j,],cbind(clinDataDummies,studGenes01)[index!=j,],cbind(time,status)[index==j,],cbind(clinDataDummies,studGenes01)[index==j,],maxtime=5) } cvllik<-function(j,i,time,status,studGenes01,index,penalty,clinDataDummies,unpen.index) { mod<-CoxBoost(time[index!=j],status[index!=j],as.matrix(cbind(clinDataDummies,studGenes01)[index!=j,]),unpen.index=unpen.index,penalty=penalty,stepno=i,standardize=FALSE) dataTmp<-data.frame(time[index==j],status[index==j],cbind(clinDataDummies,studGenes01)[index==j,mod$coefficients[mod$stepno+1,]!=0]) names(dataTmp)<-c('time','status',mod$xnames[mod$coefficients[mod$stepno+1,]!=0]) model<-coxph(Surv(time,status)~.,data=dataTmp,init=mod$coefficients[mod$stepno+1,mod$coefficients[mod$stepno+1,]!=0],iter=0) -model$loglik[2] } tmp<-NULL if (criteria=='brier') for (j in 1:10) tmp[j]<-try(brierScore(j=j,i=i,time=time,status=status,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies,unpen.index=unpen.index)) if (criteria=='likelihoodCV') for (j in 1:10) tmp[j]<-try(cvllik(j=j,i=i,time=time,status=status,studGenes01=studGenes01,index=index,penalty=penalty,clinDataDummies=clinDataDummies,unpen.index=unpen.index)) mean(as.numeric(tmp),na.rm=T) } res<-list() seed<-68:247 for (a in 1:length(penalties)) { res[[a]]<-sfSapply(seq(21,200,by=1),step3,time=trainData.clin$time,status=trainData.clin$status,studGenes01=studGenes01,index=index,penalty=penalties[a],clinDataDummies=clinDataDummies,unpen.index=c(1:11),criteria=criteria,seed=seed) print(a) } mstop<-lapply(res,which.min) pen<-which.min(lapply(res,min)) tunPar$cb3.pen<-penalties[pen] tunPar$cb3.mstop<-20+mstop[[pen]] save(tunPar,file='tunParCB_cv.Rdata') sfStop() # -------------------- # # Univariate selection # # -------------------- # seed<-39:63 nGenesCV$ur1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=1,cpus=25,type='MPI',method='univariate',criteria='brier',maxtime=5,seed=seed) seed<-6431:6455 nGenesCV$ur2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='univariate',criteria='brier',maxtime=5,seed=seed) seed<-684:708 nGenesCV$ur3<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=3,cpus=25,type='MPI',method='univariate',criteria='brier',maxtime=5,seed=seed) seed<-525:549 nGenesCV$ur4.1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='univariate',criteria='brier',maxtime=5,seed=seed) seed<-60:84 nGenesCV$ur4.2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='univariate',criteria='brier',maxtime=5,seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # ------------------------------------ # # Univariate selection with adjustment # # ------------------------------------ # seed<-6914:6938 nGenesCV$tcr2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='univariateAdj',criteria='brier',maxtime=5,seed=seed) seed<-5:29 nGenesCV$tcr41<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='univariateAdj',criteria='brier',maxtime=5,seed=seed) seed<-35:59 nGenesCV$tcr42<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='univariateAdj',criteria='brier',maxtime=5,seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # ----------------- # # Forward selection # # ----------------- # seed<-581:605 nGenesCV$sw1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=1,cpus=25,type='MPI',method='forward',criteria='brier',maxtime=5,seed=seed) seed<-3981:4005 nGenesCV$sw2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='forward',criteria='brier',maxtime=5,seed=seed) seed<-513:537 nGenesCV$sw3<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=3,cpus=25,type='MPI',method='forward',criteria='brier',maxtime=5,seed=seed) seed<-783:807 nGenesCV$sw4.1<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='forward',criteria='brier',maxtime=5,seed=seed) seed<-2963:2987 nGenesCV$sw4.2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='forward',criteria='brier',maxtime=5,seed=seed) save(nGenesCV,file='nGenesCV.Rdata') # -------------------- # # Forward TC selection # # -------------------- # seed<-697:721 nGenesCV$swtc2<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=2,cpus=25,type='MPI',method='forwardAdj',criteria='brier',maxtime=5,seed=seed) seed<-7435:7459 nGenesCV$swtc41<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.1,cpus=25,type='MPI',method='forwardAdj',criteria='brier',maxtime=5,seed=seed) seed<-834:858 nGenesCV$swtc42<-cv.tuningParam(cbind(trainData.clin$time,trainData.clin$status),trainData.clin[,-c(1,2)],trainData.genes,foldCV=10,strategy=4.2,cpus=25,type='MPI',method='forwardAdj',criteria='brier',maxtime=5,seed=seed) save(nGenesCV,file='nGenesCV.Rdata') ###################### # training the model # ###################### set.seed(3419) # ++++++++++++++++++ # # Strategy 1 (naive) # # ++++++++++++++++++ # # univatiate selection # # -------------------- # data1.u<-univariateSelection(data=cbind(trainData.clin,trainData.genes),nSel=nGenesCV$ur1) mod1.u<-coxph(Surv(time,status)~.,data=data1.u) # boosting # # -------- # # CoxBoost mod1.CB<-CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),penalty=tunPar$cb1.pen,stepno=tunPar$cb1.mstop,standardize=FALSE) # mboost mod1.mB<-glmboost(as.matrix(tdataDummies.c),Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod1.mB), type = "kfold") set.seed(6942) cv.mB <- cvrisk(mod1.mB, folds=cv10f, papply = lapply) # LASSO # # ----- # set.seed(164) cv.mL<-cv.glmnet(x=as.matrix(tdataDummies.01),y=Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) mL<-glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) # forward selection # # ------------------ # data1.sw<-forwardSelection(cbind(trainData.clin,trainData.genes),nSel=nGenesCV$sw1) mod1.sw<-coxph(Surv(time,status)~.,data=data1.sw) # ++++++++++++++++++++++ # # strategy 2 (residuals) # # ++++++++++++++++++++++ # # compute linear predictor for the offset mod.clin<-coxph(Surv(time,status)~age+pr+er+t_stage+nodal_status+grade,data=trainData.clin) score.clin<-lin.pred<-mod.clin$linear.predictors # univatiate selection # # -------------------- # data2.u<-univariateSelection(data=cbind(trainData.clin[,c(1,2)],trainData.genes),offset=lin.pred,nSel=nGenesCV$ur2) mod2.u<-coxph(Surv(time,status)~offset(lin.pred)+.,data=data2.u) # univatiate selection TC # # ----------------------- # data2.tc<-univariateSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$tcr2) mod2.tc<-coxph(Surv(time,status)~.,data=data2.tc) # boosting # # -------- # # with CoxBoost source('CoxBoostMod.R') mod2.CB<-CoxBoostMod(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),penalty=tunPar$cb2.pen,stepno=tunPar$cb2.mstop,standardize=FALSE,offset=lin.pred) # with mboost mod2.mB<-glmboost(centrGenes,Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),offset=lin.pred,control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod2.mB), type = "kfold") set.seed(8385) cv2.mB <- cvrisk(mod2.mB, folds=cv10f, papply = lapply) # LASSO # # ----- # set.seed(5230) cv2.mL<-cv.glmnet(studGenes01,Surv(trainData.clin$time,trainData.clin$status),offset=lin.pred,family='cox',standardize=FALSE) mL2<-glmnet(studGenes01,Surv(trainData.clin$time,trainData.clin$status),offset=lin.pred,family='cox',standardize=FALSE) # forward selection # # ------------------ # data2.sw<-forwardSelection(data=cbind(trainData.clin[,c(1,2)],trainData.genes),offset=lin.pred,nSel=nGenesCV$sw2) mod2.sw<-coxph(Surv(time,status)~offset(lin.pred)+.,data=data2.sw) # forward selection TC # # ----------------------- # data2.swtc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$swtc2) mod2.swtc<-coxph(Surv(time,status)~.,data=data2.swtc) # +++++++++++++++++++++ # # strategy 3 (favoring) # # +++++++++++++++++++++ # # univatiate selection # # -------------------- # data3.u<-univariateSelection(cbind(trainData.clin,trainData.genes),nFav=6,nSel=nGenesCV$ur3) mod3.u<-coxph(Surv(time,status)~.,data=data3.u) # boosting # # -------- # # we can use only CoxBoost mod3.CB<-try(CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(tdataDummies.01),unpen.index=c(1:11),penalty=tunPar$cb3.pen,stepno=tunPar$cb3.mstop,standardize=FALSE)) # LASSO # # ----- # set.seed(943) cv3.mL<-cv.glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),penalty.factor=c(rep(0,11),rep(1,22283)),family='cox',standardize=FALSE) mL3<-glmnet(as.matrix(tdataDummies.01),Surv(trainData.clin$time,trainData.clin$status),penalty.factor=c(rep(0,11),rep(1,22283)),family='cox',standardize=FALSE) # forward selection # # ------------------ # data3.sw<-forwardSelection(cbind(trainData.clin,trainData.genes),nFav=6,nSel=nGenesCV$sw3) mod3.sw<-coxph(Surv(time,status)~.,data=data3.sw) # ++++++++++++++++++++++++++++++++++ # # strategy 4 (dimensional reduction) # # ++++++++++++++++++++++++++++++++++ # data42.u<-score42.u<-data42.tc<-score42.tc<-data42.sw<-score42.sw<-data42.swtc<-score42.swtc<-NULL # univatiate selection # # -------------------- # data41.u<-univariateSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$ur4.1) score41.u<-prcomp(studGenes01[,names(data41.u)[-c(1:2)]]) mol.score<-score41.u$x[,1] mod41.u<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) if(nGenesCV$ur4.1!=nGenesCV$ur4.2) { data42.u<-univariateSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$ur4.2) ifelse(dim(data42.u)[2]>3,score42.u<-prcomp(studGenes01[,names(data42.u)[-c(1:2)]]),score42.u<-names(data42.u)[3]) ifelse(dim(data42.u)[2]>3,mol.score<-score42.u$x[,1],mol.score<-data41.u[,3]) } mod42.u<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # univatiate selection TC # # ----------------------- # data41.tc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$tcr41) ifelse(dim(data41.tc)[2]>9,score41.tc<-prcomp(studGenes01[,names(data41.tc)[-c(1:8)]]),score41.tc<-names(data41.tc)[9]) ifelse(dim(data41.tc)[2]>9,mol.score<-score41.tc$x[,1],mol.score<-data41.tc[,9]) mod41.tc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) if(nGenesCV$tcr41!=nGenesCV$tcr42) { data42.tc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$tcr42) ifelse(dim(data42.tc)[2]>9,score42.tc<-prcomp(studGenes01[,names(data42.tc)[-c(1:8)]]),score42.tc<-names(data42.tc)[9]) ifelse(dim(data42.tc)[2]>9,mol.score<-score42.tc$x[,1],mol.score<-data42.tc[,9]) } mod42.tc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # boosting # # -------- # # using CoxBoost mod4.CB<-CoxBoost(trainData.clin$time,trainData.clin$status,as.matrix(studGenes01),penalty=tunPar$cb4.pen,stepno=tunPar$cb4.mstop,standardize=FALSE) score41.CB<-mod4.CB$coefficients[tunPar$cb4.mstop+1,mod4.CB$coefficients[tunPar$cb4.mstop+1,]!=0] mol.score<-mod4.CB$linear.predictor[tunPar$cb4.mstop+1,] mod41.CB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) mod42.CB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # using mboost mod4.mB<-glmboost(centrGenes,Surv(trainData.clin$time,trainData.clin$status),family=CoxPH(),control=boost_control(mstop=2e2),center=T) cv10f <- cv(model.weights(mod4.mB), type = "kfold") set.seed(3179) cv4.mB <- cvrisk(mod4.mB, folds=cv10f, papply = lapply) score41.mB<-coef(mod4.mB[mstop(cv4.mB)]) mol.score<-fitted(mod4.mB[mstop(cv4.mB)]) mod41.mB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) mod42.mB<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # LASSO # # ----- # set.seed(3562) cv41.mL<-cv.glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) mL41<-glmnet(as.matrix(studGenes01),Surv(trainData.clin$time,trainData.clin$status),family='cox',standardize=FALSE) iR41<-which(mL41$lambda==cv41.mL$lambda.min) score41.mL<-mL41$beta[mL41$beta[,iR41]!=0,iR41] mol.score<-predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min) colnames(mol.score)<-'mol.score' mod41.mL<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) mod42.mL<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # forward selection # # ------------------ # data41.sw<-forwardSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$sw4.1) ifelse(dim(data41.sw)[2]>3,score41.sw<-prcomp(studGenes01[,names(data41.sw)[-c(1:2)]]),score41.swtc<-names(data41.swtc)[3]) ifelse(dim(data41.sw)[2]>3,mol.score<-score41.sw$x[,1],mol.score<-data41.sw[,3]) mod41.sw<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) if(nGenesCV$sw4.1!=nGenesCV$sw4.2) { data42.sw<-forwardSelection(cbind(trainData.clin[,c(1,2)],trainData.genes),nSel=nGenesCV$sw4.2) ifelse(dim(data42.sw)[2]>3,score42.sw<-prcomp(studGenes01[,names(data42.sw)[-c(1:2)]]),score42.sw<-names(data42.sw)[3]) ifelse(dim(data42.sw)[2]>3,mol.score<-score42.sw$x[,1],mol.score<-data42.sw[,3]) } mod42.sw<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) # forward selection TC # # --------------------- # data41.swtc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$swtc41) ifelse(dim(data41.swtc)[2]>9,score41.swtc<-prcomp(studGenes01[,names(data41.swtc)[-c(1:8)]]),score41.swtc<-names(data41.swtc)[9]) ifelse(dim(data41.swtc)[2]>9,mol.score<-score41.swtc$x[,1],mol.score<-data41.swtc[,9]) mod41.swtc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin,mol.score)) if(nGenesCV$swtc41!=nGenesCV$swtc42) { data42.swtc<-forwardSelection(data=cbind(trainData.clin,trainData.genes),nCov=6,nSel=nGenesCV$swtc42) ifelse(dim(data42.swtc)[2]>9,score42.swtc<-prcomp(studGenes01[,names(data42.swtc)[-c(1:8)]]),score42.swtc<-names(data42.swtc)[9]) ifelse(dim(data42.swtc)[2]>9,mol.score<-score42.swtc$x[,1],mol.score<-data42.swtc[,9]) } mod42.swtc<-coxph(Surv(time,status)~.,data=cbind(trainData.clin[,c(1:2)],score.clin,mol.score)) save(data1.u,mod1.u,mod1.CB,mod1.mB,cv.mB,cv.mL,mL,data1.sw,mod1.sw, data2.u,mod2.u,mod2.CB,mod2.mB,cv2.mB,cv2.mL,mL2,data2.tc,mod2.tc,data2.sw,mod2.sw,data2.swtc,mod2.swtc,mod.clin, data3.u,mod3.u,mod3.CB,cv3.mL,mL3,data3.sw,mod3.sw, data41.u,mod41.u,data42.u,mod42.u,score41.u,score42.u, mod4.CB,mod41.CB,mod4.mB,cv4.mB,mod41.mB,cv41.mL,mL41,mod41.mL,score41.CB,score41.mB,score41.mL, data41.tc,mod41.tc,data42.tc,mod42.tc,score41.tc,score42.tc, data41.sw,mod41.sw,data42.sw,mod42.sw,score41.sw,score42.sw, data41.swtc,mod41.swtc,data42.swtc,mod42.swtc,score41.swtc,score42.swtc, mod42.CB,mod42.mB,mod42.mL, file='allMod_breast.Rdata') ##################################### # evaluate models in validation set # ##################################### y<-Surv(trainData.clin$time,trainData.clin$status) y.new<-Surv(validData.clin$time,validData.clin$status) # ----------------------- # # prediction error curves # # ----------------------- # prec<-list() prec$modClin<-computePEC(mod.clin,y,clinDataDummies,y.new,clinDataDummiesV) # strategy 1 # # ---------- # prec$mod1.u<-computePEC(mod1.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes)) prec$mod1.CB<-computePEC(mod1.CB,y,tdataDummies.01,y.new,vdataDummies.01) prec$mod1.mB<-computePEC(mod1.mB,y,tdataDummies.c,y.new,vdataDummies.c,cvPar=cv.mB) prec$mod1.lasso<-computePEC(mL,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv.mL) prec$mod1.sw<-computePEC(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 2 # # ---------- # prec$mod2.u<-computePEC(mod2.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.tc<-computePEC(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) prec$mod2.CB<-computePEC(mod2.CB,y,studGenes01,y.new,studGenesV01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.mB<-computePEC(mod2.mB,y,centrGenes,y.new,centrGenesV,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, prec$mod2.lasso<-computePEC(mL2,y,studGenes01,y.new,studGenesV01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)), prec$mod2.lasso<-prec$modClin) prec$mod2.sw<-computePEC(mod2.sw,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset.new=predict(mod.clin,newdata=validData.clin)) prec$mod2.swtc<-computePEC(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 3 # # ---------- # prec$mod3.u<-computePEC(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) prec$mod3.CB<-computePEC(mod3.CB,y,tdataDummies.01,y.new,vdataDummies.01) prec$mod3.lasso<-computePEC(mL3,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv3.mL) prec$mod3.sw<-computePEC(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenesV01[,score41.u],mol.score.new<-as.matrix(studGenesV01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.u<-computePEC(mod41.u,y,x,y.new,x.new) if(!is.null(score42.u)) { ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenesV01[,score42.u],mol.score.new<-as.matrix(studGenesV01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec$mod42.u<-computePEC(mod42.u,y,x,y.new,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenesV01[,score41.tc],mol.score.new<-as.matrix(studGenesV01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.tc<-computePEC(mod41.tc,y,x,y.new,x.new) if(!is.null(score42.tc)) { ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenesV01[,score42.tc],mol.score.new<-as.matrix(studGenesV01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec$mod42.tc<-computePEC(mod42.tc,y,x,y.new,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,t(predict(mod4.CB,newdata=studGenesV01)),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.CB<-computePEC(mod41.CB,y,x,y.new,x.new) prec$mod42.CB<-computePEC(mod42.CB,y,x,y.new,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenesV),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.mB<-computePEC(mod41.mB,y,x,y.new,x.new) prec$mod42.mB<-computePEC(mod42.mB,y,x,y.new,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mL41,newx=studGenesV01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.lasso<-computePEC(mod41.mL,y,x,y.new,x.new) prec$mod42.lasso<-computePEC(mod42.mL,y,x,y.new,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenesV01[,score41.sw],mol.score.new<-as.matrix(studGenesV01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.sw<-computePEC(mod41.sw,y,x,y.new,x.new) if(!is.null(score42.sw)) { ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenesV01[,score42.sw],mol.score.new<-as.matrix(studGenesV01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec$mod42.sw<-computePEC(mod42.sw,y,x,y.new,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenesV01[,score41.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec$mod41.swtc<-computePEC(mod41.swtc,y,x,y.new,x.new) if(!is.null(score42.swtc)) { ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenesV01[,score42.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec$mod42.swtc<-computePEC(mod42.swtc,y,x,y.new,x.new) # ---------------------- # # Integrated Brier score # # ---------------------- # ibs5<-lapply(prec,ibs,times=5) # ----------------- # # concordance-index # # ----------------- # cScore<-list() cScore$mod.clin<-computeCindex(mod.clin,y,clinDataDummies,y.new,clinDataDummiesV) # strategy 1 # # ---------- # cScore$mod1.u<-computeCindex(mod1.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes)) cScore$mod1.cb<-computeCindex(mod1.CB,y,tdataDummies.01,y.new,vdataDummies.01) cScore$mod1.mb<-computeCindex(mod1.mB,y,tdataDummies.c,y.new,vdataDummies.c,cvPar=cv.mB) cScore$mod1.lasso<-computeCindex(mL,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv.mL) cScore$mod1.sw<-computeCindex(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 2 # # ---------- # cScore$mod2.u<-computeCindex(mod2.u,y,cbind(trainData.clin,trainData.genes),y.new,cbind(validData.clin,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=validData.clin)) cScore$mod2.tc<-computeCindex(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) cScore$mod2.CB<-computeCindex(mod2.CB,y,studGenes01,y.new,studGenesV01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) cScore$mod2.mB<-computeCindex(mod2.mB,y,centrGenes,y.new,centrGenesV,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, cScore$mod2.lasso<-computeCindex(mL2,y,studGenes01,y.new,studGenesV01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))), cScore$mod2.lasso<-cScore$mod.clin) cScore$mod2.sw<-computeCindex(mod2.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(validData.clin))) cScore$mod2.swtc<-computeCindex(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 3 # # ---------- # cScore$mod3.u<-computeCindex(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) cScore$mod3.CB<-computeCindex(mod3.CB,y,tdataDummies.01,y.new,vdataDummies.01) cScore$mod3.lasso<-computeCindex(mL3,y,tdataDummies.01,y.new,vdataDummies.01,cvPar=cv3.mL) cScore$mod3.sw<-computeCindex(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y.new,cbind(validData.clin[,c(1,2)],clinDataDummiesV,validData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenesV01[,score41.u],mol.score.new<-as.matrix(studGenesV01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.u<-computeCindex(mod41.u,y,x,y.new,x.new) if(!is.null(score42.u)) { ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenesV01[,score42.u],mol.score.new<-as.matrix(studGenesV01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore$mod42.u<-computeCindex(mod42.u,y,x,y.new,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenesV01[,score41.tc],mol.score.new<-as.matrix(studGenesV01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.tc<-computeCindex(mod41.tc,y,x,y.new,x.new) if(!is.null(score42.tc)) { ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenesV01[,score42.tc],mol.score.new<-as.matrix(studGenesV01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore$mod42.tc<-computeCindex(mod42.tc,y,x,y.new,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,t(predict(mod4.CB,newdata=studGenesV01)),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.CB<-computeCindex(mod41.CB,y,x,y.new,x.new) cScore$mod42.CB<-computeCindex(mod42.CB,y,x,y.new,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenesV),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.mB<-computeCindex(mod41.mB,y,x,y.new,x.new) cScore$mod42.mB<-computeCindex(mod42.mB,y,x,y.new,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummiesV,predict(mL41,newx=studGenesV01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.lasso<-computeCindex(mod41.mL,y,x,y.new,x.new) cScore$mod42.lasso<-computeCindex(mod42.mL,y,x,y.new,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenesV01[,score41.sw],mol.score.new<-as.matrix(studGenesV01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.sw<-computeCindex(mod41.sw,y,x,y.new,x.new) if(!is.null(score42.sw)) { ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenesV01[,score42.sw],mol.score.new<-as.matrix(studGenesV01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore$mod42.sw<-computeCindex(mod42.sw,y,x,y.new,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenesV01[,score41.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore$mod41.swtc<-computeCindex(mod41.swtc,y,x,y.new,x.new) if(!is.null(score42.swtc)) { ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenesV01[,score42.swtc],mol.score.new<-as.matrix(studGenesV01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore$mod42.swtc<-computeCindex(mod42.swtc,y,x,y.new,x.new) save(ibs5,cScore,file='evalIndeces.Rdata') ################################### # evaluate models in training set # ################################### # ----------------------- # # prediction error curves # # ----------------------- # prec_t<-list() prec_t$modClin<-computePEC(mod.clin,y,clinDataDummies,y,clinDataDummies) # strategy 1 # # ---------- # prec_t$mod1.u<-computePEC(mod1.u,y,cbind(trainData.clin,trainData.genes),y,cbind(trainData.clin,trainData.genes)) prec_t$mod1.CB<-computePEC(mod1.CB,y,tdataDummies.01,y,tdataDummies.01) prec_t$mod1.mB<-computePEC(mod1.mB,y,tdataDummies.c,y,tdataDummies.c,cvPar=cv.mB) prec_t$mod1.lasso<-computePEC(mL,y,tdataDummies.01,y,tdataDummies.01,cvPar=cv.mL) prec_t$mod1.sw<-computePEC(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 2 # # ---------- # prec_t$mod2.u<-computePEC(mod2.u,y,cbind(trainData.clin,trainData.genes),y,cbind(trainData.clin,trainData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=trainData.clin)) prec_t$mod2.tc<-computePEC(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) prec_t$mod2.CB<-computePEC(mod2.CB,y,studGenes01,y,studGenes01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=trainData.clin)) prec_t$mod2.mB<-computePEC(mod2.mB,y,centrGenes,y,centrGenes,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=trainData.clin)) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, prec_t$mod2.lasso<-computePEC(mL2,y,studGenes01,y,studGenes01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=trainData.clin)), prec_t$mod2.lasso<-prec_t$modClin) prec_t$mod2.sw<-computePEC(mod2.sw,y,cbind(trainData.clin,trainData.genes),y,cbind(trainData.clin,trainData.genes),offset.new=predict(mod.clin,newdata=trainData.clin)) prec_t$mod2.swtc<-computePEC(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 3 # # ---------- # prec_t$mod3.u<-computePEC(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) prec_t$mod3.CB<-computePEC(mod3.CB,y,tdataDummies.01,y,tdataDummies.01) prec_t$mod3.lasso<-computePEC(mL3,y,tdataDummies.01,y,tdataDummies.01,cvPar=cv3.mL) prec_t$mod3.sw<-computePEC(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenes01[,score41.u],mol.score.new<-as.matrix(studGenes01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.u<-computePEC(mod41.u,y,x,y,x.new) if(!is.null(score42.u)) { ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenes01[,score42.u],mol.score.new<-as.matrix(studGenes01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec_t$mod42.u<-computePEC(mod42.u,y,x,y,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenes01[,score41.tc],mol.score.new<-as.matrix(studGenes01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.tc<-computePEC(mod41.tc,y,x,y,x.new) if(!is.null(score42.tc)) { ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenes01[,score42.tc],mol.score.new<-as.matrix(studGenes01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec_t$mod42.tc<-computePEC(mod42.tc,y,x,y,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,t(predict(mod4.CB,newdata=studGenes01)),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.CB<-computePEC(mod41.CB,y,x,y,x.new) prec_t$mod42.CB<-computePEC(mod42.CB,y,x,y,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenes),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.mB<-computePEC(mod41.mB,y,x,y,x.new) prec_t$mod42.mB<-computePEC(mod42.mB,y,x,y,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.lasso<-computePEC(mod41.mL,y,x,y,x.new) prec_t$mod42.lasso<-computePEC(mod42.mL,y,x,y,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenesV01[,score41.sw],mol.score.new<-as.matrix(studGenesV01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.sw<-computePEC(mod41.sw,y,x,y,x.new) if(!is.null(score42.sw)) { ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenes01[,score42.sw],mol.score.new<-as.matrix(studGenes01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec_t$mod42.sw<-computePEC(mod42.sw,y,x,y,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenes01[,score41.swtc],mol.score.new<-as.matrix(studGenes01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') prec_t$mod41.swtc<-computePEC(mod41.swtc,y,x,y,x.new) if(!is.null(score42.swtc)) { ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenes01[,score42.swtc],mol.score.new<-as.matrix(studGenes01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } prec_t$mod42.swtc<-computePEC(mod42.swtc,y,x,y,x.new) # ---------------------- # # Integrated Brier score # # ---------------------- # ibs5_t<-lapply(prec_t,ibs,times=5) # ----------------- # # concordance-index # # ----------------- # cScore_t<-list() cScore_t$mod.clin<-computeCindex(mod.clin,y,clinDataDummies,y,clinDataDummies) # strategy 1 # # ---------- # cScore_t$mod1.u<-computeCindex(mod1.u,y,cbind(trainData.clin,trainData.genes),y,cbind(trainData.clin,trainData.genes)) cScore_t$mod1.cb<-computeCindex(mod1.CB,y,tdataDummies.01,y,tdataDummies.01) cScore_t$mod1.mb<-computeCindex(mod1.mB,y,tdataDummies.c,y,tdataDummies.c,cvPar=cv.mB) cScore_t$mod1.lasso<-computeCindex(mL,y,tdataDummies.01,y,tdataDummies.01,cvPar=cv.mL) cScore_t$mod1.sw<-computeCindex(mod1.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 2 # # ---------- # cScore_t$mod2.u<-computeCindex(mod2.u,y,cbind(trainData.clin,trainData.genes),y,cbind(trainData.clin,trainData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=trainData.clin)) cScore_t$mod2.tc<-computeCindex(mod2.tc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) cScore_t$mod2.CB<-computeCindex(mod2.CB,y,studGenes01,y,studGenes01,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(trainData.clin))) cScore_t$mod2.mB<-computeCindex(mod2.mB,y,centrGenes,y,centrGenes,cvPar=cv2.mB,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(trainData.clin))) ifelse(sum(mL2$beta[,which(mL2$lambda==cv2.mL$lambda.min)])>0, cScore_t$mod2.lasso<-computeCindex(mL2,y,studGenes01,y,studGenes01,cvPar=cv2.mL,offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(trainData.clin))), cScore_t$mod2.lasso<-cScore_t$mod.clin) cScore_t$mod2.sw<-computeCindex(mod2.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),offset=mod.clin$linear.predictors,offset.new=predict(mod.clin,newdata=as.data.frame(trainData.clin))) cScore_t$mod2.swtc<-computeCindex(mod2.swtc,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 3 # # ---------- # cScore_t$mod3.u<-computeCindex(mod3.u,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) cScore_t$mod3.CB<-computeCindex(mod3.CB,y,tdataDummies.01,y,tdataDummies.01) cScore_t$mod3.lasso<-computeCindex(mL3,y,tdataDummies.01,y,tdataDummies.01,cvPar=cv3.mL) cScore_t$mod3.sw<-computeCindex(mod3.sw,y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes),y,cbind(trainData.clin[,c(1,2)],clinDataDummies,trainData.genes)) # strategy 4 # # ---------- # ifelse(is.character(score41.u),mol.score<-studGenes01[,score41.u],mol.score<-score41.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.u),mol.score.new<-studGenes01[,score41.u],mol.score.new<-as.matrix(studGenes01[,names(score41.u$rotation[,1])])%*%score41.u$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.u<-computeCindex(mod41.u,y,x,y,x.new) if(!is.null(score42.u)) { ifelse(is.character(score42.u),mol.score<-studGenes01[,score42.u],mol.score<-score42.u$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.u),mol.score.new<-studGenes01[,score42.u],mol.score.new<-as.matrix(studGenes01[,names(score42.u$rotation[,1])])%*%score42.u$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore_t$mod42.u<-computeCindex(mod42.u,y,x,y,x.new) ifelse(is.character(score41.tc),mol.score<-studGenes01[,score41.tc],mol.score<-score41.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.tc),mol.score.new<-studGenes01[,score41.tc],mol.score.new<-as.matrix(studGenes01[,names(score41.tc$rotation[,1])])%*%score41.tc$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.tc<-computeCindex(mod41.tc,y,x,y,x.new) if(!is.null(score42.tc)) { ifelse(is.character(score42.tc),mol.score<-studGenes01[,score42.tc],mol.score<-score42.tc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.tc),mol.score.new<-studGenes01[,score42.tc],mol.score.new<-as.matrix(studGenes01[,names(score42.tc$rotation[,1])])%*%score42.tc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore_t$mod42.tc<-computeCindex(mod42.tc,y,x,y,x.new) x<-cbind(clinDataDummies,mod4.CB$linear.predictor[mod4.CB$stepno+1,],mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,t(predict(mod4.CB,newdata=studGenes01)),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.CB<-computeCindex(mod41.CB,y,x,y,x.new) cScore_t$mod42.CB<-computeCindex(mod42.CB,y,x,y,x.new) x<-cbind(clinDataDummies,fitted(mod4.mB[mstop(cv4.mB)]),mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,predict(mod4.mB[mstop(cv4.mB)],newdata=centrGenes),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.mB<-computeCindex(mod41.mB,y,x,y,x.new) cScore_t$mod42.mB<-computeCindex(mod42.mB,y,x,y,x.new) x<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),mod.clin$linear.predictors) x.new<-cbind(clinDataDummies,predict(mL41,newx=studGenes01,s=cv41.mL$lambda.min),predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.lasso<-computeCindex(mod41.mL,y,x,y,x.new) cScore_t$mod42.lasso<-computeCindex(mod42.mL,y,x,y,x.new) ifelse(is.character(score41.sw),mol.score<-studGenes01[,score41.sw],mol.score<-score41.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.sw),mol.score.new<-studGenes01[,score41.sw],mol.score.new<-as.matrix(studGenes01[,names(score41.sw$rotation[,1])])%*%score41.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.sw<-computeCindex(mod41.sw,y,x,y,x.new) if(!is.null(score42.sw)) { ifelse(is.character(score42.sw),mol.score<-studGenes01[,score42.sw],mol.score<-score42.sw$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.sw),mol.score.new<-studGenes01[,score42.sw],mol.score.new<-as.matrix(studGenes01[,names(score42.sw$rotation[,1])])%*%score42.sw$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore_t$mod42.sw<-computeCindex(mod42.sw,y,x,y,x.new) ifelse(is.character(score41.swtc),mol.score<-studGenes01[,score41.swtc],mol.score<-score41.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score41.swtc),mol.score.new<-studGenes01[,score41.swtc],mol.score.new<-as.matrix(studGenes01[,names(score41.swtc$rotation[,1])])%*%score41.swtc$rotation[,1]) x.new<-cbind(clinDataDummies,mol.score.new,predict(mod.clin,newdata=trainData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') cScore_t$mod41.swtc<-computeCindex(mod41.swtc,y,x,y,x.new) if(!is.null(score42.swtc)) { ifelse(is.character(score42.swtc),mol.score<-studGenes01[,score42.swtc],mol.score<-score42.swtc$x[,1]) x<-cbind(clinDataDummies,mol.score,mod.clin$linear.predictors) ifelse(is.character(score42.swtc),mol.score.new<-studGenes01[,score42.swtc],mol.score.new<-as.matrix(studGenes01[,names(score42.swtc$rotation[,1])])%*%score42.swtc$rotation[,1]) x.new<-cbind(clinDataDummiesV,mol.score.new,predict(mod.clin,newdata=validData.clin)) names(x)<-names(x.new)<-c(names(clinDataDummies),'mol.score','score.clin') } cScore_t$mod42.swtc<-computeCindex(mod42.swtc,y,x,y,x.new) save(ibs5_t,cScore_t,file='evalIndeces_train.Rdata') ####################################### # produce the tables and the graphics # ####################################### library(xtable) # -------------- # # clinical model # # -------------- # sink('table2.txt') summary(mod.clin) sink() # ---- # # ibs5 # # ---- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') null<-ibs5$mod1.u[1] clin<-ibs5$modClin[2] out[1,1]<-ibs5$mod1.u[2] out[4,1]<-ibs5$mod1.CB[2] out[5,1]<-ibs5$mod1.mB[2] out[3,1]<-ibs5$mod1.lasso[2] out[2,1]<-ibs5$mod1.sw[2] out[1,2]<-ibs5$mod2.u[2] out[6,2]<-ibs5$mod2.tc[2] out[4,2]<-ibs5$mod2.CB[2] out[5,2]<-ibs5$mod2.mB[2] out[3,2]<-ibs5$mod2.lasso[2] out[2,2]<-ibs5$mod2.sw[2] out[7,2]<-ibs5$mod2.swtc[2] out[1,3]<-ibs5$mod3.u[2] out[4,3]<-ibs5$mod3.CB[2] out[3,3]<-ibs5$mod3.lasso[2] out[2,3]<-ibs5$mod3.sw[2] out[1,4]<-ibs5$mod41.u[2] out[6,4]<-ibs5$mod41.tc[2] out[4,4]<-ibs5$mod41.CB[2] out[5,4]<-ibs5$mod41.mB[2] out[3,4]<-ibs5$mod41.lasso[2] out[2,4]<-ibs5$mod41.sw[2] out[7,4]<-ibs5$mod41.swtc[2] out[1,5]<-ibs5$mod42.u[2] out[6,5]<-ibs5$mod42.tc[2] out[4,5]<-ibs5$mod42.CB[2] out[5,5]<-ibs5$mod42.mB[2] out[3,5]<-ibs5$mod42.lasso[2] out[2,5]<-ibs5$mod42.sw[2] out[7,5]<-ibs5$mod42.swtc[2] pdf('plotIBS5_breast.pdf') barplot(out,beside=T,ylim=c(0,0.20),legend.text=T,ylab='integrated Brier score',args.legend=c(x=12,y=0.20,cex=0.7),main='validation set') abline(h=ibs5$modClin[2],lty=2) dev.off() # ------- # # C-index # # ------- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') clin<-cScore$mod.clin out[1,1]<-cScore$mod1.u out[4,1]<-cScore$mod1.cb out[5,1]<-cScore$mod1.mb out[3,1]<-cScore$mod1.lasso out[2,1]<-cScore$mod1.sw out[1,2]<-cScore$mod2.u out[6,2]<-cScore$mod2.tc out[4,2]<-cScore$mod2.CB out[5,2]<-cScore$mod2.mB out[3,2]<-cScore$mod2.lasso out[2,2]<-cScore$mod2.sw out[7,2]<-cScore$mod2.swtc out[1,3]<-cScore$mod3.u out[4,3]<-cScore$mod3.CB out[3,3]<-cScore$mod3.lasso out[2,3]<-cScore$mod3.sw out[1,4]<-cScore$mod41.u out[6,4]<-cScore$mod41.tc out[4,4]<-cScore$mod41.CB out[5,4]<-cScore$mod41.mB out[3,4]<-cScore$mod41.lasso out[2,4]<-cScore$mod41.sw out[7,4]<-cScore$mod41.swtc out[1,5]<-cScore$mod42.u out[6,5]<-cScore$mod42.tc out[4,5]<-cScore$mod42.CB out[5,5]<-cScore$mod42.mB out[3,5]<-cScore$mod42.lasso out[2,5]<-cScore$mod42.sw out[7,5]<-cScore$mod42.swtc pdf('plotCindex_breast.pdf') barplot(out,beside=T,ylim=c(0,1),legend.text=T,ylab='C-index',args.legend=c(x=12,y=1,cex=0.7),main='validation set') abline(h=cScore$mod.clin,lty=2) dev.off() # -------------------- # # number of covariates # # -------------------- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') clin<-c(6,0) out[1,1]<-NA # check manually with summary(mod1.u) out[3,1]<-NA # check manually with mod1.CB$xnames[mod1.CB$coefficients[mod1.CB$stepno+1,]!=0] out[4,1]<-NA # check manually with coef(mod1.mB[mstop(cv.mB)]) out[5,1]<-NA # check manually with mL$beta[mL$beta[,mL$lambda==cv.mL$lambda.min]!=0,mL$lambda==cv.mL$lambda.min] out[6,1]<-NA # check manually with summary(mod1.sw) out[1,2]<-c(6,nGenesCV$ur2) out[2,2]<-c(6,nGenesCV$tcr2) out[3,2]<-c(6,length(mod2.CB$xnames[mod2.CB$coefficients[mod2.CB$stepno+1,]!=0])) out[4,2]<-c(6,length(coef(mod2.mB[mstop(cv2.mB)]))) out[5,2]<-c(6,length(mL2$beta[mL2$beta[,mL2$lambda==cv2.mL$lambda.min]!=0,mL2$lambda==cv2.mL$lambda.min])) out[6,2]<-c(6,nGenesCV$sw2) out[7,2]<-c(6,nGenesCV$swtc2) out[1,3]<-nGenesCV$ur3 out[3,3]<-c(6,NA) # check manually with mod3.CB$xnames[mod3.CB$coefficients[mod3.CB$stepno+1,]!=0] out[5,3]<-c(6,NA) # check manually with mL3$beta[mL3$beta[,mL3$lambda==cv3.mL$lambda.min]!=0,mL3$lambda==cv3.mL$lambda.min] out[6,3]<-nGenesCV$ur3 out[1,4]<-c(6,nGenesCV$ur4.1) out[2,4]<-c(6,nGenesCV$tcr4.1) out[3,4]<-c(6,length(mod4.CB$xnames[mod4.CB$coefficients[mod4.CB$stepno+1,]!=0])) out[4,4]<-c(6,length(coef(mod4.mB[mstop(cv4.mB)]))) out[5,4]<-c(6,length(mL41$beta[mL41$beta[,mL41$lambda==cv41.mL$lambda.min]!=0,mL41$lambda==cv41.mL$lambda.min])) out[6,4]<-c(6,nGenesCV$sw4.1) out[7,4]<-c(6,nGenesCV$swtc41) out[1,5]<-c(6,nGenesCV$ur4.2) out[2,5]<-c(6,nGenesCV$tcr4.2) out[3,5]<-c(6,length(mod4.CB$xnames[mod4.CB$coefficients[mod4.CB$stepno+1,]!=0])) out[4,5]<-c(6,length(coef(mod4.mB[mstop(cv4.mB)]))) out[5,5]<-c(6,length(mL41$beta[mL41$beta[,mL41$lambda==cv41.mL$lambda.min]!=0,mL41$lambda==cv41.mL$lambda.min])) out[6,5]<-c(6,nGenesCV$sw4.2) out[7,5]<-c(6,nGenesCV$swtc42) sink('table3.txt') xtable(out) sink() # ------------------- # # ibs5 (training set) # # ------------------- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') null<-ibs5_t$mod1.u[1] clin<-ibs5_t$modClin[2] out[1,1]<-ibs5_t$mod1.u[2] out[4,1]<-ibs5_t$mod1.CB[2] out[5,1]<-ibs5_t$mod1.mB[2] out[3,1]<-ibs5_t$mod1.lasso[2] out[2,1]<-ibs5_t$mod1.sw[2] out[1,2]<-ibs5_t$mod2.u[2] out[6,2]<-ibs5_t$mod2.tc[2] out[4,2]<-ibs5_t$mod2.CB[2] out[5,2]<-ibs5_t$mod2.mB[2] out[3,2]<-ibs5_t$mod2.lasso[2] out[2,2]<-ibs5_t$mod2.sw[2] out[7,2]<-ibs5_t$mod2.swtc[2] out[1,3]<-ibs5_t$mod3.u[2] out[4,3]<-ibs5_t$mod3.CB[2] out[3,3]<-ibs5_t$mod3.lasso[2] out[2,3]<-ibs5_t$mod3.sw[2] out[1,4]<-ibs5_t$mod41.u[2] out[6,4]<-ibs5_t$mod41.tc[2] out[4,4]<-ibs5_t$mod41.CB[2] out[5,4]<-ibs5_t$mod41.mB[2] out[3,4]<-ibs5_t$mod41.lasso[2] out[2,4]<-ibs5_t$mod41.sw[2] out[7,4]<-ibs5_t$mod41.swtc[2] out[1,5]<-ibs5_t$mod42.u[2] out[6,5]<-ibs5_t$mod42.tc[2] out[4,5]<-ibs5_t$mod42.CB[2] out[5,5]<-ibs5_t$mod42.mB[2] out[3,5]<-ibs5_t$mod42.lasso[2] out[2,5]<-ibs5_t$mod42.sw[2] out[7,5]<-ibs5_t$mod42.swtc[2] pdf('plotIBS5_train.pdf') barplot(out,beside=T,ylim=c(0,0.20),legend.text=T,ylab='integrated Brier score',args.legend=c(x=12,y=0.20,cex=0.7),main='training set') abline(h=ibs5_t$modClin[2],lty=2) dev.off() # ------- # # C-index # # ------- # out<-matrix(NA,7,5) rownames(out)<-c('univariate selection','forward selection','lasso','offset boosting','gradient boosting','univariate sel. with adj.','forward sel. with adj.') colnames(out)<-c('naive','residuals','favoring','mol. score','scores') clin<-cScore_t$mod.clin out[1,1]<-cScore_t$mod1.u out[4,1]<-cScore_t$mod1.cb out[5,1]<-cScore_t$mod1.mb out[3,1]<-cScore_t$mod1.lasso out[2,1]<-cScore_t$mod1.sw out[1,2]<-cScore_t$mod2.u out[6,2]<-cScore_t$mod2.tc out[4,2]<-cScore_t$mod2.CB out[5,2]<-cScore_t$mod2.mB out[3,2]<-cScore_t$mod2.lasso out[2,2]<-cScore_t$mod2.sw out[7,2]<-cScore_t$mod2.swtc out[1,3]<-cScore_t$mod3.u out[4,3]<-cScore_t$mod3.CB out[3,3]<-cScore_t$mod3.lasso out[2,3]<-cScore_t$mod3.sw out[1,4]<-cScore_t$mod41.u out[6,4]<-cScore_t$mod41.tc out[4,4]<-cScore_t$mod41.CB out[5,4]<-cScore_t$mod41.mB out[3,4]<-cScore_t$mod41.lasso out[2,4]<-cScore_t$mod41.sw out[7,4]<-cScore_t$mod41.swtc out[1,5]<-cScore_t$mod42.u out[6,5]<-cScore_t$mod42.tc out[4,5]<-cScore_t$mod42.CB out[5,5]<-cScore_t$mod42.mB out[3,5]<-cScore_t$mod42.lasso out[2,5]<-cScore_t$mod42.sw out[7,5]<-cScore_t$mod42.swtc pdf('plotCindex_train.pdf') barplot(out,beside=T,ylim=c(0,1),legend.text=T,ylab='C-index',args.legend=c(x=24,y=1.02,cex=0.7),main='training set') abline(h=cScore_t$mod.clin,lty=2) dev.off()