# Load the dataset 'AlcoholismTranscr'.
# NOTE: The Rda-file loaded here is NOT included in the electronic appendix. To obtain it,
# one has to run the R-script 'AlcoholismTranscr_preparationinfos.R' found in the folder
# 'FAbatchPaper/Datasets/PreparationScripts'.

load("./FAbatchPaper/Datasets/ProcessedData/AlcoholismTranscr.Rda")



# Apply FAbatch using three factors, ComBat and SVA using three factors
# to the dataset:

library("bapred")

set.seed(1234)
Xfabatch <- ba(x=X, y=y, batch=batch, method = "fabatch", nbf=3)$xadj
Xcom <- ba(x=X, y=y, batch=batch, method = "combat")$xadj
Xsva <- ba(x=X, y=y, batch=batch, method = "sva", nbf=3)$xadj




# First calculate principal components using the dataset before batch effect 
# adjustment and after applying the individual methods:

library("ggplot2")

methods <- c("Without batch effect adjustment", "ComBat", "SVA",
  "FAbatch")

methodpcs <- c()
pcs1 <- c()
pcs2 <- c()
batchpcs <- c()
ypcs <- c()

Xlist <- list(X, Xcom, Xsva, Xfabatch)

for(i in 1:4) {

  xpr <- prcomp(Xlist[[i]], scale. = FALSE)
  PCs <- predict(xpr)[,1:2]

  methodpcs <- c(methodpcs, rep(methods[i], nrow(PCs)))
  pcs1 <- c(pcs1, PCs[,1])
  pcs2 <- c(pcs2, PCs[,2])
  batchpcs <- c(batchpcs, as.numeric(batch))
  ypcs <- c(ypcs, as.numeric(y))

}

resultsPC <- data.frame(method=methodpcs, batch=batchpcs, 
  y=ypcs, pc1=pcs1, pc2=pcs2)

resultsPC$method <- factor(resultsPC$method, levels=c("Without batch effect adjustment", 
  "ComBat", "SVA", "FAbatch"))


# Now generate a ggplot for each of the above and store them in a list:

library("MASS")

plist <- list()

for(i in seq(along=levels(resultsPC$method))) {

resultsPCtemp <- resultsPC[resultsPC$method==levels(resultsPC$method)[i],]
  
batches <- unique(resultsPCtemp$batch)

xall <- yall <- zall <- batchall <- c()

for(j in seq(along=batches)) {

pc1temp <- resultsPCtemp$pc1[resultsPCtemp$batch==batches[j]]
pc2temp <- resultsPCtemp$pc2[resultsPCtemp$batch==batches[j]]
dens2 <- kde2d(pc1temp, pc2temp, lims=c(min(pc1temp)-sd(pc1temp), max(pc1temp)+sd(pc1temp), min(pc2temp)-
  sd(pc2temp), max(pc2temp)+sd(pc2temp)))

densdf <- expand.grid(dens2$x, dens2$y)[,2:1]
densdf$z <- as.vector(dens2$z)

names(densdf) <- c("y", "x", "z")

xall <- c(xall, densdf$x)
yall <- c(yall, densdf$y)
zall <- c(zall, densdf$z)
batchall <- c(batchall, rep(batches[j], nrow(densdf)))

}

densdfall <- data.frame(x=xall, y=yall, z=zall, batch=batchall)

plist[[i]] <- ggplot(data=resultsPCtemp, aes(x=pc1, y=pc2)) + 
  scale_color_manual(values = c("black", "red")) +
  stat_contour(data=densdfall[densdfall$batch==1,], aes(x=x, y=y, z=z), size=1, colour="black", bins=5) + 
  stat_contour(data=densdfall[densdfall$batch==2,], aes(x=x, y=y, z=z), size=0.5, colour="red", bins=5) + 
  geom_text(aes(label=as.character(as.numeric(y)), colour=factor(batch), fontface=ifelse(batch==1, "bold", "plain")), size=4) + theme(legend.position="none", axis.text=element_text(size=13),
    axis.title=element_text(size=13), plot.title = element_text(size = 13)) +
  labs(x="PC 1", y="PC 2") + ggtitle(levels(resultsPC$method)[i]) +
  geom_point(data=data.frame(x=sapply(batches, function(x) mean(resultsPCtemp$pc1[resultsPCtemp$batch==x])), 
    y=sapply(batches, function(x) mean(resultsPCtemp$pc2[resultsPCtemp$batch==x])), batch=batches), 
    aes(x=x, y=y), shape=18, size=5, colour="black") + 
  geom_point(data=data.frame(x=sapply(batches, function(x) mean(resultsPCtemp$pc1[resultsPCtemp$batch==x])), 
    y=sapply(batches, function(x) mean(resultsPCtemp$pc2[resultsPCtemp$batch==x])), batch=batches), 
    aes(x=x, y=y, colour=factor(batch)), shape=18, size=3.5)

}


# Combine plots:

library("gridExtra")
grobframe <- marrangeGrob(plist, ncol=2, nrow=2, top=NULL)

grobframe

ggsave <- ggplot2::ggsave; body(ggsave) <- body(ggplot2::ggsave)[-2]

ggsave(file="./FAbatchPaper/Results/Figure1.pdf", grobframe, width=7, height=7)
