######################################################################################
#                                                                                    #
#              Test for significant differences in performance                       #
#                                                                                    #
######################################################################################

# IMPORTANT: the working directory must be the folder that contains the
# AUC_VIM folder before this script is run; every path below is relative to
# the comparison-study folder entered here.
setwd("AUC_VIM/Comparison_Studies/Simulated_Data/Comparison_AUC_ER/")

# Restore the stored VI results, one file per sample size
# (presumably these provide list100, list500 and list1000 used below --
# confirm against the script that saved them).
for (vi_file in c("R_Objects/list100.Rda",
                  "R_Objects/list500.Rda",
                  "R_Objects/list1000.Rda")) {
  load(vi_file)
}

# Source the helpers used to get AUCs and the difference in AUCs
for (helper_file in c("../Functions/get_AUC.r",
                      "../Functions/get_AUC_difference.r")) {
  source(helper_file)
}

# Do not leave the loop variables behind in the workspace
rm(vi_file, helper_file)

# Difference distributions, computed separately for every sample size
diff100 <- get_AUC_difference(
  liste  = list100,
  effect = "all"
)
diff500 <- get_AUC_difference(
  liste  = list500,
  effect = "all"
)
diff1000 <- get_AUC_difference(
  liste  = list1000,
  effect = "all"
)

# Get p values for H0: no difference in AUCs
#
# rounded_p_value() runs a one-sample t-test on one vector of AUC differences
# (t.test() defaults to mu = 0, i.e. H0: mean difference is zero) and returns
# the p value rounded to four decimals.
rounded_p_value <- function(x) round(t.test(x)$p.value, digits = 4)

# vapply() instead of sapply(): the result is guaranteed to be a numeric vector
# with exactly one p value per element of the input, even for empty input.
p_values_100  <- vapply(diff100,  rounded_p_value, numeric(1))
p_values_500  <- vapply(diff500,  rounded_p_value, numeric(1))
p_values_1000 <- vapply(diff1000, rounded_p_value, numeric(1))


# Plot distributions for AUC differences
# (positive difference corresponds to a better performance of AUC-based VIM)

# Axis labels for the class imbalance levels. A sample size with fewer levels
# uses only the leading labels.
labelnames <- c("50%", "40%", "30%", "20%", "10%", "5%", "1%")

# Draw one panel on the current graphics device: one box per element of
# `differences`, a dashed grey reference line at zero and the p values printed
# above the boxes.
#
#   differences  list of numeric vectors with AUC differences, one per
#                class imbalance level
#   p_values     values printed above the boxes, one per element of
#                `differences`
#   title        panel title
#   labels       axis labels; the first length(differences) entries are used
#
# Called for its side effect (drawing); returns NULL invisibly.
plot_auc_differences <- function(differences, p_values, title,
                                 labels = labelnames) {
  positions <- seq_along(differences)

  # The list is passed as a whole, so the number of boxes always matches the
  # number of labels and p values instead of relying on hard-coded indices.
  # `names` overrides any names the list itself may carry.
  boxplot(differences,
    xlab  = "Class Imbalance Level",
    ylab  = "AUC(AUC_VI) - AUC(ER_VI)",
    main  = title,
    names = labels[positions],
    ylim  = c(-0.1, 0.5))

  abline(h = 0, col = "grey", lty = 2)
  text(positions, 0.48, p_values)
  text(1, 0.5, "P values")

  invisible(NULL)
}

# Start from a clean state: close any graphics device that is still open
graphics.off()

pdf(file = "Results/Performance_differences_p_values.pdf", width = 15, height = 5)

# `finally` closes the PDF device even if one of the panels fails, so no
# half-written file or dangling device is left behind; errors still propagate.
tryCatch({
  # Three panels side by side; par() has to be set after the device is opened
  par(mfrow = c(1,3), cex.main = 2, cex.axis = 1.6, cex.lab = 1.6, mar = c(5,5,4,2))

  plot_auc_differences(diff100,  p_values_100,  title = "n = 100")
  plot_auc_differences(diff500,  p_values_500,  title = "n = 500")
  plot_auc_differences(diff1000, p_values_1000, title = "n = 1000")
}, finally = graphics.off())