######################################################################################
#                                                                                    #
#                 Test for significant differences in performance                    #  
#                                                                                    #
######################################################################################

# IMPORTANT: The current working directory must be the folder in which the
# AUC_VIM folder is stored; every path below is relative to it
setwd("AUC_VIM/Comparison_Studies/Real_Data/")

# Load the computed VIs, one .Rda file per class imbalance level
# (imb_1, imb_5, imb_10, ..., imb_50)
for (imbalance_level in c(1, 5 * (1:10))) {
  load(paste0("R_Objects/imb_", imbalance_level, ".Rda"))
}

# Make calc_AUC() available; it is the function used to compute the AUCs
source("Functions/calc_AUC.r")

# Switch to the Results folder so that relative output paths end up there
setwd("Results/")


##################################################################
# Get AUC values for all imbalance objects                       #
##################################################################

# Class imbalance levels for which an `imb_<level>` object is available
imbalance_levels <- c(1, 5 * (1:10))

# Run calc_AUC() on every imbalance object and store each result under the
# name AUC_<level>_identified
for (level in imbalance_levels) {
  assign(
    paste0("AUC_", level, "_identified"),
    calc_AUC(object = get(paste0("imb_", level)))
  )
}

# Combine results from the different class imbalances into one list object,
# ordered from the highest level (AUC_50) down to the lowest (AUC_1):
descending_levels <- rev(imbalance_levels)

liste <- mget(paste0("AUC_", descending_levels, "_identified"))
names(liste) <- paste0("AUC_", descending_levels)


##################################################################
# Produce table with mean AUC-values                             #
##################################################################

# Imbalance level encoded in each list name ("AUC_50" -> "50"). Stripping the
# prefix instead of cutting fixed character positions keeps labels of any
# length intact.
level_labels <- sub("^AUC_", "", names(liste))

# Mean AUC per imbalance level for both importance measures. vapply() pins the
# result to exactly one number per list element; USE.NAMES = FALSE keeps the
# list names from becoming row names of the data frame.
mean_auc_vi <- vapply(liste, function(x) mean(x$AUC_AUC_VI), numeric(1),
                      USE.NAMES = FALSE)
mean_er_vi  <- vapply(liste, function(x) mean(x$AUC_ER_VI), numeric(1),
                      USE.NAMES = FALSE)

# One row per imbalance level, in the order of `liste`. The factor levels are
# sorted numerically (1, 5, 10, ..., 50) rather than lexicographically
# ("1", "10", "15", ..., "5", "50"), so grouping or plotting by ImbalanceLevel
# follows the natural order.
AUC_table <- data.frame(
  ImbalanceLevel = factor(
    level_labels,
    levels = unique(level_labels[order(as.numeric(level_labels))])
  ),
  AUC   = mean_auc_vi,
  ER    = mean_er_vi,
  Ratio = mean_auc_vi / mean_er_vi
)

# Copy of the table with all numeric columns rounded to two decimals; the
# first column (the imbalance level) is left untouched
AUC_table_round      <- AUC_table
AUC_table_round[-1]  <- lapply(AUC_table[-1], round, digits = 2)


# View mean AUC-values (means over all entries of each result vector; 100
# iterations per level according to the original note). print() is explicit
# so the table also shows up when the script is run via source().

print(AUC_table_round)



##################################################################
# Plot differences in AUCs and p values                          #
##################################################################

# Differences AUC(AUC_VI) - AUC(ER_VI), one vector per imbalance level
auc_differences <- lapply(liste, function(x) x$AUC_AUC_VI - x$AUC_ER_VI)

# One-sample t-test of the differences against zero for every imbalance
# level. Computed before the PDF is opened so that a failing test does not
# leave an empty or half-written file behind.
p_raw <- vapply(auc_differences, function(d) t.test(d)$p.value, numeric(1))

# Rounded p values, kept as a named list (one entry per element of `liste`)
p_values <- as.list(round(p_raw, digits = 4))

# Labels drawn in the plot. A p value below 0.00005 would round to 0 and be
# printed as "0", which is misleading; it is shown as "<0.0001" instead.
p_labels <- ifelse(p_raw < 5e-5,
                   "<0.0001",
                   as.character(round(p_raw, digits = 4)))

# Box labels derived from the list names ("AUC_50" -> "50%"), so labels and
# boxes cannot get out of step
box_labels <- paste0(sub("^AUC_", "", names(liste)), "%")

pdf(file = "Performance_differences_p_values.pdf", width = 11, height = 9)
pdf_device <- dev.cur()

# Only the device opened above is closed (other open devices are left alone),
# and it is closed even if one of the plotting calls fails.
tryCatch({

  par(cex.main = 2, cex.axis = 1.6, cex.lab = 1.6, mar = c(5, 5, 4, 2))

  # Plot differences in AUCs

  boxplot(auc_differences,
          main  = "Difference in AUCs",
          names = box_labels,
          xlab  = "Class Imbalance Level",
          ylab  = "AUC(AUC_VI) - AUC(ER_VI)",
          ylim  = c(-0.3, 0.35))

  # Reference line: no difference between the two measures
  abline(h = 0, lty = 2)

  # P values above the boxes, with a caption over the first box
  text(seq_along(liste), 0.33, p_labels)
  text(1, 0.35, "P values")

}, finally = dev.off(pdf_device))
