##########################################
##########################################
## IMPORTANT: WE CONSIDER RISK MEASURES
##            IN THE RIGHT TAIL.
##            FOR THE LEFT TAIL, FLIP
##            THE RETURNS
##########################################
##########################################



###################
## Set up functions
###################
library(ggplot2)
library(ggpubr)
library(Rcpp)
library(RcppArmadillo)
library(forecast)
require(readxl)
require(writexl)
library(lubridate)
library(rugarch)

source('PZC_simulation_aid.R')
source('Integrated_EVT_filter.R')
source('TVGPD_filter.R')
sourceCpp('PZC_simulation.cpp')
sourceCpp('Integrated_EVT_filter.cpp')

# Fix the state of the random number generator so that runs are replicable
s_GLOBAL_RSEED <- 1234
set.seed(seed = s_GLOBAL_RSEED)

# Number of cores for the GARCH fit
s_GLOBAL_NRCORES <- 14

#####################################
## Set tail percentage for thresholds
## and extreme tail percentages for
## VaR and EL
#####################################
## verbosity_level is handed to the optimizers as `verbosity` and gates the
## per-sample progress print (printed whenever it is > -1)
verbosity_level = 0
## in_sample_only = TRUE keeps only the last estimation/evaluation window
## and skips writing the out-of-sample xlsx file
in_sample_only = FALSE
## NZ_table_out is fed back into print_NZ_table() on every alpha; total_table
## collects the finished tables of all assets
NZ_table_out = total_table = NULL
## IMPORTANT: the alpha loop should be inside the asset loop, as the GARCH
##            is NOT re-estimated for different alphas
## The next (active) line opens BOTH loops: the outer loop over asset_name and
## the inner loop over alpha_tail (0.1, 0.05, 0.025), and sets
## alpha_extreme = alpha_tail / 10. Both loops are closed by the single `}}`
## line near the end of the script.
## TO RUN A SINGLE CONFIGURATION WITHOUT THE LOOPS: comment out the active loop
## header line, uncomment the four manual-setting lines below it (alphas ...
## GARCH_estimated), and comment out the closing `}}` line near the end of
## the script.
alphas = 0.1/c(1,2,4); for (asset_name in c("BTC", "ETH")) { NZ_table_out = NULL; GARCH_estimated = FALSE; for (alpha_tail in alphas) {alpha_extreme = alpha_tail / 10
## alternative loop header: one asset, one alpha
# alphas = 0.1; for (asset_name in c("BTC")) { NZ_table_out = NULL; GARCH_estimated = FALSE; for (alpha_tail in alphas) {alpha_extreme = alpha_tail / 10
## manual settings for a run without the loops
# alphas = alpha_tail = 0.025
# alpha_extreme = alpha_extremes = alphas / 10
# asset_name = c("BTC", "ETH")[1]
# NZ_table_out = NULL; GARCH_estimated = FALSE; 

###############################
## Model configuration: PZC
## PZC = Patton/Ziegel/Chen
##       J.Econometrics
## IMPORTANT: PZC is originally
##     formulated for the left
##     tail. It is recast to
##     the right tail by
##     applying it to the
##     flipped empirical series
###############################
s_PZC_OPTIONS <- list(
  # boolean; TRUE: use diagonal A matrix
  diagonal_A = FALSE,
  # boolean; TRUE: use diagonal B matrix
  diagonal_B = TRUE,
  # double; extreme tail percentage (alpha_tail / 10, set in the loop header)
  ALPHA_EXTREME = alpha_extreme,
  # boolean; TRUE: use unit B matrix; overrides diagonal_B
  rw_specification = FALSE
)

###############################
## Model configuration: EVT
###############################
s_EVT_OPTIONS <- list(
  # logical; TRUE: do not estimate omega for the tail dynamics,
  #          but fix it to FIXED_OMEGA_VALUE
  FIXED_OMEGA = FALSE,
  # double; value of omega for the tail dynamics if FIXED_OMEGA is TRUE
  FIXED_OMEGA_VALUE = 0,
  # logical; FALSE: base confidence bands on (omega, alpha);
  #          TRUE: base bands on the free (theta1, theta2) parameters
  #          and transform to (omega, alpha)
  BANDS_RAW = TRUE,
  # integer; do not change (quick fix of code). It is temporarily replaced
  #          by BANDS_NRSIMS_MAX for the full-sample run in the sample loop
  BANDS_NRSIMS = 0,
  # integer; number of simulations for the confidence bands
  #          (only for full sample)
  BANDS_NRSIMS_MAX = 1000,
  # double; confidence level for the confidence bands
  BANDS_PCT = 0.75,
  # double; tail percentage for the thresholds tau
  TAU_TAIL_PCT = alpha_tail,
  # double; extreme tail percentage (alpha_tail / 10, set in the loop header)
  ALPHA_EXTREME = alpha_extreme,
  # can be overwritten if external thresholds are available
  EXTERNAL_TAU = NULL
)



##################################
## Model configuration: TVGPD
##################################
s_TVGPD_OPTIONS <- list(
  # double; tail percentage for the thresholds tau
  TAU_TAIL_PCT = alpha_tail,
  # double; extreme tail percentage (alpha_tail / 10, set in the loop header)
  ALPHA_EXTREME = alpha_extreme,
  # external thresholds; filled with my_data$EVT_tau inside the sample loop
  EXTERNAL_TAU = NULL
)


#############################
## Read data
#############################
## Reads the hourly Bitfinex close prices of the current asset through
## read_empirical_data() (defined in a sourced helper file), defines the
## estimation/evaluation windows, and removes every calendar day that has
## fewer than limit_hour hourly observations.
if (asset_name %in% c("BTC", "ETH")) {
  ## CRYPTO-USD
  ## returns are flipped on reading: the script works in the right tail
  ## (see the header of this file)
  flip_left_to_right <- TRUE
  my_data <- read_empirical_data(
    paste0('../../Data/updated data/Bitfinex_', asset_name, 'USD_1h.csv'),
    price_column_name = "close", date_column_name = "date",
    prices_are_already_returns = FALSE, reverse = TRUE,
    date_POSX_format = "%Y-%m-%d %H:%M:%S", tz = "UTC",
    flip_return = flip_left_to_right
  )
  ## one row per estimation/evaluation run; columns are
  ## (in-sample start, in-sample end, out-of-sample start, out-of-sample end)
  estimation_evaluation_dates <- cbind(
    "2018-01-05", paste0(2020 + 1:5, "-12-31"),
    "2018-01-05", paste0(2021 + 1:5, "-12-31")
  )
  ## remove all days with less than x hours of trading data
  limit_hour <- 24
  aid_idx <- tapply(my_data$y, as.Date(my_data$dates), length)
  aid_idx <- aid_idx[which(aid_idx < limit_hour)]
  my_data <- my_data[ !(as.Date(my_data$dates) %in% as.Date(names(aid_idx))), ]
  ## count the calendar days between the first and last remaining observation
  ## that have no observation at all. The calendar is built with seq() on
  ## Date objects: as.Date() on a bare integer sequence needs an explicit
  ## `origin` in R versions before 4.3 and would fail there.
  observed_days <- unique(as.Date(my_data$dates))
  day_range <- range(observed_days)
  calendar_days <- seq(day_range[1], day_range[2], by = "day")
  nr_absent_days <- sum(!(calendar_days %in% observed_days))
  print(paste0("Removed ", length(aid_idx), " days from ", asset_name, " due to incomplete day obs"))
  print(paste0("Another total of absent days from the raw data: ", nr_absent_days))
  print(paste0("Total of missing days: ", length(aid_idx) + nr_absent_days))
} else {
  ## stop() is the base R way to signal an error; error() does not exist
  stop(paste0("ERROR: WRONG ASSET NAME ", asset_name))
}
## In-sample-only run: keep just the final estimation/evaluation window
## (still as a one-row matrix) and start without an out-of-sample container
if (in_sample_only) {
  estimation_evaluation_dates <-
    estimation_evaluation_dates[nrow(estimation_evaluation_dates), , drop = FALSE]
  my_data_oos <- NULL
}


###############################################
## loop over the different estimation and
## evaluation periods
###############################################
## initial_oos_run marks that the out-of-sample container still has to be
## created (done on the first run that has out-of-sample observations)
initial_oos_run <- TRUE
## seq_len() instead of 1:nrow(): an empty window matrix then gives zero
## iterations instead of the indices 1, 0
for (sample_count in seq_len(nrow(estimation_evaluation_dates))) {

  ###############################################
  ## set the sample
  ###############################################
  ## columns 1/2 of the window matrix delimit the in-sample rows of my_data,
  ## columns 3/4 the evaluation rows
  in_sample_idx <-
    get_index_from_date_vector(estimation_evaluation_dates[sample_count, 1], my_data$dates, start = TRUE) :
    get_index_from_date_vector(estimation_evaluation_dates[sample_count, 2], my_data$dates, start = FALSE)
  out_of_sample_idx <-
    get_index_from_date_vector(estimation_evaluation_dates[sample_count, 3], my_data$dates, start = TRUE) :
    get_index_from_date_vector(estimation_evaluation_dates[sample_count, 4], my_data$dates, start = FALSE)
  ## progress line: asset, extreme alpha and the first/last date of both samples
  if (verbosity_level > -1) {
    print(paste0(
      "(", asset_name, ", ", alpha_extreme, "): ",
      "In-sample: ", paste0(as.Date(my_data$dates[range(in_sample_idx)]), collapse = "/"),
      ";  Out-of-sample: ", paste0(as.Date(my_data$dates[range(out_of_sample_idx)]), collapse = "/")
    ))
  }
  


  ###############################################
  ## PZC specification, fitted for the extreme
  ## alpha
  ## IMPORTANT: the result is the UPPER tail VaR
  ##            and ES
  ###############################################
  PZC_optimizer_outputs <- PZC_optimize(
    my_data,
    filter = PZC_filter_cpp,
    PZC_OPTIONS = s_PZC_OPTIONS,
    verbosity = verbosity_level,
    in_sample_idx = in_sample_idx,
    out_of_sample_idx = out_of_sample_idx
  )
  ## continue with the data returned by the optimizer and drop that copy
  ## from the outputs list
  my_data <- PZC_optimizer_outputs$my_data
  PZC_optimizer_outputs$my_data <- NULL
  
  
  
  
  ###############################################
  ## EVT specification, fitted for the extreme
  ## alpha using PZC taus
  ###############################################
  ## Full-sample run (in-sample and evaluation sample end on the same row):
  ## switch the number of band simulations to BANDS_NRSIMS_MAX for this fit
  if (max(in_sample_idx) == max(out_of_sample_idx)) {
    tmp_nr <- s_EVT_OPTIONS$BANDS_NRSIMS
    s_EVT_OPTIONS$BANDS_NRSIMS <- s_EVT_OPTIONS$BANDS_NRSIMS_MAX
  }
  EVT_optimizer_outputs <- EVT_optimize(
    my_data,
    EVT_OPTIONS = s_EVT_OPTIONS,
    PZC_OPTIONS = s_PZC_OPTIONS,
    verbosity = verbosity_level,
    in_sample_idx = in_sample_idx,
    out_of_sample_idx = out_of_sample_idx
  )
  ## continue with the data returned by the optimizer and drop that copy
  ## from the outputs list
  my_data <- EVT_optimizer_outputs$my_data
  EVT_optimizer_outputs$my_data <- NULL
  ## put the original number of band simulations back after a full-sample run
  if (max(in_sample_idx) == max(out_of_sample_idx)) {
    s_EVT_OPTIONS$BANDS_NRSIMS <- tmp_nr
  }
  
  
  
  
  ###############################################
  ## TVGPD specification, fitted for the extreme
  ## alpha; the thresholds are passed in
  ## externally from the EVT_tau column of my_data
  ###############################################
  s_TVGPD_OPTIONS$EXTERNAL_TAU <- my_data$EVT_tau
  ## NOTE: verbosity is fixed at 0 here, unlike the other optimizers which
  ##       receive verbosity_level
  TVGPD_optimizer_outputs <- TVGPD_optimize(
    my_data,
    TVGPD_OPTIONS = s_TVGPD_OPTIONS,
    PZC_OPTIONS = s_PZC_OPTIONS,
    verbosity = 0,
    in_sample_idx = in_sample_idx,
    out_of_sample_idx = out_of_sample_idx
  )
  ## continue with the data returned by the optimizer and drop that copy
  ## from the outputs list
  my_data <- TVGPD_optimizer_outputs$my_data
  TVGPD_optimizer_outputs$my_data <- NULL
  
  
  
  
  ###############################################
  ## GARCH specifications, fitted for the extreme
  ## alpha
  ###############################################
  ## Re-using the GARCH fit across different alpha_extreme values does not
  ## yet work in the current nesting of loops: GARCH_estimated is never set
  ## to TRUE, so the previous outputs are discarded before every fit.
  if (!GARCH_estimated) {
    GARCH_optimizer_outputs <- NULL
    GARCH_estimated <- FALSE
  }
  GARCH_optimizer_outputs <- GARCH_optimizer(
    my_data, alpha_extreme,
    verbosity = verbosity_level,
    in_sample_idx = in_sample_idx,
    out_of_sample_idx = out_of_sample_idx,
    GARCH_optimizer_outputs = GARCH_optimizer_outputs
  )
  ## the call returns a list holding the updated data and the actual
  ## optimizer outputs; unpack the data first, then the outputs
  my_data <- GARCH_optimizer_outputs$my_data
  GARCH_optimizer_outputs <- GARCH_optimizer_outputs$GARCH_optimizer_outputs
  
  
  
  ###############################################
  ## Store the estimation results
  ###############################################
  ## Only runs whose evaluation sample extends beyond the in-sample period
  ## contribute rows to my_data_oos.
  if (max(out_of_sample_idx) > max(in_sample_idx)) {
    ## first such run: create an all-NA container shaped like my_data
    if (initial_oos_run) {
      my_data_oos <- my_data + NA
      initial_oos_run <- FALSE
    }
    ## rows strictly after the in-sample period, up to the end of the
    ## evaluation sample
    aid_idx <- (max(in_sample_idx) + 1):max(out_of_sample_idx)
    ## copy those rows, first as a whole and then once more column by column
    my_data_oos[aid_idx, ] <- my_data[aid_idx, ]
    for (nm in names(my_data)) {
      my_data_oos[aid_idx, nm] <- my_data[aid_idx, nm]
    }
  }
  ## NOTE: save() writes the RData format (to be read back with load()),
  ##       although the file name ends in ".rds"
  save(
    my_data, my_data_oos,
    s_EVT_OPTIONS, s_PZC_OPTIONS, s_TVGPD_OPTIONS,
    EVT_optimizer_outputs, PZC_optimizer_outputs,
    TVGPD_optimizer_outputs, GARCH_optimizer_outputs,
    file = paste0(
      "results/", asset_name, " ",
      as.Date(my_data$dates[tail(in_sample_idx, 1)]), " ",
      alpha_extreme, ".rds"
    )
  )

}

###############################################
## Write the IS and OOS filter values to xlsx
## NOTE: the last estimation run should be the
##       FULL in-sample run
###############################################
writexl::write_xlsx(
  my_data,
  path = paste0("results/", asset_name, " IS ", alpha_extreme, ".xlsx")
)
## the out-of-sample file is skipped for in-sample-only runs
if (!in_sample_only) {
  writexl::write_xlsx(
    my_data_oos,
    path = paste0("results/", asset_name, " OOS ", alpha_extreme, ".xlsx")
  )
}

###############################################
## plot result (FULL SAMPLE only)
###############################################
## Plots are produced only when the last estimation used every row of
## my_data, i.e. in_sample_idx is exactly 1, ..., nrow(my_data). Lengths are
## compared first: a shorter in-sample index starting at row 1 would be
## recycled in an elementwise comparison and could pass the check by mistake.
full_sample_fit <- length(in_sample_idx) == nrow(my_data) &&
  all(in_sample_idx == seq_len(nrow(my_data)))
if (full_sample_fit) {
  ## sign used to plot the series and risk measures back in the original
  ## orientation when the returns were flipped on reading
  flip_back_to_left <- if (flip_left_to_right) -1 else 1
  ## plot data and VaR levels of PZC, EVT, TVGPD and GARCH, plus the
  ## EVT threshold tau
  gg <-
    ggplot(
      data = data.frame(
        x = rep(my_data$dates, 5),
        y = flip_back_to_left * c(my_data$PZC_VaR, my_data$EVT_VaR, my_data$EVT_tau, my_data$TVGPD_VaR, my_data$GARCH_VaR),
        VaR = rep(c("PZC", "EVT", "tau", "TVGPD", "GARCH"), each = nrow(my_data))
      ),
      aes(x = x, y = y, color = VaR)
    ) +
    geom_line(data = my_data, aes(x = dates, y = flip_back_to_left * y), color = 'gray') +
    geom_line()
  plot(gg)
  ## plot data and ES levels of PZC, EVT, TVGPD and GARCH
  gg <-
    ggplot(
      data = data.frame(
        x = rep(my_data$dates, 4),
        y = flip_back_to_left * c(my_data$PZC_ES, my_data$EVT_ES, my_data$TVGPD_ES,  my_data$GARCH_ES),
        ES = rep(c("PZC", "EVT", "TVGPD", "GARCH"), each = nrow(my_data))
      ),
      aes(x = x, y = y, color = ES)
    ) +
    geom_line(data = my_data, aes(x = dates, y = flip_back_to_left * y), color = 'gray') +
    geom_line()
  plot(gg)
  ## plot ft plus band (columns EVT_ft, EVT_bandL and EVT_bandU of my_data)
  gg <- ggplot(my_data, aes(x = dates)) +
    geom_ribbon(aes(ymin = EVT_bandL, ymax = EVT_bandU), fill = "lightblue", alpha = 0.8) +
    geom_line(aes(y = EVT_ft), color = "blue") + #, size = 1.2) +
    xlab("") +
    ylab("") +
    scale_x_datetime(
      breaks = as.POSIXct(paste0(2019:2025,"-01-01"), tz = "UTC"),
      date_labels = "%Y"
    ) +
    theme_bw() +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.text = element_text(size = 24)
    )
  plot(gg)
}


###############################################
## Build (and, for the last alpha, print and
## store) the NZ result table
###############################################
## The table built so far is passed back in as NZ_table_in, so it grows over
## the alphas of one asset; printing is switched on for the last alpha only.
## NOTE(review): in_sample_data and out_of_sample_data are not assigned
##               anywhere in the visible part of this script - confirm that
##               they are provided elsewhere or not used by print_NZ_table()
NZ_table_out <- print_NZ_table(
  my_data, my_data_oos,
  in_sample_data, out_of_sample_data, alpha_extreme,
  asset_name,
  NZ_table_in = NZ_table_out,
  NZ_nms = c("NZ_2.19", "NZ_2.23"),
  with_print = (alpha_tail == tail(alphas, 1))
)
## after the last alpha: write the asset's table to a text file and append
## it, preceded by three empty lines, to the overall table
if (alpha_tail == tail(alphas, 1)) {
  writeLines(
    NZ_table_out,
    con = paste0("results/", asset_name, alpha_extreme, ".txt")
  )
  total_table <- c(total_table, rep('', 3), NZ_table_out)
}

## Closes the alpha_tail loop and the asset_name loop that are both opened by
## the loop header line near the top of the script. Comment this line out as
## well whenever that loop header line is commented out.
}}

###############################################
## Print the combined result of all assets and
## write it to file
###############################################
writeLines(total_table)
writeLines(total_table, con = "results/total_result.txt")
