library(tidyverse)

# Set working directory.
#setwd(".../Code_and_data_v1/")

# Ropes functions:
source("Algorithm/BAwR_internalROPES.R")
source("Algorithm/ropes.R")

# Input data: BPM records read from CSV. The columns referenced later in this
# script are player_link, season, age and bpm.
df_bpm <- read.csv(file = "BPM/df_bpm.csv")
# Number of distinct players in the data (recorded output below).
length(unique(df_bpm[["player_link"]]))
#[1] 3075

# Our ultimate goal is to predict the BPM in the 2017-2018 season for those
# players who played in at least one season before 2017-2018 and who also
# played in the 2017-2018 season itself.

# Split the data into the 2018 season (the one to predict) and all the other
# seasons. The test players are those that appear in both parts.
# NOTE(review): `!=` would also keep seasons later than 2018 if the data
# contained any -- confirm that 2018 is the last season in df_bpm.
df_bpm1 <- filter(df_bpm, season != "2018")
df_bpm2 <- filter(df_bpm, season == "2018")
players_test <- intersect(df_bpm1$player_link, df_bpm2$player_link)
length(players_test)
#[1] 385

# Test data frame: the 2018 rows that belong to the test players.
df_test <- filter(df_bpm2, player_link %in% players_test)

# Load the b and w matrices. The code below expects these two files to define
# the objects `b_bpm` and `w_bpm`, with player links as row names and ages as
# column names (that is how the cells are looked up in the loop).
load("BPM/b_bpm.RData")
load("BPM/w_bpm.RData")

# Hide the test observations from the fit: for every test row, set the
# matching (player, age) cell to NA in 'b' and to 0 in 'w'.
# seq_len() is used instead of 1:nrow() so that an empty test set simply skips
# the loop instead of iterating over c(1, 0).
for (i in seq_len(nrow(df_test))) {
  # Player and age of the i-th test row, read once and used for both matrices.
  player_i <- df_test$player_link[i]
  age_i <- df_test$age[i]

  b_bpm[rownames(b_bpm) %in% player_i, colnames(b_bpm) %in% age_i] <- NA
  w_bpm[rownames(w_bpm) %in% player_i, colnames(w_bpm) %in% age_i] <- 0
}

# Count the non-NA values in each row of b. rowSums() keeps the row names, so
# the result is a vector named by player.
nonNA_rows <- rowSums(!is.na(b_bpm))
# Named positions of the players left without any observed value.
pl_na <- which(nonNA_rows == 0)

# Remove the rows with all NAs (otherwise ropes won't work).
# drop = FALSE keeps the result two-dimensional even when a single row is
# left; without it that row would collapse to a vector and as.matrix() would
# turn it into a one-column matrix.
b_bpm1 <- b_bpm[!rownames(b_bpm) %in% names(pl_na), , drop = FALSE]
w_bpm1 <- w_bpm[!rownames(w_bpm) %in% names(pl_na), , drop = FALSE]

b_bpm2 <- as.matrix(b_bpm1)
w_bpm2 <- as.matrix(w_bpm1)

# Drop the same players from the test set, since their rows are no longer in
# the matrices passed to ropes.
df_test <- df_test %>%
  filter(!player_link %in% names(pl_na))

# Lambda values used for ropes (described as the optimal combination; the
# tuning itself is not part of this script):
lambda2Ages <- 900
lambda1Ages <- 10
lambda0Ages <- 10

# Make sure the output folder exists before the long fit starts, so that
# save() cannot fail after hours of computation. This is a no-op when the
# folder is already there.
dir.create("Validation", showWarnings = FALSE, recursive = TRUE)

# Run ropes and measure the elapsed time:
s0 <- Sys.time()
result_test <- ropes(
  b_bpm2, w_bpm2,
  lambda2Ages = lambda2Ages,
  lambda1Ages = lambda1Ages,
  lambda0Ages = lambda0Ages,
  lambda0Inds = 0.1,
  trace = 0,
  method = "cg"
)
s1 <- Sys.time() - s0
s1
#Time difference of 5.205871 hours

# Store the fitted object so the fit does not have to be repeated.
save(result_test, file = "Validation/result_test.RData")

# Fitted values: result_test$Z, labelled with the player (row) and age
# (column) names of the matrix that was passed to ropes.
preds <- result_test$Z
dimnames(preds) <- dimnames(b_bpm2)

# For each test row, take the fitted value at its (player, age) cell, rounded
# to 2 decimals. The column is created with one NA per row and the loop uses
# seq_len(), so an empty test set gives an empty column instead of an error.
df_test$bpm_pred <- rep(NA_real_, nrow(df_test))
for (j in seq_len(nrow(df_test))) {
  df_test$bpm_pred[j] <- round(
    preds[rownames(preds) %in% df_test$player_link[j],
          colnames(preds) %in% df_test$age[j]],
    2
  )
}

# Results. The values after each call are the outputs recorded by the author.

# Observed BPM of the test rows: mean and standard deviation.
mean(df_test[["bpm"]])
#[1] -0.9106494
sd(df_test[["bpm"]])
#[1] 3.313776

# Predicted BPM of the test rows: mean and standard deviation.
mean(df_test[["bpm_pred"]])
#[1] -0.6104935
sd(df_test[["bpm_pred"]])
#[1] 3.014374

# Absolute (Dif) and squared (Dif2) prediction errors, each rounded to
# 2 decimals before being stored.
df_test$Dif <- with(df_test, round(abs(bpm_pred - bpm), 2))
df_test$Dif2 <- with(df_test, round((bpm_pred - bpm)^2, 2))

# Mean and standard deviation of the (rounded) squared errors.
mean(df_test[["Dif2"]])
#[1] 6.728753
sd(df_test[["Dif2"]])
#[1] 16.27862

# Keep the predictions as a plain vector and write it to disk.
pred_ropes <- df_test[["bpm_pred"]]
save(list = "pred_ropes", file = "Validation/pred_ropes.RData")
