library(tidyverse)
library(fdapace)
# https://cran.r-project.org/web/packages/fdapace/vignettes/fdapaceVig.html
library(Rcpp)

# Set working directory:
#setwd("~/.../Code_and_Data_v1")

# Data:
df_bpm <- read.csv("BPM/df_bpm.csv")
length(unique(df_bpm$player_link))
#[1] 2919

# The test set must be made up of players who have played more than one season.
# Otherwise, if we select one of the players who have played only one season, 
# when we replace his value by NA, we have to remove him because all his values are NA,
# and ropes cannot deal with all NA observations.
more_than_one <- df_bpm %>% 
  count(player_link) %>%
  filter(n > 1) %>%
  select(player_link) 

df_bpm1 <- df_bpm %>% 
  filter(player_link %in% more_than_one$player_link) 
player_names <- as.character(unique(df_bpm1$player_link))
length(player_names)
#[1] 2263

# Test data frame:
set.seed(2018)
df_test <- df_bpm1[sample(1:nrow(df_bpm1), size = floor(0.05 * nrow(df_bpm1))),]
players_test <- as.character(unique(df_test$player_link))
length(players_test)
#[1] 642 # Around 25% of the total players.

# Load b and w matrices:
load("BPM/b_bpm.RData")
load("BPM/w_bpm.RData")

# Training/Validation set:
b_bpm_tr_va <- b_bpm[!rownames(b_bpm) %in% players_test,]
bnonan <- apply(b_bpm_tr_va, 1, function(x) x[!is.na(x)])
Lys <- bnonan
Lts <- lapply(bnonan, function(x) as.numeric(names(x)))

# Test set:
b_bpm_test <- b_bpm[rownames(b_bpm) %in% players_test,]
bnonan_test <- apply(b_bpm_test, 1, function(x) x[!is.na(x)])
Lyte <- bnonan_test
Ltte <- lapply(bnonan_test, function(x) as.numeric(names(x)))

# FPCA for selected players:
ress <- FPCA(Lys, Lts)

# Fitted-predicted values of the test set:
K <- length(ress$lambda)

# From FPCA and GetCESscores
# Method to estimate the PC scores; 'CE' (Condit. Expectation)
obsGrid <- ress$obsGrid
muObs <- ConvertSupport(ress$workGrid, ress$obsGrid, mu = ress$mu)

# Convert phi and fittedCov to obsGrid: OJO lo he hecho en observed, sin truncar
phiObs <- ConvertSupport(ress$workGrid, ress$obsGrid, phi = ress$phi)
#save(phiObs, file = "phiObs.RData")
CovObs <- ConvertSupport(ress$workGrid, ress$obsGrid, Cov = ress$fittedCov)

source('Validation/PACE/GetCEScores.R')
sourceCpp('Validation/PACE/GetIndCEScoresCPP.cpp')
scoresObj <- GetCEScores(Lyte, Ltte, ress$optns, muObs, obsGrid, CovObs, ress$lambda, phiObs, ress$rho) 
xiEst <- t(do.call(cbind, scoresObj[1, ]))

# From from fitted.FPCA
ZMFV <- xiEst[,1:K, drop = FALSE] %*% t(ress$phi[,1:K, drop = FALSE])
IM <- ress$mu
f <- t(apply(ZMFV, 1, function(x) x + IM))

# Only  rows with two non-nans:
ft <- f[complete.cases(f),]

#fy: predicted BPM for those with two values at least in the observed grid
fy <- matrix(0, nrow = dim(ft)[1], ncol = length(ress$obsGrid))
for (i in 1:dim(ft)[1]) {
  fy[i,] <- ConvertSupport(ress$workGrid, ress$obsGrid, mu = ft[i,])
}

dimnames(fy) <- dimnames(b_bpm_test)
dim(fy)
#[1] 642  23

df_test$bpm_pred <- NA
for (j in 1:nrow(df_test)) {
  df_test$bpm_pred[j] <- round(fy[rownames(fy) %in% df_test$player_link[j], 
                                  colnames(fy) %in% df_test$age[j]], 2)
}

# Results:
mean(df_test$bpm)
#[1] -1.223484
sd(df_test$bpm)
#[1] 3.270697

mean(df_test$bpm_pred)
#[1] -1.269148
sd(df_test$bpm_pred)
#[1] 2.505575

df_test$Dif <- round(abs(df_test$bpm_pred - df_test$bpm), 2)
df_test$Dif2 <- round((df_test$bpm_pred - df_test$bpm)^2, 2)

mean(df_test$Dif2)
#[1] 2.757123
sd(df_test$Dif2)
#[1] 5.358426 

# Save the vector of predictions:
pred_pace <- df_test$bpm_pred
save(pred_pace, file = "Validation/PACE/pred_pace.RData")
