library(tidyverse)
library(fdapace)
# https://cran.r-project.org/web/packages/fdapace/vignettes/fdapaceVig.html
library(Rcpp)

# Set working directory:
#setwd("~/.../Code_and_Data_v1")

# Data:
df_bpm <- read.csv("BPM/df_bpm.csv")
length(unique(df_bpm$player_link))
#[1] 3075

# Our ultimate goal will be to predict the BPM in the season 2017-2018, for those players who played 
# at least in one season before the season 2017-2018 and also played in the season 2017-2018 itself.

# So, first filter the players who belong to this test set:
df_bpm1 <- df_bpm %>%
  filter(season != "2018")
df_bpm2 <- df_bpm %>%
  filter(season == "2018")
players_test <- intersect(df_bpm1$player_link, df_bpm2$player_link)
length(players_test)
#[1] 385

# Test data frame:
df_test <- df_bpm2 %>%
  filter(player_link %in% players_test) 

# Load b and w matrices:
load("BPM/b_bpm.RData")
load("BPM/w_bpm.RData")

# Training/Validation set:
b_bpm_tr_va <- b_bpm[!rownames(b_bpm) %in% players_test,]
bnonan <- apply(b_bpm_tr_va, 1, function(x) x[!is.na(x)])
Lys <- bnonan
Lts <- lapply(bnonan, function(x) as.numeric(names(x)))

# Test set:
b_bpm_test <- b_bpm[rownames(b_bpm) %in% players_test,]
bnonan_test <- apply(b_bpm_test, 1, function(x) x[!is.na(x)])
Lyte <- bnonan_test
Ltte <- lapply(bnonan_test, function(x) as.numeric(names(x)))

# FPCA for selected players:
ress <- FPCA(Lys, Lts)

# Fitted-predicted values of the test set:
K <- length(ress$lambda)

# From FPCA and GetCESscores:
# Method to estimate the PC scores; 'CE' (Condit. Expectation)
obsGrid <- ress$obsGrid
muObs <- ConvertSupport(ress$workGrid, ress$obsGrid, mu = ress$mu)

# Convert phi and fittedCov to obsGrid: 
phiObs <- ConvertSupport(ress$workGrid, ress$obsGrid, phi = ress$phi)
#save(phiObs, file = "phiObs.RData")
CovObs <- ConvertSupport(ress$workGrid, ress$obsGrid, Cov = ress$fittedCov)

source('Validation/PACE/GetCEScores.R')
sourceCpp('Validation/PACE/GetIndCEScoresCPP.cpp')
scoresObj <- GetCEScores(Lyte, Ltte, ress$optns, muObs, obsGrid, CovObs, ress$lambda, phiObs, ress$rho) 
xiEst <- t(do.call(cbind, scoresObj[1, ]))

# From fitted.FPCA:
ZMFV <- xiEst[,1:K, drop = FALSE] %*% t(ress$phi[,1:K, drop = FALSE])
IM <- ress$mu
f <- t(apply(ZMFV, 1, function(x) x + IM))

# Only  rows with two non-nans:
ft <- f[complete.cases(f),]

#fy: predicted BPM for those with two values at least in the observed grid
fy <- matrix(0, nrow = dim(ft)[1], ncol = length(ress$obsGrid))
for (i in 1:dim(ft)[1]) {
  fy[i,] <- ConvertSupport(ress$workGrid, ress$obsGrid, mu = ft[i,])
}

dimnames(fy) <- dimnames(b_bpm_test)
dim(fy)
#[1] 385  23

df_test$bpm_pred <- NA
for (j in 1:nrow(df_test)) {
  df_test$bpm_pred[j] <- round(fy[rownames(fy) %in% df_test$player_link[j], 
                                  colnames(fy) %in% df_test$age[j]], 2)
}

# Results:
mean(df_test$bpm)
#[1] -0.9106494
sd(df_test$bpm)
#[1] 3.313776

mean(df_test$bpm_pred)
#[1] -0.8745195
sd(df_test$bpm_pred)
#[1] 2.357108

df_test$Dif <- round(abs(df_test$bpm_pred - df_test$bpm), 2)
df_test$Dif2 <- round((df_test$bpm_pred - df_test$bpm)^2, 2)

mean(df_test$Dif2)
#[1] 3.239948
sd(df_test$Dif2)
#[1] 7.60318

# Save the vector of predictions:
pred_pace <- df_test$bpm_pred
save(pred_pace, file = "Validation/PACE/pred_pace.RData")
