library(ballr) # NBAPerGameAdvStatistics
library(tidyverse) 

# https://www.basketball-reference.com/leagues/NBA_2018_advanced.html
# Some players played for more than one team in a season; for them
# I keep only the Team = TOT (season total) row.

# https://www.basketball-reference.com/about/bpm.html
# BPM was created to intentionally only use information that is available historically, going back to 1973-74.
# Indeed, https://www.basketball-reference.com/leagues/NBA_1974_advanced.html is the first season page that
# includes both PER and BPM.

# Seasons to download, newest first.
seasons <- 2018:1974

# Fetch one advanced-stats table per season, then bind them all at once.
# Collecting the pieces in a list avoids re-copying the accumulated data
# frame on every iteration, which is what growing `df0` inside a loop does.
s0 <- Sys.time()
season_tables <- lapply(seasons, function(yr) {
  print(yr) # progress indicator
  NBAPerGameAdvStatistics(season = yr) %>%
    # https://stackoverflow.com/questions/48389658/r-how-do-i-choose-which-row-dplyrdistinct-keeps-based-on-a-value-in-another
    # distinct() keeps the first row it sees for each player, so this relies
    # on the downloaded table listing a traded player's TOT row before the
    # per-team rows.
    # NOTE(review): confirm that ordering still holds for the source table,
    # and that no two different players share the same `player` value within
    # a season (they would be collapsed into one row here).
    distinct(player, .keep_all = TRUE) %>%
    # Tag every row with the season it came from.
    mutate(season = yr)
})
df0 <- bind_rows(season_tables)
s1 <- Sys.time() - s0
s1
# Time difference of 2.991389 mins (output of an earlier run)

# Drop the columns that are not needed in the output file.
# NOTE(review): `rk` is presumably the row rank and `x` / `x_2` empty spacer
# columns from the scraped table -- confirm against colnames(df0).
df1 <- df0 %>%
  select(-rk, -x, -x_2)
# Print the remaining column names as a quick sanity check.
colnames(df1)
# The output file is passed positionally rather than as `path =`: readr 1.4.0
# deprecated `path` in favour of `file`, and the positional form works with
# both older and newer readr versions without a deprecation warning.
write_csv(df1, "df_adv_hist.csv")
