Skip to content

Commit

Permalink
update tests, fix kp_box, add kp_referee (#41)
Browse files Browse the repository at this point in the history
* update tests, fix kp_box, add kp_referee
  • Loading branch information
saiemgilani committed Oct 29, 2021
1 parent 8144dd7 commit e43e0ae
Show file tree
Hide file tree
Showing 55 changed files with 290 additions and 128 deletions.
37 changes: 19 additions & 18 deletions .Rbuildignore
Expand Up @@ -25,21 +25,22 @@ docs/*
data-raw/*
mbb_pbp_db
nba_pbp_db
^docs/CNAME
^docs/.gitignore
^docs/babel.config.js
^docs/
^docs/blog$
^docs/docs$
^docs/build$
^docs/[.].*$
^docs/[.].*/[.].*$
^docs/static/[.].*$
^docs/blog/[.].*$
^docs/docs/[.].*$
^docs/node_modules/[.].*$
^docs/src/[.].*$
^docs/build/MBB/[.].*$
^docs/docs/MBB/[.].*$
^docs/docs/NBA/[.].*$
^docs/[\.]\.*$
^website$
^website/*
^cran-comments\.md$
^LICENSE\.md$
^.travis.yml
^.github$
^_pkgdown.yml$
^_pkgdown\.yml$
^vignettes
^\.github$
^data-raw$
^data-raw
^CODE_OF_CONDUCT\.md
^logo.png
^\.github$
^man/figures
^README\.Rmd$
^md
^CRAN-RELEASE$
2 changes: 1 addition & 1 deletion .github/workflows/pkgdown.yaml
Expand Up @@ -8,7 +8,7 @@ name: pkgdown

jobs:
pkgdown:
runs-on: macOS-latest
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
KP_USER: ${{ secrets.KP_USER }}
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: hoopR
Title: Functions to Access Men's Basketball Play by Play Data
Version: 1.4.4
Version: 1.4.5
Authors@R:
c(person(given = "Saiem",
family = "Gilani",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -45,6 +45,7 @@ export(kp_pointdist)
export(kp_pomeroy_archive_ratings)
export(kp_pomeroy_ratings)
export(kp_program_ratings)
export(kp_referee)
export(kp_team_depth_chart)
export(kp_team_history)
export(kp_team_lineups)
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
@@ -1,3 +1,6 @@
# **hoopR 1.4.5**
- Add ```kp_referee()``` function

# **hoopR 1.4.4**
- Remove referee ranks from ```kp_box()``` function

Expand Down
10 changes: 5 additions & 5 deletions R/espn_mbb_data.R
Expand Up @@ -32,13 +32,13 @@ espn_mbb_game_all <- function(game_id){

resp <- res %>%
httr::content(as = "text", encoding = "UTF-8")

#---- Play-by-Play ------
tryCatch(
expr = {
raw_play_df <- jsonlite::fromJSON(resp)[["gamepackageJSON"]]
raw_play_df <- jsonlite::fromJSON(jsonlite::toJSON(raw_play_df),flatten=TRUE)

#---- Play-by-Play ------

plays <- raw_play_df[["plays"]] %>%
tidyr::unnest_wider(unlist(.data$participants))
suppressWarnings(
Expand All @@ -47,7 +47,7 @@ espn_mbb_game_all <- function(game_id){
dplyr::select(.data$id, .data$athlete.id) %>%
tidyr::unnest_wider(unlist(.data$athlete.id, use.names=FALSE),names_sep = "_")
)
names(aths)<-c("play.id","athlete1.id","athlete2.id")
names(aths)<-c("play.id","athlete.id.1","athlete.id.2")
plays_df <- dplyr::bind_cols(plays, aths) %>%
select(-.data$athlete.id)

Expand Down Expand Up @@ -227,7 +227,7 @@ espn_mbb_pbp <- function(game_id){
dplyr::select(.data$id, .data$athlete.id) %>%
tidyr::unnest_wider(unlist(.data$athlete.id, use.names=FALSE),names_sep = "_")
)
names(aths)<-c("play.id","athlete1.id","athlete2.id")
names(aths)<-c("play.id","athlete.id.1","athlete.id.2")
plays_df <- dplyr::bind_cols(plays, aths) %>%
select(-.data$athlete.id)
plays_df <- plays_df %>%
Expand Down Expand Up @@ -387,7 +387,7 @@ espn_mbb_player_box <- function(game_id){
expr = {
raw_play_df <- jsonlite::fromJSON(resp)[["gamepackageJSON"]]
raw_play_df <- jsonlite::fromJSON(jsonlite::toJSON(raw_play_df),flatten=TRUE)
#---- Player Box ------

players_df <- jsonlite::fromJSON(jsonlite::toJSON(raw_play_df[["boxscore"]][["players"]]), flatten=TRUE) %>%
tidyr::unnest(.data$statistics) %>%
tidyr::unnest(.data$athletes)
Expand Down
20 changes: 12 additions & 8 deletions R/espn_nba_data.R
Expand Up @@ -46,7 +46,7 @@ espn_nba_game_all <- function(game_id){
dplyr::select(.data$id, .data$athlete.id) %>%
tidyr::unnest_wider(unlist(.data$athlete.id, use.names=FALSE),names_sep = "_")
)
names(aths)<-c("play.id","athlete1.id","athlete2.id","athlete3.id")
names(aths)<-c("play.id","athlete.id.1","athlete.id.2","athlete.id.3")
plays_df <- dplyr::bind_cols(plays, aths) %>%
select(-.data$athlete.id)
},
Expand All @@ -58,12 +58,15 @@ espn_nba_game_all <- function(game_id){
finally = {
}
)

#---- Team Box ------
tryCatch(
expr = {
raw_play_df <- jsonlite::fromJSON(resp)[["gamepackageJSON"]]
season <- raw_play_df[['header']][['season']][['year']]
season_type <- raw_play_df[['header']][['season']][['type']]
homeAwayTeam1 = toupper(raw_play_df[['header']][['competitions']][['competitors']][[1]][['homeAway']][1])
homeAwayTeam2 = toupper(raw_play_df[['header']][['competitions']][['competitors']][[1]][['homeAway']][2])
homeTeamId = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['id']][1]
awayTeamId = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['id']][2]
homeTeamMascot = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['name']][1]
Expand All @@ -74,7 +77,6 @@ espn_nba_game_all <- function(game_id){
homeTeamAbbrev = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['abbreviation']][1]
awayTeamAbbrev = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['abbreviation']][2]
game_date = as.Date(substr(raw_play_df[['header']][['competitions']][['date']],0,10))
#---- Team Box ------
teams_box_score_df <- jsonlite::fromJSON(jsonlite::toJSON(raw_play_df[["boxscore"]][["teams"]]),flatten=TRUE)

teams_box_score_df_2 <- teams_box_score_df[[1]][[2]] %>%
Expand All @@ -85,15 +87,15 @@ espn_nba_game_all <- function(game_id){
dplyr::rename(Away = .data$displayValue)
teams2 <- data.frame(t(teams_box_score_df_2$Home))
colnames(teams2) <- t(teams_box_score_df_2$name)
teams2$homeAway <- "Home"
teams2$homeAway <- homeAwayTeam2
teams2$OpponentId <- as.integer(awayTeamId)
teams2$OpponentName <- awayTeamName
teams2$OpponentMascot <- awayTeamMascot
teams2$OpponentAbbrev <- awayTeamAbbrev

teams1 <- data.frame(t(teams_box_score_df_1$Away))
colnames(teams1) <- t(teams_box_score_df_1$name)
teams1$homeAway <- "Away"
teams1$homeAway <- homeAwayTeam1
teams1$OpponentId <- as.integer(homeTeamId)
teams1$OpponentName <- homeTeamName
teams1$OpponentMascot <- homeTeamMascot
Expand Down Expand Up @@ -225,7 +227,7 @@ espn_nba_pbp <- function(game_id){
dplyr::select(.data$id, .data$athlete.id) %>%
tidyr::unnest_wider(unlist(.data$athlete.id, use.names=FALSE),names_sep = "_")
)
names(aths)<-c("play.id","athlete1.id","athlete2.id","athlete3.id")
names(aths)<-c("play.id","athlete.id.1","athlete.id.2","athlete.id.3")
plays_df <- dplyr::bind_cols(plays, aths) %>%
select(-.data$athlete.id)

Expand Down Expand Up @@ -282,6 +284,8 @@ espn_nba_team_box <- function(game_id){
raw_play_df <- jsonlite::fromJSON(resp)[["gamepackageJSON"]]
season <- raw_play_df[['header']][['season']][['year']]
season_type <- raw_play_df[['header']][['season']][['type']]
homeAwayTeam1 = toupper(raw_play_df[['header']][['competitions']][['competitors']][[1]][['homeAway']][1])
homeAwayTeam2 = toupper(raw_play_df[['header']][['competitions']][['competitors']][[1]][['homeAway']][2])
homeTeamId = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['id']][1]
awayTeamId = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['id']][2]
homeTeamMascot = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['name']][1]
Expand All @@ -292,7 +296,7 @@ espn_nba_team_box <- function(game_id){
homeTeamAbbrev = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['abbreviation']][1]
awayTeamAbbrev = raw_play_df[['header']][['competitions']][['competitors']][[1]][['team']][['abbreviation']][2]
game_date = as.Date(substr(raw_play_df[['header']][['competitions']][['date']],0,10))
#---- Team Box ------

teams_box_score_df <- jsonlite::fromJSON(jsonlite::toJSON(raw_play_df[["boxscore"]][["teams"]]),flatten=TRUE)

teams_box_score_df_2 <- teams_box_score_df[[1]][[2]] %>%
Expand All @@ -303,15 +307,15 @@ espn_nba_team_box <- function(game_id){
dplyr::rename(Away = .data$displayValue)
teams2 <- data.frame(t(teams_box_score_df_2$Home))
colnames(teams2) <- t(teams_box_score_df_2$name)
teams2$homeAway <- "Home"
teams2$homeAway <- homeAwayTeam2
teams2$OpponentId <- as.integer(awayTeamId)
teams2$OpponentName <- awayTeamName
teams2$OpponentMascot <- awayTeamMascot
teams2$OpponentAbbrev <- awayTeamAbbrev

teams1 <- data.frame(t(teams_box_score_df_1$Away))
colnames(teams1) <- t(teams_box_score_df_1$name)
teams1$homeAway <- "Away"
teams1$homeAway <- homeAwayTeam1
teams1$OpponentId <- as.integer(homeTeamId)
teams1$OpponentName <- homeTeamName
teams1$OpponentMascot <- homeTeamMascot
Expand Down
23 changes: 12 additions & 11 deletions R/kp_box_tables.R
Expand Up @@ -42,6 +42,7 @@ kp_box <- function(game_id, year){
teams <- dplyr::bind_rows(lapply(rvest::html_text(teams),
function(x){data.frame(Team = x, stringsAsFactors = FALSE)}))


refs <- (page %>%
xml2::read_html() %>%
rvest::html_elements(xpath = "//*[@id='half-column3']//span//div[4]") %>%
Expand All @@ -51,25 +52,25 @@ kp_box <- function(game_id, year){
xml2::read_html() %>%
rvest::html_elements(xpath = "//*[@id='half-column3']//span//div[4]") %>%
rvest::html_elements(".seed")

ref_ranks <- dplyr::bind_rows(lapply(rvest::html_text(ref_ranks),
function(x){data.frame(Official.Rk=x, stringsAsFactors=FALSE)}))


ref_ids <- dplyr::bind_rows(lapply(xml2::xml_attrs(refs),
function(x){data.frame(as.list(x), stringsAsFactors=FALSE)}))
ref_ids <- ref_ids %>%
dplyr::filter(!stringr::str_detect(.data$href,"official")) %>%
dplyr::mutate(ref_id = stringr::str_remove(stringr::str_remove(
stringi::stri_extract_first_regex(.data$href,"=(.+)"),"="),"&(.+)")) %>%
dplyr::select(.data$ref_id) %>%
dplyr::rename(OfficialId=.data$ref_id)

if(length(ref_ids)>0){
ref_ids <- ref_ids %>%
dplyr::filter(!stringr::str_detect(.data$href,"official")) %>%
dplyr::mutate(ref_id = stringr::str_remove(stringr::str_remove(
stringi::stri_extract_first_regex(.data$href,"=(.+)"),"="),"&(.+)")) %>%
dplyr::select(.data$ref_id) %>%
dplyr::rename(OfficialId=.data$ref_id)
}

ref_names <- dplyr::bind_rows(lapply(rvest::html_text(refs),
function(x){data.frame(x, stringsAsFactors=FALSE)})) %>%
dplyr::rename(OfficialName=.data$x)
function(x){data.frame(OfficialName = x, stringsAsFactors=FALSE)}))

ref_table <- data.frame(ref_ids,ref_names, stringsAsFactors = FALSE)
ref_table <- dplyr::bind_cols(ref_ids,ref_names)
ref_table$GameId <- game_id
ref_table$Year <- year
ref_table <- ref_table %>%
Expand Down
85 changes: 85 additions & 0 deletions R/kp_misc_stats.R
Expand Up @@ -159,6 +159,91 @@ kp_officials <- function(year = most_recent_mbb_season()){
return(kenpom)
}

#' Get referee game log
#' @param referee Referee ID
#' @param year Year of data to pull
#'
#' @return A data frame with 11 columns:
#' \describe{
#' \item{\code{game_number}}{integer.}
#' \item{\code{date}}{character.}
#' \item{\code{time_et)}}{character.}
#' \item{\code{game}}{character.}
#' \item{\code{location}}{character.}
#' \item{\code{venue}}{character.}
#' \item{\code{conference}}{character.}
#' \item{\code{thrill_score}}{double.}
#' \item{\code{referee_name}}{character.}
#' \item{\code{ref_rank}}{integer.}
#' \item{\code{year}}{integer.}
#' }
#' @keywords Refs
#' @importFrom cli cli_abort
#' @importFrom dplyr select filter mutate mutate_at
#' @import rvest
#' @export
#'
#' @examples
#' \donttest{
#' try(kp_referee(referee = 714363, year = 2021))
#' }

kp_referee <- function(referee, year){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination, set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)

browser <- login()
if(!(is.numeric(year) && nchar(year) == 4 && year>=2016)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid year as a number (YYYY), data only goes back to 2016")
}


### Pull Data
url <- paste0("https://kenpom.com/referee.php?",
"r=",referee,
"&y=",year)
page <- rvest::session_jump_to(browser, url)
header_cols <- c("GameNumber","Date","Time (ET)","Game","Location",
"Venue","Conference", "ThrillScore")

x <- (page %>%
xml2::read_html() %>%
rvest::html_elements("table"))[[1]] %>%
rvest::html_table() %>%
as.data.frame()

colnames(x) <- header_cols
rk <- page %>%
xml2::read_html() %>%
rvest::html_element(".rank") %>%
rvest::html_text()
name <- page %>%
xml2::read_html() %>%
rvest::html_element("h5") %>%
rvest::html_text()
x$RefereeName <- stringr::str_remove(name,"\\d+ ")
x$RefRank <- as.numeric(rk)
x <- dplyr::mutate(x,
"Year" = year)

### Store Data
kenpom <- x %>%
janitor::clean_names()

},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no referee data for {referee} in {year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}

#' Get Home Court Advantage Estimates
#'
#'
Expand Down

1 comment on commit e43e0ae

@vercel
Copy link

@vercel vercel bot commented on e43e0ae Oct 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.