-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #34 from sportsdataverse/pwhl_scraper
PWHL Scraper
- Loading branch information
Showing
17 changed files
with
1,165 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,14 @@ | ||
Package: fastRhockey | ||
Title: Functions to Access Premier Hockey Federation and National Hockey League Play by Play Data | ||
Version: 0.6.0 | ||
Title: Functions to Access Professional Women's Hockey League and National Hockey League Play by Play Data | ||
Version: 0.7.0 | ||
Authors@R: c( | ||
person(given = "Ben", family = "Howell", email = "[email protected]", role = c("aut")), | ||
person(given = "Saiem", family = "Gilani", email = "[email protected]", role = c("cre", "aut")), | ||
person(given = "Alyssa", family = "Longmuir", email = "[email protected]", role = c("ctb")) | ||
) | ||
Description: A utility to scrape and load play-by-play data | ||
and statistics from the Premier Hockey Federation (PHF) <https://www.premierhockeyfederation.com/>, formerly | ||
known as the National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's | ||
and statistics from the Professional Women's Hockey League <https://www.thepwhl.com/>, formerly | ||
known as the Premier Hockey Federation (PHF) or National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's | ||
stats API <https://www.nhl.com/>. | ||
License: MIT + file LICENSE | ||
URL: https://fastRhockey.sportsdataverse.org/, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#' @title **PWHL Schedule** | ||
#' @description PWHL Schedule lookup | ||
#' | ||
#' @param season Season (YYYY) to pull the schedule from, the concluding year in XXXX-YY format | ||
#' @return A data frame with schedule data | ||
#' @import jsonlite | ||
#' @import dplyr | ||
#' @import httr | ||
#' @importFrom glue glue | ||
#' @export | ||
#' @examples | ||
#' \donttest{ | ||
#' try(pwhl_schedule(season = 2023)) | ||
#' } | ||
|
||
pwhl_schedule <- function(season) { | ||
|
||
base_url = "https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=schedule&team=-1&season=1&month=-1&location=homeaway&key=694cfeed58c932ee&client_code=pwhl&site_id=2&league_id=1&division_id=-1&lang=en&callback=angular.callbacks._1" | ||
full_url = base_url | ||
|
||
res <- httr::RETRY( | ||
"GET", | ||
full_url | ||
) | ||
|
||
res <- res %>% | ||
httr::content(as = "text", encoding = "utf-8") | ||
|
||
res <- gsub("angular.callbacks._1\\(", "", res) | ||
res <- gsub("}}]}]}])", "}}]}]}]", res) | ||
|
||
r <- res %>% | ||
jsonlite::parse_json() | ||
|
||
gm <- r[[1]]$sections[[1]]$data | ||
|
||
schedule_data <- data.frame() | ||
|
||
tryCatch( | ||
expr = { | ||
for (i in 1:length(gm)) { | ||
|
||
if (is.null(gm[[i]]$prop$venue_name$venueUrl)) { | ||
venue <-'TBD' | ||
} else { | ||
venue <- gm[[i]]$prop$venue_name$venueUrl | ||
} | ||
|
||
game_info <- data.frame( | ||
"game_id" = c(gm[[i]]$row$game_id), | ||
"game_date" = c(gm[[i]]$row$date_with_day), | ||
"game_status" = c(gm[[i]]$row$game_status), | ||
"home_team" = c(gm[[i]]$row$home_team_city), | ||
"home_team_id" = c(gm[[i]]$prop$home_team_city$teamLink), | ||
"away_team" = c(gm[[i]]$row$visiting_team_city), | ||
"away_team_id" = c(gm[[i]]$prop$visiting_team_city$teamLink), | ||
"home_score" = c(gm[[i]]$row$home_goal_count), | ||
"away_score" = c(gm[[i]]$row$visiting_goal_count), | ||
"venue" = c(gm[[i]]$row$venue_name), | ||
"venue_url" = c(venue) | ||
) | ||
|
||
schedule_data <- rbind( | ||
schedule_data, | ||
game_info | ||
) | ||
|
||
} | ||
|
||
schedule_data <- schedule_data %>% | ||
dplyr::mutate( | ||
winner = dplyr::case_when( | ||
.data$home_score == '' | .data$away_score == "-" ~ '-', | ||
.data$home_score > .data$away_score ~ .data$home_team, | ||
.data$away_score > .data$home_score ~ .data$away_team, | ||
.data$home_score == .data$away_score & .data$home_score != "-" ~ "Tie", | ||
TRUE ~ NA_character_ | ||
), | ||
season = season | ||
) %>% | ||
dplyr::select( | ||
c( | ||
"game_id", | ||
"game_date", | ||
"game_status", | ||
"home_team", | ||
"home_team_id", | ||
"away_team", | ||
"away_team_id", | ||
"home_score", | ||
"away_score", | ||
"winner", | ||
"venue", | ||
"venue_url" | ||
) | ||
) | ||
}, | ||
error = function(e) { | ||
message(glue::glue("{Sys.time()}: Invalid season or no schedule data available! Try a season from 2023 onwards!")) | ||
|
||
}, | ||
warning = function(w) { | ||
}, | ||
finally = { | ||
} | ||
) | ||
|
||
return(schedule_data) | ||
|
||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#' @title **PWHL Standings** | ||
#' @description PWHL Standings lookup | ||
#' | ||
#' @param season Season (YYYY) to pull the roster from, the concluding year in XXXX-YY format | ||
#' @param regular Bool for whether to pull regular or pre-season rosters | ||
#' @return A data frame with standings data | ||
#' @import jsonlite | ||
#' @import dplyr | ||
#' @import httr | ||
#' @importFrom glue glue | ||
|
||
pwhl_standings <- function(season = 2023, regular = TRUE) { | ||
if (regular) { | ||
season_id <- 1 | ||
} else if (! regular) { | ||
season_id <- 2 | ||
} | ||
|
||
REG_URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=league&context=overall&site_id=2&season=", season_id, "&special=false&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=undefined&sort=points&lang=en&callback=angular.callbacks._b") | ||
URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=division&context=overall&site_id=2&season=", season_id, "&special=true&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=-1&sort=points&lang=en&callback=angular.callbacks._4") | ||
reg_res <- httr::RETRY( | ||
"GET", | ||
REG_URL | ||
) | ||
|
||
reg_res <- reg_res %>% | ||
httr::content(as = "text", encoding = "utf-8") | ||
|
||
reg_res <- gsub("angular.callbacks._b\\(", "", reg_res) | ||
reg_res <- gsub("}}]}]}])", "}}]}]}]", reg_res) | ||
r_reg <- reg_res %>% | ||
jsonlite::parse_json() | ||
|
||
res <- httr::RETRY( | ||
"GET", | ||
URL | ||
) | ||
|
||
res <- res %>% | ||
httr::content(as = "text", encoding = "utf-8") | ||
|
||
res <- gsub("angular.callbacks._4\\(", "", res) | ||
res <- gsub("}}]}]}])", "}}]}]}]", res) | ||
r <- res %>% | ||
jsonlite::parse_json() | ||
|
||
reg_data <- r_reg[[1]]$sections[[1]]$data | ||
data <- r[[1]]$sections[[1]]$data | ||
|
||
reg_standings <- data.frame() | ||
standings <- data.frame() | ||
# data[[1]] | ||
|
||
# jsonlite::fromJSON(r[[1]]$sections[[1]]$data) | ||
# jsonlite::flatten(data) | ||
|
||
tryCatch( | ||
expr = { | ||
for (y in 1:length(reg_data)) { | ||
|
||
reg_team_stand <- data.frame( | ||
team_rank = c(reg_data[[y]]$row$rank), | ||
team = c(reg_data[[y]]$row$name), | ||
team_code = c(reg_data[[y]]$row$team_code), | ||
games_played = c(reg_data[[y]]$row$games_played), | ||
points = c(reg_data[[y]]$row$points), | ||
wins = c(reg_data[[y]]$row$regulation_wins), | ||
non_reg_wins = c(reg_data[[y]]$row$non_reg_wins), | ||
losses = c(reg_data[[y]]$row$losses), | ||
non_reg_losses = c(reg_data[[y]]$row$non_reg_losses), | ||
goals_for = c(reg_data[[y]]$row$goals_for), | ||
goals_against = c(reg_data[[y]]$row$goals_against), | ||
games_remaining = c(reg_data[[y]]$row$games_remaining) | ||
) | ||
|
||
reg_standings <- dplyr::bind_rows(reg_standings, reg_team_stand) | ||
|
||
} | ||
|
||
|
||
for (y in 1:length(data)) { | ||
|
||
team_stand <- data.frame( | ||
# team_rank = c(data[[y]]$row$rank), | ||
team = c(data[[y]]$row$name), | ||
team_code = c(data[[y]]$row$team_code), | ||
# games = c(data[[y]]$row$games_played), | ||
ot_wins = c(data[[y]]$row$ot_wins), | ||
ot_losses = c(data[[y]]$row$ot_losses), | ||
so_wins = c(data[[y]]$row$shootout_wins), | ||
so_losses = c(data[[y]]$row$shootout_losses), | ||
power_play_goals = c(data[[y]]$row$power_play_goals), | ||
power_play_goals_against = c(data[[y]]$row$power_play_goals_against), | ||
power_plays = c(data[[y]]$row$power_plays), | ||
power_play_pct = c(data[[y]]$row$power_play_pct), | ||
short_handed_goals = c(data[[y]]$row$short_handed_goals_for), | ||
short_handed_goals_against = c(data[[y]]$row$short_handed_goals_against), | ||
times_short_handed = c(data[[y]]$row$times_short_handed), | ||
penalty_kill_pct = c(data[[y]]$row$penalty_kill_pct) | ||
) | ||
|
||
standings <- dplyr::bind_rows(standings, team_stand) | ||
|
||
} | ||
|
||
lg_standings <- reg_standings %>% | ||
dplyr::left_join(standings, by = c("team", "team_code")) | ||
}, | ||
error = function(e) { | ||
message(glue::glue("{Sys.time()}: Invalid season or no roster data available! Try a season from 2023 onwards!")) | ||
|
||
}, | ||
warning = function(w) { | ||
}, | ||
finally = { | ||
} | ||
) | ||
return(lg_standings) | ||
|
||
} |
Oops, something went wrong.