Skip to content

Commit

Permalink
Merge pull request #34 from sportsdataverse/pwhl_scraper
Browse files Browse the repository at this point in the history
PWHL Scraper
  • Loading branch information
benhowell71 authored Feb 22, 2024
2 parents 60c1df2 + acedc65 commit b4cc412
Show file tree
Hide file tree
Showing 17 changed files with 1,165 additions and 53 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: fastRhockey
Title: Functions to Access Premier Hockey Federation and National Hockey League Play by Play Data
Version: 0.6.0
Title: Functions to Access Professional Women's Hockey League and National Hockey League Play by Play Data
Version: 0.7.0
Authors@R: c(
person(given = "Ben", family = "Howell", email = "[email protected]", role = c("aut")),
person(given = "Saiem", family = "Gilani", email = "[email protected]", role = c("cre", "aut")),
person(given = "Alyssa", family = "Longmuir", email = "[email protected]", role = c("ctb"))
)
Description: A utility to scrape and load play-by-play data
and statistics from the Premier Hockey Federation (PHF) <https://www.premierhockeyfederation.com/>, formerly
known as the National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's
and statistics from the Professional Women's Hockey League <https://www.thepwhl.com/>, formerly
known as the Premier Hockey Federation (PHF) or National Women's Hockey League (NWHL). Additionally, allows access to the National Hockey League's
stats API <https://www.nhl.com/>.
License: MIT + file LICENSE
URL: https://fastRhockey.sportsdataverse.org/,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ export(phf_standings)
export(phf_team_box)
export(phf_team_roster)
export(phf_team_stats)
export(pwhl_schedule)
export(pwhl_team_roster)
export(pwhl_teams)
export(update_nhl_db)
export(update_phf_db)
import(dplyr)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# **fastRhockey 0.7.0**

### **PWHL functions added**

* ```pwhl_schedule()``` function added.
* ```pwhl_team_roster()``` function added.
* ```pwhl_teams()``` function added.

# **fastRhockey 0.6.0**

* Improved resiliency for several PHF functions, updates under the hood.
Expand Down
449 changes: 449 additions & 0 deletions R/pwhl_pbp.R

Large diffs are not rendered by default.

111 changes: 111 additions & 0 deletions R/pwhl_schedule.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' @title **PWHL Schedule**
#' @description PWHL Schedule lookup
#'
#' @param season Season (YYYY) to pull the schedule from, the concluding year in XXXX-YY format
#' @return A data frame with schedule data
#' @import jsonlite
#' @import dplyr
#' @import httr
#' @importFrom glue glue
#' @export
#' @examples
#' \donttest{
#' try(pwhl_schedule(season = 2023))
#' }

pwhl_schedule <- function(season) {

base_url = "https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=schedule&team=-1&season=1&month=-1&location=homeaway&key=694cfeed58c932ee&client_code=pwhl&site_id=2&league_id=1&division_id=-1&lang=en&callback=angular.callbacks._1"
full_url = base_url

res <- httr::RETRY(
"GET",
full_url
)

res <- res %>%
httr::content(as = "text", encoding = "utf-8")

res <- gsub("angular.callbacks._1\\(", "", res)
res <- gsub("}}]}]}])", "}}]}]}]", res)

r <- res %>%
jsonlite::parse_json()

gm <- r[[1]]$sections[[1]]$data

schedule_data <- data.frame()

tryCatch(
expr = {
for (i in 1:length(gm)) {

if (is.null(gm[[i]]$prop$venue_name$venueUrl)) {
venue <-'TBD'
} else {
venue <- gm[[i]]$prop$venue_name$venueUrl
}

game_info <- data.frame(
"game_id" = c(gm[[i]]$row$game_id),
"game_date" = c(gm[[i]]$row$date_with_day),
"game_status" = c(gm[[i]]$row$game_status),
"home_team" = c(gm[[i]]$row$home_team_city),
"home_team_id" = c(gm[[i]]$prop$home_team_city$teamLink),
"away_team" = c(gm[[i]]$row$visiting_team_city),
"away_team_id" = c(gm[[i]]$prop$visiting_team_city$teamLink),
"home_score" = c(gm[[i]]$row$home_goal_count),
"away_score" = c(gm[[i]]$row$visiting_goal_count),
"venue" = c(gm[[i]]$row$venue_name),
"venue_url" = c(venue)
)

schedule_data <- rbind(
schedule_data,
game_info
)

}

schedule_data <- schedule_data %>%
dplyr::mutate(
winner = dplyr::case_when(
.data$home_score == '' | .data$away_score == "-" ~ '-',
.data$home_score > .data$away_score ~ .data$home_team,
.data$away_score > .data$home_score ~ .data$away_team,
.data$home_score == .data$away_score & .data$home_score != "-" ~ "Tie",
TRUE ~ NA_character_
),
season = season
) %>%
dplyr::select(
c(
"game_id",
"game_date",
"game_status",
"home_team",
"home_team_id",
"away_team",
"away_team_id",
"home_score",
"away_score",
"winner",
"venue",
"venue_url"
)
)
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid season or no schedule data available! Try a season from 2023 onwards!"))

},
warning = function(w) {
},
finally = {
}
)

return(schedule_data)

}

120 changes: 120 additions & 0 deletions R/pwhl_standings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#' @title **PWHL Standings**
#' @description PWHL Standings lookup
#'
#' @param season Season (YYYY) to pull the roster from, the concluding year in XXXX-YY format
#' @param regular Bool for whether to pull regular or pre-season rosters
#' @return A data frame with standings data
#' @import jsonlite
#' @import dplyr
#' @import httr
#' @importFrom glue glue

pwhl_standings <- function(season = 2023, regular = TRUE) {
if (regular) {
season_id <- 1
} else if (! regular) {
season_id <- 2
}

REG_URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=league&context=overall&site_id=2&season=", season_id, "&special=false&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=undefined&sort=points&lang=en&callback=angular.callbacks._b")
URL = paste0("https://lscluster.hockeytech.com/feed/index.php?feed=statviewfeed&view=teams&groupTeamsBy=division&context=overall&site_id=2&season=", season_id, "&special=true&key=694cfeed58c932ee&client_code=pwhl&league_id=1&division=-1&sort=points&lang=en&callback=angular.callbacks._4")
reg_res <- httr::RETRY(
"GET",
REG_URL
)

reg_res <- reg_res %>%
httr::content(as = "text", encoding = "utf-8")

reg_res <- gsub("angular.callbacks._b\\(", "", reg_res)
reg_res <- gsub("}}]}]}])", "}}]}]}]", reg_res)
r_reg <- reg_res %>%
jsonlite::parse_json()

res <- httr::RETRY(
"GET",
URL
)

res <- res %>%
httr::content(as = "text", encoding = "utf-8")

res <- gsub("angular.callbacks._4\\(", "", res)
res <- gsub("}}]}]}])", "}}]}]}]", res)
r <- res %>%
jsonlite::parse_json()

reg_data <- r_reg[[1]]$sections[[1]]$data
data <- r[[1]]$sections[[1]]$data

reg_standings <- data.frame()
standings <- data.frame()
# data[[1]]

# jsonlite::fromJSON(r[[1]]$sections[[1]]$data)
# jsonlite::flatten(data)

tryCatch(
expr = {
for (y in 1:length(reg_data)) {

reg_team_stand <- data.frame(
team_rank = c(reg_data[[y]]$row$rank),
team = c(reg_data[[y]]$row$name),
team_code = c(reg_data[[y]]$row$team_code),
games_played = c(reg_data[[y]]$row$games_played),
points = c(reg_data[[y]]$row$points),
wins = c(reg_data[[y]]$row$regulation_wins),
non_reg_wins = c(reg_data[[y]]$row$non_reg_wins),
losses = c(reg_data[[y]]$row$losses),
non_reg_losses = c(reg_data[[y]]$row$non_reg_losses),
goals_for = c(reg_data[[y]]$row$goals_for),
goals_against = c(reg_data[[y]]$row$goals_against),
games_remaining = c(reg_data[[y]]$row$games_remaining)
)

reg_standings <- dplyr::bind_rows(reg_standings, reg_team_stand)

}


for (y in 1:length(data)) {

team_stand <- data.frame(
# team_rank = c(data[[y]]$row$rank),
team = c(data[[y]]$row$name),
team_code = c(data[[y]]$row$team_code),
# games = c(data[[y]]$row$games_played),
ot_wins = c(data[[y]]$row$ot_wins),
ot_losses = c(data[[y]]$row$ot_losses),
so_wins = c(data[[y]]$row$shootout_wins),
so_losses = c(data[[y]]$row$shootout_losses),
power_play_goals = c(data[[y]]$row$power_play_goals),
power_play_goals_against = c(data[[y]]$row$power_play_goals_against),
power_plays = c(data[[y]]$row$power_plays),
power_play_pct = c(data[[y]]$row$power_play_pct),
short_handed_goals = c(data[[y]]$row$short_handed_goals_for),
short_handed_goals_against = c(data[[y]]$row$short_handed_goals_against),
times_short_handed = c(data[[y]]$row$times_short_handed),
penalty_kill_pct = c(data[[y]]$row$penalty_kill_pct)
)

standings <- dplyr::bind_rows(standings, team_stand)

}

lg_standings <- reg_standings %>%
dplyr::left_join(standings, by = c("team", "team_code"))
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid season or no roster data available! Try a season from 2023 onwards!"))

},
warning = function(w) {
},
finally = {
}
)
return(lg_standings)

}
Loading

0 comments on commit b4cc412

Please sign in to comment.