diff --git a/.DS_Store b/.DS_Store index 9c7e095..fe0ea78 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index 5f4cfe3..bbf0cfd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: uktrade Type: Package Title: Accessing Data Through HMRC's API -Version: 0.7.1 +Version: 0.7.2 Authors@R: person("van der Meulen", "Peter", email = "peter.vd.meulen@outlook.com", role = c("aut", "cre")) Description: Convenient functions to load HMRC Overseas Trade Statistics, Regional Trade Statistics, and custom URLs using HMRC's API. License: MIT + file LICENSE @@ -17,4 +17,4 @@ Imports: stringr, dplyr, rlang -RoxygenNote: 7.2.1 +RoxygenNote: 7.3.2 diff --git a/R/load_custom.R b/R/load_custom.R index 78231ac..b4a0b49 100644 --- a/R/load_custom.R +++ b/R/load_custom.R @@ -6,7 +6,7 @@ #' @param base_url Base URL for use in API. Defaults to https://api.uktradeinfo.com. #' @param endpoint Endpoint for use in API. Takes a single character string with no default. #' @param custom_search Custom query. Takes a single character string with no default. -#' @param skip_interval A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows. +#' @param skip_interval Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported. #' @param output A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble". #' @param request A non-negative integer value to keep track of the starting number of requests made. Defaults to zero. This can be increased in case you are making multiple requests using this function in succession and do not want to exceed the API limit (60 requests per minute). #' @param timer A non-negative integer value (seconds) to keep track of the time taken so far. Defaults to NULL. 
This can be increased in case you are making multiple requests using this function in succession and do not want to exceed the API limit (60 requests per minute). @@ -48,8 +48,8 @@ load_custom <- function(base_url = "https://api.uktradeinfo.com", endpoint, custom_search = "", request = 1, - skip_interval = 4e4, timer = NULL, + skip_interval = NULL, output = "tibble", print_url = FALSE, debug = FALSE, @@ -57,6 +57,22 @@ load_custom <- function(base_url = "https://api.uktradeinfo.com", ... ){ + # Hard code the following variables for now - HMRC API does not paginate when + # setting custom skip and top variables. Doing so will force the code to use + # the pagination set by the API itself when a request exceeds some predefined + # amount of rows. + + if(!is.null(skip_interval)){ + warning("Setting skip_interval to NULL. This uses the skip parameter automatically generated by the API. A manual skip parameter is currently not supported.") + } + + skip_interval <- NULL + top <- NULL + + max_page_size <- 4e4 # Used for a warning when skip_interval is greater + + # Continue with request: + check_internet() done <- FALSE @@ -66,6 +82,14 @@ load_custom <- function(base_url = "https://api.uktradeinfo.com", skip <- 0 page <- 1 + if(!is.null(skip_interval)){ + if(skip_interval > max_page_size){ + warning(paste0("Manual skip interval of ", skip_interval, + " exceeds the maximum page size of ", + max_page_size, ". 
Consider setting a lower skip interval.")) + } + } + # Create timer reference point: timer <- if(is.null(timer)){ proc.time() } else { timer } @@ -83,8 +107,20 @@ load_custom <- function(base_url = "https://api.uktradeinfo.com", ), " seconds"))} # Construct skip suffix: - skip_suffix <- if(skip == 0) { NULL } else { - paste0("&$skip=", format(skip, scientific = FALSE)) } + skip_suffix <- if(skip == 0 & is.null(skip_interval)) { + # If we are on the first page and we're not using a custom skip interval: + NULL + } else if(skip > 0 & is.null(skip_interval)){ + # If skip > 0 and we're not using a custom skip interval: + paste0("&$skip=", format(skip, scientific = FALSE)) + } else if(skip == 0 & !is.null(skip_interval)){ + # If we are on the first page and we're using a custom skip interval: + paste0("&$top=", format(top, scientific = FALSE)) + } else{ + # If skip > 0 and we're using a custom skip interval: + paste0("&$skip=", format(skip, scientific = FALSE), "&$top=", + format(top, scientific = FALSE)) + } # Construct URL: url <- paste0(base_url, "/", endpoint, custom_search, skip_suffix) @@ -135,7 +171,17 @@ load_custom <- function(base_url = "https://api.uktradeinfo.com", # If response indicates there's a next page: if(length(names(content)) < 3) { done <- TRUE } else { - skip <- skip + skip_interval + if(is.null(skip_interval)){ + + skip <- as.numeric( + gsub(x = content[[3]], pattern = ".*\\$skip=", replacement = "") + ) + + } else { + + skip <- skip + skip_interval + + } page <- page + 1 diff --git a/R/load_ots.R b/R/load_ots.R index 5c03a07..9653226 100644 --- a/R/load_ots.R +++ b/R/load_ots.R @@ -14,7 +14,7 @@ #' @param join_lookup A logical value indicating whether results should be joined with lookups from the API. Defaults to TRUE. Setting to FALSE will return a smaller but less human-readable dataframe containing only codes. #' @param print_url A logical. Defaults to FALSE. 
Setting this to TRUE will print the URL(s) used to load the trade data to the console. #' @param output A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble". -#' @param skip_interval Passed to load_custom(). A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows. +#' @param skip_interval Passed to load_custom(). Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported. #' @param use_proxy A logical. Defaults to FALSE. Setting this to TRUE will allow the use of a proxy connection using `use_proxy()` from `httr`. #' @param ... Optional arguments to be passed along to `use_proxy()` when using a proxy connection (by setting use_proxy to TRUE). See the `httr` documentation for more details. #' @@ -53,7 +53,7 @@ load_ots <- function(month = NULL, join_lookup = TRUE, print_url = FALSE, output = "tibble", - skip_interval = 4e4, + skip_interval = NULL, use_proxy = FALSE, ... ){ diff --git a/R/load_rts.R b/R/load_rts.R index aaa85e2..542f5bf 100644 --- a/R/load_rts.R +++ b/R/load_rts.R @@ -12,7 +12,7 @@ #' @param join_lookup A logical value indicating whether results should be joined with lookups from the API. Defaults to TRUE. Setting to FALSE will return a smaller but less human-readable dataframe containing only codes. #' @param print_url A logical. Defaults to FALSE. Setting this to TRUE will print the URL(s) used to load the trade data to the console. #' @param output A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble". -#' @param skip_interval Passed to load_custom(). A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows. +#' @param skip_interval Passed to load_custom(). 
Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported. #' @param use_proxy A logical. Defaults to FALSE. Setting this to TRUE will allow the use of a proxy connection using `use_proxy()` from `httr`. #' @param ... Optional arguments to be passed along to `use_proxy()` when using a proxy connection (by setting use_proxy to TRUE). See the `httr` documentation for more details. #' @@ -51,10 +51,10 @@ load_rts <- function(month = NULL, join_lookup = TRUE, print_url = FALSE, output = "tibble", - skip_interval = 4e4, + skip_interval = NULL, use_proxy = FALSE, ... - ){ +){ # If no commodities are chosen, load all (detailed): @@ -62,7 +62,7 @@ load_rts <- function(month = NULL, message("Loading trade data for all commodities. This may take a while.") - } + } # Check for internet: check_internet() @@ -85,12 +85,12 @@ load_rts <- function(month = NULL, NULL - } else { + } else { unique(c(country_region_lookup[is.element( country_region_lookup$CountryCodeAlpha, country), "CountryId"])) - } + } chosen_region_id <- if(is.null(region)){ @@ -101,14 +101,15 @@ load_rts <- function(month = NULL, unique(c(country_region_lookup[is.element( country_region_lookup$Area1a, region), "RegionId"])) - } + } # UK country lookup ---------------------------------------------------------- ukcountry_lookup <- load_custom(endpoint = "Region", output = output, request = request, - timer = timer, skip_interval = skip_interval, + timer = timer, + skip_interval = skip_interval, print_url = FALSE, use_proxy = use_proxy, ...) 
@@ -124,7 +125,7 @@ load_rts <- function(month = NULL, unique(c(ukcountry_lookup[is.element( ukcountry_lookup$GovRegionGroupName, uk_country), "GovRegionId"])) - } + } # Build filter --------------------------------------------------------------- @@ -155,8 +156,8 @@ load_rts <- function(month = NULL, custom_search = paste0("?$filter=", filter), output = output, request = request, timer = timer, - skip_interval = skip_interval, print_url = print_url, - use_proxy = use_proxy, ...) + skip_interval = skip_interval, + print_url = print_url, use_proxy = use_proxy, ...) if(join_lookup == FALSE) { return(rts_data) } else { @@ -169,22 +170,22 @@ load_rts <- function(month = NULL, paste0("?$filter=", paste0(stringr::str_extract_all( filter, "\\(CommoditySitc2Id[^()]+\\)" - )[[1]], collapse = " or ")) + )[[1]], collapse = " or ")) } # Rename filter: sitc_filter <- stringr::str_replace_all( sitc_filter,"CommoditySitc2Id", "CommoditySitcId" - ) + ) sitc_lookup <- load_custom(endpoint = "SITC", custom_search = sitc_filter, output = output, request = request, - timer = timer, skip_interval = skip_interval, - print_url = FALSE, - use_proxy = use_proxy, ...) + timer = timer, + skip_interval = skip_interval, + print_url = FALSE, use_proxy = use_proxy, ...) # Remove potential odata column: @@ -199,7 +200,7 @@ load_rts <- function(month = NULL, paste0("?$filter=", paste0(stringr::str_extract_all( filter, "\\(FlowTypeId[^()]+\\)" - )[[1]], collapse = " and ")) + )[[1]], collapse = " and ")) } @@ -207,9 +208,9 @@ load_rts <- function(month = NULL, custom_search = flow_filter, output = output, request = request, - timer = timer, skip_interval = skip_interval, - print_url = FALSE, - use_proxy = use_proxy, ...) + timer = timer, + skip_interval = skip_interval, + print_url = FALSE, use_proxy = use_proxy, ...) 
# Remove potential odata column: @@ -251,7 +252,7 @@ load_rts <- function(month = NULL, dplyr::as_tibble(rts_data) - } + } return(rts_data) diff --git a/man/load_custom.Rd b/man/load_custom.Rd index 4d57b20..ba67292 100644 --- a/man/load_custom.Rd +++ b/man/load_custom.Rd @@ -9,8 +9,8 @@ load_custom( endpoint, custom_search = "", request = 1, - skip_interval = 40000, timer = NULL, + skip_interval = NULL, output = "tibble", print_url = FALSE, debug = FALSE, @@ -27,10 +27,10 @@ load_custom( \item{request}{A non-negative integer value to keep track of the starting number of requests made. Defaults to zero. This can be increased in case you are making multiple requests using this function in succession and do not want to exceed the API limit (60 requests per minute).} -\item{skip_interval}{A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows.} - \item{timer}{A non-negative integer value (seconds) to keep track of the time taken so far. Defaults to NULL. This can be increased in case you are making multiple requests using this function in succession and do not want to exceed the API limit (60 requests per minute).} +\item{skip_interval}{Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported.} + \item{output}{A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble".} \item{print_url}{A logical. Defaults to FALSE. Setting this to TRUE will print the URL(s) used to load data to the console.} diff --git a/man/load_ots.Rd b/man/load_ots.Rd index 40e6f6c..ddf94eb 100644 --- a/man/load_ots.Rd +++ b/man/load_ots.Rd @@ -16,7 +16,7 @@ load_ots( join_lookup = TRUE, print_url = FALSE, output = "tibble", - skip_interval = 40000, + skip_interval = NULL, use_proxy = FALSE, ... 
) @@ -44,7 +44,7 @@ load_ots( \item{output}{A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble".} -\item{skip_interval}{Passed to load_custom(). A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows.} +\item{skip_interval}{Passed to load_custom(). Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported.} \item{use_proxy}{A logical. Defaults to FALSE. Setting this to TRUE will allow the use of a proxy connection using `use_proxy()` from `httr`.} diff --git a/man/load_rts.Rd b/man/load_rts.Rd index 5a2a7bc..16bdff5 100644 --- a/man/load_rts.Rd +++ b/man/load_rts.Rd @@ -14,7 +14,7 @@ load_rts( join_lookup = TRUE, print_url = FALSE, output = "tibble", - skip_interval = 40000, + skip_interval = NULL, use_proxy = FALSE, ... ) @@ -38,7 +38,7 @@ load_rts( \item{output}{A character specifying if a tibble ("tibble") or dataframe ("df") should be returned. Defaults to "tibble".} -\item{skip_interval}{Passed to load_custom(). A non-negative integer value showing the skip interval for paginated results. Defaults to 40,000 rows.} +\item{skip_interval}{Passed to load_custom(). Indicates the skip interval for paginated results. Defaults to NULL; this uses the skip parameter automatically generated by the API. Setting this to a different non-negative integer value is currently not supported.} \item{use_proxy}{A logical. Defaults to FALSE. Setting this to TRUE will allow the use of a proxy connection using `use_proxy()` from `httr`.} diff --git a/uktrade.Rproj b/uktrade.Rproj index 497f8bf..fbf6bf4 100644 --- a/uktrade.Rproj +++ b/uktrade.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: b2c19a9c-1a7c-4b3e-b7a2-5b6fa65f4194 RestoreWorkspace: Default SaveWorkspace: Default