From bc02536a83b567e98a80aea313252aa13da3aab5 Mon Sep 17 00:00:00 2001 From: pvdmeulen Date: Tue, 1 Nov 2022 21:32:55 +0000 Subject: [PATCH] v0.7.1 --- DESCRIPTION | 2 +- README.Rmd | 6 +++--- README.md | 48 +++++++++++++++++++++++++----------------------- man/load_ots.Rd | 2 +- man/load_rts.Rd | 2 +- 5 files changed, 31 insertions(+), 29 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81d8859..5f4cfe3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: uktrade Type: Package Title: Accessing Data Through HMRC's API -Version: 0.7.0 +Version: 0.7.1 Authors@R: person("van der Meulen", "Peter", email = "peter.vd.meulen@outlook.com", role = c("aut", "cre")) Description: Convenient functions to load HMRC Overseas Trade Statistics, Regional Trade Statistics, and custom URLs using HMRC's API. License: MIT + file LICENSE diff --git a/README.Rmd b/README.Rmd index ee398b8..495d0cd 100644 --- a/README.Rmd +++ b/README.Rmd @@ -95,13 +95,13 @@ data When loading an HS2 code, or a SITC1 or SITC2 code, so-called Below Threshold Trade Allocation estimates are also loaded (for EU trade). These are, roughly, estimated values for those trades which fell below the Intrastat Survey threshold. At more detailed commodity levels, these estimates are excluded. BTTA trade estimates have different commodity codes (9-digit CN codes ending in 9999999, or 7-digit SITC codes ending in 99999): ```{r load_ots example with BTTA} -data <- load_ots(month = c(201901, 201912), commodity = 22, join_lookup = TRUE, print_url = TRUE) +data <- load_ots(month = c(202101, 202103), commodity = "03", join_lookup = TRUE, print_url = TRUE) library(dplyr) library(stringr) -data %>% - filter(str_detect(Hs4Code, "-")) +data %>% + filter(stringr::str_detect(Sitc4Code, "-")) ``` Specifying `commodity = NULL` and `SITC = NULL` will load all commodities (this may take considerable time). This will also include so-called non-response estimates, which have negative commodity codes (and currently cannot be split by e.g. SITC2 or HS2). For example, we can load all exports to Australia in January 2019: diff --git a/README.md b/README.md index b00754d..d0e4ae6 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,6 @@ data #> # ... with 4,653 more rows, and abbreviated variable names 1: FlowTypeId, #> # 2: SuppressionIndex, 3: CommodityId, 4: CommoditySitcId, 5: CountryId, #> # 6: SuppUnit -#> # i Use `print(n = ...)` to see more rows ``` Note that the `month` argument specifies a range in the form of @@ -145,7 +144,6 @@ data #> # Area1 , Area1a , Area2 , Area2a , Area3 , #> # Area3a , Area5a , CountryId , CountryCodeNumeric , #> # CountryCodeAlpha , CountryName , PortId , ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` Loading aggregate data (such as all spirits, HS4 code 2208) is possible @@ -157,7 +155,7 @@ what URL the code is using by specifying `print_URL = TRUE`: data <- load_ots(month = c(201901, 201912), commodity = 2208, join_lookup = TRUE, print_url = TRUE) #> Loading data via the following URL(s): -#> URL 1: https://api.uktradeinfo.com/OTS?$filter=(FlowTypeId eq 1 or FlowTypeId eq 2 or FlowTypeId eq 3 or FlowTypeId eq 4) and (MonthId ge 201901 and MonthId le 201912) and ((CommodityId ge 22080000 and CommodityId le 22089999)) +#> URL 1: https://api.uktradeinfo.com/OTS?$filter=(MonthId ge 201901 and MonthId le 201912) and ((CommodityId ge 22080000 and CommodityId le 22089999)) data #> # A tibble: 23,466 x 39 @@ -180,7 +178,6 @@ data #> # Area1 , Area1a , Area2 , Area2a , Area3 , #> # Area3a , Area5a , CountryId , CountryCodeNumeric , #> # CountryCodeAlpha , CountryName , PortId , ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` When loading an HS2 code, or a SITC1 or SITC2 code, so-called Below @@ -192,10 +189,10 @@ codes (9-digit CN codes ending in 9999999, or 7-digit SITC codes ending in 99999): ``` r -data <- load_ots(month = c(201901, 201912), commodity = 22, join_lookup = TRUE, print_url = TRUE) +data <- load_ots(month = c(202101, 202103), commodity = "03", join_lookup = TRUE, + print_url = TRUE) #> Loading data via the following URL(s): -#> URL 1: https://api.uktradeinfo.com/OTS?$filter=(FlowTypeId eq 1 or FlowTypeId eq 2 or FlowTypeId eq 3 or FlowTypeId eq 4) and (MonthId ge 201901 and MonthId le 201912) and ((CommodityId ge 22000000 and CommodityId le 22999999) or CommodityId eq 229999999) -#> URL 2: https://api.uktradeinfo.com/OTS?$filter=(FlowTypeId eq 1 or FlowTypeId eq 2 or FlowTypeId eq 3 or FlowTypeId eq 4) and (MonthId ge 201901 and MonthId le 201912) and ((CommodityId ge 22000000 and CommodityId le 22999999) or CommodityId eq 229999999)&$skip=40000 +#> URL 1: https://api.uktradeinfo.com/OTS?$filter=(MonthId ge 202101 and MonthId le 202103) and ((CommodityId ge 03000000 and CommodityId le 03999999) or CommodityId eq 039999999) library(dplyr) #> @@ -209,16 +206,27 @@ library(dplyr) library(stringr) data %>% - filter(str_detect(Hs4Code, "-")) -#> # A tibble: 0 x 39 -#> # ... with 39 variables: MonthId , FlowTypeId , -#> # FlowTypeDescription , SuppressionIndex , SuppressionDesc , -#> # Hs2Code , Hs2Description , Hs4Code , Hs4Description , -#> # Hs6Code , Hs6Description , Cn8Code , -#> # Cn8LongDescription , Sitc1Code , Sitc1Desc , -#> # Sitc2Code , Sitc2Desc , Sitc3Code , Sitc3Desc , -#> # Sitc4Code , Sitc4Desc , Area1 , Area1a , ... -#> # i Use `colnames()` to see all variable names + filter(stringr::str_detect(Sitc4Code, "-")) +#> # A tibble: 54 x 39 +#> MonthId FlowTypeId FlowType~1 Suppr~2 Suppr~3 Hs2Code Hs2De~4 Hs4Code Hs4De~5 +#> +#> 1 202101 1 "EU Impor~ 0 +#> 2 202101 2 "EU Expor~ 0 +#> 3 202102 2 "EU Expor~ 0 +#> 4 202103 2 "EU Expor~ 0 +#> 5 202101 1 "EU Impor~ 0 +#> 6 202102 1 "EU Impor~ 0 +#> 7 202103 1 "EU Impor~ 0 +#> 8 202101 2 "EU Expor~ 0 +#> 9 202102 2 "EU Expor~ 0 +#> 10 202103 2 "EU Expor~ 0 +#> # ... with 44 more rows, 30 more variables: Hs6Code , +#> # Hs6Description , Cn8Code , Cn8LongDescription , +#> # Sitc1Code , Sitc1Desc , Sitc2Code , Sitc2Desc , +#> # Sitc3Code , Sitc3Desc , Sitc4Code , Sitc4Desc , +#> # Area1 , Area1a , Area2 , Area2a , Area3 , +#> # Area3a , Area5a , CountryId , CountryCodeNumeric , +#> # CountryCodeAlpha , CountryName , PortId , ... ``` Specifying `commodity = NULL` and `SITC = NULL` will load all @@ -252,7 +260,6 @@ data #> # Area1 , Area1a , Area2 , Area2a , Area3 , #> # Area3a , Area5a , CountryId , CountryCodeNumeric , #> # CountryCodeAlpha , CountryName , PortId , ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` We can also use SITC codes - here, we load all beverage (SITC2 code 11) @@ -282,7 +289,6 @@ data #> # Area1 , Area1a , Area2 , Area2a , Area3 , #> # Area3a , Area5a , CountryId , CountryCodeNumeric , #> # CountryCodeAlpha , CountryName , PortId , ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` Note that SITC codes need to be in character format, and include any @@ -314,7 +320,6 @@ data #> # Area1 , Area1a , Area2 , Area2a , Area3 , #> # Area3a , Area5a , CountryId , CountryCodeNumeric , #> # CountryCodeAlpha , CountryName , PortId , ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` ### RTS @@ -349,7 +354,6 @@ data #> # CountryCodeNumeric , CountryCodeAlpha , CountryName , #> # Value , NetMass , and abbreviated variable names #> # 1: FlowTypeDescription, 2: Sitc1Code, 3: Sitc1Desc, ... -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` Note: where relevant, BTTA data is [included in RTS @@ -384,7 +388,6 @@ data #> # ... with 2 more variables: Cn8LongDescription , Exports , and #> # abbreviated variable names 1: Hs2Description, 2: Hs4Description, #> # 3: Hs6Description, 4: SitcCommodityCode -#> # i Use `colnames()` to see all variable names ``` Note that the variables expanded in the API query, Exports and Trader, @@ -415,7 +418,6 @@ tidyr::unnest(data, Exports, names_repair = "unique") #> # TraderId , CommodityId...12 , MonthId , Trader , and #> # abbreviated variable names 1: CommodityId...1, 2: Hs2Description, #> # 3: Hs4Description, 4: Hs6Description, 5: SitcCommodityCode -#> # i Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names ``` ## MIT License diff --git a/man/load_ots.Rd b/man/load_ots.Rd index 9768696..40e6f6c 100644 --- a/man/load_ots.Rd +++ b/man/load_ots.Rd @@ -6,7 +6,7 @@ \usage{ load_ots( month = NULL, - flow = c(1, 2, 3, 4), + flow = NULL, commodity = NULL, sitc = NULL, country = NULL, diff --git a/man/load_rts.Rd b/man/load_rts.Rd index 549ecbd..5a2a7bc 100644 --- a/man/load_rts.Rd +++ b/man/load_rts.Rd @@ -6,7 +6,7 @@ \usage{ load_rts( month = NULL, - flow = c(1, 2, 3, 4), + flow = NULL, sitc = NULL, country = NULL, region = NULL,