diff --git a/globalprep/np/v2024/R/np_fxn.R b/globalprep/np/v2024/R/np_fxn.R index 31c0caeb..502760eb 100644 --- a/globalprep/np/v2024/R/np_fxn.R +++ b/globalprep/np/v2024/R/np_fxn.R @@ -73,8 +73,6 @@ np_split_antilles <- function(m) { - - # np_harvest_cat <- function(h_tonnes, h_usd) { # ### Merge harvest in tonnes to harvest in USD. # ### * forces 'commodity' variable to character, to avoid issues with @@ -108,17 +106,17 @@ np_harvest_preclip <- function(h) { mutate(no_data = is.na(tonnes) & is.na(usd)) %>% arrange(rgn_id, commodity, no_data, year) %>% mutate( - year_last = max(year, na.rm=T), + year_last = max(year, na.rm = T), ### note: currently year_latest is always most recent year of whole dataset - year_beg = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>% + year_beg = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>% ### Since ordered by (is.na(tonnes) & is.na(usd)) before year, should pickup first non-NA year. ### If no non-NA years, no_data[1] == TRUE, assign year_beg to be beyond the time series. ### Note: The "as.integer" is there to get around an "incompatible types" error. - filter(year>=year_beg) %>% + filter(year >= year_beg) %>% ### eliminates years prior to first reporting - dplyr::select(-year_beg, -year_last, -no_data) %>% + dplyr::select(-c(year_beg, year_last, no_data)) %>% ### cleans up all columns created in this function ungroup() %>% arrange(rgn_id, product, commodity, year) diff --git a/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd b/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd index 28d36d8c..bbe28ae1 100644 --- a/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd +++ b/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd @@ -26,6 +26,7 @@ This analysis converts FAO commodities data into data layers used to calculate O - New year of FAO data (2022)\ - Updated links to datasets\ - Updated file paths to be more reproducible, enable cross-platform capability (not dependent on direction of directory separator) +- Updated data cleaning to follow tidyverse style, use of `janitor::clean_names()` ### v2023 New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spread()`, `gather()`) @@ -52,28 +53,42 @@ New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spr ```{r setup, warning=FALSE, message=FALSE} -knitr::opts_chunk$set(eval=FALSE) - -## load libraries, set directories -library(ohicore) #devtools::install_github('ohi-science/ohicore@dev') -library(dplyr) -library(stringr) -library(tidyr) -library(zoo) -library(ggplot2) -library(here) -library(tidyverse) -library(plotly) +knitr::opts_chunk$set(eval = FALSE) +# ======= Load packages ============ +if (!require(librarian)) { + install.packages("librarian") + library(librarian) +} +librarian::shelf( + ohicore, #devtools::install_github('ohi-science/ohicore@dev') + dplyr, + stringr, + tidyr, + here, + tidyverse, + zoo, + ggplot2, + plotly, + tictoc, + RColorBrewer +) + +# ======= Set directories =========== # Update scenario year, set up programmatic scenario year updating -scen_year_number <- 2023 +scen_year_number <- 2024 scen_year <- as.character(scen_year_number) prev_scen_year <- as.character(scen_year_number - 1) +data_dir_year <- paste0("d", scen_year) +prev_data_dir_year <- paste0("d", prev_scen_year) +v_scen_year <- paste0("v", scen_year) + +current_np_dir <- here::here("globalprep", "np", v_scen_year) ## Load FAO-specific user-defined functions -source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files -source(here('workflow/R/common.R')) # directory locations -source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R"))) +source(here::here("workflow", "R", "fao_fxn.R")) # function for cleaning FAO files +source(here::here("workflow", "R", "common.R")) # directory locations +source(here::here(current_np_dir, "R", "np_fxn.R")) # function for handling FAO commodity data specific to NP ``` @@ -82,59 +97,68 @@ source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R"))) Simultaneously read and process FAO commodities value and quantity data. ```{r} - ## NOTE: This can be run as a loop, but the "value" and "quant" datasets need to be run individually to make sure ## there are no problems (after this check, they can be looped for efficiency) ## list files included in d2020 folder (value and quant datasets) -dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year)) -files <- list.files(dir_fao_data, pattern=glob2rx('*.csv'), full.names=TRUE) +#dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year)) +dir_fao_data <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", data_dir_year) + +files <- list.files(dir_fao_data, pattern = glob2rx('*.csv'), full.names = TRUE) ## To compare to old data: -dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year)) -files_old <- list.files(dir_fao_data_old, pattern=glob2rx('*.csv'), full.names=TRUE) +#dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year)) +dir_fao_data_old <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", prev_data_dir_year) + +files_old <- list.files(dir_fao_data_old, pattern = glob2rx('*.csv'), full.names = TRUE) -## loop +# ===== loop ================ for (f in files){ + f <- files[1] # un-comment and update to test individual files cat(sprintf('\n\n\n====\nfile: %s\n', basename(f))) d <- read.csv(f, check.names = FALSE, strip.white = TRUE, stringsAsFactors = FALSE) # stringsAsFactors=T # checks names syntactically, strips leading and trailing whitespace, prevents conversion of characters to factors + #d_test <- readr::read_csv(f) ## Specifies that units are tonnes if we are reading in the Commodities Quantity data csv, and usd if we are reading in the Commodities Value data csv units <- c('tonnes','usd')[str_detect(f, c('quant','value'))] # detect unit name using lowercase American English + # ---- Preliminary cleaning & tidying ---- ## gather into long format and clean up FAO-specific data foibles - ## warning: attributes are not identical across measure variables; they will be dropped: this is fine + ## warning: attributes are not identical across measure variables; they will be dropped: this is fine (didn't get this warning in v2024 once code was updated) m <- d %>% - dplyr::select(-`Unit (Name)`) %>% - rename(country = `Reporting country (Name)`, - commodity = `Commodity (Name)`, - trade = `Trade flow (Name)`) %>% - rename_with(~ gsub("\\[", "", .)) %>% - rename_with(~ gsub("\\]", "", .)) %>% - pivot_longer(cols = -c(country, commodity, trade, Unit), + janitor::clean_names() %>% + dplyr::select(-c(unit_name)) %>% # "Tonnes – net product weight" == TPW + rename(country = reporting_country_name, + commodity = commodity_name, + trade = trade_flow_name) %>% + rename_with(~ gsub("x", "", .)) %>% # tidy up year column names (clean_names() added "x"s) + pivot_longer(cols = -c(country, commodity, trade, unit), names_to = "year", values_to = "value") - ## Include only the "Exports" data: - ## 2022 - changed from "Export" to "Exports" + # ---- Include only the "Exports" data ---- m <- m %>% filter(trade == "Exports") + # ---- Run fao data cleaning function ---- + # cleans up flags, swaps out FAO-specific codes for analysis m <- m %>% fao_clean_data_new() %>% # swaps out FAO-specific codes. NOTE: optional parameter 'sub_N' can be passed to control how an 'N' code is interpreted. - select(-trade, -Unit) %>% # eliminate 'trade' column + select(-c(trade, unit)) %>% # eliminate 'trade' column arrange(country, commodity, is.na(value), year) + # warning: NAs introduced by coercion - ## Products join: attach product categories from com2prod, and - ## filter out all entries that do not match a product category. + # ---- Products join ---- + ## attach product categories from com2prod, and filter out + ## all entries that do not match a product category. ## Note: commodity_lookup is user-defined function to compare ## commodities in data vs commodities in lookup table - ## load lookup for converting commodities to products - com2prod <- read.csv(here(paste0('globalprep/np/v', scen_year, '/raw/commodities2products_weighting.csv')), na.strings='') + # Load lookup for converting commodities to products + com2prod <- read.csv(here::here(current_np_dir, "raw", "commodities2products_weighting.csv"), na.strings = '') ## version used in 2019: ## read.csv(here('globalprep/np/v2019/raw/commodities2products.csv'), na.strings='') @@ -152,7 +176,7 @@ for (f in files){ ## inner_join will attach product names to matching commodities according to ## lookup table 'com2prod', and eliminate all commodities that do not appear in the lookup table. m <- m %>% - inner_join(com2prod, by='commodity') + inner_join(com2prod, by = 'commodity') ## Special case: user-defined function deals with @@ -164,8 +188,8 @@ for (f in files){ filter(country != "Azerbaijan") m_rgn <- name_2_rgn(df_in = m, - fld_name='country', - flds_unique=c('commodity', 'product', 'year')) + fld_name = 'country', + flds_unique = c('commodity', 'product', 'year')) # v2021 duplicates: [1] "China" "China, Hong Kong SAR" "China, Macao SAR" "Guadeloupe" # [5] "Martinique" "Montenegro" "Russian Federation" "Serbia and Montenegro" diff --git a/globalprep/np/v2024/raw/commodities2products_weighting.csv b/globalprep/np/v2024/raw/commodities2products_weighting.csv index 2b0914bc..8b40b62c 100644 --- a/globalprep/np/v2024/raw/commodities2products_weighting.csv +++ b/globalprep/np/v2024/raw/commodities2products_weighting.csv @@ -42,10 +42,12 @@ sponges,Natural sponges nei sponges,Natural sponges other than raw sponges,Natural sponges raw corals,Coral and the like +corals,Miscellaneous corals and shells shells,Abalone shells shells,Miscellaneous corals and shells shells,Mother of pearl shells shells,Oyster shells shells,Sea snail shells shells,Shells nei -shells,Trochus shells \ No newline at end of file +shells,Trochus shells +shells,Powder and waste of shells \ No newline at end of file