beginning of NP update

OHI-Science · Aug 13, 2024 · 0857eb1 · 0857eb1
1 parent 86d8243
commit 0857eb1
Show file tree

Hide file tree

Showing 3 changed files with 71 additions and 47 deletions.
diff --git a/globalprep/np/v2024/R/np_fxn.R b/globalprep/np/v2024/R/np_fxn.R
@@ -73,8 +73,6 @@ np_split_antilles <- function(m) {
 
 
 
-
-
 # np_harvest_cat <- function(h_tonnes, h_usd) {
 # ### Merge harvest in tonnes to harvest in USD.  
 # ### * forces 'commodity' variable to character, to avoid issues with 
@@ -108,17 +106,17 @@ np_harvest_preclip <- function(h) {
     mutate(no_data = is.na(tonnes) & is.na(usd)) %>%
     arrange(rgn_id, commodity, no_data, year) %>%
     mutate(
-      year_last = max(year, na.rm=T),    
+      year_last = max(year, na.rm = T),    
       ### note: currently year_latest is always most recent year of whole dataset
-      year_beg  = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>%
+      year_beg = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>%
     ### Since ordered by (is.na(tonnes) & is.na(usd)) before year, should pickup first non-NA year.
     ###   If no non-NA years, no_data[1] == TRUE, assign year_beg to be beyond the time series. 
     ### Note: The "as.integer" is there to get around an "incompatible types" error.
 
-    filter(year>=year_beg) %>%
+    filter(year >= year_beg) %>%
     ### eliminates years prior to first reporting
 
-    dplyr::select(-year_beg, -year_last, -no_data) %>%
+    dplyr::select(-c(year_beg, year_last, no_data)) %>%
     ### cleans up all columns created in this function
     ungroup() %>%    
     arrange(rgn_id, product, commodity, year)

diff --git a/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd b/globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd
@@ -26,6 +26,7 @@ This analysis converts FAO commodities data into data layers used to calculate O
 - New year of FAO data (2022)\
 - Updated links to datasets\
 - Updated file paths to be more reproducible, enable cross-platform capability (not dependent on direction of directory separator)
+- Updated data cleaning to follow tidyverse style, use of `janitor::clean_names()`
 
 ### v2023
 New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spread()`, `gather()`)
@@ -52,28 +53,42 @@ New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spr
 
 ```{r setup, warning=FALSE, message=FALSE}
 
-knitr::opts_chunk$set(eval=FALSE)
-
-## load libraries, set directories
-library(ohicore)  #devtools::install_github('ohi-science/ohicore@dev')
-library(dplyr)
-library(stringr)
-library(tidyr)
-library(zoo)  
-library(ggplot2)
-library(here)
-library(tidyverse)
-library(plotly)
+knitr::opts_chunk$set(eval = FALSE)
 
+# ======= Load packages ============
+if (!require(librarian)) {
+  install.packages("librarian")
+  library(librarian)
+}
+librarian::shelf(
+  ohicore, #devtools::install_github('ohi-science/ohicore@dev')
+  dplyr,
+  stringr,
+  tidyr,
+  here,
+  tidyverse,
+  zoo,
+  ggplot2,
+  plotly,
+  tictoc,
+  RColorBrewer
+)
+
+# ======= Set directories ===========
 # Update scenario year, set up programmatic scenario year updating
-scen_year_number <- 2023
+scen_year_number <- 2024
 scen_year <- as.character(scen_year_number)
 prev_scen_year <- as.character(scen_year_number - 1)
+data_dir_year <- paste0("d", scen_year)
+prev_data_dir_year <- paste0("d", prev_scen_year)
+v_scen_year <- paste0("v", scen_year)
+
+current_np_dir <- here::here("globalprep", "np", v_scen_year)
 
 ## Load FAO-specific user-defined functions
-source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files
-source(here('workflow/R/common.R')) # directory locations
-source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R")))
+source(here::here("workflow", "R", "fao_fxn.R")) # function for cleaning FAO files
+source(here::here("workflow", "R", "common.R")) # directory locations
+source(here::here(current_np_dir, "R", "np_fxn.R")) # function for handling FAO commodity data specific to NP
 
 ```
 
@@ -82,59 +97,68 @@ source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R")))
 Simultaneously read and process FAO commodities value and quantity data.
 
 ```{r}
-
 ## NOTE: This can be run as a loop, but the "value" and "quant" datasets need to be run individually to make sure
 ##  there are no problems (after this check, they can be looped for efficiency)
 
 
 ## list files included in d2020 folder (value and quant datasets)
-dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year))
-files <- list.files(dir_fao_data, pattern=glob2rx('*.csv'), full.names=TRUE)
+#dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year))
+dir_fao_data <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", data_dir_year)
+
+files <- list.files(dir_fao_data, pattern = glob2rx('*.csv'), full.names = TRUE)
 
 ## To compare to old data:
-dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year))
-files_old <- list.files(dir_fao_data_old, pattern=glob2rx('*.csv'), full.names=TRUE)
+#dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year))
+dir_fao_data_old <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", prev_data_dir_year)
+
+files_old <- list.files(dir_fao_data_old, pattern = glob2rx('*.csv'), full.names = TRUE)
 
-## loop
+# ===== loop ================
 for (f in files){
+  f <- files[1] # un-comment and update to test individual files
   cat(sprintf('\n\n\n====\nfile: %s\n', basename(f)))
   
   d <- read.csv(f, check.names = FALSE, strip.white = TRUE, stringsAsFactors = FALSE) # stringsAsFactors=T
   # checks names syntactically, strips leading and trailing whitespace, prevents conversion of characters to factors 
+  #d_test <- readr::read_csv(f)
   
   ## Specifies that units are tonnes if we are reading in the Commodities Quantity data csv, and usd if we are reading in the Commodities Value data csv
   units <- c('tonnes','usd')[str_detect(f, c('quant','value'))] # detect unit name using lowercase American English
 
+    # ---- Preliminary cleaning & tidying ----
   ## gather into long format and clean up FAO-specific data foibles
-  ## warning: attributes are not identical across measure variables; they will be dropped: this is fine
+  ## warning: attributes are not identical across measure variables; they will be dropped: this is fine (didn't get this warning in v2024 once code was updated)
   m <- d %>% 
-    dplyr::select(-`Unit (Name)`) %>%
-    rename(country   = `Reporting country (Name)`,
-           commodity = `Commodity (Name)`,
-           trade     = `Trade flow (Name)`) %>%
-    rename_with(~ gsub("\\[", "", .)) %>%
-    rename_with(~ gsub("\\]", "", .)) %>%
-    pivot_longer(cols = -c(country, commodity, trade, Unit),
+    janitor::clean_names() %>% 
+    dplyr::select(-c(unit_name)) %>% # "Tonnes – net product weight" == TPW
+    rename(country = reporting_country_name,
+           commodity = commodity_name,
+           trade = trade_flow_name) %>%
+    rename_with(~ gsub("x", "", .)) %>% # tidy up year column names (clean_names() added "x"s)
+    pivot_longer(cols = -c(country, commodity, trade, unit),
                    names_to = "year", values_to = "value") 
   
-  ## Include only the "Exports" data:
-  ## 2022 - changed from "Export" to "Exports"
+  # ---- Include only the "Exports" data ----
   m <- m %>%
     filter(trade == "Exports")
 
+  # ---- Run fao data cleaning function ----
+  # cleans up flags, swaps out FAO-specific codes for analysis
   m <- m %>%
     fao_clean_data_new() %>%  # swaps out FAO-specific codes. NOTE: optional parameter 'sub_N' can be passed to control how an 'N' code is interpreted.
-    select(-trade, -Unit) %>% # eliminate 'trade' column
+    select(-c(trade, unit)) %>% # eliminate 'trade' column
   arrange(country, commodity, is.na(value), year)
+  # warning: NAs introduced by coercion
 
   
-  ## Products join: attach product categories from com2prod, and
-  ##   filter out all entries that do not match a product category.
+  # ---- Products join ----
+  ## attach product categories from com2prod, and filter out 
+  ## all entries that do not match a product category.
   ## Note: commodity_lookup is user-defined function to compare 
   ##   commodities in data vs commodities in lookup table
   
-  ## load lookup for converting commodities to products
-  com2prod <- read.csv(here(paste0('globalprep/np/v', scen_year, '/raw/commodities2products_weighting.csv')), na.strings='')
+  # Load lookup for converting commodities to products
+  com2prod <- read.csv(here::here(current_np_dir, "raw", "commodities2products_weighting.csv"), na.strings = '')
   
   ## version used in 2019:
   ##    read.csv(here('globalprep/np/v2019/raw/commodities2products.csv'), na.strings='')
@@ -152,7 +176,7 @@ for (f in files){
   ## inner_join will attach product names to matching commodities according to
   ##    lookup table 'com2prod', and eliminate all commodities that do not appear in the lookup table.
   m <- m %>%
-      inner_join(com2prod, by='commodity')
+      inner_join(com2prod, by = 'commodity')
     
     
   ## Special case: user-defined function deals with 
@@ -164,8 +188,8 @@ for (f in files){
     filter(country != "Azerbaijan") 
 
   m_rgn <- name_2_rgn(df_in = m,
-                      fld_name='country', 
-                      flds_unique=c('commodity', 'product', 'year'))
+                      fld_name = 'country', 
+                      flds_unique = c('commodity', 'product', 'year'))
     
 # v2021 duplicates:  [1] "China"                 "China, Hong Kong SAR"  "China, Macao SAR"      "Guadeloupe"           
 # [5] "Martinique"            "Montenegro"            "Russian Federation"    "Serbia and Montenegro"

diff --git a/globalprep/np/v2024/raw/commodities2products_weighting.csv b/globalprep/np/v2024/raw/commodities2products_weighting.csv
@@ -42,10 +42,12 @@ sponges,Natural sponges nei
 sponges,Natural sponges other than raw
 sponges,Natural sponges raw
 corals,Coral and the like
+corals,Miscellaneous corals and shells
 shells,Abalone shells
 shells,Miscellaneous corals and shells
 shells,Mother of pearl shells
 shells,Oyster shells
 shells,Sea snail shells
 shells,Shells nei
-shells,Trochus shells
+shells,Trochus shells
+shells,Powder and waste of shells