Skip to content

Commit

Permalink
beginning of NP update
Browse files Browse the repository at this point in the history
  • Loading branch information
annaramji committed Aug 13, 2024
1 parent 86d8243 commit 0857eb1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 47 deletions.
10 changes: 4 additions & 6 deletions globalprep/np/v2024/R/np_fxn.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,6 @@ np_split_antilles <- function(m) {





# np_harvest_cat <- function(h_tonnes, h_usd) {
# ### Merge harvest in tonnes to harvest in USD.
# ### * forces 'commodity' variable to character, to avoid issues with
Expand Down Expand Up @@ -108,17 +106,17 @@ np_harvest_preclip <- function(h) {
mutate(no_data = is.na(tonnes) & is.na(usd)) %>%
arrange(rgn_id, commodity, no_data, year) %>%
mutate(
year_last = max(year, na.rm=T),
year_last = max(year, na.rm = T),
### note: currently year_latest is always most recent year of whole dataset
year_beg = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>%
year_beg = as.integer(ifelse(no_data[1], (year_last + 1), year[1]))) %>%
### Since ordered by (is.na(tonnes) & is.na(usd)) before year, should pickup first non-NA year.
### If no non-NA years, no_data[1] == TRUE, assign year_beg to be beyond the time series.
### Note: The "as.integer" is there to get around an "incompatible types" error.

filter(year>=year_beg) %>%
filter(year >= year_beg) %>%
### eliminates years prior to first reporting

dplyr::select(-year_beg, -year_last, -no_data) %>%
dplyr::select(-c(year_beg, year_last, no_data)) %>%
### cleans up all columns created in this function
ungroup() %>%
arrange(rgn_id, product, commodity, year)
Expand Down
104 changes: 64 additions & 40 deletions globalprep/np/v2024/STEP1a_np_ornamentals_prep.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ This analysis converts FAO commodities data into data layers used to calculate O
- New year of FAO data (2022)\
- Updated links to datasets\
- Updated file paths to be more reproducible, enable cross-platform capability (not dependent on direction of directory separator)
- Updated data cleaning to follow tidyverse style, use of `janitor::clean_names()`

### v2023
New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spread()`, `gather()`)
Expand All @@ -52,28 +53,42 @@ New year of FAO data (2021). Replaced deprecated functions (`replace_at()`, `spr

```{r setup, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(eval=FALSE)
## load libraries, set directories
library(ohicore) #devtools::install_github('ohi-science/ohicore@dev')
library(dplyr)
library(stringr)
library(tidyr)
library(zoo)
library(ggplot2)
library(here)
library(tidyverse)
library(plotly)
knitr::opts_chunk$set(eval = FALSE)
# ======= Load packages ============
if (!require(librarian)) {
install.packages("librarian")
library(librarian)
}
librarian::shelf(
ohicore, #devtools::install_github('ohi-science/ohicore@dev')
dplyr,
stringr,
tidyr,
here,
tidyverse,
zoo,
ggplot2,
plotly,
tictoc,
RColorBrewer
)
# ======= Set directories ===========
# Update scenario year, set up programmatic scenario year updating
scen_year_number <- 2023
scen_year_number <- 2024
scen_year <- as.character(scen_year_number)
prev_scen_year <- as.character(scen_year_number - 1)
data_dir_year <- paste0("d", scen_year)
prev_data_dir_year <- paste0("d", prev_scen_year)
v_scen_year <- paste0("v", scen_year)
current_np_dir <- here::here("globalprep", "np", v_scen_year)
## Load FAO-specific user-defined functions
source(here('workflow/R/fao_fxn.R')) # function for cleaning FAO files
source(here('workflow/R/common.R')) # directory locations
source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R")))
source(here::here("workflow", "R", "fao_fxn.R")) # function for cleaning FAO files
source(here::here("workflow", "R", "common.R")) # directory locations
source(here::here(current_np_dir, "R", "np_fxn.R")) # function for handling FAO commodity data specific to NP
```

Expand All @@ -82,59 +97,68 @@ source(here(paste0("globalprep/np/v", scen_year,"/R/np_fxn.R")))
Simultaneously read and process FAO commodities value and quantity data.

```{r}
## NOTE: This can be run as a loop, but the "value" and "quant" datasets need to be run individually to make sure
## there are no problems (after this check, they can be looped for efficiency)
## list files included in d2020 folder (value and quant datasets)
dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year))
files <- list.files(dir_fao_data, pattern=glob2rx('*.csv'), full.names=TRUE)
#dir_fao_data <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', scen_year))
dir_fao_data <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", data_dir_year)
files <- list.files(dir_fao_data, pattern = glob2rx('*.csv'), full.names = TRUE)
## To compare to old data:
dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year))
files_old <- list.files(dir_fao_data_old, pattern=glob2rx('*.csv'), full.names=TRUE)
#dir_fao_data_old <- file.path(dir_M, paste0('git-annex/globalprep/_raw_data/FAO_commodities/d', prev_scen_year))
dir_fao_data_old <- here::here(dir_M, "git-annex", "globalprep", "_raw_data", "FAO_commodities", prev_data_dir_year)
files_old <- list.files(dir_fao_data_old, pattern = glob2rx('*.csv'), full.names = TRUE)
## loop
# ===== loop ================
for (f in files){
f <- files[1] # un-comment and update to test individual files
cat(sprintf('\n\n\n====\nfile: %s\n', basename(f)))
d <- read.csv(f, check.names = FALSE, strip.white = TRUE, stringsAsFactors = FALSE) # stringsAsFactors=T
# checks names syntactically, strips leading and trailing whitespace, prevents conversion of characters to factors
#d_test <- readr::read_csv(f)
## Specifies that units are tonnes if we are reading in the Commodities Quantity data csv, and usd if we are reading in the Commodities Value data csv
units <- c('tonnes','usd')[str_detect(f, c('quant','value'))] # detect unit name using lowercase American English
# ---- Preliminary cleaning & tidying ----
## gather into long format and clean up FAO-specific data foibles
## warning: attributes are not identical across measure variables; they will be dropped: this is fine
## warning: attributes are not identical across measure variables; they will be dropped: this is fine (didn't get this warning in v2024 once code was updated)
m <- d %>%
dplyr::select(-`Unit (Name)`) %>%
rename(country = `Reporting country (Name)`,
commodity = `Commodity (Name)`,
trade = `Trade flow (Name)`) %>%
rename_with(~ gsub("\\[", "", .)) %>%
rename_with(~ gsub("\\]", "", .)) %>%
pivot_longer(cols = -c(country, commodity, trade, Unit),
janitor::clean_names() %>%
dplyr::select(-c(unit_name)) %>% # "Tonnes – net product weight" == TPW
rename(country = reporting_country_name,
commodity = commodity_name,
trade = trade_flow_name) %>%
rename_with(~ gsub("x", "", .)) %>% # tidy up year column names (clean_names() added "x"s)
pivot_longer(cols = -c(country, commodity, trade, unit),
names_to = "year", values_to = "value")
## Include only the "Exports" data:
## 2022 - changed from "Export" to "Exports"
# ---- Include only the "Exports" data ----
m <- m %>%
filter(trade == "Exports")
# ---- Run fao data cleaning function ----
# cleans up flags, swaps out FAO-specific codes for analysis
m <- m %>%
fao_clean_data_new() %>% # swaps out FAO-specific codes. NOTE: optional parameter 'sub_N' can be passed to control how an 'N' code is interpreted.
select(-trade, -Unit) %>% # eliminate 'trade' column
select(-c(trade, unit)) %>% # eliminate 'trade' column
arrange(country, commodity, is.na(value), year)
# warning: NAs introduced by coercion
## Products join: attach product categories from com2prod, and
## filter out all entries that do not match a product category.
# ---- Products join ----
## attach product categories from com2prod, and filter out
## all entries that do not match a product category.
## Note: commodity_lookup is user-defined function to compare
## commodities in data vs commodities in lookup table
## load lookup for converting commodities to products
com2prod <- read.csv(here(paste0('globalprep/np/v', scen_year, '/raw/commodities2products_weighting.csv')), na.strings='')
# Load lookup for converting commodities to products
com2prod <- read.csv(here::here(current_np_dir, "raw", "commodities2products_weighting.csv"), na.strings = '')
## version used in 2019:
## read.csv(here('globalprep/np/v2019/raw/commodities2products.csv'), na.strings='')
Expand All @@ -152,7 +176,7 @@ for (f in files){
## inner_join will attach product names to matching commodities according to
## lookup table 'com2prod', and eliminate all commodities that do not appear in the lookup table.
m <- m %>%
inner_join(com2prod, by='commodity')
inner_join(com2prod, by = 'commodity')
## Special case: user-defined function deals with
Expand All @@ -164,8 +188,8 @@ for (f in files){
filter(country != "Azerbaijan")
m_rgn <- name_2_rgn(df_in = m,
fld_name='country',
flds_unique=c('commodity', 'product', 'year'))
fld_name = 'country',
flds_unique = c('commodity', 'product', 'year'))
# v2021 duplicates: [1] "China" "China, Hong Kong SAR" "China, Macao SAR" "Guadeloupe"
# [5] "Martinique" "Montenegro" "Russian Federation" "Serbia and Montenegro"
Expand Down
4 changes: 3 additions & 1 deletion globalprep/np/v2024/raw/commodities2products_weighting.csv
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ sponges,Natural sponges nei
sponges,Natural sponges other than raw
sponges,Natural sponges raw
corals,Coral and the like
corals,Miscellaneous corals and shells
shells,Abalone shells
shells,Miscellaneous corals and shells
shells,Mother of pearl shells
shells,Oyster shells
shells,Sea snail shells
shells,Shells nei
shells,Trochus shells
shells,Trochus shells
shells,Powder and waste of shells

0 comments on commit 0857eb1

Please sign in to comment.