From bc5725fa3379e54eb0c7100351f6c66bab2f1434 Mon Sep 17 00:00:00 2001 From: Bruce Delo Date: Wed, 10 Apr 2024 10:49:51 -0300 Subject: [PATCH 1/2] Added rollup function. --- R/rollup.R | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 R/rollup.R diff --git a/R/rollup.R b/R/rollup.R new file mode 100644 index 0000000..14bb147 --- /dev/null +++ b/R/rollup.R @@ -0,0 +1,63 @@ +#' @title Join output from Remora back onto its OTN detection extract. +#' +#' @description Take two parameters- an OTN detection extract and the output created by Remora on parsing that detection extract- and merge them back together such that the Remora QC columns are appended to the OTN extract, +#' preserving appropriate ordering and getting back all the OTN data. This function exists because, to get OTN data into Remora, we have to cut it up until it looks like IMOS data (this problem was the genesis of Surimi, in fact). +#' But that means the output from Remora has all IMOS-formatted columns and is missing some information, because we either had to discard it to get into IMOS format or because we can't re-synthesize it from what's in the IMOS +#' files. However, we do have enough information to join the two tables, thereby obviating the data loss problem by taking us all the way back to the original data, with a little something extra attached. +#' +#' @param detection_extract Path to an OTN detection extract corresponding to the remora output in the second parameter. +#' @param remora_output Path to Remora's QC output corresponding to the OTN detection extract in the first parameter. +#' +#' @return The OTN detection extract, but with the remora QC attached as appropriate. +#' +#' @importFrom dplyr '%>%' mutate rename left_join +#' @importFrom tidyr unite separate +#' @importFrom lubridate ymd_hms +#' @export +#' +# + +# For what it's worth, I ad a lot of ideas for names for this function, including 'bento' (because it pairs the output of 'surimi' with something else), or +# 'mcfly' (because it takes you back to the starting point of your adventure made whole but nonetheless irrevocably altered) but those are a little obscure and if I'm being honest, I have little patience +# for that kind of tweeness. Even 'rollup' (referring to making a sushi roll, in the same vein as 'bento') is a little much. + +rollup <- function(detection_extract, remora_output) { + #Read in the two dataframes. + otn_dets <- read.csv(detection_extract) + remora_out <- read.csv(remora_output) + + #Select the appropriate columns from remora_output. + remora_to_merge <- remora_out %>% + dplyr::select( + transmitter_id, + tag_id, + detection_datetime, + receiver_id, + ends_with("_QC") + ) + + #Get the dates into the same format for comparison. + otn_dets <- otn_dets %>% + mutate( + datecollected = ymd_hms(datecollected) + ) + + remora_to_merge <- remora_to_merge %>% + mutate( + detection_datetime = ymd_hms(detection_datetime) + ) + + #Join them to otn_dets + otn_det_output <- left_join( + otn_dets, + remora_to_merge, + by = join_by( + tagname == transmitter_id, + catalognumber == tag_id, + datecollected == detection_datetime, + receiver == receiver_id + )) + + #Return the merged columns. + return(otn_det_output) +} \ No newline at end of file From a28b42726b8f0e8900828c2a1c6f441f6a06a395 Mon Sep 17 00:00:00 2001 From: jackVanish Date: Wed, 10 Apr 2024 13:51:17 +0000 Subject: [PATCH 2/2] Style code (GHA) --- R/rollup.R | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/R/rollup.R b/R/rollup.R index 14bb147..cd9fb84 100644 --- a/R/rollup.R +++ b/R/rollup.R @@ -1,14 +1,14 @@ -#' @title Join output from Remora back onto its OTN detection extract. +#' @title Join output from Remora back onto its OTN detection extract. #' #' @description Take two parameters- an OTN detection extract and the output created by Remora on parsing that detection extract- and merge them back together such that the Remora QC columns are appended to the OTN extract, #' preserving appropriate ordering and getting back all the OTN data. This function exists because, to get OTN data into Remora, we have to cut it up until it looks like IMOS data (this problem was the genesis of Surimi, in fact). #' But that means the output from Remora has all IMOS-formatted columns and is missing some information, because we either had to discard it to get into IMOS format or because we can't re-synthesize it from what's in the IMOS #' files. However, we do have enough information to join the two tables, thereby obviating the data loss problem by taking us all the way back to the original data, with a little something extra attached. #' -#' @param detection_extract Path to an OTN detection extract corresponding to the remora output in the second parameter. -#' @param remora_output Path to Remora's QC output corresponding to the OTN detection extract in the first parameter. +#' @param detection_extract Path to an OTN detection extract corresponding to the remora output in the second parameter. +#' @param remora_output Path to Remora's QC output corresponding to the OTN detection extract in the first parameter. #' -#' @return The OTN detection extract, but with the remora QC attached as appropriate. +#' @return The OTN detection extract, but with the remora QC attached as appropriate. #' #' @importFrom dplyr '%>%' mutate rename left_join #' @importFrom tidyr unite separate @@ -17,17 +17,17 @@ #' # -# For what it's worth, I ad a lot of ideas for names for this function, including 'bento' (because it pairs the output of 'surimi' with something else), or +# For what it's worth, I ad a lot of ideas for names for this function, including 'bento' (because it pairs the output of 'surimi' with something else), or # 'mcfly' (because it takes you back to the starting point of your adventure made whole but nonetheless irrevocably altered) but those are a little obscure and if I'm being honest, I have little patience # for that kind of tweeness. Even 'rollup' (referring to making a sushi roll, in the same vein as 'bento') is a little much. rollup <- function(detection_extract, remora_output) { - #Read in the two dataframes. + # Read in the two dataframes. otn_dets <- read.csv(detection_extract) remora_out <- read.csv(remora_output) - - #Select the appropriate columns from remora_output. - remora_to_merge <- remora_out %>% + + # Select the appropriate columns from remora_output. + remora_to_merge <- remora_out %>% dplyr::select( transmitter_id, tag_id, @@ -35,29 +35,30 @@ rollup <- function(detection_extract, remora_output) { receiver_id, ends_with("_QC") ) - - #Get the dates into the same format for comparison. + + # Get the dates into the same format for comparison. otn_dets <- otn_dets %>% mutate( datecollected = ymd_hms(datecollected) ) - + remora_to_merge <- remora_to_merge %>% mutate( detection_datetime = ymd_hms(detection_datetime) ) - - #Join them to otn_dets + + # Join them to otn_dets otn_det_output <- left_join( - otn_dets, - remora_to_merge, + otn_dets, + remora_to_merge, by = join_by( tagname == transmitter_id, catalognumber == tag_id, datecollected == detection_datetime, receiver == receiver_id - )) - - #Return the merged columns. + ) + ) + + # Return the merged columns. return(otn_det_output) -} \ No newline at end of file +}