From 9c47f3eaff0cb7c9540e62cb1cb1420803573c15 Mon Sep 17 00:00:00 2001 From: thomaszwagerman Date: Tue, 29 Oct 2024 08:57:37 +0000 Subject: [PATCH 1/5] inherit params and add ... for other waldo arguments --- R/catch.R | 7 +++---- R/create_object_list.R | 11 +++++++++-- R/loupe.R | 7 +++---- R/release.R | 7 +++---- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/R/catch.R b/R/catch.R index e788549..ef43f95 100644 --- a/R/catch.R +++ b/R/catch.R @@ -7,9 +7,7 @@ #' #' The underlying functionality is handled by `create_object_list()`. #' -#' @param df_current data.frame, the newest/current version of dataset x. -#' @param df_previous data.frame, the old version of dataset, for example x - t1. -#' @param datetime_variable character, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable. +#' @inheritParams create_object_list #' #' @returns A dataframe which contains only rows of `df_current` that have changes from `df_previous`, but without new rows. #' also returns a waldo object as in `loupe()`. @@ -32,7 +30,8 @@ catch <- function(df_current, df_previous, datetime_variable) { butterfly_object_list <- create_object_list( df_current, df_previous, - datetime_variable + datetime_variable, + ... ) # By using an inner join, we drop any row which does not match in diff --git a/R/create_object_list.R b/R/create_object_list.R index df4f32b..ef5edad 100644 --- a/R/create_object_list.R +++ b/R/create_object_list.R @@ -18,6 +18,7 @@ #' @param df_previous data.frame, the old version of dataset, for example x - t1. #' @param datetime_variable string, which variable to use as unique ID to join #' `df_current` and `df_previous`. Usually a "datetime" variable. +#' @param ... Other `waldo::compare()` arguments are supported. 
#' #' @returns A list containing boolean where TRUE indicates no changes to #' previous data and FALSE indicates unexpected changes, a dataframe of @@ -33,7 +34,12 @@ #' butterfly_object_list #' #' @export -create_object_list <- function(df_current, df_previous, datetime_variable) { +create_object_list <- function( + df_current, + df_previous, + datetime_variable, + ... + ) { # Check input is as expected stopifnot("`df_current` must be a data.frame" = is.data.frame(df_current)) stopifnot("`df_previous` must be a data.frame" = is.data.frame(df_previous)) @@ -70,7 +76,8 @@ create_object_list <- function(df_current, df_previous, datetime_variable) { # Compare the current data with the previous data, without "new" values waldo_object <- waldo::compare( df_current_without_new_row, - df_previous + df_previous, + ... ) # Creating a feedback message depending on the waldo object's output diff --git a/R/loupe.R b/R/loupe.R index a50a1c0..1c1c163 100644 --- a/R/loupe.R +++ b/R/loupe.R @@ -20,9 +20,7 @@ #' #' The underlying functionality is handled by `create_object_list()`. #' -#' @param df_current data.frame, the newest/current version of dataset x. -#' @param df_previous data.frame, the old version of dataset, for example x - t1. -#' @param datetime_variable string, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable. +#' @inheritParams create_object_list #' #' @returns A boolean where TRUE indicates no changes to previous data and FALSE indicates unexpected changes. #' @@ -49,7 +47,8 @@ loupe <- function(df_current, df_previous, datetime_variable) { butterfly_object_list <- create_object_list( df_current, df_previous, - datetime_variable + datetime_variable, + ... 
) return(butterfly_object_list$butterfly_status) diff --git a/R/release.R b/R/release.R index fa52827..533af4b 100644 --- a/R/release.R +++ b/R/release.R @@ -5,9 +5,7 @@ #' which contains the new rows (if present) but matched rows which contain #' changes from previous data will be dropped. #' -#' @param df_current data.frame, the newest/current version of dataset x. -#' @param df_previous data.frame, the old version of dataset, for example x - t1. -#' @param datetime_variable string, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable. +#' @inheritParams create_object_list #' @param include_new boolean, should new rows be included? Default is TRUE. #' #' @returns A dataframe which contains only rows of `df_current` that have not changed from `df_previous`, and includes new rows. @@ -32,7 +30,8 @@ release <- function(df_current, df_previous, datetime_variable, include_new = TR butterfly_object_list <- create_object_list( df_current, df_previous, - datetime_variable + datetime_variable, + ... ) # By using an inner join, we drop any row which does not match in From 638838f30ebd7eabfd864ffad76eaa58db0bba72 Mon Sep 17 00:00:00 2001 From: thomaszwagerman Date: Tue, 29 Oct 2024 09:21:30 +0000 Subject: [PATCH 2/5] properly implement waldo compare inheritance --- R/catch.R | 4 ++-- R/create_object_list.R | 12 +++++++++++- R/loupe.R | 2 +- R/release.R | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/R/catch.R b/R/catch.R index ef43f95..a6f738f 100644 --- a/R/catch.R +++ b/R/catch.R @@ -26,13 +26,13 @@ #' df_caught #' #' @export -catch <- function(df_current, df_previous, datetime_variable) { +catch <- function(df_current, df_previous, datetime_variable, ...) { butterfly_object_list <- create_object_list( df_current, df_previous, datetime_variable, ... - ) + ) # By using an inner join, we drop any row which does not match in # df_previous. 
diff --git a/R/create_object_list.R b/R/create_object_list.R index ef5edad..ed1907a 100644 --- a/R/create_object_list.R +++ b/R/create_object_list.R @@ -18,7 +18,7 @@ #' @param df_previous data.frame, the old version of dataset, for example x - t1. #' @param datetime_variable string, which variable to use as unique ID to join #' `df_current` and `df_previous`. Usually a "datetime" variable. -#' @param ... Other `waldo::compare()` arguments are supported. +#' @param ... Other `waldo::compare()` arguments can be supplied here. #' #' @returns A list containing boolean where TRUE indicates no changes to #' previous data and FALSE indicates unexpected changes, a dataframe of @@ -33,6 +33,16 @@ #' #' butterfly_object_list #' +#' # You can pass other `waldo::compare()` options such as tolerance here +#' butterfly_object_list <- butterfly::create_object_list( +#' butterflycount$march, # This is your new or current dataset +#' butterflycount$february, # This is the previous version you are comparing it to +#' datetime_variable = "time", # This is the unique ID variable they have in common +#' tolerance = 2 +#' ) +#' +#' butterfly_object_list +#' #' @export create_object_list <- function( df_current, diff --git a/R/loupe.R b/R/loupe.R index 1c1c163..93105a1 100644 --- a/R/loupe.R +++ b/R/loupe.R @@ -43,7 +43,7 @@ #' ) #' #' @export -loupe <- function(df_current, df_previous, datetime_variable) { +loupe <- function(df_current, df_previous, datetime_variable, ...) { butterfly_object_list <- create_object_list( df_current, df_previous, diff --git a/R/release.R b/R/release.R index 533af4b..a07a904 100644 --- a/R/release.R +++ b/R/release.R @@ -26,7 +26,7 @@ #' df_released #' #' @export -release <- function(df_current, df_previous, datetime_variable, include_new = TRUE) { +release <- function(df_current, df_previous, datetime_variable, include_new = TRUE, ...) 
{ butterfly_object_list <- create_object_list( df_current, df_previous, From 8dd3bf973d4a2701874b5d1b3ff9a9914f794415 Mon Sep 17 00:00:00 2001 From: thomaszwagerman Date: Tue, 29 Oct 2024 10:19:35 +0000 Subject: [PATCH 3/5] reknitted documents, adding test to check extra arguments work --- R/catch.R | 41 ++++++++------ R/create_object_list.R | 3 +- R/release.R | 71 ++++++++++++++---------- man/butterfly-package.Rd | 2 +- man/catch.Rd | 8 ++- man/create_object_list.Rd | 15 ++++- man/loupe.Rd | 8 ++- man/release.Rd | 8 ++- tests/testthat/test-create_object_list.R | 14 +++++ 9 files changed, 115 insertions(+), 55 deletions(-) diff --git a/R/catch.R b/R/catch.R index a6f738f..d0a416c 100644 --- a/R/catch.R +++ b/R/catch.R @@ -32,25 +32,34 @@ catch <- function(df_current, df_previous, datetime_variable, ...) { df_previous, datetime_variable, ... - ) + ) - # By using an inner join, we drop any row which does not match in - # df_previous. - df_rows_changed_from_previous <- suppressMessages( - dplyr::anti_join( - butterfly_object_list$df_current_without_new_row, - df_previous + if (butterfly_object_list$butterfly_status == TRUE) { + cli::cat_bullet( + "There are no differences, so there are no rows to return Did you specify a tolerance that exceeds number of differences?", + bullet = "info", + col = "orange", + bullet_col = "orange" + ) + } else { + # By using an anti join, we keep only the rows which do not match in + # df_previous. 
+ df_rows_changed_from_previous <- suppressMessages( + dplyr::anti_join( + butterfly_object_list$df_current_without_new_row, + df_previous + ) ) - ) - cli::cat_line() + cli::cat_line() - cli::cat_bullet( - "Only these rows are returned.", - bullet = "info", - col = "orange", - bullet_col = "orange" - ) + cli::cat_bullet( + "Only these rows are returned.", + bullet = "info", + col = "orange", + bullet_col = "orange" + ) - return(df_rows_changed_from_previous) + return(df_rows_changed_from_previous) + } } diff --git a/R/create_object_list.R b/R/create_object_list.R index ed1907a..b620d39 100644 --- a/R/create_object_list.R +++ b/R/create_object_list.R @@ -18,7 +18,8 @@ #' @param df_previous data.frame, the old version of dataset, for example x - t1. #' @param datetime_variable string, which variable to use as unique ID to join #' `df_current` and `df_previous`. Usually a "datetime" variable. -#' @param ... Other `waldo::compare()` arguments can be supplied here. +#' @param ... Other `waldo::compare()` arguments can be supplied here, +#' such as `tolerance` or `max_diffs`. See ?waldo::compare() for a full list. #' #' @returns A list containing boolean where TRUE indicates no changes to #' previous data and FALSE indicates unexpected changes, a dataframe of diff --git a/R/release.R b/R/release.R index a07a904..acdb8ec 100644 --- a/R/release.R +++ b/R/release.R @@ -34,45 +34,56 @@ release <- function(df_current, df_previous, datetime_variable, include_new = TR ... ) - # By using an inner join, we drop any row which does not match in - # df_previous. 
- df_current_without_changed_rows <- suppressMessages( - dplyr::inner_join( - butterfly_object_list$df_current_without_new_row, - df_previous - ) - ) - - # Returng the dataframe with or without new rows added - if (include_new == TRUE) { - # Then we add the new rows back in and return the dataframe as such - df_release <- dplyr::bind_rows( - butterfly_object_list$df_current_new_rows, - df_current_without_changed_rows - ) - - cli::cat_line() + if (butterfly_object_list$butterfly_status == TRUE){ cli::cat_bullet( - "These will be dropped, but new rows are included.", + "There are no differences, so there are no rows to drop. Did you specify a tolerance that exceeds number of differences?", bullet = "info", col = "orange", bullet_col = "orange" ) - return(df_release) + } else { + # By using an inner join, we drop any row which does not match in + # df_previous. + df_current_without_changed_rows <- suppressMessages( + dplyr::inner_join( + butterfly_object_list$df_current_without_new_row, + df_previous + ) + ) - } else if (include_new == FALSE) { - cli::cat_line() + # Returning the dataframe with or without new rows added + if (include_new == TRUE) { + # Then we add the new rows back in and return the dataframe as such + df_release <- dplyr::bind_rows( + butterfly_object_list$df_current_new_rows, + df_current_without_changed_rows + ) - cli::cat_bullet( - "These will be dropped, along with new rows.", - bullet = "info", - col = "orange", - bullet_col = "orange" - ) + cli::cat_line() + + cli::cat_bullet( + "These will be dropped, but new rows are included.", + bullet = "info", + col = "orange", + bullet_col = "orange" + ) + + return(df_release) + + } else if (include_new == FALSE) { + cli::cat_line() + + cli::cat_bullet( + "These will be dropped, along with new rows.", + bullet = "info", + col = "orange", + bullet_col = "orange" + ) - # If new rows are not included, simply return the df without changed rows - return(df_current_without_changed_rows) + # If new rows are not 
included, simply return the df without changed rows + return(df_current_without_changed_rows) + } } } diff --git a/man/butterfly-package.Rd b/man/butterfly-package.Rd index 121d5fd..af42318 100644 --- a/man/butterfly-package.Rd +++ b/man/butterfly-package.Rd @@ -4,7 +4,7 @@ \name{butterfly-package} \alias{butterfly} \alias{butterfly-package} -\title{butterfly: QA/QC For Continually Updating Timeseries Data} +\title{butterfly: Verification For Continually Updating Timeseries Data} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} diff --git a/man/catch.Rd b/man/catch.Rd index af95acc..22d9c64 100644 --- a/man/catch.Rd +++ b/man/catch.Rd @@ -4,14 +4,18 @@ \alias{catch} \title{Catch: return dataframe containing only rows that have changed} \usage{ -catch(df_current, df_previous, datetime_variable) +catch(df_current, df_previous, datetime_variable, ...) } \arguments{ \item{df_current}{data.frame, the newest/current version of dataset x.} \item{df_previous}{data.frame, the old version of dataset, for example x - t1.} -\item{datetime_variable}{character, which variable to use as unique ID to join \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} +\item{datetime_variable}{string, which variable to use as unique ID to join +\code{df_current} and \code{df_previous}. Usually a "datetime" variable.} + +\item{...}{Other \code{waldo::compare()} arguments can be supplied here, +such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} } \value{ A dataframe which contains only rows of \code{df_current} that have changes from \code{df_previous}, but without new rows. 
diff --git a/man/create_object_list.Rd b/man/create_object_list.Rd index 64582ac..540b79a 100644 --- a/man/create_object_list.Rd +++ b/man/create_object_list.Rd @@ -4,7 +4,7 @@ \alias{create_object_list} \title{create_object_list: creates a list of objects used in all butterfly functions} \usage{ -create_object_list(df_current, df_previous, datetime_variable) +create_object_list(df_current, df_previous, datetime_variable, ...) } \arguments{ \item{df_current}{data.frame, the newest/current version of dataset x.} @@ -13,6 +13,9 @@ create_object_list(df_current, df_previous, datetime_variable) \item{datetime_variable}{string, which variable to use as unique ID to join \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} + +\item{...}{Other \code{waldo::compare()} arguments can be supplied here, +such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} } \value{ A list containing boolean where TRUE indicates no changes to @@ -44,4 +47,14 @@ butterfly_object_list <- butterfly::create_object_list( butterfly_object_list +# You can pass other `waldo::compare()` options such as tolerance here +butterfly_object_list <- butterfly::create_object_list( + butterflycount$march, # This is your new or current dataset + butterflycount$february, # This is the previous version you are comparing it to + datetime_variable = "time", # This is the unique ID variable they have in common + tolerance = 2 +) + +butterfly_object_list + } diff --git a/man/loupe.Rd b/man/loupe.Rd index 192a412..2228545 100644 --- a/man/loupe.Rd +++ b/man/loupe.Rd @@ -4,14 +4,18 @@ \alias{loupe} \title{Loupe: compare new and old data in continuously updated timeseries} \usage{ -loupe(df_current, df_previous, datetime_variable) +loupe(df_current, df_previous, datetime_variable, ...) 
} \arguments{ \item{df_current}{data.frame, the newest/current version of dataset x.} \item{df_previous}{data.frame, the old version of dataset, for example x - t1.} -\item{datetime_variable}{string, which variable to use as unique ID to join \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} +\item{datetime_variable}{string, which variable to use as unique ID to join +\code{df_current} and \code{df_previous}. Usually a "datetime" variable.} + +\item{...}{Other \code{waldo::compare()} arguments can be supplied here, +such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} } \value{ A boolean where TRUE indicates no changes to previous data and FALSE indicates unexpected changes. diff --git a/man/release.Rd b/man/release.Rd index 724daa8..5c43fe8 100644 --- a/man/release.Rd +++ b/man/release.Rd @@ -4,16 +4,20 @@ \alias{release} \title{Release: return current dataframe without changed old rows} \usage{ -release(df_current, df_previous, datetime_variable, include_new = TRUE) +release(df_current, df_previous, datetime_variable, include_new = TRUE, ...) } \arguments{ \item{df_current}{data.frame, the newest/current version of dataset x.} \item{df_previous}{data.frame, the old version of dataset, for example x - t1.} -\item{datetime_variable}{string, which variable to use as unique ID to join \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} +\item{datetime_variable}{string, which variable to use as unique ID to join +\code{df_current} and \code{df_previous}. Usually a "datetime" variable.} \item{include_new}{boolean, should new rows be included? Default is TRUE.} + +\item{...}{Other \code{waldo::compare()} arguments can be supplied here, +such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} } \value{ A dataframe which contains only rows of \code{df_current} that have not changed from \code{df_previous}, and includes new rows. 
diff --git a/tests/testthat/test-create_object_list.R b/tests/testthat/test-create_object_list.R index 3523b13..e39d05a 100644 --- a/tests/testthat/test-create_object_list.R +++ b/tests/testthat/test-create_object_list.R @@ -68,3 +68,17 @@ test_that("comparison object is returned when not equal", { 0 ) }) + +test_that("passing of additional waldo arguments works as expected", { + # Adding a tolerance of 2 should now "ignore" the single change + create_object_list_output <- create_object_list( + butterflycount$march, + butterflycount$february, + datetime_variable = "time", + tolerance = 2 + ) + + testthat::expect_true( + create_object_list_output$butterfly_status + ) +}) From 30b0c8ee67de80ff457720902289d5eb8498d3d2 Mon Sep 17 00:00:00 2001 From: thomaszwagerman Date: Tue, 29 Oct 2024 10:35:34 +0000 Subject: [PATCH 4/5] turn ?waldo::compare call into code chunks --- R/create_object_list.R | 2 +- man/catch.Rd | 2 +- man/create_object_list.Rd | 2 +- man/loupe.Rd | 2 +- man/release.Rd | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/create_object_list.R b/R/create_object_list.R index b620d39..d7521a7 100644 --- a/R/create_object_list.R +++ b/R/create_object_list.R @@ -19,7 +19,7 @@ #' @param datetime_variable string, which variable to use as unique ID to join #' `df_current` and `df_previous`. Usually a "datetime" variable. #' @param ... Other `waldo::compare()` arguments can be supplied here, -#' such as `tolerance` or `max_diffs`. See ?waldo::compare() for a full list. +#' such as `tolerance` or `max_diffs`. See `?waldo::compare()` for a full list. #' #' @returns A list containing boolean where TRUE indicates no changes to #' previous data and FALSE indicates unexpected changes, a dataframe of diff --git a/man/catch.Rd b/man/catch.Rd index 22d9c64..5eb8b13 100644 --- a/man/catch.Rd +++ b/man/catch.Rd @@ -15,7 +15,7 @@ catch(df_current, df_previous, datetime_variable, ...) \code{df_current} and \code{df_previous}. 
Usually a "datetime" variable.} \item{...}{Other \code{waldo::compare()} arguments can be supplied here, -such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} +such as \code{tolerance} or \code{max_diffs}. See \code{?waldo::compare()} for a full list.} } \value{ A dataframe which contains only rows of \code{df_current} that have changes from \code{df_previous}, but without new rows. diff --git a/man/create_object_list.Rd b/man/create_object_list.Rd index 540b79a..24fffcd 100644 --- a/man/create_object_list.Rd +++ b/man/create_object_list.Rd @@ -15,7 +15,7 @@ create_object_list(df_current, df_previous, datetime_variable, ...) \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} \item{...}{Other \code{waldo::compare()} arguments can be supplied here, -such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} +such as \code{tolerance} or \code{max_diffs}. See \code{?waldo::compare()} for a full list.} } \value{ A list containing boolean where TRUE indicates no changes to diff --git a/man/loupe.Rd b/man/loupe.Rd index 2228545..9301b86 100644 --- a/man/loupe.Rd +++ b/man/loupe.Rd @@ -15,7 +15,7 @@ loupe(df_current, df_previous, datetime_variable, ...) \code{df_current} and \code{df_previous}. Usually a "datetime" variable.} \item{...}{Other \code{waldo::compare()} arguments can be supplied here, -such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} +such as \code{tolerance} or \code{max_diffs}. See \code{?waldo::compare()} for a full list.} } \value{ A boolean where TRUE indicates no changes to previous data and FALSE indicates unexpected changes. diff --git a/man/release.Rd b/man/release.Rd index 5c43fe8..c402c18 100644 --- a/man/release.Rd +++ b/man/release.Rd @@ -17,7 +17,7 @@ release(df_current, df_previous, datetime_variable, include_new = TRUE, ...) \item{include_new}{boolean, should new rows be included? 
Default is TRUE.} \item{...}{Other \code{waldo::compare()} arguments can be supplied here, -such as \code{tolerance} or \code{max_diffs}. See ?waldo::compare() for a full list.} +such as \code{tolerance} or \code{max_diffs}. See \code{?waldo::compare()} for a full list.} } \value{ A dataframe which contains only rows of \code{df_current} that have not changed from \code{df_previous}, and includes new rows. From 46b91415f54585e15b462e0f5246d2bc48438347 Mon Sep 17 00:00:00 2001 From: thomaszwagerman Date: Tue, 29 Oct 2024 10:46:36 +0000 Subject: [PATCH 5/5] adding additional parameter example to vignette --- vignettes/butterfly.Rmd | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/vignettes/butterfly.Rmd b/vignettes/butterfly.Rmd index d24502e..eb434d1 100644 --- a/vignettes/butterfly.Rmd +++ b/vignettes/butterfly.Rmd @@ -53,6 +53,23 @@ butterfly::loupe( `butterfly` follows the `waldo` philosophy of erring on the side of providing too much information, rather than too little. It will give a detailed feedback message on the status between two objects. +### Additional arguments from `waldo::compare()` + +You can pass any further argument that `waldo::compare()` accepts to any butterfly function, for instance to specify a tolerance. + +If we add a tolerance of 2 to the previous example, no differences should be returned: + +```{r tolerance_example} +butterfly::loupe( + butterflycount$march, + butterflycount$february, + datetime_variable = "time", + tolerance = 2 # <- setting a tolerance of 2 +) +``` + +Call `?waldo::compare()` to see the full list of arguments. + ## Extracting unexpected changes: catch() You might want to return changed rows as a dataframe. For this `butterfly::catch()`is provided.