Merge pull request #13 from thomaszwagerman/naming_consistency

Moving common functionality to utility function
antarctica · Oct 17, 2024 · ce9a9fe · ce9a9fe
2 parents 72e0d7f + 7185708
commit ce9a9fe
Show file tree

Hide file tree

Showing 17 changed files with 388 additions and 370 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(catch)
+export(create_object_list)
 export(loupe)
 export(release)
 importFrom(lifecycle,deprecated)
diff --git a/R/catch.R b/R/catch.R
@@ -5,6 +5,8 @@
 #' which contains only rows that have changed compared to previous data. It will
 #' not return any new rows.
 #'
+#' The underlying functionality is handled by `create_object_list()`.
+#'
 #' @param df_current data.frame, the newest/current version of dataset x.
 #' @param df_previous data.frame, the old version of dataset, for example x - t1.
 #' @param datetime_variable character, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable.
@@ -13,6 +15,7 @@
 #' also returns a waldo object as in `loupe()`.
 #'
 #' @seealso [loupe()]
+#' @seealso [create_object_list()]
 #'
 #' @examples
 #' df_caught <- butterfly::catch(
@@ -25,94 +28,29 @@
 #'
 #' @export
 catch <- function(df_current, df_previous, datetime_variable) {
-
-  # Check input is as expected
-  stopifnot("`df_current` must be a data.frame" = is.data.frame(df_current))
-  stopifnot("`df_previous` must be a data.frame" = is.data.frame(df_previous))
-
-  # Check if `datetime_variable` is in both `df_current` and `df_previous`
-  if (!datetime_variable %in% names(df_current) || !datetime_variable %in% names(df_previous)){
-    stop(
-      "`datetime_variable` must be present in both `df_current` and `df_previous`"
-    )
-  }
-
-  # Using semi_join to extract rows with matching datetime_variables
-  # (ie previously generated data)
-  df_current_without_new_row <- dplyr::semi_join(
+  butterfly_object_list <- create_object_list(
     df_current,
     df_previous,
-    by = datetime_variable
+    datetime_variable
   )
 
-  # Compare the current data with the previous data, without "new" values
-  waldo_object <- waldo::compare(
-    df_current_without_new_row,
-    df_previous
-  )
-
-  # Obtaining the new rows to provide in feedback
-  df_current_new_rows <- dplyr::anti_join(
-    df_current,
-    df_previous,
-    by = datetime_variable
-  )
-
-  if (nrow(df_current_new_rows) == 0) {
-    warning(
-      "There are no new rows. Check '",
-      deparse(substitute(df_current)),
-      "' is your most recent data, and '",
-      deparse(substitute(df_previous)),
-      "' is your previous data."
-    )
-  } else {
-    # Tell the user which rows are new, regardless of previous data changing
-    cli::cat_line(
-      paste0(
-        "The following rows are new in '",
-        deparse(substitute(df_current)),
-        "': "
-      ),
-      col = "green"
+  # By using an anti join, we keep only the rows which have no exact match in
+  # df_previous, i.e. the rows that have changed.
+  df_rows_changed_from_previous <- suppressMessages(
+    dplyr::anti_join(
+      butterfly_object_list$df_current_without_new_row,
+      df_previous
     )
+  )
 
-    cli::cat_print(
-      df_current_new_rows
-    )
-  }
-
-  # Return a simple message if there are no changes in previous data
-  if (length(waldo_object) == 0) {
-    stop(
-      "There are no differences between current and previous data."
-    )
-
-  } else {
-    # Return detailed breakdown and warning if previous data have changed.
-    if (length(waldo_object) > 0) {
-      cli::cat_line()
-
-      cli::cat_bullet(
-        "The following rows have changed from the previous data, and will be returned:",
-        bullet = "info",
-        col = "orange",
-        bullet_col = "orange"
-      )
+  cli::cat_line()
 
-      cli::cat_print(
-        waldo_object
-      )
+  cli::cat_bullet(
+    "Only these rows are returned.",
+    bullet = "info",
+    col = "orange",
+    bullet_col = "orange"
+  )
 
-      # By using an inner join, we drop any row which does not match in
-      # df_previous.
-      df_rows_changed_from_previous <- suppressMessages(
-        dplyr::anti_join(
-          df_current_without_new_row,
-          df_previous
-        )
-      )
-    }
-  }
   return(df_rows_changed_from_previous)
 }
diff --git a/R/create_object_list.R b/R/create_object_list.R
@@ -0,0 +1,139 @@
+#' create_object_list: creates a list of objects used in all butterfly functions
+#'
+#' This function creates a list of objects which is used by all of `loupe()`,
+#' `catch()` and `release()`.
+#'
+#' This function matches two dataframe objects by their unique identifier
+#' (usually "time" or "datetime" in a timeseries).
+#'
+#' It informs the user of new (unmatched) rows which have appeared, and then
+#' prints a `waldo::compare()` report giving a detailed breakdown of any changes.
+#'
+#' The main assumption is that `df_current` and `df_previous` are newer and
+#' older versions of the same data, and that the `datetime_variable` variable
+#' name always remains the same. Elsewhere, new columns can appear, and these
+#' will be returned in the report.
+#'
+#' @param df_current data.frame, the newest/current version of dataset x.
+#' @param df_previous data.frame, the old version of dataset, for example x - t1.
+#' @param datetime_variable string, which variable to use as unique ID to join
+#'  `df_current` and `df_previous`. Usually a "datetime" variable.
+#'
+#' @returns A list containing `butterfly_status` (a boolean where TRUE indicates
+#' no changes to previous data and FALSE indicates unexpected changes),
+#' `df_current_without_new_row` and `df_current_new_rows` (the new rows only).
+#'
+#' @examples
+#' butterfly_object_list <- butterfly::create_object_list(
+#'   butterflycount$february,
+#'   butterflycount$january,
+#'   datetime_variable = "time"
+#' )
+#'
+#' butterfly_object_list
+#'
+#' @export
+create_object_list <- function(df_current, df_previous, datetime_variable) {
+  # Check input is as expected
+  stopifnot("`df_current` must be a data.frame" = is.data.frame(df_current))
+  stopifnot("`df_previous` must be a data.frame" = is.data.frame(df_previous))
+
+  # Check if `datetime_variable` is in both `df_current` and `df_previous`
+  if (!datetime_variable %in% names(df_current) || !datetime_variable %in% names(df_previous)) {
+    stop(
+      "`datetime_variable` must be present in both `df_current` and `df_previous`"
+    )
+  }
+
+  # Initialise list to store objects used by `loupe()`, `catch()` and `release()`
+  list_butterfly <- list(
+    "waldo_object" = character(),
+    "df_current_without_new_row" = data.frame(),
+    "df_current_new_rows" = data.frame()
+  )
+
+  # Using semi_join to extract rows with matching datetime_variables
+  # (ie previously generated data)
+  df_current_without_new_row <- dplyr::semi_join(
+    df_current,
+    df_previous,
+    by = datetime_variable
+  )
+
+  # Obtaining the new rows to provide in feedback
+  df_current_new_rows <- dplyr::anti_join(
+    df_current,
+    df_previous,
+    by = datetime_variable
+  )
+
+  # Compare the current data with the previous data, without "new" values
+  waldo_object <- waldo::compare(
+    df_current_without_new_row,
+    df_previous
+  )
+
+  # Creating a feedback message depending on the waldo object's output
+  # First checking if there are new rows at all:
+  if (nrow(df_current_new_rows) == 0) {
+    stop(
+      "There are no new rows. Check '",
+      deparse(substitute(df_current)),
+      "' is your most recent data, and '",
+      deparse(substitute(df_previous)),
+      "' is your previous data. If comparing like for like, try waldo::compare()."
+    )
+  } else {
+    # Tell the user which rows are new, regardless of previous data changing
+    cli::cat_line(
+      "The following rows are new in '",
+      deparse(substitute(df_current)),
+      "': ",
+      col = "green"
+    )
+    cli::cat_print(
+      df_current_new_rows
+    )
+  }
+
+  # Print a simple message and set the status if previous data are unchanged
+  if (length(waldo_object) == 0) {
+    cli::cat_bullet(
+      "And there are no differences with previous data.",
+      bullet = "tick",
+      col = "green",
+      bullet_col = "green"
+    )
+
+    butterfly_status <- TRUE
+
+  } else {
+    # Return detailed breakdown and warning if previous data have changed.
+    if (length(waldo_object) > 0) {
+      cli::cat_line()
+
+      cli::cat_bullet(
+        "The following values have changes from the previous data.",
+        bullet = "info",
+        col = "orange",
+        bullet_col = "orange"
+      )
+
+      cli::cat_print(
+        waldo_object
+      )
+
+      butterfly_status <- FALSE
+
+    }
+  }
+
+  # Populate list with objects
+  list_butterfly <- list(
+    butterfly_status = butterfly_status,
+    df_current_without_new_row = df_current_without_new_row,
+    df_current_new_rows = df_current_new_rows
+  )
+
+  return(list_butterfly)
+}
diff --git a/R/loupe.R b/R/loupe.R
@@ -18,18 +18,22 @@
 #' remains the same. Elsewhere new columns can of appear, and these will be
 #' returned in the report.
 #'
+#' The underlying functionality is handled by `create_object_list()`.
+#'
 #' @param df_current data.frame, the newest/current version of dataset x.
 #' @param df_previous data.frame, the old version of dataset, for example x - t1.
 #' @param datetime_variable string, which variable to use as unique ID to join `df_current` and `df_previous`. Usually a "datetime" variable.
 #'
-#' @returns A waldo object containing a message on differences or 'And there are no differences with previous data'.
+#' @returns A boolean where TRUE indicates no changes to previous data and FALSE indicates unexpected changes.
+#'
+#' @seealso [create_object_list()]
 #'
 #' @examples
 #' # This example contains no differences with previous data
 #' butterfly::loupe(
-#'  butterflycount$february,
-#'  butterflycount$january,
-#'  datetime_variable = "time"
+#'   butterflycount$february,
+#'   butterflycount$january,
+#'   datetime_variable = "time"
 #' )
 #'
 #' # This example does contain differences with previous data
@@ -41,82 +45,12 @@
 #'
 #' @export
 loupe <- function(df_current, df_previous, datetime_variable) {
-
-  # Check input is as expected
-  stopifnot("`df_current` must be a data.frame" = is.data.frame(df_current))
-  stopifnot("`df_previous` must be a data.frame" = is.data.frame(df_previous))
-
-  # Check if `datetime_variable` is in both `df_current` and `df_previous`
-  if (!datetime_variable %in% names(df_current) || !datetime_variable %in% names(df_previous)){
-    stop(
-      "`datetime_variable` must be present in both `df_current` and `df_previous`"
-    )
-  }
-
-  # Using semi_join to extract rows with matching datetime_variables
-  # (ie previously generated data)
-  df_current_without_new_row <- dplyr::semi_join(
+  butterfly_object_list <- create_object_list(
     df_current,
     df_previous,
-    by = datetime_variable
+    datetime_variable
   )
 
-  # Compare the current data with the previous data, without "new" values
-  waldo_object <- waldo::compare(
-    df_current_without_new_row,
-    df_previous
-  )
-
-  # Obtaining the new rows to provide in feedback
-  df_current_new_rows <- dplyr::anti_join(
-    df_current,
-    df_previous,
-    by = datetime_variable
-  )
-
-  # Creating a feedback message depending on the waldo object's output
-  # First checking if there are new rows at all:
-  if (nrow(df_current_new_rows) == 0) {
-    stop(
-      "There are no new rows. Check '",
-      deparse(substitute(df_current)),
-      "' is your most recent data, and '",
-      deparse(substitute(df_previous)),
-      "' is your previous data. If comparing like for like, try waldo::compare()."
-    )
-  } else {
-    # Tell the user which rows are new, regardless of previous data changing
-    cli::cat_line(
-      "The following rows are new in '",
-      deparse(substitute(df_current)),
-      "': ",
-      col = "green"
-    )
-    cli::cat_print(
-      df_current_new_rows
-    )
-  }
-
-  # Return a simple message if there are no changes in previous data
-  if (length(waldo_object) == 0) {
-    cli::cat_bullet(
-      "And there are no differences with previous data.",
-      bullet = "tick",
-      col = "green",
-      bullet_col = "green"
-    )
-  } else {
-    # Return detailed breakdown and warning if previous data have changed.
-    if (length(waldo_object) > 0) {
-      cli::cat_line()
+  return(butterfly_object_list$butterfly_status)
 
-      cli::cat_bullet(
-        "But the following values have changes from the previous data:",
-        bullet = "info",
-        col = "orange",
-        bullet_col = "orange"
-      )
-      return(waldo_object)
-    }
-  }
 }