From 7c2edc6657ed19e5d1414efe749dc4f9325fb1e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 5 Oct 2023 08:49:38 +0200 Subject: [PATCH] SNAPSHOT: Replace build_copy_queries() with dm_sql() --- R/db-interface.R | 220 +++++++++---------------- R/error-helpers.R | 24 --- R/zzx-deprecated.R | 18 +- man/copy_dm_to.Rd | 54 +----- tests/testthat/_snaps/db-interface.md | 29 ++++ tests/testthat/_snaps/error-helpers.md | 15 -- tests/testthat/test-db-interface.R | 23 +-- tests/testthat/test-error-helpers.R | 3 - 8 files changed, 135 insertions(+), 251 deletions(-) create mode 100644 tests/testthat/_snaps/db-interface.md diff --git a/R/db-interface.R b/R/db-interface.R index 7f5c0d560e..b680a59a53 100644 --- a/R/db-interface.R +++ b/R/db-interface.R @@ -5,64 +5,26 @@ #' and a [`dm`] object as its second argument. #' The latter is copied to the former. #' The default is to create temporary tables, set `temporary = FALSE` to create permanent tables. -#' Unless `set_key_constraints` is `FALSE`, primary key constraints are set on all databases, +#' Unless `set_key_constraints` is `FALSE`, primary key, foreign key, and unique constraints are set on all databases, #' and in addition foreign key constraints are set on MSSQL and Postgres databases. #' -#' @details -#' No tables will be overwritten; passing `overwrite = TRUE` to the function will give an error. -#' Types are determined separately for each table, setting the `types` argument will -#' also throw an error. -#' The arguments are included in the signature to avoid passing them via the -#' `...` ellipsis. -#' #' @inheritParams dm_examine_constraints #' #' @param dest An object of class `"src"` or `"DBIConnection"`. #' @param dm A `dm` object. -#' @param overwrite,types,indexes,unique_indexes Must remain `NULL`. #' @param set_key_constraints If `TRUE` will mirror `dm` primary and foreign key constraints on a database #' and create unique indexes. #' Set to `FALSE` if your data model currently does not satisfy primary or foreign key constraints. -#' @param unique_table_names Deprecated. #' @param temporary If `TRUE`, only temporary tables will be created. #' These tables will vanish when disconnecting from the database. #' @param schema Name of schema to copy the `dm` to. -#' If `schema` is provided, an error will be thrown if `temporary = FALSE` or -#' `table_names` is not `NULL`. -#' -#' Not all DBMS are supported. -#' @param table_names Desired names for the tables on `dest`; the names within the `dm` remain unchanged. -#' Can be `NULL`, a named character vector, a function or a one-sided formula. -#' -#' If left `NULL` (default), the names will be determined automatically depending on the `temporary` argument: -#' -#' 1. `temporary = TRUE` (default): unique table names based on the names of the tables in the `dm` are created. -#' 1. `temporary = FALSE`: the table names in the `dm` are used as names for the tables on `dest`. -#' -#' If a function or one-sided formula, `table_names` is converted to a function -#' using [rlang::as_function()]. -#' This function is called with the unquoted table names of the `dm` object -#' as the only argument. -#' The output of this function is processed by [DBI::dbQuoteIdentifier()], -#' that result should be a vector of identifiers of the same length -#' as the original table names. -#' -#' Use a variant of -#' `table_names = ~ DBI::SQL(paste0("schema_name", ".", .x))` -#' to specify the same schema for all tables. -#' Use `table_names = identity` with `temporary = TRUE` -#' to avoid giving temporary tables unique names. -#' -#' If a named character vector, -#' the names of this vector need to correspond to the table names in the `dm`, -#' and its values are the desired names on `dest`. -#' The value is processed by [DBI::dbQuoteIdentifier()], -#' that result should be a vector of identifiers of the same length -#' as the original table names. +#' If `schema` is provided, an error will be thrown if `temporary = FALSE` or +#' `table_names` is not `NULL`. #' -#' Use qualified names corresponding to your database's syntax -#' to specify e.g. database and schema for your tables. -#' @param copy_to,... Deprecated. +#' Not all DBMS are supported. +#' @inheritParams dm_sql +#' @inheritParams rlang::args_dots_empty +#' @param unique_table_names,copy_to Deprecated. #' #' @family DB interaction functions #' @@ -94,10 +56,6 @@ copy_dm_to <- function( dest, dm, ..., - types = NULL, - overwrite = NULL, - indexes = NULL, - unique_indexes = NULL, set_key_constraints = TRUE, unique_table_names = NULL, table_names = NULL, @@ -111,156 +69,134 @@ copy_dm_to <- function( # 2. copy the tables to `dest` # 3. implement the key situation within our `dm` on the DB - if (!is_null(overwrite)) { - abort_no_overwrite() - } - - if (!is_null(types)) { - abort_no_types() - } - - if (!is_null(indexes)) { - abort_no_indexes() - } - - if (!is_null(unique_indexes)) { - abort_no_unique_indexes() - } - if (!is.null(unique_table_names)) { - deprecate_soft( + deprecate_stop( "0.1.4", "dm::copy_dm_to(unique_table_names = )", - details = "Use `table_names = identity` to use unchanged names for temporary tables." + details = "Use `table_names = set_names(names(dm))` to use unchanged names for temporary tables." ) - - if (is.null(table_names) && temporary && !unique_table_names) { - table_names <- identity - } } if (!is.null(copy_to)) { - deprecate_soft( + deprecate_stop( "1.0.0", "dm::copy_dm_to(copy_to = )", - details = "Use `dm_ddl()` for more control over the schema creation process." + details = "Use `dm_sql()` for more control over the schema creation process." ) } - if (dots_n(...) > 0) { - deprecate_soft( - "1.0.0", "dm::copy_dm_to(... = )", - details = "Use `dm_ddl()` for more control over the schema creation process." - ) - } + check_dots_empty() + + check_not_zoomed(dm) check_suggested("dbplyr", use = TRUE) dest <- src_from_src_or_con(dest) - src_names <- src_tbls_impl(dm) - if (is_db(dest)) { - dest_con <- con_from_src_or_con(dest) - - # in case `table_names` was chosen by the user, check if the input makes sense: - # 1. is there one name per dm-table? - # 2. are there any duplicated table names? - # 3. is it a named character or ident_q vector with the correct names? - if (is.null(table_names)) { - table_names_out <- repair_table_names_for_db(src_names, temporary, dest_con, schema) - # https://github.com/tidyverse/dbplyr/issues/487 - if (is_mssql(dest)) { - temporary <- FALSE - } - } else { - if (!is.null(schema)) abort_one_of_schema_table_names() - if (is_function(table_names) || is_bare_formula(table_names)) { - table_name_fun <- as_function(table_names) - table_names_out <- set_names(table_name_fun(src_names), src_names) - } else { - table_names_out <- table_names - } - check_naming(names(table_names_out), src_names) - - if (anyDuplicated(table_names_out)) { - problem <- table_names_out[duplicated(table_names_out)][[1]] - abort_copy_dm_to_table_names_duplicated(problem) - } - - names(table_names_out) <- src_names - } - } else { - # FIXME: Other data sources than local and database possible - deprecate_soft( - "0.1.6", "dm::copy_dm_to(dest = 'must refer to a remote data source')", + if (!is_db(dest)) { + deprecate_stop( + "0.1.6", "dm::copy_dm_to(dest = 'must refer to a DBI connection')", "dm::collect.dm()" ) - table_names_out <- set_names(src_names) } - check_not_zoomed(dm) + src_names <- src_tbls_impl(dm) + dest_con <- con_from_src_or_con(dest) + + # in case `table_names` was chosen by the user, check if the input makes sense: + # 1. is there one name per dm-table? + # 2. are there any duplicated table names? + # 3. is it a named character or ident_q vector with the correct names? + if (is.null(table_names)) { + table_names_out <- repair_table_names_for_db(src_names, temporary, dest_con, schema) + # https://github.com/tidyverse/dbplyr/issues/487 + if (is_mssql(dest)) { + temporary <- FALSE + } + } else { + if (!is.null(schema)) abort_one_of_schema_table_names() + if (is_function(table_names) || is_bare_formula(table_names)) { + table_name_fun <- as_function(table_names) + table_names_out <- set_names(table_name_fun(src_names), src_names) + } else { + table_names_out <- table_names + } + check_naming(names(table_names_out), src_names) - # FIXME: if same_src(), can use compute() but need to set NOT NULL and other - # constraints + if (anyDuplicated(table_names_out)) { + problem <- table_names_out[duplicated(table_names_out)][[1]] + abort_copy_dm_to_table_names_duplicated(problem) + } - # Shortcut necessary to avoid copying into .GlobalEnv - if (!is_db(dest)) { - return(dm) + names(table_names_out) <- src_names + } + + table_names_out <- ddl_check_table_names(table_names_out, dm) + + if (isTRUE(set_key_constraints)) { + dm_for_sql <- dm + } else { + def_no_keys <- dm_get_def(dm) + def_no_keys$uks[] <- list(new_uk()) + def_no_keys$fks[] <- list(new_fk()) + # Must keep primary keys + dm_for_sql <- dm_from_def(def_no_keys) } - queries <- build_copy_queries(dest_con, dm, set_key_constraints, temporary, table_names_out) + sql <- dm_sql(dm_for_sql, dest_con, table_names_out, temporary) - ticker_create <- new_ticker( + # FIXME: Extract function + # FIXME: Make descriptions part of the dm_sql() output + + pre <- unlist(sql$pre) + load <- unlist(sql$load) + post <- unlist(sql$post) + + ticker_pre <- new_ticker( "creating tables", - n = length(queries$sql_table), + n = length(pre), progress = progress, top_level_fun = "copy_dm_to" ) # create tables - walk(queries$sql_table, ticker_create(~ { + walk(pre, ticker_pre(~ { DBI::dbExecute(dest_con, .x, immediate = TRUE) })) - ticker_populate <- new_ticker( + ticker_load <- new_ticker( "populating tables", - n = length(queries$name), + n = length(load), progress = progress, top_level_fun = "copy_dm_to" ) # populate tables - pwalk( - queries[c("name", "remote_name")], - ticker_populate(~ db_append_table( - con = dest_con, - remote_table = .y, - table = dm[[.x]], - progress = progress, - autoinc = dm_get_all_pks(dm, table = !!.x)$autoincrement - )) - ) + walk(load, ticker_load(~ { + DBI::dbExecute(dest_con, .x, immediate = TRUE) + })) - ticker_index <- new_ticker( + ticker_post <- new_ticker( "creating indexes", - n = sum(lengths(queries$sql_index)), + n = length(post), progress = progress, top_level_fun = "copy_dm_to" ) # create indexes - walk(unlist(queries$sql_index), ticker_index(~ { + walk(post, ticker_post(~ { DBI::dbExecute(dest_con, .x, immediate = TRUE) })) # remote dm is same as source dm with replaced data + # FIXME: Extract function def <- dm_get_def(dm) remote_tables <- map2( table_names_out, map(def$data, colnames), - ~ tbl(dest_con, ..1, vars = ..2) + ~ tbl(dest_con, .x, vars = .y) ) - def$data <- unname(remote_tables[names(dm)]) + def$data <- unname(remote_tables) remote_dm <- dm_from_def(def) invisible(debug_dm_validate(remote_dm)) diff --git a/R/error-helpers.R b/R/error-helpers.R index 24fa7b0006..4811a92271 100644 --- a/R/error-helpers.R +++ b/R/error-helpers.R @@ -187,30 +187,6 @@ error_txt_no_overwrite <- function(fun_name) { glue("`{fun_name}()` does not support the `overwrite` argument.") } -abort_no_types <- function() { - abort(error_txt_no_types(), class = dm_error_full("no_types")) -} - -error_txt_no_types <- function() { - "`copy_dm_to()` does not support the `types` argument." -} - -abort_no_indexes <- function() { - abort(error_txt_no_indexes(), class = dm_error_full("no_indexes")) -} - -error_txt_no_indexes <- function() { - "`copy_dm_to()` does not support the `indexes` argument." -} - -abort_no_unique_indexes <- function() { - abort(error_txt_no_unique_indexes(), class = dm_error_full("no_unique_indexes")) -} - -error_txt_no_unique_indexes <- function() { - "`copy_dm_to()` does not support the `unique_indexes` argument." -} - abort_update_not_supported <- function() { abort(error_txt_update_not_supported(), class = dm_error_full("update_not_supported")) } diff --git a/R/zzx-deprecated.R b/R/zzx-deprecated.R index 16750ac79a..a5e9388e16 100644 --- a/R/zzx-deprecated.R +++ b/R/zzx-deprecated.R @@ -127,12 +127,20 @@ cdm_copy_to <- function(dest, dm, ..., types = NULL, overwrite = NULL, indexes = } } - copy_dm_to( - dest = dest, dm = dm, ... = ..., types = types, - overwrite = overwrite, indexes = indexes, unique_indexes = unique_indexes, + inject(copy_dm_to( + dest = dest, + dm = dm, + ... = ..., + !!!compact(list( + types = types, + overwrite = overwrite, + indexes = indexes, + unique_indexes = unique_indexes + )), set_key_constraints = set_key_constraints, - table_names = table_names, temporary = temporary - ) + table_names = table_names, + temporary = temporary + )) } #' @rdname deprecated diff --git a/man/copy_dm_to.Rd b/man/copy_dm_to.Rd index db74b44fba..fc6911f019 100644 --- a/man/copy_dm_to.Rd +++ b/man/copy_dm_to.Rd @@ -8,10 +8,6 @@ copy_dm_to( dest, dm, ..., - types = NULL, - overwrite = NULL, - indexes = NULL, - unique_indexes = NULL, set_key_constraints = TRUE, unique_table_names = NULL, table_names = NULL, @@ -26,46 +22,17 @@ copy_dm_to( \item{dm}{A \code{dm} object.} -\item{overwrite, types, indexes, unique_indexes}{Must remain \code{NULL}.} +\item{...}{These dots are for future extensions and must be empty.} \item{set_key_constraints}{If \code{TRUE} will mirror \code{dm} primary and foreign key constraints on a database and create unique indexes. Set to \code{FALSE} if your data model currently does not satisfy primary or foreign key constraints.} -\item{unique_table_names}{Deprecated.} +\item{unique_table_names, copy_to}{Deprecated.} -\item{table_names}{Desired names for the tables on \code{dest}; the names within the \code{dm} remain unchanged. -Can be \code{NULL}, a named character vector, a function or a one-sided formula. - -If left \code{NULL} (default), the names will be determined automatically depending on the \code{temporary} argument: -\enumerate{ -\item \code{temporary = TRUE} (default): unique table names based on the names of the tables in the \code{dm} are created. -\item \code{temporary = FALSE}: the table names in the \code{dm} are used as names for the tables on \code{dest}. -} - -If a function or one-sided formula, \code{table_names} is converted to a function -using \code{\link[rlang:as_function]{rlang::as_function()}}. -This function is called with the unquoted table names of the \code{dm} object -as the only argument. -The output of this function is processed by \code{\link[DBI:dbQuoteIdentifier]{DBI::dbQuoteIdentifier()}}, -that result should be a vector of identifiers of the same length -as the original table names. - -Use a variant of -\code{table_names = ~ DBI::SQL(paste0("schema_name", ".", .x))} -to specify the same schema for all tables. -Use \code{table_names = identity} with \code{temporary = TRUE} -to avoid giving temporary tables unique names. - -If a named character vector, -the names of this vector need to correspond to the table names in the \code{dm}, -and its values are the desired names on \code{dest}. -The value is processed by \code{\link[DBI:dbQuoteIdentifier]{DBI::dbQuoteIdentifier()}}, -that result should be a vector of identifiers of the same length -as the original table names. - -Use qualified names corresponding to your database's syntax -to specify e.g. database and schema for your tables.} +\item{table_names}{A named character vector or vector of \link[DBI:Id]{DBI::Id} objects, +with one unique element for each table in \code{dm}. +The default, \code{NULL}, means to use the original table names.} \item{temporary}{If \code{TRUE}, only temporary tables will be created. These tables will vanish when disconnecting from the database.} @@ -79,8 +46,6 @@ Not all DBMS are supported.} \item{progress}{Whether to display a progress bar, if \code{NA} (the default) hide in non-interactive mode, show in interactive mode. Requires the 'progress' package.} - -\item{copy_to, ...}{Deprecated.} } \value{ A \code{dm} object on the given \code{src} with the same table names @@ -91,16 +56,9 @@ as the input \code{dm}. and a \code{\link{dm}} object as its second argument. The latter is copied to the former. The default is to create temporary tables, set \code{temporary = FALSE} to create permanent tables. -Unless \code{set_key_constraints} is \code{FALSE}, primary key constraints are set on all databases, +Unless \code{set_key_constraints} is \code{FALSE}, primary key, foreign key, and unique constraints are set on all databases, and in addition foreign key constraints are set on MSSQL and Postgres databases. } -\details{ -No tables will be overwritten; passing \code{overwrite = TRUE} to the function will give an error. -Types are determined separately for each table, setting the \code{types} argument will -also throw an error. -The arguments are included in the signature to avoid passing them via the -\code{...} ellipsis. -} \examples{ \dontshow{if (rlang::is_installed("RSQLite") && rlang::is_installed("nycflights13") && rlang::is_installed("dbplyr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} con <- DBI::dbConnect(RSQLite::SQLite()) diff --git a/tests/testthat/_snaps/db-interface.md b/tests/testthat/_snaps/db-interface.md new file mode 100644 index 0000000000..fc2e5c5671 --- /dev/null +++ b/tests/testthat/_snaps/db-interface.md @@ -0,0 +1,29 @@ +# copy_dm_to() copies data frames from any source + + Code + copy_dm_to(default_local_src(), dm_for_filter()) + Condition + Error: + ! The `dest` argument of `copy_dm_to()` must refer to a DBI connection as of dm 0.1.6. + i Please use `collect.dm()` instead. + +# copy_dm_to() rejects overwrite and types arguments + + Code + copy_dm_to(my_test_src(), dm_for_filter(), overwrite = TRUE) + Condition + Error in `copy_dm_to()`: + ! `...` must be empty. + x Problematic argument: + * overwrite = TRUE + +--- + + Code + copy_dm_to(my_test_src(), dm_for_filter(), types = character()) + Condition + Error in `copy_dm_to()`: + ! `...` must be empty. + x Problematic argument: + * types = character() + diff --git a/tests/testthat/_snaps/error-helpers.md b/tests/testthat/_snaps/error-helpers.md index d696be40b5..b8a65bfe28 100644 --- a/tests/testthat/_snaps/error-helpers.md +++ b/tests/testthat/_snaps/error-helpers.md @@ -83,21 +83,6 @@ Condition Error in `abort_no_overwrite()`: ! `eval()` does not support the `overwrite` argument. - Code - abort_no_types() - Condition - Error in `abort_no_types()`: - ! `copy_dm_to()` does not support the `types` argument. - Code - abort_no_indexes() - Condition - Error in `abort_no_indexes()`: - ! `copy_dm_to()` does not support the `indexes` argument. - Code - abort_no_unique_indexes() - Condition - Error in `abort_no_unique_indexes()`: - ! `copy_dm_to()` does not support the `unique_indexes` argument. Code abort_pk_not_defined() Condition diff --git a/tests/testthat/test-db-interface.R b/tests/testthat/test-db-interface.R index 7229b5d584..8f66d9b34b 100644 --- a/tests/testthat/test-db-interface.R +++ b/tests/testthat/test-db-interface.R @@ -22,27 +22,22 @@ test_that("copy_dm_to() copies data frames to databases", { }) test_that("copy_dm_to() copies data frames from any source", { - expect_equivalent_dm( - expect_deprecated_obj( - copy_dm_to(default_local_src(), dm_for_filter()) - ), - dm_for_filter() - ) + expect_snapshot(error = TRUE, { + copy_dm_to(default_local_src(), dm_for_filter()) + }) }) # FIXME: Add test that set_key_constraints = FALSE doesn't set key constraints, # in combination with dm_learn_from_db test_that("copy_dm_to() rejects overwrite and types arguments", { - expect_dm_error( - copy_dm_to(my_test_src(), dm_for_filter(), overwrite = TRUE), - class = "no_overwrite" - ) + expect_snapshot(error = TRUE, { + copy_dm_to(my_test_src(), dm_for_filter(), overwrite = TRUE) + }) - expect_dm_error( - copy_dm_to(my_test_src(), dm_for_filter(), types = character()), - class = "no_types" - ) + expect_snapshot(error = TRUE, { + copy_dm_to(my_test_src(), dm_for_filter(), types = character()) + }) }) test_that("copy_dm_to() fails with duplicate table names", { diff --git a/tests/testthat/test-error-helpers.R b/tests/testthat/test-error-helpers.R index 00a146c612..97f55c1833 100644 --- a/tests/testthat/test-error-helpers.R +++ b/tests/testthat/test-error-helpers.R @@ -18,9 +18,6 @@ test_that("output", { abort_tables_not_reachable_from_start() abort_dupl_new_id_col_name("tibbletable") abort_no_overwrite() - abort_no_types() - abort_no_indexes() - abort_no_unique_indexes() abort_pk_not_defined() abort_fk_exists("child", c("child_1", "child_2"), "parent") abort_first_rm_fks("parent", c("child_1", "child_2"))