From c7d8693f7074b4153c85809f641b8ac1ce02bfac Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Thu, 10 Oct 2024 18:41:03 -0400 Subject: [PATCH] Enable setting datetime value for dbt_valid_to when the record is current (#10780) --- .../unreleased/Features-20240925-120855.yaml | 6 ++ core/dbt/artifacts/resources/v1/snapshot.py | 1 + schemas/dbt/manifest/v12.json | 24 ++++- .../functional/artifacts/expected_manifest.py | 1 + tests/functional/list/test_list.py | 1 + tests/functional/logging/test_logging.py | 2 +- .../snapshots/data/seed_dbt_valid_to.sql | 82 ++++++++++++++++ .../snapshots/test_snapshot_column_names.py | 97 +++++++++++++++++++ tests/unit/test_events.py | 5 +- 9 files changed, 213 insertions(+), 6 deletions(-) create mode 100644 .changes/unreleased/Features-20240925-120855.yaml create mode 100644 tests/functional/snapshots/data/seed_dbt_valid_to.sql diff --git a/.changes/unreleased/Features-20240925-120855.yaml b/.changes/unreleased/Features-20240925-120855.yaml new file mode 100644 index 00000000000..394e5c91659 --- /dev/null +++ b/.changes/unreleased/Features-20240925-120855.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Enable specification of dbt_valid_to for current records +time: 2024-09-25T12:08:55.926848-04:00 +custom: + Author: gshank + Issue: "10187" diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py index 464d94bae69..062b6a62814 100644 --- a/core/dbt/artifacts/resources/v1/snapshot.py +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -28,6 +28,7 @@ class SnapshotConfig(NodeConfig): snapshot_meta_column_names: SnapshotMetaColumnNames = field( default_factory=SnapshotMetaColumnNames ) + dbt_valid_to_current: Optional[str] = None @property def snapshot_table_column_names(self): diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index cecc35c58bb..dc369a2cbe3 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -6685,6 +6685,17 @@ } }, 
"additionalProperties": false + }, + "dbt_valid_to_current": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null } }, "additionalProperties": true @@ -16511,6 +16522,17 @@ } }, "additionalProperties": false + }, + "dbt_valid_to_current": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null } }, "additionalProperties": true @@ -22476,4 +22498,4 @@ "unit_tests" ], "$id": "https://schemas.getdbt.com/dbt/manifest/v12.json" -} \ No newline at end of file +} diff --git a/tests/functional/artifacts/expected_manifest.py b/tests/functional/artifacts/expected_manifest.py index 501efef85a2..286fcad7ff1 100644 --- a/tests/functional/artifacts/expected_manifest.py +++ b/tests/functional/artifacts/expected_manifest.py @@ -111,6 +111,7 @@ def get_rendered_snapshot_config(**updates): "dbt_updated_at": None, "dbt_scd_id": None, }, + "dbt_valid_to_current": None, "tags": [], "persist_docs": {}, "full_refresh": None, diff --git a/tests/functional/list/test_list.py b/tests/functional/list/test_list.py index 6894ec96bb4..78b4cd7383c 100644 --- a/tests/functional/list/test_list.py +++ b/tests/functional/list/test_list.py @@ -63,6 +63,7 @@ def expect_snapshot_output(self, happy_path_project): # noqa: F811 "persist_docs": {}, "target_database": happy_path_project.database, "target_schema": happy_path_project.test_schema, + "dbt_valid_to_current": None, "snapshot_meta_column_names": { "dbt_scd_id": None, "dbt_updated_at": None, diff --git a/tests/functional/logging/test_logging.py b/tests/functional/logging/test_logging.py index 252d4818974..9de46a84a95 100644 --- a/tests/functional/logging/test_logging.py +++ b/tests/functional/logging/test_logging.py @@ -101,7 +101,7 @@ def test_invalid_event_value(project, logs_dir): with pytest.raises(Exception) as excinfo: fire_event(InvalidOptionYAML(option_name=1)) - assert str(excinfo.value) == "[InvalidOptionYAML]: Unable to parse dict {'option_name': 1}" + assert 
"[InvalidOptionYAML]: Unable to parse logging event dictionary." in str(excinfo.value) groups_yml = """ diff --git a/tests/functional/snapshots/data/seed_dbt_valid_to.sql b/tests/functional/snapshots/data/seed_dbt_valid_to.sql new file mode 100644 index 00000000000..9627151042c --- /dev/null +++ b/tests/functional/snapshots/data/seed_dbt_valid_to.sql @@ -0,0 +1,82 @@ +create table {database}.{schema}.seed ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + updated_at TIMESTAMP WITHOUT TIME ZONE +); + +create table {database}.{schema}.snapshot_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- snapshotting fields + updated_at TIMESTAMP WITHOUT TIME ZONE, + test_valid_from TIMESTAMP WITHOUT TIME ZONE, + test_valid_to TIMESTAMP WITHOUT TIME ZONE, + test_scd_id TEXT, + test_updated_at TIMESTAMP WITHOUT TIME ZONE +); + + +-- seed inserts +-- use the same email for two users to verify that duplicated check_cols values +-- are handled appropriately +insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), +(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), +(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), +(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'), +(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'), +(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'), +(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'), +(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', 
'87.165.201.65', '2015-12-29 22:03:56'), +(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'), +(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'), +(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'), +(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'), +(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'), +(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'), +(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'), +(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'), +(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'), +(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'), +(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'), +(20, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19'); + + +-- populate snapshot table +insert into {database}.{schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + date('2099-12-31') as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {database}.{schema}.seed; diff --git a/tests/functional/snapshots/test_snapshot_column_names.py b/tests/functional/snapshots/test_snapshot_column_names.py index 85e9f425765..bf0e59825b0 100644 --- 
a/tests/functional/snapshots/test_snapshot_column_names.py +++ b/tests/functional/snapshots/test_snapshot_column_names.py @@ -1,3 +1,4 @@ +import datetime import os import pytest @@ -7,6 +8,7 @@ get_manifest, run_dbt, run_dbt_and_capture, + run_sql_with_adapter, update_config_file, ) @@ -232,3 +234,98 @@ def test_snapshot_invalid_column_names(self, project): assert len(results) == 1 assert "Compilation Error in snapshot snapshot_actual" in log_output assert "Snapshot target is missing configured columns" in log_output + + +snapshots_valid_to_current_yml = """ +snapshots: + - name: snapshot_actual + config: + strategy: timestamp + updated_at: updated_at + dbt_valid_to_current: "date('2099-12-31')" + snapshot_meta_column_names: + dbt_valid_to: test_valid_to + dbt_valid_from: test_valid_from + dbt_scd_id: test_scd_id + dbt_updated_at: test_updated_at +""" + +update_with_current_sql = """ +-- insert v2 of the 11 - 21 records + +insert into {database}.{schema}.snapshot_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + test_valid_from, + test_valid_to, + test_updated_at, + test_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by snapshotting + updated_at as test_valid_from, + date('2099-12-31') as test_valid_to, + updated_at as test_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id +from {database}.{schema}.seed +where id >= 10 and id <= 20; +""" + + +class TestSnapshotDbtValidToCurrent: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_valid_to_current_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + def test_valid_to_current(self, project): + path = os.path.join(project.test_data_dir, "seed_dbt_valid_to.sql") + project.run_sql_file(path) + results = run_dbt(["snapshot"]) + 
assert len(results) == 1 + + original_snapshot = run_sql_with_adapter( + project.adapter, + "select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual", + "all", + ) + assert original_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) + assert original_snapshot[9][2] == datetime.datetime(2099, 12, 31, 0, 0) + + project.run_sql(invalidate_sql) + project.run_sql(update_with_current_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + updated_snapshot = run_sql_with_adapter( + project.adapter, + "select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual", + "all", + ) + assert updated_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) + # Original row that was updated is no longer current (valid_to is not 2099-12-31) + assert updated_snapshot[9][2] == datetime.datetime(2016, 8, 20, 16, 44, 49) + # Updated row is current (valid_to is 2099-12-31) + assert updated_snapshot[20][2] == datetime.datetime(2099, 12, 31, 0, 0) + + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index 68a3edc2614..085f849492e 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -537,10 +537,7 @@ def test_bad_serialization(): with pytest.raises(Exception) as excinfo: types.Note(param_event_doesnt_have="This should break") - assert ( - str(excinfo.value) - == "[Note]: Unable to parse dict {'param_event_doesnt_have': 'This should break'}" - ) + assert 'has no field named "param_event_doesnt_have" at "Note"' in str(excinfo.value) def test_single_run_error():