From cd6bb9e782e6a1275c519da248969b056430a6ff Mon Sep 17 00:00:00 2001
From: Kshitij Aranke
Date: Tue, 15 Oct 2024 18:53:51 +0100
Subject: [PATCH 01/11] Fix #2578: Allow instances of generic data tests to be
 documented (#10850)

---
 .../unreleased/Fixes-20241014-212135.yaml | 6 ++++
 core/dbt/parser/generic_test_builders.py | 11 ++++++
 core/dbt/parser/schema_generic_tests.py | 3 ++
 .../generic_test_description/fixtures.py | 34 +++++++++++++++++++
 .../test_generic_test_description.py | 32 +++++++++++++++++
 tests/unit/parser/test_parser.py | 4 ++-
 6 files changed, 89 insertions(+), 1 deletion(-)
 create mode 100644 .changes/unreleased/Fixes-20241014-212135.yaml
 create mode 100644 tests/functional/generic_test_description/fixtures.py
 create mode 100644 tests/functional/generic_test_description/test_generic_test_description.py

diff --git a/.changes/unreleased/Fixes-20241014-212135.yaml b/.changes/unreleased/Fixes-20241014-212135.yaml
new file mode 100644
index 00000000000..8cc700095ba
--- /dev/null
+++ b/.changes/unreleased/Fixes-20241014-212135.yaml
@@ -0,0 +1,6 @@
+kind: Fixes
+body: Allow instances of generic data tests to be documented
+time: 2024-10-14T21:21:35.767115+01:00
+custom:
+  Author: aranke
+  Issue: "2578"
diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py
index 6bca8300dae..1c9478734bf 100644
--- a/core/dbt/parser/generic_test_builders.py
+++ b/core/dbt/parser/generic_test_builders.py
@@ -136,6 +136,17 @@ def __init__(
         if self.namespace is not None:
             self.package_name = self.namespace
 
+        # If the user has provided a description for this generic test, use it
+        # Then delete the "description" argument to:
+        # 1. Avoid passing it into the test macro
+        # 2. Avoid passing it into the test name synthesis
+        # Otherwise, use an empty string
+        self.description: str = ""
+
+        if "description" in self.args:
+            self.description = self.args["description"]
+            del self.args["description"]
+
         # If the user has provided a custom name for this generic test, use it
         # Then delete the "name" argument to avoid passing it into the test macro
         # Otherwise, use an auto-generated name synthesized from test inputs
diff --git a/core/dbt/parser/schema_generic_tests.py b/core/dbt/parser/schema_generic_tests.py
index 9f2538f06f0..58be6dc94be 100644
--- a/core/dbt/parser/schema_generic_tests.py
+++ b/core/dbt/parser/schema_generic_tests.py
@@ -96,6 +96,7 @@ def create_test_node(
         test_metadata: Dict[str, Any],
         file_key_name: str,
         column_name: Optional[str],
+        description: str,
     ) -> GenericTestNode:
 
         HASH_LENGTH = 10
@@ -134,6 +135,7 @@ def get_hashable_md(data: Union[str, int, float, List, Dict]) -> Union[str, List
             "column_name": column_name,
             "checksum": FileHash.empty().to_dict(omit_none=True),
             "file_key_name": file_key_name,
+            "description": description,
         }
         try:
             GenericTestNode.validate(dct)
@@ -229,6 +231,7 @@ def parse_generic_test(
             column_name=column_name,
             test_metadata=metadata,
             file_key_name=file_key_name,
+            description=builder.description,
         )
 
         self.render_test_update(node, config, builder, schema_file_id)
diff --git a/tests/functional/generic_test_description/fixtures.py b/tests/functional/generic_test_description/fixtures.py
new file mode 100644
index 00000000000..2be2f2313ba
--- /dev/null
+++ b/tests/functional/generic_test_description/fixtures.py
@@ -0,0 +1,34 @@
+models__my_model_sql = """
+with my_cte as (
+    select 1 as id, 'blue' as color
+    union all
+    select 2 as id, 'green' as color
+    union all
+    select 3 as id, 'red' as color
+)
+select * from my_cte
+"""
+
+models__schema_yml = """ +models: + - name: my_model + columns: + - name: id + tests: + - unique: + description: "id must be unique" + - not_null + - name: color + tests: + - accepted_values: + values: ['blue', 'green', 'red'] + description: "{{ doc('color_accepted_values') }}" +""" + +models__doc_block_md = """ +{% docs color_accepted_values %} + +The `color` column must be one of 'blue', 'green', or 'red'. + +{% enddocs %} +""" diff --git a/tests/functional/generic_test_description/test_generic_test_description.py b/tests/functional/generic_test_description/test_generic_test_description.py new file mode 100644 index 00000000000..cf68ff1c33c --- /dev/null +++ b/tests/functional/generic_test_description/test_generic_test_description.py @@ -0,0 +1,32 @@ +import pytest + +from dbt.tests.util import get_artifact, run_dbt +from tests.functional.generic_test_description.fixtures import ( + models__doc_block_md, + models__my_model_sql, + models__schema_yml, +) + + +class TestBuiltinGenericTestDescription: + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": models__my_model_sql, + "schema.yml": models__schema_yml, + "doc_block.md": models__doc_block_md, + } + + def test_compile(self, project): + run_dbt(["compile"]) + manifest = get_artifact(project.project_root, "target", "manifest.json") + assert len(manifest["nodes"]) == 4 + + nodes = {node["alias"]: node for node in manifest["nodes"].values()} + + assert nodes["unique_my_model_id"]["description"] == "id must be unique" + assert nodes["not_null_my_model_id"]["description"] == "" + assert ( + nodes["accepted_values_my_model_color__blue__green__red"]["description"] + == "The `color` column must be one of 'blue', 'green', or 'red'." + ) diff --git a/tests/unit/parser/test_parser.py b/tests/unit/parser/test_parser.py index b9b414ebac2..d5f809252b3 100644 --- a/tests/unit/parser/test_parser.py +++ b/tests/unit/parser/test_parser.py @@ -279,6 +279,7 @@ def assertEqualNodes(node_one, node_two): - not_null: severity: WARN - accepted_values: + description: Only primary colors are allowed in here values: ['red', 'blue', 'green'] - foreign_package.test_case: arg: 100 @@ -631,6 +632,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[0].tags, []) self.assertEqual(tests[0].refs, [RefArgs(name="my_model")]) self.assertEqual(tests[0].column_name, "color") + self.assertEqual(tests[0].description, "Only primary colors are allowed in here") self.assertEqual(tests[0].package_name, "snowplow") self.assertTrue(tests[0].name.startswith("accepted_values_")) self.assertEqual(tests[0].fqn, ["snowplow", tests[0].name]) @@ -654,7 +656,7 @@ def test__parse_basic_model_tests(self): self.assertEqual(tests[1].tags, []) self.assertEqual(tests[1].refs, [RefArgs(name="my_model")]) self.assertEqual(tests[1].column_name, "color") - self.assertEqual(tests[1].column_name, "color") + self.assertEqual(tests[1].description, "") self.assertEqual(tests[1].fqn, ["snowplow", tests[1].name]) self.assertTrue(tests[1].name.startswith("foreign_package_test_case_")) self.assertEqual(tests[1].package_name, "snowplow") From 8f847167fa6da298daaa342af989f3a3b3b5f1f0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 15 Oct 2024 15:39:01 -0400 Subject: [PATCH 02/11] Remove dbt_valid_to_current test (will go in adapter zone) (#10854) --- .../snapshots/test_snapshot_column_names.py | 97 ------------------- 1 file changed, 97 deletions(-) diff --git a/tests/functional/snapshots/test_snapshot_column_names.py 
b/tests/functional/snapshots/test_snapshot_column_names.py index bf0e59825b0..85e9f425765 100644 --- a/tests/functional/snapshots/test_snapshot_column_names.py +++ b/tests/functional/snapshots/test_snapshot_column_names.py @@ -1,4 +1,3 @@ -import datetime import os import pytest @@ -8,7 +7,6 @@ get_manifest, run_dbt, run_dbt_and_capture, - run_sql_with_adapter, update_config_file, ) @@ -234,98 +232,3 @@ def test_snapshot_invalid_column_names(self, project): assert len(results) == 1 assert "Compilation Error in snapshot snapshot_actual" in log_output assert "Snapshot target is missing configured columns" in log_output - - -snapshots_valid_to_current_yml = """ -snapshots: - - name: snapshot_actual - config: - strategy: timestamp - updated_at: updated_at - dbt_valid_to_current: "date('2099-12-31')" - snapshot_meta_column_names: - dbt_valid_to: test_valid_to - dbt_valid_from: test_valid_from - dbt_scd_id: test_scd_id - dbt_updated_at: test_updated_at -""" - -update_with_current_sql = """ --- insert v2 of the 11 - 21 records - -insert into {database}.{schema}.snapshot_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - test_valid_from, - test_valid_to, - test_updated_at, - test_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by snapshotting - updated_at as test_valid_from, - date('2099-12-31') as test_valid_to, - updated_at as test_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20; -""" - - -class TestSnapshotDbtValidToCurrent: - @pytest.fixture(scope="class") - def snapshots(self): - return {"snapshot.sql": snapshot_actual_sql} - - @pytest.fixture(scope="class") - def models(self): - return { - "snapshots.yml": snapshots_valid_to_current_yml, - "ref_snapshot.sql": ref_snapshot_sql, - } - - def test_valid_to_current(self, project): - path = os.path.join(project.test_data_dir, "seed_dbt_valid_to.sql") - project.run_sql_file(path) - results = run_dbt(["snapshot"]) - assert len(results) == 1 - - original_snapshot = run_sql_with_adapter( - project.adapter, - "select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual", - "all", - ) - assert original_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) - assert original_snapshot[9][2] == datetime.datetime(2099, 12, 31, 0, 0) - - project.run_sql(invalidate_sql) - project.run_sql(update_with_current_sql) - - results = run_dbt(["snapshot"]) - assert len(results) == 1 - - updated_snapshot = run_sql_with_adapter( - project.adapter, - "select id, test_scd_id, test_valid_to from {database}.{schema}.snapshot_actual", - "all", - ) - assert updated_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) - # Original row that was updated now has a non-current (2099/12/31) date - assert updated_snapshot[9][2] == datetime.datetime(2016, 8, 20, 16, 44, 49) - # Updated row has a current date - assert updated_snapshot[20][2] == datetime.datetime(2099, 12, 31, 0, 0) - - check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) From ffa75ca9ff7dcc7614998c7556b4b4746ea1c92d Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 15 Oct 2024 16:44:27 -0400 Subject: [PATCH 03/11] Refactor code to properly handle reference deprecations (#10852) --- .../unreleased/Fixes-20241015-121825.yaml | 6 ++ core/dbt/contracts/graph/manifest.py | 19 ------ core/dbt/parser/manifest.py | 59 ++++++++++--------- ...ecations.py => 
test_model_deprecations.py} | 4 +- 4 files changed, 40 insertions(+), 48 deletions(-) create mode 100644 .changes/unreleased/Fixes-20241015-121825.yaml rename tests/functional/deprecations/{model_deprecations.py => test_model_deprecations.py} (97%) diff --git a/.changes/unreleased/Fixes-20241015-121825.yaml b/.changes/unreleased/Fixes-20241015-121825.yaml new file mode 100644 index 00000000000..f5867871868 --- /dev/null +++ b/.changes/unreleased/Fixes-20241015-121825.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Fix warnings for models referring to a deprecated model +time: 2024-10-15T12:18:25.14525-04:00 +custom: + Author: gshank + Issue: "10833" diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index b556b479fb4..4ce887591d9 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -1315,25 +1315,6 @@ def singular_test_lookup(self) -> SingularTestLookup: def external_node_unique_ids(self): return [node.unique_id for node in self.nodes.values() if node.is_external_node] - def resolve_refs( - self, - source_node: ModelNode, - current_project: str, # TODO: ModelNode is overly restrictive typing - ) -> List[MaybeNonSource]: - resolved_refs: List[MaybeNonSource] = [] - for ref in source_node.refs: - resolved = self.resolve_ref( - source_node, - ref.name, - ref.package, - ref.version, - current_project, - source_node.package_name, - ) - resolved_refs.append(resolved) - - return resolved_refs - # Called by dbt.parser.manifest._process_refs & ManifestLoader.check_for_model_deprecations def resolve_ref( self, diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 7ffd00febc5..493b562bbdc 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -573,36 +573,41 @@ def safe_update_project_parser_files_partially(self, project_parser_files: Dict) return project_parser_files def check_for_model_deprecations(self): + # build parent and child_maps + self.manifest.build_parent_and_child_maps() for node in self.manifest.nodes.values(): - if isinstance(node, ModelNode) and node.is_past_deprecation_date: - warn_or_error( - DeprecatedModel( - model_name=node.name, - model_version=version_to_str(node.version), - deprecation_date=node.deprecation_date.isoformat(), + if isinstance(node, ModelNode) and node.deprecation_date: + if node.is_past_deprecation_date: + warn_or_error( + DeprecatedModel( + model_name=node.name, + model_version=version_to_str(node.version), + deprecation_date=node.deprecation_date.isoformat(), + ) ) - ) - - resolved_refs = self.manifest.resolve_refs(node, self.root_project.project_name) - resolved_model_refs = [r for r in resolved_refs if isinstance(r, ModelNode)] - node.depends_on - for resolved_ref in resolved_model_refs: - if resolved_ref.deprecation_date: - if resolved_ref.is_past_deprecation_date: - event_cls = DeprecatedReference - else: - event_cls = UpcomingReferenceDeprecation - - warn_or_error( - event_cls( - model_name=node.name, - ref_model_package=resolved_ref.package_name, - ref_model_name=resolved_ref.name, - ref_model_version=version_to_str(resolved_ref.version), - ref_model_latest_version=str(resolved_ref.latest_version), - ref_model_deprecation_date=resolved_ref.deprecation_date.isoformat(), - ) + # At this point _process_refs should already have been called, and + # we just rebuilt the parent and child maps. + # Get the child_nodes and check for deprecations. 
+ child_nodes = self.manifest.child_map[node.unique_id] + for child_unique_id in child_nodes: + child_node = self.manifest.nodes[child_unique_id] + if not isinstance(child_node, ModelNode): + continue + if node.is_past_deprecation_date: + event_cls = DeprecatedReference + else: + event_cls = UpcomingReferenceDeprecation + + warn_or_error( + event_cls( + model_name=child_node.name, + ref_model_package=node.package_name, + ref_model_name=node.name, + ref_model_version=version_to_str(node.version), + ref_model_latest_version=str(node.latest_version), + ref_model_deprecation_date=node.deprecation_date.isoformat(), ) + ) def check_for_spaces_in_resource_names(self): """Validates that resource names do not contain spaces diff --git a/tests/functional/deprecations/model_deprecations.py b/tests/functional/deprecations/test_model_deprecations.py similarity index 97% rename from tests/functional/deprecations/model_deprecations.py rename to tests/functional/deprecations/test_model_deprecations.py index 03e38b1220e..1318562c10f 100644 --- a/tests/functional/deprecations/model_deprecations.py +++ b/tests/functional/deprecations/test_model_deprecations.py @@ -1,8 +1,8 @@ import pytest from dbt.cli.main import dbtRunner -from dbt.exceptions import EventCompilationError from dbt.tests.util import run_dbt +from dbt_common.exceptions import EventCompilationError deprecated_model__yml = """ version: 2 @@ -52,7 +52,7 @@ def test_deprecation_warning_error_options(self, project): run_dbt(["--warn-error-options", '{"include": ["DeprecatedModel"]}', "parse"]) -class TestUpcomingReferenceDeprecatingWarning: +class TestUpcomingReferenceDeprecationWarning: @pytest.fixture(scope="class") def models(self): return { From d18f50bbb8a274c73442d8ae674d1036e736bbf0 Mon Sep 17 00:00:00 2001 From: Quigley Malcolm Date: Tue, 15 Oct 2024 16:55:19 -0500 Subject: [PATCH 04/11] Ensure consistent `current_time` across microbatch models in an invocation (#10830) * Add test that checks microbatch models are all operating with the same `current_time` * Set an `invocated_at` on the `RuntimeConfig` and plumb to `MicrobatchBuilder` * Add changie doc * Rename `invocated_at` to `invoked_at` * Simply conditional logic for setting MicrobatchBuilder.batch_current_time * Rename `batch_current_time` to `default_end_time` for MicrobatchBuilder --- .../unreleased/Features-20241007-115853.yaml | 6 ++++ core/dbt/config/runtime.py | 6 +++- .../incremental/microbatch.py | 4 ++- core/dbt/task/run.py | 1 + .../functional/microbatch/test_microbatch.py | 32 +++++++++++++++++++ 5 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Features-20241007-115853.yaml diff --git a/.changes/unreleased/Features-20241007-115853.yaml b/.changes/unreleased/Features-20241007-115853.yaml new file mode 100644 index 00000000000..ac2e61c5b59 --- /dev/null +++ b/.changes/unreleased/Features-20241007-115853.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Ensure microbatch models use same `current_time` value +time: 2024-10-07T11:58:53.460941-05:00 +custom: + Author: QMalcolm + Issue: "10819" diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index e1c24cf5f0c..d2c0183e1cb 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -1,7 +1,8 @@ import itertools import os from copy import deepcopy -from dataclasses import dataclass +from dataclasses import dataclass, field +from datetime import datetime from pathlib import Path from typing import ( Any, @@ -15,6 +16,8 @@ Type, ) +import pytz + from dbt import 
tracking
 from dbt.adapters.contracts.connection import (
     AdapterRequiredConfig,
@@ -98,6 +101,7 @@ class RuntimeConfig(Project, Profile, AdapterRequiredConfig):
     profile_name: str
     cli_vars: Dict[str, Any]
     dependencies: Optional[Mapping[str, "RuntimeConfig"]] = None
+    invoked_at: datetime = field(default_factory=lambda: datetime.now(pytz.UTC))
 
     def __post_init__(self):
         self.validate()
diff --git a/core/dbt/materializations/incremental/microbatch.py b/core/dbt/materializations/incremental/microbatch.py
index 268132ffa0c..da8930acb89 100644
--- a/core/dbt/materializations/incremental/microbatch.py
+++ b/core/dbt/materializations/incremental/microbatch.py
@@ -18,6 +18,7 @@ def __init__(
         is_incremental: bool,
         event_time_start: Optional[datetime],
         event_time_end: Optional[datetime],
+        default_end_time: Optional[datetime] = None,
     ):
         if model.config.incremental_strategy != "microbatch":
             raise DbtInternalError(
@@ -35,10 +36,11 @@ def __init__(
             event_time_start.replace(tzinfo=pytz.UTC) if event_time_start else None
         )
         self.event_time_end = event_time_end.replace(tzinfo=pytz.UTC) if event_time_end else None
+        self.default_end_time = default_end_time or datetime.now(pytz.UTC)
 
     def build_end_time(self):
         """Defaults the end_time to the current time in UTC unless a non `None` event_time_end was provided"""
-        return self.event_time_end or datetime.now(tz=pytz.utc)
+        return self.event_time_end or self.default_end_time
 
     def build_start_time(self, checkpoint: Optional[datetime]):
         """Create a start time based off the passed in checkpoint.
diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py
index b1f706d72ec..99913a551c5 100644
--- a/core/dbt/task/run.py
+++ b/core/dbt/task/run.py
@@ -498,6 +498,7 @@ def _execute_microbatch_materialization(
             is_incremental=self._is_incremental(model),
             event_time_start=getattr(self.config.args, "EVENT_TIME_START", None),
             event_time_end=getattr(self.config.args, "EVENT_TIME_END", None),
+            default_end_time=self.config.invoked_at,
         )
         end = microbatch_builder.build_end_time()
         start = microbatch_builder.build_start_time(end)
diff --git a/tests/functional/microbatch/test_microbatch.py b/tests/functional/microbatch/test_microbatch.py
index f6b49e1405a..71c8588b17f 100644
--- a/tests/functional/microbatch/test_microbatch.py
+++ b/tests/functional/microbatch/test_microbatch.py
@@ -1,7 +1,9 @@
 import os
+from datetime import datetime
 from unittest import mock
 
 import pytest
+import pytz
 
 from dbt.events.types import LogModelResult
 from dbt.tests.util import (
@@ -40,6 +42,11 @@
 select * from {{ ref('input_model') }}
 """
 
+microbatch_model_downstream_sql = """
+{{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
+select * from {{ ref('microbatch_model') }}
+"""
+
 microbatch_model_ref_render_sql = """
 {{ config(materialized='incremental', incremental_strategy='microbatch', unique_key='id', event_time='event_time', batch_size='day', begin=modules.datetime.datetime(2020, 1, 1, 0, 0, 0)) }}
 
@@ -671,3 +678,28 @@ def test_run_with_event_time(self, project):
         with patch_microbatch_end_time("2020-01-03 13:57:00"):
             run_dbt(["run", "--full-refresh"])
         self.assert_row_count(project, "microbatch_model", 3)
+
+
+class TestMicrobatchModelsRunWithSameCurrentTime(BaseMicrobatchTest):
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "input_model.sql": input_model_sql,
+            "microbatch_model.sql": microbatch_model_sql,
+            "second_microbatch_model.sql": 
microbatch_model_downstream_sql, + } + + @mock.patch.dict(os.environ, {"DBT_EXPERIMENTAL_MICROBATCH": "True"}) + def test_microbatch(self, project) -> None: + current_time = datetime.now(pytz.UTC) + run_dbt(["run", "--event-time-start", current_time.strftime("%Y-%m-%d")]) + + run_results = get_artifact(project.project_root, "target", "run_results.json") + microbatch_model_last_batch = run_results["results"][1]["batch_results"]["successful"][-1] + second_microbatch_model_last_batch = run_results["results"][2]["batch_results"][ + "successful" + ][-1] + + # they should have the same last batch because they are using the _same_ "current_time" + assert microbatch_model_last_batch == second_microbatch_model_last_batch From 78c05718c589a56bc49f7520b47474690ae1cbe0 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 16 Oct 2024 15:15:27 -0400 Subject: [PATCH 05/11] Remove Python 3.8 from various places (#10861) * Remove Python 3.8 from various places * Add changelog entry. --------- Co-authored-by: Peter Allen Webb --- .../unreleased/Under the Hood-20241016-144056.yaml | 6 ++++++ .github/workflows/main.yml | 10 +++++----- .github/workflows/model_performance.yml | 2 +- .github/workflows/schema-check.yml | 2 +- .github/workflows/structured-logging-schema-check.yml | 2 +- .github/workflows/test-repeater.yml | 1 - .pre-commit-config.yaml | 2 +- Dockerfile.test | 3 --- core/setup.py | 7 +++---- 9 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 .changes/unreleased/Under the Hood-20241016-144056.yaml diff --git a/.changes/unreleased/Under the Hood-20241016-144056.yaml b/.changes/unreleased/Under the Hood-20241016-144056.yaml new file mode 100644 index 00000000000..454724607cc --- /dev/null +++ b/.changes/unreleased/Under the Hood-20241016-144056.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Remove support and testing for Python 3.8, which is now EOL. 
+time: 2024-10-16T14:40:56.451972-04:00 +custom: + Author: gshank peterallenwebb + Issue: "10861" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b596bf7293f..586bb651a9f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -52,7 +52,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install python dependencies run: | @@ -74,7 +74,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] env: TOXENV: "unit" @@ -139,7 +139,7 @@ jobs: - name: generate include id: generate-include run: | - INCLUDE=('"python-version":"3.8","os":"windows-latest"' '"python-version":"3.8","os":"macos-12"' ) + INCLUDE=('"python-version":"3.9","os":"windows-latest"' '"python-version":"3.9","os":"macos-12"' ) INCLUDE_GROUPS="[" for include in ${INCLUDE[@]}; do for group in $(seq 1 ${{ env.PYTHON_INTEGRATION_TEST_WORKERS }}); do @@ -161,7 +161,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] os: [ubuntu-20.04] split-group: ${{ fromJson(needs.integration-metadata.outputs.split-groups) }} include: ${{ fromJson(needs.integration-metadata.outputs.include) }} @@ -263,7 +263,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install python dependencies run: | diff --git a/.github/workflows/model_performance.yml b/.github/workflows/model_performance.yml index 8d238ac574e..886fc764a25 100644 --- a/.github/workflows/model_performance.yml +++ b/.github/workflows/model_performance.yml @@ -150,7 +150,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install dbt run: pip install dbt-postgres==${{ needs.set-variables.outputs.release_id }} diff --git a/.github/workflows/schema-check.yml b/.github/workflows/schema-check.yml index 4cb8fce50c8..4fb3980dcb7 100644 --- a/.github/workflows/schema-check.yml +++ b/.github/workflows/schema-check.yml @@ -37,7 +37,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.8 + python-version: 3.9 - name: Checkout dbt repo uses: actions/checkout@v4 diff --git a/.github/workflows/structured-logging-schema-check.yml b/.github/workflows/structured-logging-schema-check.yml index 4934bffcaeb..993ef8c41d9 100644 --- a/.github/workflows/structured-logging-schema-check.yml +++ b/.github/workflows/structured-logging-schema-check.yml @@ -76,7 +76,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.9" - name: Install python dependencies run: | diff --git a/.github/workflows/test-repeater.yml b/.github/workflows/test-repeater.yml index 315133336e8..c10088d0ae0 100644 --- a/.github/workflows/test-repeater.yml +++ b/.github/workflows/test-repeater.yml @@ -27,7 +27,6 @@ on: description: 'Version of Python to Test Against' type: choice options: - - '3.8' - '3.9' - '3.10' - '3.11' diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a30d2f5be4d..d5317ec512c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ exclude: ^(core/dbt/docs/build/|core/dbt/common/events/types_pb2.py|core/dbt/events/core_types_pb2.py|core/dbt/adapters/events/adapter_types_pb2.py) -# Force all unspecified python hooks to run python 
3.8 +# Force all unspecified python hooks to run python 3.9 default_language_version: python: python3 diff --git a/Dockerfile.test b/Dockerfile.test index a168be8b497..f29329bfa02 100644 --- a/Dockerfile.test +++ b/Dockerfile.test @@ -33,9 +33,6 @@ RUN apt-get update \ python-is-python3 \ python-dev-is-python3 \ python3-pip \ - python3.8 \ - python3.8-dev \ - python3.8-venv \ python3.9 \ python3.9-dev \ python3.9-venv \ diff --git a/core/setup.py b/core/setup.py index f29832dc598..b7a8dabd14e 100644 --- a/core/setup.py +++ b/core/setup.py @@ -2,9 +2,9 @@ import os import sys -if sys.version_info < (3, 8): +if sys.version_info < (3, 9): print("Error: dbt does not support this version of Python.") - print("Please upgrade to Python 3.8 or higher.") + print("Please upgrade to Python 3.9 or higher.") sys.exit(1) @@ -89,11 +89,10 @@ "Operating System :: Microsoft :: Windows", "Operating System :: MacOS :: MacOS X", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ], - python_requires=">=3.8", + python_requires=">=3.9", ) From 8be063502b65c635c865f39ae30cff1ae36d1d77 Mon Sep 17 00:00:00 2001 From: Paul Yang Date: Thu, 17 Oct 2024 10:54:30 -0700 Subject: [PATCH 06/11] Add `order_by` and `limit` fields to saved queries (#10532) * Add `order_by` and `limit` fields to saved queries. * Update JSON schema * Add change log for #10531. * Check order by / limit in saved-query parsing test. --- .../unreleased/Features-20240806-144859.yaml | 6 + .../dbt/artifacts/resources/v1/saved_query.py | 2 + core/dbt/contracts/graph/unparsed.py | 2 + core/dbt/parser/schema_yaml_readers.py | 2 + schemas/dbt/catalog/v1.json | 2 +- schemas/dbt/manifest/v12.json | 132 +++++++++++++++++- schemas/dbt/run-results/v6.json | 2 +- schemas/dbt/sources/v3.json | 2 +- tests/functional/saved_queries/fixtures.py | 4 + .../saved_queries/test_saved_query_parsing.py | 4 + 10 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 .changes/unreleased/Features-20240806-144859.yaml diff --git a/.changes/unreleased/Features-20240806-144859.yaml b/.changes/unreleased/Features-20240806-144859.yaml new file mode 100644 index 00000000000..69df7fff614 --- /dev/null +++ b/.changes/unreleased/Features-20240806-144859.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add `order_by` and `limit` fields to saved queries. +time: 2024-08-06T14:48:59.035914-07:00 +custom: + Author: plypaul + Issue: "10531" diff --git a/core/dbt/artifacts/resources/v1/saved_query.py b/core/dbt/artifacts/resources/v1/saved_query.py index 1eea7990cc1..e1d056d0422 100644 --- a/core/dbt/artifacts/resources/v1/saved_query.py +++ b/core/dbt/artifacts/resources/v1/saved_query.py @@ -44,6 +44,8 @@ class QueryParams(dbtClassMixin): metrics: List[str] group_by: List[str] where: Optional[WhereFilterIntersection] + order_by: List[str] = field(default_factory=list) + limit: Optional[int] = None @dataclass diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index f20e76a8b68..5e63c487e89 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -720,6 +720,8 @@ class UnparsedQueryParams(dbtClassMixin): group_by: List[str] = field(default_factory=list) # Note: `Union` must be the outermost part of the type annotation for serialization to work properly. 
where: Union[str, List[str], None] = None + order_by: List[str] = field(default_factory=list) + limit: Optional[int] = None @dataclass diff --git a/core/dbt/parser/schema_yaml_readers.py b/core/dbt/parser/schema_yaml_readers.py index dc99e87a218..9b4a550b5d3 100644 --- a/core/dbt/parser/schema_yaml_readers.py +++ b/core/dbt/parser/schema_yaml_readers.py @@ -788,6 +788,8 @@ def _get_query_params(self, unparsed: UnparsedQueryParams) -> QueryParams: group_by=unparsed.group_by, metrics=unparsed.metrics, where=parse_where_filter(unparsed.where), + order_by=unparsed.order_by, + limit=unparsed.limit, ) def parse_saved_query(self, unparsed: UnparsedSavedQuery) -> None: diff --git a/schemas/dbt/catalog/v1.json b/schemas/dbt/catalog/v1.json index 25c2b25b2b3..f104c5b977f 100644 --- a/schemas/dbt/catalog/v1.json +++ b/schemas/dbt/catalog/v1.json @@ -12,7 +12,7 @@ }, "dbt_version": { "type": "string", - "default": "1.9.0a1" + "default": "1.9.0b2" }, "generated_at": { "type": "string" diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index dc369a2cbe3..9cb7f732e62 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -13,7 +13,7 @@ }, "dbt_version": { "type": "string", - "default": "1.9.0a1" + "default": "1.9.0b2" }, "generated_at": { "type": "string" @@ -706,6 +706,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -1739,6 +1745,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -2387,6 +2399,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -3172,6 +3190,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -3976,6 +4000,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -5331,6 +5361,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -5979,6 +6015,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -6942,6 +6984,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -10543,6 +10591,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -11576,6 +11630,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -12224,6 +12284,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -13009,6 +13075,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -13813,6 +13885,12 @@ "type": "string" } }, + 
"unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -15168,6 +15246,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -15816,6 +15900,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -16779,6 +16869,12 @@ "type": "string" } }, + "unrendered_config_call_dict": { + "type": "object", + "propertyNames": { + "type": "string" + } + }, "relation_name": { "anyOf": [ { @@ -19542,6 +19638,23 @@ "type": "null" } ] + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + } + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null } }, "additionalProperties": false, @@ -21076,6 +21189,23 @@ "type": "null" } ] + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + } + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null } }, "additionalProperties": false, diff --git a/schemas/dbt/run-results/v6.json b/schemas/dbt/run-results/v6.json index 1f185a2135a..1bf1cf75e83 100644 --- a/schemas/dbt/run-results/v6.json +++ b/schemas/dbt/run-results/v6.json @@ -12,7 +12,7 @@ }, "dbt_version": { "type": "string", - "default": "1.9.0a1" + "default": "1.9.0b2" }, "generated_at": { "type": "string" diff --git a/schemas/dbt/sources/v3.json b/schemas/dbt/sources/v3.json index 5ade4a90be0..df2784f1a81 100644 --- a/schemas/dbt/sources/v3.json +++ b/schemas/dbt/sources/v3.json @@ -12,7 +12,7 @@ }, "dbt_version": { "type": "string", - "default": "1.9.0a1" + "default": "1.9.0b2" }, "generated_at": { "type": "string" diff --git a/tests/functional/saved_queries/fixtures.py b/tests/functional/saved_queries/fixtures.py index 96383ab5472..58ed73c81b0 100644 --- a/tests/functional/saved_queries/fixtures.py +++ b/tests/functional/saved_queries/fixtures.py @@ -16,6 +16,10 @@ - "{{ TimeDimension('id__ds', 'DAY') }} <= now()" - "{{ TimeDimension('id__ds', 'DAY') }} >= '2023-01-01'" - "{{ Metric('txn_revenue', ['id']) }} > 1" + order_by: + - "Metric('simple_metric')" + - "Dimension('id__ds')" + limit: 10 exports: - name: my_export config: diff --git a/tests/functional/saved_queries/test_saved_query_parsing.py b/tests/functional/saved_queries/test_saved_query_parsing.py index 40e4cdfa4fb..f00f7366562 100644 --- a/tests/functional/saved_queries/test_saved_query_parsing.py +++ b/tests/functional/saved_queries/test_saved_query_parsing.py @@ -66,6 +66,10 @@ def test_semantic_model_parsing(self, project): assert len(saved_query.query_params.group_by) == 1 assert len(saved_query.query_params.where.where_filters) == 3 assert len(saved_query.depends_on.nodes) == 1 + + assert len(saved_query.query_params.order_by) == 2 + assert saved_query.query_params.limit is not None + assert saved_query.description == "My SavedQuery Description" assert len(saved_query.exports) == 1 assert saved_query.exports[0].name == "my_export" From ba6c7baf1d66d91fe7050e351e408e2a1186eee7 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Fri, 18 Oct 2024 22:28:58 +0100 Subject: [PATCH 07/11] [Tidy-First]: Fix `timings` object for hooks and macros, and make types of timings explicit (#10882) * [Tidy-First]: Fix `timings` object for hooks and macros, and make types of timings explicit * cast literal to str * change test * 
change jsonschema to enum * Discard changes to schemas/dbt/manifest/v12.json * nits --------- Co-authored-by: Chenyu Li --- core/dbt/artifacts/schemas/results.py | 16 +++++-- core/dbt/task/run.py | 19 +++++--- core/dbt/task/run_operation.py | 46 +++++++++++-------- schemas/dbt/run-results/v6.json | 6 ++- schemas/dbt/sources/v3.json | 6 ++- .../adapter/hooks/test_on_run_hooks.py | 8 ++++ .../functional/microbatch/test_microbatch.py | 2 +- .../run_operations/test_run_operations.py | 17 +++++++ tests/unit/test_events.py | 2 +- 9 files changed, 88 insertions(+), 34 deletions(-) diff --git a/core/dbt/artifacts/schemas/results.py b/core/dbt/artifacts/schemas/results.py index 00746c87885..ee27fc6d5d4 100644 --- a/core/dbt/artifacts/schemas/results.py +++ b/core/dbt/artifacts/schemas/results.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from datetime import datetime -from typing import Any, Callable, Dict, List, Optional, Sequence, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Union from dbt.contracts.graph.nodes import ResultNode from dbt_common.dataclass_schema import StrEnum, dbtClassMixin @@ -10,7 +10,13 @@ @dataclass class TimingInfo(dbtClassMixin): - name: str + """ + Represents a step in the execution of a node. + `name` should be one of: compile, execute, or other + Do not call directly, use `collect_timing_info` instead. + """ + + name: Literal["compile", "execute", "other"] started_at: Optional[datetime] = None completed_at: Optional[datetime] = None @@ -21,7 +27,7 @@ def end(self): self.completed_at = datetime.utcnow() def to_msg_dict(self): - msg_dict = {"name": self.name} + msg_dict = {"name": str(self.name)} if self.started_at: msg_dict["started_at"] = datetime_to_json_string(self.started_at) if self.completed_at: @@ -31,7 +37,9 @@ def to_msg_dict(self): # This is a context manager class collect_timing_info: - def __init__(self, name: str, callback: Callable[[TimingInfo], None]) -> None: + def __init__( + self, name: Literal["compile", "execute", "other"], callback: Callable[[TimingInfo], None] + ) -> None: self.timing_info = TimingInfo(name=name) self.callback = callback diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index 99913a551c5..f159861e1ae 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -17,6 +17,7 @@ RunningStatus, RunStatus, TimingInfo, + collect_timing_info, ) from dbt.artifacts.schemas.run import RunResult from dbt.cli.flags import Flags @@ -633,7 +634,6 @@ def get_hooks_by_type(self, hook_type: RunHookType) -> List[HookNode]: def safe_run_hooks( self, adapter: BaseAdapter, hook_type: RunHookType, extra_context: Dict[str, Any] ) -> RunStatus: - started_at = datetime.utcnow() ordered_hooks = self.get_hooks_by_type(hook_type) if hook_type == RunHookType.End and ordered_hooks: @@ -653,14 +653,20 @@ def safe_run_hooks( hook.index = idx hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}" execution_time = 0.0 - timing = [] + timing: List[TimingInfo] = [] failures = 1 if not failed: + with collect_timing_info("compile", timing.append): + sql = self.get_hook_sql( + adapter, hook, hook.index, num_hooks, extra_context + ) + + started_at = timing[0].started_at or datetime.utcnow() hook.update_event_status( started_at=started_at.isoformat(), node_status=RunningStatus.Started ) - sql = self.get_hook_sql(adapter, hook, hook.index, num_hooks, extra_context) + fire_event( LogHookStartLine( statement=hook_name, @@ -670,11 +676,12 @@ def safe_run_hooks( ) ) - status, message = get_execution_status(sql, 
adapter) - finished_at = datetime.utcnow() + with collect_timing_info("execute", timing.append): + status, message = get_execution_status(sql, adapter) + + finished_at = timing[1].completed_at or datetime.utcnow() hook.update_event_status(finished_at=finished_at.isoformat()) execution_time = (finished_at - started_at).total_seconds() - timing = [TimingInfo(hook_name, started_at, finished_at)] failures = 0 if status == RunStatus.Success else 1 if status == RunStatus.Success: diff --git a/core/dbt/task/run_operation.py b/core/dbt/task/run_operation.py index 793ba81fb01..ebe8b14352e 100644 --- a/core/dbt/task/run_operation.py +++ b/core/dbt/task/run_operation.py @@ -2,11 +2,11 @@ import threading import traceback from datetime import datetime -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List import dbt_common.exceptions from dbt.adapters.factory import get_adapter -from dbt.artifacts.schemas.results import RunStatus, TimingInfo +from dbt.artifacts.schemas.results import RunStatus, TimingInfo, collect_timing_info from dbt.artifacts.schemas.run import RunResult, RunResultsArtifact from dbt.contracts.files import FileHash from dbt.contracts.graph.nodes import HookNode @@ -51,25 +51,29 @@ def _run_unsafe(self, package_name, macro_name) -> "agate.Table": return res def run(self) -> RunResultsArtifact: - start = datetime.utcnow() - self.compile_manifest() + timing: List[TimingInfo] = [] - success = True + with collect_timing_info("compile", timing.append): + self.compile_manifest() + + start = timing[0].started_at + success = True package_name, macro_name = self._get_macro_parts() - try: - self._run_unsafe(package_name, macro_name) - except dbt_common.exceptions.DbtBaseException as exc: - fire_event(RunningOperationCaughtError(exc=str(exc))) - fire_event(LogDebugStackTrace(exc_info=traceback.format_exc())) - success = False - except Exception as exc: - fire_event(RunningOperationUncaughtError(exc=str(exc))) - fire_event(LogDebugStackTrace(exc_info=traceback.format_exc())) - success = False + with collect_timing_info("execute", timing.append): + try: + self._run_unsafe(package_name, macro_name) + except dbt_common.exceptions.DbtBaseException as exc: + fire_event(RunningOperationCaughtError(exc=str(exc))) + fire_event(LogDebugStackTrace(exc_info=traceback.format_exc())) + success = False + except Exception as exc: + fire_event(RunningOperationUncaughtError(exc=str(exc))) + fire_event(LogDebugStackTrace(exc_info=traceback.format_exc())) + success = False - end = datetime.utcnow() + end = timing[1].completed_at macro = ( self.manifest.find_macro_by_name(macro_name, self.config.project_name, package_name) @@ -85,10 +89,12 @@ def run(self) -> RunResultsArtifact: f"dbt could not find a macro with the name '{macro_name}' in any package" ) + execution_time = (end - start).total_seconds() if start and end else 0.0 + run_result = RunResult( adapter_response={}, status=RunStatus.Success if success else RunStatus.Error, - execution_time=(end - start).total_seconds(), + execution_time=execution_time, failures=0 if success else 1, message=None, node=HookNode( @@ -105,13 +111,13 @@ def run(self) -> RunResultsArtifact: original_file_path="", ), thread_id=threading.current_thread().name, - timing=[TimingInfo(name=macro_name, started_at=start, completed_at=end)], + timing=timing, batch_results=None, ) results = RunResultsArtifact.from_execution_results( - generated_at=end, - elapsed_time=(end - start).total_seconds(), + generated_at=end or datetime.utcnow(), + elapsed_time=execution_time, 
args={ k: v for k, v in self.args.__dict__.items() diff --git a/schemas/dbt/run-results/v6.json b/schemas/dbt/run-results/v6.json index 1bf1cf75e83..96456882ae6 100644 --- a/schemas/dbt/run-results/v6.json +++ b/schemas/dbt/run-results/v6.json @@ -84,7 +84,11 @@ "title": "TimingInfo", "properties": { "name": { - "type": "string" + "enum": [ + "compile", + "execute", + "other" + ] }, "started_at": { "anyOf": [ diff --git a/schemas/dbt/sources/v3.json b/schemas/dbt/sources/v3.json index df2784f1a81..8cb3633f99a 100644 --- a/schemas/dbt/sources/v3.json +++ b/schemas/dbt/sources/v3.json @@ -211,7 +211,11 @@ "title": "TimingInfo", "properties": { "name": { - "type": "string" + "enum": [ + "compile", + "execute", + "other" + ] }, "started_at": { "anyOf": [ diff --git a/tests/functional/adapter/hooks/test_on_run_hooks.py b/tests/functional/adapter/hooks/test_on_run_hooks.py index 42edbdae970..b9239b93b4a 100644 --- a/tests/functional/adapter/hooks/test_on_run_hooks.py +++ b/tests/functional/adapter/hooks/test_on_run_hooks.py @@ -55,6 +55,14 @@ def test_results(self, project, log_counts, my_model_run_status): for result in results if isinstance(result.node, HookNode) ] == [(id, str(status)) for id, status in expected_results if id.startswith("operation")] + + for result in results: + if result.status == RunStatus.Skipped: + continue + + timing_keys = [timing.name for timing in result.timing] + assert timing_keys == ["compile", "execute"] + assert log_counts in log_output assert "4 project hooks, 1 view model" in log_output diff --git a/tests/functional/microbatch/test_microbatch.py b/tests/functional/microbatch/test_microbatch.py index 71c8588b17f..8bbf274554d 100644 --- a/tests/functional/microbatch/test_microbatch.py +++ b/tests/functional/microbatch/test_microbatch.py @@ -164,7 +164,7 @@ def test_use_custom_microbatch_strategy_env_var_true_invalid_incremental_strateg ): # Initial run with patch_microbatch_end_time("2020-01-03 13:57:00"): - run_dbt(["run"]) + run_dbt(["run"], expect_pass=False) # Incremental run fails with patch_microbatch_end_time("2020-01-03 13:57:00"): diff --git a/tests/functional/run_operations/test_run_operations.py b/tests/functional/run_operations/test_run_operations.py index 064c98b3a51..258ed679d7e 100644 --- a/tests/functional/run_operations/test_run_operations.py +++ b/tests/functional/run_operations/test_run_operations.py @@ -3,6 +3,7 @@ import pytest import yaml +from dbt.artifacts.schemas.results import RunStatus from dbt.tests.util import ( check_table_does_exist, mkdir, @@ -135,9 +136,25 @@ def test_run_operation_local_macro(self, project): run_dbt(["deps"]) results, log_output = run_dbt_and_capture(["run-operation", "something_cool"]) + + for result in results: + if result.status == RunStatus.Skipped: + continue + + timing_keys = [timing.name for timing in result.timing] + assert timing_keys == ["compile", "execute"] + assert "something cool" in log_output results, log_output = run_dbt_and_capture(["run-operation", "pkg.something_cool"]) + + for result in results: + if result.status == RunStatus.Skipped: + continue + + timing_keys = [timing.name for timing in result.timing] + assert timing_keys == ["compile", "execute"] + assert "something cool" in log_output rm_dir("pkg") diff --git a/tests/unit/test_events.py b/tests/unit/test_events.py index 085f849492e..f6ac66f0034 100644 --- a/tests/unit/test_events.py +++ b/tests/unit/test_events.py @@ -517,7 +517,7 @@ def test_all_serializable(self): def test_date_serialization(): - ti = TimingInfo("test") + ti = 
TimingInfo("compile") ti.begin() ti.end() ti_dict = ti.to_dict() From a0674db8400673d7dd4ebf7597dd82b3349337ce Mon Sep 17 00:00:00 2001 From: Chenyu Li Date: Fri, 18 Oct 2024 15:07:03 -0700 Subject: [PATCH 08/11] exclude hook results from results in on-run-end context (#10885) * exclude hook results from results in on-run-end context * changelog * preserve previous behavior --- .../unreleased/Fixes-20241018-135810.yaml | 6 ++ core/dbt/task/run.py | 4 +- core/dbt/task/runnable.py | 1 - .../adapter/hooks/test_on_run_hooks.py | 74 +++++++++++++++++++ 4 files changed, 83 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Fixes-20241018-135810.yaml diff --git a/.changes/unreleased/Fixes-20241018-135810.yaml b/.changes/unreleased/Fixes-20241018-135810.yaml new file mode 100644 index 00000000000..c205e15bb09 --- /dev/null +++ b/.changes/unreleased/Fixes-20241018-135810.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Exclude hook result from results in on-run-end context +time: 2024-10-18T13:58:10.396884-07:00 +custom: + Author: ChenyuLInx + Issue: "7387" diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index f159861e1ae..7a321e69d30 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -774,7 +774,9 @@ def after_run(self, adapter, results) -> None: extras = { "schemas": list({s for _, s in database_schema_set}), - "results": results, + "results": [ + r for r in results if r.thread_id != "main" or r.status == RunStatus.Error + ], # exclude that didn't fail to preserve backwards compatibility "database_schemas": list(database_schema_set), } with adapter.connection_named("master"): diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py index a37308f81ea..7ad9d87e10d 100644 --- a/core/dbt/task/runnable.py +++ b/core/dbt/task/runnable.py @@ -512,7 +512,6 @@ def execute_with_hooks(self, selected_uids: AbstractSet[str]): self.started_at = time.time() try: before_run_status = self.before_run(adapter, selected_uids) - if before_run_status == RunStatus.Success or ( not get_flags().skip_nodes_if_on_run_start_fails ): diff --git a/tests/functional/adapter/hooks/test_on_run_hooks.py b/tests/functional/adapter/hooks/test_on_run_hooks.py index b9239b93b4a..b85784be3cf 100644 --- a/tests/functional/adapter/hooks/test_on_run_hooks.py +++ b/tests/functional/adapter/hooks/test_on_run_hooks.py @@ -168,3 +168,77 @@ def test_results(self, project): run_results = get_artifact(project.project_root, "target", "run_results.json") assert run_results["results"] == [] + + +class Test__HookContext__HookSuccess: + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "on-run-start": [ + "select 1 as id", # success + "select 1 as id", # success + ], + "on-run-end": [ + '{{ log("Num Results in context: " ~ results|length)}}' + "{{ output_thread_ids(results) }}", + ], + } + + @pytest.fixture(scope="class") + def macros(self): + return { + "log.sql": """ +{% macro output_thread_ids(results) %} + {% for result in results %} + {{ log("Thread ID: " ~ result.thread_id) }} + {% endfor %} +{% endmacro %} +""" + } + + @pytest.fixture(scope="class") + def models(self): + return {"my_model.sql": "select 1"} + + def test_results_in_context_success(self, project): + results, log_output = run_dbt_and_capture(["--debug", "run"]) + assert "Thread ID: " in log_output + assert "Thread ID: main" not in log_output + assert results[0].thread_id == "main" # hook still exists in run results + assert "Num Results in context: 1" in log_output # only model given hook was successful + + 
+class Test__HookContext__HookFail:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "on-run-start": [
+                "select a as id",  # fail
+            ],
+            "on-run-end": [
+                '{{ log("Num Results in context: " ~ results|length)}}'
+                "{{ output_thread_ids(results) }}",
+            ],
+        }
+
+    @pytest.fixture(scope="class")
+    def macros(self):
+        return {
+            "log.sql": """
+{% macro output_thread_ids(results) %}
+    {% for result in results %}
+        {{ log("Thread ID: " ~ result.thread_id) }}
+    {% endfor %}
+{% endmacro %}
+"""
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"my_model.sql": "select 1"}
+
+    def test_results_in_context_hook_fail(self, project):
+        results, log_output = run_dbt_and_capture(["--debug", "run"], expect_pass=False)
+        assert "Thread ID: main" in log_output
+        assert results[0].thread_id == "main"
+        assert "Num Results in context: 2" in log_output  # failed hook and model

From 7920b0e71d5ec5b769a4bd95e29d760fee93570e Mon Sep 17 00:00:00 2001
From: Quigley Malcolm
Date: Mon, 21 Oct 2024 13:10:00 -0700
Subject: [PATCH 09/11] Update microbatch tests to handle update wherein
 incremental strategies are always validated (#10884)

dbt-adapters updated the incremental_strategy validation of incremental
models such that the validation now _always_ happens when an incremental
model is executed. A test in dbt-core
`TestMicrobatchCustomUserStrategyEnvVarTrueInvalid` was previously set to
_expect_ buggy behavior where an incremental model would succeed on its
"first"/"refresh" run even if it had an invalid incremental strategy. Thus
we needed to update this test in dbt-core to expect the now correct
behavior of incremental model execution time validation
---
 tests/functional/microbatch/test_microbatch.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/functional/microbatch/test_microbatch.py b/tests/functional/microbatch/test_microbatch.py
index 8bbf274554d..02c6976c848 100644
--- a/tests/functional/microbatch/test_microbatch.py
+++ b/tests/functional/microbatch/test_microbatch.py
@@ -162,11 +162,8 @@ def test_use_custom_microbatch_strategy_env_var_true_invalid_incremental_strateg
         with mock.patch.object(
             type(project.adapter), "valid_incremental_strategies", lambda _: []
         ):
-            # Initial run
-            with patch_microbatch_end_time("2020-01-03 13:57:00"):
-                run_dbt(["run"], expect_pass=False)
-
-            # Incremental run fails
+            # Run of microbatch model while adapter doesn't have a "valid"
+            # microbatch strategy causes an error to be raised
             with patch_microbatch_end_time("2020-01-03 13:57:00"):
                 _, logs = run_dbt_and_capture(["run"], expect_pass=False)
                 assert "'microbatch' is not valid" in logs

From 3d96b4e36c479ff81eb35ba78fd2d8c5584ea7ae Mon Sep 17 00:00:00 2001
From: Peter Webb
Date: Mon, 21 Oct 2024 19:01:15 -0400
Subject: [PATCH 10/11] Loosen Type in TimingInfo (#10897)

---
 core/dbt/artifacts/schemas/results.py | 8 +++-----
 schemas/dbt/run-results/v6.json | 6 +-----
 schemas/dbt/sources/v3.json | 6 +-----
 3 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/core/dbt/artifacts/schemas/results.py b/core/dbt/artifacts/schemas/results.py
index ee27fc6d5d4..dd455f309b8 100644
--- a/core/dbt/artifacts/schemas/results.py
+++ b/core/dbt/artifacts/schemas/results.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Union
 
 from dbt.contracts.graph.nodes import ResultNode
 from 
dbt_common.dataclass_schema import StrEnum, dbtClassMixin @@ -16,7 +16,7 @@ class TimingInfo(dbtClassMixin): Do not call directly, use `collect_timing_info` instead. """ - name: Literal["compile", "execute", "other"] + name: str started_at: Optional[datetime] = None completed_at: Optional[datetime] = None @@ -37,9 +37,7 @@ def to_msg_dict(self): # This is a context manager class collect_timing_info: - def __init__( - self, name: Literal["compile", "execute", "other"], callback: Callable[[TimingInfo], None] - ) -> None: + def __init__(self, name: str, callback: Callable[[TimingInfo], None]) -> None: self.timing_info = TimingInfo(name=name) self.callback = callback diff --git a/schemas/dbt/run-results/v6.json b/schemas/dbt/run-results/v6.json index 96456882ae6..1bf1cf75e83 100644 --- a/schemas/dbt/run-results/v6.json +++ b/schemas/dbt/run-results/v6.json @@ -84,11 +84,7 @@ "title": "TimingInfo", "properties": { "name": { - "enum": [ - "compile", - "execute", - "other" - ] + "type": "string" }, "started_at": { "anyOf": [ diff --git a/schemas/dbt/sources/v3.json b/schemas/dbt/sources/v3.json index 8cb3633f99a..df2784f1a81 100644 --- a/schemas/dbt/sources/v3.json +++ b/schemas/dbt/sources/v3.json @@ -211,11 +211,7 @@ "title": "TimingInfo", "properties": { "name": { - "enum": [ - "compile", - "execute", - "other" - ] + "type": "string" }, "started_at": { "anyOf": [ From f7b7935a977432fa699e4673ff19ced8c926d1ba Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 22 Oct 2024 14:47:51 -0400 Subject: [PATCH 11/11] Support multiple unique keys in snapshots (#10795) --- .../unreleased/Features-20241001-134051.yaml | 6 + core/dbt/artifacts/resources/v1/snapshot.py | 2 +- core/setup.py | 2 +- schemas/dbt/manifest/v12.json | 12 + .../adapter/simple_snapshot/fixtures.py | 430 ++++++++++++++++++ .../simple_snapshot/test_various_configs.py | 277 +++++++++++ 6 files changed, 727 insertions(+), 2 deletions(-) create mode 100644 .changes/unreleased/Features-20241001-134051.yaml create mode 100644 tests/functional/adapter/simple_snapshot/fixtures.py create mode 100644 tests/functional/adapter/simple_snapshot/test_various_configs.py diff --git a/.changes/unreleased/Features-20241001-134051.yaml b/.changes/unreleased/Features-20241001-134051.yaml new file mode 100644 index 00000000000..60ada51ece3 --- /dev/null +++ b/.changes/unreleased/Features-20241001-134051.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Enable use of multi-column unique key in snapshots +time: 2024-10-01T13:40:51.297529-04:00 +custom: + Author: gshank + Issue: "9992" diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py index 062b6a62814..1a7b9344ca0 100644 --- a/core/dbt/artifacts/resources/v1/snapshot.py +++ b/core/dbt/artifacts/resources/v1/snapshot.py @@ -19,7 +19,7 @@ class SnapshotMetaColumnNames(dbtClassMixin): class SnapshotConfig(NodeConfig): materialized: str = "snapshot" strategy: Optional[str] = None - unique_key: Optional[str] = None + unique_key: Optional[Union[str, List[str]]] = None target_schema: Optional[str] = None target_database: Optional[str] = None updated_at: Optional[str] = None diff --git a/core/setup.py b/core/setup.py index b7a8dabd14e..b787ce8a923 100644 --- a/core/setup.py +++ b/core/setup.py @@ -71,7 +71,7 @@ "dbt-extractor>=0.5.0,<=0.6", "dbt-semantic-interfaces>=0.7.3,<0.8", # Minor versions for these are expected to be backwards-compatible - "dbt-common>=1.9.0,<2.0", + "dbt-common>=1.11.0,<2.0", "dbt-adapters>=1.7.0,<2.0", # ---- # Expect compatibility with all 
new versions of these packages, so lower bounds only. diff --git a/schemas/dbt/manifest/v12.json b/schemas/dbt/manifest/v12.json index 9cb7f732e62..cebc0970b9d 100644 --- a/schemas/dbt/manifest/v12.json +++ b/schemas/dbt/manifest/v12.json @@ -6540,6 +6540,12 @@ { "type": "string" }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } @@ -16425,6 +16431,12 @@ { "type": "string" }, + { + "type": "array", + "items": { + "type": "string" + } + }, { "type": "null" } diff --git a/tests/functional/adapter/simple_snapshot/fixtures.py b/tests/functional/adapter/simple_snapshot/fixtures.py new file mode 100644 index 00000000000..cec28a7d64d --- /dev/null +++ b/tests/functional/adapter/simple_snapshot/fixtures.py @@ -0,0 +1,430 @@ +create_seed_sql = """ +create table {schema}.seed ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + updated_at TIMESTAMP +); +""" + +create_snapshot_expected_sql = """ +create table {schema}.snapshot_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- snapshotting fields + updated_at TIMESTAMP, + test_valid_from TIMESTAMP, + test_valid_to TIMESTAMP, + test_scd_id TEXT, + test_updated_at TIMESTAMP +); +""" + + +seed_insert_sql = """ +-- seed inserts +-- use the same email for two users to verify that duplicated check_cols values +-- are handled appropriately +insert into {schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +(1, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), +(2, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), +(3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), +(4, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'), +(5, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'), +(6, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'), +(7, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'), +(8, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'), +(9, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'), +(10, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'), +(11, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'), +(12, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'), +(13, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'), +(14, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'), +(15, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'), +(16, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'), +(17, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'), +(18, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'), +(19, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'), +(20, 'Phyllis', 'Fox', null, 
'Female', '163.191.232.95', '2016-08-21 10:35:19');
+"""
+
+
+populate_snapshot_expected_sql = """
+-- populate snapshot table
+insert into {schema}.snapshot_expected (
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    null::timestamp as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
+from {schema}.seed;
+"""
+
+populate_snapshot_expected_valid_to_current_sql = """
+-- populate snapshot table
+insert into {schema}.snapshot_expected (
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    date('2099-12-31') as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
+from {schema}.seed;
+"""
+
+snapshot_actual_sql = """
+{% snapshot snapshot_actual %}
+
+    {{
+        config(
+            unique_key='id || ' ~ "'-'" ~ ' || first_name',
+        )
+    }}
+
+    select * from {{target.database}}.{{target.schema}}.seed
+
+{% endsnapshot %}
+"""
+
+snapshots_yml = """
+snapshots:
+  - name: snapshot_actual
+    config:
+      strategy: timestamp
+      updated_at: updated_at
+      snapshot_meta_column_names:
+        dbt_valid_to: test_valid_to
+        dbt_valid_from: test_valid_from
+        dbt_scd_id: test_scd_id
+        dbt_updated_at: test_updated_at
+"""
+
+snapshots_no_column_names_yml = """
+snapshots:
+  - name: snapshot_actual
+    config:
+      strategy: timestamp
+      updated_at: updated_at
+"""
+
+ref_snapshot_sql = """
+select * from {{ ref('snapshot_actual') }}
+"""
+
+
+invalidate_sql = """
+-- update records 10 - 20. Change email and updated_at field
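+-- (bumping updated_at is what makes the timestamp strategy treat these rows as changed)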
+update {schema}.seed set
+    updated_at = updated_at + interval '1 hour',
+    email = case when id = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end
+where id >= 10 and id <= 20;
+
+
+-- invalidate records 10 - 20
+update {schema}.snapshot_expected set
+    test_valid_to = updated_at + interval '1 hour'
+where id >= 10 and id <= 20;
+
+"""
+
+update_sql = """
+-- insert v2 of the 10 - 20 records
+
+insert into {schema}.snapshot_expected (
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    null::timestamp as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
+from {schema}.seed
+where id >= 10 and id <= 20;
+"""
+
+# valid_to_current fixtures
+
+snapshots_valid_to_current_yml = """
+snapshots:
+  - name: snapshot_actual
+    config:
+      strategy: timestamp
+      updated_at: updated_at
+      dbt_valid_to_current: "date('2099-12-31')"
+      snapshot_meta_column_names:
+        dbt_valid_to: test_valid_to
+        dbt_valid_from: test_valid_from
+        dbt_scd_id: test_scd_id
+        dbt_updated_at: test_updated_at
+"""
+
+update_with_current_sql = """
+-- insert v2 of the 10 - 20 records
+
+insert into {schema}.snapshot_expected (
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    date('2099-12-31') as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id || '-' || first_name || '|' || updated_at::text) as test_scd_id
+from {schema}.seed
+where id >= 10 and id <= 20;
+"""
+
+
+# multi-key snapshot fixtures
+
+create_multi_key_seed_sql = """
+create table {schema}.seed (
+    id1 INTEGER,
+    id2 INTEGER,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(50),
+    gender VARCHAR(50),
+    ip_address VARCHAR(20),
+    updated_at TIMESTAMP
+);
+"""
+
+
+create_multi_key_snapshot_expected_sql = """
+create table {schema}.snapshot_expected (
+    id1 INTEGER,
+    id2 INTEGER,
+    first_name VARCHAR(50),
+    last_name VARCHAR(50),
+    email VARCHAR(50),
+    gender VARCHAR(50),
+    ip_address VARCHAR(20),
+
+    -- snapshotting fields
+    updated_at TIMESTAMP,
+    test_valid_from TIMESTAMP,
+    test_valid_to TIMESTAMP,
+    test_scd_id TEXT,
+    test_updated_at TIMESTAMP
+);
+"""
+
+seed_multi_key_insert_sql = """
+-- seed inserts
+-- use the same email for two users to verify that duplicated check_cols values
+-- are handled appropriately
+insert into {schema}.seed (id1, id2, first_name, last_name, email, gender, ip_address, updated_at) values
+(1, 100, 'Judith', 'Kennedy', '(not provided)', 'Female', '54.60.24.128', '2015-12-24 12:19:28'),
+(2, 200, 'Arthur', 'Kelly', '(not provided)', 'Male', '62.56.24.215', '2015-10-28 16:22:15'),
+(3, 300, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'),
+(4, 400, 'Ralph', 'Turner', 'rturner3@hp.com', 'Male', '157.83.76.114', '2016-08-08 00:06:51'),
+(5, 500, 'Laura', 'Gonzales', 'lgonzales4@howstuffworks.com', 'Female', '30.54.105.168', '2016-09-01 08:25:38'),
+(6, 600, 'Katherine', 'Lopez', 'klopez5@yahoo.co.jp', 'Female', '169.138.46.89', '2016-08-30 18:52:11'),
+(7, 700, 'Jeremy', 'Hamilton', 'jhamilton6@mozilla.org', 'Male', '231.189.13.133', '2016-07-17 02:09:46'),
+(8, 800, 'Heather', 'Rose', 'hrose7@goodreads.com', 'Female', '87.165.201.65', '2015-12-29 22:03:56'),
+(9, 900, 'Gregory', 'Kelly', 'gkelly8@trellian.com', 'Male', '154.209.99.7', '2016-03-24 21:18:16'),
+(10, 1000, 'Rachel', 'Lopez', 'rlopez9@themeforest.net', 'Female', '237.165.82.71', '2016-08-20 15:44:49'),
+(11, 1100, 'Donna', 'Welch', 'dwelcha@shutterfly.com', 'Female', '103.33.110.138', '2016-02-27 01:41:48'),
+(12, 1200, 'Russell', 'Lawrence', 'rlawrenceb@qq.com', 'Male', '189.115.73.4', '2016-06-11 03:07:09'),
+(13, 1300, 'Michelle', 'Montgomery', 'mmontgomeryc@scientificamerican.com', 'Female', '243.220.95.82', '2016-06-18 16:27:19'),
+(14, 1400, 'Walter', 'Castillo', 'wcastillod@pagesperso-orange.fr', 'Male', '71.159.238.196', '2016-10-06 01:55:44'),
+(15, 1500, 'Robin', 'Mills', 'rmillse@vkontakte.ru', 'Female', '172.190.5.50', '2016-10-31 11:41:21'),
+(16, 1600, 'Raymond', 'Holmes', 'rholmesf@usgs.gov', 'Male', '148.153.166.95', '2016-10-03 08:16:38'),
+(17, 1700, 'Gary', 'Bishop', 'gbishopg@plala.or.jp', 'Male', '161.108.182.13', '2016-08-29 19:35:20'),
+(18, 1800, 'Anna', 'Riley', 'arileyh@nasa.gov', 'Female', '253.31.108.22', '2015-12-11 04:34:27'),
+(19, 1900, 'Sarah', 'Knight', 'sknighti@foxnews.com', 'Female', '222.220.3.177', '2016-09-26 00:49:06'),
+(20, 2000, 'Phyllis', 'Fox', null, 'Female', '163.191.232.95', '2016-08-21 10:35:19');
+"""
+
+populate_multi_key_snapshot_expected_sql = """
+-- populate snapshot table
+insert into {schema}.snapshot_expected (
+    id1,
+    id2,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id1,
+    id2,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    null::timestamp as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id1::text || '|' || id2::text || '|' || updated_at::text) as test_scd_id
+from {schema}.seed;
+"""
+
+model_seed_sql = """
+select * from {{target.database}}.{{target.schema}}.seed
+"""
+
+snapshots_multi_key_yml = """
+snapshots:
+  - name: snapshot_actual
+    relation: "ref('seed')"
+    config:
+      strategy: timestamp
+      updated_at: updated_at
+      unique_key:
+        - id1
+        - id2
+      snapshot_meta_column_names:
+        dbt_valid_to: test_valid_to
+        dbt_valid_from: test_valid_from
+        dbt_scd_id: test_scd_id
+        dbt_updated_at: test_updated_at
+"""
+
+invalidate_multi_key_sql = """
+-- update records 10 - 20. Change email and updated_at field
+update {schema}.seed set
+    updated_at = updated_at + interval '1 hour',
+    email = case when id1 = 20 then 'pfoxj@creativecommons.org' else 'new_' || email end
+where id1 >= 10 and id1 <= 20;
+
+
+-- invalidate records 10 - 20
+update {schema}.snapshot_expected set
+    test_valid_to = updated_at + interval '1 hour'
+where id1 >= 10 and id1 <= 20;
+
+"""
+
+update_multi_key_sql = """
+-- insert v2 of the 10 - 20 records
+
+insert into {schema}.snapshot_expected (
+    id1,
+    id2,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    test_valid_from,
+    test_valid_to,
+    test_updated_at,
+    test_scd_id
+)
+
+select
+    id1,
+    id2,
+    first_name,
+    last_name,
+    email,
+    gender,
+    ip_address,
+    updated_at,
+    -- fields added by snapshotting
+    updated_at as test_valid_from,
+    null::timestamp as test_valid_to,
+    updated_at as test_updated_at,
+    md5(id1::text || '|' || id2::text || '|' || updated_at::text) as test_scd_id
+from {schema}.seed
+where id1 >= 10 and id1 <= 20;
+"""
diff --git a/tests/functional/adapter/simple_snapshot/test_various_configs.py b/tests/functional/adapter/simple_snapshot/test_various_configs.py
new file mode 100644
index 00000000000..d288d2934df
--- /dev/null
+++ b/tests/functional/adapter/simple_snapshot/test_various_configs.py
@@ -0,0 +1,277 @@
+import datetime
+
+import pytest
+
+from dbt.tests.util import (
+    check_relations_equal,
+    get_manifest,
+    run_dbt,
+    run_dbt_and_capture,
+    run_sql_with_adapter,
+    update_config_file,
+)
+from tests.functional.adapter.simple_snapshot.fixtures import (
+    create_multi_key_seed_sql,
+    create_multi_key_snapshot_expected_sql,
+    create_seed_sql,
+    create_snapshot_expected_sql,
+    invalidate_multi_key_sql,
+    invalidate_sql,
+    model_seed_sql,
+    populate_multi_key_snapshot_expected_sql,
+    populate_snapshot_expected_sql,
+    populate_snapshot_expected_valid_to_current_sql,
+    ref_snapshot_sql,
+    seed_insert_sql,
+    seed_multi_key_insert_sql,
+    snapshot_actual_sql,
+    snapshots_multi_key_yml,
+    snapshots_no_column_names_yml,
+    snapshots_valid_to_current_yml,
+    snapshots_yml,
+    update_multi_key_sql,
+    update_sql,
+    update_with_current_sql,
+)
+
+
+class BaseSnapshotColumnNames:
+    @pytest.fixture(scope="class")
+    def snapshots(self):
+        return {"snapshot.sql": snapshot_actual_sql}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "snapshots.yml": snapshots_yml,
+            "ref_snapshot.sql": ref_snapshot_sql,
+        }
+
+    def test_snapshot_column_names(self, project):
+        project.run_sql(create_seed_sql)
+        project.run_sql(create_snapshot_expected_sql)
+        project.run_sql(seed_insert_sql)
+        project.run_sql(populate_snapshot_expected_sql)
+
+        results = run_dbt(["snapshot"])
+        assert len(results) == 1
+
+        project.run_sql(invalidate_sql)
+        project.run_sql(update_sql)
+
+        results = run_dbt(["snapshot"])
+        assert len(results) == 1
+
+        # run_dbt(["test"])
+        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])
+
+
+class TestSnapshotColumnNames(BaseSnapshotColumnNames):
+    pass
+
+
+class BaseSnapshotColumnNamesFromDbtProject:
+    @pytest.fixture(scope="class")
+    def snapshots(self):
+        return {"snapshot.sql": snapshot_actual_sql}
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "snapshots.yml": snapshots_no_column_names_yml,
+            "ref_snapshot.sql": ref_snapshot_sql,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "snapshots": {
+                "test": {
+                    "+snapshot_meta_column_names": {
+                        "dbt_valid_to": "test_valid_to",
+                        "dbt_valid_from": "test_valid_from",
"test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + } + } + } + + def test_snapshot_column_names_from_project(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + project.run_sql(invalidate_sql) + project.run_sql(update_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + # run_dbt(["test"]) + check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"]) + + +class TestSnapshotColumnNamesFromDbtProject(BaseSnapshotColumnNamesFromDbtProject): + pass + + +class BaseSnapshotInvalidColumnNames: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_no_column_names_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + @pytest.fixture(scope="class") + def project_config_update(self): + return { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + } + } + } + + def test_snapshot_invalid_column_names(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + manifest = get_manifest(project.project_root) + snapshot_node = manifest.nodes["snapshot.test.snapshot_actual"] + snapshot_node.config.snapshot_meta_column_names == { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + "dbt_scd_id": "test_scd_id", + "dbt_updated_at": "test_updated_at", + } + + project.run_sql(invalidate_sql) + project.run_sql(update_sql) + + # Change snapshot_meta_columns and look for an error + different_columns = { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_updated_at": "test_updated_at", + } + } + } + } + update_config_file(different_columns, "dbt_project.yml") + + results, log_output = run_dbt_and_capture(["snapshot"], expect_pass=False) + assert len(results) == 1 + assert "Compilation Error in snapshot snapshot_actual" in log_output + assert "Snapshot target is missing configured columns" in log_output + + +class TestSnapshotInvalidColumnNames(BaseSnapshotInvalidColumnNames): + pass + + +class BaseSnapshotDbtValidToCurrent: + @pytest.fixture(scope="class") + def snapshots(self): + return {"snapshot.sql": snapshot_actual_sql} + + @pytest.fixture(scope="class") + def models(self): + return { + "snapshots.yml": snapshots_valid_to_current_yml, + "ref_snapshot.sql": ref_snapshot_sql, + } + + def test_valid_to_current(self, project): + project.run_sql(create_seed_sql) + project.run_sql(create_snapshot_expected_sql) + project.run_sql(seed_insert_sql) + project.run_sql(populate_snapshot_expected_valid_to_current_sql) + + results = run_dbt(["snapshot"]) + assert len(results) == 1 + + original_snapshot = run_sql_with_adapter( + project.adapter, + "select id, test_scd_id, test_valid_to from {schema}.snapshot_actual", + "all", + ) + assert original_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0) + assert original_snapshot[9][2] == datetime.datetime(2099, 12, 31, 0, 0) + + 
+        project.run_sql(invalidate_sql)
+        project.run_sql(update_with_current_sql)
+
+        results = run_dbt(["snapshot"])
+        assert len(results) == 1
+
+        updated_snapshot = run_sql_with_adapter(
+            project.adapter,
+            "select id, test_scd_id, test_valid_to from {schema}.snapshot_actual",
+            "all",
+        )
+        assert updated_snapshot[0][2] == datetime.datetime(2099, 12, 31, 0, 0)
+        # The original version of the updated row is closed out with its real
+        # end date instead of the current (2099-12-31) sentinel
+        assert updated_snapshot[9][2] == datetime.datetime(2016, 8, 20, 16, 44, 49)
+        # The new version of the row has the current (2099-12-31) sentinel date
+        assert updated_snapshot[20][2] == datetime.datetime(2099, 12, 31, 0, 0)
+
+        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])
+
+
+class TestSnapshotDbtValidToCurrent(BaseSnapshotDbtValidToCurrent):
+    pass
+
+
+# This uses snapshot_meta_column_names, a yaml-only snapshot definition,
+# and a multi-column unique key
+class BaseSnapshotMultiUniqueKey:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "seed.sql": model_seed_sql,
+            "snapshots.yml": snapshots_multi_key_yml,
+            "ref_snapshot.sql": ref_snapshot_sql,
+        }
+
+    def test_multi_column_unique_key(self, project):
+        project.run_sql(create_multi_key_seed_sql)
+        project.run_sql(create_multi_key_snapshot_expected_sql)
+        project.run_sql(seed_multi_key_insert_sql)
+        project.run_sql(populate_multi_key_snapshot_expected_sql)
+
+        results = run_dbt(["snapshot"])
+        assert len(results) == 1
+
+        project.run_sql(invalidate_multi_key_sql)
+        project.run_sql(update_multi_key_sql)
+
+        results = run_dbt(["snapshot"])
+        assert len(results) == 1
+
+        # run_dbt(["test"])
+        check_relations_equal(project.adapter, ["snapshot_actual", "snapshot_expected"])
+
+
+class TestSnapshotMultiUniqueKey(BaseSnapshotMultiUniqueKey):
+    pass
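
A minimal usage sketch of the relaxed collect_timing_info signature from the TimingInfo hunk earlier in this series. The import path and the "render" label are illustrative assumptions; the (name, callback) signature itself is taken from the diff:

    from typing import List

    # Assumed import path: the hunk above edits the dbt-core module that
    # defines TimingInfo alongside collect_timing_info.
    from dbt.artifacts.schemas.results import TimingInfo, collect_timing_info

    timings: List[TimingInfo] = []

    # name was previously Literal["compile", "execute", "other"]; any phase
    # label, e.g. a hypothetical "render" step, is now accepted.
    with collect_timing_info("render", timings.append):
        pass  # timed work goes here

    # The callback receives the completed TimingInfo when the block exits.
    print(timings[0].to_msg_dict())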