From bbb8e7ee2056bca9b26efad0eab311d22931de0c Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Wed, 23 Oct 2024 14:16:33 -0400 Subject: [PATCH] Partial parse yaml snapshots (#10907) --- .../unreleased/Fixes-20241022-222927.yaml | 6 ++++ core/dbt/contracts/files.py | 1 + core/dbt/contracts/graph/manifest.py | 9 +++-- core/dbt/parser/partial.py | 21 ++++++++++++ core/dbt/parser/schemas.py | 5 +-- tests/functional/snapshots/fixtures.py | 13 +++++++- .../snapshots/test_basic_snapshot.py | 33 +++++++++++++++++++ 7 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 .changes/unreleased/Fixes-20241022-222927.yaml diff --git a/.changes/unreleased/Fixes-20241022-222927.yaml b/.changes/unreleased/Fixes-20241022-222927.yaml new file mode 100644 index 00000000000..cd294862ba4 --- /dev/null +++ b/.changes/unreleased/Fixes-20241022-222927.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Implement partial parsing for all-yaml snapshots +time: 2024-10-22T22:29:27.396378-04:00 +custom: + Author: gshank + Issue: "10903" diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index d5c1dba5366..15e951e026c 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -192,6 +192,7 @@ class SchemaSourceFile(BaseSourceFile): sources: List[str] = field(default_factory=list) exposures: List[str] = field(default_factory=list) metrics: List[str] = field(default_factory=list) + snapshots: List[str] = field(default_factory=list) # The following field will no longer be used. Leaving # here to avoid breaking existing projects. To be removed # later if possible. diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 4ce887591d9..d387616a8ea 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -59,6 +59,7 @@ SeedNode, SemanticModel, SingularTestNode, + SnapshotNode, SourceDefinition, UnitTestDefinition, UnitTestFileFixture, @@ -1600,12 +1601,14 @@ def add_node(self, source_file: AnySourceFile, node: ManifestNode, test_from=Non if isinstance(node, GenericTestNode): assert test_from source_file.add_test(node.unique_id, test_from) - if isinstance(node, Metric): + elif isinstance(node, Metric): source_file.metrics.append(node.unique_id) - if isinstance(node, Exposure): + elif isinstance(node, Exposure): source_file.exposures.append(node.unique_id) - if isinstance(node, Group): + elif isinstance(node, Group): source_file.groups.append(node.unique_id) + elif isinstance(node, SnapshotNode): + source_file.snapshots.append(node.unique_id) elif isinstance(source_file, FixtureSourceFile): pass else: diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 774edf8ce6d..d4e20a617e1 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -658,10 +658,14 @@ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict key_diff = self.get_diff_for(dict_key, saved_yaml_dict, new_yaml_dict) if key_diff["changed"]: for elem in key_diff["changed"]: + if dict_key == "snapshots" and "relation" in elem: + self.delete_yaml_snapshot(schema_file, elem) self.delete_schema_mssa_links(schema_file, dict_key, elem) self.merge_patch(schema_file, dict_key, elem, True) if key_diff["deleted"]: for elem in key_diff["deleted"]: + if dict_key == "snapshots" and "relation" in elem: + self.delete_yaml_snapshot(schema_file, elem) self.delete_schema_mssa_links(schema_file, dict_key, elem) if key_diff["added"]: for elem in key_diff["added"]: @@ -673,6 +677,8 @@ def handle_schema_file_changes(self, schema_file, saved_yaml_dict, new_yaml_dict continue elem = self.get_schema_element(new_yaml_dict[dict_key], name) if elem: + if dict_key == "snapshots" and "relation" in elem: + self.delete_yaml_snapshot(schema_file, elem) self.delete_schema_mssa_links(schema_file, dict_key, elem) self.merge_patch(schema_file, dict_key, elem, True) @@ -828,6 +834,8 @@ def delete_schema_mssa_links(self, schema_file, dict_key, elem): # remove elem node and remove unique_id from node_patches for elem_unique_id in elem_unique_ids: # might have been already removed + # For all-yaml snapshots, we don't do this, since the node + # should have already been removed. if ( elem_unique_id in self.saved_manifest.nodes or elem_unique_id in self.saved_manifest.disabled @@ -868,6 +876,19 @@ def remove_tests(self, schema_file, dict_key, name): self.saved_manifest.nodes.pop(test_unique_id) schema_file.remove_tests(dict_key, name) + def delete_yaml_snapshot(self, schema_file, snapshot_dict): + snapshot_name = snapshot_dict["name"] + snapshots = schema_file.snapshots.copy() + for unique_id in snapshots: + if unique_id in self.saved_manifest.nodes: + snapshot = self.saved_manifest.nodes[unique_id] + if snapshot.name == snapshot_name: + self.saved_manifest.nodes.pop(unique_id) + schema_file.snapshots.remove(unique_id) + elif unique_id in self.saved_manifest.disabled: + self.delete_disabled(unique_id, schema_file.file_id) + schema_file.snapshots.remove(unique_id) + def delete_schema_source(self, schema_file, source_dict): # both patches, tests, and source nodes source_name = source_dict["name"] diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 077d7083ed5..e0c94b8b444 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -309,8 +309,9 @@ def _add_yaml_snapshot_nodes_to_manifest( snapshot_node.raw_code = "select * from {{ " + snapshot["relation"] + " }}" # Add our new node to the manifest, and note that ref lookup collections - # will need to be rebuilt. - self.manifest.add_node_nofile(snapshot_node) + # will need to be rebuilt. This adds the node unique_id to the "snapshots" + # list in the SchemaSourceFile. + self.manifest.add_node(block.file, snapshot_node) rebuild_refs = True if rebuild_refs: diff --git a/tests/functional/snapshots/fixtures.py b/tests/functional/snapshots/fixtures.py index 5b3182098d2..562fcb4b10f 100644 --- a/tests/functional/snapshots/fixtures.py +++ b/tests/functional/snapshots/fixtures.py @@ -292,7 +292,6 @@ """ snapshots_pg__snapshot_yml = """ -version: 2 snapshots: - name: snapshot_actual relation: "ref('seed')" @@ -304,6 +303,18 @@ owner: 'a_owner' """ +snapshots_pg__snapshot_mod_yml = """ +snapshots: + - name: snapshot_actual + relation: "ref('seed')" + config: + unique_key: "id || '-' || first_name" + strategy: timestamp + updated_at: updated_at + meta: + owner: 'b_owner' +""" + snapshots_pg__snapshot_no_target_schema_sql = """ {% snapshot snapshot_actual %} diff --git a/tests/functional/snapshots/test_basic_snapshot.py b/tests/functional/snapshots/test_basic_snapshot.py index b5a508b04a9..5300f921971 100644 --- a/tests/functional/snapshots/test_basic_snapshot.py +++ b/tests/functional/snapshots/test_basic_snapshot.py @@ -8,6 +8,7 @@ check_relations_equal, relation_from_name, run_dbt, + update_config_file, write_file, ) from tests.functional.snapshots.fixtures import ( @@ -18,6 +19,7 @@ models__schema_yml, seeds__seed_csv, seeds__seed_newcol_csv, + snapshots_pg__snapshot_mod_yml, snapshots_pg__snapshot_no_target_schema_sql, snapshots_pg__snapshot_sql, snapshots_pg__snapshot_yml, @@ -394,3 +396,34 @@ def models(self): class TestBasicSnapshotYaml(BasicYaml): def test_basic_snapshot_yaml(self, project): snapshot_setup(project, num_snapshot_models=1) + + +class TestYamlSnapshotPartialParsing(BasicYaml): + def test_snapshot_partial_parsing(self, project): + manifest = run_dbt(["parse"]) + snapshot_id = "snapshot.test.snapshot_actual" + assert snapshot_id in manifest.nodes + snapshot = manifest.nodes[snapshot_id] + assert snapshot.meta["owner"] == "a_owner" + + # change snapshot yml file and re-parse + write_file(snapshots_pg__snapshot_mod_yml, "snapshots", "snapshot.yml") + manifest = run_dbt(["parse"]) + snapshot = manifest.nodes[snapshot_id] + assert snapshot.meta["owner"] == "b_owner" + + # modify dbt_project.yml and re-parse + config_updates = { + "snapshots": { + "test": { + "+snapshot_meta_column_names": { + "dbt_valid_to": "test_valid_to", + "dbt_valid_from": "test_valid_from", + }, + } + } + } + update_config_file(config_updates, "dbt_project.yml") + manifest = run_dbt(["parse"]) + snapshot = manifest.nodes[snapshot_id] + assert snapshot.config.snapshot_meta_column_names.dbt_valid_to == "test_valid_to"