From b859dea5eacabe95efe724f78a48c6250b496bba Mon Sep 17 00:00:00 2001
From: ariosramirez
Date: Fri, 20 Sep 2024 01:44:54 -0300
Subject: [PATCH] feat(snapshot): Squash commit with duplicate column validation

---
 .changes/unreleased/Features-20240228-192518.yaml |  6 ++++++
 core/dbt/artifacts/resources/v1/snapshot.py       |  6 ++++++
 tests/unit/contracts/graph/test_nodes_parsed.py   | 10 ++++++++++
 3 files changed, 22 insertions(+)
 create mode 100644 .changes/unreleased/Features-20240228-192518.yaml

diff --git a/.changes/unreleased/Features-20240228-192518.yaml b/.changes/unreleased/Features-20240228-192518.yaml
new file mode 100644
index 00000000000..5f52a703952
--- /dev/null
+++ b/.changes/unreleased/Features-20240228-192518.yaml
@@ -0,0 +1,6 @@
+kind: Features
+body: Validation to detect duplicate column names in the check_cols configuration of a snapshot.
+time: 2024-02-28T19:25:18.00447-03:00
+custom:
+  Author: ariosramirez
+  Issue: "9656"
diff --git a/core/dbt/artifacts/resources/v1/snapshot.py b/core/dbt/artifacts/resources/v1/snapshot.py
index c9f1acdb50f..553440e814e 100644
--- a/core/dbt/artifacts/resources/v1/snapshot.py
+++ b/core/dbt/artifacts/resources/v1/snapshot.py
@@ -34,6 +34,12 @@ def final_validate(self):
                     f"Invalid value for 'check_cols': {self.check_cols}. "
                     "Expected 'all' or a list of strings."
                 )
+            # Validate if there are duplicate column names in check_cols.
+            if isinstance(self.check_cols, list):
+                if len(self.check_cols) != len(set(self.check_cols)):
+                    raise ValidationError(
+                        f"Duplicate column names in 'check_cols':" f" {self.check_cols}."
+                    )
         elif self.strategy == "timestamp":
             if not self.updated_at:
                 raise ValidationError(
diff --git a/tests/unit/contracts/graph/test_nodes_parsed.py b/tests/unit/contracts/graph/test_nodes_parsed.py
index 7655b7aa444..46b51c2ef8d 100644
--- a/tests/unit/contracts/graph/test_nodes_parsed.py
+++ b/tests/unit/contracts/graph/test_nodes_parsed.py
@@ -1481,6 +1481,16 @@ def assert_snapshot_config_fails_validation(dct):
         obj.final_validate()
 
 
+def test_duplicate_check_cols(basic_check_snapshot_config_dict):
+    duplicate_cols = basic_check_snapshot_config_dict
+    # Introducing duplicate column names
+    duplicate_cols["check_cols"] = ["col1", "col2", "col2"]
+    with pytest.raises(ValidationError, match=r"Duplicate column names in 'check_cols'"):
+        SnapshotConfig.validate(duplicate_cols)
+        cfg = SnapshotConfig.from_dict(duplicate_cols)
+        cfg.final_validate()
+
+
 def test_invalid_check_value(basic_check_snapshot_config_dict):
     invalid_check_type = basic_check_snapshot_config_dict
     invalid_check_type["check_cols"] = "some"