From 61372de2f6366315e774592a37c67dc32ed6e65d Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Sat, 16 Dec 2023 19:41:16 -0500 Subject: [PATCH] dbt-materialize: more gracefully handle contracts on unknown types Backport dbt-core#8887 to make data contracts work correctly with custom PostgreSQL types that are unknown to dbt/psycopg2. The error messages are bad when contract validation on such types fails, but the contracts fundamentally work, which is a big improvement. See comments within the patch for details. --- misc/dbt-materialize/CHANGELOG.md | 5 ++ .../dbt/adapters/materialize/connections.py | 45 ++++++++++- .../tests/adapter/test_contracts.py | 81 +++++++++++++++++++ 3 files changed, 127 insertions(+), 4 deletions(-) create mode 100644 misc/dbt-materialize/tests/adapter/test_contracts.py diff --git a/misc/dbt-materialize/CHANGELOG.md b/misc/dbt-materialize/CHANGELOG.md index 4d5c68c812aec..13cc98123322c 100644 --- a/misc/dbt-materialize/CHANGELOG.md +++ b/misc/dbt-materialize/CHANGELOG.md @@ -1,5 +1,10 @@ # dbt-materialize Changelog +## Unreleased + +* Backport [dbt-core #8887](https://github.com/dbt-labs/dbt-core/pull/8887) to + unblock users using any custom type with data contracts. 
+ ## 1.7.1 - 2023-12-14 * Remove the dependency of data contracts pre-flight checks on the existence of diff --git a/misc/dbt-materialize/dbt/adapters/materialize/connections.py b/misc/dbt-materialize/dbt/adapters/materialize/connections.py index 54563de538885..c0a2748229e88 100644 --- a/misc/dbt-materialize/dbt/adapters/materialize/connections.py +++ b/misc/dbt-materialize/dbt/adapters/materialize/connections.py @@ -18,6 +18,7 @@ from typing import Optional import psycopg2 +from psycopg2.extensions import string_types from psycopg2.extras import register_uuid import dbt.adapters.postgres.connections @@ -31,6 +32,12 @@ logger = AdapterLogger("Materialize") +# NOTE(morsapaes): registering the UUID type produces nicer error messages +# when data contracts fail on a UUID type. See comment in the +# `data_type_code_to_name` method for details. We may be able to remove +# this when dbt-core#8900 lands. +register_uuid() + # Override the psycopg2 connect function in order to inject Materialize-specific # session parameter defaults. # @@ -50,10 +57,6 @@ def connect(**kwargs): ] kwargs["options"] = " ".join(options) - # NOTE(morsapaes): work around dbt-core #8353 while #8900 doesn't land to - # unblock users using UUID types. - register_uuid() - return _connect(**kwargs) @@ -135,6 +138,40 @@ def cancel(self, connection): # probably bad, re-raise it raise + # NOTE(benesch): this is a backport, with modifications, of dbt-core#8887. + # TODO(benesch): consider removing this when v1.8 ships with this code. + @classmethod + def data_type_code_to_name(cls, type_code: int) -> str: + if type_code in string_types: + return string_types[type_code].name + else: + # The type is unknown to psycopg2, so make up a unique name based on + # the type's OID. Here are the consequences for data contracts that + # reference unknown types: + # + # * Data contracts that are valid work flawlessly. 
Take the + # `mz_timestamp` type, for example, which is unknown to psycopg2 + # because it is a special Materialize type. It has OID 16552. If + # the data contract specifies a column of type `mz_timestamp` + # and the model's column is actually of type `mz_timestamp`, the + # contract will validate successfully and the user will have no + # idea that under the hood dbt validated these two strings + # against one another: + # + # expected: `custom type unknown to dbt (OID 16552)` + # actual: `custom type unknown to dbt (OID 16552)` + # + # * Data contracts that are invalid produce an ugly error message. + # If the contract specifies the `timestamp` type but the model's + # column is actually of type `mz_timestamp`, dbt will complain + # with an error message like "expected type DATETIME, got custom + # type unknown to dbt (OID 16552)". + # + # Still, this is much better than the built-in behavior with dbt + # 1.7, which is to raise "Unhandled error while executing: + # 16552". See dbt-core#8353 for details. + return f"custom type unknown to dbt (OID {type_code})" + # Disable transactions. Materialize transactions do not support arbitrary # queries in transactions and therefore many of dbt's internal macros # produce invalid transactions. diff --git a/misc/dbt-materialize/tests/adapter/test_contracts.py b/misc/dbt-materialize/tests/adapter/test_contracts.py new file mode 100644 index 0000000000000..771adbf11e510 --- /dev/null +++ b/misc/dbt-materialize/tests/adapter/test_contracts.py @@ -0,0 +1,81 @@ +import pytest +from dbt.tests.util import run_dbt, run_dbt_and_capture + +# NOTE(benesch): these tests are backported, with modifications, from +# dbt-core#8887. + +# TODO(benesch): consider removing these tests when v1.8 ships with these tests +# as part of core. 
+ +my_timestamp_model_sql = """ +select + '2023-01-01T00:00:00'::timestamp as ts +""" + +my_mz_timestamp_model_sql = """ +select + '1672531200000'::mz_timestamp as ts +""" + +model_schema_timestamp_yml = """ +models: + - name: my_model + config: + contract: + enforced: true + columns: + - name: ts + data_type: timestamp +""" + +model_schema_mz_timestamp_yml = """ +models: + - name: my_model + config: + contract: + enforced: true + columns: + - name: ts + data_type: mz_timestamp +""" + + +class TestModelContractUnrecognizedTypeCode1: + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_mz_timestamp_model_sql, + "schema.yml": model_schema_mz_timestamp_yml, + } + + def test_nonstandard_data_type(self, project): + run_dbt(["run"], expect_pass=True) + + +class TestModelContractUnrecognizedTypeCodeActualMismatch: + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_mz_timestamp_model_sql, + "schema.yml": model_schema_timestamp_yml, + } + + def test_nonstandard_data_type(self, project): + expected_msg = "custom type unknown to dbt (OID 16552) | DATETIME | data type mismatch" + _, logs = run_dbt_and_capture(["run"], expect_pass=False) + assert expected_msg in logs + + +class TestModelContractUnrecognizedTypeCodeExpectedMismatch: + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_timestamp_model_sql, + "schema.yml": model_schema_mz_timestamp_yml, + } + + def test_nonstandard_data_type(self, project): + expected_msg = "DATETIME | custom type unknown to dbt (OID 16552) | data type mismatch" + _, logs = run_dbt_and_capture(["run"], expect_pass=False) + print(logs) + assert expected_msg in logs