From 984cab209854981edbd7c20e810a305912a3037b Mon Sep 17 00:00:00 2001 From: IDoneShaveIt Date: Wed, 18 Oct 2023 17:12:42 +0300 Subject: [PATCH 1/5] On BigQuery use column relation data_type instead of dtype --- macros/commands/generate_schema_baseline_test.sql | 2 +- .../schema_changes/get_columns_snapshot_query.sql | 2 +- macros/utils/data_types/get_column_data_type.sql | 12 ++++++++++++ macros/utils/data_types/get_normalized_data_type.sql | 5 +++++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 macros/utils/data_types/get_column_data_type.sql diff --git a/macros/commands/generate_schema_baseline_test.sql b/macros/commands/generate_schema_baseline_test.sql index 893d41831..0c1e246ab 100644 --- a/macros/commands/generate_schema_baseline_test.sql +++ b/macros/commands/generate_schema_baseline_test.sql @@ -58,7 +58,7 @@ columns: {%- for column in columns %} - name: {{ column.name }} - data_type: {{ column.dtype }} + data_type: {{ elementary.get_column_data_type(column) }} {% endfor %} tests: - elementary.schema_changes_from_baseline diff --git a/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql b/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql index d96faa924..e44cfd498 100644 --- a/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql +++ b/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql @@ -38,7 +38,7 @@ {% for column in columns %} select {{ elementary.edr_cast_as_string(elementary.edr_quote(column.name)) }} as column_name, - {{ elementary.edr_cast_as_string(elementary.edr_quote(elementary.get_normalized_data_type(column.dtype))) }} as data_type + {{ elementary.edr_cast_as_string(elementary.edr_quote(elementary.get_normalized_data_type(elementary.get_column_data_type(column)))) }} as data_type {% if not loop.last %} union all {% endif %} diff --git a/macros/utils/data_types/get_column_data_type.sql b/macros/utils/data_types/get_column_data_type.sql new 
file mode 100644 index 000000000..2d88c8076 --- /dev/null +++ b/macros/utils/data_types/get_column_data_type.sql @@ -0,0 +1,12 @@ +{% macro get_column_data_type(column_relation) %} + {% set data_type = adapter.dispatch('get_column_data_type','elementary')(column_relation) %} + {{ return(data_type) }} +{% endmacro %} + +{% macro default__get_column_data_type(column_relation) %} + {{return (column_relation.dtype) }} +{% endmacro %} + +{% macro bigquery__get_column_data_type(column_relation) %} + {{return (column_relation.data_type) }} +{% endmacro %} diff --git a/macros/utils/data_types/get_normalized_data_type.sql b/macros/utils/data_types/get_normalized_data_type.sql index 9730d8035..d7c81e096 100644 --- a/macros/utils/data_types/get_normalized_data_type.sql +++ b/macros/utils/data_types/get_normalized_data_type.sql @@ -14,7 +14,12 @@ {% macro bigquery__get_normalized_data_type(exact_data_type) %} {# BigQuery has no concept of data type synonyms, see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types #} + {% set exact_data_type_to_data_type_returned_by_the_info_schema = {'BOOLEAN': 'BOOL'} %} + {%- if exact_data_type in exact_data_type_to_data_type_returned_by_the_info_schema%} + {{ return (exact_data_type_to_data_type_returned_by_the_info_schema[exact_data_type])}} + {%- else %} {{return (exact_data_type) }} + {%- endif%} {% endmacro %} From 99f601989d5f5bc6842702256f471f9de06d3e34 Mon Sep 17 00:00:00 2001 From: IDoneShaveIt Date: Mon, 23 Oct 2023 15:18:44 +0300 Subject: [PATCH 2/5] Create macro for creating tables with all data types + macro for comparing relation types with schema changes --- .../macros/create_all_types_table.sql | 223 ++++++++++++++++++ .../get_columns_from_information_schema.sql | 30 ++- 2 files changed, 244 insertions(+), 9 deletions(-) create mode 100644 integration_tests/dbt_project/macros/create_all_types_table.sql diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql 
b/integration_tests/dbt_project/macros/create_all_types_table.sql new file mode 100644 index 000000000..b7ceafc05 --- /dev/null +++ b/integration_tests/dbt_project/macros/create_all_types_table.sql @@ -0,0 +1,223 @@ +{% macro create_all_types_table() %} + {% do return(adapter.dispatch('create_all_types_table','elementary')()) %} +{% endmacro %} + +{% macro bigquery__create_all_types_table() %} + {# see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types #} + {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} + {% set _, relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier='all_types', type="table") %} + {% set sql_query %} + select + struct("string" as col1, 42 as col2) as flat_struct_col, + struct("string" as col1, struct(42 as nestcol1) as col2) as nested_struct_col, + [1,2,3] as array_col, + null as null_col, + true as bool_col, + cast("str" as STRING) as str_col, + cast(12345 as INT64) as int64_col, + cast(12345 as FLOAT64) as float64_col, + cast(12345 as NUMERIC) as numeric_col, + cast(1122334455 as BIGNUMERIC) as bignum_col, + b'1' as bytes_col, + INTERVAL '10 -12:30' DAY TO MINUTE as interval_col, + JSON '{"data_type": "json"}' as json_col, + ST_GEOGPOINT(-122, 47) AS geo_col, + CURRENT_DATE() as date_col, + CURRENT_DATETIME() as datetime_col, + CURRENT_TIME() as time_col, + CURRENT_TIMESTAMP() as timestamp_col, + {% endset %} + {% set create_table_query = dbt.create_table_as(false, relation, sql_query) %} + {% do elementary.edr_log(create_table_query) %} + {% do elementary.run_query(create_table_query) %} +{% endmacro %} + +{% macro snowflake__create_all_types_table() %} + {# see https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html #} + {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} + {% set _, relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, 
identifier='all_types', type="table") %} + {% set sql_query %} + select + 'str'::STRING as str_col, + 'str'::TEXT as text_col, + 'a'::VARCHAR as var_col, + 'a'::CHAR as char_col, + 'a'::CHARACTER as character_col, + 'a'::NCHAR as nchar_col, + 'a'::NVARCHAR as nvarchar_col, + 'a'::NVARCHAR2 as nvarchar2_col, + 'a'::CHAR VARYING as char_varying_col, + 'a'::NCHAR VARYING as nchar_varying_col, + TRY_TO_BINARY('1', 'HEX')::BINARY as binary_col, + TRY_TO_BINARY('1', 'HEX')::VARBINARY as varbinary_col, + TRUE::BOOLEAN as boolean_col, + 13::NUMBER as number_col, + 13::DEC as dec_col, + 13::DECIMAL as decimal_col, + 13::INT as int_col, + 13::INTEGER as integer_col, + 13::BIGINT as bigint_col, + 13::SMALLINT as smallint_col, + 13::TINYINT as tinyint_col, + 13::BYTEINT as byteint_col, + 13::FLOAT as float_col, + 13::FLOAT4 as float4_col, + 13::FLOAT8 as float8_col, + 13::DOUBLE as double_col, + 13::DOUBLE PRECISION as double_precision_col, + 13::REAL as real_col, + '2023-10-23'::DATE as date_col, + '13:30:00'::TIME as time_col, + '2023-10-23 12:00:00'::TIMESTAMP_TZ as timestamp_tz_col, + '2023-10-23 12:00:00'::TIMESTAMP_LTZ as timestamp_ltz_col, + '2023-10-23 12:00:00'::TIMESTAMP_NTZ as timestamp_ntz_col, + '2023-10-23 12:00:00'::DATETIME as datetime_col, + TO_VARIANT(1.23) as variant_col, + {'data_type': 'object'} as object_col, + [1,2,3] as array_col, + TO_GEOGRAPHY('POINT(-122.35 37.55)') as geography_col + {% endset %} + {% set create_table_query = dbt.create_table_as(false, relation, sql_query) %} + {% do elementary.edr_log(create_table_query) %} + {% do elementary.run_query(create_table_query) %} +{% endmacro %} + +{% macro redshift__create_all_types_table() %} + {# see https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html #} + {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} + {% set _, relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier='all_types', 
type="table") %} + {% set sql_query %} + select + 1::SMALLINT as smallint_col, + 1::INT2 as int2_col, + 1::INTEGER as integer_col, + 1::INT as int_col, + 1::INT4 as int4_col, + 1::BIGINT as bigint_col, + 1::INT8 as int8_col, + 1::DECIMAL as decimal_col, + 1::NUMERIC as numeric_col, + 1::REAL as real_col, + 1::FLOAT4 as float4_col, + 1::FLOAT as float_col, + 1::FLOAT8 as float8_col, + 1::DOUBLE PRECISION as double_precision_col, + TRUE::BOOLEAN as boolean_col, + TRUE::bool as bool_col, + 'a'::VARCHAR as var_col, + 'str'::TEXT as text_col, + 'a'::NVARCHAR as nvarchar_col, + 'a'::CHARACTER VARYING as character_varying_col, + 'a'::CHAR as char_col, + 'a'::CHARACTER as character_col, + 'a'::NCHAR as nchar_col, + 'a'::BPCHAR as bpchar_col, + TO_DATE('20231023', 'YYYYMMDD') as date_col, + sysdate as timestamp_col, + TO_TIMESTAMP(sysdate, 'YYYY-MM-DD HH24:MI:SS') as timestampptz_col, + ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geomtry_col, + ST_GeogFromText('SRID=4324;POLYGON((0 0,0 1,1 1,10 10,1 0,0 0))') as geography_col, + JSON_PARSE('{"data_type": "super"}') as super_col + {% endset %} + {% set create_table_query = dbt.create_table_as(false, relation, sql_query) %} + {% do elementary.edr_log(create_table_query) %} + {% do elementary.run_query(create_table_query) %} + +{% endmacro %} + +{% macro postgres__create_all_types_table() %} + {# see https://www.postgresql.org/docs/current/datatype.html #} + {% set database_name, schema_name = elementary.get_package_database_and_schema('elementary') %} + {% set _, relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier='all_types', type="table") %} + {% set sql_query %} + select + CAST(1 as BIGINT) as bigint_col, + CAST(1 as INT8) as int8_col, + CAST(B'00' as BIT) as bit_col, + CAST(B'00' as BIT VARYING) as bit_varying_col, + CAST(B'00' as VARBIT) as varbit_col, + CAST(TRUE as BOOLEAN) as boolean_col, + CAST(TRUE as BOOL) as bool_col, + CAST('(1, 1), (2, 2)' as BOX) as box_col, + 
'\xDEADBEEF'::bytea as bytea_col, + 'a'::char as char_col, + 'a'::character as character_col, + 'a'::character varying as character_varying_col, + 'a'::varchar as varchar_col, + '8.8.8.8'::cidr as cidr_col, + '(1, 1), 1'::circle as circle_col, + '2023-10-23'::date as date_col, + CAST(1 as FLOAT8) as float8_col, + CAST(1 as DOUBLE PRECISION) as double_precision_col, + '8.8.8.8'::inet as inet_col, + CAST(1 as INTEGER) as integer_col, + CAST(1 as INT) as int_col, + CAST(1 as INT4) as int4_col, + interval '1 hour' as interval_col, + '{"a":1,"b":2}'::json as json_col, + '{"a":1,"b":2}'::jsonb as jsonb_col, + '[(1,1),(2,2)]'::line as line_col, + '[(1,1),(2,2)]'::lseg as lseg_col, + 'ff:ff:ff:ff:ff:ff'::macaddr as mac_col, + 'ff:ff:ff:ff:ff:ff'::macaddr8 as mac8_col, + 42::money as money_col, + 42::numeric as numeric_col, + 42::decimal as decimal_col, + '[(1,1),(2,2)]'::path as path_col, + '(1,1)'::point as point_col, + '((1,1),(2,2))'::polygon as polygon_col, + CAST(1 as REAL) as real_col, + CAST(1 as FLOAT4) as float4_col, + CAST(1 as SMALLINT) as smallint_col, + CAST(1 as INT2) as int2_col, + 'a'::text as text_col, + '12:00:00'::time as time_col, + '12:00:00-600'::timetz as timetz_col, + '2004-10-19 10:23:54'::timestamp as timestamp_col, + '2004-10-19 10:23:54+02'::timestamptz as timestamptz_col, + 'confidence'::tsquery as tsquery_col, + 'confidence'::tsvector as tsvector_col, + 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::uuid as uuid_col, + xmlcomment('text') as xml_col + {% endset %} + {% set create_table_query = dbt.create_table_as(false, relation, sql_query) %} + {% do elementary.edr_log(create_table_query) %} + {% do elementary.run_query(create_table_query) %} +{% endmacro %} + +{% macro default__create_all_types_table() %} + {{ exceptions.raise_compiler_error("This macro is not supported on '{}'.".format(target.type)) }} +{% endmacro %} + + +{% macro compare_relation_types_and_information_schema_types() %} + {% do elementary_tests.create_all_types_table() %} + + {% 
set schema_tuple = elementary.get_package_database_and_schema('elementary') %} + {% set database_name, schema_name = schema_tuple %} + {% set _, relation = dbt.get_or_create_relation(database=database_name, schema=schema_name, identifier='all_types', type="table") %} + + {% set relation_column_types = {} %} + {% set columns = adapter.get_columns_in_relation(relation) %} + {% for column in columns %} + {% do relation_column_types.update({column.name.lower(): elementary.get_normalized_data_type(elementary.get_column_data_type(column))}) %} + {% endfor %} + {% do elementary.edr_log(relation_column_types) %} + + {% set information_schema_column_types = {} %} + {% set information_schema_column_types_rows = elementary.agate_to_dicts(elementary.run_query(elementary.get_columns_from_information_schema(schema_tuple, 'all_types'))) %} + {% for row in information_schema_column_types_rows %} + {% do information_schema_column_types.update({row.column_name.lower(): elementary.get_normalized_data_type(row.data_type)}) %} + {% endfor %} + {% do elementary.edr_log(information_schema_column_types) %} + + {% set unmached_types = [] %} + {% for col, relation_value in relation_column_types.items() %} + {% set info_schema_value = information_schema_column_types[col] %} + {% if relation_value != info_schema_value %} + {% do unmached_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %} + {% endif %} + {% endfor %} + {% do elementary.edr_log(unmached_types) %} + {% do return(unmached_types) %} +{% endmacro %} diff --git a/macros/edr/metadata_collection/get_columns_from_information_schema.sql b/macros/edr/metadata_collection/get_columns_from_information_schema.sql index 60c5a6c0f..782c5b868 100644 --- a/macros/edr/metadata_collection/get_columns_from_information_schema.sql +++ b/macros/edr/metadata_collection/get_columns_from_information_schema.sql @@ -1,10 +1,10 @@ -{% macro get_columns_from_information_schema(schema_tuple) %} +{% 
macro get_columns_from_information_schema(schema_tuple, table_name = none) %} {%- set database_name, schema_name = schema_tuple %} - {{ return(adapter.dispatch('get_columns_from_information_schema', 'elementary')(database_name, schema_name)) }} + {{ return(adapter.dispatch('get_columns_from_information_schema', 'elementary')(database_name, schema_name, table_name)) }} {% endmacro %} {# Snowflake #} -{% macro default__get_columns_from_information_schema(database_name, schema_name) %} +{% macro default__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).without_identifier() %} select upper(table_catalog || '.' || table_schema || '.' || table_name) as full_table_name, @@ -15,19 +15,22 @@ data_type from {{ schema_relation.information_schema('COLUMNS') }} where upper(table_schema) = upper('{{ schema_name }}') + {% if table_name %} + and upper(table_name) = upper('{{ table_name }}') + {% endif %} {% endmacro %} -{% macro bigquery__get_columns_from_information_schema(database_name, schema_name) %} +{% macro bigquery__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).without_identifier() %} {% set columns_schema = schema_relation.information_schema('COLUMNS') %} {% if elementary.can_query_relation(columns_schema) %} - {{ elementary.default__get_columns_from_information_schema(database_name, schema_name) }} + {{ elementary.default__get_columns_from_information_schema(database_name, schema_name, table_name) }} {% else %} {{ elementary.get_empty_columns_from_information_schema_table() }} {% endif %} {% endmacro %} -{% macro redshift__get_columns_from_information_schema(database_name, schema_name) %} +{% macro redshift__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} select upper(table_catalog 
|| '.' || table_schema || '.' || table_name) as full_table_name, upper(table_catalog) as database_name, @@ -37,9 +40,12 @@ data_type from pg_catalog.svv_columns where upper(table_schema) = upper('{{ schema_name }}') + {% if table_name %} + and upper(table_name) = upper('{{ table_name }}') + {% endif %} {% endmacro %} -{% macro postgres__get_columns_from_information_schema(database_name, schema_name) %} +{% macro postgres__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} select upper(table_catalog || '.' || table_schema || '.' || table_name) as full_table_name, upper(table_catalog) as database_name, @@ -49,9 +55,12 @@ data_type from information_schema.columns where upper(table_schema) = upper('{{ schema_name }}') + {% if table_name %} + and upper(table_name) = upper('{{ table_name }}') + {% endif %} {% endmacro %} -{% macro databricks__get_columns_from_information_schema(database_name, schema_name) %} +{% macro databricks__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} {% if target.catalog is not none %} {# Information schema is only available when using Unity Catalog. 
#} {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).quote(false, false, false) %} @@ -64,12 +73,15 @@ data_type from {{ schema_relation.information_schema('COLUMNS') }} where upper(table_schema) = upper('{{ schema_name }}') + {% if table_name %} + and upper(table_name) = upper('{{ table_name }}') + {% endif %} {% else %} {{ elementary.get_empty_columns_from_information_schema_table() }} {% endif %} {% endmacro %} -{% macro spark__get_columns_from_information_schema(database_name, schema_name) %} +{% macro spark__get_columns_from_information_schema(database_name, schema_name, table_name = none) %} {{ elementary.get_empty_columns_from_information_schema_table() }} {% endmacro %} From ad5697b119dfa3719c21db0b95473d86a0413a7a Mon Sep 17 00:00:00 2001 From: IDoneShaveIt Date: Mon, 23 Oct 2023 15:22:32 +0300 Subject: [PATCH 3/5] Added a comment for the new macros --- .../dbt_project/macros/create_all_types_table.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql b/integration_tests/dbt_project/macros/create_all_types_table.sql index b7ceafc05..64778e510 100644 --- a/integration_tests/dbt_project/macros/create_all_types_table.sql +++ b/integration_tests/dbt_project/macros/create_all_types_table.sql @@ -1,3 +1,7 @@ +{# + These macros generate a table containing every supported data type for each data warehouse (DWH). 
+#} + {% macro create_all_types_table() %} {% do return(adapter.dispatch('create_all_types_table','elementary')()) %} {% endmacro %} From 32def427543c35f73333d0baba4b85bae2268c69 Mon Sep 17 00:00:00 2001 From: IDoneShaveIt Date: Mon, 23 Oct 2023 15:33:03 +0300 Subject: [PATCH 4/5] precommit - fix typos --- .../dbt_project/macros/create_all_types_table.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql b/integration_tests/dbt_project/macros/create_all_types_table.sql index 64778e510..89d541887 100644 --- a/integration_tests/dbt_project/macros/create_all_types_table.sql +++ b/integration_tests/dbt_project/macros/create_all_types_table.sql @@ -119,7 +119,7 @@ TO_DATE('20231023', 'YYYYMMDD') as date_col, sysdate as timestamp_col, TO_TIMESTAMP(sysdate, 'YYYY-MM-DD HH24:MI:SS') as timestampptz_col, - ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geomtry_col, + ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geometry_col, ST_GeogFromText('SRID=4324;POLYGON((0 0,0 1,1 1,10 10,1 0,0 0))') as geography_col, JSON_PARSE('{"data_type": "super"}') as super_col {% endset %} @@ -215,13 +215,13 @@ {% endfor %} {% do elementary.edr_log(information_schema_column_types) %} - {% set unmached_types = [] %} + {% set unmatched_types = [] %} {% for col, relation_value in relation_column_types.items() %} {% set info_schema_value = information_schema_column_types[col] %} {% if relation_value != info_schema_value %} - {% do unmached_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %} + {% do unmatched_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %} {% endif %} {% endfor %} - {% do elementary.edr_log(unmached_types) %} - {% do return(unmached_types) %} + {% do elementary.edr_log(unmatched_types) %} + {% do return(unmatched_types) %} {% endmacro %} From 
163839593a40eb6ab32a2e7bef938443cf5a5a42 Mon Sep 17 00:00:00 2001 From: IDoneShaveIt Date: Tue, 24 Oct 2023 15:12:38 +0300 Subject: [PATCH 5/5] Fixed exposures schema validation usage of columns types --- integration_tests/tests/test_exposure_schema_validity.py | 8 ++++---- .../data_monitors_configuration/get_column_monitors.sql | 4 ++-- macros/edr/tests/test_exposure_schema_validity.sql | 2 +- .../test_utils/find_normalized_data_type_for_column.sql | 2 +- macros/utils/data_types/get_column_data_type.sql | 4 ++-- macros/utils/table_operations/get_columns_and_types.sql | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/integration_tests/tests/test_exposure_schema_validity.py b/integration_tests/tests/test_exposure_schema_validity.py index bd04034c4..0394c6926 100644 --- a/integration_tests/tests/test_exposure_schema_validity.py +++ b/integration_tests/tests/test_exposure_schema_validity.py @@ -87,7 +87,7 @@ def test_exposure_schema_validity_correct_columns_and_types( ) DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [{"name": "order_id", "dtype": "string"}], + "columns": [{"name": "order_id", "dtype": "string", "data_type": "string"}], "exposures": { "ZOMG": { "meta": { @@ -116,7 +116,7 @@ def test_exposure_schema_validity_correct_columns_and_invalid_type( ): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [{"name": "order_id", "dtype": "numeric"}], + "columns": [{"name": "order_id", "dtype": "numeric", "data_type": "numeric"}], "exposures": { "ZOMG": { "meta": { @@ -147,7 +147,7 @@ def test_exposure_schema_validity_correct_columns_and_missing_type( ): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [{"name": "order_id", "dtype": "numeric"}], + "columns": [{"name": "order_id", "dtype": "numeric", "data_type": "numeric"}], "exposures": { "ZOMG": { "meta": {"referenced_columns": [{"column_name": "order_id"}]}, @@ -170,7 +170,7 @@ def test_exposure_schema_validity_missing_columns( ): DBT_TEST_ARGS = { 
"node": "models.exposures_test", - "columns": [{"name": "order", "dtype": "numeric"}], + "columns": [{"name": "order", "dtype": "numeric", "data_type": "numeric"}], "exposures": { "ZOMG": { "meta": { diff --git a/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql b/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql index ffa95a163..8cefb3426 100644 --- a/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql +++ b/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql @@ -4,7 +4,7 @@ {% set column_objects = adapter.get_columns_in_relation(model_relation) %} {% for column_obj in column_objects %} {% if column_obj.name | lower == column_name | lower %} - {% set column_monitors = elementary.column_monitors_by_type(column_obj.dtype, column_tests) %} + {% set column_monitors = elementary.column_monitors_by_type(elementary.get_column_data_type(column_obj), column_tests) %} {% set column_item = {'column': column_obj, 'monitors': column_monitors} %} {{ return(column_item) }} {% endif %} @@ -20,7 +20,7 @@ {% set column_objects = adapter.get_columns_in_relation(model_relation) %} {% for column_obj in column_objects %} - {% set column_monitors = elementary.column_monitors_by_type(column_obj.dtype, column_tests) %} + {% set column_monitors = elementary.column_monitors_by_type(elementary.get_column_data_type(column_obj), column_tests) %} {% set column_item = {'column': column_obj, 'monitors': column_monitors} %} {% do column_obj_and_monitors.append(column_item) %} {% endfor %} diff --git a/macros/edr/tests/test_exposure_schema_validity.sql b/macros/edr/tests/test_exposure_schema_validity.sql index bc8d5e81d..388c3cd23 100644 --- a/macros/edr/tests/test_exposure_schema_validity.sql +++ b/macros/edr/tests/test_exposure_schema_validity.sql @@ -35,7 +35,7 @@ {%- if matching_exposures | length > 0 -%} {%- set columns_dict = {} -%} {%- for column in columns -%} - {%- do 
columns_dict.update({ column['name'].strip('"').strip("'") | upper : elementary.normalize_data_type(column['dtype']) }) -%} + {%- do columns_dict.update({ column['name'].strip('"').strip("'") | upper : elementary.normalize_data_type(elementary.get_column_data_type(column)) }) -%} {%- endfor -%} {%- set invalid_exposures = [] -%} {%- for exposure in matching_exposures -%} diff --git a/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql b/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql index b5ea6085a..a139fc246 100644 --- a/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql +++ b/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql @@ -4,7 +4,7 @@ {% if column_name and columns_from_relation and columns_from_relation is iterable %} {% for column_obj in columns_from_relation %} {% if column_obj.column | lower == column_name | trim('\'\"\`') | lower %} - {{ return(elementary.normalize_data_type(column_obj.dtype)) }} + {{ return(elementary.normalize_data_type(elementary.get_column_data_type(column_obj))) }} {% endif %} {% endfor %} {% do exceptions.raise_compiler_error("Column `{}` was not found in `{}`.".format(column_name, model_relation.name)) %} diff --git a/macros/utils/data_types/get_column_data_type.sql b/macros/utils/data_types/get_column_data_type.sql index 2d88c8076..17479a3b9 100644 --- a/macros/utils/data_types/get_column_data_type.sql +++ b/macros/utils/data_types/get_column_data_type.sql @@ -4,9 +4,9 @@ {% endmacro %} {% macro default__get_column_data_type(column_relation) %} - {{return (column_relation.dtype) }} + {{return (column_relation["dtype"]) }} {% endmacro %} {% macro bigquery__get_column_data_type(column_relation) %} - {{return (column_relation.data_type) }} + {{return (column_relation["data_type"]) }} {% endmacro %} diff --git a/macros/utils/table_operations/get_columns_and_types.sql b/macros/utils/table_operations/get_columns_and_types.sql index 9feb15b94..1195b3cbe 100644 
--- a/macros/utils/table_operations/get_columns_and_types.sql +++ b/macros/utils/table_operations/get_columns_and_types.sql @@ -20,7 +20,7 @@ {%- set columns_from_relation = adapter.get_columns_in_relation(relation) -%} {% for column in columns_from_relation %} - {%- set column_item = {'column_name': column['column'], 'data_type': elementary.normalize_data_type(column['dtype'])} %} + {%- set column_item = {'column_name': column['column'], 'data_type': elementary.normalize_data_type(elementary.get_column_data_type(column))} %} {%- do columns.append(column_item) -%} {% endfor %}