From 984cab209854981edbd7c20e810a305912a3037b Mon Sep 17 00:00:00 2001
From: IDoneShaveIt <idanshavit31@gmail.com>
Date: Wed, 18 Oct 2023 17:12:42 +0300
Subject: [PATCH 1/5] On BigQuery use column relation data_type instead of
 dtype

---
 macros/commands/generate_schema_baseline_test.sql    |  2 +-
 .../schema_changes/get_columns_snapshot_query.sql    |  2 +-
 macros/utils/data_types/get_column_data_type.sql     | 12 ++++++++++++
 macros/utils/data_types/get_normalized_data_type.sql |  5 +++++
 4 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 macros/utils/data_types/get_column_data_type.sql

diff --git a/macros/commands/generate_schema_baseline_test.sql b/macros/commands/generate_schema_baseline_test.sql
index 893d41831..0c1e246ab 100644
--- a/macros/commands/generate_schema_baseline_test.sql
+++ b/macros/commands/generate_schema_baseline_test.sql
@@ -58,7 +58,7 @@
     columns:
     {%- for column in columns %}
       - name: {{ column.name }}
-        data_type: {{ column.dtype }}
+        data_type: {{ elementary.get_column_data_type(column) }}
     {% endfor %}
     tests:
       - elementary.schema_changes_from_baseline
diff --git a/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql b/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql
index d96faa924..e44cfd498 100644
--- a/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql
+++ b/macros/edr/data_monitoring/schema_changes/get_columns_snapshot_query.sql
@@ -38,7 +38,7 @@
                 {% for column in columns %}
                     select
                         {{ elementary.edr_cast_as_string(elementary.edr_quote(column.name)) }} as column_name,
-                        {{ elementary.edr_cast_as_string(elementary.edr_quote(elementary.get_normalized_data_type(column.dtype))) }} as data_type
+                        {{ elementary.edr_cast_as_string(elementary.edr_quote(elementary.get_normalized_data_type(elementary.get_column_data_type(column)))) }} as data_type
                     {% if not loop.last %}
                         union all
                     {% endif %}
diff --git a/macros/utils/data_types/get_column_data_type.sql b/macros/utils/data_types/get_column_data_type.sql
new file mode 100644
index 000000000..2d88c8076
--- /dev/null
+++ b/macros/utils/data_types/get_column_data_type.sql
@@ -0,0 +1,12 @@
+{% macro get_column_data_type(column_relation) %}
+    {# Entry point: hands off to the adapter-specific `<adapter>__get_column_data_type` macro and returns its result. #}
+    {% do return(adapter.dispatch('get_column_data_type', 'elementary')(column_relation)) %}
+{% endmacro %}
+
+{% macro default__get_column_data_type(column_relation) %}
+   {{return (column_relation.dtype) }}
+{% endmacro %}
+
+{% macro bigquery__get_column_data_type(column_relation) %}
+   {{return (column_relation.data_type) }}
+{% endmacro %}
diff --git a/macros/utils/data_types/get_normalized_data_type.sql b/macros/utils/data_types/get_normalized_data_type.sql
index 9730d8035..d7c81e096 100644
--- a/macros/utils/data_types/get_normalized_data_type.sql
+++ b/macros/utils/data_types/get_normalized_data_type.sql
@@ -14,7 +14,12 @@
 {% macro bigquery__get_normalized_data_type(exact_data_type) %}
 {# BigQuery has no concept of data type synonyms,
  see https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types #}
+ {% set info_schema_type_names = {'BOOLEAN': 'BOOL'} %} {# exact data type -> data type returned by the information schema #}
+ {%- if exact_data_type in info_schema_type_names %}
+   {% do return(info_schema_type_names[exact_data_type]) %}
+ {%- else %}
    {{return (exact_data_type) }}
+ {%- endif %}
 {% endmacro %}
 
 

From 99f601989d5f5bc6842702256f471f9de06d3e34 Mon Sep 17 00:00:00 2001
From: IDoneShaveIt <idanshavit31@gmail.com>
Date: Mon, 23 Oct 2023 15:18:44 +0300
Subject: [PATCH 2/5] Create macro for creating tables with all data types +
 macro for comparing relation types with schema changes

---
 .../macros/create_all_types_table.sql         | 223 ++++++++++++++++++
 .../get_columns_from_information_schema.sql   |  30 ++-
 2 files changed, 244 insertions(+), 9 deletions(-)
 create mode 100644 integration_tests/dbt_project/macros/create_all_types_table.sql

diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql b/integration_tests/dbt_project/macros/create_all_types_table.sql
new file mode 100644
index 000000000..b7ceafc05
--- /dev/null
+++ b/integration_tests/dbt_project/macros/create_all_types_table.sql
@@ -0,0 +1,223 @@
+{% macro create_all_types_table() %}
+    {% do return(adapter.dispatch('create_all_types_table','elementary')()) %}
+{% endmacro %}
+
+{% macro bigquery__create_all_types_table() %}
+    {# Builds the 'all_types' table, with a column for each BigQuery data type selected below, in the database/schema returned by get_package_database_and_schema('elementary'). See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types #}
+    {% set target_database, target_schema = elementary.get_package_database_and_schema('elementary') %}
+    {% set _, all_types_relation = dbt.get_or_create_relation(database=target_database, schema=target_schema, identifier='all_types', type="table") %}
+    {% set all_types_select %}
+      select 
+        struct("string" as col1, 42 as col2) as flat_struct_col,
+        struct("string" as col1, struct(42 as nestcol1) as col2) as nested_struct_col,
+        [1,2,3] as array_col,
+        null as null_col,
+        true as bool_col,
+        cast("str" as STRING) as str_col,
+        cast(12345 as INT64) as int64_col,
+        cast(12345 as FLOAT64) as float64_col,
+        cast(12345 as NUMERIC) as numeric_col,
+        cast(1122334455 as BIGNUMERIC) as bignum_col,
+        b'1' as bytes_col,
+        INTERVAL '10 -12:30' DAY TO MINUTE as interval_col,
+        JSON '{"data_type": "json"}' as json_col,
+        ST_GEOGPOINT(-122, 47) AS geo_col,
+        CURRENT_DATE() as date_col,
+        CURRENT_DATETIME() as datetime_col,
+        CURRENT_TIME() as time_col,
+        CURRENT_TIMESTAMP() as timestamp_col,
+    {% endset %}
+    {% set create_table_statement = dbt.create_table_as(false, all_types_relation, all_types_select) %}
+    {% do elementary.edr_log(create_table_statement) %}
+    {% do elementary.run_query(create_table_statement) %}
+{% endmacro %}
+
+{% macro snowflake__create_all_types_table() %}
+    {# Builds the 'all_types' table, with a column for each Snowflake data type selected below, in the database/schema returned by get_package_database_and_schema('elementary'). See https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html #}
+    {% set target_database, target_schema = elementary.get_package_database_and_schema('elementary') %}
+    {% set _, all_types_relation = dbt.get_or_create_relation(database=target_database, schema=target_schema, identifier='all_types', type="table") %}
+    {% set all_types_select %}
+      select 
+        'str'::STRING as str_col,
+        'str'::TEXT as text_col,
+        'a'::VARCHAR as var_col,
+        'a'::CHAR as char_col,
+        'a'::CHARACTER as character_col,
+        'a'::NCHAR as nchar_col,
+        'a'::NVARCHAR as nvarchar_col,
+        'a'::NVARCHAR2 as nvarchar2_col,
+        'a'::CHAR VARYING as char_varying_col,
+        'a'::NCHAR VARYING as nchar_varying_col,
+        TRY_TO_BINARY('1', 'HEX')::BINARY as binary_col,
+        TRY_TO_BINARY('1', 'HEX')::VARBINARY as varbinary_col,
+        TRUE::BOOLEAN as boolean_col,
+        13::NUMBER as number_col,
+        13::DEC as dec_col,
+        13::DECIMAL as decimal_col,
+        13::INT as int_col,
+        13::INTEGER as integer_col,
+        13::BIGINT as bigint_col,
+        13::SMALLINT as smallint_col,
+        13::TINYINT as tinyint_col,
+        13::BYTEINT as byteint_col,
+        13::FLOAT as float_col,
+        13::FLOAT4 as float4_col,
+        13::FLOAT8 as float8_col,
+        13::DOUBLE as double_col,
+        13::DOUBLE PRECISION as double_precision_col,
+        13::REAL as real_col,
+        '2023-10-23'::DATE as date_col,
+        '13:30:00'::TIME as time_col,
+        '2023-10-23 12:00:00'::TIMESTAMP_TZ as timestamp_tz_col,
+        '2023-10-23 12:00:00'::TIMESTAMP_LTZ as timestamp_ltz_col,
+        '2023-10-23 12:00:00'::TIMESTAMP_NTZ as timestamp_ntz_col,
+        '2023-10-23 12:00:00'::DATETIME as datetime_col,
+        TO_VARIANT(1.23) as variant_col,
+        {'data_type': 'object'} as object_col,
+        [1,2,3] as array_col,
+        TO_GEOGRAPHY('POINT(-122.35 37.55)') as geography_col
+    {% endset %}
+    {% set create_table_statement = dbt.create_table_as(false, all_types_relation, all_types_select) %}
+    {% do elementary.edr_log(create_table_statement) %}
+    {% do elementary.run_query(create_table_statement) %}
+{% endmacro %}
+
+{% macro redshift__create_all_types_table() %}
+    {# Builds the 'all_types' table, with a column for each Redshift data type selected below, in the database/schema returned by get_package_database_and_schema('elementary'). See https://docs.aws.amazon.com/redshift/latest/dg/c_Supported_data_types.html #}
+    {% set target_database, target_schema = elementary.get_package_database_and_schema('elementary') %}
+    {% set _, all_types_relation = dbt.get_or_create_relation(database=target_database, schema=target_schema, identifier='all_types', type="table") %}
+    {% set all_types_select %}
+      select 
+        1::SMALLINT as smallint_col,
+        1::INT2 as int2_col,
+        1::INTEGER as integer_col,
+        1::INT as int_col,
+        1::INT4 as int4_col,
+        1::BIGINT as bigint_col,
+        1::INT8 as int8_col,
+        1::DECIMAL as decimal_col,
+        1::NUMERIC as numeric_col,
+        1::REAL as real_col,
+        1::FLOAT4 as float4_col,
+        1::FLOAT as float_col,
+        1::FLOAT8 as float8_col,
+        1::DOUBLE PRECISION as double_precision_col,
+        TRUE::BOOLEAN as boolean_col,
+        TRUE::bool as bool_col,
+        'a'::VARCHAR as var_col,
+        'str'::TEXT as text_col,
+        'a'::NVARCHAR as nvarchar_col,
+        'a'::CHARACTER VARYING as character_varying_col,
+        'a'::CHAR as char_col,
+        'a'::CHARACTER as character_col,
+        'a'::NCHAR as nchar_col,
+        'a'::BPCHAR as bpchar_col,
+        TO_DATE('20231023', 'YYYYMMDD') as date_col,
+        sysdate as timestamp_col,
+        TO_TIMESTAMP(sysdate, 'YYYY-MM-DD HH24:MI:SS') as timestampptz_col,
+        ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geomtry_col,
+        ST_GeogFromText('SRID=4324;POLYGON((0 0,0 1,1 1,10 10,1 0,0 0))') as geography_col,
+        JSON_PARSE('{"data_type": "super"}') as super_col
+    {% endset %}
+    {% set create_table_statement = dbt.create_table_as(false, all_types_relation, all_types_select) %}
+    {% do elementary.edr_log(create_table_statement) %}
+    {% do elementary.run_query(create_table_statement) %}
+  
+{% endmacro %}
+
+{% macro postgres__create_all_types_table() %}
+    {# Builds the 'all_types' table, with a column for each PostgreSQL data type selected below, in the database/schema returned by get_package_database_and_schema('elementary'). See https://www.postgresql.org/docs/current/datatype.html #}
+    {% set target_database, target_schema = elementary.get_package_database_and_schema('elementary') %}
+    {% set _, all_types_relation = dbt.get_or_create_relation(database=target_database, schema=target_schema, identifier='all_types', type="table") %}
+    {% set all_types_select %}
+      select 
+        CAST(1 as BIGINT) as bigint_col,
+        CAST(1 as INT8) as int8_col,
+        CAST(B'00' as BIT) as bit_col,
+        CAST(B'00' as BIT VARYING) as bit_varying_col,
+        CAST(B'00' as VARBIT) as varbit_col,
+        CAST(TRUE as BOOLEAN) as boolean_col,
+        CAST(TRUE as BOOL) as bool_col,
+        CAST('(1, 1), (2, 2)' as BOX) as box_col,
+        '\xDEADBEEF'::bytea as bytea_col,
+        'a'::char as char_col,
+        'a'::character as character_col,
+        'a'::character varying as character_varying_col,
+        'a'::varchar as varchar_col,
+        '8.8.8.8'::cidr as cidr_col,
+        '(1, 1), 1'::circle as circle_col,
+        '2023-10-23'::date as date_col,
+        CAST(1 as FLOAT8) as float8_col,
+        CAST(1 as DOUBLE PRECISION) as double_precision_col,
+        '8.8.8.8'::inet as inet_col,
+        CAST(1 as INTEGER) as integer_col,
+        CAST(1 as INT) as int_col,
+        CAST(1 as INT4) as int4_col,
+        interval '1 hour' as interval_col,
+        '{"a":1,"b":2}'::json as json_col,
+        '{"a":1,"b":2}'::jsonb as jsonb_col,
+        '[(1,1),(2,2)]'::line as line_col,
+        '[(1,1),(2,2)]'::lseg as lseg_col,
+        'ff:ff:ff:ff:ff:ff'::macaddr as mac_col,
+        'ff:ff:ff:ff:ff:ff'::macaddr8 as mac8_col,
+        42::money as money_col,
+        42::numeric as numeric_col,
+        42::decimal as decimal_col,
+        '[(1,1),(2,2)]'::path as path_col,
+        '(1,1)'::point as point_col,
+        '((1,1),(2,2))'::polygon as polygon_col,
+        CAST(1 as REAL) as real_col,
+        CAST(1 as FLOAT4) as float4_col,
+        CAST(1 as SMALLINT) as smallint_col,
+        CAST(1 as INT2) as int2_col,
+        'a'::text as text_col,
+        '12:00:00'::time as time_col,
+        '12:00:00-600'::timetz as timetz_col,
+        '2004-10-19 10:23:54'::timestamp as timestamp_col,
+        '2004-10-19 10:23:54+02'::timestamptz as timestamptz_col,
+        'confidence'::tsquery as tsquery_col,
+        'confidence'::tsvector as tsvector_col,
+        'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'::uuid as uuid_col,
+        xmlcomment('text') as xml_col
+    {% endset %}
+    {% set create_table_statement = dbt.create_table_as(false, all_types_relation, all_types_select) %}
+    {% do elementary.edr_log(create_table_statement) %}
+    {% do elementary.run_query(create_table_statement) %}
+{% endmacro %}
+
+{% macro default__create_all_types_table() %}
+  {% do exceptions.raise_compiler_error("This macro is not supported on '{}'.".format(target.type)) %} {# fallback when no adapter-specific implementation exists #}
+{% endmacro %}
+
+
+{% macro compare_relation_types_and_information_schema_types() %}
+    {% do elementary_tests.create_all_types_table() %}
+    {# Look up the 'all_types' relation that the call above created. #}
+    {% set package_location = elementary.get_package_database_and_schema('elementary') %}
+    {% set package_database, package_schema = package_location %}
+    {% set _, all_types_relation = dbt.get_or_create_relation(database=package_database, schema=package_schema, identifier='all_types', type="table") %}
+    {# Side 1: normalized data types taken from the adapter's column objects, keyed by lower-cased column name. #}
+    {% set relation_column_types = {} %}
+    {% set relation_columns = adapter.get_columns_in_relation(all_types_relation) %}
+    {% for relation_column in relation_columns %}
+      {% do relation_column_types.update({relation_column.name.lower(): elementary.get_normalized_data_type(elementary.get_column_data_type(relation_column))}) %}
+    {% endfor %}
+    {% do elementary.edr_log(relation_column_types) %}
+    {# Side 2: normalized data types taken from the information schema query, keyed the same way. #}
+    {% set information_schema_column_types = {} %}
+    {% set info_schema_rows = elementary.agate_to_dicts(elementary.run_query(elementary.get_columns_from_information_schema(package_location, 'all_types'))) %}
+    {% for info_schema_row in info_schema_rows %}
+      {% do information_schema_column_types.update({info_schema_row.column_name.lower(): elementary.get_normalized_data_type(info_schema_row.data_type)}) %}
+    {% endfor %}
+    {% do elementary.edr_log(information_schema_column_types) %}
+
+    {% set unmached_types = [] %}
+    {% for col, relation_value in relation_column_types.items() %}
+      {% set info_schema_value = information_schema_column_types[col] %}
+      {% if relation_value != info_schema_value %}
+        {% do unmached_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %}
+      {% endif %}
+    {% endfor %}
+    {% do elementary.edr_log(unmached_types) %}
+    {% do return(unmached_types) %}
+{% endmacro %}
diff --git a/macros/edr/metadata_collection/get_columns_from_information_schema.sql b/macros/edr/metadata_collection/get_columns_from_information_schema.sql
index 60c5a6c0f..782c5b868 100644
--- a/macros/edr/metadata_collection/get_columns_from_information_schema.sql
+++ b/macros/edr/metadata_collection/get_columns_from_information_schema.sql
@@ -1,10 +1,10 @@
-{% macro get_columns_from_information_schema(schema_tuple) %}
+{% macro get_columns_from_information_schema(schema_tuple, table_name = none) %}
     {%- set database_name, schema_name = schema_tuple %}
-    {{ return(adapter.dispatch('get_columns_from_information_schema', 'elementary')(database_name, schema_name)) }}
+    {{ return(adapter.dispatch('get_columns_from_information_schema', 'elementary')(database_name, schema_name, table_name)) }}
 {% endmacro %}
 
 {# Snowflake #}
-{% macro default__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro default__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).without_identifier() %}
     select
         upper(table_catalog || '.' || table_schema || '.' || table_name) as full_table_name,
@@ -15,19 +15,22 @@
         data_type
     from {{ schema_relation.information_schema('COLUMNS') }}
     where upper(table_schema) = upper('{{ schema_name }}')
+    {% if table_name %}
+      and upper(table_name) = upper('{{ table_name }}')
+    {% endif %}
 {% endmacro %}
 
-{% macro bigquery__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro bigquery__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).without_identifier() %}
     {% set columns_schema = schema_relation.information_schema('COLUMNS') %}
     {% if elementary.can_query_relation(columns_schema) %}
-      {{ elementary.default__get_columns_from_information_schema(database_name, schema_name) }}
+      {{ elementary.default__get_columns_from_information_schema(database_name, schema_name, table_name) }}
     {% else %}
       {{ elementary.get_empty_columns_from_information_schema_table() }}
     {% endif %}
 {% endmacro %}
 
-{% macro redshift__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro redshift__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     select
         upper(table_catalog || '.' || table_schema || '.' || table_name) as full_table_name,
         upper(table_catalog) as database_name,
@@ -37,9 +40,12 @@
         data_type
     from pg_catalog.svv_columns
     where upper(table_schema) = upper('{{ schema_name }}')
+    {% if table_name %}
+      and upper(table_name) = upper('{{ table_name }}')
+    {% endif %}
 {% endmacro %}
 
-{% macro postgres__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro postgres__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     select
         upper(table_catalog || '.' || table_schema || '.' || table_name) as full_table_name,
         upper(table_catalog) as database_name,
@@ -49,9 +55,12 @@
         data_type
     from information_schema.columns
     where upper(table_schema) = upper('{{ schema_name }}')
+    {% if table_name %}
+      and upper(table_name) = upper('{{ table_name }}')
+    {% endif %}
 {% endmacro %}
 
-{% macro databricks__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro databricks__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     {% if target.catalog is not none %}
         {# Information schema is only available when using Unity Catalog. #}
         {% set schema_relation = api.Relation.create(database=database_name, schema=schema_name).quote(false, false, false) %}
@@ -64,12 +73,15 @@
             data_type
         from {{ schema_relation.information_schema('COLUMNS') }}
         where upper(table_schema) = upper('{{ schema_name }}')
+        {% if table_name %}
+            and upper(table_name) = upper('{{ table_name }}')
+        {% endif %}
     {% else %}
         {{ elementary.get_empty_columns_from_information_schema_table() }}
     {% endif %}
 {% endmacro %}
 
-{% macro spark__get_columns_from_information_schema(database_name, schema_name) %}
+{% macro spark__get_columns_from_information_schema(database_name, schema_name, table_name = none) %}
     {{ elementary.get_empty_columns_from_information_schema_table() }}
 {% endmacro %}
 

From ad5697b119dfa3719c21db0b95473d86a0413a7a Mon Sep 17 00:00:00 2001
From: IDoneShaveIt <idanshavit31@gmail.com>
Date: Mon, 23 Oct 2023 15:22:32 +0300
Subject: [PATCH 3/5] Added a comment for the new macros

---
 .../dbt_project/macros/create_all_types_table.sql             | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql b/integration_tests/dbt_project/macros/create_all_types_table.sql
index b7ceafc05..64778e510 100644
--- a/integration_tests/dbt_project/macros/create_all_types_table.sql
+++ b/integration_tests/dbt_project/macros/create_all_types_table.sql
@@ -1,3 +1,7 @@
+{# 
+  These macros generate a table containing all of the supported data types for each DWH.
+#}
+
 {% macro create_all_types_table() %}
     {% do return(adapter.dispatch('create_all_types_table','elementary')()) %}
 {% endmacro %}

From 32def427543c35f73333d0baba4b85bae2268c69 Mon Sep 17 00:00:00 2001
From: IDoneShaveIt <idanshavit31@gmail.com>
Date: Mon, 23 Oct 2023 15:33:03 +0300
Subject: [PATCH 4/5] precommit - fix typos

---
 .../dbt_project/macros/create_all_types_table.sql      | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/integration_tests/dbt_project/macros/create_all_types_table.sql b/integration_tests/dbt_project/macros/create_all_types_table.sql
index 64778e510..89d541887 100644
--- a/integration_tests/dbt_project/macros/create_all_types_table.sql
+++ b/integration_tests/dbt_project/macros/create_all_types_table.sql
@@ -119,7 +119,7 @@
         TO_DATE('20231023', 'YYYYMMDD') as date_col,
         sysdate as timestamp_col,
-        TO_TIMESTAMP(sysdate, 'YYYY-MM-DD HH24:MI:SS') as timestampptz_col,
-        ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geomtry_col,
+        TO_TIMESTAMP(sysdate, 'YYYY-MM-DD HH24:MI:SS') as timestamptz_col,
+        ST_GeomFromText('POLYGON((0 2,1 1,0 -1,0 2))') as geometry_col,
         ST_GeogFromText('SRID=4324;POLYGON((0 0,0 1,1 1,10 10,1 0,0 0))') as geography_col,
         JSON_PARSE('{"data_type": "super"}') as super_col
     {% endset %}
@@ -215,13 +215,13 @@
     {% endfor %}
     {% do elementary.edr_log(information_schema_column_types) %}
 
-    {% set unmached_types = [] %}
+    {% set unmatched_types = [] %}
     {% for col, relation_value in relation_column_types.items() %}
       {% set info_schema_value = information_schema_column_types[col] %}
       {% if relation_value != info_schema_value %}
-        {% do unmached_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %}
+        {% do unmatched_types.append('Column "{}" types do not match: {} != {} '.format(col, relation_value, info_schema_value)) %}
       {% endif %}
     {% endfor %}
-    {% do elementary.edr_log(unmached_types) %}
-    {% do return(unmached_types) %}
+    {% do elementary.edr_log(unmatched_types) %}
+    {% do return(unmatched_types) %}
 {% endmacro %}

From 163839593a40eb6ab32a2e7bef938443cf5a5a42 Mon Sep 17 00:00:00 2001
From: IDoneShaveIt <idanshavit31@gmail.com>
Date: Tue, 24 Oct 2023 15:12:38 +0300
Subject: [PATCH 5/5] Fixed exposures schema validation usage of columns types

---
 integration_tests/tests/test_exposure_schema_validity.py  | 8 ++++----
 .../data_monitors_configuration/get_column_monitors.sql   | 4 ++--
 macros/edr/tests/test_exposure_schema_validity.sql        | 2 +-
 .../test_utils/find_normalized_data_type_for_column.sql   | 2 +-
 macros/utils/data_types/get_column_data_type.sql          | 4 ++--
 macros/utils/table_operations/get_columns_and_types.sql   | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/integration_tests/tests/test_exposure_schema_validity.py b/integration_tests/tests/test_exposure_schema_validity.py
index bd04034c4..0394c6926 100644
--- a/integration_tests/tests/test_exposure_schema_validity.py
+++ b/integration_tests/tests/test_exposure_schema_validity.py
@@ -87,7 +87,7 @@ def test_exposure_schema_validity_correct_columns_and_types(
     )
     DBT_TEST_ARGS = {
         "node": "models.exposures_test",
-        "columns": [{"name": "order_id", "dtype": "string"}],
+        "columns": [{"name": "order_id", "dtype": "string", "data_type": "string"}],
         "exposures": {
             "ZOMG": {
                 "meta": {
@@ -116,7 +116,7 @@ def test_exposure_schema_validity_correct_columns_and_invalid_type(
 ):
     DBT_TEST_ARGS = {
         "node": "models.exposures_test",
-        "columns": [{"name": "order_id", "dtype": "numeric"}],
+        "columns": [{"name": "order_id", "dtype": "numeric", "data_type": "numeric"}],
         "exposures": {
             "ZOMG": {
                 "meta": {
@@ -147,7 +147,7 @@ def test_exposure_schema_validity_correct_columns_and_missing_type(
 ):
     DBT_TEST_ARGS = {
         "node": "models.exposures_test",
-        "columns": [{"name": "order_id", "dtype": "numeric"}],
+        "columns": [{"name": "order_id", "dtype": "numeric", "data_type": "numeric"}],
         "exposures": {
             "ZOMG": {
                 "meta": {"referenced_columns": [{"column_name": "order_id"}]},
@@ -170,7 +170,7 @@ def test_exposure_schema_validity_missing_columns(
 ):
     DBT_TEST_ARGS = {
         "node": "models.exposures_test",
-        "columns": [{"name": "order", "dtype": "numeric"}],
+        "columns": [{"name": "order", "dtype": "numeric", "data_type": "numeric"}],
         "exposures": {
             "ZOMG": {
                 "meta": {
diff --git a/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql b/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql
index ffa95a163..8cefb3426 100644
--- a/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql
+++ b/macros/edr/data_monitoring/data_monitors_configuration/get_column_monitors.sql
@@ -4,7 +4,7 @@
     {% set column_objects = adapter.get_columns_in_relation(model_relation) %}
     {% for column_obj in column_objects %}
         {% if column_obj.name | lower == column_name | lower %}
-            {% set column_monitors = elementary.column_monitors_by_type(column_obj.dtype, column_tests) %}
+            {% set column_monitors = elementary.column_monitors_by_type(elementary.get_column_data_type(column_obj), column_tests) %}
             {% set column_item = {'column': column_obj, 'monitors': column_monitors} %}
             {{ return(column_item) }}
         {% endif %}
@@ -20,7 +20,7 @@
     {% set column_objects = adapter.get_columns_in_relation(model_relation) %}
 
     {% for column_obj in column_objects %}
-        {% set column_monitors = elementary.column_monitors_by_type(column_obj.dtype, column_tests) %}
+        {% set column_monitors = elementary.column_monitors_by_type(elementary.get_column_data_type(column_obj), column_tests) %}
         {% set column_item = {'column': column_obj, 'monitors': column_monitors} %}
         {% do column_obj_and_monitors.append(column_item) %}
     {% endfor %}
diff --git a/macros/edr/tests/test_exposure_schema_validity.sql b/macros/edr/tests/test_exposure_schema_validity.sql
index bc8d5e81d..388c3cd23 100644
--- a/macros/edr/tests/test_exposure_schema_validity.sql
+++ b/macros/edr/tests/test_exposure_schema_validity.sql
@@ -35,7 +35,7 @@
     {%- if matching_exposures | length > 0 -%}
         {%- set columns_dict = {} -%}
         {%- for column in columns -%}
-            {%- do columns_dict.update({ column['name'].strip('"').strip("'") | upper : elementary.normalize_data_type(column['dtype']) }) -%}
+            {%- do columns_dict.update({ column['name'].strip('"').strip("'") | upper : elementary.normalize_data_type(elementary.get_column_data_type(column)) }) -%}
         {%- endfor -%}
         {%- set invalid_exposures = [] -%}
         {%- for exposure in matching_exposures -%}
diff --git a/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql b/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql
index b5ea6085a..a139fc246 100644
--- a/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql
+++ b/macros/edr/tests/test_utils/find_normalized_data_type_for_column.sql
@@ -4,7 +4,7 @@
 {% if column_name and columns_from_relation and columns_from_relation is iterable %}
     {% for column_obj in columns_from_relation %}
         {% if column_obj.column | lower == column_name | trim('\'\"\`') | lower %}
-            {{ return(elementary.normalize_data_type(column_obj.dtype)) }}
+            {{ return(elementary.normalize_data_type(elementary.get_column_data_type(column_obj))) }}
         {% endif %}
     {% endfor %}
     {% do exceptions.raise_compiler_error("Column `{}` was not found in `{}`.".format(column_name, model_relation.name)) %}
diff --git a/macros/utils/data_types/get_column_data_type.sql b/macros/utils/data_types/get_column_data_type.sql
index 2d88c8076..17479a3b9 100644
--- a/macros/utils/data_types/get_column_data_type.sql
+++ b/macros/utils/data_types/get_column_data_type.sql
@@ -4,9 +4,9 @@
 {% endmacro %}
 
 {% macro default__get_column_data_type(column_relation) %}
-   {{return (column_relation.dtype) }}
+   {% do return(column_relation["dtype"]) %} {# Jinja subscript lookup tries the item first, then the attribute #}
 {% endmacro %}
 
 {% macro bigquery__get_column_data_type(column_relation) %}
-   {{return (column_relation.data_type) }}
+   {% do return(column_relation["data_type"]) %} {# Jinja subscript lookup tries the item first, then the attribute #}
 {% endmacro %}
diff --git a/macros/utils/table_operations/get_columns_and_types.sql b/macros/utils/table_operations/get_columns_and_types.sql
index 9feb15b94..1195b3cbe 100644
--- a/macros/utils/table_operations/get_columns_and_types.sql
+++ b/macros/utils/table_operations/get_columns_and_types.sql
@@ -20,7 +20,7 @@
     {%- set columns_from_relation = adapter.get_columns_in_relation(relation) -%}
 
     {% for column in columns_from_relation %}
-        {%- set column_item = {'column_name': column['column'], 'data_type': elementary.normalize_data_type(column['dtype'])} %}
+        {%- set column_item = {'column_name': column['column'], 'data_type': elementary.normalize_data_type(elementary.get_column_data_type(column))} %}
         {%- do columns.append(column_item) -%}
     {% endfor %}