diff --git a/integration_tests/tests/test_exposure_schema_validity.py b/integration_tests/tests/test_exposure_schema_validity.py index 7b43064be..002357f7a 100644 --- a/integration_tests/tests/test_exposure_schema_validity.py +++ b/integration_tests/tests/test_exposure_schema_validity.py @@ -1,161 +1,108 @@ -from datetime import date, timedelta -from typing import Any, Dict, List - -from data_generator import DATE_FORMAT, generate_dates from dbt_project import DbtProject DBT_TEST_NAME = "elementary.exposure_schema_validity" -def test_expose_schema_validity_with_no_exposures(test_id: str, dbt_project: DbtProject): +def test_expose_schema_validity_with_no_exposures( + test_id: str, dbt_project: DbtProject +): test_result = dbt_project.test(test_id, DBT_TEST_NAME, as_model=True) assert test_result["status"] == "pass" -def test_expose_schema_validity_with_correct_columns_and_types(test_id: str, dbt_project: DbtProject): + +def test_expose_schema_validity_with_correct_columns_and_types( + test_id: str, dbt_project: DbtProject +): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [ - { - "name": "order_id", - "dtype": "string" - } - ], + "columns": [{"name": "order_id", "dtype": "string"}], "exposures": { "ZOMG": { - "meta": { - "columns": [{ - "name": "order_id", - "data_type": "string" - }] - }, + "meta": {"columns": [{"name": "order_id", "data_type": "string"}]}, "url": "http://bla.com", "name": "ZOMG", - "depends_on": { - "nodes": ['models.exposures_test'] - } + "depends_on": {"nodes": ["models.exposures_test"]}, } - } + }, } test_result = dbt_project.test( - test_id, - DBT_TEST_NAME, - DBT_TEST_ARGS, - columns=[dict( - name="bla" - )], - as_model=True + test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True ) assert test_result["status"] == "pass" -def test_expose_schema_validity_with_correct_columns_and_invalid_type(test_id: str, dbt_project: DbtProject): + +def test_expose_schema_validity_with_correct_columns_and_invalid_type( + test_id: str, dbt_project: DbtProject +): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [ - { - "name": "order_id", - "dtype": "numeric" - } - ], + "columns": [{"name": "order_id", "dtype": "numeric"}], "exposures": { "ZOMG": { - "meta": { - "columns": [{ - "name": "order_id", - "data_type": "string" - }] - }, + "meta": {"columns": [{"name": "order_id", "data_type": "string"}]}, "url": "http://bla.com", "name": "ZOMG", - "depends_on": { - "nodes": ['models.exposures_test'] - } + "depends_on": {"nodes": ["models.exposures_test"]}, } - } + }, } test_result = dbt_project.test( - test_id, - DBT_TEST_NAME, - DBT_TEST_ARGS, - columns=[dict( - name="bla" - )], - as_model=True + test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True ) - assert 'different data type for the column order_id string vs numeric' in test_result['test_results_query'] + assert ( + "different data type for the column order_id string vs" + in test_result["test_results_query"] + ) assert test_result["status"] == "fail" -def test_expose_schema_validity_with_correct_columns_and_missing_type(test_id: str, dbt_project: DbtProject): +def test_expose_schema_validity_with_correct_columns_and_missing_type( + test_id: str, dbt_project: DbtProject +): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [ - { - "name": "order_id", - "dtype": "numeric" - } - ], + "columns": [{"name": "order_id", "dtype": "numeric"}], "exposures": { "ZOMG": { "meta": { - "columns": [{ - "name": "order_id", - }] + "columns": [ + { + "name": "order_id", + } + ] }, "url": "http://bla.com", "name": "ZOMG", - "depends_on": { - "nodes": ['models.exposures_test'] - } + "depends_on": {"nodes": ["models.exposures_test"]}, } - } + }, } test_result = dbt_project.test( - test_id, - DBT_TEST_NAME, - DBT_TEST_ARGS, - columns=[dict( - name="bla" - )], - as_model=True + test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True ) assert test_result["status"] == "pass" -def test_expose_schema_validity_with_missing_columns(test_id: str, dbt_project: DbtProject): + +def test_expose_schema_validity_with_missing_columns( + test_id: str, dbt_project: DbtProject +): DBT_TEST_ARGS = { "node": "models.exposures_test", - "columns": [ - { - "name": "order", - "dtype": "numeric" - } - ], + "columns": [{"name": "order", "dtype": "numeric"}], "exposures": { "ZOMG": { - "meta": { - "columns": [{ - "name": "order_id", - "data_type": "string" - }] - }, + "meta": {"columns": [{"name": "order_id", "data_type": "string"}]}, "url": "http://bla.com", "name": "ZOMG", - "depends_on": { - "nodes": ['models.exposures_test'] - } + "depends_on": {"nodes": ["models.exposures_test"]}, } - } + }, } test_result = dbt_project.test( - test_id, - DBT_TEST_NAME, - DBT_TEST_ARGS, - columns=[dict( - name="bla" - )], - as_model=True + test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True ) - assert 'order_id column missing in the model' in test_result['test_results_query'] + assert "order_id column missing in the model" in test_result["test_results_query"] assert test_result["status"] == "fail" diff --git a/macros/edr/tests/test_exposure_schema_validity.sql b/macros/edr/tests/test_exposure_schema_validity.sql index c18ff68c7..e7c3a2e96 100644 --- a/macros/edr/tests/test_exposure_schema_validity.sql +++ b/macros/edr/tests/test_exposure_schema_validity.sql @@ -3,22 +3,25 @@ {%- do return(none) -%} {%- endif -%} + {# Parameters used only for dependency injection in integration tests #} + {%- set node = node | default(context['model']['attached_node']) -%} + {%- set exposures = ( exposures | default(graph.exposures) ).values() -%} + {%- set columns = columns | default(adapter.get_columns_in_relation(model)) -%} + {%- set model_relation = elementary.get_model_relation_for_test(model, context["model"]) -%} {%- set full_table_name = elementary.relation_to_full_name(model_relation) -%} {{- elementary.test_log('start', full_table_name, 'exposure validation') -}} {%- set matching_exposures = [] -%} - {%- for exposure in (exposures|default(graph.exposures)).values() -%} - {%- if node | default(context['model']['attached_node']) in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%} + {%- for exposure in exposures -%} + {%- if node in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%} {%- do matching_exposures.append(exposure) -%} {%- endif -%} {%- endfor -%} - {%- set matching_exposures_len = matching_exposures | length -%} - {%- if matching_exposures_len > 0 -%} - {%- set columns_to_use = columns | default(adapter.get_columns_in_relation(model)) -%} + {%- if matching_exposures | length > 0 -%} {%- set columns_dict = {} -%} - {%- for column in columns_to_use -%} + {%- for column in columns -%} {%- do columns_dict.update({ column['name'] | upper : elementary.normalize_data_type(column['dtype']) }) -%} {%- endfor -%} {%- set invalid_exposures = [] -%} @@ -27,31 +30,34 @@ {%- set meta = exposure['meta'] | default(none) -%} {%- if meta != none and meta['columns'] | default(none) is iterable -%} {%- for column in meta['columns'] -%} - {%- if column['name'] | upper not in columns_dict.keys() -%} - {%- do invalid_exposures.append({ - 'exposure': exposure['name'], - 'url': exposure['url'], - 'error': column['name'] ~ ' column missing in the model' - }) - -%} - {%- elif column['data_type'] | default('') != '' and elementary.normalize_data_type(column['data_type']) != columns_dict[column['name'] | upper] -%} - {%- do invalid_exposures.append({ - 'exposure': exposure['name'], - 'url': exposure['url'], - 'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper] - }) - -%} + {%- if matching_exposures | length == 1 or context['render'](column['source']) | default('') == node -%} + {%- if column['name'] | upper not in columns_dict.keys() -%} + {%- do invalid_exposures.append({ + 'exposure': exposure['name'], + 'url': exposure['url'], + 'error': column['name'] ~ ' column missing in the model' + }) + -%} + {%- elif column['data_type'] | default('') != '' and column['data_type'] != columns_dict[column['name'] | upper] -%} + {%- do invalid_exposures.append({ + 'exposure': exposure['name'], + 'url': exposure['url'], + 'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper] + }) + -%} + {%- endif -%} {%- endif -%} {%- endfor -%} {%- endif -%} {%- endfor -%} - {%- for invalid_exposure in invalid_exposures %} - {{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error - {%- endfor -%} - {{ elementary.test_log('end', full_table_name, 'exposure validation') }} - {% if invalid_exposures | length == 0 %} + {%- if invalid_exposures | length > 0 -%} + {%- for invalid_exposure in invalid_exposures %} + {{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error + {%- endfor -%} + {{ elementary.test_log('end', full_table_name, 'exposure validation') }} + {%- else -%} {{ elementary.no_results_query() }} - {% endif %} + {%- endif -%} {%- else -%} {{ elementary.no_results_query() }} {%- endif -%} diff --git a/macros/utils/common_test_configs.sql b/macros/utils/common_test_configs.sql index 01c051b0f..de3404771 100644 --- a/macros/utils/common_test_configs.sql +++ b/macros/utils/common_test_configs.sql @@ -340,6 +340,9 @@ }, "column_anomalies": { "description": "Column-level anomaly monitors (null_count, null_percent, zero_count, string_length, variance, etc.) on the column according to its data type." + }, + "exposure_schema_validity": { + "description": "Column level exposure validation according to the meta.columns property in exposures.yml" } } } %}