# Patch overview (review commentary; the patch text below is reproduced unchanged).
#
# Adds an `exposure_schema_validity` test to the `elementary` namespace,
# together with integration tests and a test-type mapping. Three files:
#
# 1. integration_tests/tests/test_exposure_schema_validity.py (new file)
#    Five pytest functions call dbt_project.test(...) with
#    DBT_TEST_NAME = "elementary.exposure_schema_validity" and as_model=True:
#      - no test args                                         -> status "pass"
#      - exposure column order_id/"string", model column
#        order_id/"string"                                    -> status "pass"
#      - exposure data_type "string", model dtype "numeric"   -> status "fail";
#        test_results_query must contain the
#        "different data type for the column order_id ..." message
#      - exposure column order_id without a data_type         -> status "pass"
#      - exposure column order_id, model column "order"       -> status "fail";
#        test_results_query must contain
#        "order_id column missing in the model"
#    NOTE(review): date, timedelta, Any, Dict, List, DATE_FORMAT and
#    generate_dates are imported but not referenced anywhere in this new file.
#    NOTE(review): the function names say "expose_schema_validity" while the
#    dbt test is "exposure_schema_validity" - presumably a typo; confirm.
#
# 2. macros/edr/tests/test_exposure_schema_validity.sql (new file)
#    {% test exposure_schema_validity(model, exposures, node, columns) %}
#      - returns none when `execute` is false;
#      - collects exposures (the `exposures` argument, else graph.exposures)
#        whose depends_on.nodes contains `node` (else
#        context['model']['attached_node']) and whose `meta` is not none;
#      - when at least one matches, builds a dict of upper-cased column name ->
#        elementary.normalize_data_type(dtype) from `columns` (else
#        adapter.get_columns_in_relation(model));
#      - for every entry of an exposure's meta['columns'], records an error
#        when the upper-cased name is absent from that dict
#        ("<name> column missing in the model"), or when a non-empty data_type
#        passed through elementary.normalize_data_type differs from the dict
#        value ("different data type for the column ...");
#      - renders one "SELECT ... as exposure, ... as url, ... as error" per
#        recorded error, joined with UNION ALL; when nothing was recorded, or
#        no exposure matched, renders elementary.no_results_query() instead.
#    NOTE(review): exposure name, url and error text are interpolated into
#    single-quoted SQL literals as-is; a value containing a single quote would
#    presumably break the generated query - confirm whether escaping is needed.
#    NOTE(review): adapter.get_columns_in_relation(model) is an ordinary filter
#    argument, so it is presumably evaluated even when `columns` is supplied -
#    confirm.
#
# 3. macros/edr/tests/test_utils/get_test_type.sql (modified)
#    Adds a `dbt_tests` list containing 'exposure_schema_validity' and a branch
#    returning "dbt_test" for tests in the "elementary" namespace whose
#    lower-cased short_name is in that list. Also adds a trailing comma after
#    'json_schema' in schema_changes_tests and a newline at end of file.
#
# NOTE(review): in this copy the patch's line breaks appear to have been
# flattened to spaces, so the hunk sizes (+1,161 / +1,58) do not correspond to
# physical lines - verify against the original patch before applying.
diff --git a/integration_tests/tests/test_exposure_schema_validity.py b/integration_tests/tests/test_exposure_schema_validity.py new file mode 100644 index 000000000..7b43064be --- /dev/null +++ b/integration_tests/tests/test_exposure_schema_validity.py @@ -0,0 +1,161 @@ +from datetime import date, timedelta +from typing import Any, Dict, List + +from data_generator import DATE_FORMAT, generate_dates +from dbt_project import DbtProject + +DBT_TEST_NAME = "elementary.exposure_schema_validity" + + +def test_expose_schema_validity_with_no_exposures(test_id: str, dbt_project: DbtProject): + test_result = dbt_project.test(test_id, DBT_TEST_NAME, as_model=True) + assert test_result["status"] == "pass" + +def test_expose_schema_validity_with_correct_columns_and_types(test_id: str, dbt_project: DbtProject): + DBT_TEST_ARGS = { + "node": "models.exposures_test", + "columns": [ + { + "name": "order_id", + "dtype": "string" + } + ], + "exposures": { + "ZOMG": { + "meta": { + "columns": [{ + "name": "order_id", + "data_type": "string" + }] + }, + "url": "http://bla.com", + "name": "ZOMG", + "depends_on": { + "nodes": ['models.exposures_test'] + } + } + } + } + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + DBT_TEST_ARGS, + columns=[dict( + name="bla" + )], + as_model=True + ) + assert test_result["status"] == "pass" + +def test_expose_schema_validity_with_correct_columns_and_invalid_type(test_id: str, dbt_project: DbtProject): + DBT_TEST_ARGS = { + "node": "models.exposures_test", + "columns": [ + { + "name": "order_id", + "dtype": "numeric" + } + ], + "exposures": { + "ZOMG": { + "meta": { + "columns": [{ + "name": "order_id", + "data_type": "string" + }] + }, + "url": "http://bla.com", + "name": "ZOMG", + "depends_on": { + "nodes": ['models.exposures_test'] + } + } + } + } + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + DBT_TEST_ARGS, + columns=[dict( + name="bla" + )], + as_model=True + ) + + assert 'different data type for the column order_id 
string vs numeric' in test_result['test_results_query'] + assert test_result["status"] == "fail" + + +def test_expose_schema_validity_with_correct_columns_and_missing_type(test_id: str, dbt_project: DbtProject): + DBT_TEST_ARGS = { + "node": "models.exposures_test", + "columns": [ + { + "name": "order_id", + "dtype": "numeric" + } + ], + "exposures": { + "ZOMG": { + "meta": { + "columns": [{ + "name": "order_id", + }] + }, + "url": "http://bla.com", + "name": "ZOMG", + "depends_on": { + "nodes": ['models.exposures_test'] + } + } + } + } + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + DBT_TEST_ARGS, + columns=[dict( + name="bla" + )], + as_model=True + ) + + assert test_result["status"] == "pass" + +def test_expose_schema_validity_with_missing_columns(test_id: str, dbt_project: DbtProject): + DBT_TEST_ARGS = { + "node": "models.exposures_test", + "columns": [ + { + "name": "order", + "dtype": "numeric" + } + ], + "exposures": { + "ZOMG": { + "meta": { + "columns": [{ + "name": "order_id", + "data_type": "string" + }] + }, + "url": "http://bla.com", + "name": "ZOMG", + "depends_on": { + "nodes": ['models.exposures_test'] + } + } + } + } + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + DBT_TEST_ARGS, + columns=[dict( + name="bla" + )], + as_model=True + ) + + assert 'order_id column missing in the model' in test_result['test_results_query'] + assert test_result["status"] == "fail" diff --git a/macros/edr/tests/test_exposure_schema_validity.sql b/macros/edr/tests/test_exposure_schema_validity.sql new file mode 100644 index 000000000..c18ff68c7 --- /dev/null +++ b/macros/edr/tests/test_exposure_schema_validity.sql @@ -0,0 +1,58 @@ +{% test exposure_schema_validity(model, exposures, node, columns) %} + {%- if not execute -%} + {%- do return(none) -%} + {%- endif -%} + + {%- set model_relation = elementary.get_model_relation_for_test(model, context["model"]) -%} + {%- set full_table_name = elementary.relation_to_full_name(model_relation) -%} 
+ {{- elementary.test_log('start', full_table_name, 'exposure validation') -}} + + {%- set matching_exposures = [] -%} + + {%- for exposure in (exposures|default(graph.exposures)).values() -%} + {%- if node | default(context['model']['attached_node']) in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%} + {%- do matching_exposures.append(exposure) -%} + {%- endif -%} + {%- endfor -%} + {%- set matching_exposures_len = matching_exposures | length -%} + {%- if matching_exposures_len > 0 -%} + {%- set columns_to_use = columns | default(adapter.get_columns_in_relation(model)) -%} + {%- set columns_dict = {} -%} + {%- for column in columns_to_use -%} + {%- do columns_dict.update({ column['name'] | upper : elementary.normalize_data_type(column['dtype']) }) -%} + {%- endfor -%} + {%- set invalid_exposures = [] -%} + {%- for exposure in matching_exposures -%} + {# Depend on meta since column level info is not available on exposures #} + {%- set meta = exposure['meta'] | default(none) -%} + {%- if meta != none and meta['columns'] | default(none) is iterable -%} + {%- for column in meta['columns'] -%} + {%- if column['name'] | upper not in columns_dict.keys() -%} + {%- do invalid_exposures.append({ + 'exposure': exposure['name'], + 'url': exposure['url'], + 'error': column['name'] ~ ' column missing in the model' + }) + -%} + {%- elif column['data_type'] | default('') != '' and elementary.normalize_data_type(column['data_type']) != columns_dict[column['name'] | upper] -%} + {%- do invalid_exposures.append({ + 'exposure': exposure['name'], + 'url': exposure['url'], + 'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper] + }) + -%} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + {%- endfor -%} + {%- for invalid_exposure in invalid_exposures %} + {{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ 
invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error + {%- endfor -%} + {{ elementary.test_log('end', full_table_name, 'exposure validation') }} + {% if invalid_exposures | length == 0 %} + {{ elementary.no_results_query() }} + {% endif %} + {%- else -%} + {{ elementary.no_results_query() }} + {%- endif -%} +{% endtest %} diff --git a/macros/edr/tests/test_utils/get_test_type.sql b/macros/edr/tests/test_utils/get_test_type.sql index 7e804e209..c537681a1 100644 --- a/macros/edr/tests/test_utils/get_test_type.sql +++ b/macros/edr/tests/test_utils/get_test_type.sql @@ -23,12 +23,15 @@ {%- set schema_changes_tests = [ 'schema_changes', 'schema_changes_from_baseline', - 'json_schema' + 'json_schema', ] %} {%- set python_tests = [ 'python', 'json_schema' - ] %} + ] %} + {%- set dbt_tests = [ + 'exposure_schema_validity' + ] %} {% if flattened_test.test_namespace == "elementary" %} {% if flattened_test.short_name | lower in anomaly_detection_tests %} @@ -37,6 +40,8 @@ {% do return("schema_change") %} {% elif flattened_test.short_name | lower in python_tests %} {% do return("python_test") %} + {% elif flattened_test.short_name | lower in dbt_tests %} + {% do return("dbt_test") %} {% endif %} {% endif %} -{% endmacro %} \ No newline at end of file +{% endmacro %}