Add generic test for exposure schema validation

elementary-data · Sep 11, 2023 · 7463179 · 7463179
1 parent 2ea1c6f
commit 7463179
Show file tree

Hide file tree

Showing 3 changed files with 227 additions and 3 deletions.
diff --git a/integration_tests/tests/test_exposure_schema_validity.py b/integration_tests/tests/test_exposure_schema_validity.py
@@ -0,0 +1,161 @@
+from datetime import date, timedelta
+from typing import Any, Dict, List
+
+from data_generator import DATE_FORMAT, generate_dates
+from dbt_project import DbtProject
+
+DBT_TEST_NAME = "elementary.exposure_schema_validity"
+
+
+def test_expose_schema_validity_with_no_exposures(test_id: str, dbt_project: DbtProject):
+    """Run the exposure schema test without any test arguments and expect a passing status."""
+    assert dbt_project.test(test_id, DBT_TEST_NAME, as_model=True)["status"] == "pass"
+
+def test_expose_schema_validity_with_correct_columns_and_types(test_id: str, dbt_project: DbtProject):
+    """Expect a pass when the exposure column agrees with the model column.
+
+    Both the test's ``columns`` argument and the exposure's meta declare
+    ``order_id`` as ``string``, so the assertion below expects the test
+    status to be ``pass``.
+    """
+    # Column list handed to the test through its "columns" argument.
+    model_columns = [{"name": "order_id", "dtype": "string"}]
+    # Column list declared under the exposure's meta.
+    exposure_columns = [{"name": "order_id", "data_type": "string"}]
+
+    # A single exposure depending on the same node that the test arguments name.
+    exposure = {
+        "meta": {"columns": exposure_columns},
+        "url": "http://bla.com",
+        "name": "ZOMG",
+        "depends_on": {"nodes": ["models.exposures_test"]},
+    }
+    test_args = {
+        "node": "models.exposures_test",
+        "columns": model_columns,
+        "exposures": {"ZOMG": exposure},
+    }
+
+    # NOTE(review): the columns keyword presumably describes the generated model - confirm.
+    test_result = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args,
+        columns=[{"name": "bla"}],
+        as_model=True,
+    )
+
+    assert test_result["status"] == "pass"
+
+def test_expose_schema_validity_with_correct_columns_and_invalid_type(test_id: str, dbt_project: DbtProject):
+    """Expect a failure when the exposure and model disagree on a column type.
+
+    The test's ``columns`` argument declares ``order_id`` as ``numeric`` while
+    the exposure's meta declares it as ``string``; the mismatch message must
+    appear in the test results query and the status must be ``fail``.
+    """
+    # Column list handed to the test through its "columns" argument.
+    model_columns = [{"name": "order_id", "dtype": "numeric"}]
+    # Column list declared under the exposure's meta.
+    exposure_columns = [{"name": "order_id", "data_type": "string"}]
+
+    # A single exposure depending on the same node that the test arguments name.
+    exposure = {
+        "meta": {"columns": exposure_columns},
+        "url": "http://bla.com",
+        "name": "ZOMG",
+        "depends_on": {"nodes": ["models.exposures_test"]},
+    }
+    test_args = {
+        "node": "models.exposures_test",
+        "columns": model_columns,
+        "exposures": {"ZOMG": exposure},
+    }
+
+    # NOTE(review): the columns keyword presumably describes the generated model - confirm.
+    test_result = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args,
+        columns=[{"name": "bla"}],
+        as_model=True,
+    )
+
+    # The error text is checked first, then the overall status.
+    assert "different data type for the column order_id string vs numeric" in test_result["test_results_query"]
+    assert test_result["status"] == "fail"
+
+
+def test_expose_schema_validity_with_correct_columns_and_missing_type(test_id: str, dbt_project: DbtProject):
+    """Expect a pass when the exposure column declares no data type.
+
+    The exposure's meta lists ``order_id`` without a ``data_type`` while the
+    test's ``columns`` argument declares it as ``numeric``; the assertion
+    below expects the test status to be ``pass``.
+    """
+    # Column list handed to the test through its "columns" argument.
+    model_columns = [{"name": "order_id", "dtype": "numeric"}]
+    # Column list declared under the exposure's meta; note the absent data_type.
+    exposure_columns = [{"name": "order_id"}]
+
+    # A single exposure depending on the same node that the test arguments name.
+    exposure = {
+        "meta": {"columns": exposure_columns},
+        "url": "http://bla.com",
+        "name": "ZOMG",
+        "depends_on": {"nodes": ["models.exposures_test"]},
+    }
+    test_args = {
+        "node": "models.exposures_test",
+        "columns": model_columns,
+        "exposures": {"ZOMG": exposure},
+    }
+
+    # NOTE(review): the columns keyword presumably describes the generated model - confirm.
+    test_result = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args,
+        columns=[{"name": "bla"}],
+        as_model=True,
+    )
+
+    assert test_result["status"] == "pass"
+
+def test_expose_schema_validity_with_missing_columns(test_id: str, dbt_project: DbtProject):
+    """Expect a failure when an exposure column is absent from the model.
+
+    The test's ``columns`` argument only declares ``order`` while the
+    exposure's meta declares ``order_id``; the missing-column message must
+    appear in the test results query and the status must be ``fail``.
+    """
+    # Column list handed to the test through its "columns" argument.
+    model_columns = [{"name": "order", "dtype": "numeric"}]
+    # Column list declared under the exposure's meta.
+    exposure_columns = [{"name": "order_id", "data_type": "string"}]
+
+    # A single exposure depending on the same node that the test arguments name.
+    exposure = {
+        "meta": {"columns": exposure_columns},
+        "url": "http://bla.com",
+        "name": "ZOMG",
+        "depends_on": {"nodes": ["models.exposures_test"]},
+    }
+    test_args = {
+        "node": "models.exposures_test",
+        "columns": model_columns,
+        "exposures": {"ZOMG": exposure},
+    }
+
+    # NOTE(review): the columns keyword presumably describes the generated model - confirm.
+    test_result = dbt_project.test(
+        test_id,
+        DBT_TEST_NAME,
+        test_args,
+        columns=[{"name": "bla"}],
+        as_model=True,
+    )
+
+    # The error text is checked first, then the overall status.
+    assert "order_id column missing in the model" in test_result["test_results_query"]
+    assert test_result["status"] == "fail"
diff --git a/macros/edr/tests/test_exposure_schema_validity.sql b/macros/edr/tests/test_exposure_schema_validity.sql
@@ -0,0 +1,58 @@
+{% test exposure_schema_validity(model, exposures, node, columns) %}
+    {#- Emits one row per exposure column (declared under the exposure's meta.columns) that is
+        missing from the model or whose declared data_type differs from the model column type.
+        `exposures`, `node` and `columns` fall back to graph.exposures, the attached node of the
+        test and the columns of the relation when they are not passed. -#}
+    {%- if not execute -%}
+        {%- do return(none) -%}
+    {%- endif -%}
+
+    {%- set model_relation = elementary.get_model_relation_for_test(model, context["model"]) -%}
+    {%- set full_table_name = elementary.relation_to_full_name(model_relation) -%}
+    {{- elementary.test_log('start', full_table_name, 'exposure validation') -}}
+
+    {#- Keep only exposures that depend on the tested node and carry a meta section. -#}
+    {%- set matching_exposures = [] -%}
+    {%- for exposure in (exposures|default(graph.exposures)).values() -%}
+        {%- if node | default(context['model']['attached_node']) in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%}
+            {%- do matching_exposures.append(exposure) -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- if matching_exposures | length > 0 -%}
+        {#- Conditional expression: the adapter is only queried when no columns were passed. -#}
+        {%- set columns_to_use = columns if columns is defined else adapter.get_columns_in_relation(model) -%}
+        {%- set columns_dict = {} -%}
+        {%- for column in columns_to_use -%}
+            {%- do columns_dict.update({ column['name'] | upper : elementary.normalize_data_type(column['dtype']) }) -%}
+        {%- endfor -%}
+        {%- set invalid_exposures = [] -%}
+        {%- for exposure in matching_exposures -%}
+            {# Depend on meta since column level info is not available on exposures #}
+            {%- set meta = exposure['meta'] | default(none) -%}
+            {%- if meta is not none and meta['columns'] | default(none) is iterable -%}
+                {%- for column in meta['columns'] -%}
+                    {#- Names are compared upper-cased; types are compared after normalize_data_type. -#}
+                    {%- if column['name'] | upper not in columns_dict -%}
+                        {%- do invalid_exposures.append({
+                            'exposure': exposure['name'], 'url': exposure['url'],
+                            'error': column['name'] ~ ' column missing in the model'}) -%}
+                    {%- elif column['data_type'] | default('') != '' and elementary.normalize_data_type(column['data_type']) != columns_dict[column['name'] | upper] -%}
+                        {%- do invalid_exposures.append({
+                            'exposure': exposure['name'], 'url': exposure['url'],
+                            'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper]}) -%}
+                    {%- endif -%}
+                {%- endfor -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {#- NOTE(review): values below go into single-quoted SQL literals unescaped; a quote in an exposure name, url or column name would break the query. -#}
+        {%- for invalid_exposure in invalid_exposures %}
+            {{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error
+        {%- endfor -%}
+        {{ elementary.test_log('end', full_table_name, 'exposure validation') }}
+        {% if invalid_exposures | length == 0 %}
+            {{ elementary.no_results_query() }}
+        {% endif %}
+    {%- else -%}
+    {{ elementary.no_results_query() }}
+    {%- endif -%}
+{% endtest %}
diff --git a/macros/edr/tests/test_utils/get_test_type.sql b/macros/edr/tests/test_utils/get_test_type.sql
@@ -23,12 +23,15 @@
     {%- set schema_changes_tests = [
         'schema_changes',
         'schema_changes_from_baseline',
-        'json_schema'
+        'json_schema',
     ] %}
     {%- set python_tests = [
         'python',
         'json_schema'
-    ]   %}
+    ] %}
+    {%- set dbt_tests = [
+        'exposure_schema_validity'
+    ] %}
 
   {% if flattened_test.test_namespace == "elementary" %}
     {% if flattened_test.short_name | lower in anomaly_detection_tests %}
@@ -37,6 +40,8 @@
       {% do return("schema_change") %}
     {% elif flattened_test.short_name | lower in python_tests %}
         {% do return("python_test") %}
+    {% elif flattened_test.short_name | lower in dbt_tests %}
+        {% do return("dbt_test") %}
     {% endif %}
   {% endif %}
-{% endmacro %}
+{% endmacro %}