Skip to content

Commit

Permalink
Add generic test for exposure schema validation
Browse files Browse the repository at this point in the history
  • Loading branch information
erikzaadi committed Sep 11, 2023
1 parent 2ea1c6f commit 7463179
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 3 deletions.
161 changes: 161 additions & 0 deletions integration_tests/tests/test_exposure_schema_validity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
from datetime import date, timedelta
from typing import Any, Dict, List

from data_generator import DATE_FORMAT, generate_dates
from dbt_project import DbtProject

DBT_TEST_NAME = "elementary.exposure_schema_validity"


def test_expose_schema_validity_with_no_exposures(
    test_id: str, dbt_project: DbtProject
):
    """Run the exposure schema validity test with no test arguments.

    No exposures, node or columns are injected; the test status is expected
    to be ``pass``.
    """
    outcome = dbt_project.test(test_id, DBT_TEST_NAME, as_model=True)
    status = outcome["status"]
    assert status == "pass"

def test_expose_schema_validity_with_correct_columns_and_types(
    test_id: str, dbt_project: DbtProject
):
    """Expect a pass when the exposure's column name and type match the model.

    The injected model column is ``order_id`` with dtype ``string`` and the
    exposure's meta declares ``order_id`` with data_type ``string``.
    """
    node_name = "models.exposures_test"
    # Columns injected in place of the ones read from the relation.
    model_columns = [{"name": "order_id", "dtype": "string"}]
    # Exposure injected in place of the ones found in the dbt graph.
    exposure = {
        "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
        "url": "http://bla.com",
        "name": "ZOMG",
        "depends_on": {"nodes": [node_name]},
    }
    test_args = {
        "node": node_name,
        "columns": model_columns,
        "exposures": {"ZOMG": exposure},
    }

    outcome = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        test_args,
        columns=[{"name": "bla"}],
        as_model=True,
    )

    assert outcome["status"] == "pass"

def test_expose_schema_validity_with_correct_columns_and_invalid_type(
    test_id: str, dbt_project: DbtProject
):
    """Expect a failure when the exposure's declared type differs from the model's.

    The injected model column ``order_id`` has dtype ``numeric`` while the
    exposure's meta declares it as ``string``; the mismatch message must
    appear in the test results query and the status must be ``fail``.
    """
    node_name = "models.exposures_test"
    # Columns injected in place of the ones read from the relation.
    model_columns = [{"name": "order_id", "dtype": "numeric"}]
    # Exposure injected in place of the ones found in the dbt graph.
    exposure = {
        "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
        "url": "http://bla.com",
        "name": "ZOMG",
        "depends_on": {"nodes": [node_name]},
    }
    test_args = {
        "node": node_name,
        "columns": model_columns,
        "exposures": {"ZOMG": exposure},
    }

    outcome = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        test_args,
        columns=[{"name": "bla"}],
        as_model=True,
    )

    expected_error = "different data type for the column order_id string vs numeric"
    assert expected_error in outcome["test_results_query"]
    assert outcome["status"] == "fail"


def test_expose_schema_validity_with_correct_columns_and_missing_type(
    test_id: str, dbt_project: DbtProject
):
    """Expect a pass when the exposure names an existing column without a type.

    The exposure's meta lists ``order_id`` with no ``data_type`` key, while
    the injected model column ``order_id`` has dtype ``numeric``.
    """
    node_name = "models.exposures_test"
    # Columns injected in place of the ones read from the relation.
    model_columns = [{"name": "order_id", "dtype": "numeric"}]
    # Exposure column deliberately carries no "data_type" entry.
    exposure = {
        "meta": {"columns": [{"name": "order_id"}]},
        "url": "http://bla.com",
        "name": "ZOMG",
        "depends_on": {"nodes": [node_name]},
    }
    test_args = {
        "node": node_name,
        "columns": model_columns,
        "exposures": {"ZOMG": exposure},
    }

    outcome = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        test_args,
        columns=[{"name": "bla"}],
        as_model=True,
    )

    assert outcome["status"] == "pass"

def test_expose_schema_validity_with_missing_columns(
    test_id: str, dbt_project: DbtProject
):
    """Expect a failure when the exposure references a column the model lacks.

    The injected model only has a column named ``order`` while the exposure's
    meta declares ``order_id``; the missing-column message must appear in the
    test results query and the status must be ``fail``.
    """
    node_name = "models.exposures_test"
    # The model column is "order", not the "order_id" the exposure expects.
    model_columns = [{"name": "order", "dtype": "numeric"}]
    # Exposure injected in place of the ones found in the dbt graph.
    exposure = {
        "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
        "url": "http://bla.com",
        "name": "ZOMG",
        "depends_on": {"nodes": [node_name]},
    }
    test_args = {
        "node": node_name,
        "columns": model_columns,
        "exposures": {"ZOMG": exposure},
    }

    outcome = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        test_args,
        columns=[{"name": "bla"}],
        as_model=True,
    )

    expected_error = "order_id column missing in the model"
    assert expected_error in outcome["test_results_query"]
    assert outcome["status"] == "fail"
58 changes: 58 additions & 0 deletions macros/edr/tests/test_exposure_schema_validity.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{% test exposure_schema_validity(model, exposures, node, columns) %}
{#-
  Generic test: checks that every column listed under an exposure's `meta.columns`
  exists in the tested model and, when the exposure declares a `data_type`, that it
  equals the model column's type after `elementary.normalize_data_type`.

  Renders one row (exposure, url, error) per violation, or
  `elementary.no_results_query()` when there are none.

  Arguments:
    model     - the tested model.
    exposures - optional mapping of exposures; when undefined, `graph.exposures` is used.
    node      - optional node id looked up in each exposure's `depends_on.nodes`;
                when undefined, `context['model']['attached_node']` is used.
    columns   - optional list of items with `name` and `dtype`; when undefined,
                `adapter.get_columns_in_relation(model)` is used.
-#}
{#- Return early, rendering nothing, when `execute` is false -#}
{%- if not execute -%}
{%- do return(none) -%}
{%- endif -%}

{%- set model_relation = elementary.get_model_relation_for_test(model, context["model"]) -%}
{%- set full_table_name = elementary.relation_to_full_name(model_relation) -%}
{{- elementary.test_log('start', full_table_name, 'exposure validation') -}}

{#- Keep only the exposures that depend on the tested node and carry a `meta` entry -#}
{%- set matching_exposures = [] -%}

{%- for exposure in (exposures|default(graph.exposures)).values() -%}
{%- if node | default(context['model']['attached_node']) in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%}
{%- do matching_exposures.append(exposure) -%}
{%- endif -%}
{%- endfor -%}
{%- set matching_exposures_len = matching_exposures | length -%}
{%- if matching_exposures_len > 0 -%}
{%- set columns_to_use = columns | default(adapter.get_columns_in_relation(model)) -%}
{#- Map upper-cased column name to normalized data type; lookups below upper-case the name as well -#}
{%- set columns_dict = {} -%}
{%- for column in columns_to_use -%}
{%- do columns_dict.update({ column['name'] | upper : elementary.normalize_data_type(column['dtype']) }) -%}
{%- endfor -%}
{#- Each entry collected here becomes one result row: exposure name, url and error text -#}
{%- set invalid_exposures = [] -%}
{%- for exposure in matching_exposures -%}
{# Depend on meta since column level info is not available on exposures #}
{%- set meta = exposure['meta'] | default(none) -%}
{%- if meta != none and meta['columns'] | default(none) is iterable -%}
{%- for column in meta['columns'] -%}
{#- Violation 1: the exposure references a column name that is not among the model columns -#}
{%- if column['name'] | upper not in columns_dict.keys() -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': column['name'] ~ ' column missing in the model'
})
-%}
{#- Violation 2: checked only when the exposure declares a non-empty data_type; types are compared after normalization -#}
{%- elif column['data_type'] | default('') != '' and elementary.normalize_data_type(column['data_type']) != columns_dict[column['name'] | upper] -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper]
})
-%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endfor -%}
{#- Render one SELECT per violation, chained with UNION ALL; if nothing was collected, the no-results query is rendered after the end log instead -#}
{%- for invalid_exposure in invalid_exposures %}
{{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error
{%- endfor -%}
{{ elementary.test_log('end', full_table_name, 'exposure validation') }}
{% if invalid_exposures | length == 0 %}
{{ elementary.no_results_query() }}
{% endif %}
{%- else -%}
{#- No matching exposure for this node: nothing to validate -#}
{{ elementary.no_results_query() }}
{%- endif -%}
{% endtest %}
11 changes: 8 additions & 3 deletions macros/edr/tests/test_utils/get_test_type.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@
{%- set schema_changes_tests = [
'schema_changes',
'schema_changes_from_baseline',
'json_schema'
'json_schema',
] %}
{%- set python_tests = [
'python',
'json_schema'
] %}
] %}
{%- set dbt_tests = [
'exposure_schema_validity'
] %}

{% if flattened_test.test_namespace == "elementary" %}
{% if flattened_test.short_name | lower in anomaly_detection_tests %}
Expand All @@ -37,6 +40,8 @@
{% do return("schema_change") %}
{% elif flattened_test.short_name | lower in python_tests %}
{% do return("python_test") %}
{% elif flattened_test.short_name | lower in dbt_tests %}
{% do return("dbt_test") %}
{% endif %}
{% endif %}
{% endmacro %}
{% endmacro %}

0 comments on commit 7463179

Please sign in to comment.