Skip to content

Commit

Permalink
[SQUASH] With column level source origin
Browse files Browse the repository at this point in the history
  • Loading branch information
erikzaadi committed Sep 14, 2023
1 parent b755b67 commit aa7f9d7
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 126 deletions.
147 changes: 47 additions & 100 deletions integration_tests/tests/test_exposure_schema_validity.py
Original file line number Diff line number Diff line change
@@ -1,161 +1,108 @@
from datetime import date, timedelta
from typing import Any, Dict, List

from data_generator import DATE_FORMAT, generate_dates
from dbt_project import DbtProject

DBT_TEST_NAME = "elementary.exposure_schema_validity"


def test_expose_schema_validity_with_no_exposures(
    test_id: str, dbt_project: DbtProject
):
    """Run the exposure schema validity test without any injected arguments.

    No ``node``/``columns``/``exposures`` overrides are passed to the dbt test,
    and the run is expected to report a ``pass`` status.
    """
    test_result = dbt_project.test(test_id, DBT_TEST_NAME, as_model=True)
    assert test_result["status"] == "pass"

def test_expose_schema_validity_with_correct_columns_and_types(
    test_id: str, dbt_project: DbtProject
):
    """Expect a pass when the exposure's column name and type match the model.

    The injected model column is ``order_id`` with dtype ``string`` and the
    single exposure (``ZOMG``) declares ``order_id`` with data_type ``string``
    while depending on the same node.
    """
    # Test arguments that replace the real node, model columns and exposures.
    DBT_TEST_ARGS = {
        "node": "models.exposures_test",
        "columns": [{"name": "order_id", "dtype": "string"}],
        "exposures": {
            "ZOMG": {
                "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
                "url": "http://bla.com",
                "name": "ZOMG",
                "depends_on": {"nodes": ["models.exposures_test"]},
            }
        },
    }
    # NOTE(review): the ``columns=[dict(name="bla")]`` keyword is forwarded to
    # DbtProject.test; presumably it declares the columns of the generated
    # test model - confirm against DbtProject.
    test_result = dbt_project.test(
        test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True
    )
    assert test_result["status"] == "pass"

def test_expose_schema_validity_with_correct_columns_and_invalid_type(
    test_id: str, dbt_project: DbtProject
):
    """Expect a failure when the exposure's column type differs from the model.

    The injected model column ``order_id`` has dtype ``numeric`` while the
    exposure (``ZOMG``) declares it as ``string``. The test must fail and its
    results query must contain the data-type mismatch message.
    """
    # Test arguments that replace the real node, model columns and exposures.
    DBT_TEST_ARGS = {
        "node": "models.exposures_test",
        "columns": [{"name": "order_id", "dtype": "numeric"}],
        "exposures": {
            "ZOMG": {
                "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
                "url": "http://bla.com",
                "name": "ZOMG",
                "depends_on": {"nodes": ["models.exposures_test"]},
            }
        },
    }
    # NOTE(review): the ``columns=[dict(name="bla")]`` keyword is forwarded to
    # DbtProject.test; presumably it declares the columns of the generated
    # test model - confirm against DbtProject.
    test_result = dbt_project.test(
        test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True
    )

    # The asserted substring stops at "vs": the model-side type name that
    # follows it is not pinned. NOTE(review): presumably because the type name
    # emitted after normalization is not always "numeric" - confirm.
    assert (
        "different data type for the column order_id string vs"
        in test_result["test_results_query"]
    )
    assert test_result["status"] == "fail"


def test_expose_schema_validity_with_correct_columns_and_missing_type(
    test_id: str, dbt_project: DbtProject
):
    """Expect a pass when the exposure names a column but declares no type.

    The exposure (``ZOMG``) lists ``order_id`` without a ``data_type`` key,
    while the injected model column ``order_id`` has dtype ``numeric``.
    """
    # Test arguments that replace the real node, model columns and exposures.
    DBT_TEST_ARGS = {
        "node": "models.exposures_test",
        "columns": [{"name": "order_id", "dtype": "numeric"}],
        "exposures": {
            "ZOMG": {
                "meta": {
                    "columns": [
                        {
                            # Deliberately no "data_type" entry here.
                            "name": "order_id",
                        }
                    ]
                },
                "url": "http://bla.com",
                "name": "ZOMG",
                "depends_on": {"nodes": ["models.exposures_test"]},
            }
        },
    }
    # NOTE(review): the ``columns=[dict(name="bla")]`` keyword is forwarded to
    # DbtProject.test; presumably it declares the columns of the generated
    # test model - confirm against DbtProject.
    test_result = dbt_project.test(
        test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True
    )

    assert test_result["status"] == "pass"

def test_expose_schema_validity_with_missing_columns(
    test_id: str, dbt_project: DbtProject
):
    """Expect a failure when the exposure references a column the model lacks.

    The injected model has only a column named ``order`` while the exposure
    (``ZOMG``) declares ``order_id``. The test must fail and its results query
    must contain the missing-column message.
    """
    # Test arguments that replace the real node, model columns and exposures.
    DBT_TEST_ARGS = {
        "node": "models.exposures_test",
        "columns": [{"name": "order", "dtype": "numeric"}],
        "exposures": {
            "ZOMG": {
                "meta": {"columns": [{"name": "order_id", "data_type": "string"}]},
                "url": "http://bla.com",
                "name": "ZOMG",
                "depends_on": {"nodes": ["models.exposures_test"]},
            }
        },
    }
    # NOTE(review): the ``columns=[dict(name="bla")]`` keyword is forwarded to
    # DbtProject.test; presumably it declares the columns of the generated
    # test model - confirm against DbtProject.
    test_result = dbt_project.test(
        test_id, DBT_TEST_NAME, DBT_TEST_ARGS, columns=[dict(name="bla")], as_model=True
    )

    assert "order_id column missing in the model" in test_result["test_results_query"]
    assert test_result["status"] == "fail"
58 changes: 32 additions & 26 deletions macros/edr/tests/test_exposure_schema_validity.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,25 @@
{%- do return(none) -%}
{%- endif -%}

{# Parameters used only for dependency injection in integration tests #}
{%- set node = node | default(context['model']['attached_node']) -%}
{%- set exposures = ( exposures | default(graph.exposures) ).values() -%}
{%- set columns = columns | default(adapter.get_columns_in_relation(model)) -%}

{%- set model_relation = elementary.get_model_relation_for_test(model, context["model"]) -%}
{%- set full_table_name = elementary.relation_to_full_name(model_relation) -%}
{{- elementary.test_log('start', full_table_name, 'exposure validation') -}}

{%- set matching_exposures = [] -%}

{%- for exposure in (exposures|default(graph.exposures)).values() -%}
{%- if node | default(context['model']['attached_node']) in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%}
{%- for exposure in exposures -%}
{%- if node in exposure.depends_on.nodes and exposure['meta'] | default(none) is not none -%}
{%- do matching_exposures.append(exposure) -%}
{%- endif -%}
{%- endfor -%}
{%- set matching_exposures_len = matching_exposures | length -%}
{%- if matching_exposures_len > 0 -%}
{%- set columns_to_use = columns | default(adapter.get_columns_in_relation(model)) -%}
{%- if matching_exposures | length > 0 -%}
{%- set columns_dict = {} -%}
{%- for column in columns_to_use -%}
{%- for column in columns -%}
{%- do columns_dict.update({ column['name'] | upper : elementary.normalize_data_type(column['dtype']) }) -%}
{%- endfor -%}
{%- set invalid_exposures = [] -%}
Expand All @@ -27,31 +30,34 @@
{%- set meta = exposure['meta'] | default(none) -%}
{%- if meta != none and meta['columns'] | default(none) is iterable -%}
{%- for column in meta['columns'] -%}
{%- if column['name'] | upper not in columns_dict.keys() -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': column['name'] ~ ' column missing in the model'
})
-%}
{%- elif column['data_type'] | default('') != '' and elementary.normalize_data_type(column['data_type']) != columns_dict[column['name'] | upper] -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper]
})
-%}
{%- if matching_exposures | length == 1 or context['render'](column['source']) | default('') == node -%}
{%- if column['name'] | upper not in columns_dict.keys() -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': column['name'] ~ ' column missing in the model'
})
-%}
{%- elif column['data_type'] | default('') != '' and column['data_type'] != columns_dict[column['name'] | upper] -%}
{%- do invalid_exposures.append({
'exposure': exposure['name'],
'url': exposure['url'],
'error': 'different data type for the column ' ~ column['name'] ~ ' ' ~ column['data_type'] ~ ' vs ' ~ columns_dict[column['name'] | upper]
})
-%}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- endif -%}
{%- endfor -%}
{%- for invalid_exposure in invalid_exposures %}
{{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error
{%- endfor -%}
{{ elementary.test_log('end', full_table_name, 'exposure validation') }}
{% if invalid_exposures | length == 0 %}
{%- if invalid_exposures | length > 0 -%}
{%- for invalid_exposure in invalid_exposures %}
{{ 'UNION ALL ' if not loop.first }}SELECT '{{ invalid_exposure['exposure'] }}' as exposure, '{{ invalid_exposure['url'] }}' as url, '{{ invalid_exposure['error'] }}' as error
{%- endfor -%}
{{ elementary.test_log('end', full_table_name, 'exposure validation') }}
{%- else -%}
{{ elementary.no_results_query() }}
{% endif %}
{%- endif -%}
{%- else -%}
{{ elementary.no_results_query() }}
{%- endif -%}
Expand Down
3 changes: 3 additions & 0 deletions macros/utils/common_test_configs.sql
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,9 @@
},
"column_anomalies": {
"description": "Column-level anomaly monitors (null_count, null_percent, zero_count, string_length, variance, etc.) on the column according to its data type."
},
"exposure_schema_validity": {
"description": "Column level exposure validation according to the meta.columns property in exposures.yml"
}
}
} %}
Expand Down

0 comments on commit aa7f9d7

Please sign in to comment.