diff --git a/.github/workflows/test-warehouse.yml b/.github/workflows/test-warehouse.yml index 883aa9eb8..cc56fd730 100644 --- a/.github/workflows/test-warehouse.yml +++ b/.github/workflows/test-warehouse.yml @@ -74,12 +74,12 @@ jobs: - name: Start Postgres if: inputs.warehouse-type == 'postgres' working-directory: ${{ env.TESTS_DIR }} - run: docker-compose up -d postgres + run: docker compose up -d postgres - name: Start Trino if: inputs.warehouse-type == 'trino' working-directory: ${{ env.TESTS_DIR }} - run: docker-compose -f docker-compose-trino.yml up -d + run: docker compose -f docker-compose-trino.yml up -d - name: Setup Python uses: actions/setup-python@v4 diff --git a/README.md b/README.md index 49fa108af..30ac2a558 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ Available as self-hosted or Cloud service with premium features. ```yml packages.yml packages: - package: elementary-data/elementary - version: 0.15.2 + version: 0.16.0 ## Docs: https://docs.elementary-data.com ``` diff --git a/dbt_project.yml b/dbt_project.yml index 8f9ccce69..1a4f71e9f 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -1,5 +1,5 @@ name: "elementary" -version: "0.15.2" +version: "0.16.0" require-dbt-version: [">=1.0.0", "<2.0.0"] diff --git a/integration_tests/README.md b/integration_tests/README.md index 44e82c847..6f1c7eb22 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -31,7 +31,15 @@ elementary_tests: pip install -r requirements.txt ``` -4. Run the tests. +4. Install elementary-data + +`elementary-data` is required for testing. Install specific version if latest doesn't fit your needs. + +```shell +pip install elementary-data +``` + +5. Run the tests. ```shell pytest -vvv -n8 diff --git a/integration_tests/tests/test_collect_metrics.py b/integration_tests/tests/test_collect_metrics.py index 8ba3e56db..336bef02b 100644 --- a/integration_tests/tests/test_collect_metrics.py +++ b/integration_tests/tests/test_collect_metrics.py @@ -9,7 +9,7 @@ TIMESTAMP_COLUMN = "updated_at" DBT_TEST_NAME = "elementary.collect_metrics" -COL_TO_METRIC_NAMES = { +COL_TO_METRIC_TYPES = { None: {"row_count"}, "id": {"average"}, "name": {"average_length"}, @@ -17,19 +17,19 @@ ("id", "name"): {"zero_count"}, # Shouldn't do anything on 'name'. } EXPECTED_COL_TO_METRIC_NAMES = { - None: {"row_count"}, - "id": {"average", "null_count", "zero_count"}, - "name": {"average_length", "null_count"}, - "updated_at": {"null_count"}, + None: {"custom_row_count"}, + "id": {"custom_average", "custom_null_count", "custom_zero_count"}, + "name": {"custom_average_length", "custom_null_count"}, + "updated_at": {"custom_null_count"}, } DBT_TEST_ARGS = { "timestamp_column": TIMESTAMP_COLUMN, "metrics": [ - {"name": metric_name, "columns": col_name} - for col_name, metric_names in COL_TO_METRIC_NAMES.items() - for metric_name in metric_names + {"type": metric_type, "name": f"custom_{metric_type}", "columns": col_name} + for col_name, metric_types in COL_TO_METRIC_TYPES.items() + for metric_type in metric_types ], } @@ -54,6 +54,7 @@ def test_collect_metrics(test_id: str, dbt_project: DbtProject): ) col_to_metric_names = defaultdict(set) for metric in metrics: + assert metric["metric_type"] is not None col_name = metric["column_name"].lower() if metric["column_name"] else None metric_name = metric["metric_name"] col_to_metric_names[col_name].add(metric_name) @@ -109,6 +110,11 @@ def test_collect_group_by_metrics(test_id: str, dbt_project: DbtProject): DBT_TEST_NAME, { **DBT_TEST_ARGS, + "metrics": [ + metric + for metric in DBT_TEST_ARGS["metrics"] + if metric["type"] == "row_count" + ], "dimensions": ["dimension"], }, data=data, @@ -116,16 +122,10 @@ def test_collect_group_by_metrics(test_id: str, dbt_project: DbtProject): assert test_result["status"] == "pass" - # Unfortunately, the dimension's metric name is 'dimension' rather than 'row_count'. - expected_col_to_metric_names = { - **EXPECTED_COL_TO_METRIC_NAMES, - "dimension": {"null_count"}, - None: {"dimension"}, - } expected_dim_to_col_to_metric_names = { - "dim1": expected_col_to_metric_names, - "dim2": expected_col_to_metric_names, - None: {None: {"dimension"}}, + "dim1": {None: {"custom_row_count"}}, + "dim2": {None: {"custom_row_count"}}, + None: {None: {"custom_row_count"}}, } metrics = dbt_project.read_table( METRICS_TABLE, @@ -141,6 +141,26 @@ def test_collect_group_by_metrics(test_id: str, dbt_project: DbtProject): assert dim_to_col_to_metric_names == expected_dim_to_col_to_metric_names +def test_collect_metrics_unique_metric_name(test_id: str, dbt_project: DbtProject): + args = DBT_TEST_ARGS.copy() + args["metrics"].append(args["metrics"][0]) + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + args, + ) + assert test_result["status"] == "error" + + args = DBT_TEST_ARGS.copy() + args["metrics"][0].pop("name") + test_result = dbt_project.test( + test_id, + DBT_TEST_NAME, + args, + ) + assert test_result["status"] == "error" + + def test_collect_metrics_elementary_disabled(test_id: str, dbt_project: DbtProject): utc_today = datetime.utcnow().date() data: List[Dict[str, Any]] = [ diff --git a/integration_tests/tests/test_dbt_artifacts/test_artifacts.py b/integration_tests/tests/test_dbt_artifacts/test_artifacts.py index 0391dc6cb..eacdf3988 100644 --- a/integration_tests/tests/test_dbt_artifacts/test_artifacts.py +++ b/integration_tests/tests/test_dbt_artifacts/test_artifacts.py @@ -59,6 +59,38 @@ def test_dbt_invocations(dbt_project: DbtProject): ) +def test_seed_run_results(dbt_project: DbtProject): + dbt_project.read_table("seed_run_results", raise_if_empty=False) + + +def test_job_run_results(dbt_project: DbtProject): + dbt_project.read_table("job_run_results", raise_if_empty=False) + + +def test_model_run_results(dbt_project: DbtProject): + dbt_project.read_table("model_run_results", raise_if_empty=False) + + +def test_snapshot_run_results(dbt_project: DbtProject): + dbt_project.read_table("snapshot_run_results", raise_if_empty=False) + + +def test_monitors_runs(dbt_project: DbtProject): + dbt_project.read_table("monitors_runs", raise_if_empty=False) + + +def test_dbt_artifacts_hashes(dbt_project: DbtProject): + dbt_project.read_table("dbt_artifacts_hashes", raise_if_empty=False) + + +def test_anomaly_threshold_sensitivity(dbt_project: DbtProject): + dbt_project.read_table("anomaly_threshold_sensitivity", raise_if_empty=False) + + +def test_metrics_anomaly_score(dbt_project: DbtProject): + dbt_project.read_table("metrics_anomaly_score", raise_if_empty=False) + + @pytest.mark.requires_dbt_version("1.8.0") def test_source_freshness_results(test_id: str, dbt_project: DbtProject): source_config = { diff --git a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql index ff82ad592..d822c5110 100644 --- a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql +++ b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql @@ -1,4 +1,4 @@ -{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, monitors, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %} +{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %} {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %} {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %} {%- set test_execution_id = elementary.get_test_execution_id() %} @@ -53,6 +53,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, @@ -80,7 +81,7 @@ and updated_at > {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(latest_full_refresh)) }} {% endif %} and upper(full_table_name) = upper('{{ full_table_name }}') - and metric_name in {{ elementary.strings_list_to_tuple(monitors) }} + and metric_name in {{ elementary.strings_list_to_tuple(metric_names) }} {%- if column_name %} and upper(column_name) = upper('{{ column_name }}') {%- endif %} diff --git a/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql b/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql index d01fb6724..1d66953bb 100644 --- a/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql +++ b/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql @@ -20,7 +20,7 @@ {% endmacro %} -{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, monitors=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %} +{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, metric_names=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %} {%- set detection_end = elementary.get_detection_end(detection_delay) %} {%- set detection_end_expr = elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) %} @@ -29,8 +29,8 @@ {%- set full_table_name = elementary.relation_to_full_name(model_relation) %} {%- set force_metrics_backfill = elementary.get_config_var('force_metrics_backfill') %} - {%- if monitors %} - {%- set monitors_tuple = elementary.strings_list_to_tuple(monitors) %} + {%- if metric_names %} + {%- set metric_names_tuple = elementary.strings_list_to_tuple(metric_names) %} {%- endif %} {%- if unit_test %} @@ -71,8 +71,8 @@ and bucket_end <= {{ detection_end_expr }} and upper(full_table_name) = upper('{{ full_table_name }}') and metric_properties = {{ elementary.dict_to_quoted_json(metric_properties) }} - {%- if monitors %} - and metric_name in {{ monitors_tuple }} + {%- if metric_names %} + and metric_name in {{ metric_names_tuple }} {%- endif %} {%- if column_name %} and upper(column_name) = upper('{{ column_name }}') diff --git a/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql index a76a0a1f1..0f35f1f1b 100644 --- a/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql +++ b/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql @@ -1,4 +1,4 @@ -{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_monitors, metric_properties, dimensions) %} +{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_metrics, metric_properties, dimensions) %} {%- set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %} {%- set timestamp_column = metric_properties.timestamp_column %} {% set prefixed_dimensions = [] %} @@ -6,6 +6,13 @@ {% do prefixed_dimensions.append("dimension_" ~ dimension_column) %} {% endfor %} + {% set metric_types = [] %} + {% set metric_name_to_type = {} %} + {% for metric in column_metrics %} + {% do metric_types.append(metric.type) %} + {% do metric_name_to_type.update({metric.name: metric.type}) %} + {% endfor %} + with monitored_table as ( select * from {{ monitored_table }} @@ -38,9 +45,9 @@ ), {% endif %} - column_monitors as ( + column_metrics as ( - {%- if column_monitors %} + {%- if column_metrics %} {%- set column = column_obj.quoted -%} select {%- if timestamp_column %} @@ -53,26 +60,26 @@ {% if dimensions | length > 0 %} {{ elementary.select_dimensions_columns(prefixed_dimensions) }}, {% endif %} - {%- if 'null_count' in column_monitors -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count, - {%- if 'null_percent' in column_monitors -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent, - {%- if 'not_null_percent' in column_monitors -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent, - {%- if 'max' in column_monitors -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max, - {%- if 'min' in column_monitors -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min, - {%- if 'average' in column_monitors -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average, - {%- if 'zero_count' in column_monitors -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count, - {%- if 'zero_percent' in column_monitors -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent, - {%- if 'not_zero_percent' in column_monitors -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent, - {%- if 'standard_deviation' in column_monitors -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation, - {%- if 'variance' in column_monitors -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance, - {%- if 'max_length' in column_monitors -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length, - {%- if 'min_length' in column_monitors -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length, - {%- if 'average_length' in column_monitors -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length, - {%- if 'missing_count' in column_monitors -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count, - {%- if 'missing_percent' in column_monitors -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent, - {%- if 'count_true' in column_monitors -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true, - {%- if 'count_false' in column_monitors -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false, - {%- if 'not_missing_percent' in column_monitors -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent, - {%- if 'sum' in column_monitors -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum + {%- if 'null_count' in metric_types -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count, + {%- if 'null_percent' in metric_types -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent, + {%- if 'not_null_percent' in metric_types -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent, + {%- if 'max' in metric_types -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max, + {%- if 'min' in metric_types -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min, + {%- if 'average' in metric_types -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average, + {%- if 'zero_count' in metric_types -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count, + {%- if 'zero_percent' in metric_types -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent, + {%- if 'not_zero_percent' in metric_types -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent, + {%- if 'standard_deviation' in metric_types -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation, + {%- if 'variance' in metric_types -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance, + {%- if 'max_length' in metric_types -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length, + {%- if 'min_length' in metric_types -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length, + {%- if 'average_length' in metric_types -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length, + {%- if 'missing_count' in metric_types -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count, + {%- if 'missing_percent' in metric_types -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent, + {%- if 'count_true' in metric_types -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true, + {%- if 'count_false' in metric_types -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false, + {%- if 'not_missing_percent' in metric_types -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent, + {%- if 'sum' in metric_types -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum from filtered_monitored_table {%- if timestamp_column %} left join buckets on (edr_bucket_start = start_bucket_in_data) @@ -88,10 +95,10 @@ ), - column_monitors_unpivot as ( + column_metrics_unpivot as ( - {%- if column_monitors %} - {% for monitor in column_monitors %} + {%- if column_metrics %} + {% for metric_name, metric_type in metric_name_to_type.items() %} select {{ elementary.const_as_string(column_obj.name) }} as edr_column_name, bucket_start, @@ -108,13 +115,14 @@ {{ elementary.null_string() }} as dimension, {{ elementary.null_string() }} as dimension_value, {% endif %} - {{ elementary.edr_cast_as_float(monitor) }} as metric_value, - {{ elementary.edr_cast_as_string(elementary.edr_quote(monitor)) }} as metric_name - from column_monitors where {{ monitor }} is not null + {{ elementary.edr_cast_as_float(metric_type) }} as metric_value, + {{ elementary.edr_cast_as_string(elementary.edr_quote(metric_name)) }} as metric_name, + {{ elementary.edr_cast_as_string(elementary.edr_quote(metric_type)) }} as metric_type + from column_metrics where {{ metric_type }} is not null {% if not loop.last %} union all {% endif %} {%- endfor %} {%- else %} - {{ elementary.empty_table([('edr_column_name','string'),('bucket_start','timestamp'),('bucket_end','timestamp'),('bucket_duration_hours','int'),('dimension','string'),('dimension_value','string'),('metric_name','string'),('metric_value','float')]) }} + {{ elementary.empty_table([('edr_column_name','string'),('bucket_start','timestamp'),('bucket_end','timestamp'),('bucket_duration_hours','int'),('dimension','string'),('dimension_value','string'),('metric_name','string'),('metric_type','string'),('metric_value','float')]) }} {%- endif %} ), @@ -125,6 +133,7 @@ {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, edr_column_name as column_name, metric_name, + metric_type, {{ elementary.edr_cast_as_float('metric_value') }} as metric_value, {{ elementary.null_string() }} as source_value, bucket_start, @@ -133,7 +142,7 @@ dimension, dimension_value, {{elementary.dict_to_quoted_json(metric_properties) }} as metric_properties - from column_monitors_unpivot + from column_metrics_unpivot ) @@ -142,6 +151,7 @@ 'full_table_name', 'column_name', 'metric_name', + 'metric_type', 'dimension', 'dimension_value', 'bucket_end', @@ -150,6 +160,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, @@ -166,9 +177,6 @@ {% macro select_dimensions_columns(dimension_columns, as_prefix="") %} {% set select_statements %} {%- for column in dimension_columns -%} - {%- if col_prefix -%} - {{ col_prefix ~ "_" }} - {%- endif -%} {{ column }} {%- if as_prefix -%} {{ " as " ~ as_prefix ~ "_" ~ column }} diff --git a/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql index 2806c5fc4..c9e98a464 100644 --- a/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql +++ b/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql @@ -1,5 +1,5 @@ -{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties) %} - {% set metric_name = 'dimension' %} +{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties, metric_name=none) %} + {% set metric_name = metric_name or 'dimension' %} {% set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %} {% set dimensions_string = elementary.join_list(dimensions, '; ') %} {% set concat_dimensions_sql_expression = elementary.list_concat_with_separator(dimensions, '; ') %} @@ -126,6 +126,7 @@ {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, {{ elementary.null_string() }} as column_name, metric_name, + {{ elementary.const_as_string("row_count") }} as metric_type, {{ elementary.edr_cast_as_float('metric_value') }} as metric_value, source_value, edr_bucket_start as bucket_start, @@ -195,6 +196,7 @@ {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, {{ elementary.null_string() }} as column_name, {{ elementary.const_as_string(metric_name) }} as metric_name, + {{ elementary.const_as_string("row_count") }} as metric_type, {{ elementary.edr_cast_as_float('row_count_value') }} as metric_value, {{ elementary.null_string() }} as source_value, {{ elementary.null_timestamp() }} as bucket_start, @@ -212,6 +214,7 @@ 'full_table_name', 'column_name', 'metric_name', + 'metric_type', 'dimension', 'dimension_value', 'bucket_end', @@ -219,6 +222,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, diff --git a/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql index 1b57a92ae..8a4c88af6 100644 --- a/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql +++ b/macros/edr/data_monitoring/monitors_query/table_monitoring_query.sql @@ -1,18 +1,20 @@ -{% macro table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_monitors, metric_properties) %} +{% macro table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_metrics, metric_properties) %} - {{ elementary.get_table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_monitors, metric_properties) }} + {{ elementary.get_table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_metrics, metric_properties) }} select {{ elementary.generate_surrogate_key([ 'full_table_name', 'column_name', 'metric_name', + 'metric_type', 'bucket_end', 'metric_properties' ]) }} as id, full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, @@ -26,38 +28,33 @@ {% endmacro %} -{% macro get_table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_monitors, metric_properties) %} +{% macro get_table_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, table_metrics, metric_properties) %} {%- set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %} {%- set timestamp_column = metric_properties.timestamp_column %} {%- if timestamp_column %} - {{ elementary.get_timestamp_table_query(monitored_table, metric_properties, timestamp_column, table_monitors, min_bucket_start, max_bucket_end, full_table_name_str) }} - {%- elif table_monitors == ["row_count"] %} - {{ elementary.get_no_timestamp_volume_query(monitored_table, metric_properties, full_table_name_str) }} - {%- elif table_monitors == ["event_freshness"] %} - {# Event freshness with only event_timestamp and not update_timestamp #} - {{ elementary.get_no_timestamp_event_freshness_query(monitored_table, metric_properties, full_table_name_str) }} - {%- else %} - {% do exceptions.raise_compiler_error("freshness_anomalies test is not supported without timestamp_column.") %} - {# TODO: We can enhance this test for models to use model_run_results in case a timestamp column is not defined #} - {% do return(none) %} - {%- endif %} + {{ elementary.get_timestamp_table_query(monitored_table, metric_properties, timestamp_column, table_metrics, min_bucket_start, max_bucket_end, full_table_name_str) }} + {% else %} + {{ elementary.get_no_timestamp_table_query(monitored_table, metric_properties, table_metrics, full_table_name_str) }} + {% endif %} {% endmacro %} -{% macro get_no_timestamp_volume_query(monitored_table, metric_properties, full_table_name_str) %} - with monitored_table_metrics as ( - select - {{ elementary.const_as_string('row_count') }} as metric_name, - {{ elementary.row_count() }} as metric_value - from {{ monitored_table }} +{% macro get_no_timestamp_table_query(monitored_table, metric_properties, table_metrics, full_table_name_str) %} + with monitored_table as ( + select * from {{ monitored_table }} {% if metric_properties.where_expression %} where {{ metric_properties.where_expression }} {% endif %} - group by 1 ), + + metrics as ( + {{ elementary.get_unified_metrics_query(table_metrics, metric_properties) }} + ), + metrics_final as ( select {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, {{ elementary.null_string() }} as column_name, metric_name, + metric_type, {{ elementary.edr_cast_as_float('metric_value') }} as metric_value, {{ elementary.null_string() }} as source_value, {{ elementary.null_timestamp() }} as bucket_start, @@ -66,11 +63,19 @@ {{ elementary.null_string() }} as dimension, {{ elementary.null_string() }} as dimension_value, {{elementary.dict_to_quoted_json(metric_properties) }} as metric_properties - from monitored_table_metrics + from metrics ) {% endmacro %} -{% macro get_timestamp_table_query(monitored_table, metric_properties, timestamp_column, table_monitors, min_bucket_start, max_bucket_end, full_table_name_str) %} +{% macro get_no_timestamp_volume_query(metric, metric_properties) %} + select + {{ elementary.const_as_string(metric.name) }} as metric_name, + {{ elementary.const_as_string("row_count") }} as metric_type, + {{ elementary.row_count() }} as metric_value + from monitored_table +{% endmacro %} + +{% macro get_timestamp_table_query(monitored_table, metric_properties, timestamp_column, table_metrics, min_bucket_start, max_bucket_end, full_table_name_str) %} with partially_time_filtered_monitored_table as ( select {{ elementary.edr_cast_as_timestamp(timestamp_column) }} as monitored_table_timestamp_column @@ -109,8 +114,7 @@ ), metrics as ( - {{ elementary.get_unified_metrics_query(metrics=table_monitors, - metric_properties=metric_properties) }} + {{ elementary.get_unified_metrics_query(table_metrics, metric_properties) }} ), metrics_final as ( @@ -118,6 +122,7 @@ {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, {{ elementary.null_string() }} as column_name, metric_name, + metric_type, {{ elementary.edr_cast_as_float('metric_value') }} as metric_value, source_value, edr_bucket_start as bucket_start, @@ -134,57 +139,62 @@ {% endmacro %} -{% macro get_unified_metrics_query(metrics, metric_properties) %} - {%- set included_monitors = {} %} - {%- for metric_name in metrics %} - {%- set metric_query = elementary.get_metric_query(metric_name, metric_properties) %} - {%- if metric_query %} - {% do included_monitors.update({metric_name: metric_query}) %} - {%- endif %} +{% macro get_unified_metrics_query(table_metrics, metric_properties) %} + {%- set metric_name_to_query = {} %} + {%- for metric in table_metrics %} + {% set metric_query = elementary.get_metric_query(metric, metric_properties) %} + {% do metric_name_to_query.update({metric.name: metric_query}) %} {%- endfor %} - {% if not included_monitors %} + {% if not metric_name_to_query %} {% if metric_properties.timestamp_column %} - {% do return(elementary.empty_table([('edr_bucket_start','timestamp'),('edr_bucket_end','timestamp'),('metric_name','string'),('source_value','string'),('metric_value','int')])) %} + {% do return(elementary.empty_table([('edr_bucket_start','timestamp'),('edr_bucket_end','timestamp'),('metric_name','string'),('metric_type','string'),('source_value','string'),('metric_value','int')])) %} {% else %} - {% do return(elementary.empty_table([('metric_name','string'),('metric_value','int')])) %} + {% do return(elementary.empty_table([('metric_name','string'),('metric_type','string'),('metric_value','int')])) %} {% endif %} {% endif %} with - {%- for metric_name, metric_query in included_monitors.items() %} + {%- for metric_name, metric_query in metric_name_to_query.items() %} {{ metric_name }} as ( {{ metric_query }} ){% if not loop.last %},{% endif %} {%- endfor %} - {%- for metric_name in included_monitors %} + {%- for metric_name in metric_name_to_query %} select * from {{ metric_name }} {% if not loop.last %} union all {% endif %} {%- endfor %} {% endmacro %} -{% macro get_metric_query(metric_name, metric_properties) %} - {%- set metrics_macro_mapping = { - "row_count": elementary.row_count_metric_query, - "freshness": elementary.freshness_metric_query, - "event_freshness": elementary.event_freshness_metric_query - } %} +{% macro get_metric_query(metric, metric_properties) %} + {% if metric_properties.timestamp_column %} + {% set metrics_macro_mapping = { + "row_count": elementary.row_count_metric_query, + "freshness": elementary.freshness_metric_query, + "event_freshness": elementary.event_freshness_metric_query + } %} + {% else %} + {% set metrics_macro_mapping = { + "row_count": elementary.get_no_timestamp_volume_query, + "event_freshness": elementary.get_no_timestamp_event_freshness_query + } %} + {% endif %} - {%- set metric_macro = metrics_macro_mapping.get(metric_name) %} + {%- set metric_macro = metrics_macro_mapping.get(metric.type) %} {%- if not metric_macro %} - {%- do return(none) %} + {%- do exceptions.raise_compiler_error("Unable to get macro query for metric '{}'".format(metric.type)) %} {%- endif %} - {%- set metric_query = metric_macro(metric_properties) %} + {%- set metric_query = metric_macro(metric, metric_properties) %} {%- if not metric_query %} - {%- do return(none) %} + {%- do exceptions.raise_compiler_error("Unable to get query for metric '{}'".format(metric.type)) %} {%- endif %} {{ metric_query }} {% endmacro %} -{% macro row_count_metric_query(metric_properties) %} +{% macro row_count_metric_query(metric, metric_properties) %} with row_count_values as ( select edr_bucket_start, edr_bucket_end, @@ -198,13 +208,14 @@ select edr_bucket_start, edr_bucket_end, - {{ elementary.const_as_string('row_count') }} as metric_name, + {{ elementary.const_as_string(metric.name) }} as metric_name, + {{ elementary.const_as_string("row_count") }} as metric_type, {{ elementary.null_string() }} as source_value, row_count_value as metric_value from row_count_values {% endmacro %} -{% macro freshness_metric_query(metric_properties) %} +{% macro freshness_metric_query(metric, metric_properties) %} -- get ordered consecutive update timestamps in the source data with unique_timestamps as ( select distinct monitored_table_timestamp_column as timestamp_val @@ -265,18 +276,20 @@ select edr_bucket_start, edr_bucket_end, - {{ elementary.const_as_string('freshness') }} as metric_name, + {{ elementary.const_as_string(metric.name) }} as metric_name, + {{ elementary.const_as_string("freshness") }} as metric_type, {{ elementary.edr_cast_as_string('update_timestamp') }} as source_value, freshness as metric_value from bucket_freshness_ranked where row_number = 1 {% endmacro %} -{% macro event_freshness_metric_query(metric_properties) %} +{% macro event_freshness_metric_query(metric, metric_properties) %} select edr_bucket_start, edr_bucket_end, - {{ elementary.const_as_string('event_freshness') }} as metric_name, + {{ elementary.const_as_string(metric.name) }} as metric_name, + {{ elementary.const_as_string("event_freshness") }} as metric_type, {{ elementary.edr_cast_as_string('max({})'.format('monitored_table_event_timestamp_column')) }} as source_value, {{ 'coalesce(max({}), {})'.format( elementary.timediff('second', elementary.edr_cast_as_timestamp('monitored_table_event_timestamp_column'), elementary.edr_cast_as_timestamp('monitored_table_timestamp_column')), @@ -286,28 +299,10 @@ group by 1,2 {% endmacro %} -{% macro get_no_timestamp_event_freshness_query(monitored_table, metric_properties, full_table_name_str) %} - with monitored_table_metrics as ( - select - {{ elementary.const_as_string('event_freshness') }} as metric_name, - {{ elementary.timediff('second', elementary.edr_cast_as_timestamp("max({})".format(metric_properties.event_timestamp_column)), elementary.edr_quote(elementary.get_run_started_at())) }} as metric_value - from {{ monitored_table }} - {% if metric_properties.where_expression %} where {{ metric_properties.where_expression }} {% endif %} - group by 1 - ), - metrics_final as ( - select - {{ elementary.edr_cast_as_string(full_table_name_str) }} as full_table_name, - {{ elementary.null_string() }} as column_name, - metric_name, - {{ elementary.edr_cast_as_float('metric_value') }} as metric_value, - {{ elementary.null_string() }} as source_value, - {{ elementary.null_timestamp() }} as bucket_start, - {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(elementary.run_started_at_as_string())) }} as bucket_end, - {{ elementary.null_int() }} as bucket_duration_hours, - {{ elementary.null_string() }} as dimension, - {{ elementary.null_string() }} as dimension_value, - {{elementary.dict_to_quoted_json(metric_properties) }} as metric_properties - from monitored_table_metrics - ) -{% endmacro %} \ No newline at end of file +{% macro get_no_timestamp_event_freshness_query(metric, metric_properties) %} + select + {{ elementary.const_as_string(metric.name) }} as metric_name, + {{ elementary.const_as_string("event_freshness") }} as metric_type, + {{ elementary.timediff('second', elementary.edr_cast_as_timestamp("max({})".format(metric_properties.event_timestamp_column)), elementary.edr_quote(elementary.get_run_started_at())) }} as metric_value + from monitored_table +{% endmacro %} diff --git a/macros/edr/dbt_artifacts/upload_dbt_tests.sql b/macros/edr/dbt_artifacts/upload_dbt_tests.sql index edd9d4605..8327f90cf 100644 --- a/macros/edr/dbt_artifacts/upload_dbt_tests.sql +++ b/macros/edr/dbt_artifacts/upload_dbt_tests.sql @@ -23,6 +23,7 @@ ('error_if', 'string'), ('test_params', 'long_string'), ('test_namespace', 'string'), + ('test_original_name', 'string'), ('tags', 'long_string'), ('model_tags', 'long_string'), ('model_owners', 'long_string'), @@ -48,9 +49,10 @@ {% set test_metadata = elementary.safe_get_with_default(node_dict, 'test_metadata', {}) %} {% set test_namespace = test_metadata.get('namespace') %} + {% set test_original_name = test_metadata.get('name') %} {% set test_short_name = elementary.get_test_short_name(node_dict, test_metadata) %} - {% set default_description = elementary.get_default_description(test_short_name, test_namespace) %} + {% set default_description = elementary.get_default_description(test_original_name, test_namespace) %} {% set config_meta_dict = elementary.safe_get_with_default(config_dict, 'meta', {}) %} {% set meta_dict = {} %} @@ -148,6 +150,7 @@ 'error_if': config_dict.get('error_if'), 'test_params': test_kwargs, 'test_namespace': test_namespace, + 'test_original_name': test_original_name, 'tags': elementary.filter_none_and_sort(tags), 'model_tags': elementary.filter_none_and_sort(test_models_tags), 'model_owners': elementary.filter_none_and_sort(test_models_owners), @@ -165,7 +168,7 @@ 'compiled_code': elementary.get_compiled_code(node_dict), 'path': node_dict.get('path'), 'generated_at': elementary.datetime_now_utc_as_string(), - 'quality_dimension': meta_dict.get('quality_dimension') or elementary.get_quality_dimension(test_short_name, test_namespace) + 'quality_dimension': meta_dict.get('quality_dimension') or elementary.get_quality_dimension(test_original_name, test_namespace) }%} {% do flatten_test_metadata_dict.update({"metadata_hash": elementary.get_artifact_metadata_hash(flatten_test_metadata_dict)}) %} {{ return(flatten_test_metadata_dict) }} @@ -217,9 +220,9 @@ {% endmacro %} -{% macro get_default_description(short_name, test_namespace = none) %} +{% macro get_default_description(test_original_name, test_namespace = none) %} {% set description = none %} - {% set common_test_config = elementary.get_common_test_config_by_namespace_and_name(test_namespace, short_name) %} + {% set common_test_config = elementary.get_common_test_config_by_namespace_and_name(test_namespace, test_original_name) %} {% if common_test_config %} {% set description = common_test_config.get("description") %} {% endif %} @@ -227,9 +230,9 @@ {% endmacro %} -{% macro get_quality_dimension(short_name, test_namespace = none) %} +{% macro get_quality_dimension(test_original_name, test_namespace = none) %} {% set quality_dimension = none %} - {% set common_test_config = elementary.get_common_test_config_by_namespace_and_name(test_namespace, short_name) %} + {% set common_test_config = elementary.get_common_test_config_by_namespace_and_name(test_namespace, test_original_name) %} {% if common_test_config %} {% set quality_dimension = common_test_config.get("quality_dimension") %} {% endif %} diff --git a/macros/edr/metadata_collection/get_metric_properties.sql b/macros/edr/metadata_collection/get_metric_properties.sql index aeed5b1e5..0ddec03ec 100644 --- a/macros/edr/metadata_collection/get_metric_properties.sql +++ b/macros/edr/metadata_collection/get_metric_properties.sql @@ -5,7 +5,8 @@ time_bucket, dimensions=none, freshness_column=none, - event_timestamp_column=none + event_timestamp_column=none, + collected_by=none ) %} {% set timestamp_column = elementary.get_test_argument('timestamp_column', timestamp_column, model_graph_node) %} {% set where_expression = elementary.get_test_argument('where_expression', where_expression, model_graph_node) %} @@ -13,12 +14,16 @@ {% set freshness_column = elementary.get_test_argument('freshness_column', freshness_column, model_graph_node) %} {% set event_timestamp_column = elementary.get_test_argument('event_timestamp_column', event_timestamp_column, model_graph_node) %} {% set dimensions = elementary.get_test_argument('dimensions', dimensions, model_graph_node) %} - {% do return({ + {% set metric_props = { 'timestamp_column': timestamp_column, 'where_expression': where_expression, 'time_bucket': time_bucket, 'freshness_column': freshness_column, 'event_timestamp_column': event_timestamp_column, 'dimensions': dimensions - }) %} + } %} + {% if collected_by %} + {% do metric_props.update({'collected_by': collected_by}) %} + {% endif %} + {% do return(metric_props) %} {% endmacro %} diff --git a/macros/edr/system/system_utils/empty_table.sql b/macros/edr/system/system_utils/empty_table.sql index 4e9a35140..6adcd013d 100644 --- a/macros/edr/system/system_utils/empty_table.sql +++ b/macros/edr/system/system_utils/empty_table.sql @@ -58,6 +58,7 @@ ('full_table_name','string'), ('column_name','string'), ('metric_name','string'), + ('metric_type','string'), ('metric_value','float'), ('source_value','string'), ('bucket_start','timestamp'), diff --git a/macros/edr/tests/on_run_end/handle_tests_results.sql b/macros/edr/tests/on_run_end/handle_tests_results.sql index 4541f68bc..98e41f082 100644 --- a/macros/edr/tests/on_run_end/handle_tests_results.sql +++ b/macros/edr/tests/on_run_end/handle_tests_results.sql @@ -77,6 +77,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, @@ -93,6 +94,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, diff --git a/macros/edr/tests/on_run_end/union_metrics_query.sql b/macros/edr/tests/on_run_end/union_metrics_query.sql index 8fa21b003..3ea0f32ef 100644 --- a/macros/edr/tests/on_run_end/union_metrics_query.sql +++ b/macros/edr/tests/on_run_end/union_metrics_query.sql @@ -17,6 +17,7 @@ full_table_name, column_name, metric_name, + metric_type, metric_value, source_value, bucket_start, diff --git a/macros/edr/tests/test_all_columns_anomalies.sql b/macros/edr/tests/test_all_columns_anomalies.sql index 07b1375b1..dced33e3d 100644 --- a/macros/edr/tests/test_all_columns_anomalies.sql +++ b/macros/edr/tests/test_all_columns_anomalies.sql @@ -62,13 +62,19 @@ backfill_days=test_configuration.backfill_days, days_back=test_configuration.days_back, detection_delay=test_configuration.detection_delay, - monitors=column_monitors, + metric_names=column_monitors, column_name=column_name, metric_properties=metric_properties) %} {%- endif %} {{ elementary.debug_log('min_bucket_start - ' ~ min_bucket_start) }} {{ elementary.test_log('start', full_table_name, column_name) }} - {%- set column_monitoring_query = elementary.column_monitoring_query(model, model_relation, min_bucket_start, max_bucket_end, test_configuration.days_back, column_obj, column_monitors, metric_properties, dimensions) %} + + {% set metrics = [] %} + {% for monitor in column_monitors %} + {% do metrics.append({"name": monitor, "type": monitor}) %} + {% endfor %} + + {%- set column_monitoring_query = elementary.column_monitoring_query(model, model_relation, min_bucket_start, max_bucket_end, test_configuration.days_back, column_obj, metrics, metric_properties, dimensions) %} {%- do elementary.run_query(elementary.insert_as_select(temp_table_relation, column_monitoring_query)) -%} {%- else -%} {{ elementary.debug_log('column ' ~ column_name ~ ' is excluded') }} @@ -80,7 +86,7 @@ {%- set anomaly_scores_query = elementary.get_anomaly_scores_query(test_metrics_table_relation=temp_table_relation, model_relation=model_relation, test_configuration=test_configuration, - monitors=all_columns_monitors, + metric_names=all_columns_monitors, columns_only=true, metric_properties=metric_properties) %} diff --git a/macros/edr/tests/test_collect_metrics.sql b/macros/edr/tests/test_collect_metrics.sql index 59d348aa0..12ceb4523 100644 --- a/macros/edr/tests/test_collect_metrics.sql +++ b/macros/edr/tests/test_collect_metrics.sql @@ -6,7 +6,8 @@ days_back=64, backfill_days=none, where_expression=none, - dimensions=none + dimensions=none, + cloud_monitored=false ) %} {{ config( @@ -18,6 +19,7 @@ {% do return(elementary.no_results_query()) %} {% endif %} + {% do elementary.validate_unique_metric_names(metrics) %} {% do elementary.debug_log("Metrics: {}".format(metrics)) %} {% if not dimensions %} @@ -36,11 +38,11 @@ {% set col_to_metrics = {} %} {% for metric in metrics %} {% if metric.get("columns") %} - {% if metric.name not in available_col_monitors %} - {% if metric.name in available_table_monitors %} - {% do exceptions.raise_compiler_error("The metric '{}' is a table metric and shouldn't receive 'columns' argument.".format(metric.name)) %} + {% if metric.type not in available_col_monitors %} + {% if metric.type in available_table_monitors %} + {% do exceptions.raise_compiler_error("The metric '{}' is a table metric and shouldn't receive 'columns' argument.".format(metric.type)) %} {% endif %} - {% do exceptions.raise_compiler_error("Unsupported column metric: '{}'.".format(metric.name)) %} + {% do exceptions.raise_compiler_error("Unsupported column metric: '{}'.".format(metric.type)) %} {% endif %} {% if metric.columns is string %} @@ -60,11 +62,11 @@ {% do exceptions.raise_compiler_error("Unexpected value provided for 'columns' argument.") %} {% endif %} {% else %} - {% if metric.name not in available_table_monitors %} - {% if metric.name in available_col_monitors %} - {% do exceptions.raise_compiler_error("The metric '{}' is a column metric and should receive 'columns' argument.".format(metric.name)) %} + {% if metric.type not in available_table_monitors %} + {% if metric.type in available_col_monitors %} + {% do exceptions.raise_compiler_error("The metric '{}' is a column metric and should receive 'columns' argument.".format(metric.type)) %} {% endif %} - {% do exceptions.raise_compiler_error("Unsupported table metric: '{}'.".format(metric.name)) %} + {% do exceptions.raise_compiler_error("Unsupported table metric: '{}'.".format(metric.type)) %} {% endif %} {% do table_metrics.append(metric) %} @@ -72,11 +74,11 @@ {% endfor %} {% if table_metrics %} - {% do elementary.collect_table_metrics(table_metrics, model, model_relation, timestamp_column, time_bucket, days_back, backfill_days, where_expression, dimensions) %} + {% do elementary.collect_table_metrics(table_metrics, model, model_relation, timestamp_column, time_bucket, days_back, backfill_days, where_expression, dimensions, collected_by="collect_metrics") %} {% endif %} {% for col_name, col_metrics in col_to_metrics.items() %} - {% do elementary.collect_column_metrics(col_metrics, model, model_relation, col_name, timestamp_column, time_bucket, days_back, backfill_days, where_expression, dimensions) %} + {% do elementary.collect_column_metrics(col_metrics, model, model_relation, col_name, timestamp_column, time_bucket, days_back, backfill_days, where_expression, dimensions, collected_by="collect_metrics") %} {% endfor %} {# This test always passes. #} diff --git a/macros/edr/tests/test_column_anomalies.sql b/macros/edr/tests/test_column_anomalies.sql index 2836e7d54..5a54a3d6b 100644 --- a/macros/edr/tests/test_column_anomalies.sql +++ b/macros/edr/tests/test_column_anomalies.sql @@ -56,10 +56,16 @@ backfill_days=test_configuration.backfill_days, days_back=test_configuration.days_back, detection_delay=test_configuration.detection_delay, - monitors=column_monitors, + metric_names=column_monitors, column_name=column_name, metric_properties=metric_properties) %} {%- endif %} + + {% set metrics = [] %} + {% for monitor in column_monitors %} + {% do metrics.append({"name": monitor, "type": monitor}) %} + {% endfor %} + {{ elementary.debug_log('min_bucket_start - ' ~ min_bucket_start) }} {#- execute table monitors and write to temp test table -#} {{ elementary.test_log('start', full_table_name, column_name) }} @@ -69,7 +75,7 @@ max_bucket_end, test_configuration.days_back, column_obj, - column_monitors, + metrics, metric_properties, dimensions) %} {{ elementary.debug_log('column_monitoring_query - \n' ~ column_monitoring_query) }} @@ -79,7 +85,7 @@ {% set anomaly_scores_query = elementary.get_anomaly_scores_query(test_metrics_table_relation=temp_table_relation, model_relation=model_relation, test_configuration=test_configuration, - monitors=column_monitors, + metric_names=column_monitors, column_name=column_name, metric_properties=metric_properties ) %} diff --git a/macros/edr/tests/test_dimension_anomalies.sql b/macros/edr/tests/test_dimension_anomalies.sql index e715973ab..fab1e15f4 100644 --- a/macros/edr/tests/test_dimension_anomalies.sql +++ b/macros/edr/tests/test_dimension_anomalies.sql @@ -65,7 +65,7 @@ {% set anomaly_scores_query = elementary.get_anomaly_scores_query(test_metrics_table_relation=temp_table_relation, model_relation=model_relation, test_configuration=test_configuration, - monitors=['dimension'], + metric_names=['dimension'], metric_properties=metric_properties) %} {{ elementary.debug_log('dimension monitors anomaly scores query - \n' ~ anomaly_scores_query) }} diff --git a/macros/edr/tests/test_table_anomalies.sql b/macros/edr/tests/test_table_anomalies.sql index aed6cd6b0..5e51bce58 100644 --- a/macros/edr/tests/test_table_anomalies.sql +++ b/macros/edr/tests/test_table_anomalies.sql @@ -50,18 +50,24 @@ backfill_days=test_configuration.backfill_days, days_back=test_configuration.days_back, detection_delay=test_configuration.detection_delay, - monitors=table_monitors, + metric_names=table_monitors, metric_properties=metric_properties) %} {%- endif %} {{ elementary.debug_log('min_bucket_start: ' ~ min_bucket_start ~ ' | max_bucket_end: ' ~ max_bucket_end ) }} {#- execute table monitors and write to temp test table -#} {{ elementary.test_log('start', full_table_name) }} + + {% set table_metrics = [] %} + {% for table_monitor in table_monitors %} + {% do table_metrics.append({"type": table_monitor, "name": table_monitor}) %} + {% endfor %} + {%- set table_monitoring_query = elementary.table_monitoring_query(model, model_relation, min_bucket_start, max_bucket_end, - table_monitors, + table_metrics, metric_properties=metric_properties) %} {{ elementary.debug_log('table_monitoring_query - \n' ~ table_monitoring_query) }} {% set temp_table_relation = elementary.create_elementary_test_table(database_name, tests_schema_name, test_table_name, 'metrics', table_monitoring_query) %} @@ -71,7 +77,7 @@ model_relation, test_configuration=test_configuration, metric_properties=metric_properties, - monitors=table_monitors) %} + metric_names=table_monitors) %} {{ elementary.debug_log('table monitors anomaly scores query - \n' ~ anomaly_scores_query) }} {% set anomaly_scores_test_table_relation = elementary.create_elementary_test_table(database_name, tests_schema_name, test_table_name, 'anomaly_scores', anomaly_scores_query) %} diff --git a/macros/edr/tests/test_utils/collect_column_metrics.sql b/macros/edr/tests/test_utils/collect_column_metrics.sql index edb345bef..638b3de00 100644 --- a/macros/edr/tests/test_utils/collect_column_metrics.sql +++ b/macros/edr/tests/test_utils/collect_column_metrics.sql @@ -8,35 +8,49 @@ days_back, backfill_days, where_expression, - dimensions + dimensions, + collected_by=none ) %} {% set model_graph_node = elementary.get_model_graph_node(model_relation) %} - {% set metric_props = elementary.get_metric_properties(model_graph_node, timestamp_column, where_expression, time_bucket, dimensions) %} + {% set metric_props = elementary.get_metric_properties(model_graph_node, timestamp_column, where_expression, time_bucket, dimensions, collected_by=collected_by) %} {% set days_back = elementary.get_test_argument('days_back', days_back, model_graph_node) %} {% set backfill_days = elementary.get_test_argument('backfill_days', backfill_days, model_graph_node) %} {% set metric_names = [] %} + {% set metric_types = [] %} {% for metric in column_metrics %} {% do metric_names.append(metric.name) %} + {% do metric_types.append(metric.type) %} {% endfor %} - {% set column_obj_and_monitors = elementary.get_column_obj_and_monitors(model_relation, column_name, monitors=metric_names) %} + {% set column_obj_and_monitors = elementary.get_column_obj_and_monitors(model_relation, column_name, monitors=metric_types) %} {% if not column_obj_and_monitors %} {% do exceptions.raise_compiler_error("Unable to find column `{}` in `{}`".format(column_name, model_relation)) %} {% endif %} {% set column_monitors = column_obj_and_monitors.monitors %} + {% set column_obj = column_obj_and_monitors.column %} {% if not column_monitors %} {% do return(none) %} {% endif %} - {% set column_obj = column_obj_and_monitors.column %} + {% set filtered_column_metrics = [] %} + {% for metric in column_metrics %} + {% if metric.type in column_monitors %} + {% do filtered_column_metrics.append(metric) %} + {% endif %} + {% endfor %} + + {% set metric_names = [] %} + {% for metric in column_metrics %} + {% do metric_names.append(metric.name) %} + {% endfor %} {% if metric_props.timestamp_column %} {% set min_bucket_start, max_bucket_end = elementary.get_metric_buckets_min_and_max( model_relation=model_relation, backfill_days=backfill_days, days_back=days_back, - monitors=column_monitors, + metric_names=metric_names, column_name=column_name, metric_properties=metric_props ) %} @@ -49,7 +63,7 @@ max_bucket_end, days_back, column_obj, - column_monitors, + filtered_column_metrics, metric_props, dimensions ) %} diff --git a/macros/edr/tests/test_utils/collect_table_metrics.sql b/macros/edr/tests/test_utils/collect_table_metrics.sql index f3069b88a..e09981e26 100644 --- a/macros/edr/tests/test_utils/collect_table_metrics.sql +++ b/macros/edr/tests/test_utils/collect_table_metrics.sql @@ -7,10 +7,11 @@ days_back, backfill_days, where_expression, - dimensions + dimensions, + collected_by=none ) %} {% set model_graph_node = elementary.get_model_graph_node(model_relation) %} - {% set metric_props = elementary.get_metric_properties(model_graph_node, timestamp_column, where_expression, time_bucket, dimensions) %} + {% set metric_props = elementary.get_metric_properties(model_graph_node, timestamp_column, where_expression, time_bucket, dimensions, collected_by=collected_by) %} {% set days_back = elementary.get_test_argument('days_back', days_back, model_graph_node) %} {% set backfill_days = elementary.get_test_argument('backfill_days', backfill_days, model_graph_node) %} @@ -18,34 +19,35 @@ {% for metric in table_metrics %} {% do metric_names.append(metric.name) %} {% endfor %} - {% set table_monitors = elementary.get_final_table_monitors(monitors=metric_names) %} - {% if not table_monitors %} - {% do return(none) %} - {% endif %} - - {% if dimensions and table_monitors != ["row_count"] %} - {% do exceptions.raise_compiler_error("collect_metrics test does not support non row_count dimensional table metrics.") %} - {% endif %} {% if metric_props.timestamp_column %} {% set min_bucket_start, max_bucket_end = elementary.get_metric_buckets_min_and_max( model_relation=model_relation, backfill_days=backfill_days, days_back=days_back, - monitors=table_monitors, + metric_names=metric_names, metric_properties=metric_props ) %} {% endif %} {% if dimensions %} + {% if table_metrics | length != 1 %} + {% do exceptions.raise_compiler_error("collect_metrics test with 'dimensions' expects a single 'row_count' metric.") %} + {% endif %} + {% set dim_metric = table_metrics[0] %} + {% if dim_metric.type != "row_count" %} + {% do exceptions.raise_compiler_error("collect_metrics test does not support non-'row_count' dimensional table metrics.") %} + {% endif %} + {% set monitoring_query = elementary.dimension_monitoring_query( model_expr, model_relation, dimensions, min_bucket_start, max_bucket_end, - metric_properties=metric_props + metric_properties=metric_props, + metric_name=dim_metric.name ) %} {% else %} {% set monitoring_query = elementary.table_monitoring_query( @@ -53,7 +55,7 @@ model_relation, min_bucket_start, max_bucket_end, - table_monitors, + table_metrics, metric_properties=metric_props ) %} {% endif %} diff --git a/macros/edr/tests/test_utils/validate_unique_metric_names.sql b/macros/edr/tests/test_utils/validate_unique_metric_names.sql new file mode 100644 index 000000000..135e1fc06 --- /dev/null +++ b/macros/edr/tests/test_utils/validate_unique_metric_names.sql @@ -0,0 +1,31 @@ +{% macro validate_unique_metric_names(metrics) %} + {% set metric_names = [] %} + {% for metric in metrics %} + {% if not metric.name %} + {% do exceptions.raise_compiler_error("The 'name' argument is required for each metric.") %} + {% endif %} + {% if metric.name in metric_names %} + {% do exceptions.raise_compiler_error("The metric '{}' is already defined.".format(metric.name)) %} + {% endif %} + {% do metric_names.append(metric.name) %} + {% endfor %} + + {% set test_node = context["model"] %} + {% set parent_model_unique_ids = elementary.get_parent_model_unique_ids_from_test_node(test_node) %} + + {% for graph_node in graph.nodes.values() %} + {% if test_node.unique_id != graph_node.unique_id and graph_node.resource_type == "test" %} + {% set test_metadata = elementary.safe_get_with_default(graph_node, 'test_metadata', {}) %} + {% if test_metadata.namespace == "elementary" and test_metadata.name == "collect_metrics" %} + {% set test_parent_model_unique_ids = elementary.get_parent_model_unique_ids_from_test_node(graph_node) %} + {% if parent_model_unique_ids == test_parent_model_unique_ids %} + {% for metric in test_metadata.kwargs.metrics %} + {% if metric.name in metric_names %} + {% do exceptions.raise_compiler_error("The metric '{}' is already defined.".format(metric.name)) %} + {% endif %} + {% endfor %} + {% endif %} + {% endif %} + {% endif %} + {% endfor %} +{% endmacro %} diff --git a/macros/utils/common_test_configs.sql b/macros/utils/common_test_configs.sql index aaa9713eb..22eac6cae 100644 --- a/macros/utils/common_test_configs.sql +++ b/macros/utils/common_test_configs.sql @@ -1,5 +1,5 @@ {% macro get_common_test_config(flattened_test) %} - {% set test_name = flattened_test["short_name"] %} + {% set test_name = flattened_test["test_original_name"] %} {% set test_namespace = flattened_test["test_namespace"] %} {% do return(elementary.get_common_test_config_by_namespace_and_name(test_namespace, test_name)) %} {% endmacro %} diff --git a/macros/utils/graph/cache.sql b/macros/utils/graph/cache.sql new file mode 100644 index 000000000..e20b97b8e --- /dev/null +++ b/macros/utils/graph/cache.sql @@ -0,0 +1,11 @@ +{% macro set_cache(entry, val) %} + {% do graph.setdefault("elementary", {}).update({entry: val}) %} +{% endmacro %} + +{% macro get_cache(entry, default=none) %} + {% do return(graph.setdefault("elementary", {}).get(entry, default)) %} +{% endmacro %} + +{% macro setdefault_cache(entry, default=none) %} + {% do return(graph.setdefault("elementary", {}).setdefault(entry, default)) %} +{% endmacro %} diff --git a/macros/utils/graph/get_cache.sql b/macros/utils/graph/get_cache.sql deleted file mode 100644 index 5c972243f..000000000 --- a/macros/utils/graph/get_cache.sql +++ /dev/null @@ -1,6 +0,0 @@ -{% macro get_cache(entry, default=none) %} - {% if execute %} - {{ return(graph.get("elementary", {}).get(entry, default)) }} - {% endif %} - {{ return(none) }} -{% endmacro %} diff --git a/macros/utils/graph/set_cache.sql b/macros/utils/graph/set_cache.sql index b924a41dc..e69de29bb 100644 --- a/macros/utils/graph/set_cache.sql +++ b/macros/utils/graph/set_cache.sql @@ -1,5 +0,0 @@ -{% macro set_cache(entry, val) %} - {% if execute %} - {% do graph.get("elementary", {}).update({entry: val}) %} - {% endif %} -{% endmacro %} diff --git a/models/edr/run_results/seed_run_results.sql b/models/edr/run_results/seed_run_results.sql new file mode 100644 index 000000000..60f9e651d --- /dev/null +++ b/models/edr/run_results/seed_run_results.sql @@ -0,0 +1,44 @@ +{{ + config( + materialized = 'view', + bind=False + ) +}} + +with dbt_run_results as ( + select * from {{ ref('dbt_run_results') }} +), + +dbt_seeds as ( + select * from {{ ref('dbt_seeds') }} +) + +SELECT + run_results.model_execution_id, + run_results.unique_id, + run_results.invocation_id, + run_results.query_id, + run_results.name, + run_results.generated_at, + run_results.status, + run_results.full_refresh, + run_results.message, + run_results.execution_time, + run_results.execute_started_at, + run_results.execute_completed_at, + run_results.compile_started_at, + run_results.compile_completed_at, + run_results.compiled_code, + run_results.adapter_response, + run_results.thread_id, + seeds.database_name, + seeds.schema_name, + run_results.materialization, + seeds.tags, + seeds.package_name, + seeds.path, + seeds.original_path, + seeds.owner, + seeds.alias +FROM dbt_run_results run_results +JOIN dbt_seeds seeds ON run_results.unique_id = seeds.unique_id