Skip to content

Commit

Permalink
Added a required 'name' argument to 'metrics'.
Browse files (browse the repository at this point in the history)
  • Loading branch information
elongl committed Jul 22, 2024
1 parent 260841f commit 6bcd126
Show file tree
Hide file tree
Showing 13 changed files with 187 additions and 162 deletions.
22 changes: 10 additions & 12 deletions integration_tests/tests/test_collect_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,27 @@
TIMESTAMP_COLUMN = "updated_at"
DBT_TEST_NAME = "elementary.collect_metrics"

COL_TO_METRIC_NAMES = {
COL_TO_METRIC_TYPES = {
None: {"row_count"},
"id": {"average"},
"name": {"average_length"},
"*": {"null_count"},
("id", "name"): {"zero_count"}, # Shouldn't do anything on 'name'.
}
EXPECTED_COL_TO_METRIC_NAMES = {
None: {"row_count"},
"id": {"average", "null_count", "zero_count"},
"name": {"average_length", "null_count"},
"updated_at": {"null_count"},
None: {"custom_row_count"},
"id": {"custom_average", "custom_null_count", "custom_zero_count"},
"name": {"custom_average_length", "custom_null_count"},
"updated_at": {"custom_null_count"},
}


DBT_TEST_ARGS = {
"timestamp_column": TIMESTAMP_COLUMN,
"metrics": [
{"name": metric_name, "columns": col_name}
for col_name, metric_names in COL_TO_METRIC_NAMES.items()
for metric_name in metric_names
{"type": metric_type, "name": f"custom_{metric_type}", "columns": col_name}
for col_name, metric_types in COL_TO_METRIC_TYPES.items()
for metric_type in metric_types
],
}

Expand Down Expand Up @@ -116,16 +116,14 @@ def test_collect_group_by_metrics(test_id: str, dbt_project: DbtProject):

assert test_result["status"] == "pass"

# Unfortunately, the dimension's metric name is 'dimension' rather than 'row_count'.
expected_col_to_metric_names = {
**EXPECTED_COL_TO_METRIC_NAMES,
"dimension": {"null_count"},
None: {"dimension"},
"dimension": {"custom_null_count"},
}
expected_dim_to_col_to_metric_names = {
"dim1": expected_col_to_metric_names,
"dim2": expected_col_to_metric_names,
None: {None: {"dimension"}},
None: {None: {"custom_row_count"}},
}
metrics = dbt_project.read_table(
METRICS_TABLE,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, monitors, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
{%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
{%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
{%- set test_execution_id = elementary.get_test_execution_id() %}
Expand Down Expand Up @@ -80,7 +80,7 @@
and updated_at > {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(latest_full_refresh)) }}
{% endif %}
and upper(full_table_name) = upper('{{ full_table_name }}')
and metric_name in {{ elementary.strings_list_to_tuple(monitors) }}
and metric_name in {{ elementary.strings_list_to_tuple(metric_names) }}
{%- if column_name %}
and upper(column_name) = upper('{{ column_name }}')
{%- endif %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
{% endmacro %}


{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, monitors=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %}
{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, metric_names=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %}

{%- set detection_end = elementary.get_detection_end(detection_delay) %}
{%- set detection_end_expr = elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) %}
Expand All @@ -29,8 +29,8 @@
{%- set full_table_name = elementary.relation_to_full_name(model_relation) %}
{%- set force_metrics_backfill = elementary.get_config_var('force_metrics_backfill') %}

{%- if monitors %}
{%- set monitors_tuple = elementary.strings_list_to_tuple(monitors) %}
{%- if metric_names %}
{%- set metric_names_tuple = elementary.strings_list_to_tuple(metric_names) %}
{%- endif %}

{%- if unit_test %}
Expand Down Expand Up @@ -71,8 +71,8 @@
and bucket_end <= {{ detection_end_expr }}
and upper(full_table_name) = upper('{{ full_table_name }}')
and metric_properties = {{ elementary.dict_to_quoted_json(metric_properties) }}
{%- if monitors %}
and metric_name in {{ monitors_tuple }}
{%- if metric_names %}
and metric_name in {{ metric_names_tuple }}
{%- endif %}
{%- if column_name %}
and upper(column_name) = upper('{{ column_name }}')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_monitors, metric_properties, dimensions) %}
{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_metrics, metric_properties, dimensions) %}
{%- set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %}
{%- set timestamp_column = metric_properties.timestamp_column %}
{% set prefixed_dimensions = [] %}
{% for dimension_column in dimensions %}
{% do prefixed_dimensions.append("dimension_" ~ dimension_column) %}
{% endfor %}

{% set metric_types = [] %}
{% set metric_name_to_type = {} %}
{% for metric in column_metrics %}
{% do metric_types.append(metric.type) %}
{% do metric_name_to_type.update({metric.name: metric.type}) %}
{% endfor %}


with monitored_table as (
select * from {{ monitored_table }}
Expand Down Expand Up @@ -38,9 +45,9 @@
),
{% endif %}

column_monitors as (
column_metrics as (

{%- if column_monitors %}
{%- if column_metrics %}
{%- set column = column_obj.quoted -%}
select
{%- if timestamp_column %}
Expand All @@ -53,26 +60,26 @@
{% if dimensions | length > 0 %}
{{ elementary.select_dimensions_columns(prefixed_dimensions) }},
{% endif %}
{%- if 'null_count' in column_monitors -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count,
{%- if 'null_percent' in column_monitors -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent,
{%- if 'not_null_percent' in column_monitors -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent,
{%- if 'max' in column_monitors -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max,
{%- if 'min' in column_monitors -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min,
{%- if 'average' in column_monitors -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average,
{%- if 'zero_count' in column_monitors -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count,
{%- if 'zero_percent' in column_monitors -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent,
{%- if 'not_zero_percent' in column_monitors -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent,
{%- if 'standard_deviation' in column_monitors -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation,
{%- if 'variance' in column_monitors -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance,
{%- if 'max_length' in column_monitors -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length,
{%- if 'min_length' in column_monitors -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length,
{%- if 'average_length' in column_monitors -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length,
{%- if 'missing_count' in column_monitors -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count,
{%- if 'missing_percent' in column_monitors -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent,
{%- if 'count_true' in column_monitors -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true,
{%- if 'count_false' in column_monitors -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false,
{%- if 'not_missing_percent' in column_monitors -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent,
{%- if 'sum' in column_monitors -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum
{%- if 'null_count' in metric_types -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count,
{%- if 'null_percent' in metric_types -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent,
{%- if 'not_null_percent' in metric_types -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent,
{%- if 'max' in metric_types -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max,
{%- if 'min' in metric_types -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min,
{%- if 'average' in metric_types -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average,
{%- if 'zero_count' in metric_types -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count,
{%- if 'zero_percent' in metric_types -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent,
{%- if 'not_zero_percent' in metric_types -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent,
{%- if 'standard_deviation' in metric_types -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation,
{%- if 'variance' in metric_types -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance,
{%- if 'max_length' in metric_types -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length,
{%- if 'min_length' in metric_types -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length,
{%- if 'average_length' in metric_types -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length,
{%- if 'missing_count' in metric_types -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count,
{%- if 'missing_percent' in metric_types -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent,
{%- if 'count_true' in metric_types -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true,
{%- if 'count_false' in metric_types -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false,
{%- if 'not_missing_percent' in metric_types -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent,
{%- if 'sum' in metric_types -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum
from filtered_monitored_table
{%- if timestamp_column %}
left join buckets on (edr_bucket_start = start_bucket_in_data)
Expand All @@ -83,15 +90,15 @@
group by 1,2
{% endif %}
{%- else %}
{{ elementary.empty_column_monitors_cte() }}
{{ elementary.empty_column_metrics_cte() }}
{%- endif %}

),

column_monitors_unpivot as (
column_metrics_unpivot as (

{%- if column_monitors %}
{% for monitor in column_monitors %}
{%- if column_metrics %}
{% for metric_name, metric_type in metric_name_to_type.items() %}
select
{{ elementary.const_as_string(column_obj.name) }} as edr_column_name,
bucket_start,
Expand All @@ -108,9 +115,9 @@
{{ elementary.null_string() }} as dimension,
{{ elementary.null_string() }} as dimension_value,
{% endif %}
{{ elementary.edr_cast_as_float(monitor) }} as metric_value,
{{ elementary.edr_cast_as_string(elementary.edr_quote(monitor)) }} as metric_name
from column_monitors where {{ monitor }} is not null
{{ elementary.edr_cast_as_float(metric_type) }} as metric_value,
{{ elementary.edr_cast_as_string(elementary.edr_quote(metric_name)) }} as metric_name
from column_metrics where {{ metric_type }} is not null
{% if not loop.last %} union all {% endif %}
{%- endfor %}
{%- else %}
Expand All @@ -133,7 +140,7 @@
dimension,
dimension_value,
{{elementary.dict_to_quoted_json(metric_properties) }} as metric_properties
from column_monitors_unpivot
from column_metrics_unpivot

)

Expand Down Expand Up @@ -166,8 +173,8 @@
{% macro select_dimensions_columns(dimension_columns, as_prefix="") %}
{% set select_statements %}
{%- for column in dimension_columns -%}
{%- if col_prefix -%}
{{ col_prefix ~ "_" }}
{%- if as_prefix -%}
{{ as_prefix ~ "_" }}
{%- endif -%}
{{ column }}
{%- if as_prefix -%}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties) %}
{% set metric_name = 'dimension' %}
{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties, metric_name=none) %}
{% set metric_name = metric_name or 'dimension' %}
{% set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %}
{% set dimensions_string = elementary.join_list(dimensions, '; ') %}
{% set concat_dimensions_sql_expression = elementary.list_concat_with_separator(dimensions, '; ') %}
Expand Down
Loading

0 comments on commit 6bcd126

Please sign in to comment.