Added a required 'name' argument to each 'metrics' entry; the metric kind previously passed as 'name' is now passed as 'type'.

elementary-data · Jul 22, 2024 · 6bcd126
1 parent 260841f
commit 6bcd126
Show file tree

Hide file tree

Showing 13 changed files with 187 additions and 162 deletions.
diff --git a/integration_tests/tests/test_collect_metrics.py b/integration_tests/tests/test_collect_metrics.py
@@ -9,27 +9,27 @@
 TIMESTAMP_COLUMN = "updated_at"
 DBT_TEST_NAME = "elementary.collect_metrics"
 
-COL_TO_METRIC_NAMES = {
+COL_TO_METRIC_TYPES = {
     None: {"row_count"},
     "id": {"average"},
     "name": {"average_length"},
     "*": {"null_count"},
     ("id", "name"): {"zero_count"},  # Shouldn't do anything on 'name'.
 }
 EXPECTED_COL_TO_METRIC_NAMES = {
-    None: {"row_count"},
-    "id": {"average", "null_count", "zero_count"},
-    "name": {"average_length", "null_count"},
-    "updated_at": {"null_count"},
+    None: {"custom_row_count"},
+    "id": {"custom_average", "custom_null_count", "custom_zero_count"},
+    "name": {"custom_average_length", "custom_null_count"},
+    "updated_at": {"custom_null_count"},
 }
 
 
 DBT_TEST_ARGS = {
     "timestamp_column": TIMESTAMP_COLUMN,
     "metrics": [
-        {"name": metric_name, "columns": col_name}
-        for col_name, metric_names in COL_TO_METRIC_NAMES.items()
-        for metric_name in metric_names
+        {"type": metric_type, "name": f"custom_{metric_type}", "columns": col_name}
+        for col_name, metric_types in COL_TO_METRIC_TYPES.items()
+        for metric_type in metric_types
     ],
 }
 
@@ -116,16 +116,14 @@ def test_collect_group_by_metrics(test_id: str, dbt_project: DbtProject):
 
     assert test_result["status"] == "pass"
 
-    # Unfortunately, the dimension's metric name is 'dimension' rather than 'row_count'.
     expected_col_to_metric_names = {
         **EXPECTED_COL_TO_METRIC_NAMES,
-        "dimension": {"null_count"},
-        None: {"dimension"},
+        "dimension": {"custom_null_count"},
     }
     expected_dim_to_col_to_metric_names = {
         "dim1": expected_col_to_metric_names,
         "dim2": expected_col_to_metric_names,
-        None: {None: {"dimension"}},
+        None: {None: {"custom_row_count"}},
     }
     metrics = dbt_project.read_table(
         METRICS_TABLE,

diff --git a/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql b/macros/edr/data_monitoring/anomaly_detection/get_anomaly_scores_query.sql
@@ -1,4 +1,4 @@
-{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, monitors, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
+{% macro get_anomaly_scores_query(test_metrics_table_relation, model_relation, test_configuration, metric_names, column_name = none, columns_only = false, metric_properties = none, data_monitoring_metrics_table=none) %}
     {%- set model_graph_node = elementary.get_model_graph_node(model_relation) %}
     {%- set full_table_name = elementary.model_node_to_full_name(model_graph_node) %}
     {%- set test_execution_id = elementary.get_test_execution_id() %}
@@ -80,7 +80,7 @@
                     and updated_at > {{ elementary.edr_cast_as_timestamp(elementary.edr_quote(latest_full_refresh)) }}
                 {% endif %}
                 and upper(full_table_name) = upper('{{ full_table_name }}')
-                and metric_name in {{ elementary.strings_list_to_tuple(monitors) }}
+                and metric_name in {{ elementary.strings_list_to_tuple(metric_names) }}
                 {%- if column_name %}
                     and upper(column_name) = upper('{{ column_name }}')
                 {%- endif %}

diff --git a/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql b/macros/edr/data_monitoring/data_monitors_configuration/get_buckets_configuration.sql
@@ -20,7 +20,7 @@
 {% endmacro %}
 
 
-{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, monitors=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %}
+{% macro get_metric_buckets_min_and_max(model_relation, backfill_days, days_back, detection_delay=none, metric_names=none, column_name=none, metric_properties=none, unit_test=false, unit_test_relation=none) %}
 
     {%- set detection_end = elementary.get_detection_end(detection_delay) %}
     {%- set detection_end_expr = elementary.edr_cast_as_timestamp(elementary.edr_quote(detection_end)) %}
@@ -29,8 +29,8 @@
     {%- set full_table_name = elementary.relation_to_full_name(model_relation) %}
     {%- set force_metrics_backfill = elementary.get_config_var('force_metrics_backfill') %}
 
-    {%- if monitors %}
-        {%- set monitors_tuple = elementary.strings_list_to_tuple(monitors) %}
+    {%- if metric_names %}
+        {%- set metric_names_tuple = elementary.strings_list_to_tuple(metric_names) %}
     {%- endif %}
 
     {%- if unit_test %}
@@ -71,8 +71,8 @@
             and bucket_end <= {{ detection_end_expr }}
             and upper(full_table_name) = upper('{{ full_table_name }}')
             and metric_properties = {{ elementary.dict_to_quoted_json(metric_properties) }}
-            {%- if monitors %}
-            and metric_name in {{ monitors_tuple }}
+            {%- if metric_names %}
+            and metric_name in {{ metric_names_tuple }}
             {%- endif %}
             {%- if column_name %}
             and upper(column_name) = upper('{{ column_name }}')

diff --git a/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/column_monitoring_query.sql
@@ -1,11 +1,18 @@
-{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_monitors, metric_properties, dimensions) %}
+{% macro column_monitoring_query(monitored_table, monitored_table_relation, min_bucket_start, max_bucket_end, days_back, column_obj, column_metrics, metric_properties, dimensions) %}
     {%- set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %}
     {%- set timestamp_column = metric_properties.timestamp_column %}
     {% set prefixed_dimensions = [] %}
     {% for dimension_column in dimensions %}
       {% do prefixed_dimensions.append("dimension_" ~ dimension_column) %}
     {% endfor %}
 
+    {% set metric_types = [] %}
+    {% set metric_name_to_type = {} %}
+    {% for metric in column_metrics %}
+        {% do metric_types.append(metric.type) %}
+        {% do metric_name_to_type.update({metric.name: metric.type}) %}
+    {% endfor %}
+
 
     with monitored_table as (
         select * from {{ monitored_table }}
@@ -38,9 +45,9 @@
         ),
     {% endif %}
 
-    column_monitors as (
+    column_metrics as (
 
-        {%- if column_monitors %}
+        {%- if column_metrics %}
             {%- set column = column_obj.quoted -%}
                 select
                     {%- if timestamp_column %}
@@ -53,26 +60,26 @@
                     {% if dimensions | length > 0 %}
                       {{ elementary.select_dimensions_columns(prefixed_dimensions) }},
                     {% endif %}
-                    {%- if 'null_count' in column_monitors -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count,
-                    {%- if 'null_percent' in column_monitors -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent,
-                    {%- if 'not_null_percent' in column_monitors -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent,
-                    {%- if 'max' in column_monitors -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max,
-                    {%- if 'min' in column_monitors -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min,
-                    {%- if 'average' in column_monitors -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average,
-                    {%- if 'zero_count' in column_monitors -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count,
-                    {%- if 'zero_percent' in column_monitors -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent,
-                    {%- if 'not_zero_percent' in column_monitors -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent,
-                    {%- if 'standard_deviation' in column_monitors -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation,
-                    {%- if 'variance' in column_monitors -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance,
-                    {%- if 'max_length' in column_monitors -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length,
-                    {%- if 'min_length' in column_monitors -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length,
-                    {%- if 'average_length' in column_monitors -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length,
-                    {%- if 'missing_count' in column_monitors -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count,
-                    {%- if 'missing_percent' in column_monitors -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent,
-                    {%- if 'count_true' in column_monitors -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true,
-                    {%- if 'count_false' in column_monitors -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false,
-                    {%- if 'not_missing_percent' in column_monitors -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent,
-                    {%- if 'sum' in column_monitors -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum
+                    {%- if 'null_count' in metric_types -%} {{ elementary.null_count(column) }} {%- else -%} null {% endif %} as null_count,
+                    {%- if 'null_percent' in metric_types -%} {{ elementary.null_percent(column) }} {%- else -%} null {% endif %} as null_percent,
+                    {%- if 'not_null_percent' in metric_types -%} {{ elementary.not_null_percent(column) }} {%- else -%} null {% endif %} as not_null_percent,
+                    {%- if 'max' in metric_types -%} {{ elementary.max(column) }} {%- else -%} null {% endif %} as max,
+                    {%- if 'min' in metric_types -%} {{ elementary.min(column) }} {%- else -%} null {% endif %} as min,
+                    {%- if 'average' in metric_types -%} {{ elementary.average(column) }} {%- else -%} null {% endif %} as average,
+                    {%- if 'zero_count' in metric_types -%} {{ elementary.zero_count(column) }} {%- else -%} null {% endif %} as zero_count,
+                    {%- if 'zero_percent' in metric_types -%} {{ elementary.zero_percent(column) }} {%- else -%} null {% endif %} as zero_percent,
+                    {%- if 'not_zero_percent' in metric_types -%} {{ elementary.not_zero_percent(column) }} {%- else -%} null {% endif %} as not_zero_percent,
+                    {%- if 'standard_deviation' in metric_types -%} {{ elementary.standard_deviation(column) }} {%- else -%} null {% endif %} as standard_deviation,
+                    {%- if 'variance' in metric_types -%} {{ elementary.variance(column) }} {%- else -%} null {% endif %} as variance,
+                    {%- if 'max_length' in metric_types -%} {{ elementary.max_length(column) }} {%- else -%} null {% endif %} as max_length,
+                    {%- if 'min_length' in metric_types -%} {{ elementary.min_length(column) }} {%- else -%} null {% endif %} as min_length,
+                    {%- if 'average_length' in metric_types -%} {{ elementary.average_length(column) }} {%- else -%} null {% endif %} as average_length,
+                    {%- if 'missing_count' in metric_types -%} {{ elementary.missing_count(column) }} {%- else -%} null {% endif %} as missing_count,
+                    {%- if 'missing_percent' in metric_types -%} {{ elementary.missing_percent(column) }} {%- else -%} null {% endif %} as missing_percent,
+                    {%- if 'count_true' in metric_types -%} {{ elementary.count_true(column) }} {%- else -%} null {% endif %} as count_true,
+                    {%- if 'count_false' in metric_types -%} {{ elementary.count_false(column) }} {%- else -%} null {% endif %} as count_false,
+                    {%- if 'not_missing_percent' in metric_types -%} {{ elementary.not_missing_percent(column) }} {%- else -%} null {% endif %} as not_missing_percent,
+                    {%- if 'sum' in metric_types -%} {{ elementary.sum(column) }} {%- else -%} null {% endif %} as sum
                 from filtered_monitored_table
                 {%- if timestamp_column %}
                     left join buckets on (edr_bucket_start = start_bucket_in_data)
@@ -83,15 +90,15 @@
                     group by 1,2
                 {% endif %}
         {%- else %}
-            {{ elementary.empty_column_monitors_cte() }}
+            {{ elementary.empty_column_metrics_cte() }}
         {%- endif %}
 
     ),
 
-    column_monitors_unpivot as (
+    column_metrics_unpivot as (
 
-        {%- if column_monitors %}
-            {% for monitor in column_monitors %}
+        {%- if column_metrics %}
+            {% for metric_name, metric_type in metric_name_to_type.items() %}
                 select
                     {{ elementary.const_as_string(column_obj.name) }} as edr_column_name,
                     bucket_start,
@@ -108,9 +115,9 @@
                       {{ elementary.null_string() }} as dimension,
                       {{ elementary.null_string() }} as dimension_value,
                     {% endif %}
-                    {{ elementary.edr_cast_as_float(monitor) }} as metric_value,
-                    {{ elementary.edr_cast_as_string(elementary.edr_quote(monitor)) }} as metric_name
-                from column_monitors where {{ monitor }} is not null
+                    {{ elementary.edr_cast_as_float(metric_type) }} as metric_value,
+                    {{ elementary.edr_cast_as_string(elementary.edr_quote(metric_name)) }} as metric_name
+                from column_metrics where {{ metric_type }} is not null
                 {% if not loop.last %} union all {% endif %}
             {%- endfor %}
         {%- else %}
@@ -133,7 +140,7 @@
             dimension,
             dimension_value,
             {{elementary.dict_to_quoted_json(metric_properties) }} as metric_properties
-        from column_monitors_unpivot
+        from column_metrics_unpivot
 
     )
 
@@ -166,8 +173,8 @@
 {% macro select_dimensions_columns(dimension_columns, as_prefix="") %}
   {% set select_statements %}
     {%- for column in dimension_columns -%}
-      {%- if col_prefix -%}
-        {{ col_prefix ~ "_" }}
+      {%- if as_prefix -%}
+        {{ as_prefix ~ "_" }}
       {%- endif -%}
       {{ column }}
       {%- if as_prefix -%}

diff --git a/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql b/macros/edr/data_monitoring/monitors_query/dimension_monitoring_query.sql
@@ -1,5 +1,5 @@
-{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties) %}
-    {% set metric_name = 'dimension' %}
+{% macro dimension_monitoring_query(monitored_table, monitored_table_relation, dimensions, min_bucket_start, max_bucket_end, metric_properties, metric_name=none) %}
+    {% set metric_name = metric_name or 'dimension' %}
     {% set full_table_name_str = elementary.edr_quote(elementary.relation_to_full_name(monitored_table_relation)) %}
     {% set dimensions_string = elementary.join_list(dimensions, '; ') %}
     {% set concat_dimensions_sql_expression = elementary.list_concat_with_separator(dimensions, '; ') %}