Skip to content

Commit

Permalink
Merge pull request #626 from elementary-data/ele-2112-ignore-small-ch…
Browse files Browse the repository at this point in the history
…anges-always-false-when-direction-both

ELE-2112: Ignore small changes - always False when direction == Both
  • Loading branch information
dapollak authored Dec 5, 2023
2 parents e1c1159 + 6b83218 commit c1784fb
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 9 deletions.
27 changes: 26 additions & 1 deletion integration_tests/tests/test_volume_anomalies.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ def test_volume_anomaly_static_data_spike(
] * metric_value

# 30 new rows every day
# 35 new rows in the last day
# (metric_value) new rows in the last day
# z-score ~ -3.6

test_args = {
Expand Down Expand Up @@ -380,3 +380,28 @@ def test_anomalyless_table_volume_anomalies_periods_params(
}
test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
assert test_result["status"] == "fail"


def test_ignore_small_changes_both(test_id: str, dbt_project: DbtProject):
    """Verify that a large volume drop fails when both small-change thresholds are set.

    The data holds a steady 30 rows per day, then only 14 rows on the last
    day. 14 is below 50% of 30, so the drop exceeds
    ``drop_failure_percent_threshold`` and the test is expected to fail.

    NOTE(review): presumably DBT_TEST_ARGS leaves the anomaly direction at
    "both" (the scenario named by this test) -- confirm against its definition.
    """
    now = datetime.utcnow()
    last_day = now - timedelta(days=1)

    # Baseline: one row per generated day strictly before the last day,
    # repeated 30 times => 30 new rows every day.
    baseline_rows = [
        {TIMESTAMP_COLUMN: day.strftime(DATE_FORMAT)}
        for day in generate_dates(base_date=now, step=timedelta(days=1))
        if day < last_day
    ]
    # Last day: only 14 new rows, i.e. less than 50% of the usual 30.
    last_day_rows = [{TIMESTAMP_COLUMN: last_day.strftime(DATE_FORMAT)}] * 14
    data = baseline_rows * 30 + last_day_rows

    small_changes_thresholds = {
        "spike_failure_percent_threshold": 2,
        "drop_failure_percent_threshold": 50,
    }
    test_args = {
        **DBT_TEST_ARGS,
        "time_bucket": {"period": "day", "count": 1},
        "ignore_small_changes": small_changes_thresholds,
    }

    test_result = dbt_project.test(test_id, DBT_TEST_NAME, test_args, data=data)
    assert test_result["status"] == "fail"
27 changes: 19 additions & 8 deletions macros/edr/tests/test_utils/get_anomaly_query.sql
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,29 @@ case when
{%- endmacro -%}

{%- macro avg_percent_anomalous_condition(spike_failure_percent_threshold, drop_failure_percent_threshold, anomaly_direction) -%}
  {#
    Renders a SQL boolean expression that is true when metric_value deviates
    from training_avg by more than the configured percentage.

    Args:
      spike_failure_percent_threshold: percent above training_avg that counts
        as a spike; when falsy the spike check is skipped.
      drop_failure_percent_threshold: percent below training_avg that counts
        as a drop; when falsy the drop check is skipped.
      anomaly_direction: compared case-insensitively against 'spike', 'drop'
        and 'both'.

    The whole expression is parenthesized so that the `or` emitted for the
    'both' case cannot be re-associated by an `and` in the calling query.
  #}
  {% set spike_filter %}
    (metric_value > ((1 + {{ spike_failure_percent_threshold }}/100.0) * training_avg))
  {% endset %}
  {% set drop_filter %}
    (metric_value < ((1 - {{ drop_failure_percent_threshold }}/100.0) * training_avg))
  {% endset %}

  (
  {% if spike_failure_percent_threshold and drop_failure_percent_threshold and (anomaly_direction | lower) == 'both' %}
      {# A value cannot be both a spike and a drop, so with both thresholds
         active the two filters must be OR-ed; AND-ing them is always false. #}
      {{ spike_filter }} or {{ drop_filter }}
  {% else %}
      {# At most one filter is active here; the inactive side is (1 = 1). #}
      {% if spike_failure_percent_threshold and anomaly_direction | lower in ['spike', 'both'] %}
        {{ spike_filter }}
      {% else %}
        (1 = 1)
      {% endif %}

      and

      {% if drop_failure_percent_threshold and anomaly_direction | lower in ['drop', 'both'] %}
        {{ drop_filter }}
      {% else %}
        (1 = 1)
      {% endif %}
  {% endif %}
  )
{%- endmacro -%}

Expand Down

0 comments on commit c1784fb

Please sign in to comment.