-
Notifications
You must be signed in to change notification settings - Fork 92
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
anomalies: add support for anomaly_exclude_filter (#582)
* anomalies: add support for anomalies_exclude_dates
* anomalies_exclude_dates: don't allow ranges without "before"
* anomalies_exclude_dates: bugfix
* anomalies_exclude_dates: several bugfixes; didn't work well for non-daily buckets
* test-warehouse: increase schema name prefix
* test_slower_rate_event_freshness: solve race (hopefully)
* test-warehouse: name the detailed report HTML like we name the artifact
* changed to "anomaly_exclude_metrics" — which is now a simple "where" expression
* get_anomaly_scores_query: remove unnecessary code
* get_test_argument: better logic
* add comment
* bugfixes
- Loading branch information
Showing
16 changed files
with
242 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
185 changes: 185 additions & 0 deletions
185
integration_tests/tests/test_anomaly_exclude_metrics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
from datetime import datetime, timedelta | ||
from typing import Any, Dict, List | ||
|
||
from data_generator import DATE_FORMAT, generate_dates | ||
from dbt_project import DbtProject | ||
from parametrization import Parametrization | ||
|
||
# Name of the timestamp column injected into every generated data row.
TIMESTAMP_COLUMN = "updated_at"
# Fully-qualified dbt test exercised by every case in this module.
DBT_TEST_NAME = "elementary.column_anomalies"
# Baseline arguments shared by all cases; each test extends this dict
# (e.g. with "time_bucket", "days_back", "anomaly_exclude_metrics").
DBT_TEST_ARGS = {
    "timestamp_column": TIMESTAMP_COLUMN,
    "column_anomalies": ["sum"],
}
|
||
|
||
@Parametrization.autodetect_parameters()
@Parametrization.case(
    name="daily_buckets",
    time_bucket={"period": "day", "count": 1},
    dates_step=timedelta(days=1),
)
@Parametrization.case(
    name="six_hour_buckets",
    time_bucket={"period": "hour", "count": 6},
    dates_step=timedelta(hours=6),
)
def test_exclude_specific_dates(
    test_id: str, dbt_project: DbtProject, time_bucket: dict, dates_step: timedelta
):
    """Spiked training buckets on specific dates keep the anomaly test green;
    excluding those dates via ``anomaly_exclude_metrics`` makes it fail."""
    now = datetime.utcnow()
    test_bucket, *training_buckets = generate_dates(
        base_date=now - timedelta(1), step=dates_step
    )

    # Dates whose training metrics are spiked (and later excluded).
    exclude_dates = [(now - timedelta(days=off)).date() for off in (5, 3)]

    data: List[Dict[str, Any]] = [
        {TIMESTAMP_COLUMN: test_bucket.strftime(DATE_FORMAT), "metric": 10}
    ]
    for bucket in training_buckets:
        data.append(
            {
                TIMESTAMP_COLUMN: bucket.strftime(DATE_FORMAT),
                # Spike the excluded dates so the test value blends in.
                "metric": 10 if bucket.date() in exclude_dates else 1,
            }
        )

    # With the spikes included in training, the test bucket is unremarkable.
    test_args = dict(DBT_TEST_ARGS, time_bucket=time_bucket)
    result = dbt_project.test(
        test_id, DBT_TEST_NAME, test_args, data=data, test_column="metric"
    )
    assert result["status"] == "pass"

    # Excluding the spiked dates from training exposes the anomaly.
    in_clause = ", ".join("'%s'" % cur_date for cur_date in exclude_dates)
    test_args = dict(
        DBT_TEST_ARGS,
        anomaly_exclude_metrics=f"metric_date in ({in_clause})",
        time_bucket=time_bucket,
    )
    result = dbt_project.test(
        test_id, DBT_TEST_NAME, test_args, test_column="metric"
    )
    assert result["status"] == "fail"
|
||
|
||
def test_exclude_specific_timestamps(test_id: str, dbt_project: DbtProject):
    """Hourly buckets: spiked training timestamps keep the anomaly test green;
    excluding those exact bucket timestamps via ``anomaly_exclude_metrics``
    (with a pinned ``custom_run_started_at``) makes it fail.
    """
    # To avoid races, align "custom_run_started_at" to the exact beginning of
    # the hour. BUGFIX: also zero microseconds — replace(minute=0, second=0)
    # alone leaves them in, so isoformat() was not truly the start of the hour.
    test_started_at = datetime.utcnow().replace(minute=0, second=0, microsecond=0)

    test_bucket, *training_buckets = generate_dates(
        base_date=test_started_at - timedelta(hours=1),
        step=timedelta(hours=1),
        days_back=1,
    )

    # Hourly buckets whose training metrics are spiked (and later excluded).
    excluded_buckets = [
        test_started_at - timedelta(hours=22),
        test_started_at - timedelta(hours=20),
    ]

    data: List[Dict[str, Any]] = [
        {TIMESTAMP_COLUMN: test_bucket.strftime(DATE_FORMAT), "metric": 10}
    ]
    data += [
        {
            TIMESTAMP_COLUMN: cur_bucket.strftime(DATE_FORMAT),
            # Spike the excluded buckets so the test value blends in.
            "metric": 1 if cur_bucket not in excluded_buckets else 10,
        }
        for cur_bucket in training_buckets
    ]

    # With the spikes included in training, the test bucket is unremarkable.
    time_bucket = {"period": "hour", "count": 1}
    test_args = {**DBT_TEST_ARGS, "time_bucket": time_bucket, "days_back": 1}
    test_result = dbt_project.test(
        test_id, DBT_TEST_NAME, test_args, data=data, test_column="metric"
    )
    assert test_result["status"] == "pass"

    # Excluding the spiked bucket timestamps from training exposes the anomaly.
    excluded_buckets_str = ", ".join(
        ["'%s'" % cur_ts.strftime(DATE_FORMAT) for cur_ts in excluded_buckets]
    )
    test_args = {
        **DBT_TEST_ARGS,
        "time_bucket": time_bucket,
        "days_back": 1,
        "anomaly_exclude_metrics": f"metric_time_bucket in ({excluded_buckets_str})",
    }
    test_result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        test_args,
        test_column="metric",
        # Pin the run start so bucket boundaries match the generated data.
        test_vars={"custom_run_started_at": test_started_at.isoformat()},
    )
    assert test_result["status"] == "fail"
|
||
|
||
def test_exclude_date_range(test_id: str, dbt_project: DbtProject):
    """Spiked training metrics inside a date range keep the anomaly test
    green; excluding that range via ``anomaly_exclude_metrics`` makes it
    fail."""
    today = datetime.utcnow().date()
    test_date, *training_dates = generate_dates(base_date=today - timedelta(1))

    # Inclusive range of training dates whose metrics are spiked.
    range_start = today - timedelta(6)
    range_end = today - timedelta(3)

    data: List[Dict[str, Any]] = [
        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "metric": 10}
    ]
    for cur_date in training_dates:
        # Spike the in-range dates so the test value blends in.
        spiked = range_start <= cur_date <= range_end
        data.append(
            {
                TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
                "metric": 10 if spiked else 1,
            }
        )

    # With the spikes included in training, the test bucket is unremarkable.
    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        dict(DBT_TEST_ARGS, days_back=30),
        data=data,
        test_column="metric",
    )
    assert result["status"] == "pass"

    # Excluding the spiked range from training exposes the anomaly.
    exclude_expr = (
        f"metric_date >= '{range_start}' and metric_date <= '{range_end}'"
    )
    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        dict(DBT_TEST_ARGS, anomaly_exclude_metrics=exclude_expr, days_back=30),
        test_column="metric",
    )
    assert result["status"] == "fail"
|
||
|
||
def test_exclude_by_metric_value(test_id: str, dbt_project: DbtProject):
    """Spiked training metrics keep the anomaly test green; excluding
    training rows by metric VALUE via ``anomaly_exclude_metrics`` makes it
    fail."""
    today = datetime.utcnow().date()
    test_date, *training_dates = generate_dates(base_date=today - timedelta(1))

    data: List[Dict[str, Any]] = [
        {TIMESTAMP_COLUMN: test_date.strftime(DATE_FORMAT), "metric": 10}
    ]
    for cur_date in training_dates:
        # Spike roughly every third date so the test value blends in.
        data.append(
            {
                TIMESTAMP_COLUMN: cur_date.strftime(DATE_FORMAT),
                "metric": 10 if cur_date.day % 3 == 0 else 1,
            }
        )

    # With the spikes included in training, the test bucket is unremarkable.
    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        dict(DBT_TEST_ARGS, days_back=30),
        data=data,
        test_column="metric",
    )
    assert result["status"] == "pass"

    # Excluding high-valued TRAINING metrics (but not the test date itself)
    # exposes the anomaly.
    exclude_expr = f"metric_date < '{test_date}' and metric_value >= 5"
    result = dbt_project.test(
        test_id,
        DBT_TEST_NAME,
        dict(DBT_TEST_ARGS, anomaly_exclude_metrics=exclude_expr, days_back=30),
        test_column="metric",
    )
    assert result["status"] == "fail"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.