From d4d1fa4c7ae33ff1717334a73d4881ba6baf249e Mon Sep 17 00:00:00 2001
From: joseph-sentry <136376984+joseph-sentry@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:36:27 -0400
Subject: [PATCH] feat: add backfill one off scripts (#734)

---
 one_off_script.py                             |  13 ++
 one_off_scripts/__init__.py                   |   6 +
 .../backfill_daily_test_rollups.py            | 201 +++++++++++++++++
 one_off_scripts/backfill_test_flag_bridges.py |  54 +++++
 .../tests/test_backfill_daily_test_rollups.py | 208 ++++++++++++++++++
 .../tests/test_backfill_test_flag_bridges.py  |  58 +++++
 requirements.in                               |   2 +-
 requirements.txt                              |   4 +-
 8 files changed, 543 insertions(+), 3 deletions(-)
 create mode 100644 one_off_script.py
 create mode 100644 one_off_scripts/__init__.py
 create mode 100644 one_off_scripts/backfill_daily_test_rollups.py
 create mode 100644 one_off_scripts/backfill_test_flag_bridges.py
 create mode 100644 one_off_scripts/tests/test_backfill_daily_test_rollups.py
 create mode 100644 one_off_scripts/tests/test_backfill_test_flag_bridges.py

diff --git a/one_off_script.py b/one_off_script.py
new file mode 100644
index 000000000..a5ffdd5cb
--- /dev/null
+++ b/one_off_script.py
@@ -0,0 +1,13 @@
+import os
+
+import django
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
+django.setup()
+
+if __name__ == "__main__":
+    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
+    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges
+
+    backfill_test_rollups()
+    backfill_test_flag_bridges()
diff --git a/one_off_scripts/__init__.py b/one_off_scripts/__init__.py
new file mode 100644
index 000000000..82d545d6b
--- /dev/null
+++ b/one_off_scripts/__init__.py
@@ -0,0 +1,6 @@
+import os
+
+import django
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
+django.setup()
diff --git a/one_off_scripts/backfill_daily_test_rollups.py b/one_off_scripts/backfill_daily_test_rollups.py
new file mode 100644
index 000000000..be5360274
--- /dev/null
+++ b/one_off_scripts/backfill_daily_test_rollups.py
@@ -0,0 +1,201 @@
+import logging
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import date, datetime, timedelta
+
+from django.db import transaction as django_transaction
+from shared.django_apps.core.models import Repository
+from shared.django_apps.reports.models import DailyTestRollup, Flake, TestInstance
+from test_results_parser import Outcome
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+@dataclass
+class RollupObj:
+    pass_count: int
+    fail_count: int
+    skip_count: int
+    flaky_fail_count: int
+
+    sum_duration_seconds: float
+    last_duration_seconds: float
+
+    latest_run: datetime
+
+    commits_where_fail: set[str] = field(default_factory=set)
+
+
+def get_test_analytics_repos(start_repoid):
+    # get all repos that have test_analytics_enabled == True
+    test_analytics_repos = Repository.objects.filter(
+        test_analytics_enabled=True
+    ).order_by("repoid")
+
+    if start_repoid is not None:
+        test_analytics_repos = test_analytics_repos.filter(repoid__gte=start_repoid)
+
+    return test_analytics_repos
+
+
+def process_instance(
+    rollup_dict: dict[tuple[str, str], RollupObj],
+    flake_dict: dict[str, list[tuple[datetime, datetime | None]]],
+    instance: TestInstance,
+):
+    pass_count = 0
+    fail_count = 0
+    skip_count = 0
+    flaky_fail_count = 0
+    duration_seconds = instance.duration_seconds
+    created_at = instance.created_at
+    commitid = instance.commitid
+
+    match instance.outcome:
+        case Outcome.Pass:
+            pass_count = 1
+        case Outcome.Skip:
+            skip_count = 1
+        case _:
+            fail_count = 1
+            if (flaky_range_list := flake_dict.get(instance.test_id)) is not None:
+                for flaky_range in flaky_range_list:
+                    if flaky_range[0] <= instance.created_at and (
+                        flaky_range[1] is None or instance.created_at < flaky_range[1]
+                    ):
+                        flaky_fail_count += 1
+                        break
+
+    if (entry := rollup_dict.get((instance.test_id, instance.branch))) is not None:
+        entry.pass_count += pass_count
+        entry.fail_count += fail_count
+        entry.skip_count += skip_count
+        entry.flaky_fail_count += flaky_fail_count
+        entry.sum_duration_seconds += duration_seconds
+        entry.last_duration_seconds = duration_seconds
+        entry.latest_run = created_at
+        if commitid:
+            entry.commits_where_fail.add(commitid)
+    else:
+        rollup_dict[(instance.test_id, instance.branch)] = RollupObj(
+            pass_count,
+            fail_count,
+            skip_count,
+            flaky_fail_count,
+            duration_seconds,
+            duration_seconds,
+            created_at,
+            set(),
+        )
+        if commitid:
+            rollup_dict[(instance.test_id, instance.branch)].commits_where_fail.add(
+                commitid
+            )
+
+
+def save_rollups(rollup_dict, repoid, date):
+    rollups_to_create = []
+    for obj_key, obj in rollup_dict.items():
+        rollup = DailyTestRollup(
+            repoid=repoid,
+            date=date,
+            test_id=obj_key[0],
+            branch=obj_key[1],
+            pass_count=obj.pass_count,
+            fail_count=obj.fail_count,
+            skip_count=obj.skip_count,
+            flaky_fail_count=obj.flaky_fail_count,
+            commits_where_fail=list(obj.commits_where_fail),
+            latest_run=obj.latest_run,
+            last_duration_seconds=obj.last_duration_seconds,
+            avg_duration_seconds=obj.sum_duration_seconds
+            / max(obj.pass_count + obj.fail_count, 1),  # guard skip-only days
+        )
+
+        rollups_to_create.append(rollup)
+
+    DailyTestRollup.objects.bulk_create(rollups_to_create, 1000)
+
+
+def backfill_test_rollups(
+    start_repoid: int | None = None,
+    start_date: str | None = None,  # default is 2024-07-16
+    end_date: str | None = None,  # default is 2024-09-17
+) -> dict[str, bool]:
+    log.info(
+        "Backfilling test rollups",
+        extra=dict(start_repoid=start_repoid, start_date=start_date, end_date=end_date),
+    )
+    test_analytics_repos = get_test_analytics_repos(start_repoid)
+
+    chunk_size = 10000
+
+    log.info(
+        "Starting backfill for repos",
+        extra=dict(repos=[repo.repoid for repo in test_analytics_repos]),
+    )
+
+    for repo in test_analytics_repos:
+        repoid = repo.repoid
+        log.info("Starting backfill for repo", extra=dict(repoid=repoid))
+
+        curr_date = date.fromisoformat(start_date) if start_date else date(2024, 7, 16)
+        until_date = date.fromisoformat(end_date) if end_date else date(2024, 9, 17)
+
+        # delete all existing rollups in the backfill date range
+        DailyTestRollup.objects.filter(
+            repoid=repoid, date__gte=curr_date, date__lte=until_date
+        ).delete()
+        django_transaction.commit()
+        log.info("Deleted rollups for repo", extra=dict(repoid=repoid))
+
+        # get flakes
+        flake_list = list(Flake.objects.filter(repository_id=repoid))
+
+        flake_dict: dict[str, list[tuple[datetime, datetime | None]]] = defaultdict(
+            list
+        )
+        for flake in flake_list:
+            flake_dict[flake.test_id].append((flake.start_date, flake.end_date))
+
+        while curr_date <= until_date:
+            log.info(
+                "Starting backfill for repo on date",
+                extra=dict(repoid=repoid, date=curr_date),
+            )
+
+            rollup_dict: dict[tuple[str, str], RollupObj] = {}
+
+            test_instances = TestInstance.objects.filter(
+                repoid=repoid, created_at__date=curr_date
+            ).order_by("created_at")
+
+            num_test_instances = test_instances.count()
+            if num_test_instances == 0:
+                curr_date += timedelta(days=1)
+                continue
+
+            chunks = [
+                test_instances[i : i + chunk_size]
+                for i in range(0, num_test_instances, chunk_size)
+            ]
+
+            for chunk in chunks:
+                for instance in chunk:
+                    if instance.branch is None or instance.commitid is None:
+                        continue
+
+                    process_instance(rollup_dict, flake_dict, instance)
+
+            save_rollups(rollup_dict, repoid, curr_date)
+            django_transaction.commit()
+            log.info(
+                "Committed repo for day",
+                extra=dict(repoid=repoid, date=curr_date),
+            )
+            curr_date += timedelta(days=1)
+
+        log.info("Finished backfill for repo", extra=dict(repoid=repoid))
+
+    return {"successful": True}
diff --git a/one_off_scripts/backfill_test_flag_bridges.py b/one_off_scripts/backfill_test_flag_bridges.py
new file mode 100644
index 000000000..e40cb46fd
--- /dev/null
+++ b/one_off_scripts/backfill_test_flag_bridges.py
@@ -0,0 +1,54 @@
+import logging
+
+from django.db import transaction as django_transaction
+from shared.django_apps.core.models import Repository
+from shared.django_apps.reports.models import (
+    RepositoryFlag,
+    Test,
+    TestFlagBridge,
+    TestInstance,
+)
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+def backfill_test_flag_bridges(repoid=None):
+    log.info("Backfilling TestFlagBridge objects", extra=dict(repoid=repoid))
+    repos = Repository.objects.filter(test_analytics_enabled=True)
+    if repoid is not None:
+        repos = repos.filter(repoid=repoid)
+
+    for repo in repos:
+        tests = Test.objects.filter(repository_id=repo.repoid)
+
+        flags = {
+            flag.flag_name: flag
+            for flag in RepositoryFlag.objects.filter(repository=repo)
+        }
+
+        bridges_to_create = []
+        for test in tests:
+            TestFlagBridge.objects.filter(test=test).delete()
+
+            first_test_instance = (
+                TestInstance.objects.filter(test_id=test.id)
+                .select_related("upload")
+                .first()
+            )
+
+            if first_test_instance is None:
+                continue
+
+            flag_names = first_test_instance.upload.flag_names
+
+            for flag_name in flag_names:
+                new_bridge = TestFlagBridge(test=test, flag=flags[flag_name])
+                bridges_to_create.append(new_bridge)
+
+        TestFlagBridge.objects.bulk_create(bridges_to_create, 1000)
+        log.info(
+            "Done creating flag bridges for repo",
+            extra=dict(repoid=repo.repoid, num_tests=len(tests)),
+        )
+        django_transaction.commit()
diff --git a/one_off_scripts/tests/test_backfill_daily_test_rollups.py b/one_off_scripts/tests/test_backfill_daily_test_rollups.py
new file mode 100644
index 000000000..fa4e4f024
--- /dev/null
+++ b/one_off_scripts/tests/test_backfill_daily_test_rollups.py
@@ -0,0 +1,208 @@
+import datetime as dt
+
+import pytest
+import time_machine
+from shared.django_apps.core.tests.factories import RepositoryFactory
+from shared.django_apps.reports.models import DailyTestRollup
+from shared.django_apps.reports.tests.factories import (
+    DailyTestRollupFactory,
+    FlakeFactory,
+    TestFactory,
+    TestInstanceFactory,
+)
+
+from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
+
+
+@pytest.fixture
+def setup_tests(transactional_db):
+    repo_1 = RepositoryFactory(test_analytics_enabled=True)
+
+    test_1 = TestFactory(repository=repo_1)
+
+    _ = FlakeFactory(
+        test=test_1,
+        repository=repo_1,
+        start_date=dt.datetime.fromisoformat("1970-01-02T00:00:00Z"),
+        end_date=dt.datetime.fromisoformat("1970-01-04T00:00:00Z"),
+    )
+
+    _ = FlakeFactory(
+        test=test_1,
+        repository=repo_1,
+        start_date=dt.datetime.fromisoformat("1970-01-04T12:00:00Z"),
+        end_date=None,
+    )
+
+    traveller = time_machine.travel("1970-01-01T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(test=test_1, duration_seconds=10.0)
+    traveller.stop()
+
+    traveller = time_machine.travel("1970-01-03T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(test=test_1, duration_seconds=10.0)
+    traveller.stop()
+
+    traveller = time_machine.travel("1970-01-05T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(
+        test=test_1,
+        duration_seconds=10000.0,
+    )
+    traveller.stop()
+
+    _ = DailyTestRollupFactory(
+        test=test_1,
+        date=dt.date.fromisoformat("1970-01-03"),
+        fail_count=10,
+        pass_count=5,
+        last_duration_seconds=10.0,
+        avg_duration_seconds=1.0,
+        latest_run=dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+    )
+
+    _ = DailyTestRollupFactory(
+        test=test_1,
+        date=dt.date.fromisoformat("1970-01-05"),
+        fail_count=10,
+        pass_count=5,
+        last_duration_seconds=10.0,
+        avg_duration_seconds=1.0,
+        latest_run=dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+    )
+
+    _ = RepositoryFactory(test_analytics_enabled=False)
+
+    return repo_1
+
+
+@pytest.mark.django_db(transaction=True)
+def test_backfill_test_rollups(setup_tests):
+    rollups = DailyTestRollup.objects.all()
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+            "commits_where_fail": r.commits_where_fail,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 1.0,
+            "commits_where_fail": [],
+            "date": dt.date(1970, 1, 3),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+        {
+            "avg_duration_seconds": 1.0,
+            "commits_where_fail": [],
+            "date": dt.date(1970, 1, 5),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+    ]
+
+    backfill_test_rollups(
+        start_date="1970-01-02",
+        end_date="1970-01-04",
+    )
+
+    rollups = DailyTestRollup.objects.all()
+
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 1.0,
+            "date": dt.date(1970, 1, 5),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+        {
+            "avg_duration_seconds": 10.0,
+            "date": dt.date(1970, 1, 3),
+            "fail_count": 1,
+            "flaky_fail_count": 1,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-03T00:00:00Z"),
+            "pass_count": 0,
+            "skip_count": 0,
+        },
+    ]
+
+    assert len(rollups[1].commits_where_fail) == 1
+
+    backfill_test_rollups(
+        start_date="1970-01-02",
+        end_date="1970-01-05",
+    )
+
+    rollups = DailyTestRollup.objects.all()
+
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 10.0,
+            "date": dt.date(1970, 1, 3),
"fail_count": 1, + "flaky_fail_count": 1, + "last_duration_seconds": 10.0, + "latest_run": dt.datetime.fromisoformat("1970-01-03T00:00:00Z"), + "pass_count": 0, + "skip_count": 0, + }, + { + "avg_duration_seconds": 10000.0, + "date": dt.date(1970, 1, 5), + "fail_count": 1, + "flaky_fail_count": 1, + "last_duration_seconds": 10000.0, + "latest_run": dt.datetime.fromisoformat("1970-01-05T00:00:00Z"), + "pass_count": 0, + "skip_count": 0, + }, + ] + + for r in rollups: + assert len(r.commits_where_fail) == 1 diff --git a/one_off_scripts/tests/test_backfill_test_flag_bridges.py b/one_off_scripts/tests/test_backfill_test_flag_bridges.py new file mode 100644 index 000000000..c98948eb4 --- /dev/null +++ b/one_off_scripts/tests/test_backfill_test_flag_bridges.py @@ -0,0 +1,58 @@ +import pytest +from shared.django_apps.core.tests.factories import RepositoryFactory +from shared.django_apps.reports.models import TestFlagBridge +from shared.django_apps.reports.tests.factories import ( + RepositoryFlagFactory, + TestFactory, + TestInstanceFactory, + UploadFactory, +) + +from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges + + +@pytest.fixture +def setup_tests(transactional_db): + repo = RepositoryFactory(test_analytics_enabled=True) + + flag_1 = RepositoryFlagFactory(repository=repo, flag_name="first") + flag_2 = RepositoryFlagFactory(repository=repo, flag_name="second") + flag_3 = RepositoryFlagFactory(repository=repo, flag_name="third") + + test_1 = TestFactory(repository_id=repo.repoid) + upload_1 = UploadFactory() + upload_1.flags.set([flag_1, flag_2]) + test_instance_1 = TestInstanceFactory(test=test_1, upload=upload_1) + + test_2 = TestFactory(repository_id=repo.repoid) + upload_2 = UploadFactory() + upload_2.flags.set([flag_3]) + test_instance_2 = TestInstanceFactory(test=test_2, upload=upload_2) + + test_3 = TestFactory(repository_id=repo.repoid) + + +@pytest.mark.django_db(transaction=True) +def test_it_backfills_test_flag_bridges(setup_tests): + bridges = TestFlagBridge.objects.all() + assert len(bridges) == 0 + + backfill_test_flag_bridges() + + bridges = TestFlagBridge.objects.all() + assert len(bridges) == 3 + + assert [(b.test.name, b.flag.flag_name) for b in bridges] == [ + ( + "test_1", + "first", + ), + ( + "test_1", + "second", + ), + ( + "test_2", + "third", + ), + ] diff --git a/requirements.in b/requirements.in index 361fc8715..c2a3c99b5 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ https://github.com/codecov/opentelem-python/archive/refs/tags/v0.0.4a1.tar.gz#egg=codecovopentelem -https://github.com/codecov/shared/archive/106b0ae2b9a2870899fa3903fc6da0a9ba67eef2.tar.gz#egg=shared +https://github.com/codecov/shared/archive/36791fe3c18a0dbdf7296ffbdffbf2137fa9fc06.tar.gz#egg=shared https://github.com/codecov/test-results-parser/archive/1507de2241601d678e514c08b38426e48bb6d47d.tar.gz#egg=test-results-parser https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring asgiref>=3.7.2 diff --git a/requirements.txt b/requirements.txt index b7dd741ea..5b1417982 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile +# pip-compile requirements.in # amqp==5.2.0 # via kombu @@ -357,7 +357,7 @@ sentry-sdk[celery]==2.13.0 # via # -r requirements.in # shared -shared @ https://github.com/codecov/shared/archive/106b0ae2b9a2870899fa3903fc6da0a9ba67eef2.tar.gz +shared @ 
+shared @ https://github.com/codecov/shared/archive/36791fe3c18a0dbdf7296ffbdffbf2137fa9fc06.tar.gz
     # via -r requirements.in
 six==1.16.0
     # via
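
Usage sketch: the snippet below shows one way the two entry points added by this patch might be driven ad hoc. Only backfill_test_rollups, backfill_test_flag_bridges, and the django_scaffold.settings bootstrap come from the patch itself; the run_backfill.py file name and the argparse wrapper are illustrative assumptions, not part of the PR.

    # run_backfill.py -- hypothetical driver, not included in this patch
    import argparse

    # Importing from the one_off_scripts package runs the Django bootstrap in
    # one_off_scripts/__init__.py (DJANGO_SETTINGS_MODULE + django.setup()).
    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

    if __name__ == "__main__":
        parser = argparse.ArgumentParser(description="Run test-analytics backfills")
        parser.add_argument("--repoid", type=int, default=None)
        parser.add_argument("--start-date", default=None)  # ISO date; script default 2024-07-16
        parser.add_argument("--end-date", default=None)  # ISO date; script default 2024-09-17
        args = parser.parse_args()

        # Note the asymmetry: start_repoid is a resume cursor (repos with
        # repoid >= this value are processed, in ascending repoid order), so a
        # crashed run can be restarted from the last repoid seen in the logs.
        backfill_test_rollups(
            start_repoid=args.repoid,
            start_date=args.start_date,
            end_date=args.end_date,
        )
        # repoid here restricts the flag-bridge backfill to exactly one repository.
        backfill_test_flag_bridges(repoid=args.repoid)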