From d4d1fa4c7ae33ff1717334a73d4881ba6baf249e Mon Sep 17 00:00:00 2001
From: joseph-sentry <136376984+joseph-sentry@users.noreply.github.com>
Date: Tue, 1 Oct 2024 14:36:27 -0400
Subject: [PATCH] feat: add backfill one off scripts (#734)

---
 one_off_script.py                             |  13 ++
 one_off_scripts/__init__.py                   |   6 +
 .../backfill_daily_test_rollups.py            | 201 +++++++++++++++++
 one_off_scripts/backfill_test_flag_bridges.py |  54 +++++
 .../tests/test_backfill_daily_test_rollups.py | 208 ++++++++++++++++++
 .../tests/test_backfill_test_flag_bridges.py  |  58 +++++
 requirements.in                               |   2 +-
 requirements.txt                              |   4 +-
 8 files changed, 543 insertions(+), 3 deletions(-)
 create mode 100644 one_off_script.py
 create mode 100644 one_off_scripts/__init__.py
 create mode 100644 one_off_scripts/backfill_daily_test_rollups.py
 create mode 100644 one_off_scripts/backfill_test_flag_bridges.py
 create mode 100644 one_off_scripts/tests/test_backfill_daily_test_rollups.py
 create mode 100644 one_off_scripts/tests/test_backfill_test_flag_bridges.py

diff --git a/one_off_script.py b/one_off_script.py
new file mode 100644
index 000000000..a5ffdd5cb
--- /dev/null
+++ b/one_off_script.py
@@ -0,0 +1,13 @@
+import os
+
+import django
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
+django.setup()
+
+if __name__ == "__main__":
+    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
+    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges
+
+    backfill_test_rollups()
+    backfill_test_flag_bridges()
diff --git a/one_off_scripts/__init__.py b/one_off_scripts/__init__.py
new file mode 100644
index 000000000..82d545d6b
--- /dev/null
+++ b/one_off_scripts/__init__.py
@@ -0,0 +1,6 @@
+import os
+
+import django
+
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
+django.setup()
diff --git a/one_off_scripts/backfill_daily_test_rollups.py b/one_off_scripts/backfill_daily_test_rollups.py
new file mode 100644
index 000000000..be5360274
--- /dev/null
+++ b/one_off_scripts/backfill_daily_test_rollups.py
@@ -0,0 +1,201 @@
+import logging
+from collections import defaultdict
+from dataclasses import dataclass, field
+from datetime import date, datetime, timedelta
+
+from django.db import transaction as django_transaction
+from shared.django_apps.core.models import Repository
+from shared.django_apps.reports.models import DailyTestRollup, Flake, TestInstance
+from test_results_parser import Outcome
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+@dataclass
+class RollupObj:
+    pass_count: int
+    fail_count: int
+    skip_count: int
+    flaky_fail_count: int
+
+    sum_duration_seconds: float
+    last_duration_seconds: float
+
+    latest_run: datetime
+
+    commits_where_fail: set[str] = field(default_factory=set)
+
+
+def get_test_analytics_repos(start_repoid):
+    # get all repos that have test_analytics_enabled == True
+    test_analytics_repos = Repository.objects.filter(
+        test_analytics_enabled=True
+    ).order_by("repoid")
+
+    if start_repoid is not None:
+        test_analytics_repos = test_analytics_repos.filter(repoid__gte=start_repoid)
+
+    return test_analytics_repos
+
+
+def process_instance(
+    rollup_dict: dict[tuple[str, str], RollupObj],
+    flake_dict: dict[str, list[tuple[datetime, datetime | None]]],
+    instance: TestInstance,
+):
+    pass_count = 0
+    fail_count = 0
+    skip_count = 0
+    flaky_fail_count = 0
+    duration_seconds = instance.duration_seconds
+    created_at = instance.created_at
+    commitid = instance.commitid
+
+    match instance.outcome:
+        case Outcome.Pass:
+            pass_count = 1
+        case Outcome.Skip:
+            skip_count = 1
+        case _:
+            fail_count = 1
+            if (flaky_range_list := flake_dict.get(instance.test_id)) is not None:
+                for flaky_range in flaky_range_list:
+                    if flaky_range[0] <= instance.created_at and (
+                        flaky_range[1] is None or instance.created_at < flaky_range[1]
+                    ):
+                        flaky_fail_count += 1
+                        break
+
+    if (entry := rollup_dict.get((instance.test_id, instance.branch))) is not None:
+        entry.pass_count += pass_count
+        entry.fail_count += fail_count
+        entry.skip_count += skip_count
+        entry.flaky_fail_count += flaky_fail_count
+        entry.sum_duration_seconds += duration_seconds
+        entry.last_duration_seconds = duration_seconds
+        entry.latest_run = created_at
+        if commitid:
+            entry.commits_where_fail.add(commitid)
+    else:
+        rollup_dict[(instance.test_id, instance.branch)] = RollupObj(
+            pass_count,
+            fail_count,
+            skip_count,
+            flaky_fail_count,
+            duration_seconds,
+            duration_seconds,
+            created_at,
+            set(),
+        )
+        if commitid:
+            rollup_dict[(instance.test_id, instance.branch)].commits_where_fail.add(
+                commitid
+            )
+
+
+def save_rollups(rollup_dict, repoid, date):
+    rollups_to_create = []
+    for obj_key, obj in rollup_dict.items():
+        rollup = DailyTestRollup(
+            repoid=repoid,
+            date=date,
+            test_id=obj_key[0],
+            branch=obj_key[1],
+            pass_count=obj.pass_count,
+            fail_count=obj.fail_count,
+            skip_count=obj.skip_count,
+            flaky_fail_count=obj.flaky_fail_count,
+            commits_where_fail=list(obj.commits_where_fail),
+            latest_run=obj.latest_run,
+            last_duration_seconds=obj.last_duration_seconds,
+            avg_duration_seconds=obj.sum_duration_seconds
+            / max(obj.pass_count + obj.fail_count, 1),  # guard skip-only days
+        )
+
+        rollups_to_create.append(rollup)
+
+    DailyTestRollup.objects.bulk_create(rollups_to_create, 1000)
+
+
+def backfill_test_rollups(
+    start_repoid: int | None = None,
+    start_date: str | None = None,  # default is 2024-07-16
+    end_date: str | None = None,  # default is 2024-09-17
+) -> dict[str, bool]:
+    log.info(
+        "Backfilling test rollups",
+        extra=dict(start_repoid=start_repoid, start_date=start_date, end_date=end_date),
+    )
+    test_analytics_repos = get_test_analytics_repos(start_repoid)
+
+    chunk_size = 10000
+
+    log.info(
+        "Starting backfill for repos",
+        extra=dict(repos=[repo.repoid for repo in test_analytics_repos]),
+    )
+
+    for repo in test_analytics_repos:
+        repoid = repo.repoid
+        log.info("Starting backfill for repo", extra=dict(repoid=repoid))
+
+        curr_date = date.fromisoformat(start_date) if start_date else date(2024, 7, 16)
+        until_date = date.fromisoformat(end_date) if end_date else date(2024, 9, 17)
+
+        # delete all existing rollups in the backfill date range
+        DailyTestRollup.objects.filter(
+            repoid=repoid, date__gte=curr_date, date__lte=until_date
+        ).delete()
+        django_transaction.commit()
+        log.info("Deleted rollups for repo", extra=dict(repoid=repoid))
+
+        # get flakes
+        flake_list = list(Flake.objects.filter(repository_id=repoid))
+
+        flake_dict: dict[str, list[tuple[datetime, datetime | None]]] = defaultdict(
+            list
+        )
+        for flake in flake_list:
+            flake_dict[flake.test_id].append((flake.start_date, flake.end_date))
+
+        while curr_date <= until_date:
+            log.info(
+                "Starting backfill for repo on date",
+                extra=dict(repoid=repoid, date=curr_date),
+            )
+
+            rollup_dict: dict[tuple[str, str], RollupObj] = {}
+
+            test_instances = TestInstance.objects.filter(
+                repoid=repoid, created_at__date=curr_date
+            ).order_by("created_at")
+
+            num_test_instances = test_instances.count()
+            if num_test_instances == 0:
+                curr_date += timedelta(days=1)
+                continue
+
+            chunks = [
+                test_instances[i : i + chunk_size]
+                for i in range(0, num_test_instances, chunk_size)
+            ]
+
+            for chunk in chunks:
+                for instance in chunk:
+                    if instance.branch is None or instance.commitid is None:
+                        continue
+
+                    process_instance(rollup_dict, flake_dict, instance)
+
+            save_rollups(rollup_dict, repoid, curr_date)
+            django_transaction.commit()
+            log.info(
+                "Committed repo for day",
+                extra=dict(repoid=repoid, date=curr_date),
+            )
+            curr_date += timedelta(days=1)
+
+        log.info("Finished backfill for repo", extra=dict(repoid=repoid))
+
+    return {"successful": True}
diff --git a/one_off_scripts/backfill_test_flag_bridges.py b/one_off_scripts/backfill_test_flag_bridges.py
new file mode 100644
index 000000000..e40cb46fd
--- /dev/null
+++ b/one_off_scripts/backfill_test_flag_bridges.py
@@ -0,0 +1,54 @@
+import logging
+
+from django.db import transaction as django_transaction
+from shared.django_apps.core.models import Repository
+from shared.django_apps.reports.models import (
+    RepositoryFlag,
+    Test,
+    TestFlagBridge,
+    TestInstance,
+)
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+def backfill_test_flag_bridges(repoid=None):
+    log.info("Backfilling TestFlagBridge objects", extra=dict(repoid=repoid))
+    repos = Repository.objects.filter(test_analytics_enabled=True)
+    if repoid is not None:
+        repos = repos.filter(repoid=repoid)
+
+    for repo in repos:
+        tests = Test.objects.filter(repository_id=repo.repoid)
+
+        flags = {
+            flag.flag_name: flag
+            for flag in RepositoryFlag.objects.filter(repository=repo)
+        }
+
+        bridges_to_create = []
+        for test in tests:
+            TestFlagBridge.objects.filter(test=test).delete()
+
+            first_test_instance = (
+                TestInstance.objects.filter(test_id=test.id)
+                .select_related("upload")
+                .first()
+            )
+
+            if first_test_instance is None:
+                continue
+
+            flag_names = first_test_instance.upload.flag_names
+
+            for flag_name in flag_names:
+                new_bridge = TestFlagBridge(test=test, flag=flags[flag_name])
+                bridges_to_create.append(new_bridge)
+
+        TestFlagBridge.objects.bulk_create(bridges_to_create, 1000)
+        log.info(
+            "Done creating flag bridges for repo",
+            extra=dict(repoid=repo.repoid, num_tests=len(tests)),
+        )
+        django_transaction.commit()
diff --git a/one_off_scripts/tests/test_backfill_daily_test_rollups.py b/one_off_scripts/tests/test_backfill_daily_test_rollups.py
new file mode 100644
index 000000000..fa4e4f024
--- /dev/null
+++ b/one_off_scripts/tests/test_backfill_daily_test_rollups.py
@@ -0,0 +1,208 @@
+import datetime as dt
+
+import pytest
+import time_machine
+from shared.django_apps.core.tests.factories import RepositoryFactory
+from shared.django_apps.reports.models import DailyTestRollup
+from shared.django_apps.reports.tests.factories import (
+    DailyTestRollupFactory,
+    FlakeFactory,
+    TestFactory,
+    TestInstanceFactory,
+)
+
+from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
+
+
+@pytest.fixture
+def setup_tests(transactional_db):
+    repo_1 = RepositoryFactory(test_analytics_enabled=True)
+
+    test_1 = TestFactory(repository=repo_1)
+
+    _ = FlakeFactory(
+        test=test_1,
+        repository=repo_1,
+        start_date=dt.datetime.fromisoformat("1970-01-02T00:00:00Z"),
+        end_date=dt.datetime.fromisoformat("1970-01-04T00:00:00Z"),
+    )
+
+    _ = FlakeFactory(
+        test=test_1,
+        repository=repo_1,
+        start_date=dt.datetime.fromisoformat("1970-01-04T12:00:00Z"),
+        end_date=None,
+    )
+
+    traveller = time_machine.travel("1970-01-01T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(test=test_1, duration_seconds=10.0)
+    traveller.stop()
+
+    traveller = time_machine.travel("1970-01-03T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(test=test_1, duration_seconds=10.0)
+    traveller.stop()
+
+    traveller = time_machine.travel("1970-01-05T00:00:00Z", tick=False)
+    traveller.start()
+    ti = TestInstanceFactory(
+        test=test_1,
+        duration_seconds=10000.0,
+    )
+    traveller.stop()
+
+    _ = DailyTestRollupFactory(
+        test=test_1,
+        date=dt.date.fromisoformat("1970-01-03"),
+        fail_count=10,
+        pass_count=5,
+        last_duration_seconds=10.0,
+        avg_duration_seconds=1.0,
+        latest_run=dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+    )
+
+    _ = DailyTestRollupFactory(
+        test=test_1,
+        date=dt.date.fromisoformat("1970-01-05"),
+        fail_count=10,
+        pass_count=5,
+        last_duration_seconds=10.0,
+        avg_duration_seconds=1.0,
+        latest_run=dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+    )
+
+    _ = RepositoryFactory(test_analytics_enabled=False)
+
+    return repo_1
+
+
+@pytest.mark.django_db(transaction=True)
+def test_backfill_test_rollups(setup_tests):
+    rollups = DailyTestRollup.objects.all()
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+            "commits_where_fail": r.commits_where_fail,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 1.0,
+            "commits_where_fail": [],
+            "date": dt.date(1970, 1, 3),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+        {
+            "avg_duration_seconds": 1.0,
+            "commits_where_fail": [],
+            "date": dt.date(1970, 1, 5),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+    ]
+
+    backfill_test_rollups(
+        start_date="1970-01-02",
+        end_date="1970-01-04",
+    )
+
+    rollups = DailyTestRollup.objects.all()
+
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 1.0,
+            "date": dt.date(1970, 1, 5),
+            "fail_count": 10,
+            "flaky_fail_count": 0,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-01T00:00:00Z"),
+            "pass_count": 5,
+            "skip_count": 0,
+        },
+        {
+            "avg_duration_seconds": 10.0,
+            "date": dt.date(1970, 1, 3),
+            "fail_count": 1,
+            "flaky_fail_count": 1,
+            "last_duration_seconds": 10.0,
+            "latest_run": dt.datetime.fromisoformat("1970-01-03T00:00:00Z"),
+            "pass_count": 0,
+            "skip_count": 0,
+        },
+    ]
+
+    assert len(rollups[1].commits_where_fail) == 1
+
+    backfill_test_rollups(
+        start_date="1970-01-02",
+        end_date="1970-01-05",
+    )
+
+    rollups = DailyTestRollup.objects.all()
+
+    assert [
+        {
+            "date": r.date,
+            "pass_count": r.pass_count,
+            "skip_count": r.skip_count,
+            "fail_count": r.fail_count,
+            "flaky_fail_count": r.flaky_fail_count,
+            "last_duration_seconds": r.last_duration_seconds,
+            "avg_duration_seconds": r.avg_duration_seconds,
+            "latest_run": r.latest_run,
+        }
+        for r in rollups
+    ] == [
+        {
+            "avg_duration_seconds": 10.0,
+            "date": dt.date(1970, 1, 3),
"fail_count": 1, + "flaky_fail_count": 1, + "last_duration_seconds": 10.0, + "latest_run": dt.datetime.fromisoformat("1970-01-03T00:00:00Z"), + "pass_count": 0, + "skip_count": 0, + }, + { + "avg_duration_seconds": 10000.0, + "date": dt.date(1970, 1, 5), + "fail_count": 1, + "flaky_fail_count": 1, + "last_duration_seconds": 10000.0, + "latest_run": dt.datetime.fromisoformat("1970-01-05T00:00:00Z"), + "pass_count": 0, + "skip_count": 0, + }, + ] + + for r in rollups: + assert len(r.commits_where_fail) == 1 diff --git a/one_off_scripts/tests/test_backfill_test_flag_bridges.py b/one_off_scripts/tests/test_backfill_test_flag_bridges.py new file mode 100644 index 000000000..c98948eb4 --- /dev/null +++ b/one_off_scripts/tests/test_backfill_test_flag_bridges.py @@ -0,0 +1,58 @@ +import pytest +from shared.django_apps.core.tests.factories import RepositoryFactory +from shared.django_apps.reports.models import TestFlagBridge +from shared.django_apps.reports.tests.factories import ( + RepositoryFlagFactory, + TestFactory, + TestInstanceFactory, + UploadFactory, +) + +from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges + + +@pytest.fixture +def setup_tests(transactional_db): + repo = RepositoryFactory(test_analytics_enabled=True) + + flag_1 = RepositoryFlagFactory(repository=repo, flag_name="first") + flag_2 = RepositoryFlagFactory(repository=repo, flag_name="second") + flag_3 = RepositoryFlagFactory(repository=repo, flag_name="third") + + test_1 = TestFactory(repository_id=repo.repoid) + upload_1 = UploadFactory() + upload_1.flags.set([flag_1, flag_2]) + test_instance_1 = TestInstanceFactory(test=test_1, upload=upload_1) + + test_2 = TestFactory(repository_id=repo.repoid) + upload_2 = UploadFactory() + upload_2.flags.set([flag_3]) + test_instance_2 = TestInstanceFactory(test=test_2, upload=upload_2) + + test_3 = TestFactory(repository_id=repo.repoid) + + +@pytest.mark.django_db(transaction=True) +def test_it_backfills_test_flag_bridges(setup_tests): + bridges = TestFlagBridge.objects.all() + assert len(bridges) == 0 + + backfill_test_flag_bridges() + + bridges = TestFlagBridge.objects.all() + assert len(bridges) == 3 + + assert [(b.test.name, b.flag.flag_name) for b in bridges] == [ + ( + "test_1", + "first", + ), + ( + "test_1", + "second", + ), + ( + "test_2", + "third", + ), + ] diff --git a/requirements.in b/requirements.in index 361fc8715..c2a3c99b5 100644 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ https://github.com/codecov/opentelem-python/archive/refs/tags/v0.0.4a1.tar.gz#egg=codecovopentelem -https://github.com/codecov/shared/archive/106b0ae2b9a2870899fa3903fc6da0a9ba67eef2.tar.gz#egg=shared +https://github.com/codecov/shared/archive/36791fe3c18a0dbdf7296ffbdffbf2137fa9fc06.tar.gz#egg=shared https://github.com/codecov/test-results-parser/archive/1507de2241601d678e514c08b38426e48bb6d47d.tar.gz#egg=test-results-parser https://github.com/codecov/timestring/archive/d37ceacc5954dff3b5bd2f887936a98a668dda42.tar.gz#egg=timestring asgiref>=3.7.2 diff --git a/requirements.txt b/requirements.txt index b7dd741ea..5b1417982 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile +# pip-compile requirements.in # amqp==5.2.0 # via kombu @@ -357,7 +357,7 @@ sentry-sdk[celery]==2.13.0 # via # -r requirements.in # shared -shared @ https://github.com/codecov/shared/archive/106b0ae2b9a2870899fa3903fc6da0a9ba67eef2.tar.gz +shared @ 
+shared @ https://github.com/codecov/shared/archive/36791fe3c18a0dbdf7296ffbdffbf2137fa9fc06.tar.gz
     # via -r requirements.in
 six==1.16.0
     # via
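
Usage sketch: the snippet below shows one way the two entry points added by this patch might be driven ad hoc. Only backfill_test_rollups, backfill_test_flag_bridges, and the django_scaffold.settings bootstrap come from the patch itself; the run_backfill.py file name and the argparse wrapper are illustrative assumptions, not part of the PR.

    # run_backfill.py -- hypothetical driver, not included in this patch
    import argparse

    # Importing from the one_off_scripts package runs the Django bootstrap in
    # one_off_scripts/__init__.py (DJANGO_SETTINGS_MODULE + django.setup()).
    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

    if __name__ == "__main__":
        parser = argparse.ArgumentParser(description="Run test-analytics backfills")
        parser.add_argument("--repoid", type=int, default=None)
        parser.add_argument("--start-date", default=None)  # ISO date; script default 2024-07-16
        parser.add_argument("--end-date", default=None)  # ISO date; script default 2024-09-17
        args = parser.parse_args()

        # Note the asymmetry: start_repoid is a resume cursor (repos with
        # repoid >= this value are processed, in ascending repoid order), so a
        # crashed run can be restarted from the last repoid seen in the logs.
        backfill_test_rollups(
            start_repoid=args.repoid,
            start_date=args.start_date,
            end_date=args.end_date,
        )
        # repoid here restricts the flag-bridge backfill to exactly one repository.
        backfill_test_flag_bridges(repoid=args.repoid)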