feat: add backfill one off scripts (#734)
joseph-sentry authored Oct 1, 2024
1 parent 64598f2 commit d4d1fa4
Showing 8 changed files with 543 additions and 3 deletions.
13 changes: 13 additions & 0 deletions one_off_script.py
@@ -0,0 +1,13 @@
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()

if __name__ == "__main__":
    from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups
    from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

    backfill_test_rollups()
backfill_test_flag_bridges()
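For reference, a minimal sketch of running the rollup backfill selectively rather than with the defaults used by this entrypoint. The keyword arguments come from the backfill_test_rollups signature further down in this diff; the repo id and dates here are purely hypothetical:

# Importing from the one_off_scripts package configures Django via its __init__
# (the next file in this diff), so the models are usable from a plain Python shell.
from one_off_scripts.backfill_daily_test_rollups import backfill_test_rollups

# start_repoid is a lower bound (repoid >= 1234), not an exact match, and the
# dates narrow the default 2024-07-16..2024-09-17 window.
backfill_test_rollups(start_repoid=1234, start_date="2024-08-01", end_date="2024-08-07")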
6 changes: 6 additions & 0 deletions one_off_scripts/__init__.py
@@ -0,0 +1,6 @@
import os

import django

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "django_scaffold.settings")
django.setup()
201 changes: 201 additions & 0 deletions one_off_scripts/backfill_daily_test_rollups.py
@@ -0,0 +1,201 @@
import logging
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import date, datetime, timedelta

from django.db import transaction as django_transaction
from shared.django_apps.core.models import Repository
from shared.django_apps.reports.models import DailyTestRollup, Flake, TestInstance
from test_results_parser import Outcome

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


@dataclass
class RollupObj:
pass_count: int
fail_count: int
skip_count: int
flaky_fail_count: int

sum_duration_seconds: float
last_duration_seconds: float

latest_run: datetime

commits_where_fail: set[str] = field(default_factory=set)


def get_test_analytics_repos(start_repoid):
# get all repos that have test_analytics_enabled == True
test_analytics_repos = Repository.objects.filter(
test_analytics_enabled=True
).order_by("repoid")

if start_repoid is not None:
test_analytics_repos = test_analytics_repos.filter(repoid__gte=start_repoid)

return test_analytics_repos


def process_instance(
rollup_dict: dict[tuple[str, str], RollupObj],
flake_dict: dict[str, list[tuple[datetime, datetime | None]]],
instance: TestInstance,
):
pass_count = 0
fail_count = 0
skip_count = 0
flaky_fail_count = 0
duration_seconds = instance.duration_seconds
created_at = instance.created_at
commitid = instance.commitid

match instance.outcome:
case Outcome.Pass:
pass_count = 1
case Outcome.Skip:
skip_count = 1
case _:
fail_count = 1
if (flaky_range_list := flake_dict.get(instance.test_id)) is not None:
                for flake_range in flaky_range_list:
                    if flake_range[0] <= instance.created_at and (
                        flake_range[1] is None or instance.created_at < flake_range[1]
                    ):
flaky_fail_count += 1
break

if (entry := rollup_dict.get((instance.test_id, instance.branch))) is not None:
entry.pass_count += pass_count
entry.fail_count += fail_count
entry.skip_count += skip_count
entry.flaky_fail_count += flaky_fail_count
entry.sum_duration_seconds += duration_seconds
entry.last_duration_seconds = duration_seconds
entry.latest_run = created_at
if commitid:
entry.commits_where_fail.add(commitid)
else:
rollup_dict[(instance.test_id, instance.branch)] = RollupObj(
pass_count,
fail_count,
skip_count,
flaky_fail_count,
duration_seconds,
duration_seconds,
created_at,
set(),
)
if commitid:
rollup_dict[(instance.test_id, instance.branch)].commits_where_fail.add(
commitid
)


def save_rollups(rollup_dict, repoid, date):
rollups_to_create = []
for obj_key, obj in rollup_dict.items():
rollup = DailyTestRollup(
repoid=repoid,
date=date,
test_id=obj_key[0],
branch=obj_key[1],
pass_count=obj.pass_count,
fail_count=obj.fail_count,
skip_count=obj.skip_count,
flaky_fail_count=obj.flaky_fail_count,
commits_where_fail=list(obj.commits_where_fail),
latest_run=obj.latest_run,
last_duration_seconds=obj.last_duration_seconds,
            avg_duration_seconds=obj.sum_duration_seconds
            / max(obj.pass_count + obj.fail_count, 1),
)

rollups_to_create.append(rollup)

DailyTestRollup.objects.bulk_create(rollups_to_create, 1000)


def backfill_test_rollups(
start_repoid: int | None = None,
start_date: str | None = None, # default is 2024-07-16
end_date: str | None = None, # default is 2024-09-17
) -> dict[str, bool]:
log.info(
"Updating test instances",
extra=dict(start_repoid=start_repoid, start_date=start_date, end_date=end_date),
)
test_analytics_repos = get_test_analytics_repos(start_repoid)

chunk_size = 10000

log.info(
"Starting backfill for repos",
extra=dict(repos=[repo.repoid for repo in test_analytics_repos]),
)

for repo in test_analytics_repos:
repoid = repo.repoid
log.info("Starting backfill for repo", extra=dict(repoid=repoid))

curr_date = date.fromisoformat(start_date) if start_date else date(2024, 7, 16)
until_date = date.fromisoformat(end_date) if end_date else date(2024, 9, 17)

        # delete existing rollups for this repo across the whole backfill date range
DailyTestRollup.objects.filter(
repoid=repoid, date__gte=curr_date, date__lte=until_date
).delete()
django_transaction.commit()
log.info("Deleted rollups for repo", extra=dict(repoid=repoid))

# get flakes
flake_list = list(Flake.objects.filter(repository_id=repoid))

flake_dict: dict[str, list[tuple[datetime, datetime | None]]] = defaultdict(
list
)
for flake in flake_list:
flake_dict[flake.test_id].append((flake.start_date, flake.end_date))

while curr_date <= until_date:
log.info(
"Starting backfill for repo on date",
extra=dict(repoid=repoid, date=curr_date),
)

rollup_dict: dict[tuple[str, str], RollupObj] = {}

test_instances = TestInstance.objects.filter(
repoid=repoid, created_at__date=curr_date
).order_by("created_at")

num_test_instances = test_instances.count()
if num_test_instances == 0:
curr_date += timedelta(days=1)
continue

chunks = [
test_instances[i : i + chunk_size]
for i in range(0, num_test_instances, chunk_size)
]

for chunk in chunks:
for instance in chunk:
if instance.branch is None or instance.commitid is None:
continue

process_instance(rollup_dict, flake_dict, instance)

save_rollups(rollup_dict, repoid, curr_date)
django_transaction.commit()
log.info(
"Committed repo for day",
extra=dict(repoid=repoid, date=curr_date),
)
curr_date += timedelta(days=1)

log.info("Finished backfill for repo", extra=dict(repoid=repoid))

return {"successful": True}
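As a quick illustration of how save_rollups derives the aggregate columns from the accumulated counters, here is a standalone sketch that reuses only the RollupObj dataclass defined above; every value in it is made up:

from datetime import datetime

from one_off_scripts.backfill_daily_test_rollups import RollupObj

obj = RollupObj(
    pass_count=8,
    fail_count=2,
    skip_count=1,
    flaky_fail_count=1,
    sum_duration_seconds=25.0,
    last_duration_seconds=3.0,
    latest_run=datetime(2024, 8, 1, 12, 0),
    commits_where_fail={"abc123"},  # fabricated commit SHA for illustration
)

# Skips are excluded from the average: 25.0 / (8 + 2) == 2.5 seconds.
# The max() guard only matters on days where a test was exclusively skipped.
avg_duration_seconds = obj.sum_duration_seconds / max(obj.pass_count + obj.fail_count, 1)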
54 changes: 54 additions & 0 deletions one_off_scripts/backfill_test_flag_bridges.py
@@ -0,0 +1,54 @@
import logging

from django.db import transaction as django_transaction
from shared.django_apps.core.models import Repository
from shared.django_apps.reports.models import (
RepositoryFlag,
Test,
TestFlagBridge,
TestInstance,
)

logging.basicConfig(level=logging.INFO)
log = logging.getLogger()


def backfill_test_flag_bridges(repoid=None):
log.info("Backfilling TestFlagBridge objects", extra=dict(repoid=repoid))
repos = Repository.objects.filter(test_analytics_enabled=True)
if repoid is not None:
repos = repos.filter(repoid=repoid)

for repo in repos:
tests = Test.objects.filter(repository_id=repo.repoid)

flags = {
flag.flag_name: flag
for flag in RepositoryFlag.objects.filter(repository=repo)
}

bridges_to_create = []
for test in tests:
TestFlagBridge.objects.filter(test=test).delete()

first_test_instance = (
TestInstance.objects.filter(test_id=test.id)
.select_related("upload")
.first()
)

if first_test_instance is None:
continue

flag_names = first_test_instance.upload.flag_names

for flag_name in flag_names:
new_bridge = TestFlagBridge(test=test, flag=flags[flag_name])
bridges_to_create.append(new_bridge)

TestFlagBridge.objects.bulk_create(bridges_to_create, 1000)
log.info(
"Done creating flag bridges for repo",
            extra=dict(repoid=repo.repoid, num_tests=len(tests)),
)
django_transaction.commit()
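And a short sketch of running this backfill on its own; the repo id is hypothetical, and calling it with no argument walks every repository with test_analytics_enabled:

from one_off_scripts.backfill_test_flag_bridges import backfill_test_flag_bridges

# Rebuild the TestFlagBridge rows for a single repository, using the flags on
# each test's first TestInstance upload.
backfill_test_flag_bridges(repoid=1234)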