From 30513b6f5fc17a5713171b162116d9ac64574d5f Mon Sep 17 00:00:00 2001 From: jtimpe <111305129+jtimpe@users.noreply.github.com> Date: Thu, 3 Oct 2024 09:52:42 -0400 Subject: [PATCH] 2990 - stuck files notification (#3195) * stuck file templates * send stuck file notif * improve table readability * handle edge cases * lint * only send if files stuck * convert to celery task * fix test * fix test * unused import * rm stuck-file-single * insert admin link * task efficiency * datetime -> timezone * clean up tests * lint * fix test * lint * Update tdrs-backend/tdpservice/email/helpers/data_file.py Co-authored-by: Alex P. <63075587+ADPennington@users.noreply.github.com> --------- Co-authored-by: Alex P. <63075587+ADPennington@users.noreply.github.com> --- tdrs-backend/tdpservice/data_files/models.py | 5 + tdrs-backend/tdpservice/data_files/tasks.py | 48 ++++ .../data_files/test/test_stuck_files.py | 252 ++++++++++++++++++ tdrs-backend/tdpservice/email/email_enums.py | 1 + .../tdpservice/email/helpers/data_file.py | 29 ++ .../email/templates/stuck-file-list.html | 40 +++ .../tdpservice/parsers/test/factories.py | 18 ++ .../search_indexes/test/test_reparse.py | 6 +- tdrs-backend/tdpservice/settings/common.py | 4 + 9 files changed, 401 insertions(+), 2 deletions(-) create mode 100644 tdrs-backend/tdpservice/data_files/tasks.py create mode 100644 tdrs-backend/tdpservice/data_files/test/test_stuck_files.py create mode 100644 tdrs-backend/tdpservice/email/templates/stuck-file-list.html diff --git a/tdrs-backend/tdpservice/data_files/models.py b/tdrs-backend/tdpservice/data_files/models.py index c00541419..6fe5355e0 100644 --- a/tdrs-backend/tdpservice/data_files/models.py +++ b/tdrs-backend/tdpservice/data_files/models.py @@ -5,6 +5,7 @@ from io import StringIO from typing import Union +from django.conf import settings from django.contrib.admin.models import ADDITION, ContentType, LogEntry from django.core.files.base import File from django.db import models @@ -206,6 +207,10 
@@ def submitted_by(self): """Return the author as a string for this data file.""" return self.user.get_full_name() + def admin_link(self): + """Return a link to the admin console for this file.""" + return f"{settings.FRONTEND_BASE_URL}/admin/data_files/datafile/?id={self.pk}" + @classmethod def create_new_version(self, data): """Create a new version of a data file with an incremented version.""" diff --git a/tdrs-backend/tdpservice/data_files/tasks.py b/tdrs-backend/tdpservice/data_files/tasks.py new file mode 100644 index 000000000..16e35de79 --- /dev/null +++ b/tdrs-backend/tdpservice/data_files/tasks.py @@ -0,0 +1,48 @@ +"""Celery shared tasks for use in scheduled jobs.""" + +from celery import shared_task +from datetime import timedelta +from django.utils import timezone +from django.contrib.auth.models import Group +from django.db.models import Q, Count +from tdpservice.users.models import AccountApprovalStatusChoices, User +from tdpservice.data_files.models import DataFile +from tdpservice.parsers.models import DataFileSummary +from tdpservice.email.helpers.data_file import send_stuck_file_email + + +def get_stuck_files(): + """Return a queryset containing files in a 'stuck' state.""" + stuck_files = DataFile.objects.annotate(reparse_count=Count('reparse_meta_models')).filter( + # non-reparse submissions over an hour old + Q( + reparse_count=0, + created_at__lte=timezone.now() - timedelta(hours=1), + ) | # OR + # reparse submissions past the timeout, where the reparse did not complete + Q( + reparse_count__gt=0, + reparse_meta_models__timeout_at__lte=timezone.now(), + reparse_meta_models__finished=False, + reparse_meta_models__success=False + ) + ).filter( + # where there is NO summary or the summary is in PENDING status + Q(summary=None) | Q(summary__status=DataFileSummary.Status.PENDING) + ) + + return stuck_files + + +@shared_task +def notify_stuck_files(): + """Find files stuck in 'Pending' and notify SysAdmins.""" + stuck_files = get_stuck_files() + +
if stuck_files.exists(): # existence check only; avoids a full COUNT over the annotated queryset + recipients = User.objects.filter( + account_approval_status=AccountApprovalStatusChoices.APPROVED, + groups=Group.objects.get(name='OFA System Admin') + ).values_list('username', flat=True).distinct() + + send_stuck_file_email(stuck_files, recipients) diff --git a/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py new file mode 100644 index 000000000..95f4f8f3a --- /dev/null +++ b/tdrs-backend/tdpservice/data_files/test/test_stuck_files.py @@ -0,0 +1,252 @@ +"""Test the get_stuck_files function.""" + + +import pytest +from datetime import timedelta +from django.utils import timezone +from tdpservice.data_files.models import DataFile +from tdpservice.parsers.models import DataFileSummary +from tdpservice.data_files.tasks import get_stuck_files +from tdpservice.parsers.test.factories import ParsingFileFactory, DataFileSummaryFactory, ReparseMetaFactory + + +def _time_ago(hours=0, minutes=0, seconds=0): + return timezone.now() - timedelta(hours=hours, minutes=minutes, seconds=seconds) + + +def make_datafile(stt_user, stt, version): + """Create a test data file with default params.""" + datafile = ParsingFileFactory.create( + quarter=DataFile.Quarter.Q1, section=DataFile.Section.ACTIVE_CASE_DATA, + year=2023, version=version, user=stt_user, stt=stt + ) + return datafile + + +def make_summary(datafile, status): + """Create a test data file summary given a file and status.""" + return DataFileSummaryFactory.create( + datafile=datafile, + status=status, + ) + + +def make_reparse_meta(finished, success): + """Create a test reparse meta model.""" + return ReparseMetaFactory.create( + timeout_at=_time_ago(hours=1), + finished=finished, + success=success + ) + + +@pytest.mark.django_db +def test_find_pending_submissions__none_stuck(stt_user, stt): + """Finds no stuck files.""" + # an accepted standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) +
df1.created_at = _time_ago(hours=2) + df1.save() + make_summary(df1, DataFileSummary.Status.ACCEPTED) + + # an accepted reparse submission, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + make_summary(df2, DataFileSummary.Status.ACCEPTED) + rpm = make_reparse_meta(True, True) + df2.reparse_meta_models.add(rpm) + + # a pending standard submission, less than an hour old + df3 = make_datafile(stt_user, stt, 3) + df3.created_at = _time_ago(minutes=40) + df3.save() + make_summary(df3, DataFileSummary.Status.PENDING) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 0 + + +@pytest.mark.django_db +def test_find_pending_submissions__non_reparse_stuck(stt_user, stt): + """Finds standard upload/submission stuck in Pending.""" + # a pending standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + make_summary(df1, DataFileSummary.Status.PENDING) + + # an accepted reparse submission, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + make_summary(df2, DataFileSummary.Status.ACCEPTED) + rpm = make_reparse_meta(True, True) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 1 + assert stuck_files.first().pk == df1.pk + + +@pytest.mark.django_db +def test_find_pending_submissions__non_reparse_stuck__no_dfs(stt_user, stt): + """Finds standard upload/submission that is stuck with no DataFileSummary.""" + # a standard submission with no summary + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + + # an accepted reparse submission, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + make_summary(df2, DataFileSummary.Status.ACCEPTED) + rpm = make_reparse_meta(True, True) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() +
assert stuck_files.count() == 1 + assert stuck_files.first().pk == df1.pk + + +@pytest.mark.django_db +def test_find_pending_submissions__reparse_stuck(stt_user, stt): + """Finds a reparse submission stuck in pending, past the timeout.""" + # an accepted standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + make_summary(df1, DataFileSummary.Status.ACCEPTED) + + # a pending reparse submission, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + make_summary(df2, DataFileSummary.Status.PENDING) + rpm = make_reparse_meta(False, False) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 1 + assert stuck_files.first().pk == df2.pk + + +@pytest.mark.django_db +def test_find_pending_submissions__reparse_stuck__no_dfs(stt_user, stt): + """Finds a reparse submission with no DataFileSummary, past the timeout.""" + # an accepted standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + make_summary(df1, DataFileSummary.Status.ACCEPTED) + + # a reparse submission with no summary, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + rpm = make_reparse_meta(False, False) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 1 + assert stuck_files.first().pk == df2.pk + + +@pytest.mark.django_db +def test_find_pending_submissions__reparse_and_non_reparse_stuck(stt_user, stt): + """Finds stuck submissions, both reparse and standard parse.""" + # a pending standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + make_summary(df1, DataFileSummary.Status.PENDING) + + # a pending reparse submission, past the timeout + df2 =
make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + make_summary(df2, DataFileSummary.Status.PENDING) + rpm = make_reparse_meta(False, False) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 2 + for f in stuck_files: + assert f.pk in (df1.pk, df2.pk) + + +@pytest.mark.django_db +def test_find_pending_submissions__reparse_and_non_reparse_stuck_no_dfs(stt_user, stt): + """Finds stuck submissions, both reparse and standard parse.""" + # a standard submission with no summary, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + + # a reparse submission with no summary, past the timeout + df2 = make_datafile(stt_user, stt, 2) + df2.created_at = _time_ago(hours=1) + df2.save() + rpm = make_reparse_meta(False, False) + df2.reparse_meta_models.add(rpm) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 2 + for f in stuck_files: + assert f.pk in (df1.pk, df2.pk) + + +@pytest.mark.django_db +def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, stt): + """Finds no stuck files, as the new parse is successful.""" + # a pending standard submission, more than an hour old + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + dfs1 = make_summary(df1, DataFileSummary.Status.PENDING) + + # reparse fails the first time + rpm1 = make_reparse_meta(False, False) + df1.reparse_meta_models.add(rpm1) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 1 + + # reparse again, succeeds this time + dfs1.delete() # reparse deletes the original dfs and creates the new one + make_summary(df1, DataFileSummary.Status.ACCEPTED) + + rpm2 = make_reparse_meta(True, True) + df1.reparse_meta_models.add(rpm2) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 0 + + +@pytest.mark.django_db +def
test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, stt): + """Finds files stuck from the new reparse, even though the old one was successful.""" + # file rejected on first upload + df1 = make_datafile(stt_user, stt, 1) + df1.created_at = _time_ago(hours=2) + df1.save() + dfs1 = make_summary(df1, DataFileSummary.Status.REJECTED) + + # reparse succeeds + rpm1 = make_reparse_meta(True, True) + df1.reparse_meta_models.add(rpm1) + + # reparse again, fails this time + dfs1.delete() # reparse deletes the original dfs and creates the new one + DataFileSummary.objects.create( + datafile=df1, + status=DataFileSummary.Status.PENDING, + ) + + rpm2 = make_reparse_meta(False, False) + df1.reparse_meta_models.add(rpm2) + + stuck_files = get_stuck_files() + assert stuck_files.count() == 1 + assert stuck_files.first().pk == df1.pk diff --git a/tdrs-backend/tdpservice/email/email_enums.py b/tdrs-backend/tdpservice/email/email_enums.py index 4527b6016..82e15e66d 100644 --- a/tdrs-backend/tdpservice/email/email_enums.py +++ b/tdrs-backend/tdpservice/email/email_enums.py @@ -15,3 +15,4 @@ class EmailType(Enum): ACCOUNT_DEACTIVATED = 'account-deactivated.html' ACCOUNT_DEACTIVATED_ADMIN = 'account-deactivated-admin.html' UPCOMING_SUBMISSION_DEADLINE = 'upcoming-submission-deadline.html' + STUCK_FILE_LIST = 'stuck-file-list.html' diff --git a/tdrs-backend/tdpservice/email/helpers/data_file.py b/tdrs-backend/tdpservice/email/helpers/data_file.py index 1ed966a87..3b9112b54 100644 --- a/tdrs-backend/tdpservice/email/helpers/data_file.py +++ b/tdrs-backend/tdpservice/email/helpers/data_file.py @@ -1,5 +1,6 @@ """Helper functions for sending data file submission emails.""" from django.conf import settings +from tdpservice.users.models import User from tdpservice.email.email_enums import EmailType from tdpservice.email.email import automated_email, log from tdpservice.parsers.util import get_prog_from_section @@ -69,3 +70,31 @@ def send_data_submitted_email( 
text_message=text_message, logger_context=logger_context ) + + +def send_stuck_file_email(stuck_files, recipients): + """Send an email to sys admins with details of files stuck in Pending.""" + logger_context = { + 'user_id': User.objects.get_or_create(username='system')[0].pk + } + + template_path = EmailType.STUCK_FILE_LIST.value + subject = 'List of submitted files with pending status after 1 hour' + text_message = 'The system has detected stuck files.' + + context = { + "subject": subject, + "url": settings.FRONTEND_BASE_URL, + "files": stuck_files, + } + + log(f'Emailing stuck files to SysAdmins: {list(recipients)}', logger_context=logger_context) + + automated_email( + email_path=template_path, + recipient_email=recipients, + subject=subject, + email_context=context, + text_message=text_message, + logger_context=logger_context + ) diff --git a/tdrs-backend/tdpservice/email/templates/stuck-file-list.html b/tdrs-backend/tdpservice/email/templates/stuck-file-list.html new file mode 100644 index 000000000..bfe5055a2 --- /dev/null +++ b/tdrs-backend/tdpservice/email/templates/stuck-file-list.html @@ -0,0 +1,40 @@ +{% extends 'base.html' %} +{% block content %} + + + + +

+

+ +

Hello,

+ +

The system has detected stuck data submissions.

+ + + + + + + + + + + + + {% for file in files %} + + + + + + + + {% endfor %} + +
SttSectionFiscal yearSubmitted onFile
{{ file.stt }}{{ file.section }}{{ file.fiscal_year }}{{ file.created_at }} {{ file.created_time_ago }} + View in Admin Console +
+ +{% endblock %} \ No newline at end of file diff --git a/tdrs-backend/tdpservice/parsers/test/factories.py b/tdrs-backend/tdpservice/parsers/test/factories.py index 5b558ef3f..c0f50e85b 100644 --- a/tdrs-backend/tdpservice/parsers/test/factories.py +++ b/tdrs-backend/tdpservice/parsers/test/factories.py @@ -1,11 +1,29 @@ """Factories for generating test data for parsers.""" import factory +from django.utils import timezone from tdpservice.parsers.models import DataFileSummary, ParserErrorCategoryChoices from faker import Faker from tdpservice.data_files.test.factories import DataFileFactory from tdpservice.users.test.factories import UserFactory from tdpservice.stts.test.factories import STTFactory + +class ReparseMetaFactory(factory.django.DjangoModelFactory): + """Generate test reparse meta model.""" + + class Meta: + """Hardcoded meta data for factory.""" + + model = "search_indexes.ReparseMeta" + + timeout_at = factory.LazyFunction(timezone.now) # evaluated per instance, not once at import time + finished = False + success = False + num_files_to_reparse = 1 + files_completed = 1 + files_failed = 0 + + class ParsingFileFactory(factory.django.DjangoModelFactory): """Generate test data for data files.""" diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 360988224..2c8647cea 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -273,8 +273,10 @@ def test_reparse_sequential(log_context): meta = ReparseMeta.objects.create(timeout_at=None) assert False is cmd._assert_sequential_execution(log_context) timeout_entry = LogEntry.objects.latest('pk') - assert timeout_entry.change_message == ("The latest ReparseMeta model's (ID: 1) timeout_at field is None. Cannot " - "safely execute reparse, please fix manually.") + assert timeout_entry.change_message == ( + f"The latest ReparseMeta model's (ID: {meta.pk}) timeout_at field is None.
Cannot " + "safely execute reparse, please fix manually." + ) meta.timeout_at = timezone.now() + timedelta(seconds=100) meta.save() diff --git a/tdrs-backend/tdpservice/settings/common.py b/tdrs-backend/tdpservice/settings/common.py index cd7b5274b..ba936b545 100644 --- a/tdrs-backend/tdpservice/settings/common.py +++ b/tdrs-backend/tdpservice/settings/common.py @@ -499,6 +499,10 @@ class Common(Configuration): 'task': 'tdpservice.email.tasks.email_admin_num_access_requests', 'schedule': crontab(minute='0', hour='1', day_of_week='*', day_of_month='*', month_of_year='*'), # Every day at 1am UTC (9pm EST) }, + 'Email Admin Number of Stuck Files' : { + 'task': 'tdpservice.data_files.tasks.notify_stuck_files', + 'schedule': crontab(minute='0', hour='1', day_of_week='*', day_of_month='*', month_of_year='*'), # Every day at 1am UTC (9pm EST) + }, 'Email Data Analyst Q1 Upcoming Submission Deadline Reminder': { 'task': 'tdpservice.email.tasks.send_data_submission_reminder', # Feb 9 at 1pm UTC (9am EST)