2990 - stuck files notification (#3195)
* stuck file templates

* send stuck file notif

* improve table readability

* handle edge cases

* lint

* only send if files stuck

* convert to celery task

* fix test

* fix test

* unused import

* rm stuck-file-single

* insert admin link

* task efficiency

* datetime -> timezone

* clean up tests

* lint

* fix test

* lint

* Update tdrs-backend/tdpservice/email/helpers/data_file.py

Co-authored-by: Alex P.  <[email protected]>

---------

Co-authored-by: Alex P. <[email protected]>
jtimpe and ADPennington authored Oct 3, 2024
1 parent fab247d commit 30513b6
Showing 9 changed files with 401 additions and 2 deletions.
5 changes: 5 additions & 0 deletions tdrs-backend/tdpservice/data_files/models.py
@@ -5,6 +5,7 @@
from io import StringIO
from typing import Union

from django.conf import settings
from django.contrib.admin.models import ADDITION, ContentType, LogEntry
from django.core.files.base import File
from django.db import models
@@ -206,6 +207,10 @@ def submitted_by(self):
        """Return the author as a string for this data file."""
        return self.user.get_full_name()

    def admin_link(self):
        """Return a link to the admin console for this file."""
        return f"{settings.FRONTEND_BASE_URL}/admin/data_files/datafile/?id={self.pk}"

    @classmethod
    def create_new_version(self, data):
        """Create a new version of a data file with an incremented version."""
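For context, a minimal sketch of what the new admin_link helper evaluates to; the base URL and primary key below are hypothetical stand-ins, and only the path format comes from the method above.

# Illustrative only -- not part of the commit.
# settings.FRONTEND_BASE_URL might be e.g. 'https://tdp.example.gov' in a deployed environment.
df = DataFile.objects.get(pk=123)  # hypothetical file
df.admin_link()                    # -> 'https://tdp.example.gov/admin/data_files/datafile/?id=123'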
48 changes: 48 additions & 0 deletions tdrs-backend/tdpservice/data_files/tasks.py
@@ -0,0 +1,48 @@
"""Celery shared tasks for use in scheduled jobs."""

from celery import shared_task
from datetime import timedelta
from django.utils import timezone
from django.contrib.auth.models import Group
from django.db.models import Q, Count
from tdpservice.users.models import AccountApprovalStatusChoices, User
from tdpservice.data_files.models import DataFile
from tdpservice.parsers.models import DataFileSummary
from tdpservice.email.helpers.data_file import send_stuck_file_email


def get_stuck_files():
    """Return a queryset containing files in a 'stuck' state."""
    stuck_files = DataFile.objects.annotate(reparse_count=Count('reparse_meta_models')).filter(
        # non-reparse submissions over an hour old
        Q(
            reparse_count=0,
            created_at__lte=timezone.now() - timedelta(hours=1),
        ) |  # OR
        # reparse submissions past the timeout, where the reparse did not complete
        Q(
            reparse_count__gt=0,
            reparse_meta_models__timeout_at__lte=timezone.now(),
            reparse_meta_models__finished=False,
            reparse_meta_models__success=False
        )
    ).filter(
        # where there is NO summary or the summary is in PENDING status
        Q(summary=None) | Q(summary__status=DataFileSummary.Status.PENDING)
    )

    return stuck_files


@shared_task
def notify_stuck_files():
    """Find files stuck in 'Pending' and notify SysAdmins."""
    stuck_files = get_stuck_files()

    if stuck_files.count() > 0:
        recipients = User.objects.filter(
            account_approval_status=AccountApprovalStatusChoices.APPROVED,
            groups=Group.objects.get(name='OFA System Admin')
        ).values_list('username', flat=True).distinct()

        send_stuck_file_email(stuck_files, recipients)
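As a rough sketch of how notify_stuck_files might be scheduled (the project may wire this up differently, e.g. through django-celery-beat), a Celery beat entry like the one below would run the check hourly; the entry name and cadence are illustrative, and only the task path matches the module above.

# Hypothetical settings snippet -- assumes Celery's standard beat_schedule mechanism.
from celery.schedules import crontab

CELERY_BEAT_SCHEDULE = {
    'notify-stuck-files': {
        'task': 'tdpservice.data_files.tasks.notify_stuck_files',  # task added in this commit
        'schedule': crontab(minute=0),  # top of every hour (illustrative cadence)
    },
}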
252 changes: 252 additions & 0 deletions tdrs-backend/tdpservice/data_files/test/test_stuck_files.py
@@ -0,0 +1,252 @@
"""Test the get_stuck_files function."""


import pytest
from datetime import timedelta
from django.utils import timezone
from tdpservice.data_files.models import DataFile
from tdpservice.parsers.models import DataFileSummary
from tdpservice.data_files.tasks import get_stuck_files
from tdpservice.parsers.test.factories import ParsingFileFactory, DataFileSummaryFactory, ReparseMetaFactory


def _time_ago(hours=0, minutes=0, seconds=0):
    """Return a timezone-aware datetime the given amount of time in the past."""
    return timezone.now() - timedelta(hours=hours, minutes=minutes, seconds=seconds)


def make_datafile(stt_user, stt, version):
    """Create a test data file with default params."""
    datafile = ParsingFileFactory.create(
        quarter=DataFile.Quarter.Q1, section=DataFile.Section.ACTIVE_CASE_DATA,
        year=2023, version=version, user=stt_user, stt=stt
    )
    return datafile


def make_summary(datafile, status):
    """Create a test data file summary given a file and status."""
    return DataFileSummaryFactory.create(
        datafile=datafile,
        status=status,
    )


def make_reparse_meta(finished, success):
    """Create a test reparse meta model."""
    return ReparseMetaFactory.create(
        timeout_at=_time_ago(hours=1),
        finished=finished,
        success=success
    )


@pytest.mark.django_db
def test_find_pending_submissions__none_stuck(stt_user, stt):
    """Finds no stuck files."""
    # an accepted standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    make_summary(df1, DataFileSummary.Status.ACCEPTED)

    # an accepted reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    make_summary(df2, DataFileSummary.Status.ACCEPTED)
    rpm = make_reparse_meta(True, True)
    df2.reparse_meta_models.add(rpm)

    # a pending standard submission, less than an hour old
    df3 = make_datafile(stt_user, stt, 3)
    df3.created_at = _time_ago(minutes=40)
    df3.save()
    make_summary(df3, DataFileSummary.Status.PENDING)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 0


@pytest.mark.django_db
def test_find_pending_submissions__non_reparse_stuck(stt_user, stt):
    """Finds standard upload/submission stuck in Pending."""
    # a pending standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    make_summary(df1, DataFileSummary.Status.PENDING)

    # an accepted reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    make_summary(df2, DataFileSummary.Status.ACCEPTED)
    rpm = make_reparse_meta(True, True)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1
    assert stuck_files.first().pk == df1.pk


@pytest.mark.django_db
def test_find_pending_submissions__non_reparse_stuck__no_dfs(stt_user, stt):
    """Finds standard upload/submission stuck in Pending."""
    # a standard submission with no summary
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()

    # an accepted reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    make_summary(df2, DataFileSummary.Status.ACCEPTED)
    rpm = make_reparse_meta(True, True)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1
    assert stuck_files.first().pk == df1.pk


@pytest.mark.django_db
def test_find_pending_submissions__reparse_stuck(stt_user, stt):
    """Finds a reparse submission stuck in pending, past the timeout."""
    # an accepted standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    make_summary(df1, DataFileSummary.Status.ACCEPTED)

    # a pending reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    make_summary(df2, DataFileSummary.Status.PENDING)
    rpm = make_reparse_meta(False, False)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1
    assert stuck_files.first().pk == df2.pk


@pytest.mark.django_db
def test_find_pending_submissions__reparse_stuck__no_dfs(stt_user, stt):
    """Finds a reparse submission stuck in pending, past the timeout."""
    # an accepted standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    make_summary(df1, DataFileSummary.Status.ACCEPTED)

    # a reparse submission with no summary, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    rpm = make_reparse_meta(False, False)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1
    assert stuck_files.first().pk == df2.pk


@pytest.mark.django_db
def test_find_pending_submissions__reparse_and_non_reparse_stuck(stt_user, stt):
    """Finds stuck submissions, both reparse and standard parse."""
    # a pending standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    make_summary(df1, DataFileSummary.Status.PENDING)

    # a pending reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    make_summary(df2, DataFileSummary.Status.PENDING)
    rpm = make_reparse_meta(False, False)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 2
    for f in stuck_files:
        assert f.pk in (df1.pk, df2.pk)


@pytest.mark.django_db
def test_find_pending_submissions__reparse_and_non_reparse_stuck_no_dfs(stt_user, stt):
    """Finds stuck submissions, both reparse and standard parse."""
    # a pending standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()

    # a pending reparse submission, past the timeout
    df2 = make_datafile(stt_user, stt, 2)
    df2.created_at = _time_ago(hours=1)
    df2.save()
    rpm = make_reparse_meta(False, False)
    df2.reparse_meta_models.add(rpm)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 2
    for f in stuck_files:
        assert f.pk in (df1.pk, df2.pk)


@pytest.mark.django_db
def test_find_pending_submissions__old_reparse_stuck__new_not_stuck(stt_user, stt):
    """Finds no stuck files, as the new parse is successful."""
    # a pending standard submission, more than an hour old
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    dfs1 = make_summary(df1, DataFileSummary.Status.PENDING)

    # reparse fails the first time
    rpm1 = make_reparse_meta(False, False)
    df1.reparse_meta_models.add(rpm1)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1

    # reparse again, succeeds this time
    dfs1.delete()  # reparse deletes the original dfs and creates the new one
    make_summary(df1, DataFileSummary.Status.ACCEPTED)

    rpm2 = make_reparse_meta(True, True)
    df1.reparse_meta_models.add(rpm2)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 0


@pytest.mark.django_db
def test_find_pending_submissions__new_reparse_stuck__old_not_stuck(stt_user, stt):
    """Finds files stuck from the new reparse, even though the old one was successful."""
    # file rejected on first upload
    df1 = make_datafile(stt_user, stt, 1)
    df1.created_at = _time_ago(hours=2)
    df1.save()
    dfs1 = make_summary(df1, DataFileSummary.Status.REJECTED)

    # reparse succeeds
    rpm1 = make_reparse_meta(True, True)
    df1.reparse_meta_models.add(rpm1)

    # reparse again, fails this time
    dfs1.delete()  # reparse deletes the original dfs and creates the new one
    DataFileSummary.objects.create(
        datafile=df1,
        status=DataFileSummary.Status.PENDING,
    )

    rpm2 = make_reparse_meta(False, False)
    df1.reparse_meta_models.add(rpm2)

    stuck_files = get_stuck_files()
    assert stuck_files.count() == 1
    assert stuck_files.first().pk == df1.pk
1 change: 1 addition & 0 deletions tdrs-backend/tdpservice/email/email_enums.py
@@ -15,3 +15,4 @@ class EmailType(Enum):
    ACCOUNT_DEACTIVATED = 'account-deactivated.html'
    ACCOUNT_DEACTIVATED_ADMIN = 'account-deactivated-admin.html'
    UPCOMING_SUBMISSION_DEADLINE = 'upcoming-submission-deadline.html'
    STUCK_FILE_LIST = 'stuck-file-list.html'
29 changes: 29 additions & 0 deletions tdrs-backend/tdpservice/email/helpers/data_file.py
@@ -1,5 +1,6 @@
"""Helper functions for sending data file submission emails."""
from django.conf import settings
from tdpservice.users.models import User
from tdpservice.email.email_enums import EmailType
from tdpservice.email.email import automated_email, log
from tdpservice.parsers.util import get_prog_from_section
@@ -69,3 +70,31 @@ def send_data_submitted_email(
        text_message=text_message,
        logger_context=logger_context
    )


def send_stuck_file_email(stuck_files, recipients):
    """Send an email to sys admins with details of files stuck in Pending."""
    logger_context = {
        'user_id': User.objects.get_or_create(username='system')[0].pk
    }

    template_path = EmailType.STUCK_FILE_LIST.value
    subject = 'List of submitted files with pending status after 1 hour'
    text_message = 'The system has detected stuck files.'

    context = {
        "subject": subject,
        "url": settings.FRONTEND_BASE_URL,
        "files": stuck_files,
    }

    log(f'Emailing stuck files to SysAdmins: {list(recipients)}', logger_context=logger_context)

    automated_email(
        email_path=template_path,
        recipient_email=recipients,
        subject=subject,
        email_context=context,
        text_message=text_message,
        logger_context=logger_context
    )
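To preview the rendered notification without sending mail, a quick sketch like the one below can be run from a Django shell; it assumes the stuck-file-list.html template added by this PR resolves by that bare name through the project's template loaders, and the URL value is a local stand-in for settings.FRONTEND_BASE_URL.

# Hypothetical manual check -- not part of the commit.
from django.template.loader import render_to_string
from tdpservice.data_files.tasks import get_stuck_files

html = render_to_string('stuck-file-list.html', {
    'subject': 'List of submitted files with pending status after 1 hour',
    'url': 'http://localhost:3000',   # stand-in for settings.FRONTEND_BASE_URL
    'files': get_stuck_files(),       # queryset built by the task module above
})
print(html)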