From cc536b54b5e3dc45473f75247e283e185cda7b54 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 4 Sep 2024 13:19:05 -0400 Subject: [PATCH 01/39] - Updated method to batch delete records in elastic to avoid pulling whole queryset into memory --- .../search_indexes/management/commands/clean_and_reparse.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index a3b746a66..28b0157a0 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -2,6 +2,7 @@ from django.core.management.base import BaseCommand from django.core.management import call_command +from django.core.paginator import Paginator from django.db.utils import DatabaseError from elasticsearch.exceptions import ElasticsearchException from tdpservice.data_files.models import DataFile @@ -106,7 +107,9 @@ def __delete_records(self, file_ids, new_indices, log_context): total_deleted += qset.count() if not new_indices: # If we aren't creating new indices, then we don't want duplicate data in the existing indices. - doc().update(qset, refresh=True, action='delete') + paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) + for page in paginator: + doc().update(page.object_list, action='delete') qset._raw_delete(qset.db) except ElasticsearchException as e: log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! ' From a27086ca753ae9afb7a13b8e3ae8025c7856d12e Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 4 Sep 2024 13:54:29 -0400 Subject: [PATCH 02/39] - Instantiate document once --- .../search_indexes/management/commands/clean_and_reparse.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 28b0157a0..7fe0f67f5 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -108,8 +108,9 @@ def __delete_records(self, file_ids, new_indices, log_context): if not new_indices: # If we aren't creating new indices, then we don't want duplicate data in the existing indices. paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) + document = doc() for page in paginator: - doc().update(page.object_list, action='delete') + document.update(page.object_list, action='delete') qset._raw_delete(qset.db) except ElasticsearchException as e: log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! 
' From 6f2105f5a96f9237ea0f240d7afc7bde0a3350a6 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 5 Sep 2024 10:22:49 -0400 Subject: [PATCH 03/39] - Updated clean and reparse to not load entire queryset into memory - Added extra console logging - changed log level on frequent exception --- .../tdpservice/parsers/validators/category3.py | 2 +- .../management/commands/clean_and_reparse.py | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/validators/category3.py b/tdrs-backend/tdpservice/parsers/validators/category3.py index cb278e5e2..89f9547c8 100644 --- a/tdrs-backend/tdpservice/parsers/validators/category3.py +++ b/tdrs-backend/tdpservice/parsers/validators/category3.py @@ -386,7 +386,7 @@ def validate(record, row_schema): "Caught exception in validator: validate__WORK_ELIGIBLE_INDICATOR__HOH__AGE. " + f"With field values: {vals}." ) - logger.error(f'Exception: {e}') + logger.debug(f'Exception: {e}') # Per conversation with Alex on 03/26/2024, returning the true case during exception handling to avoid # confusing the STTs. return true_case diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 7fe0f67f5..81cccafa4 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -62,6 +62,7 @@ def __handle_elastic(self, new_indices, log_context): """Create new Elastic indices and delete old ones.""" if new_indices: try: + logger.info("Creating new elastic indexes.") call_command('tdp_search_index', '--create', '-f', '--use-alias') log("Index creation complete.", logger_context=log_context, @@ -83,7 +84,10 @@ def __delete_summaries(self, file_ids, log_context): """Raw delete all DataFileSummary objects.""" try: qset = DataFileSummary.objects.filter(datafile_id__in=file_ids) + count = qset.count() + logger.info(f"Deleting {count} datafile summary objects.") qset._raw_delete(qset.db) + logger.info("Successfully deleted datafile summary objects.") except DatabaseError as e: log('Encountered a DatabaseError while deleting DataFileSummary from Postgres. The database ' 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!', @@ -103,8 +107,10 @@ def __delete_records(self, file_ids, new_indices, log_context): for doc in DOCUMENTS: try: model = doc.Django.model - qset = model.objects.filter(datafile_id__in=file_ids) - total_deleted += qset.count() + qset = model.objects.filter(datafile_id__in=file_ids).order_by('id') + count = qset.count() + total_deleted += count + logger.info(f"Deleting {count} records of type: {model}.") if not new_indices: # If we aren't creating new indices, then we don't want duplicate data in the existing indices. paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) @@ -136,7 +142,10 @@ def __delete_errors(self, file_ids, log_context): """Raw delete all ParserErrors for each file ID.""" try: qset = ParserError.objects.filter(file_id__in=file_ids) + count = qset.count() + logger.info(f"Deleting {count} parser errors.") qset._raw_delete(qset.db) + logger.info("Successfully deleted parser errors.") except DatabaseError as e: log('Encountered a DatabaseError while deleting ParserErrors from Postgres. The database ' 'and Elastic are INCONSISTENT! 
Restore the DB from the backup as soon as possible!', From a8093e555ab33c8d5ede5b7077ef9eff6c07cc62 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Tue, 10 Sep 2024 15:25:00 -0400 Subject: [PATCH 04/39] - Update names of functions - Add tests --- .../management/commands/clean_and_reparse.py | 50 ++--- .../search_indexes/test/test_reparse.py | 174 ++++++++++++++++++ 2 files changed, 199 insertions(+), 25 deletions(-) create mode 100644 tdrs-backend/tdpservice/search_indexes/test/test_reparse.py diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 81cccafa4..e10f472e3 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -34,7 +34,7 @@ def add_arguments(self, parser): parser.add_argument("-a", "--all", action='store_true', help="Clean and reparse all datafiles. If selected, " "fiscal_year/quarter aren't necessary.") - def __get_log_context(self, system_user): + def _get_log_context(self, system_user): """Return logger context.""" context = {'user_id': system_user.id, 'action_flag': ADDITION, @@ -42,7 +42,7 @@ def __get_log_context(self, system_user): } return context - def __backup(self, backup_file_name, log_context): + def _backup(self, backup_file_name, log_context): """Execute Postgres DB backup.""" try: logger.info("Beginning reparse DB Backup.") @@ -58,7 +58,7 @@ def __backup(self, backup_file_name, log_context): level='error') raise e - def __handle_elastic(self, new_indices, log_context): + def _handle_elastic(self, new_indices, log_context): """Create new Elastic indices and delete old ones.""" if new_indices: try: @@ -74,13 +74,13 @@ def __handle_elastic(self, new_indices, log_context): level='error') raise e except Exception as e: - log("Caught generic exception in __handle_elastic. Clean and reparse NOT executed. " + log("Caught generic exception in _handle_elastic. Clean and reparse NOT executed. 
" "Database is CONSISTENT, Elastic is INCONSISTENT!", logger_context=log_context, level='error') raise e - def __delete_summaries(self, file_ids, log_context): + def _delete_summaries(self, file_ids, log_context): """Raw delete all DataFileSummary objects.""" try: qset = DataFileSummary.objects.filter(datafile_id__in=file_ids) @@ -101,7 +101,7 @@ def __delete_summaries(self, file_ids, log_context): level='critical') raise e - def __delete_records(self, file_ids, new_indices, log_context): + def _delete_records(self, file_ids, new_indices, log_context): """Delete records, errors, and documents from Postgres and Elastic.""" total_deleted = 0 for doc in DOCUMENTS: @@ -138,7 +138,7 @@ def __delete_records(self, file_ids, new_indices, log_context): raise e return total_deleted - def __delete_errors(self, file_ids, log_context): + def _delete_errors(self, file_ids, log_context): """Raw delete all ParserErrors for each file ID.""" try: qset = ParserError.objects.filter(file_id__in=file_ids) @@ -159,14 +159,14 @@ def __delete_errors(self, file_ids, log_context): level='critical') raise e - def __delete_associated_models(self, meta_model, file_ids, new_indices, log_context): + def _delete_associated_models(self, meta_model, file_ids, new_indices, log_context): """Delete all models associated to the selected datafiles.""" - self.__delete_summaries(file_ids, log_context) - self.__delete_errors(file_ids, log_context) - num_deleted = self.__delete_records(file_ids, new_indices, log_context) + self._delete_summaries(file_ids, log_context) + self._delete_errors(file_ids, log_context) + num_deleted = self._delete_records(file_ids, new_indices, log_context) meta_model.num_records_deleted = num_deleted - def __handle_datafiles(self, files, meta_model, log_context): + def _handle_datafiles(self, files, meta_model, log_context): """Delete, re-save, and reparse selected datafiles.""" for file in files: try: @@ -180,13 +180,13 @@ def __handle_datafiles(self, files, meta_model, log_context): level='critical') raise e except Exception as e: - log('Caught generic exception in __handle_datafiles. Database and Elastic are INCONSISTENT! ' + log('Caught generic exception in _handle_datafiles. Database and Elastic are INCONSISTENT! ' 'Restore the DB from the backup as soon as possible!', logger_context=log_context, level='critical') raise e - def __count_total_num_records(self, log_context): + def _count_total_num_records(self, log_context): """Count total number of records in the database for meta object.""" try: return count_all_records() @@ -203,7 +203,7 @@ def __count_total_num_records(self, log_context): level='error') exit(1) - def __assert_sequential_execution(self, log_context): + def _assert_sequential_execution(self, log_context): """Assert that no other reparse commands are still executing.""" latest_meta_model = ReparseMeta.get_latest() now = timezone.now() @@ -226,7 +226,7 @@ def __assert_sequential_execution(self, log_context): logger_context=log_context, level='warn') - def __calculate_timeout(self, num_files, num_records): + def _calculate_timeout(self, num_files, num_records): """Estimate a timeout parameter based on the number of files and the number of records.""" # Increase by an order of magnitude to have the bases covered. 
line_parse_time = settings.MEDIAN_LINE_PARSE_TIME * 10
@@ -291,7 +291,7 @@ def handle(self, *args, **options):
         system_user, created = User.objects.get_or_create(username='system')
         if created:
             logger.debug('Created reserved system user.')
-        log_context = self.__get_log_context(system_user)
+        log_context = self._get_log_context(system_user)
 
         all_fy = "All"
         all_q = "Q1-4"
@@ -307,7 +307,7 @@ def handle(self, *args, **options):
                 level='warn')
             return
 
-        self.__assert_sequential_execution(log_context)
+        self._assert_sequential_execution(log_context)
         meta_model = ReparseMeta.objects.create(fiscal_quarter=fiscal_quarter,
                                                 fiscal_year=fiscal_year,
                                                 all=reparse_all,
@@ -317,29 +317,29 @@ def handle(self, *args, **options):
 
         # Backup the Postgres DB
         backup_file_name += f"_rpv{meta_model.pk}.pg"
-        self.__backup(backup_file_name, log_context)
+        self._backup(backup_file_name, log_context)
 
         meta_model.db_backup_location = backup_file_name
         meta_model.save()
 
         # Create and delete Elastic indices if necessary
-        self.__handle_elastic(new_indices, log_context)
+        self._handle_elastic(new_indices, log_context)
 
         # Delete records from Postgres and Elastic if necessary
         file_ids = files.values_list('id', flat=True).distinct()
-        meta_model.total_num_records_initial = self.__count_total_num_records(log_context)
+        meta_model.total_num_records_initial = self._count_total_num_records(log_context)
         meta_model.save()
 
-        self.__delete_associated_models(meta_model, file_ids, new_indices, log_context)
+        self._delete_associated_models(meta_model, file_ids, new_indices, log_context)
 
-        meta_model.timeout_at = meta_model.created_at + self.__calculate_timeout(num_files,
-                                                                                 meta_model.num_records_deleted)
+        meta_model.timeout_at = meta_model.created_at + self._calculate_timeout(num_files,
+                                                                                meta_model.num_records_deleted)
         meta_model.save()
 
         logger.info(f"Deleted a total of {meta_model.num_records_deleted} records across {num_files} files.")
 
         # Delete and re-save datafiles to handle cascading dependencies
         logger.info(f'Deleting and re-parsing {num_files} files')
-        self.__handle_datafiles(files, meta_model, log_context)
+        self._handle_datafiles(files, meta_model, log_context)
 
         log("Database cleansing complete and all files have been re-scheduled for parsing and validation.",
             logger_context=log_context,
diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
new file mode 100644
index 000000000..daec36291
--- /dev/null
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -0,0 +1,174 @@
+"""Test cases for reparse functions."""
+
+import pytest
+from tdpservice.parsers import util, parse
+from tdpservice.parsers.test.factories import DataFileSummaryFactory
+from tdpservice.search_indexes.management.commands import clean_and_reparse
+from tdpservice.search_indexes.models.reparse_meta import ReparseMeta
+from tdpservice.users.models import User
+
+from django.contrib.admin.models import ADDITION
+
+import os
+import time
+
+@pytest.fixture
+def cat4_edge_case_file(stt_user, stt):
+    """Fixture for cat_4_edge_case.txt."""
+    cat4_edge_case_file = util.create_test_datafile('cat_4_edge_case.txt', stt_user, stt)
+    cat4_edge_case_file.year = 2024
+    cat4_edge_case_file.quarter = 'Q1'
+    cat4_edge_case_file.save()
+    return cat4_edge_case_file
+
+@pytest.fixture
+def big_file(stt_user, stt):
+    """Fixture for ADS.E2J.FTP1.TS06."""
+    return util.create_test_datafile('ADS.E2J.FTP1.TS06', stt_user, stt)
+
+@pytest.fixture
+def small_ssp_section1_datafile(stt_user, stt):
"""Fixture for small_ssp_section1.""" + small_ssp_section1_datafile = util.create_test_datafile('small_ssp_section1.txt', stt_user, + stt, 'SSP Active Case Data') + small_ssp_section1_datafile.year = 2024 + small_ssp_section1_datafile.quarter = 'Q1' + small_ssp_section1_datafile.save() + return small_ssp_section1_datafile + +@pytest.fixture +def tribal_section_1_file(stt_user, stt): + """Fixture for ADS.E2J.FTP4.TS06.""" + tribal_section_1_file = util.create_test_datafile('ADS.E2J.FTP1.TS142', stt_user, stt, "Tribal Active Case Data") + tribal_section_1_file.year = 2022 + tribal_section_1_file.quarter = 'Q1' + tribal_section_1_file.save() + return tribal_section_1_file + +@pytest.fixture +def dfs(): + """Fixture for DataFileSummary.""" + return DataFileSummaryFactory.build() + +@pytest.fixture +def log_context(): + """Fixture for logger context.""" + system_user, created = User.objects.get_or_create(username='system') + context = {'user_id': system_user.id, + 'action_flag': ADDITION, + 'object_repr': "Test Clean and Reparse" + } + return context + +def parse_files(summary, f1, f2, f3, f4): + """Parse all files.""" + summary.datafile = f1 + parse.parse_datafile(f1, summary) + + summary.datafile = f2 + parse.parse_datafile(f2, summary) + + summary.datafile = f3 + parse.parse_datafile(f3, summary) + + summary.datafile = f4 + parse.parse_datafile(f4, summary) + f1.save() + f2.save() + f3.save() + f4.save() + return [f1.pk, f2.pk, f3.pk, f4.pk] + +@pytest.mark.django_db +def test_count_total_num_records(log_context, dfs, cat4_edge_case_file, big_file, + small_ssp_section1_datafile, tribal_section_1_file): + """Count total number of records in DB.""" + parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + + cmd = clean_and_reparse.Command() + assert 3104 == cmd._count_total_num_records(log_context) + cat4_edge_case_file.delete() + assert 3096 == cmd._count_total_num_records(log_context) + +@pytest.mark.django_db +def test_reparse_backup(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Verify a backup is created with the correct size.""" + parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + + cmd = clean_and_reparse.Command() + file_name = "/tmp/test_reparse.pg" + cmd._backup(file_name, log_context) + time.sleep(10) + + file_size = os.path.getsize(file_name) + assert file_size > 180000 + +@pytest.mark.django_db +def test_delete_associated_models(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Verify all records and models are deleted.""" + ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + + cmd = clean_and_reparse.Command() + assert 3104 == cmd._count_total_num_records(log_context) + + class Fake: + pass + fake_meta = Fake() + cmd._delete_associated_models(fake_meta, ids, False, log_context) + + assert cmd._count_total_num_records(log_context) == 0 + +@pytest.mark.django_db +def test_timeout_calculation(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Verify calculated timeout.""" + ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + + cmd = clean_and_reparse.Command() + num_records = cmd._count_total_num_records(log_context) + + assert cmd._calculate_timeout(len(ids), num_records).seconds == 57 + + assert 
+    assert cmd._calculate_timeout(len(ids), 50).seconds == 40
+
+@pytest.mark.django_db
+def test_reparse_dunce():
+    """Test reparse no args."""
+    cmd = clean_and_reparse.Command()
+    assert None is cmd.handle()
+    assert ReparseMeta.objects.count() == 0
+
+
+################################
+# The function below doesn't work. This is because the command kicks off the parser task which tries to query the DB for
+# the file to parse. But Pytest segregates the DB changes to the test (even when transactions are disabled) which leads
+# the parser task to fail because it cannot query the DataFile model. I couldn't find a way around this issue.
+################################
+
+# @pytest.mark.django_db(transaction=False)
+# def test_reparse_all(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
+#                      tribal_section_1_file):
+#     """Test reparse no args."""
+#     ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
+#     cmd = clean_and_reparse.Command()
+#     print(f"\n\nPKS: {ids}\n\n")
+
+#     opts = {'all': True, 'test': True}
+#     cmd.handle(**opts)
+#     done = False
+#     timeout = 0
+#     while (not done or timeout == 30):
+#         timeout += 1
+#         time.sleep(1)
+#         latest = ReparseMeta.objects.latest('pk')
+#         done = latest.finished
+
+#     latest = ReparseMeta.objects.select_for_update().latest("pk")
+#     assert latest.success == True
+#     assert latest.num_files_to_reparse == len(ids)
+#     assert latest.files_completed == len(ids)
+#     assert latest.files_failed == 0
+#     assert latest.num_records_deleted == latest.num_records_created
+#     assert latest.total_num_records_initial == latest.total_num_records_post

From f34b014ae335cc52bc66a4a05d60b341f0388559 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Tue, 10 Sep 2024 15:53:50 -0400
Subject: [PATCH 05/39] - Update sequential function to return boolean to allow testing - Update all files done logic - Add new test for sequential execution

---
 .../management/commands/clean_and_reparse.py  | 10 ++++--
 .../search_indexes/models/reparse_meta.py     |  2 +-
 .../search_indexes/test/test_reparse.py       | 31 ++++++++++++++++++-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
index e10f472e3..fa9d9bdfd 100644
--- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
@@ -213,18 +213,20 @@ def _assert_sequential_execution(self, log_context):
                 "Cannot safely execute reparse, please fix manually.",
                 logger_context=log_context,
                 level='error')
-            exit(1)
+            return False
         if (is_not_none and not ReparseMeta.assert_all_files_done(latest_meta_model) and
                 not now > latest_meta_model.timeout_at):
             log('A previous execution of the reparse command is RUNNING. Cannot execute in parallel, exiting.',
                 logger_context=log_context,
                 level='warn')
-            exit(1)
+            return False
         elif (is_not_none and latest_meta_model.timeout_at is not None and now > latest_meta_model.timeout_at and
               not ReparseMeta.assert_all_files_done(latest_meta_model)):
             log("Previous reparse has exceeded the timeout.
Allowing execution of the command.", logger_context=log_context, level='warn') + return True + return True def _calculate_timeout(self, num_files, num_records): """Estimate a timeout parameter based on the number of files and the number of records.""" @@ -307,7 +309,9 @@ def handle(self, *args, **options): level='warn') return - self._assert_sequential_execution(log_context) + is_sequential = self._assert_sequential_execution(log_context) + if not is_sequential: + exit(1) meta_model = ReparseMeta.objects.create(fiscal_quarter=fiscal_quarter, fiscal_year=fiscal_year, all=reparse_all, diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py index 15f659d64..3b5391de4 100644 --- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py +++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py @@ -55,7 +55,7 @@ def assert_all_files_done(meta_model): This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row containing this model has not been locked the caller will experience race issues. """ - if (meta_model.finished or meta_model.files_completed == meta_model.num_files_to_reparse or + if meta_model.finished and (meta_model.files_completed == meta_model.num_files_to_reparse or meta_model.files_completed + meta_model.files_failed == meta_model.num_files_to_reparse or meta_model.files_failed == meta_model.num_files_to_reparse): return True diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index daec36291..39cf5af3e 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -7,8 +7,10 @@ from tdpservice.search_indexes.models.reparse_meta import ReparseMeta from tdpservice.users.models import User -from django.contrib.admin.models import ADDITION +from django.contrib.admin.models import LogEntry, ADDITION +from django.utils import timezone +from datetime import timedelta import os import time @@ -140,6 +142,33 @@ def test_reparse_dunce(): assert None is cmd.handle() assert ReparseMeta.objects.count() == 0 +@pytest.mark.django_db +def test_reparse_sequential(log_context): + """Test reparse _assert_sequential_execution.""" + cmd = clean_and_reparse.Command() + assert True == cmd._assert_sequential_execution(log_context) + + meta = ReparseMeta.objects.create(timeout_at=None) + assert False == cmd._assert_sequential_execution(log_context) + timeout_entry = LogEntry.objects.latest('pk') + assert timeout_entry.change_message == ("The latest ReparseMeta model's (ID: 1) timeout_at field is None. Cannot " + "safely execute reparse, please fix manually.") + + meta.timeout_at = timezone.now() + timedelta(seconds=100) + meta.save() + assert False == cmd._assert_sequential_execution(log_context) + not_seq_entry = LogEntry.objects.latest('pk') + assert not_seq_entry.change_message == ("A previous execution of the reparse command is RUNNING. " + "Cannot execute in parallel, exiting.") + + meta.timeout_at = timezone.now() + meta.save() + assert True == cmd._assert_sequential_execution(log_context) + timeout_entry = LogEntry.objects.latest('pk') + assert timeout_entry.change_message == ("Previous reparse has exceeded the timeout. Allowing " + "execution of the command.") + + ################################ # The function below doesn't work. 
This is because the command kicks off the parser task which tries to query the DB for
 # the file to parse. But Pytest segregates the DB changes to the test (even when transactions are disabled) which leads
 # the parser task to fail because it cannot query the DataFile model. I couldn't find a way around this issue.
 ################################

From d96f45561d530977894cdf7193bb6271670bffd4 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Wed, 11 Sep 2024 09:02:30 -0400
Subject: [PATCH 06/39] - new indices

---
 tdrs-backend/tdpservice/search_indexes/test/test_reparse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index 39cf5af3e..83a828880 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -118,7 +118,7 @@ def test_delete_associated_models(log_context, dfs, cat4_edge_case_fil
     class Fake:
         pass
     fake_meta = Fake()
-    cmd._delete_associated_models(fake_meta, ids, False, log_context)
+    cmd._delete_associated_models(fake_meta, ids, True, log_context)
 
     assert cmd._count_total_num_records(log_context) == 0

From 5621f7f4e6777d8013fc574edca8fa46922ab0de Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Wed, 11 Sep 2024 09:28:38 -0400
Subject: [PATCH 07/39] - linting

---
 .../management/commands/clean_and_reparse.py         | 8 ++++++--
 .../tdpservice/search_indexes/models/reparse_meta.py | 5 +++--
 .../tdpservice/search_indexes/test/test_reparse.py   | 10 ++++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
index fa9d9bdfd..689a573a5 100644
--- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
@@ -228,6 +228,11 @@ def _assert_sequential_execution(self, log_context):
             return True
         return True
 
+    def _should_exit(self, condition):
+        """Exit on condition."""
+        if condition:
+            exit(1)
+
     def _calculate_timeout(self, num_files, num_records):
         """Estimate a timeout parameter based on the number of files and the number of records."""
         # Increase by an order of magnitude to have the bases covered.
@@ -310,8 +315,7 @@ def handle(self, *args, **options):
             return
 
         is_sequential = self._assert_sequential_execution(log_context)
-        if not is_sequential:
-            exit(1)
+        self._should_exit(not is_sequential)
         meta_model = ReparseMeta.objects.create(fiscal_quarter=fiscal_quarter,
                                                 fiscal_year=fiscal_year,
                                                 all=reparse_all,
diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
index 3b5391de4..b7ddf3ff4 100644
--- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
+++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
@@ -56,8 +56,9 @@ def assert_all_files_done(meta_model):
     containing this model has not been locked the caller will experience race issues.
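    A run only counts as done once finished is set and the completed/failed counts account for every file.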
""" if meta_model.finished and (meta_model.files_completed == meta_model.num_files_to_reparse or - meta_model.files_completed + meta_model.files_failed == meta_model.num_files_to_reparse or - meta_model.files_failed == meta_model.num_files_to_reparse): + meta_model.files_completed + meta_model.files_failed == + meta_model.num_files_to_reparse or + meta_model.files_failed == meta_model.num_files_to_reparse): return True return False diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 83a828880..4e9fc9732 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -146,30 +146,28 @@ def test_reparse_dunce(): def test_reparse_sequential(log_context): """Test reparse _assert_sequential_execution.""" cmd = clean_and_reparse.Command() - assert True == cmd._assert_sequential_execution(log_context) + assert True is cmd._assert_sequential_execution(log_context) meta = ReparseMeta.objects.create(timeout_at=None) - assert False == cmd._assert_sequential_execution(log_context) + assert False is cmd._assert_sequential_execution(log_context) timeout_entry = LogEntry.objects.latest('pk') assert timeout_entry.change_message == ("The latest ReparseMeta model's (ID: 1) timeout_at field is None. Cannot " "safely execute reparse, please fix manually.") meta.timeout_at = timezone.now() + timedelta(seconds=100) meta.save() - assert False == cmd._assert_sequential_execution(log_context) + assert False is cmd._assert_sequential_execution(log_context) not_seq_entry = LogEntry.objects.latest('pk') assert not_seq_entry.change_message == ("A previous execution of the reparse command is RUNNING. " "Cannot execute in parallel, exiting.") meta.timeout_at = timezone.now() meta.save() - assert True == cmd._assert_sequential_execution(log_context) + assert True is cmd._assert_sequential_execution(log_context) timeout_entry = LogEntry.objects.latest('pk') assert timeout_entry.change_message == ("Previous reparse has exceeded the timeout. Allowing " "execution of the command.") - - ################################ # The function below doesn't work. This is because the command kicks off the parser task which tries to query the DB for # the file to parse. But Pytest segregates the DB changes to the test (even when transactions are disbled) which leads From 9a03ff304485ac8c8e667884346394cb64860444 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 11 Sep 2024 10:49:08 -0400 Subject: [PATCH 08/39] - Added tests for most exception paths --- .../management/commands/clean_and_reparse.py | 3 +- .../search_indexes/test/test_reparse.py | 113 +++++++++++++++++- 2 files changed, 110 insertions(+), 6 deletions(-) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index 689a573a5..d6b1fe785 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -114,9 +114,8 @@ def _delete_records(self, file_ids, new_indices, log_context): if not new_indices: # If we aren't creating new indices, then we don't want duplicate data in the existing indices. 
paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE)
-            document = doc()
             for page in paginator:
-                document.update(page.object_list, action='delete')
+                doc().update(page.object_list, action='delete')
             qset._raw_delete(qset.db)
         except ElasticsearchException as e:
             log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! '
diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index 4e9fc9732..75ddb99f0 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -8,7 +8,9 @@ from tdpservice.users.models import User
 
 from django.contrib.admin.models import LogEntry, ADDITION
+from django.db.utils import DatabaseError
 from django.utils import timezone
+from elasticsearch.exceptions import ElasticsearchException
 
 from datetime import timedelta
 import os
@@ -93,7 +95,7 @@ def test_count_total_num_records(log_context, dfs, cat4_edge_case_file, big_file
     assert 3096 == cmd._count_total_num_records(log_context)
 
 @pytest.mark.django_db
-def test_reparse_backup(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
+def test_reparse_backup_succeed(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
                         tribal_section_1_file):
     """Verify a backup is created with the correct size."""
     parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
@@ -107,8 +109,33 @@ def test_reparse_backup(log_context, dfs, cat4_edge_case_file, big_file, small_s
     assert file_size > 180000
 
 @pytest.mark.django_db
+def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
+                             tribal_section_1_file):
+    """Verify a failed backup logs the error and re-raises the exception."""
+    parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
+
+    mocker.patch(
+        'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._backup',
+        side_effect=Exception('Backup exception')
+    )
+
+
+    cmd = clean_and_reparse.Command()
+    file_name = "/tmp/test_reparse.pg"
+    with pytest.raises(Exception):
+        cmd._backup(file_name, log_context)
+    assert os.path.exists(file_name) is False
+    exception_msg = LogEntry.objects.latest('pk').change_message
+    assert exception_msg == ("Database backup FAILED. Clean and reparse NOT executed. Database "
+                             "and Elastic are CONSISTENT!")
+
+@pytest.mark.parametrize(("new_indexes"), [
+    (True),
+    (False)
+])
 @pytest.mark.django_db
-def test_delete_associated_models(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
-                                  tribal_section_1_file):
+def test_delete_associated_models(new_indexes, log_context, dfs, cat4_edge_case_file, big_file,
+                                  small_ssp_section1_datafile, tribal_section_1_file):
     """Verify all records and models are deleted."""
     ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
 
     cmd = clean_and_reparse.Command()
     assert 3104 == cmd._count_total_num_records(log_context)
 
     class Fake:
         pass
     fake_meta = Fake()
-    cmd._delete_associated_models(fake_meta, ids, True, log_context)
+    cmd._delete_associated_models(fake_meta, ids, new_indexes, log_context)
 
     assert cmd._count_total_num_records(log_context) == 0
 
+@pytest.mark.parametrize(("exc_msg, exception_type"), [
+    (('Encountered a DatabaseError while deleting DataFileSummary from Postgres.
The database ' + 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), + (('Caught generic exception while deleting DataFileSummary. The database and Elastic are INCONSISTENT! ' + 'Restore the DB from the backup as soon as possible!'), Exception) +]) +@pytest.mark.django_db +def test_delete_summaries_exceptions(mocker, log_context, exc_msg, exception_type): + """Test summary exception handling.""" + mocker.patch( + 'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._delete_summaries', + side_effect=exception_type('Summary delete exception') + ) + cmd = clean_and_reparse.Command() + with pytest.raises(exception_type): + cmd._delete_summaries([], log_context) + exception_msg = LogEntry.objects.latest('pk').change_message + assert exception_msg == exc_msg + +@pytest.mark.parametrize(("exc_msg, exception_type"), [ + (('Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! ' + 'Restore the DB from the backup as soon as possible!'), ElasticsearchException), + (('Encountered a DatabaseError while deleting records of type {model} from Postgres. The database ' + 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), + (('Caught generic exception while deleting records of type {model}. The database and Elastic are ' + 'INCONSISTENT! Restore the DB from the backup as soon as possible!'), Exception) +]) +@pytest.mark.django_db +def test_delete_records_exceptions(mocker, log_context, exc_msg, exception_type): + """Test record exception handling.""" + mocker.patch( + 'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._delete_records', + side_effect=exception_type('Record delete exception') + ) + cmd = clean_and_reparse.Command() + with pytest.raises(exception_type): + cmd._delete_records([], True, log_context) + exception_msg = LogEntry.objects.latest('pk').change_message + assert exception_msg == exc_msg + +@pytest.mark.parametrize(("exc_msg, exception_type"), [ + (('Encountered a DatabaseError while deleting ParserErrors from Postgres. The database ' + 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), + (('Caught generic exception while deleting ParserErrors. The database and Elastic are INCONSISTENT! ' + 'Restore the DB from the backup as soon as possible!'), Exception) +]) +@pytest.mark.django_db +def test_delete_errors_exceptions(mocker, log_context, exc_msg, exception_type): + """Test error exception handling.""" + mocker.patch( + 'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._delete_errors', + side_effect=exception_type('Error delete exception') + ) + cmd = clean_and_reparse.Command() + with pytest.raises(exception_type): + cmd._delete_errors([], log_context) + exception_msg = LogEntry.objects.latest('pk').change_message + assert exception_msg == exc_msg + +@pytest.mark.parametrize(("exc_msg, exception_type"), [ + (('Encountered a DatabaseError while re-creating datafiles. The database ' + 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), + (('Caught generic exception in _handle_datafiles. Database and Elastic are INCONSISTENT! 
' + 'Restore the DB from the backup as soon as possible!'), Exception) +]) +@pytest.mark.django_db +def test_handle_files_exceptions(mocker, log_context, exc_msg, exception_type): + """Test error exception handling.""" + mocker.patch( + 'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._handle_datafiles', + side_effect=exception_type('Files exception') + ) + cmd = clean_and_reparse.Command() + with pytest.raises(exception_type): + cmd._handle_datafiles([], None, log_context) + exception_msg = LogEntry.objects.latest('pk').change_message + assert exception_msg == exc_msg + @pytest.mark.django_db def test_timeout_calculation(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file): From 778c82783bb9c9f2301eb60788ef89e2c180977c Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Wed, 11 Sep 2024 10:50:54 -0400 Subject: [PATCH 09/39] -linting --- .../tdpservice/search_indexes/test/test_reparse.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 75ddb99f0..43d292d36 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -96,7 +96,7 @@ def test_count_total_num_records(log_context, dfs, cat4_edge_case_file, big_file @pytest.mark.django_db def test_reparse_backup_succeed(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, - tribal_section_1_file): + tribal_section_1_file): """Verify a backup is created with the correct size.""" parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) @@ -118,8 +118,6 @@ def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_ 'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._backup', side_effect=Exception('Backup exception') ) - - cmd = clean_and_reparse.Command() file_name = "/tmp/test_reparse.pg" with pytest.raises(Exception): @@ -127,7 +125,7 @@ def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_ assert os.path.exists(file_name) is False exception_msg = LogEntry.objects.latest('pk').change_message assert exception_msg == ("Database backup FAILED. Clean and reparse NOT executed. Database " - "and Elastic are CONSISTENT!") + "and Elastic are CONSISTENT!") @pytest.mark.parametrize(("new_indexes"), [ (True), @@ -151,7 +149,7 @@ class Fake: @pytest.mark.parametrize(("exc_msg, exception_type"), [ (('Encountered a DatabaseError while deleting DataFileSummary from Postgres. The database ' - 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), + 'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!'), DatabaseError), (('Caught generic exception while deleting DataFileSummary. The database and Elastic are INCONSISTENT! 
' 'Restore the DB from the backup as soon as possible!'), Exception) ]) From 4d2ee69ec01908db75b4b03e94682b5b444064d9 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 12 Sep 2024 11:03:33 -0400 Subject: [PATCH 10/39] - Adding refresh --- .../search_indexes/management/commands/clean_and_reparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index d6b1fe785..a9f2c47bf 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -115,7 +115,7 @@ def _delete_records(self, file_ids, new_indices, log_context): # If we aren't creating new indices, then we don't want duplicate data in the existing indices. paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) for page in paginator: - doc().update(page.object_list, action='delete') + doc().update(page.object_list, refresh=True, action='delete') qset._raw_delete(qset.db) except ElasticsearchException as e: log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! ' From d8a3b445bfd29c479fff2c5f058e664bd0f0e1d3 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 12 Sep 2024 12:01:42 -0400 Subject: [PATCH 11/39] - testing not duplicating container --- .circleci/build-and-test/jobs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/build-and-test/jobs.yml b/.circleci/build-and-test/jobs.yml index a40d1568f..5982f7e41 100644 --- a/.circleci/build-and-test/jobs.yml +++ b/.circleci/build-and-test/jobs.yml @@ -8,7 +8,7 @@ name: Run Unit Tests And Create Code Coverage Report command: | cd tdrs-backend; - docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml" + docker-compose exec web pytest --cov-report=xml - run: name: Execute Python Linting Test command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ." From fa77bac81e7764cbfaa5a41ac0ed7919b8dbc609 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 12 Sep 2024 12:36:32 -0400 Subject: [PATCH 12/39] - revert test change - change order --- .circleci/build-and-test/jobs.yml | 2 +- tdrs-backend/tdpservice/search_indexes/test/test_reparse.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/build-and-test/jobs.yml b/.circleci/build-and-test/jobs.yml index 5982f7e41..a40d1568f 100644 --- a/.circleci/build-and-test/jobs.yml +++ b/.circleci/build-and-test/jobs.yml @@ -8,7 +8,7 @@ name: Run Unit Tests And Create Code Coverage Report command: | cd tdrs-backend; - docker-compose exec web pytest --cov-report=xml + docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml" - run: name: Execute Python Linting Test command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ." 
diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 43d292d36..d7ca7262c 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -128,8 +128,8 @@ def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_ "and Elastic are CONSISTENT!") @pytest.mark.parametrize(("new_indexes"), [ - (True), - (False) + (False), + (True) ]) @pytest.mark.django_db def test_delete_associated_models(new_indexes, log_context, dfs, cat4_edge_case_file, big_file, From 01dc2c350e1c5ba71a0fd6781d597feab555a28e Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 12 Sep 2024 13:01:54 -0400 Subject: [PATCH 13/39] - remove param --- .../tdpservice/search_indexes/test/test_reparse.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index d7ca7262c..777872bdb 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -127,12 +127,8 @@ def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_ assert exception_msg == ("Database backup FAILED. Clean and reparse NOT executed. Database " "and Elastic are CONSISTENT!") -@pytest.mark.parametrize(("new_indexes"), [ - (False), - (True) -]) @pytest.mark.django_db -def test_delete_associated_models(new_indexes, log_context, dfs, cat4_edge_case_file, big_file, +def test_delete_associated_models(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file): """Verify all records and models are deleted.""" ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) @@ -143,7 +139,7 @@ def test_delete_associated_models(new_indexes, log_context, dfs, cat4_edge_case_ class Fake: pass fake_meta = Fake() - cmd._delete_associated_models(fake_meta, ids, new_indexes, log_context) + cmd._delete_associated_models(fake_meta, ids, True, log_context) assert cmd._count_total_num_records(log_context) == 0 From 876260c64e57a6ca92c85c5172442cc414a886fd Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Thu, 12 Sep 2024 13:26:23 -0400 Subject: [PATCH 14/39] - Add missing tests --- .../search_indexes/test/test_reparse.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 777872bdb..f48120750 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -162,6 +162,25 @@ def test_delete_summaries_exceptions(mocker, log_context, exc_msg, exception_typ exception_msg = LogEntry.objects.latest('pk').change_message assert exception_msg == exc_msg +@pytest.mark.parametrize(("exc_msg, exception_type"), [ + (('Elastic index creation FAILED. Clean and reparse NOT executed. ' + 'Database is CONSISTENT, Elastic is INCONSISTENT!'), ElasticsearchException), + (('Caught generic exception in _handle_elastic. Clean and reparse NOT executed. 
'Database is CONSISTENT, Elastic is INCONSISTENT!'), Exception)
+])
+@pytest.mark.django_db
+def test_handle_elastic_exceptions(mocker, log_context, exc_msg, exception_type):
+    """Test _handle_elastic exception handling."""
+    mocker.patch(
+        'tdpservice.search_indexes.management.commands.clean_and_reparse.Command._handle_elastic',
+        side_effect=exception_type('Elastic handle exception')
+    )
+    cmd = clean_and_reparse.Command()
+    with pytest.raises(exception_type):
+        cmd._handle_elastic([], True, log_context)
+    exception_msg = LogEntry.objects.latest('pk').change_message
+    assert exception_msg == exc_msg
+
 @pytest.mark.parametrize(("exc_msg, exception_type"), [
     (('Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! '

From 76624fcd32e965b82191657cb1436282a6a47077 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Thu, 12 Sep 2024 14:58:23 -0400
Subject: [PATCH 15/39] - adding test

---
 .../management/commands/clean_and_reparse.py  | 17 ++++++--
 .../search_indexes/test/test_reparse.py       | 43 ++++++++-----------
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
index d6b1fe785..eff358107 100644
--- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
@@ -242,6 +242,14 @@ def _calculate_timeout(self, num_files, num_records):
         logger.info(f"Setting timeout for the reparse event to be {delta} seconds from meta model creation date.")
         return delta
 
+    def _handle_input(self, testing, continue_msg):
+        """Handle user input."""
+        if not testing:
+            c = str(input(f'\n{continue_msg}\nContinue [y/n]? ')).lower()
+            if c not in ['y', 'yes']:
+                print('Cancelled.')
+                exit(0)
+
     def handle(self, *args, **options):
         """Delete and reparse datafiles matching a query."""
         fiscal_year = options.get('fiscal_year', None)
@@ -249,6 +257,10 @@ def handle(self, *args, **options):
         reparse_all = options.get('all', False)
         new_indices = reparse_all is True
 
+        # Option that can only be specified by calling `handle` directly and passing it.
+        testing = options.get('testing', False)
+        ##
+
         args_passed = fiscal_year is not None or fiscal_quarter is not None or reparse_all
 
         if not args_passed:
@@ -289,10 +301,7 @@ def handle(self, *args, **options):
         fmt_str = f"ALL ({num_files})" if reparse_all else f"({num_files})"
         continue_msg += "\nThese options will delete and reparse {0} datafiles.".format(fmt_str)
 
-        c = str(input(f'\n{continue_msg}\nContinue [y/n]? ')).lower()
-        if c not in ['y', 'yes']:
-            print('Cancelled.')
-            return
+        self._handle_input(testing, continue_msg)
 
         system_user, created = User.objects.get_or_create(username='system')
         if created:
diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index f48120750..a11a645c5 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -292,28 +292,21 @@ def test_reparse_sequential(log_context):
 # the parser task to fail because it cannot query the DataFile model. I couldn't find a way around this issue.
 ################################
-
-# @pytest.mark.django_db(transaction=False)
-# def test_reparse_all(log_context, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
-#                      tribal_section_1_file):
-#     """Test reparse no args."""
-#     ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
-#     cmd = clean_and_reparse.Command()
-#     print(f"\n\nPKS: {ids}\n\n")
-
-#     opts = {'all': True, 'test': True}
-#     cmd.handle(**opts)
-#     done = False
-#     timeout = 0
-#     while (not done or timeout == 30):
-#         timeout += 1
-#         time.sleep(1)
-#         latest = ReparseMeta.objects.latest('pk')
-#         done = latest.finished
-
-#     latest = ReparseMeta.objects.select_for_update().latest("pk")
-#     assert latest.success == True
-#     assert latest.num_files_to_reparse == len(ids)
-#     assert latest.files_completed == len(ids)
-#     assert latest.files_failed == 0
-#     assert latest.num_records_deleted == latest.num_records_created
-#     assert latest.total_num_records_initial == latest.total_num_records_post
+@pytest.mark.django_db()
+def test_reparse_quarter_and_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
+                                  tribal_section_1_file):
+    """Test reparse no args."""
+    parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
+    cmd = clean_and_reparse.Command()
+
+    mocker.patch(
+        'tdpservice.scheduling.parser_task.parse',
+        return_value=None
+    )
+
+    opts = {'fiscal_quarter': 'Q1', 'fiscal_year': 2021, 'testing': True}
+    cmd.handle(**opts)
+
+    latest = ReparseMeta.objects.select_for_update().latest("pk")
+    assert latest.num_files_to_reparse == 1
+    assert latest.num_records_deleted == 3073

From 644703ffb06f16df0a04b959a1fa893aed8f13d5 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Thu, 12 Sep 2024 15:26:26 -0400
Subject: [PATCH 16/39] - Add remaining tests

---
 .../search_indexes/test/test_reparse.py       | 46 ++++++++++++++++---
 1 file changed, 39 insertions(+), 7 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index a11a645c5..d81565493 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -286,16 +286,10 @@ def test_reparse_sequential(log_context):
     assert timeout_entry.change_message == ("Previous reparse has exceeded the timeout. Allowing "
                                             "execution of the command.")
 
-################################
-# The function below doesn't work. This is because the command kicks off the parser task which tries to query the DB for
-# the file to parse. But Pytest segregates the DB changes to the test (even when transactions are disabled) which leads
-# the parser task to fail because it cannot query the DataFile model. I couldn't find a way around this issue.
-################################ - @pytest.mark.django_db() def test_reparse_quarter_and_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file): - """Test reparse no args.""" + """Test reparse with year and quarter.""" parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) cmd = clean_and_reparse.Command() @@ -310,3 +304,41 @@ def test_reparse_quarter_and_year(mocker, dfs, cat4_edge_case_file, big_file, sm latest = ReparseMeta.objects.select_for_update().latest("pk") assert latest.num_files_to_reparse == 1 assert latest.num_records_deleted == 3073 + +@pytest.mark.django_db() +def test_reparse_quarter(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Test reparse with quarter.""" + parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + cmd = clean_and_reparse.Command() + + mocker.patch( + 'tdpservice.scheduling.parser_task.parse', + return_value=None + ) + + opts = {'fiscal_quarter': 'Q1', 'testing': True} + cmd.handle(**opts) + + latest = ReparseMeta.objects.select_for_update().latest("pk") + assert latest.num_files_to_reparse == 4 + assert latest.num_records_deleted == 3104 + +@pytest.mark.django_db() +def test_reparse_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Test reparse year.""" + parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + cmd = clean_and_reparse.Command() + + mocker.patch( + 'tdpservice.scheduling.parser_task.parse', + return_value=None + ) + + opts = {'fiscal_year': 2024, 'testing': True} + cmd.handle(**opts) + + latest = ReparseMeta.objects.select_for_update().latest("pk") + assert latest.num_files_to_reparse == 2 + assert latest.num_records_deleted == 27 From 09bf83d74247f99c92e8662897f1b10a101ba64d Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 07:24:55 -0400 Subject: [PATCH 17/39] - Add more debug logging --- tdrs-backend/tdpservice/parsers/parse.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index 1f14b6557..b2b9f0445 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -211,11 +211,10 @@ def rollback_records(unsaved_records, datafile): f"Encountered error while indexing datafile documents: \n{e}", "error" ) - logger.warn("Encountered an Elastic exception, enforcing DB cleanup.") + logger.warning("Encountered an Elastic exception, enforcing DB cleanup.") num_deleted, models = qset.delete() - logger.info("Succesfully performed DB cleanup after elastic failure.") log_parser_exception(datafile, - "Succesfully performed DB cleanup after elastic failure.", + "Succesfully performed DB cleanup after elastic failure in rollback_records.", "info" ) except DatabaseError as e: @@ -310,7 +309,7 @@ def delete_serialized_records(duplicate_manager, dfs): total_deleted += num_deleted dfs.total_number_of_records_created -= num_deleted log_parser_exception(dfs.datafile, - "Succesfully performed DB cleanup after elastic failure.", + "Succesfully performed DB cleanup after elastic failure in delete_serialized_records.", "info" ) except DatabaseError as e: From 9fe3d1c0f359d6e8464565926cf83f980c95fd5c Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 07:51:49 -0400 Subject: [PATCH 18/39] - 
From 9fe3d1c0f359d6e8464565926cf83f980c95fd5c Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 07:51:49 -0400
Subject: [PATCH 18/39] - running only failing test

---
 .circleci/build-and-test/jobs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/build-and-test/jobs.yml b/.circleci/build-and-test/jobs.yml
index a40d1568f..ae94e5ffa 100644
--- a/.circleci/build-and-test/jobs.yml
+++ b/.circleci/build-and-test/jobs.yml
@@ -8,7 +8,7 @@
       name: Run Unit Tests And Create Code Coverage Report
       command: |
         cd tdrs-backend;
-        docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml"
+        docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml -k test_reparse.py"
 - run:
     name: Execute Python Linting Test
     command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ."

From 94218696dbfb987bb5c842780e163aed36481302 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 07:54:22 -0400
Subject: [PATCH 19/39] - more debugging

---
 tdrs-backend/tdpservice/parsers/parse.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py
index b2b9f0445..bb583bf9f 100644
--- a/tdrs-backend/tdpservice/parsers/parse.py
+++ b/tdrs-backend/tdpservice/parsers/parse.py
@@ -285,6 +285,7 @@ def delete_serialized_records(duplicate_manager, dfs):
     """Delete all records that have already been serialized to the DB that have cat4 errors."""
     total_deleted = 0
     for document, ids in duplicate_manager.get_records_to_remove().items():
+        print(f"Doc: {document}, IDs: {ids}")
         try:
             model = document.Django.model
             qset = model.objects.filter(id__in=ids)

From d456c8b6cd2d5b72715ca0de1b6c2cebfa0c5b1e Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 07:54:51 -0400
Subject: [PATCH 20/39] - backend only

---
 .circleci/build-and-test/workflows.yml | 46 +++++++++----------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/.circleci/build-and-test/workflows.yml b/.circleci/build-and-test/workflows.yml
index b822f1cdc..2346268af 100644
--- a/.circleci/build-and-test/workflows.yml
+++ b/.circleci/build-and-test/workflows.yml
@@ -6,12 +6,12 @@
       - test-backend:
           requires:
             - secrets-check
-      - test-frontend:
-          requires:
-            - secrets-check
-      - test-e2e:
-          requires:
-            - secrets-check
+      # - test-frontend:
+      #     requires:
+      #       - secrets-check
+      # - test-e2e:
+      #     requires:
+      #       - secrets-check
 
   ci-build-and-test-all:
     jobs:
@@ -31,23 +31,23 @@
               - /^release.*/
           requires:
             - secrets-check
-      - test-frontend:
-          filters:
-            branches:
-              only:
-                - main
-                - master
-                - /^release.*/
-          requires:
-            - secrets-check
-      - test-e2e:
-          filters:
-            branches:
-              only:
-                - main
-                - master
-                - /^release.*/
-          requires:
+      # - test-frontend:
+      #     filters:
+      #       branches:
+      #         only:
+      #           - main
+      #           - master
+      #           - /^release.*/
+      #     requires:
+      #       - secrets-check
+      # - test-e2e:
+      #     filters:
+      #       branches:
+      #         only:
+      #           - main
+      #           - master
+      #           - /^release.*/
+      #     requires:
            - secrets-check
 
   build-and-test-backend:

From f408833aa564cfa682f7946175e654612153ab86 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 08:04:11 -0400
Subject: [PATCH 21/39] - remove test_reparse only

---
 .circleci/build-and-test/jobs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/build-and-test/jobs.yml b/.circleci/build-and-test/jobs.yml
index ae94e5ffa..a40d1568f 100644
--- a/.circleci/build-and-test/jobs.yml
+++ b/.circleci/build-and-test/jobs.yml
@@ -8,7 +8,7 @@
       name: Run Unit Tests And Create Code Coverage Report
      command: |
         cd tdrs-backend;
-        docker-compose run --rm web bash -c "./wait_for_services.sh && pytest 
--cov-report=xml -k test_reparse.py" + docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml" - run: name: Execute Python Linting Test command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ." From a3aa2f4edeb42616c1554289340fdf8310fe51e6 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 09:00:26 -0400 Subject: [PATCH 22/39] - Reset settings after parsing --- tdrs-backend/tdpservice/parsers/test/test_parse.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse.py b/tdrs-backend/tdpservice/parsers/test/test_parse.py index 71dd6fc10..ee1e84b8a 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse.py @@ -2,6 +2,7 @@ import pytest +import os from django.contrib.admin.models import LogEntry from django.conf import settings from django.db.models import Q as Query @@ -1739,6 +1740,9 @@ def test_parse_duplicate(file, batch_size, model, record_type, num_errors, dfs, settings.BULK_CREATE_BATCH_SIZE = batch_size parse.parse_datafile(datafile, dfs) + + settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000) + parser_errors = ParserError.objects.filter(file=datafile, error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY).order_by('id') for e in parser_errors: @@ -1782,6 +1786,9 @@ def test_parse_partial_duplicate(file, batch_size, model, record_type, num_error settings.BULK_CREATE_BATCH_SIZE = batch_size parse.parse_datafile(datafile, dfs) + + settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000) + parser_errors = ParserError.objects.filter(file=datafile, error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY).order_by('id') for e in parser_errors: @@ -1806,6 +1813,8 @@ def test_parse_cat_4_edge_case_file(cat4_edge_case_file, dfs): parse.parse_datafile(cat4_edge_case_file, dfs) + settings.BULK_CREATE_BATCH_SIZE = os.getenv("BULK_CREATE_BATCH_SIZE", 10000) + parser_errors = ParserError.objects.filter(file=cat4_edge_case_file).filter( error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY) From 9e28a6608d03e7994998570abf6df2d4d41531f5 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 09:21:54 -0400 Subject: [PATCH 23/39] - update file --- tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt b/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt index d9dc53305..5684c9eb9 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt +++ b/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt @@ -19,6 +19,6 @@ T320231011111119939499999999WTTY9BWYW212221122 6306100000000120100702TTTTTTTTT21 # T1 Duplicate of first record with valid child records T120231011111117835242 240198112 23111 1003 0 0 0 483 0 0 0 0 0 0 0 0 0 0 0222222 0 02229 22 -T2202310111111178352299999999WWWWWWWWW2122222222221 13 1211 0 3106990 0 0 0 0 0 0 00000000000000000000000 +T2202310111111178352299999999WWWWWWWWW2122222222221 13 1211 0 310699000 0 0 0 0 0 00000000000000000000000 T320231011111117835499999999WTTY9BWYW212221122 6306100000000120100702TTTTTTTTT212222122 6306100000000 TRAILER0000003 From d7b543aadee6c542885c8f07082d814533a4945d Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 10:59:03 -0400 Subject: [PATCH 24/39] - Update factories to use the correct types for fields - Update test to create fields with correct type 
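
The factory and test changes in the patch below line the seeded values up with the models' declared column types. These fields are evidently string-typed on the models (the corrected values gain zero padding), so seeding them with ints exercises values the parser can never actually produce, and strict index mappings then see inconsistent types. A self-contained sketch of the failure mode; the dataclass is an illustrative stand-in for a model row, and the field name is borrowed from the diff:

from dataclasses import dataclass

@dataclass
class T2Row:
    # Declared as a 2-character string column on the real model.
    WORK_PART_STATUS: str

good = T2Row(WORK_PART_STATUS="01")
bad = T2Row(WORK_PART_STATUS=1)  # dataclasses don't enforce types at runtime...

assert good.WORK_PART_STATUS == "01"
assert str(bad.WORK_PART_STATUS) == "1"  # ...and the zero padding is silently lost
print("factories must emit the column's declared type")
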
--- .../tdpservice/parsers/test/factories.py | 82 +++++----- .../search_indexes/test/test_model_mapping.py | 140 +++++++++--------- 2 files changed, 111 insertions(+), 111 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/test/factories.py b/tdrs-backend/tdpservice/parsers/test/factories.py index 101f6c141..5b558ef3f 100644 --- a/tdrs-backend/tdpservice/parsers/test/factories.py +++ b/tdrs-backend/tdpservice/parsers/test/factories.py @@ -184,43 +184,43 @@ class Meta: EMPLOYMENT_STATUS = 1 WORK_ELIGIBLE_INDICATOR = "01" WORK_PART_STATUS = "01" - UNSUB_EMPLOYMENT = 1 - SUB_PRIVATE_EMPLOYMENT = 1 - SUB_PUBLIC_EMPLOYMENT = 1 - WORK_EXPERIENCE_HOP = 1 - WORK_EXPERIENCE_EA = 1 - WORK_EXPERIENCE_HOL = 1 - OJT = 1 - JOB_SEARCH_HOP = 1 - JOB_SEARCH_EA = 1 - JOB_SEARCH_HOL = 1 - COMM_SERVICES_HOP = 1 - COMM_SERVICES_EA = 1 - COMM_SERVICES_HOL = 1 - VOCATIONAL_ED_TRAINING_HOP = 1 - VOCATIONAL_ED_TRAINING_EA = 1 - VOCATIONAL_ED_TRAINING_HOL = 1 - JOB_SKILLS_TRAINING_HOP = 1 - JOB_SKILLS_TRAINING_EA = 1 - JOB_SKILLS_TRAINING_HOL = 1 - ED_NO_HIGH_SCHOOL_DIPL_HOP = 1 - ED_NO_HIGH_SCHOOL_DIPL_EA = 1 - ED_NO_HIGH_SCHOOL_DIPL_HOL = 1 - SCHOOL_ATTENDENCE_HOP = 1 - SCHOOL_ATTENDENCE_EA = 1 - SCHOOL_ATTENDENCE_HOL = 1 - PROVIDE_CC_HOP = 1 - PROVIDE_CC_EA = 1 - PROVIDE_CC_HOL = 1 - OTHER_WORK_ACTIVITIES = 1 - DEEMED_HOURS_FOR_OVERALL = 1 - DEEMED_HOURS_FOR_TWO_PARENT = 1 - EARNED_INCOME = 1 - UNEARNED_INCOME_TAX_CREDIT = 1 - UNEARNED_SOCIAL_SECURITY = 1 - UNEARNED_SSI = 1 - UNEARNED_WORKERS_COMP = 1 - OTHER_UNEARNED_INCOME = 1 + UNSUB_EMPLOYMENT = "01" + SUB_PRIVATE_EMPLOYMENT = "01" + SUB_PUBLIC_EMPLOYMENT = "01" + WORK_EXPERIENCE_HOP = "01" + WORK_EXPERIENCE_EA = "01" + WORK_EXPERIENCE_HOL = "01" + OJT = "01" + JOB_SEARCH_HOP = "01" + JOB_SEARCH_EA = "01" + JOB_SEARCH_HOL = "01" + COMM_SERVICES_HOP = "01" + COMM_SERVICES_EA = "01" + COMM_SERVICES_HOL = "01" + VOCATIONAL_ED_TRAINING_HOP = "01" + VOCATIONAL_ED_TRAINING_EA = "01" + VOCATIONAL_ED_TRAINING_HOL = "01" + JOB_SKILLS_TRAINING_HOP = "01" + JOB_SKILLS_TRAINING_EA = "01" + JOB_SKILLS_TRAINING_HOL = "01" + ED_NO_HIGH_SCHOOL_DIPL_HOP = "01" + ED_NO_HIGH_SCHOOL_DIPL_EA = "01" + ED_NO_HIGH_SCHOOL_DIPL_HOL = "01" + SCHOOL_ATTENDENCE_HOP = "01" + SCHOOL_ATTENDENCE_EA = "01" + SCHOOL_ATTENDENCE_HOL = "01" + PROVIDE_CC_HOP = "01" + PROVIDE_CC_EA = "01" + PROVIDE_CC_HOL = "01" + OTHER_WORK_ACTIVITIES = "01" + DEEMED_HOURS_FOR_OVERALL = "01" + DEEMED_HOURS_FOR_TWO_PARENT = "01" + EARNED_INCOME = "01" + UNEARNED_INCOME_TAX_CREDIT = "01" + UNEARNED_SOCIAL_SECURITY = "01" + UNEARNED_SSI = "01" + UNEARNED_WORKERS_COMP = "01" + OTHER_UNEARNED_INCOME = "01" class TanfT3Factory(factory.django.DjangoModelFactory): @@ -451,10 +451,10 @@ class Meta: CURRENT_MONTH_STATE_EXEMPT = 1 EMPLOYMENT_STATUS = 1 WORK_PART_STATUS = "01" - UNSUB_EMPLOYMENT = 1 - SUB_PRIVATE_EMPLOYMENT = 1 - SUB_PUBLIC_EMPLOYMENT = 1 - OJT = 1 + UNSUB_EMPLOYMENT = "01" + SUB_PRIVATE_EMPLOYMENT = "01" + SUB_PUBLIC_EMPLOYMENT = "01" + OJT = "01" JOB_SEARCH = '1' COMM_SERVICES = '1' VOCATIONAL_ED_TRAINING = '1' diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_model_mapping.py b/tdrs-backend/tdpservice/search_indexes/test/test_model_mapping.py index dd66010a9..fbaa28648 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_model_mapping.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_model_mapping.py @@ -96,7 +96,7 @@ def test_can_create_and_index_tanf_t2_submission(test_datafile): submission.CASE_NUMBER = '1' submission.FAMILY_AFFILIATION = 1 
submission.NONCUSTODIAL_PARENT = 1 - submission.DATE_OF_BIRTH = 1 + submission.DATE_OF_BIRTH = "1" submission.SSN = '1' submission.RACE_HISPANIC = 1 submission.RACE_AMER_INDIAN = 1 @@ -107,59 +107,59 @@ def test_can_create_and_index_tanf_t2_submission(test_datafile): submission.GENDER = 1 submission.FED_OASDI_PROGRAM = 1 submission.FED_DISABILITY_STATUS = 1 - submission.DISABLED_TITLE_XIVAPDT = 1 + submission.DISABLED_TITLE_XIVAPDT = "1" submission.AID_AGED_BLIND = 1 submission.RECEIVE_SSI = 1 submission.MARITAL_STATUS = 1 submission.RELATIONSHIP_HOH = "01" submission.PARENT_MINOR_CHILD = 1 submission.NEEDS_PREGNANT_WOMAN = 1 - submission.EDUCATION_LEVEL = 1 + submission.EDUCATION_LEVEL = "01" submission.CITIZENSHIP_STATUS = 1 submission.COOPERATION_CHILD_SUPPORT = 1 - submission.MONTHS_FED_TIME_LIMIT = 1 - submission.MONTHS_STATE_TIME_LIMIT = 1 + submission.MONTHS_FED_TIME_LIMIT = "01" + submission.MONTHS_STATE_TIME_LIMIT = "01" submission.CURRENT_MONTH_STATE_EXEMPT = 1 submission.EMPLOYMENT_STATUS = 1 - submission.WORK_ELIGIBLE_INDICATOR = 1 - submission.WORK_PART_STATUS = 1 - submission.UNSUB_EMPLOYMENT = 1 - submission.SUB_PRIVATE_EMPLOYMENT = 1 - submission.SUB_PUBLIC_EMPLOYMENT = 1 - submission.WORK_EXPERIENCE_HOP = 1 - submission.WORK_EXPERIENCE_EA = 1 - submission.WORK_EXPERIENCE_HOL = 1 - submission.OJT = 1 - submission.JOB_SEARCH_HOP = 1 - submission.JOB_SEARCH_EA = 1 - submission.JOB_SEARCH_HOL = 1 - submission.COMM_SERVICES_HOP = 1 - submission.COMM_SERVICES_EA = 1 - submission.COMM_SERVICES_HOL = 1 - submission.VOCATIONAL_ED_TRAINING_HOP = 1 - submission.VOCATIONAL_ED_TRAINING_EA = 1 - submission.VOCATIONAL_ED_TRAINING_HOL = 1 - submission.JOB_SKILLS_TRAINING_HOP = 1 - submission.JOB_SKILLS_TRAINING_EA = 1 - submission.JOB_SKILLS_TRAINING_HOL = 1 - submission.ED_NO_HIGH_SCHOOL_DIPL_HOP = 1 - submission.ED_NO_HIGH_SCHOOL_DIPL_EA = 1 - submission.ED_NO_HIGH_SCHOOL_DIPL_HOL = 1 - submission.SCHOOL_ATTENDENCE_HOP = 1 - submission.SCHOOL_ATTENDENCE_EA = 1 - submission.SCHOOL_ATTENDENCE_HOL = 1 - submission.PROVIDE_CC_HOP = 1 - submission.PROVIDE_CC_EA = 1 - submission.PROVIDE_CC_HOL = 1 - submission.OTHER_WORK_ACTIVITIES = 1 - submission.DEEMED_HOURS_FOR_OVERALL = 1 - submission.DEEMED_HOURS_FOR_TWO_PARENT = 1 - submission.EARNED_INCOME = 1 - submission.UNEARNED_INCOME_TAX_CREDIT = 1 - submission.UNEARNED_SOCIAL_SECURITY = 1 - submission.UNEARNED_SSI = 1 - submission.UNEARNED_WORKERS_COMP = 1 - submission.OTHER_UNEARNED_INCOME = 1 + submission.WORK_ELIGIBLE_INDICATOR = "01" + submission.WORK_PART_STATUS = "01" + submission.UNSUB_EMPLOYMENT = "01" + submission.SUB_PRIVATE_EMPLOYMENT = "01" + submission.SUB_PUBLIC_EMPLOYMENT = "01" + submission.WORK_EXPERIENCE_HOP = "01" + submission.WORK_EXPERIENCE_EA = "01" + submission.WORK_EXPERIENCE_HOL = "01" + submission.OJT = "01" + submission.JOB_SEARCH_HOP = "01" + submission.JOB_SEARCH_EA = "01" + submission.JOB_SEARCH_HOL = "01" + submission.COMM_SERVICES_HOP = "01" + submission.COMM_SERVICES_EA = "01" + submission.COMM_SERVICES_HOL = "01" + submission.VOCATIONAL_ED_TRAINING_HOP = "01" + submission.VOCATIONAL_ED_TRAINING_EA = "01" + submission.VOCATIONAL_ED_TRAINING_HOL = "01" + submission.JOB_SKILLS_TRAINING_HOP = "01" + submission.JOB_SKILLS_TRAINING_EA = "01" + submission.JOB_SKILLS_TRAINING_HOL = "01" + submission.ED_NO_HIGH_SCHOOL_DIPL_HOP = "01" + submission.ED_NO_HIGH_SCHOOL_DIPL_EA = "01" + submission.ED_NO_HIGH_SCHOOL_DIPL_HOL = "01" + submission.SCHOOL_ATTENDENCE_HOP = "01" + submission.SCHOOL_ATTENDENCE_EA = "01" + 
submission.SCHOOL_ATTENDENCE_HOL = "01"
+    submission.PROVIDE_CC_HOP = "01"
+    submission.PROVIDE_CC_EA = "01"
+    submission.PROVIDE_CC_HOL = "01"
+    submission.OTHER_WORK_ACTIVITIES = "01"
+    submission.DEEMED_HOURS_FOR_OVERALL = "01"
+    submission.DEEMED_HOURS_FOR_TWO_PARENT = "01"
+    submission.EARNED_INCOME = "01"
+    submission.UNEARNED_INCOME_TAX_CREDIT = "01"
+    submission.UNEARNED_SOCIAL_SECURITY = "01"
+    submission.UNEARNED_SSI = "01"
+    submission.UNEARNED_WORKERS_COMP = "01"
+    submission.OTHER_UNEARNED_INCOME = "01"
 
     submission.save()
 
@@ -802,7 +802,7 @@ def test_can_create_and_index_tribal_tanf_t2_submission(test_datafile):
     submission.CASE_NUMBER = '1'
     submission.FAMILY_AFFILIATION = 1
     submission.NONCUSTODIAL_PARENT = 1
-    submission.DATE_OF_BIRTH = 1
+    submission.DATE_OF_BIRTH = "1"
     submission.SSN = '1'
     submission.RACE_HISPANIC = 1
     submission.RACE_AMER_INDIAN = 1
@@ -813,41 +813,41 @@ def test_can_create_and_index_tribal_tanf_t2_submission(test_datafile):
     submission.GENDER = 1
     submission.FED_OASDI_PROGRAM = 1
     submission.FED_DISABILITY_STATUS = 1
-    submission.DISABLED_TITLE_XIVAPDT = 1
+    submission.DISABLED_TITLE_XIVAPDT = "01"
     submission.AID_AGED_BLIND = 1
     submission.RECEIVE_SSI = 1
     submission.MARITAL_STATUS = 1
     submission.RELATIONSHIP_HOH = "01"
     submission.NEEDS_PREGNANT_WOMAN = 1
-    submission.EDUCATION_LEVEL = 1
+    submission.EDUCATION_LEVEL = "01"
     submission.CITIZENSHIP_STATUS = 1
     submission.COOPERATION_CHILD_SUPPORT = 1
-    submission.MONTHS_FED_TIME_LIMIT = 1
-    submission.MONTHS_STATE_TIME_LIMIT = 1
+    submission.MONTHS_FED_TIME_LIMIT = "01"
+    submission.MONTHS_STATE_TIME_LIMIT = "01"
     submission.CURRENT_MONTH_STATE_EXEMPT = 1
     submission.EMPLOYMENT_STATUS = 1
-    submission.WORK_PART_STATUS = 1
-    submission.UNSUB_EMPLOYMENT = 1
-    submission.SUB_PRIVATE_EMPLOYMENT = 1
-    submission.SUB_PUBLIC_EMPLOYMENT = 1
-    submission.WORK_EXPERIENCE = 1
-    submission.OJT = 1
-    submission.JOB_SEARCH = 1
-    submission.COMM_SERVICES = 1
-    submission.VOCATIONAL_ED_TRAINING = 1
-    submission.JOB_SKILLS_TRAINING = 1
-    submission.ED_NO_HIGH_SCHOOL_DIPLOMA = 1
-    submission.SCHOOL_ATTENDENCE = 1
-    submission.PROVIDE_CC = 1
+    submission.WORK_PART_STATUS = "01"
+    submission.UNSUB_EMPLOYMENT = "01"
+    submission.SUB_PRIVATE_EMPLOYMENT = "01"
+    submission.SUB_PUBLIC_EMPLOYMENT = "01"
+    submission.WORK_EXPERIENCE = "01"
+    submission.OJT = "01"
+    submission.JOB_SEARCH = "01"
+    submission.COMM_SERVICES = "01"
+    submission.VOCATIONAL_ED_TRAINING = "01"
+    submission.JOB_SKILLS_TRAINING = "01"
+    submission.ED_NO_HIGH_SCHOOL_DIPLOMA = "01"
+    submission.SCHOOL_ATTENDENCE = "01"
+    submission.PROVIDE_CC = "01"
     submission.ADD_WORK_ACTIVITIES = '01'
-    submission.OTHER_WORK_ACTIVITIES = 1
-    submission.REQ_HRS_WAIVER_DEMO = 1
-    submission.EARNED_INCOME = 1
-    submission.UNEARNED_INCOME_TAX_CREDIT = 1
-    submission.UNEARNED_SOCIAL_SECURITY = 1
-    submission.UNEARNED_SSI = 1
-    submission.UNEARNED_WORKERS_COMP = 1
-    submission.OTHER_UNEARNED_INCOME = 1
+    submission.OTHER_WORK_ACTIVITIES = "01"
+    submission.REQ_HRS_WAIVER_DEMO = "01"
+    submission.EARNED_INCOME = "01"
+    submission.UNEARNED_INCOME_TAX_CREDIT = "01"
+    submission.UNEARNED_SOCIAL_SECURITY = "01"
+    submission.UNEARNED_SSI = "01"
+    submission.UNEARNED_WORKERS_COMP = "01"
+    submission.OTHER_UNEARNED_INCOME = "01"
 
     submission.save()
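
The next patch parametrizes the deletion test so a single test body covers both the new-index and existing-index paths: `pytest.mark.parametrize` generates one collected test per argument set. A generic, runnable sketch of the mechanism; `delete_records` is a stand-in for the real command, not its actual implementation:

import pytest

def delete_records(new_indexes: bool) -> int:
    """Illustrative stand-in for Command._delete_associated_models."""
    records = [1, 2, 3]
    records.clear()  # deletion is identical on both branches in this sketch
    return len(records)

# Collected twice: test_delete[True] and test_delete[False].
@pytest.mark.parametrize("new_indexes", [True, False])
def test_delete(new_indexes):
    assert delete_records(new_indexes) == 0
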
From e1bfce38d99c4070f0c41ac36f580106a9069e91 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 11:19:15 -0400
Subject: [PATCH 25/39] - parametrize test

---
 .../tdpservice/search_indexes/test/test_reparse.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index d81565493..b40417402 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -127,8 +127,12 @@ def test_reparse_backup_fail(mocker, log_context, dfs, cat4_edge_case_file, big_
     assert exception_msg == ("Database backup FAILED. Clean and reparse NOT executed. Database "
                              "and Elastic are CONSISTENT!")
 
+@pytest.mark.parametrize(("new_indexes"), [
+    (True),
+    (False)
+])
 @pytest.mark.django_db
-def test_delete_associated_models(log_context, dfs, cat4_edge_case_file, big_file,
+def test_delete_associated_models(log_context, new_indexes, dfs, cat4_edge_case_file, big_file,
                                   small_ssp_section1_datafile, tribal_section_1_file):
     """Verify all records and models are deleted."""
     ids = parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
@@ -139,7 +143,7 @@ def test_delete_associated_models(log_context, dfs, cat4_edge_case_file, big_fil
     class Fake:
         pass
     fake_meta = Fake()
-    cmd._delete_associated_models(fake_meta, ids, True, log_context)
+    cmd._delete_associated_models(fake_meta, ids, new_indexes, log_context)
 
     assert cmd._count_total_num_records(log_context) == 0

From 3f89673fe5b7791d2669dcad01062d7088237198 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 11:32:43 -0400
Subject: [PATCH 26/39] - revert debugging changes

---
 .circleci/build-and-test/workflows.yml    | 46 +++++++++----------
 tdrs-backend/tdpservice/parsers/parse.py  |  1 -
 .../parsers/test/data/cat_4_edge_case.txt |  2 +-
 3 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/.circleci/build-and-test/workflows.yml b/.circleci/build-and-test/workflows.yml
index 2346268af..b822f1cdc 100644
--- a/.circleci/build-and-test/workflows.yml
+++ b/.circleci/build-and-test/workflows.yml
@@ -6,12 +6,12 @@
       - test-backend:
           requires:
             - secrets-check
-      # - test-frontend:
-      #     requires:
-      #       - secrets-check
-      # - test-e2e:
-      #     requires:
-      #       - secrets-check
+      - test-frontend:
+          requires:
+            - secrets-check
+      - test-e2e:
+          requires:
+            - secrets-check
 
   ci-build-and-test-all:
     jobs:
@@ -31,23 +31,23 @@
               - /^release.*/
           requires:
             - secrets-check
-      # - test-frontend:
-      #     filters:
-      #       branches:
-      #         only:
-      #           - main
-      #           - master
-      #           - /^release.*/
-      #     requires:
-      #       - secrets-check
-      # - test-e2e:
-      #     filters:
-      #       branches:
-      #         only:
-      #           - main
-      #           - master
-      #           - /^release.*/
-      #     requires:
+      - test-frontend:
+          filters:
+            branches:
+              only:
+                - main
+                - master
+                - /^release.*/
+          requires:
+            - secrets-check
+      - test-e2e:
+          filters:
+            branches:
+              only:
+                - main
+                - master
+                - /^release.*/
+          requires:
             - secrets-check
 
   build-and-test-backend:

diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py
index bb583bf9f..b2b9f0445 100644
--- a/tdrs-backend/tdpservice/parsers/parse.py
+++ b/tdrs-backend/tdpservice/parsers/parse.py
@@ -285,7 +285,6 @@ def delete_serialized_records(duplicate_manager, dfs):
     """Delete all records that have already been serialized to the DB that have cat4 errors."""
     total_deleted = 0
     for document, ids in duplicate_manager.get_records_to_remove().items():
-        print(f"Doc: {document}, IDs: {ids}")
         try:
             model = document.Django.model
             qset = model.objects.filter(id__in=ids)
diff --git a/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt 
b/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt index 5684c9eb9..d9dc53305 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt +++ b/tdrs-backend/tdpservice/parsers/test/data/cat_4_edge_case.txt @@ -19,6 +19,6 @@ T320231011111119939499999999WTTY9BWYW212221122 6306100000000120100702TTTTTTTTT21 # T1 Duplicate of first record with valid child records T120231011111117835242 240198112 23111 1003 0 0 0 483 0 0 0 0 0 0 0 0 0 0 0222222 0 02229 22 -T2202310111111178352299999999WWWWWWWWW2122222222221 13 1211 0 310699000 0 0 0 0 0 00000000000000000000000 +T2202310111111178352299999999WWWWWWWWW2122222222221 13 1211 0 3106990 0 0 0 0 0 0 00000000000000000000000 T320231011111117835499999999WTTY9BWYW212221122 6306100000000120100702TTTTTTTTT212222122 6306100000000 TRAILER0000003 From c3002e09b19109cc096deecbc8ded307290b8c05 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 12:07:33 -0400 Subject: [PATCH 27/39] - Add tests for more coverage --- .../search_indexes/test/test_reparse.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index b40417402..451196413 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -346,3 +346,41 @@ def test_reparse_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_sect latest = ReparseMeta.objects.select_for_update().latest("pk") assert latest.num_files_to_reparse == 2 assert latest.num_records_deleted == 27 + +@pytest.mark.django_db() +def test_reparse_all(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Test reparse year.""" + parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file) + cmd = clean_and_reparse.Command() + + mocker.patch( + 'tdpservice.scheduling.parser_task.parse', + return_value=None + ) + + opts = {'all': True, 'testing': True} + cmd.handle(**opts) + + latest = ReparseMeta.objects.select_for_update().latest("pk") + assert latest.num_files_to_reparse == 4 + assert latest.num_records_deleted == 3104 + +@pytest.mark.django_db() +def test_reparse_no_files(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, + tribal_section_1_file): + """Test reparse year.""" + cmd = clean_and_reparse.Command() + + mocker.patch( + 'tdpservice.scheduling.parser_task.parse', + return_value=None + ) + + opts = {'fiscal_year': 2025, 'testing': True} + res = cmd.handle(**opts) + + assert ReparseMeta.objects.count() == 0 + assert res is None + assert LogEntry.objects.latest('pk').change_message == ("No files available for the selected Fiscal Year: 2025 and " + "Quarter: Q1-4. 
Nothing to do.")

From 96372d8069859502e1e5f220e460a7ad9547ca35 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 12:19:12 -0400
Subject: [PATCH 28/39] - linting

---
 .../tdpservice/search_indexes/test/test_reparse.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
index 451196413..938c159b9 100644
--- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py
@@ -349,8 +349,8 @@ def test_reparse_year(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_sect
 
 @pytest.mark.django_db()
 def test_reparse_all(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
-                 tribal_section_1_file):
-    """Test reparse year."""
+                    tribal_section_1_file):
+    """Test reparse all."""
     parse_files(dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile, tribal_section_1_file)
     cmd = clean_and_reparse.Command()
 
@@ -367,9 +367,8 @@ def test_reparse_all(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_secti
     assert latest.num_records_deleted == 3104
 
 @pytest.mark.django_db()
-def test_reparse_no_files(mocker, dfs, cat4_edge_case_file, big_file, small_ssp_section1_datafile,
-                          tribal_section_1_file):
-    """Test reparse year."""
+def test_reparse_no_files(mocker):
+    """Test reparse with no files in query."""
     cmd = clean_and_reparse.Command()
 
     mocker.patch(

From 830b6d2340d1b5276f17f0babe45490b1eaac4cf Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 13:46:28 -0400
Subject: [PATCH 29/39] - Update logic for setting reparse to finished - add
 tests for meta model methods

---
 .../search_indexes/models/reparse_meta.py     |  24 +++-
 .../search_indexes/test/test_reparse.py       | 122 ++++++++++++++++++
 2 files changed, 140 insertions(+), 6 deletions(-)

diff --git a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
index b7ddf3ff4..ddbf4ce4a 100644
--- a/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
+++ b/tdrs-backend/tdpservice/search_indexes/models/reparse_meta.py
@@ -47,6 +47,21 @@ class Meta:
     new_indices = models.BooleanField(default=False)
     delete_old_indices = models.BooleanField(default=False)
 
+    @staticmethod
+    def file_counts_match(meta_model):
+        """
+        Check whether the file counts match.
+
+        This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row
+        containing this model has not been locked the caller will experience race issues.
+        """
+        print("\n\nINSIDE FILE COUNTS MATCH:")
+        print(f"{meta_model.num_files_to_reparse }, {meta_model.files_completed}, {meta_model.files_failed}\n\n")
+        return (meta_model.files_completed == meta_model.num_files_to_reparse or
+                meta_model.files_completed + meta_model.files_failed ==
+                meta_model.num_files_to_reparse or
+                meta_model.files_failed == meta_model.num_files_to_reparse)
+
     @staticmethod
     def assert_all_files_done(meta_model):
         """
@@ -55,10 +70,7 @@ def assert_all_files_done(meta_model):
 
         This function assumes the meta_model has been passed in a distributed/thread safe way. If the database row
         containing this model has not been locked the caller will experience race issues.
""" - if meta_model.finished and (meta_model.files_completed == meta_model.num_files_to_reparse or - meta_model.files_completed + meta_model.files_failed == - meta_model.num_files_to_reparse or - meta_model.files_failed == meta_model.num_files_to_reparse): + if meta_model.finished and ReparseMeta.file_counts_match(meta_model): return True return False @@ -89,7 +101,7 @@ def increment_files_completed(reparse_meta_models): try: meta_model = reparse_meta_models.select_for_update().latest("pk") meta_model.files_completed += 1 - if ReparseMeta.assert_all_files_done(meta_model): + if ReparseMeta.file_counts_match(meta_model): ReparseMeta.set_reparse_finished(meta_model) meta_model.save() except DatabaseError: @@ -110,7 +122,7 @@ def increment_files_failed(reparse_meta_models): try: meta_model = reparse_meta_models.select_for_update().latest("pk") meta_model.files_failed += 1 - if ReparseMeta.assert_all_files_done(meta_model): + if ReparseMeta.file_counts_match(meta_model): ReparseMeta.set_reparse_finished(meta_model) meta_model.save() except DatabaseError: diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 938c159b9..592724a0d 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -383,3 +383,125 @@ def test_reparse_no_files(mocker): assert res is None assert LogEntry.objects.latest('pk').change_message == ("No files available for the selected Fiscal Year: 2025 and " "Quarter: Q1-4. Nothing to do.") + +@pytest.mark.django_db() +def test_mm_all_files_done(): + """Test meta model all files done""" + meta_model = ReparseMeta.objects.create() + assert ReparseMeta.assert_all_files_done(meta_model) is False + + meta_model.finished = True + meta_model.files_completed = 1 + meta_model.num_files_to_reparse = 1 + assert ReparseMeta.assert_all_files_done(meta_model) is True + +@pytest.mark.django_db() +def test_mm_increment_files_completed(big_file): + """Test meta model increment files completed.""" + meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + big_file.reparse_meta_models.add(meta_model) + big_file.save() + + ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is False + assert meta_model.files_completed == 1 + assert meta_model.files_failed == 0 + + ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is True + assert meta_model.files_completed == 2 + assert meta_model.files_failed == 0 + + assert meta_model.success is True + + assert ReparseMeta.assert_all_files_done(meta_model) is True + +@pytest.mark.django_db() +def test_mm_increment_files_failed(big_file): + """Test meta model increment files failed.""" + meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + big_file.reparse_meta_models.add(meta_model) + big_file.save() + + ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is False + assert meta_model.files_completed == 0 + assert meta_model.files_failed == 1 + + ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is True + assert meta_model.files_completed == 0 + assert meta_model.files_failed == 2 + + assert meta_model.success is False + + 
assert ReparseMeta.assert_all_files_done(meta_model) is True + +@pytest.mark.django_db() +def test_mm_increment_files_failed_and_passed(big_file): + """Test meta model both increment failed and passed files.""" + meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + big_file.reparse_meta_models.add(meta_model) + big_file.save() + + ReparseMeta.increment_files_completed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is False + assert meta_model.files_completed == 1 + assert meta_model.files_failed == 0 + + ReparseMeta.increment_files_failed(big_file.reparse_meta_models) + meta_model = ReparseMeta.get_latest() + assert meta_model.finished is True + assert meta_model.files_completed == 1 + assert meta_model.files_failed == 1 + + assert meta_model.success is False + + assert ReparseMeta.assert_all_files_done(meta_model) is True + +@pytest.mark.django_db() +def test_mm_increment_records_created(big_file): + """Test meta model increment records created.""" + meta_model = ReparseMeta.objects.create(num_files_to_reparse=2, all=True) + big_file.reparse_meta_models.add(meta_model) + big_file.save() + + ReparseMeta.increment_records_created(big_file.reparse_meta_models, 500) + meta_model = ReparseMeta.get_latest() + assert meta_model.num_records_created == 500 + + ReparseMeta.increment_records_created(big_file.reparse_meta_models, 888) + meta_model = ReparseMeta.get_latest() + assert meta_model.num_records_created == 1388 + +@pytest.mark.django_db() +def test_mm_get_latest(): + """Test get latest meta model.""" + assert ReparseMeta.get_latest() is None + meta1 = ReparseMeta.objects.create() + assert ReparseMeta.get_latest() == meta1 + + ReparseMeta.objects.create() + assert ReparseMeta.get_latest() != meta1 + +@pytest.mark.django_db() +def test_mm_file_counts_match(): + """Test meta model file counts match.""" + meta_model = ReparseMeta.objects.create(num_files_to_reparse=2) + assert ReparseMeta.file_counts_match(meta_model) is False + + meta_model.files_completed = 2 + assert ReparseMeta.file_counts_match(meta_model) is True + + meta_model.files_completed = 0 + meta_model.files_failed = 2 + assert ReparseMeta.file_counts_match(meta_model) is True + + meta_model.files_completed = 1 + meta_model.files_failed = 1 + assert ReparseMeta.file_counts_match(meta_model) is True From 51ec731aa14371e030223d52d88f17925a924932 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 13:59:57 -0400 Subject: [PATCH 30/39] - linting --- tdrs-backend/tdpservice/search_indexes/test/test_reparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py index 592724a0d..360988224 100644 --- a/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/test/test_reparse.py @@ -386,7 +386,7 @@ def test_reparse_no_files(mocker): @pytest.mark.django_db() def test_mm_all_files_done(): - """Test meta model all files done""" + """Test meta model all files done.""" meta_model = ReparseMeta.objects.create() assert ReparseMeta.assert_all_files_done(meta_model) is False From b9cb34e5a26293e83ef2bc45eb62aed38c373406 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 15:41:16 -0400 Subject: [PATCH 31/39] - add basic tests for db backup --- tdrs-backend/tdpservice/conftest.py | 6 ++ .../scheduling/management/db_backup.py | 25 +++++---- 
.../scheduling/test/test_db_backup.py | 56 +++++++++++++++++++ 3 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 tdrs-backend/tdpservice/scheduling/test/test_db_backup.py diff --git a/tdrs-backend/tdpservice/conftest.py b/tdrs-backend/tdpservice/conftest.py index 416a4a890..0437ef13b 100644 --- a/tdrs-backend/tdpservice/conftest.py +++ b/tdrs-backend/tdpservice/conftest.py @@ -395,6 +395,12 @@ def test_private_key(): yield get_private_key(key) +@pytest.fixture() +def system_user(): + """Create system user.""" + return UserFactory.create(username='system') + + # Register factories with pytest-factoryboy for automatic dependency injection # of model-related fixtures into tests. register(OwaspZapScanFactory) diff --git a/tdrs-backend/tdpservice/scheduling/management/db_backup.py b/tdrs-backend/tdpservice/scheduling/management/db_backup.py index 11beceaed..0929a98e4 100644 --- a/tdrs-backend/tdpservice/scheduling/management/db_backup.py +++ b/tdrs-backend/tdpservice/scheduling/management/db_backup.py @@ -20,7 +20,10 @@ OS_ENV = os.environ -content_type = ContentType.objects.get_for_model(LogEntry) + +def get_content_type(): + """Get content type for log entry.""" + return ContentType.objects.get_for_model(LogEntry) def get_system_values(): """Return dict of keys and settings to use whether local or deployed.""" @@ -91,7 +94,7 @@ def backup_database(file_name, logger.info(msg) LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Executed Database Backup", action_flag=ADDITION, @@ -123,7 +126,7 @@ def restore_database(file_name, postgres_client, database_uri, system_user): msg = "Completed database creation." LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Executed Database create", action_flag=ADDITION, @@ -145,7 +148,7 @@ def restore_database(file_name, postgres_client, database_uri, system_user): msg = "Completed database restoration." 
LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Executed Database restore", action_flag=ADDITION, @@ -177,7 +180,7 @@ def upload_file(file_name, bucket, sys_values, system_user, object_name=None, re msg = "Successfully uploaded {} to s3://{}/{}.".format(file_name, bucket, object_name) LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Executed database backup S3 upload", action_flag=ADDITION, @@ -208,7 +211,7 @@ def download_file(bucket, msg = "Successfully downloaded s3 file {}/{} to {}.".format(bucket, object_name, file_name) LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Executed database backup S3 download", action_flag=ADDITION, @@ -293,7 +296,7 @@ def main(argv, sys_values, system_user): if arg_to_backup: LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Begining Database Backup", action_flag=ADDITION, @@ -316,7 +319,7 @@ def main(argv, sys_values, system_user): LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Finished Database Backup", action_flag=ADDITION, @@ -329,7 +332,7 @@ def main(argv, sys_values, system_user): elif arg_to_restore: LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Begining Database Restore", action_flag=ADDITION, @@ -352,7 +355,7 @@ def main(argv, sys_values, system_user): LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Finished Database Restore", action_flag=ADDITION, @@ -377,7 +380,7 @@ def run_backup(arg): logger.error(f"Caught Exception in run_backup. 
Exception: {e}.") LogEntry.objects.log_action( user_id=system_user.pk, - content_type_id=content_type.pk, + content_type_id=get_content_type().pk, object_id=None, object_repr="Exception in run_backup", action_flag=ADDITION, diff --git a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py new file mode 100644 index 000000000..6e85b53e8 --- /dev/null +++ b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py @@ -0,0 +1,56 @@ +"""Test cases for db_backup.py functions.""" + +import os +import pytest +from tdpservice.scheduling.management import db_backup + +@pytest.mark.django_db +def test_backup_database(mocker, system_user): + """Test backup functionality.""" + mocker.patch( + 'tdpservice.scheduling.management.db_backup.get_system_values', + return_value={'DATABASE_URI': "postgres://tdpuser:something_secure@postgres:5432/tdrs_test"} + ) + + file_name = "/tmp/test_backup.pg" + ret = db_backup.backup_database(file_name, "", + "postgres://tdpuser:something_secure@postgres:5432/tdrs_test", + system_user) + + assert ret is True + assert os.path.getsize(file_name) > 0 + os.remove(file_name) + assert os.path.exists(file_name) is False + +@pytest.mark.django_db +def test_backup_database_fail_on_backup(system_user): + """Test backup fails on psql non-zero return code.""" + with pytest.raises(Exception) as e: + file_name = "/tmp/test_backup.pg" + db_backup.backup_database(file_name, "asdfasdfassfd", + "postgres://tdpuser:something_secure@postgres:5432/tdrs_test", + system_user) + + assert str(e.value) == "pg_dump command failed with a non zero exit code." + assert os.path.exists(file_name) is False + +@pytest.mark.django_db +def test_backup_database_fail_on_general_exception(): + """Test backup succeeds but raises exception on string user for log entry.""" + with pytest.raises(Exception) as e: + file_name = "/tmp/test_backup.pg" + db_backup.backup_database(file_name, "", + "postgres://tdpuser:something_secure@postgres:5432/tdrs_test", + "system_user") + + assert str(e.value) == "'str' object has no attribute 'pk'" + assert os.path.exists(file_name) is True + os.remove(file_name) + assert os.path.exists(file_name) is False + + +@pytest.mark.django_db +def test_get_database_credentials(): + """Test get credentials.""" + creds = db_backup.get_database_credentials("postgres://tdpuser:something_secure@postgres:5432/tdrs_test") + assert creds == ["tdpuser", "something_secure", "postgres", "5432", "tdrs_test"] From a85b2327fdab865dd6a3dcbbae5a212b73e1d515 Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Fri, 13 Sep 2024 15:41:34 -0400 Subject: [PATCH 32/39] - remove mock --- tdrs-backend/tdpservice/scheduling/test/test_db_backup.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py index 6e85b53e8..2ca6d4699 100644 --- a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py +++ b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py @@ -5,13 +5,8 @@ from tdpservice.scheduling.management import db_backup @pytest.mark.django_db -def test_backup_database(mocker, system_user): +def test_backup_database(system_user): """Test backup functionality.""" - mocker.patch( - 'tdpservice.scheduling.management.db_backup.get_system_values', - return_value={'DATABASE_URI': "postgres://tdpuser:something_secure@postgres:5432/tdrs_test"} - ) - file_name = "/tmp/test_backup.pg" ret = 
From a85b2327fdab865dd6a3dcbbae5a212b73e1d515 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 15:41:34 -0400
Subject: [PATCH 32/39] - remove mock

---
 tdrs-backend/tdpservice/scheduling/test/test_db_backup.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
index 6e85b53e8..2ca6d4699 100644
--- a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
+++ b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
@@ -5,13 +5,8 @@
 from tdpservice.scheduling.management import db_backup
 
 @pytest.mark.django_db
-def test_backup_database(mocker, system_user):
+def test_backup_database(system_user):
     """Test backup functionality."""
-    mocker.patch(
-        'tdpservice.scheduling.management.db_backup.get_system_values',
-        return_value={'DATABASE_URI': "postgres://tdpuser:something_secure@postgres:5432/tdrs_test"}
-    )
-
     file_name = "/tmp/test_backup.pg"
     ret = db_backup.backup_database(file_name, "",
                                     "postgres://tdpuser:something_secure@postgres:5432/tdrs_test",

From 76fa734cbe5a3b1b921d39a380a60926d0f9944b Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 15:54:01 -0400
Subject: [PATCH 33/39] - add main routine test for backup

---
 .../scheduling/test/test_db_backup.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
index 2ca6d4699..5a1562a33 100644
--- a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
+++ b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
@@ -3,6 +3,7 @@
 import os
 import pytest
 from tdpservice.scheduling.management import db_backup
+from django.contrib.admin.models import LogEntry
 
 @pytest.mark.django_db
 def test_backup_database(system_user):
@@ -49,3 +50,20 @@ def test_get_database_credentials():
     """Test get credentials."""
     creds = db_backup.get_database_credentials("postgres://tdpuser:something_secure@postgres:5432/tdrs_test")
     assert creds == ["tdpuser", "something_secure", "postgres", "5432", "tdrs_test"]
+
+@pytest.mark.django_db
+def test_main_backup(mocker, system_user):
+    """Test call the main function."""
+    mocker.patch(
+        'tdpservice.scheduling.management.db_backup.upload_file',
+        return_value=True
+    )
+    sys_vals = {"DATABASE_URI": "postgres://tdpuser:something_secure@postgres:5432",
+                "DATABASE_DB_NAME": "tdrs_test",
+                "POSTGRES_CLIENT_DIR": "",
+                "S3_BUCKET": "",
+                "S3_REGION": "",}
+
+    db_backup.main(['-b', '-f', '/tmp/test_backup.pg'], sys_values=sys_vals, system_user=system_user)
+    assert LogEntry.objects.get(change_message="Begining database backup.").pk is not None
+    assert LogEntry.objects.get(change_message="Finished database backup.").pk is not None

From 0dfb37ea9a3392a669a8fd67f0145a91ab6fb7a2 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Fri, 13 Sep 2024 15:54:24 -0400
Subject: [PATCH 34/39] - linting

---
 tdrs-backend/tdpservice/scheduling/test/test_db_backup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
index 5a1562a33..bdcdc727e 100644
--- a/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
+++ b/tdrs-backend/tdpservice/scheduling/test/test_db_backup.py
@@ -62,7 +62,7 @@ def test_main_backup(mocker, system_user):
                 "DATABASE_DB_NAME": "tdrs_test",
                 "POSTGRES_CLIENT_DIR": "",
                 "S3_BUCKET": "",
-                "S3_REGION": "",}
+                "S3_REGION": ""}
 
     db_backup.main(['-b', '-f', '/tmp/test_backup.pg'], sys_values=sys_vals, system_user=system_user)
     assert LogEntry.objects.get(change_message="Begining database backup.").pk is not None

From 16c1e11a23e9bcb4ca2c1af92d4e7fe30f0a811d Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Thu, 19 Sep 2024 07:32:29 -0400
Subject: [PATCH 35/39] - Added comment for posterity

---
 .../search_indexes/management/commands/clean_and_reparse.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
index eff358107..fde13b309 100644
--- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
+++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py
@@ -113,6 +113,9 @@ def _delete_records(self, file_ids, new_indices, log_context):
             logger.info(f"Deleting {count} records of type: {model}.")
{model}.") if not new_indices: # If we aren't creating new indices, then we don't want duplicate data in the existing indices. + # We alos use a Paginator here because it allows us to slice querysets based on a batch size. This + # prevents a very large queryset from being brought into main memory when `doc().update(...)` + # evaluates it by iterating over the queryset and deleting the models from ES. paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) for page in paginator: doc().update(page.object_list, refresh=True, action='delete') From 0fd06c2a6845dc0fa3932339f863222d975d23be Mon Sep 17 00:00:00 2001 From: Eric Lipe Date: Mon, 23 Sep 2024 16:23:50 -0400 Subject: [PATCH 36/39] - Update reparse to not fail/exit when elastic throws index update. instead aggregate errors and log a warning message for the user. --- .../management/commands/clean_and_reparse.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py index fde13b309..d0c7a9934 100644 --- a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -101,9 +101,25 @@ def _delete_summaries(self, file_ids, log_context): level='critical') raise e + def __handle_elastic_doc_delete(self, doc, qset, model, elastic_exceptions, new_indices): + """Delete documents from Elastic and handle exceptions.""" + if not new_indices: + # If we aren't creating new indices, then we don't want duplicate data in the existing indices. + # We alos use a Paginator here because it allows us to slice querysets based on a batch size. This + # prevents a very large queryset from being brought into main memory when `doc().update(...)` + # evaluates it by iterating over the queryset and deleting the models from ES. + paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) + for page in paginator: + try: + doc().update(page.object_list, refresh=True, action='delete') + except ElasticsearchException: + elastic_exceptions[model] = elastic_exceptions.get(model, 0) + 1 + continue + def _delete_records(self, file_ids, new_indices, log_context): """Delete records, errors, and documents from Postgres and Elastic.""" total_deleted = 0 + elastic_exceptions = dict() for doc in DOCUMENTS: try: model = doc.Django.model @@ -111,21 +127,8 @@ def _delete_records(self, file_ids, new_indices, log_context): count = qset.count() total_deleted += count logger.info(f"Deleting {count} records of type: {model}.") - if not new_indices: - # If we aren't creating new indices, then we don't want duplicate data in the existing indices. - # We alos use a Paginator here because it allows us to slice querysets based on a batch size. This - # prevents a very large queryset from being brought into main memory when `doc().update(...)` - # evaluates it by iterating over the queryset and deleting the models from ES. - paginator = Paginator(qset, settings.BULK_CREATE_BATCH_SIZE) - for page in paginator: - doc().update(page.object_list, refresh=True, action='delete') + self.__handle_elastic_doc_delete(doc, qset, model, elastic_exceptions, new_indices) qset._raw_delete(qset.db) - except ElasticsearchException as e: - log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! 
From 23924fb50c8d5a5fb02d27de118723cafca2dd37 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Thu, 26 Sep 2024 11:01:58 -0400
Subject: [PATCH 37/39] - update terraform for testing

---
 terraform/dev/main.tf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/terraform/dev/main.tf b/terraform/dev/main.tf
index 0b81b8114..35372b7a8 100644
--- a/terraform/dev/main.tf
+++ b/terraform/dev/main.tf
@@ -50,8 +50,8 @@ data "cloudfoundry_service" "rds" {
 resource "cloudfoundry_service_instance" "database" {
   name             = "tdp-db-dev"
   space            = data.cloudfoundry_space.space.id
-  service_plan     = data.cloudfoundry_service.rds.service_plans["micro-psql"]
-  json_params      = "{\"version\": \"15\"}"
+  service_plan     = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"]
+  json_params      = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 50}"
   recursive_delete = true
   timeouts {
     create = "60m"

From 0543f299f246538baec1344b24a55e1a8521c752 Mon Sep 17 00:00:00 2001
From: Eric Lipe
Date: Mon, 30 Sep 2024 09:35:07 -0400
Subject: [PATCH 38/39] - Update terraform based on cloud.gov response - Give
 Prod at least 500GB

---
 terraform/production/main.tf | 4 ++--
 terraform/staging/main.tf    | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/terraform/production/main.tf b/terraform/production/main.tf
index c9ecf505e..5a02cb04b 100644
--- a/terraform/production/main.tf
+++ b/terraform/production/main.tf
@@ -50,8 +50,8 @@ data "cloudfoundry_service" "rds" {
 resource "cloudfoundry_service_instance" "database" {
   name             = "tdp-db-prod"
   space            = data.cloudfoundry_space.space.id
-  service_plan     = data.cloudfoundry_service.rds.service_plans["medium-psql"]
-  json_params      = "{\"version\": \"15\"}"
+  service_plan     = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"]
+  json_params      = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 500}"
   recursive_delete = true
   timeouts {
     create = "60m"
diff --git a/terraform/staging/main.tf b/terraform/staging/main.tf
index 0c4cc2576..7cdd3f676 100644
--- a/terraform/staging/main.tf
+++ b/terraform/staging/main.tf
@@ -50,8 +50,8 @@ data "cloudfoundry_service" "rds" {
 resource "cloudfoundry_service_instance" "database" {
   name             = "tdp-db-staging"
   space            = data.cloudfoundry_space.space.id
-  service_plan     = data.cloudfoundry_service.rds.service_plans["micro-psql"]
-  json_params      = "{\"version\": \"15\"}"
+  service_plan     = data.cloudfoundry_service.rds.service_plans["medium-gp-psql"]
+  json_params      = "{\"version\": \"15\", \"storage_type\": \"gp3\", \"storage\": 50}"
   recursive_delete = true
   timeouts {
     create = "60m"

From 95fc24bccc4a4c6a35cd93e825a47da40326b185 Mon Sep
17 00:00:00 2001 From: robgendron <163159602+robgendron@users.noreply.github.com> Date: Mon, 30 Sep 2024 11:27:19 -0400 Subject: [PATCH 39/39] Create sprint-107-summary.md (#3189) * Create sprint-107-summary * Rename sprint-107-summary to sprint-107-summary.md * Update sprint-107-summary.md * Update sprint-107-summary.md * Update docs/Sprint-Review/sprint-107-summary.md Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> * Update sprint-107-summary.md --------- Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> Co-authored-by: Alex P. <63075587+ADPennington@users.noreply.github.com> --- docs/Sprint-Review/sprint-107-summary.md | 89 ++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 docs/Sprint-Review/sprint-107-summary.md diff --git a/docs/Sprint-Review/sprint-107-summary.md b/docs/Sprint-Review/sprint-107-summary.md new file mode 100644 index 000000000..e6d1aa4d9 --- /dev/null +++ b/docs/Sprint-Review/sprint-107-summary.md @@ -0,0 +1,89 @@ +# sprint-107-summary + +8/28/2024 - 9/10/2024 + +### Priority Setting + +* Re-parsing epic +* Postgres db access +* UX research with DIGIT team +* Continuous communication with STTs about latest TDP features and updates + +### Sprint Goal + +**Dev:** + +_**Re-parsing, Admin Console Improvements, and Application Health Monitoring work**_ + +* \#3106 — Re-Parse Django Action +* \#3137 — \[bug] OFA unable to export data to csv by record type and fiscal period +* \#3074 — TDP Data Files page permissions for DIGIT & Sys Admin user groups +* \#3044 — Prometheus/Grafana - Local Environment +* \#3042 — Sentry in cloud.gov + +**DevOps:** +_**Successful deployments across environments and pipeline stability investments**_ + +* \#2965 — As tech lead, I want a database seed implemented for testing +* \#2458 — Integrate Nexus into CircleCI + +**Design:** + +_**Support reviews, In-app banner to support parsed data, Continue Error Audit (Cat 4)**_ + +* \#3156 — Release Notes Email Template +* \#3100 — \[Design Deliverable] Update stakeholders & personas document +* \#2968 — \[Design Deliverable] Update Error Audit for Cat 4 / QA + +## Tickets + +### Completed/Merged + +* [#2561 As a sys admin, I need TDP to automatically deactivate accounts that are inactive for 180 days](https://github.com/raft-tech/TANF-app/issues/2561) +* [#2792 \[Error Audit\] Category 3 error messages clean-up ](https://github.com/raft-tech/TANF-app/issues/2792) +* [#3043 Sentry: Local environment for Debugging](https://github.com/raft-tech/TANF-app/issues/3043) +* [#3064 Re-parse Meta Model](https://github.com/raft-tech/TANF-app/issues/3064) +* [#3065 Spike - Guarantee Sequential Execution of Re-parse Command](https://github.com/raft-tech/TANF-app/issues/3065) +* [#3074 TDP Data Files page permissions for DIGIT & Sys Admin user groups ](https://github.com/raft-tech/TANF-app/issues/3074) +* [#3076 Admin Filter Enhancements for Data Files Page ](https://github.com/raft-tech/TANF-app/issues/3076) +* [#3078 \[Research Synthesis\] DIGIT Admin Experience Improvements ](https://github.com/raft-tech/TANF-app/issues/3078) +* [#3087 Admin By Newest Filter Enhancements for Data Files Page ](https://github.com/raft-tech/TANF-app/issues/3087) +* [#3114 \[Design Spike\] In-app banner for submission history pages w/ data parsed before May 2024 ](https://github.com/raft-tech/TANF-app/issues/3114) +* [#3142 \[Research Spike\] Get more detail about Yun & DIGIT's data workflow and use cases ](https://github.com/raft-tech/TANF-app/issues/3142) 
+ +### Submitted (QASP Review, OCIO Review) + +* + +### Ready to Merge + +* [#2883 Pre-Made Reporting Dashboards on Kibana ](https://github.com/raft-tech/TANF-app/issues/2883) +* [#3102 Admin Exp: Django Implement Multi-Select Fiscal Period Dropdown For Data Export ](https://github.com/raft-tech/TANF-app/issues/3102) + +### Closed (Not Merged) + +* [#3110 Spike - Investigate Custom Filter Integration ](https://github.com/raft-tech/TANF-app/issues/3110) +* [#3156 Release Notes Knowledge Center and Email Template ](https://github.com/raft-tech/TANF-app/issues/3156) + +### Moved to Next Sprint + +**In Progress** + +* [#2968 \[Design Deliverable\] Update Error Audit for Cat 4 / QA ](https://github.com/raft-tech/TANF-app/issues/2968) +* [#3060 As a TDP user, I need to stay logged in when I'm actively using the system ](https://github.com/raft-tech/TANF-app/issues/3060) +* [#3100 \[Design Deliverable\] Update stakeholders & personas document ](https://github.com/raft-tech/TANF-app/issues/3100) +* [#3106 Re-Parse Django Action ](https://github.com/raft-tech/TANF-app/issues/3106) +* [#3137 \[bug\] OFA unable to export data to csv by record type and fiscal period ](https://github.com/raft-tech/TANF-app/issues/3137) +* [#3164 \[Research Synthesis\] Yun & DIGIT's data workflow and use cases ](https://github.com/raft-tech/TANF-app/issues/3164) +* [#3170 Reparse Command Fails when Queryset is Large ](https://github.com/raft-tech/TANF-app/issues/3170) +* [#3179 Spike - How We Work / Hopes & Fears Workshop prep ](https://github.com/raft-tech/TANF-app/issues/3179) + +**Blocked** + +* + +**Raft Review** + +* [#2458 Integrate Nexus into CircleCI ](https://github.com/raft-tech/TANF-app/issues/2458) +* [#2965 As tech lead, I want a database seed implemented for testing ](https://github.com/raft-tech/TANF-app/issues/2965) +* [#3044 Prometheus/Grafana - Local Environment ](https://github.com/raft-tech/TANF-app/issues/3044)