diff --git a/.circleci/build-and-test/jobs.yml b/.circleci/build-and-test/jobs.yml index b4f5afd2f..4e32831f8 100644 --- a/.circleci/build-and-test/jobs.yml +++ b/.circleci/build-and-test/jobs.yml @@ -5,14 +5,14 @@ - checkout - docker-compose-check - docker-compose-up-backend - - run: - name: Execute Python Linting Test - command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ." - run: name: Run Unit Tests And Create Code Coverage Report command: | cd tdrs-backend; docker-compose run --rm web bash -c "./wait_for_services.sh && pytest --cov-report=xml" + - run: + name: Execute Python Linting Test + command: cd tdrs-backend; docker-compose run --rm web bash -c "flake8 ." - upload-codecov: component: backend coverage-report: ./tdrs-backend/coverage.xml diff --git a/.circleci/config.yml b/.circleci/config.yml index 8b8a62ee7..65715debc 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,5 +82,5 @@ workflows: - develop - main - master - - /^release.*/ - + - /^release.*/ + diff --git a/scripts/zap-scanner.sh b/scripts/zap-scanner.sh index f2e999895..c3f534b84 100755 --- a/scripts/zap-scanner.sh +++ b/scripts/zap-scanner.sh @@ -139,7 +139,6 @@ ZAP_CLI_OPTIONS="\ -config globalexcludeurl.url_list.url\(21\).regex='^https:\/\/.*\.identitysandbox.gov\/.*$' \ -config globalexcludeurl.url_list.url\(21\).description='Site - IdentitySandbox.gov' \ -config globalexcludeurl.url_list.url\(21\).enabled=true \ - -config spider.postform=true" # How long ZAP will crawl the app with the spider process diff --git a/tdrs-backend/Pipfile.lock b/tdrs-backend/Pipfile.lock index 3e049d740..bc99d280f 100644 --- a/tdrs-backend/Pipfile.lock +++ b/tdrs-backend/Pipfile.lock @@ -916,6 +916,7 @@ ], "index": "pypi", "version": "==2022.1" + }, "redis": { "hashes": [ diff --git a/tdrs-backend/docker-compose.local.yml b/tdrs-backend/docker-compose.local.yml index ac5924e18..3c8e76317 100644 --- a/tdrs-backend/docker-compose.local.yml +++ b/tdrs-backend/docker-compose.local.yml @@ -80,7 +80,7 @@ services: build: . command: > bash -c "./wait_for_services.sh && - ./gunicorn_start.sh && + ./gunicorn_start.sh && celery -A tdpservice.settings worker -l info" ports: - "5555:5555" @@ -106,5 +106,5 @@ volumes: networks: default: - external: - name: external-net + name: external-net + external: true diff --git a/tdrs-backend/docker-compose.yml b/tdrs-backend/docker-compose.yml index d9d10d393..69e08bc64 100644 --- a/tdrs-backend/docker-compose.yml +++ b/tdrs-backend/docker-compose.yml @@ -124,5 +124,5 @@ volumes: networks: default: - external: name: external-net + external: true diff --git a/tdrs-backend/tdpservice/data_files/test/factories.py b/tdrs-backend/tdpservice/data_files/test/factories.py index 34522154c..88333f7d9 100644 --- a/tdrs-backend/tdpservice/data_files/test/factories.py +++ b/tdrs-backend/tdpservice/data_files/test/factories.py @@ -18,7 +18,7 @@ class Meta: extension = "txt" section = "Active Case Data" quarter = "Q1" - year = "2020" + year = 2020 version = 1 user = factory.SubFactory(UserFactory) stt = factory.SubFactory(STTFactory) diff --git a/tdrs-backend/tdpservice/parsers/admin.py b/tdrs-backend/tdpservice/parsers/admin.py index c98ef5d70..266fb5b26 100644 --- a/tdrs-backend/tdpservice/parsers/admin.py +++ b/tdrs-backend/tdpservice/parsers/admin.py @@ -15,4 +15,11 @@ class ParserErrorAdmin(admin.ModelAdmin): ] +class DataFileSummaryAdmin(admin.ModelAdmin): + """ModelAdmin class for DataFileSummary objects generated in parsing.""" + + list_display = ['status', 'case_aggregates', 'datafile'] + + admin.site.register(models.ParserError, ParserErrorAdmin) +admin.site.register(models.DataFileSummary, DataFileSummaryAdmin) diff --git a/tdrs-backend/tdpservice/parsers/migrations/0002_alter_parsererror_error_type.py b/tdrs-backend/tdpservice/parsers/migrations/0002_alter_parsererror_error_type.py index 5236b5c29..e55c856ce 100644 --- a/tdrs-backend/tdpservice/parsers/migrations/0002_alter_parsererror_error_type.py +++ b/tdrs-backend/tdpservice/parsers/migrations/0002_alter_parsererror_error_type.py @@ -14,5 +14,5 @@ class Migration(migrations.Migration): model_name='parsererror', name='error_type', field=models.TextField(choices=[('1', 'File pre-check'), ('2', 'Record value invalid'), ('3', 'Record value consistency'), ('4', 'Case consistency'), ('5', 'Section consistency'), ('6', 'Historical consistency')], max_length=128), - ), + ) ] diff --git a/tdrs-backend/tdpservice/parsers/migrations/0007_datafilesummary.py b/tdrs-backend/tdpservice/parsers/migrations/0007_datafilesummary.py new file mode 100644 index 000000000..5f5e2a9b5 --- /dev/null +++ b/tdrs-backend/tdpservice/parsers/migrations/0007_datafilesummary.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.15 on 2023-09-20 15:35 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('data_files', '0012_datafile_s3_versioning_id'), + ('parsers', '0006_auto_20230810_1500'), + ] + + operations = [ + migrations.CreateModel( + name='DataFileSummary', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('status', models.CharField(choices=[('Pending', 'Pending'), ('Accepted', 'Accepted'), ('Accepted with Errors', 'Accepted With Errors'), ('Rejected', 'Rejected')], default='Pending', max_length=50)), + ('case_aggregates', models.JSONField(null=True)), + ('datafile', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='data_files.datafile')), + ], + ), + ] diff --git a/tdrs-backend/tdpservice/parsers/models.py b/tdrs-backend/tdpservice/parsers/models.py index 4a638e06a..0c0ccdc50 100644 --- a/tdrs-backend/tdpservice/parsers/models.py +++ b/tdrs-backend/tdpservice/parsers/models.py @@ -5,7 +5,7 @@ from django.utils.translation import gettext_lazy as _ from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType - +from tdpservice.data_files.models import DataFile class ParserErrorCategoryChoices(models.TextChoices): """Enum of ParserError error_type.""" @@ -62,8 +62,49 @@ def __repr__(self): def __str__(self): """Return a string representation of the model.""" - return f"error_message: {self.error_message}" + return f"ParserError {self.__dict__}" def _get_error_message(self): """Return the error message.""" return self.error_message + +class DataFileSummary(models.Model): + """Aggregates information about a parsed file.""" + + class Status(models.TextChoices): + """Enum for status of parsed file.""" + + PENDING = "Pending" # file has been uploaded, but not validated + ACCEPTED = "Accepted" + ACCEPTED_WITH_ERRORS = "Accepted with Errors" + REJECTED = "Rejected" + + status = models.CharField( + max_length=50, + choices=Status.choices, + default=Status.PENDING, + ) + + datafile = models.ForeignKey(DataFile, on_delete=models.CASCADE) + + case_aggregates = models.JSONField(null=True, blank=False) + + def get_status(self): + """Set and return the status field based on errors and models associated with datafile.""" + errors = ParserError.objects.filter(file=self.datafile) + [print(error) for error in errors] + + # excluding row-level pre-checks and trailer pre-checks. + precheck_errors = errors.filter(error_type=ParserErrorCategoryChoices.PRE_CHECK)\ + .exclude(field_name="Record_Type")\ + .exclude(error_message__icontains="trailer")\ + .exclude(error_message__icontains="Unknown Record_Type was found.") + + if errors is None: + return DataFileSummary.Status.PENDING + elif errors.count() == 0: + return DataFileSummary.Status.ACCEPTED + elif precheck_errors.count() > 0: + return DataFileSummary.Status.REJECTED + else: + return DataFileSummary.Status.ACCEPTED_WITH_ERRORS diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index 2c2183c68..e8e4a3121 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -38,8 +38,8 @@ def parse_datafile(datafile): section_is_valid, section_error = validators.validate_header_section_matches_submission( datafile, - program_type, - section, + util.get_section_reference(program_type, section), + util.make_generate_parser_error(datafile, 1) ) if not section_is_valid: @@ -123,7 +123,6 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted): errors = {} line_number = 0 - schema_manager_options = get_schema_manager_options(program_type) unsaved_records = {} unsaved_parser_errors = {} @@ -180,11 +179,9 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted): prev_sum = header_count + trailer_count continue - schema_manager = get_schema_manager(line, section, schema_manager_options) - - schema_manager.update_encrypted_fields(is_encrypted) + schema_manager = get_schema_manager(line, section, program_type) - records = manager_parse_line(line, schema_manager, generate_error) + records = manager_parse_line(line, schema_manager, generate_error, is_encrypted) record_number = 0 for i in range(len(records)): @@ -236,68 +233,25 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted): return errors -def manager_parse_line(line, schema_manager, generate_error): +def manager_parse_line(line, schema_manager, generate_error, is_encrypted=False): """Parse and validate a datafile line using SchemaManager.""" - if schema_manager.schemas: + try: + schema_manager.update_encrypted_fields(is_encrypted) records = schema_manager.parse_and_validate(line, generate_error) return records + except AttributeError as e: + logging.error(e) + return [(None, False, [ + generate_error( + schema=None, + error_category=ParserErrorCategoryChoices.PRE_CHECK, + error_message="Unknown Record_Type was found.", + record=None, + field="Record_Type", + ) + ])] - logger.debug("Record Type is missing from record.") - return [(None, False, [ - generate_error( - schema=None, - error_category=ParserErrorCategoryChoices.PRE_CHECK, - error_message="Record Type is missing from record.", - record=None, - field=None - ) - ])] - - -def get_schema_manager_options(program_type): - """Return the allowed schema options.""" - match program_type: - case 'TAN': - return { - 'A': { - 'T1': schema_defs.tanf.t1, - 'T2': schema_defs.tanf.t2, - 'T3': schema_defs.tanf.t3, - }, - 'C': { - 'T4': schema_defs.tanf.t4, - 'T5': schema_defs.tanf.t5, - }, - 'G': { - 'T6': schema_defs.tanf.t6, - }, - 'S': { - # 'T7': schema_options.t7, - }, - } - case 'SSP': - return { - 'A': { - 'M1': schema_defs.ssp.m1, - 'M2': schema_defs.ssp.m2, - 'M3': schema_defs.ssp.m3, - }, - 'C': { - # 'M4': schema_options.m4, - # 'M5': schema_options.m5, - }, - 'G': { - # 'M6': schema_options.m6, - }, - 'S': { - # 'M7': schema_options.m7, - }, - } - # case tribal? - return None - - -def get_schema_manager(line, section, schema_options): +def get_schema_manager(line, section, program_type): """Return the appropriate schema for the line.""" line_type = line[0:2] - return schema_options.get(section, {}).get(line_type, util.SchemaManager([])) + return util.get_program_model(program_type, section, line_type) diff --git a/tdrs-backend/tdpservice/parsers/row_schema.py b/tdrs-backend/tdpservice/parsers/row_schema.py index a4faecdf3..d19f9f5f1 100644 --- a/tdrs-backend/tdpservice/parsers/row_schema.py +++ b/tdrs-backend/tdpservice/parsers/row_schema.py @@ -81,7 +81,7 @@ def run_preparsing_validators(self, line, generate_error): error_category=ParserErrorCategoryChoices.PRE_CHECK, error_message=validator_error, record=None, - field=None + field="Record_Type" ) ) diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py index 08e171c22..546910386 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py @@ -1,4 +1,4 @@ -"""Schema for HEADER row of all submission types.""" +"""Schema for t1 record types.""" from ...util import SchemaManager from ...fields import Field diff --git a/tdrs-backend/tdpservice/parsers/serializers.py b/tdrs-backend/tdpservice/parsers/serializers.py index 05a4e0d07..9b4ad734d 100644 --- a/tdrs-backend/tdpservice/parsers/serializers.py +++ b/tdrs-backend/tdpservice/parsers/serializers.py @@ -1,7 +1,7 @@ """Serializers for parsing errors.""" from rest_framework import serializers -from .models import ParserError +from .models import ParserError, DataFileSummary class ParsingErrorSerializer(serializers.ModelSerializer): @@ -23,3 +23,13 @@ class Meta: model = ParserError fields = '__all__' + + +class DataFileSummarySerializer(serializers.ModelSerializer): + """Serializer for Parsing Errors.""" + + class Meta: + """Metadata.""" + + model = DataFileSummary + fields = ['status', 'case_aggregates', 'datafile'] diff --git a/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt b/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt index e906c2ed3..dc9ddae99 100644 --- a/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt +++ b/tdrs-backend/tdpservice/parsers/test/data/small_tanf_section1.txt @@ -1,12 +1,12 @@ HEADER20204A06 TAN1EN T12020101111111111223003403361110213120000300000000000008730010000000000000000000000000000000000222222000000002229012 -T2202010111111111121219740114WTTTTTY@W2221222222221012212110014722011400000000000000000000000000000000000000000000000000000000000000000000000000000000000291 +T2202010111111111121219740114WTTTTTY@W2221222222221012212110014722011500000000000000000000000000000000000000000000000000000000000000000000000000000000000291 T320201011111111112120190127WTTTT90W022212222204398100000000 T12020101111111111524503401311110233110374300000000000005450320000000000000000000000000000000000222222000000002229021 T2202010111111111152219730113WTTTT@#Z@2221222122211012210110630023080700000000000000000000000000000000000000000000000000000000000000000000000551019700000000 T320201011111111115120160401WTTTT@BTB22212212204398100000000 T12020101111111114023001401101120213110336300000000000002910410000000000000000000000000000000000222222000000002229012 -T2202010111111111401219910501WTTTT@9#T2221222222221012212210421322011400000000000000000000000000000000000000000000000000000000000000000000000000000000000000 +T2202010111111111401219910501WTTTT@9#T2221222222221012212210421322011500000000000000000000000000000000000000000000000000000000000000000000000000000000000000 T320201011111111140120170423WTTTT@@T#22212222204398100000000 T12020101111111114721801401711120212110374300000000000003820060000000000000000000000000000000000222222000000002229012 T2202010111111111471219800223WTTTT@TTW2222212222221012212110065423010700000000000000000000000000000000000000000000000000000000000000000000000000000000000000 diff --git a/tdrs-backend/tdpservice/parsers/test/factories.py b/tdrs-backend/tdpservice/parsers/test/factories.py index c9f9adc6c..8eb309b60 100644 --- a/tdrs-backend/tdpservice/parsers/test/factories.py +++ b/tdrs-backend/tdpservice/parsers/test/factories.py @@ -1,7 +1,64 @@ """Factories for generating test data for parsers.""" import factory +from tdpservice.parsers.models import DataFileSummary, ParserErrorCategoryChoices from faker import Faker from tdpservice.data_files.test.factories import DataFileFactory +from tdpservice.users.test.factories import UserFactory +from tdpservice.stts.test.factories import STTFactory + +class ParsingFileFactory(factory.django.DjangoModelFactory): + """Generate test data for data files.""" + + class Meta: + """Hardcoded meta data for data files.""" + + model = "data_files.DataFile" + + original_filename = "data_file.txt" + slug = "data_file-txt-slug" + extension = "txt" + section = "Active Case Data" + quarter = "Q1" + year = "2020" + version = 1 + user = factory.SubFactory(UserFactory) + stt = factory.SubFactory(STTFactory) + file = factory.django.FileField(data=b'test', filename='my_data_file.txt') + s3_versioning_id = 0 + +class DataFileSummaryFactory(factory.django.DjangoModelFactory): + """Generate test data for data files.""" + + class Meta: + """Hardcoded meta data for data files.""" + + model = DataFileSummary + + status = DataFileSummary.Status.PENDING + + case_aggregates = { + "rejected": 0, + "months": [ + { + "accepted_without_errors": 100, + "accepted_with_errors": 10, + "month": "Jan", + }, + { + "accepted_without_errors": 100, + "accepted_with_errors": 10, + "month": "Feb", + }, + { + "accepted_without_errors": 100, + "accepted_with_errors": 10, + "month": "Mar", + }, + ] + } + + datafile = factory.SubFactory(DataFileFactory) + fake = Faker() @@ -21,7 +78,7 @@ class Meta: case_number = '1' rpt_month_year = 202001 error_message = "test error message" - error_type = "out of range" + error_type = ParserErrorCategoryChoices.PRE_CHECK created_at = factory.Faker("date_time") fields_json = {"test": "test"} diff --git a/tdrs-backend/tdpservice/parsers/test/test_models.py b/tdrs-backend/tdpservice/parsers/test/test_models.py index c46532ada..783e859e7 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_models.py +++ b/tdrs-backend/tdpservice/parsers/test/test_models.py @@ -2,7 +2,7 @@ import pytest from tdpservice.parsers.models import ParserError -from tdpservice.parsers.test.factories import ParserErrorFactory +from .factories import ParserErrorFactory @pytest.fixture def parser_error_instance(): diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse.py b/tdrs-backend/tdpservice/parsers/test/test_parse.py index 4a538ae8a..fd794280b 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse.py @@ -2,11 +2,14 @@ import pytest -from ..util import create_test_datafile from .. import parse -from ..models import ParserError, ParserErrorCategoryChoices +from ..models import ParserError, ParserErrorCategoryChoices, DataFileSummary from tdpservice.search_indexes.models.tanf import TANF_T1, TANF_T2, TANF_T3, TANF_T4, TANF_T5, TANF_T6 from tdpservice.search_indexes.models.ssp import SSP_M1, SSP_M2, SSP_M3 +from .factories import DataFileSummaryFactory +from tdpservice.data_files.models import DataFile +from .. import schema_defs, util + import logging es_logger = logging.getLogger('elasticsearch') @@ -16,15 +19,30 @@ @pytest.fixture def test_datafile(stt_user, stt): """Fixture for small_correct_file.""" - return create_test_datafile('small_correct_file', stt_user, stt) + return util.create_test_datafile('small_correct_file', stt_user, stt) +@pytest.fixture +def dfs(): + """Fixture for DataFileSummary.""" + return DataFileSummaryFactory.create() -@pytest.mark.django_db() -def test_parse_small_correct_file(test_datafile): +@pytest.mark.django_db +def test_parse_small_correct_file(test_datafile, dfs): """Test parsing of small_correct_file.""" - errors = parse.parse_datafile(test_datafile) - errors = ParserError.objects.filter(file=test_datafile) - assert errors.count() == 0 + dfs.datafile = test_datafile + dfs.save() + + parse.parse_datafile(test_datafile) + dfs.status = dfs.get_status() + dfs.case_aggregates = util.case_aggregates_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == {'rejected': 0, + 'months': [ + {'accepted_without_errors': 1, 'accepted_with_errors': 0, 'month': 'Oct'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Nov'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Dec'} + ]} + + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED assert TANF_T1.objects.count() == 1 @@ -41,16 +59,32 @@ def test_parse_small_correct_file(test_datafile): assert t1.SANC_REDUCTION_AMT == 0 assert t1.FAMILY_NEW_CHILD == 2 - -@pytest.mark.django_db() -def test_parse_section_mismatch(test_datafile): +@pytest.mark.django_db +def test_parse_section_mismatch(test_datafile, dfs): """Test parsing of small_correct_file where the DataFile section doesn't match the rawfile section.""" test_datafile.section = 'Closed Case Data' test_datafile.save() - errors = parse.parse_datafile(test_datafile) + dfs.datafile = test_datafile + dfs.save() + errors = parse.parse_datafile(test_datafile) + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.REJECTED parser_errors = ParserError.objects.filter(file=test_datafile) + dfs.case_aggregates = util.case_aggregates_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == {'rejected': 1, + 'months': [ + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Oct'}, + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Nov'}, + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Dec'} + ]} assert parser_errors.count() == 1 err = parser_errors.first() @@ -65,13 +99,16 @@ def test_parse_section_mismatch(test_datafile): } -@pytest.mark.django_db() -def test_parse_wrong_program_type(test_datafile): +@pytest.mark.django_db +def test_parse_wrong_program_type(test_datafile, dfs): """Test parsing of small_correct_file where the DataFile program type doesn't match the rawfile.""" test_datafile.section = 'SSP Active Case Data' test_datafile.save() + dfs.datafile = test_datafile + dfs.save() errors = parse.parse_datafile(test_datafile) + assert dfs.get_status() == DataFileSummary.Status.REJECTED parser_errors = ParserError.objects.filter(file=test_datafile) assert parser_errors.count() == 1 @@ -91,17 +128,30 @@ def test_parse_wrong_program_type(test_datafile): @pytest.fixture def test_big_file(stt_user, stt): """Fixture for ADS.E2J.FTP1.TS06.""" - return create_test_datafile('ADS.E2J.FTP1.TS06', stt_user, stt) - + return util.create_test_datafile('ADS.E2J.FTP1.TS06', stt_user, stt) -@pytest.mark.django_db() -def test_parse_big_file(test_big_file): +@pytest.mark.django_db +@pytest.mark.skip(reason="long runtime") # big_files +def test_parse_big_file(test_big_file, dfs): """Test parsing of ADS.E2J.FTP1.TS06.""" expected_t1_record_count = 815 expected_t2_record_count = 882 expected_t3_record_count = 1376 + dfs.datafile = test_big_file + dfs.save() + parse.parse_datafile(test_big_file) + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.ACCEPTED_WITH_ERRORS + dfs.case_aggregates = util.case_aggregates_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == {'rejected': 0, + 'months': [ + {'accepted_without_errors': 171, 'accepted_with_errors': 99, 'month': 'Oct'}, + {'accepted_without_errors': 169, 'accepted_with_errors': 104, 'month': 'Nov'}, + {'accepted_without_errors': 166, 'accepted_with_errors': 106, 'month': 'Dec'} + ]} + parser_errors = ParserError.objects.filter(file=test_big_file) error_message = 'MONTHS_FED_TIME_LIMIT is required but a value was not provided.' @@ -119,10 +169,10 @@ def test_parse_big_file(test_big_file): @pytest.fixture def bad_test_file(stt_user, stt): """Fixture for bad_TANF_S2.""" - return create_test_datafile('bad_TANF_S2.txt', stt_user, stt) + return util.create_test_datafile('bad_TANF_S2.txt', stt_user, stt) -@pytest.mark.django_db() +@pytest.mark.django_db def test_parse_bad_test_file(bad_test_file): """Test parsing of bad_TANF_S2.""" errors = parse.parse_datafile(bad_test_file) @@ -145,13 +195,15 @@ def test_parse_bad_test_file(bad_test_file): @pytest.fixture def bad_file_missing_header(stt_user, stt): """Fixture for bad_missing_header.""" - return create_test_datafile('bad_missing_header.txt', stt_user, stt) - + return util.create_test_datafile('bad_missing_header.txt', stt_user, stt) -@pytest.mark.django_db() -def test_parse_bad_file_missing_header(bad_file_missing_header): +@pytest.mark.django_db +def test_parse_bad_file_missing_header(bad_file_missing_header, dfs): """Test parsing of bad_missing_header.""" errors = parse.parse_datafile(bad_file_missing_header) + dfs.datafile = bad_file_missing_header + dfs.save() + assert dfs.get_status() == DataFileSummary.Status.REJECTED parser_errors = ParserError.objects.filter(file=bad_file_missing_header) @@ -172,13 +224,16 @@ def test_parse_bad_file_missing_header(bad_file_missing_header): @pytest.fixture def bad_file_multiple_headers(stt_user, stt): """Fixture for bad_two_headers.""" - return create_test_datafile('bad_two_headers.txt', stt_user, stt) + return util.create_test_datafile('bad_two_headers.txt', stt_user, stt) -@pytest.mark.django_db() -def test_parse_bad_file_multiple_headers(bad_file_multiple_headers): +@pytest.mark.django_db +def test_parse_bad_file_multiple_headers(bad_file_multiple_headers, dfs): """Test parsing of bad_two_headers.""" errors = parse.parse_datafile(bad_file_multiple_headers) + dfs.datafile = bad_file_multiple_headers + dfs.save() + assert dfs.get_status() == DataFileSummary.Status.REJECTED parser_errors = ParserError.objects.filter(file=bad_file_multiple_headers) assert parser_errors.count() == 1 @@ -196,11 +251,11 @@ def test_parse_bad_file_multiple_headers(bad_file_multiple_headers): @pytest.fixture def big_bad_test_file(stt_user, stt): """Fixture for bad_TANF_S1.""" - return create_test_datafile('bad_TANF_S1.txt', stt_user, stt) + return util.create_test_datafile('bad_TANF_S1.txt', stt_user, stt) -@pytest.mark.django_db() -def test_parse_big_bad_test_file(big_bad_test_file): +@pytest.mark.django_db +def test_parse_big_bad_test_file(big_bad_test_file, dfs): """Test parsing of bad_TANF_S1.""" parse.parse_datafile(big_bad_test_file) @@ -219,12 +274,14 @@ def test_parse_big_bad_test_file(big_bad_test_file): @pytest.fixture def bad_trailer_file(stt_user, stt): """Fixture for bad_trailer_1.""" - return create_test_datafile('bad_trailer_1.txt', stt_user, stt) + return util.create_test_datafile('bad_trailer_1.txt', stt_user, stt) - -@pytest.mark.django_db() -def test_parse_bad_trailer_file(bad_trailer_file): +@pytest.mark.django_db +def test_parse_bad_trailer_file(bad_trailer_file, dfs): """Test parsing bad_trailer_1.""" + dfs.datafile = bad_trailer_file + dfs.save() + errors = parse.parse_datafile(bad_trailer_file) parser_errors = ParserError.objects.filter(file=bad_trailer_file) @@ -251,7 +308,7 @@ def test_parse_bad_trailer_file(bad_trailer_file): @pytest.fixture def bad_trailer_file_2(stt_user, stt): """Fixture for bad_trailer_2.""" - return create_test_datafile('bad_trailer_2.txt', stt_user, stt) + return util.create_test_datafile('bad_trailer_2.txt', stt_user, stt) @pytest.mark.django_db() @@ -298,15 +355,35 @@ def test_parse_bad_trailer_file2(bad_trailer_file_2): @pytest.fixture def empty_file(stt_user, stt): """Fixture for empty_file.""" - return create_test_datafile('empty_file', stt_user, stt) + return util.create_test_datafile('empty_file', stt_user, stt) -@pytest.mark.django_db() -def test_parse_empty_file(empty_file): +@pytest.mark.django_db +def test_parse_empty_file(empty_file, dfs): """Test parsing of empty_file.""" + dfs.datafile = empty_file + dfs.save() errors = parse.parse_datafile(empty_file) + dfs.status = dfs.get_status() + dfs.case_aggregates = util.case_aggregates_by_month(empty_file, dfs.status) + + assert dfs.status == DataFileSummary.Status.REJECTED + assert dfs.case_aggregates == {'rejected': 2, + 'months': [ + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Oct'}, + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Nov'}, + {'accepted_without_errors': 'N/A', + 'accepted_with_errors': 'N/A', + 'month': 'Dec'} + ]} + parser_errors = ParserError.objects.filter(file=empty_file).order_by('id') + assert parser_errors.count() == 2 err = parser_errors.first() @@ -324,18 +401,35 @@ def test_parse_empty_file(empty_file): @pytest.fixture def small_ssp_section1_datafile(stt_user, stt): """Fixture for small_ssp_section1.""" - return create_test_datafile('small_ssp_section1.txt', stt_user, stt, 'SSP Active Case Data') + return util.create_test_datafile('small_ssp_section1.txt', stt_user, stt, 'SSP Active Case Data') -@pytest.mark.django_db() -def test_parse_small_ssp_section1_datafile(small_ssp_section1_datafile): +@pytest.mark.django_db +def test_parse_small_ssp_section1_datafile(small_ssp_section1_datafile, dfs): """Test parsing small_ssp_section1_datafile.""" expected_m1_record_count = 5 expected_m2_record_count = 6 expected_m3_record_count = 8 + small_ssp_section1_datafile.year = 2019 + small_ssp_section1_datafile.quarter = 'Q1' + small_ssp_section1_datafile.save() + + dfs.datafile = small_ssp_section1_datafile + dfs.save() + errors = parse.parse_datafile(small_ssp_section1_datafile) + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.ACCEPTED_WITH_ERRORS + dfs.case_aggregates = util.case_aggregates_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == {'rejected': 1, + 'months': [ + {'accepted_without_errors': 5, 'accepted_with_errors': 0, 'month': 'Oct'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Nov'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Dec'} + ]} + parser_errors = ParserError.objects.filter(file=small_ssp_section1_datafile) assert parser_errors.count() == 1 @@ -357,7 +451,7 @@ def test_parse_small_ssp_section1_datafile(small_ssp_section1_datafile): @pytest.fixture def ssp_section1_datafile(stt_user, stt): """Fixture for ssp_section1_datafile.""" - return create_test_datafile('ssp_section1_datafile.txt', stt_user, stt, 'SSP Active Case Data') + return util.create_test_datafile('ssp_section1_datafile.txt', stt_user, stt, 'SSP Active Case Data') @pytest.mark.django_db() @@ -387,13 +481,26 @@ def test_parse_ssp_section1_datafile(ssp_section1_datafile): @pytest.fixture def small_tanf_section1_datafile(stt_user, stt): """Fixture for small_tanf_section1.""" - return create_test_datafile('small_tanf_section1.txt', stt_user, stt) + return util.create_test_datafile('small_tanf_section1.txt', stt_user, stt) -@pytest.mark.django_db() -def test_parse_tanf_section1_datafile(small_tanf_section1_datafile): +@pytest.mark.django_db +def test_parse_tanf_section1_datafile(small_tanf_section1_datafile, dfs): """Test parsing of small_tanf_section1_datafile and validate T2 model data.""" + dfs.datafile = small_tanf_section1_datafile + dfs.save() + parse.parse_datafile(small_tanf_section1_datafile) + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.ACCEPTED + dfs.case_aggregates = util.case_aggregates_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == {'rejected': 0, + 'months': [ + {'accepted_without_errors': 5, 'accepted_with_errors': 0, 'month': 'Oct'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Nov'}, + {'accepted_without_errors': 0, 'accepted_with_errors': 0, 'month': 'Dec'} + ]} + assert TANF_T2.objects.count() == 5 t2_models = TANF_T2.objects.all() @@ -410,6 +517,7 @@ def test_parse_tanf_section1_datafile(small_tanf_section1_datafile): assert t2_2.FAMILY_AFFILIATION == 2 assert t2_2.OTHER_UNEARNED_INCOME == '0000' + @pytest.mark.django_db() def test_parse_tanf_section1_datafile_obj_counts(small_tanf_section1_datafile): """Test parsing of small_tanf_section1_datafile in general.""" @@ -444,7 +552,8 @@ def test_parse_tanf_section1_datafile_t3s(small_tanf_section1_datafile): @pytest.fixture def super_big_s1_file(stt_user, stt): """Fixture for ADS.E2J.NDM1.TS53_fake.""" - return create_test_datafile('ADS.E2J.NDM1.TS53_fake', stt_user, stt) + return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake', stt_user, stt) + @pytest.mark.django_db() def test_parse_super_big_s1_file(super_big_s1_file): @@ -458,9 +567,10 @@ def test_parse_super_big_s1_file(super_big_s1_file): @pytest.fixture def super_big_s1_rollback_file(stt_user, stt): """Fixture for ADS.E2J.NDM1.TS53_fake.rollback.""" - return create_test_datafile('ADS.E2J.NDM1.TS53_fake.rollback', stt_user, stt) + return util.create_test_datafile('ADS.E2J.NDM1.TS53_fake.rollback', stt_user, stt) @pytest.mark.django_db() +@pytest.mark.skip(reason="cuz") # big_files def test_parse_super_big_s1_file_with_rollback(super_big_s1_rollback_file): """Test parsing of super_big_s1_rollback_file. @@ -487,16 +597,22 @@ def test_parse_super_big_s1_file_with_rollback(super_big_s1_rollback_file): @pytest.fixture def bad_tanf_s1__row_missing_required_field(stt_user, stt): """Fixture for small_tanf_section1.""" - return create_test_datafile('small_bad_tanf_s1', stt_user, stt) + return util.create_test_datafile('small_bad_tanf_s1', stt_user, stt) -@pytest.mark.django_db() -def test_parse_bad_tfs1_missing_required(bad_tanf_s1__row_missing_required_field): +@pytest.mark.django_db +def test_parse_bad_tfs1_missing_required(bad_tanf_s1__row_missing_required_field, dfs): """Test parsing a bad TANF Section 1 submission where a row is missing required data.""" + dfs.datafile = bad_tanf_s1__row_missing_required_field + dfs.save() + parse.parse_datafile(bad_tanf_s1__row_missing_required_field) + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED_WITH_ERRORS + parser_errors = ParserError.objects.filter(file=bad_tanf_s1__row_missing_required_field) assert parser_errors.count() == 4 + [print(parser_error) for parser_error in parser_errors] error_message = 'RPT_MONTH_YEAR is required but a value was not provided.' row_2_error = parser_errors.get(row_number=2, error_message=error_message) @@ -517,7 +633,7 @@ def test_parse_bad_tfs1_missing_required(bad_tanf_s1__row_missing_required_field assert row_4_error.content_type.model == 'tanf_t3' assert row_4_error.object_id is not None - error_message = 'Record Type is missing from record.' + error_message = 'Unknown Record_Type was found.' row_5_error = parser_errors.get(row_number=5, error_message=error_message) assert row_5_error.error_type == ParserErrorCategoryChoices.PRE_CHECK assert row_5_error.error_message == error_message @@ -528,7 +644,7 @@ def test_parse_bad_tfs1_missing_required(bad_tanf_s1__row_missing_required_field @pytest.fixture def bad_ssp_s1__row_missing_required_field(stt_user, stt): """Fixture for ssp_section1_datafile.""" - return create_test_datafile('small_bad_ssp_s1', stt_user, stt, 'SSP Active Case Data') + return util.create_test_datafile('small_bad_ssp_s1', stt_user, stt, 'SSP Active Case Data') @pytest.mark.django_db() @@ -559,7 +675,7 @@ def test_parse_bad_ssp_s1_missing_required(bad_ssp_s1__row_missing_required_fiel row_5_error = parser_errors.get(row_number=5) assert row_5_error.error_type == ParserErrorCategoryChoices.PRE_CHECK - assert row_5_error.error_message == 'Record Type is missing from record.' + assert row_5_error.error_message == 'Unknown Record_Type was found.' assert row_5_error.content_type is None assert row_5_error.object_id is None @@ -577,10 +693,71 @@ def test_parse_bad_ssp_s1_missing_required(bad_ssp_s1__row_missing_required_fiel 'trailer': [trailer_error], } +@pytest.mark.django_db +def test_dfs_set_case_aggregates(test_datafile, dfs): + """Test that the case aggregates are set correctly.""" + test_datafile.section = 'Active Case Data' + test_datafile.save() + parse.parse_datafile(test_datafile) # this still needs to execute to create db objects to be queried + dfs.file = test_datafile + dfs.save() + dfs.status = dfs.get_status() + dfs.case_aggregates = util.case_aggregates_by_month(test_datafile, dfs.status) + dfs.save() + + for month in dfs.case_aggregates['months']: + if month['month'] == 'Oct': + assert month['accepted_without_errors'] == 1 + assert month['accepted_with_errors'] == 0 + +@pytest.mark.django_db +def test_get_schema_options(dfs): + """Test use-cases for translating strings to named object references.""" + ''' + text -> section + text -> models{} YES + text -> model YES + datafile -> model + ^ section -> program -> model + datafile -> text + model -> text YES + section -> text + + text**: input string from the header/file + ''' + + # from text: + schema = parse.get_schema_manager('T1xx', 'A', 'TAN') + assert isinstance(schema, util.SchemaManager) + assert schema == schema_defs.tanf.t1 + + # get model + models = util.get_program_models('TAN', 'A') + assert models == { + 'T1': schema_defs.tanf.t1, + 'T2': schema_defs.tanf.t2, + 'T3': schema_defs.tanf.t3, + } + + model = util.get_program_model('TAN', 'A', 'T1') + assert model == schema_defs.tanf.t1 + # get section + section = util.get_section_reference('TAN', 'C') + assert section == DataFile.Section.CLOSED_CASE_DATA + + # from datafile: + # get model(s) + # get section str + + # from model: + # get text + # get section str + # get ref section + @pytest.fixture def small_tanf_section2_file(stt_user, stt): - """Fixture for small_tanf_section2.""" - return create_test_datafile('small_tanf_section2.txt', stt_user, stt, 'Closed Case Data') + """Fixture for tanf section2 datafile.""" + return util.create_test_datafile('small_tanf_section2.txt', stt_user, stt, 'Closed Case Data') @pytest.mark.django_db() def test_parse_small_tanf_section2_file(small_tanf_section2_file): @@ -606,7 +783,7 @@ def test_parse_small_tanf_section2_file(small_tanf_section2_file): @pytest.fixture def tanf_section2_file(stt_user, stt): """Fixture for ADS.E2J.FTP2.TS06.""" - return create_test_datafile('ADS.E2J.FTP2.TS06', stt_user, stt, 'Closed Case Data') + return util.create_test_datafile('ADS.E2J.FTP2.TS06', stt_user, stt, 'Closed Case Data') @pytest.mark.django_db() def test_parse_tanf_section2_file(tanf_section2_file): @@ -627,7 +804,7 @@ def test_parse_tanf_section2_file(tanf_section2_file): @pytest.fixture def tanf_section3_file(stt_user, stt): """Fixture for ADS.E2J.FTP3.TS06.""" - return create_test_datafile('ADS.E2J.FTP3.TS06', stt_user, stt, "Aggregate Data") + return util.create_test_datafile('ADS.E2J.FTP3.TS06', stt_user, stt, "Aggregate Data") @pytest.mark.django_db() def test_parse_tanf_section3_file(tanf_section3_file): diff --git a/tdrs-backend/tdpservice/parsers/urls.py b/tdrs-backend/tdpservice/parsers/urls.py index f2226e0ab..cd1d560d3 100644 --- a/tdrs-backend/tdpservice/parsers/urls.py +++ b/tdrs-backend/tdpservice/parsers/urls.py @@ -1,12 +1,13 @@ """Routing for DataFiles.""" from django.urls import path, include from rest_framework.routers import DefaultRouter -from .views import ParsingErrorViewSet +from .views import ParsingErrorViewSet, DataFileSummaryViewSet router = DefaultRouter() -router.register("", ParsingErrorViewSet) +router.register("parsing_errors", ParsingErrorViewSet) +router.register("dfs", DataFileSummaryViewSet) urlpatterns = [ - path('parsing_errors/', include(router.urls)), + path('', include(router.urls)), ] diff --git a/tdrs-backend/tdpservice/parsers/util.py b/tdrs-backend/tdpservice/parsers/util.py index 0e50bce1b..accc36269 100644 --- a/tdrs-backend/tdpservice/parsers/util.py +++ b/tdrs-backend/tdpservice/parsers/util.py @@ -1,17 +1,22 @@ """Utility file for functions shared between all parsers even preparser.""" from .models import ParserError from django.contrib.contenttypes.models import ContentType +from . import schema_defs from tdpservice.data_files.models import DataFile +from datetime import datetime from pathlib import Path from .fields import TransformField -from datetime import datetime +import logging + +logger = logging.getLogger(__name__) + def create_test_datafile(filename, stt_user, stt, section='Active Case Data'): """Create a test DataFile instance with the given file attached.""" path = str(Path(__file__).parent.joinpath('test/data')) + f'/{filename}' datafile = DataFile.create_new_version({ - 'quarter': '4', - 'year': 2022, + 'quarter': 'Q1', + 'year': 2021, 'section': section, 'user': stt_user, 'stt': stt @@ -88,9 +93,158 @@ def contains_encrypted_indicator(line, encryption_field): return encryption_field.parse_value(line) == "E" return False -def month_to_int(month): - """Return the integer value of a month.""" - return datetime.strptime(month, '%b').strftime('%m') +def get_schema_options(program, section, query=None, model=None, model_name=None): + """Centralized function to return the appropriate schema for a given program, section, and query. + + TODO: need to rework this docstring as it is outdated hence the weird ';;' for some of them. + + @param program: the abbreviated program type (.e.g, 'TAN') + @param section: the section of the file (.e.g, 'A');; or ACTIVE_CASE_DATA + @param query: the query for section_names (.e.g, 'section', 'models', etc.) + @return: the appropriate references (e.g., ACTIVE_CASE_DATA or {t1,t2,t3}) ;; returning 'A' + """ + schema_options = { + 'TAN': { + 'A': { + 'section': DataFile.Section.ACTIVE_CASE_DATA, + 'models': { + 'T1': schema_defs.tanf.t1, + 'T2': schema_defs.tanf.t2, + 'T3': schema_defs.tanf.t3, + } + }, + 'C': { + 'section': DataFile.Section.CLOSED_CASE_DATA, + 'models': { + 'T4': schema_defs.tanf.t4, + 'T5': schema_defs.tanf.t5, + } + }, + 'G': { + 'section': DataFile.Section.AGGREGATE_DATA, + 'models': { + 'T6': schema_defs.tanf.t6, + } + }, + 'S': { + 'section': DataFile.Section.STRATUM_DATA, + 'models': { + # 'T7': schema_defs.tanf.t7, + } + } + }, + 'SSP': { + 'A': { + 'section': DataFile.Section.SSP_ACTIVE_CASE_DATA, + 'models': { + 'M1': schema_defs.ssp.m1, + 'M2': schema_defs.ssp.m2, + 'M3': schema_defs.ssp.m3, + } + }, + 'C': { + 'section': DataFile.Section.SSP_CLOSED_CASE_DATA, + 'models': { + # 'S4': schema_defs.ssp.m4, + # 'S5': schema_defs.ssp.m5, + } + }, + 'G': { + 'section': DataFile.Section.SSP_AGGREGATE_DATA, + 'models': { + # 'S6': schema_defs.ssp.m6, + } + }, + 'S': { + 'section': DataFile.Section.SSP_STRATUM_DATA, + 'models': { + # 'S7': schema_defs.ssp.m7, + } + } + }, + # TODO: tribal tanf + } + + if query == "text": + for prog_name, prog_dict in schema_options.items(): + for sect, val in prog_dict.items(): + if val['section'] == section: + return {'program_type': prog_name, 'section': sect} + raise ValueError("Model not found in schema_defs") + elif query == "section": + return schema_options.get(program, {}).get(section, None)[query] + elif query == "models": + links = schema_options.get(program, {}).get(section, None) + + # if query is not chosen or wrong input, return all options + # query = 'models', model = 'T1' + models = links.get(query, links) + + if model_name is None: + return models + elif model_name not in models.keys(): + logger.debug(f"Model {model_name} not found in schema_defs") + return [] # intentionally trigger the error_msg for unknown record type + else: + return models.get(model_name, models) + + +''' +text -> section YES +text -> models{} YES +text -> model YES +datafile -> model + ^ section -> program -> model +datafile -> text +model -> text YES +section -> text + +text**: input string from the header/file +''' + +def get_program_models(str_prog, str_section): + """Return the models dict for a given program and section.""" + return get_schema_options(program=str_prog, section=str_section, query='models') + +def get_program_model(str_prog, str_section, str_model): + """Return singular model for a given program, section, and name.""" + return get_schema_options(program=str_prog, section=str_section, query='models', model_name=str_model) + +def get_section_reference(str_prog, str_section): + """Return the named section reference for a given program and section.""" + return get_schema_options(program=str_prog, section=str_section, query='section') + +def get_text_from_df(df): + """Return the short-hand text for program, section for a given datafile.""" + return get_schema_options("", section=df.section, query='text') + +def get_prog_from_section(str_section): + """Return the program type for a given section.""" + # e.g., 'SSP Closed Case Data' + if str_section.startswith('SSP'): + return 'SSP' + elif str_section.startswith('Tribal'): + return 'TAN' # problematic, do we need to infer tribal entirely from tribe/fips code? + else: + return 'TAN' + + # TODO: if given a datafile (section), we can reverse back to the program b/c the + # section string has "tribal/ssp" in it, then process of elimination we have tanf + +def get_schema(line, section, program_type): + """Return the appropriate schema for the line.""" + line_type = line[0:2] + return get_schema_options(program_type, section, query='models', model_name=line_type) + +def fiscal_to_calendar(year, fiscal_quarter): + """Decrement the input quarter text by one.""" + array = [1, 2, 3, 4] # wrapping around an array + int_qtr = int(fiscal_quarter[1:]) # remove the 'Q', e.g., 'Q1' -> '1' + if int_qtr == 1: + year = year - 1 + + ind_qtr = array.index(int_qtr) # get the index so we can easily wrap-around end of array + return year, "Q{}".format(array[ind_qtr - 1]) # return the previous quarter def transform_to_months(quarter): """Return a list of months in a quarter.""" @@ -105,3 +259,59 @@ def transform_to_months(quarter): return ["Oct", "Nov", "Dec"] case _: raise ValueError("Invalid quarter value.") + + +def month_to_int(month): + """Return the integer value of a month.""" + return datetime.strptime(month, '%b').strftime('%m') + + +def case_aggregates_by_month(df, dfs_status): + """Return case aggregates by month.""" + section = str(df.section) # section -> text + program_type = get_prog_from_section(section) # section -> program_type -> text + + # from datafile year/quarter, generate short month names for each month in quarter ala 'Jan', 'Feb', 'Mar' + calendar_year, calendar_qtr = fiscal_to_calendar(df.year, df.quarter) + month_list = transform_to_months(calendar_qtr) + + short_section = get_text_from_df(df)['section'] + schema_models_dict = get_program_models(program_type, short_section) + schema_models = [model for model in schema_models_dict.values()] + + aggregate_data = {"months": [], "rejected": 0} + for month in month_list: + total = 0 + cases_with_errors = 0 + accepted = 0 + month_int = month_to_int(month) + rpt_month_year = int(f"{calendar_year}{month_int}") + + if dfs_status == "Rejected": + # we need to be careful here on examples of bad headers or empty files, since no month will be found + # but we can rely on the frontend submitted year-quarter to still generate the list of months + aggregate_data["months"].append({"accepted_with_errors": "N/A", + "accepted_without_errors": "N/A", + "month": month}) + continue + + case_numbers = set() + for schema_model in schema_models: + if isinstance(schema_model, SchemaManager): + schema_model = schema_model.schemas[0] + + curr_case_numbers = set(schema_model.model.objects.filter(datafile=df).filter(RPT_MONTH_YEAR=rpt_month_year) + .distinct("CASE_NUMBER").values_list("CASE_NUMBER", flat=True)) + case_numbers = case_numbers.union(curr_case_numbers) + + total += len(case_numbers) + cases_with_errors += ParserError.objects.filter(case_number__in=case_numbers).distinct('case_number').count() + accepted = total - cases_with_errors + + aggregate_data['months'].append({"month": month, + "accepted_without_errors": accepted, + "accepted_with_errors": cases_with_errors}) + + aggregate_data['rejected'] = ParserError.objects.filter(file=df).filter(case_number=None).count() + + return aggregate_data diff --git a/tdrs-backend/tdpservice/parsers/validators.py b/tdrs-backend/tdpservice/parsers/validators.py index c811a6ef1..a8722794d 100644 --- a/tdrs-backend/tdpservice/parsers/validators.py +++ b/tdrs-backend/tdpservice/parsers/validators.py @@ -1,8 +1,6 @@ """Generic parser validator functions for use in schema definitions.""" -from .util import generate_parser_error from .models import ParserErrorCategoryChoices -from tdpservice.data_files.models import DataFile from datetime import date # higher order validator func @@ -348,76 +346,14 @@ def validate(instance): return (True, None) return lambda instance: validate(instance) -def validate_single_header_trailer(datafile): - """Validate that a raw datafile has one trailer and one footer.""" - line_number = 0 - headers = 0 - trailers = 0 - is_valid = True - error_message = None - - for rawline in datafile.file: - line = rawline.decode() - line_number += 1 - - if line.startswith('HEADER'): - headers += 1 - elif line.startswith('TRAILER'): - trailers += 1 - - if headers > 1: - is_valid = False - error_message = 'Multiple headers found.' - break - - if trailers > 1: - is_valid = False - error_message = 'Multiple trailers found.' - break - - if headers == 0: - is_valid = False - error_message = 'No headers found.' - error = None - if not is_valid: - error = generate_parser_error( - datafile=datafile, - line_number=line_number, - schema=None, - error_category=ParserErrorCategoryChoices.PRE_CHECK, - error_message=error_message, - record=None, - field=None - ) - - return is_valid, error - - -def validate_header_section_matches_submission(datafile, program_type, section): +def validate_header_section_matches_submission(datafile, section, generate_error): """Validate header section matches submission section.""" - section_names = { - 'TAN': { - 'A': DataFile.Section.ACTIVE_CASE_DATA, - 'C': DataFile.Section.CLOSED_CASE_DATA, - 'G': DataFile.Section.AGGREGATE_DATA, - 'S': DataFile.Section.STRATUM_DATA, - }, - 'SSP': { - 'A': DataFile.Section.SSP_ACTIVE_CASE_DATA, - 'C': DataFile.Section.SSP_CLOSED_CASE_DATA, - 'G': DataFile.Section.SSP_AGGREGATE_DATA, - 'S': DataFile.Section.SSP_STRATUM_DATA, - }, - } - - is_valid = datafile.section == section_names.get(program_type, {}).get(section) + is_valid = datafile.section == section error = None if not is_valid: - error = generate_parser_error( - datafile=datafile, - line_number=1, + error = generate_error( schema=None, error_category=ParserErrorCategoryChoices.PRE_CHECK, error_message=f"Data does not match the expected layout for {datafile.section}.", diff --git a/tdrs-backend/tdpservice/parsers/views.py b/tdrs-backend/tdpservice/parsers/views.py index d39965ee3..8e40b79e4 100644 --- a/tdrs-backend/tdpservice/parsers/views.py +++ b/tdrs-backend/tdpservice/parsers/views.py @@ -2,8 +2,8 @@ from tdpservice.users.permissions import IsApprovedPermission from rest_framework.viewsets import ModelViewSet from rest_framework.response import Response -from .serializers import ParsingErrorSerializer -from .models import ParserError +from .serializers import ParsingErrorSerializer, DataFileSummarySerializer +from .models import ParserError, DataFileSummary import logging import base64 from io import BytesIO @@ -69,3 +69,11 @@ def _get_xls_serialized_file(self, data): col += 1 workbook.close() return {"data": data, "xls_report": base64.b64encode(output.getvalue()).decode("utf-8")} + + +class DataFileSummaryViewSet(ModelViewSet): + """DataFileSummary file views.""" + + queryset = DataFileSummary.objects.all() + serializer_class = DataFileSummarySerializer + permission_classes = [IsApprovedPermission] diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index 4ffd91277..b1e5f8d5c 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -4,6 +4,9 @@ import logging from tdpservice.data_files.models import DataFile from tdpservice.parsers.parse import parse_datafile +from tdpservice.parsers.models import DataFileSummary +from tdpservice.parsers.util import case_aggregates_by_month + logger = logging.getLogger(__name__) @@ -17,5 +20,9 @@ def parse(data_file_id): data_file = DataFile.objects.get(id=data_file_id) logger.info(f"DataFile parsing started for file -> {repr(data_file)}") + dfs = DataFileSummary.objects.create(datafile=data_file, status=DataFileSummary.Status.PENDING) errors = parse_datafile(data_file) - logger.info(f"DataFile parsing finished with {len(errors)} errors, for file -> {repr(data_file)}.") + dfs.status = dfs.get_status() + dfs.case_aggregates = case_aggregates_by_month(data_file, dfs.status) + dfs.save() + logger.info(f"Parsing finished for file -> {repr(data_file)} with status {dfs.status} and {len(errors)} errors.") diff --git a/tdrs-backend/tdpservice/users/test/test_permissions.py b/tdrs-backend/tdpservice/users/test/test_permissions.py index 984e3b226..2f25347aa 100644 --- a/tdrs-backend/tdpservice/users/test/test_permissions.py +++ b/tdrs-backend/tdpservice/users/test/test_permissions.py @@ -111,6 +111,9 @@ def test_ofa_system_admin_permissions(ofa_system_admin): 'parsers.add_parsererror', 'parsers.change_parsererror', 'parsers.view_parsererror', + 'parsers.add_datafilesummary', + 'parsers.view_datafilesummary', + 'parsers.change_datafilesummary', 'search_indexes.add_ssp_m1', 'search_indexes.view_ssp_m1', 'search_indexes.change_ssp_m1', diff --git a/tdrs-frontend/docker-compose.yml b/tdrs-frontend/docker-compose.yml index 0e6a28283..d75772fa5 100644 --- a/tdrs-frontend/docker-compose.yml +++ b/tdrs-frontend/docker-compose.yml @@ -32,7 +32,7 @@ services: command: > /bin/sh -c "echo 'starting nginx' && - envsubst '$${BACK_END}' < /etc/nginx/locations.conf > /etc/nginx/locations_.conf && + envsubst '$${BACK_END}' < /etc/nginx/locations.conf > /etc/nginx/locations_.conf && rm /etc/nginx/locations.conf && cp /etc/nginx/locations_.conf /etc/nginx/locations.conf && envsubst '