Commit

Merge branch 'develop' into sprint-100-summary
robgendron authored Jun 27, 2024
2 parents 25e9612 + 35adb87 commit 017690f
Showing 40 changed files with 2,867 additions and 37,482 deletions.
378 changes: 378 additions & 0 deletions docs/Technical-Documentation/diagrams/parsing.drawio

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions docs/Technical-Documentation/parsing-flow.md
@@ -0,0 +1,11 @@
# High Level Parsing Flow

Parsing begins after a user submits one or more datafiles via the frontend. Each submission generates one or more Celery tasks, which are enqueued to Redis. As work becomes available, Celery workers dequeue tasks from Redis and begin working on them. The parsing task retrieves the Datafile Django model and iterates over each line in the file. For each line, the task parses the line into a new record, performs category 1-3 validation on the record, performs exact and partial duplicate detection, performs category 4 validation, and stores the record in a cache to be bulk created/serialized to the database and Elasticsearch. The image below provides a high-level view of these steps.

![Parsing Flow](./diagrams/parsing.png)
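
To make the per-line loop concrete, here is a minimal, hypothetical sketch of that flow. `parse_datafile`, `parse_line`, `validators`, and `bulk_flush` are illustrative stand-ins, not the project's actual functions or signatures:

```python
# Hypothetical sketch of the per-line parsing loop described above.
# The caller supplies the real parsing, validation, and bulk-write logic.
from typing import Callable, Iterable


def parse_datafile(
    lines: Iterable[str],
    parse_line: Callable[[str], dict],
    validators: list[Callable[[dict], list[str]]],  # category 1-3 and category 4 checks
    bulk_flush: Callable[[list[dict]], None],       # bulk create to the DB / Elasticsearch
) -> list[str]:
    """Parse each line, validate the record, and cache it for bulk creation."""
    seen_keys: set[int] = set()   # keys used for naive duplicate detection
    cache: list[dict] = []
    errors: list[str] = []

    for line in lines:
        record = parse_line(line)                    # parse the line into a new record
        for check in validators:                     # category 1-3, then category 4 validation
            errors.extend(check(record))
        key = hash(frozenset(record.items()))        # simplistic exact-duplicate key
        if key in seen_keys:
            errors.append(f"duplicate record: {record}")
        seen_keys.add(key)
        cache.append(record)                         # cached for bulk create/serialize

    bulk_flush(cache)                                # bulk write to the database and Elasticsearch
    return errors
```

In the real pipeline this work runs inside a Celery task dequeued from Redis; the sketch only fixes the ordering of the per-line steps.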
9 changes: 5 additions & 4 deletions tdrs-backend/tdpservice/data_files/test/test_api.py
@@ -100,8 +100,8 @@ def assert_error_report_tanf_file_content_matches_with_friendly_names(response):

assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \
    + "instructions (linked below) when looking up items and allowable values during the data revision process"
-assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == "if cash amount :873 validator1 passed" \
-    + " then number of months T1: 0 is not larger than 0."
+assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == "Every T1 record should have at least one " + \
+    "corresponding T2 or T3 record with the same RPT_MONTH_YEAR and CASE_NUMBER."

@staticmethod
def assert_error_report_ssp_file_content_matches_with_friendly_names(response):
@@ -132,8 +132,9 @@ def assert_error_report_file_content_matches_without_friendly_names(response):

assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \
    + "instructions (linked below) when looking up items and allowable values during the data revision process"
-assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ("if CASH_AMOUNT :873 validator1 passed then "
-                                                          "NBR_MONTHS T1: 0 is not larger than 0.")
+assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ("Every T1 record should have at least one "
+                                                          "corresponding T2 or T3 record with the same "
+                                                          "RPT_MONTH_YEAR and CASE_NUMBER.")

@staticmethod
def assert_data_file_exists(data_file_data, version, user):
18 changes: 11 additions & 7 deletions tdrs-backend/tdpservice/parsers/aggregates.py
@@ -1,9 +1,10 @@
"""Aggregate methods for the parsers."""
from .row_schema import SchemaManager
from .models import ParserError
from .models import ParserError, ParserErrorCategoryChoices
from .util import month_to_int, \
transform_to_months, fiscal_to_calendar, get_prog_from_section
from .schema_defs.utils import get_program_models, get_text_from_df
from django.db.models import Q as Query


def case_aggregates_by_month(df, dfs_status):
@@ -39,22 +40,25 @@ def case_aggregates_by_month(df, dfs_status):
if isinstance(schema_model, SchemaManager):
    schema_model = schema_model.schemas[0]

-curr_case_numbers = set(schema_model.document.Django.model.objects.filter(datafile=df)
-                        .filter(RPT_MONTH_YEAR=rpt_month_year)
+curr_case_numbers = set(schema_model.document.Django.model.objects.filter(datafile=df,
+                                                                          RPT_MONTH_YEAR=rpt_month_year)
                        .distinct("CASE_NUMBER").values_list("CASE_NUMBER", flat=True))
case_numbers = case_numbers.union(curr_case_numbers)

total += len(case_numbers)
-cases_with_errors += ParserError.objects.filter(file=df).filter(
-    case_number__in=case_numbers).distinct('case_number').count()
+cases_with_errors += ParserError.objects.filter(file=df, case_number__in=case_numbers)\
+    .distinct('case_number').count()
accepted = total - cases_with_errors

aggregate_data['months'].append({"month": month,
                                 "accepted_without_errors": accepted,
                                 "accepted_with_errors": cases_with_errors})

-aggregate_data['rejected'] = ParserError.objects.filter(file=df).filter(case_number=None).distinct("row_number")\
-    .exclude(row_number=0).count()
+error_type_query = Query(error_type=ParserErrorCategoryChoices.PRE_CHECK) | \
+                   Query(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
+
+aggregate_data['rejected'] = ParserError.objects.filter(error_type_query, file=df)\
+    .distinct("row_number").exclude(row_number=0).count()

return aggregate_data
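
For readers unfamiliar with Django `Q` objects, the reworked `rejected` count above ORs two error-type filters instead of filtering on a missing case number. A rough sketch of the same pattern as a standalone helper; the function name is hypothetical, while the model and enum imports follow the paths shown in the diff:

```python
# Rough illustration of the Q-object OR filter used by the new `rejected` count.
# The helper name is illustrative; the models come from the diff above.
from django.db.models import Q

from tdpservice.parsers.models import ParserError, ParserErrorCategoryChoices


def rejected_row_count(datafile):
    """Count distinct rows with pre-check or case-consistency errors."""
    error_type_query = (
        Q(error_type=ParserErrorCategoryChoices.PRE_CHECK)
        | Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
    )
    return (
        ParserError.objects.filter(error_type_query, file=datafile)
        .distinct("row_number")   # DISTINCT ON a field requires PostgreSQL
        .exclude(row_number=0)    # mirrors the exclusion in the query above
        .count()
    )
```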

