Commit

Merge branch 'develop' into sprint-100-summary
robgendron authored Jun 27, 2024
2 parents 25e9612 + 35adb87 commit 017690f
Showing 40 changed files with 2,867 additions and 37,482 deletions.
378 changes: 378 additions & 0 deletions docs/Technical-Documentation/diagrams/parsing.drawio

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions docs/Technical-Documentation/parsing-flow.md
@@ -0,0 +1,11 @@
# High Level Parsing Flow

Parsing begins after a user submits one or more datafiles via the frontend. Each submission generates one or more Celery tasks, which are enqueued to Redis. As work becomes available, Celery workers dequeue tasks from Redis and begin working on them. The parsing task retrieves the Datafile Django model and iterates over each line in the file. For each line, the task parses the line into a new record, performs category 1-3 validation on the record, performs exact and partial duplicate detection, performs category 4 validation, and stores the record in a cache to be bulk created/serialized to the database and Elasticsearch. The image below provides a high-level view of these steps.

![Parsing Flow](./diagrams/parsing.png)
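
To make the per-line loop concrete, here is a minimal, hypothetical sketch of that flow. `parse_datafile`, `parse_line`, `validators`, and `bulk_flush` are illustrative stand-ins, not the project's actual functions or signatures:

```python
# Hypothetical sketch of the per-line parsing loop described above.
# The caller supplies the real parsing, validation, and bulk-write logic.
from typing import Callable, Iterable


def parse_datafile(
    lines: Iterable[str],
    parse_line: Callable[[str], dict],
    validators: list[Callable[[dict], list[str]]],  # category 1-3 and category 4 checks
    bulk_flush: Callable[[list[dict]], None],       # bulk create to the DB / Elasticsearch
) -> list[str]:
    """Parse each line, validate the record, and cache it for bulk creation."""
    seen_keys: set[int] = set()   # keys used for naive duplicate detection
    cache: list[dict] = []
    errors: list[str] = []

    for line in lines:
        record = parse_line(line)                    # parse the line into a new record
        for check in validators:                     # category 1-3, then category 4 validation
            errors.extend(check(record))
        key = hash(frozenset(record.items()))        # simplistic exact-duplicate key
        if key in seen_keys:
            errors.append(f"duplicate record: {record}")
        seen_keys.add(key)
        cache.append(record)                         # cached for bulk create/serialize

    bulk_flush(cache)                                # bulk write to the database and Elasticsearch
    return errors
```

In the real pipeline this work runs inside a Celery task dequeued from Redis; the sketch only fixes the ordering of the per-line steps.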
9 changes: 5 additions & 4 deletions tdrs-backend/tdpservice/data_files/test/test_api.py
@@ -100,8 +100,8 @@ def assert_error_report_tanf_file_content_matches_with_friendly_names(response):

assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \
    + "instructions (linked below) when looking up items and allowable values during the data revision process"
-assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == "if cash amount :873 validator1 passed" \
-    + " then number of months T1: 0 is not larger than 0."
+assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == "Every T1 record should have at least one " + \
+    "corresponding T2 or T3 record with the same RPT_MONTH_YEAR and CASE_NUMBER."

@staticmethod
def assert_error_report_ssp_file_content_matches_with_friendly_names(response):
@@ -132,8 +132,9 @@ def assert_error_report_file_content_matches_without_friendly_names(response):

assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \
    + "instructions (linked below) when looking up items and allowable values during the data revision process"
-assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ("if CASH_AMOUNT :873 validator1 passed then "
-                                                          "NBR_MONTHS T1: 0 is not larger than 0.")
+assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ("Every T1 record should have at least one "
+                                                          "corresponding T2 or T3 record with the same "
+                                                          "RPT_MONTH_YEAR and CASE_NUMBER.")

@staticmethod
def assert_data_file_exists(data_file_data, version, user):
18 changes: 11 additions & 7 deletions tdrs-backend/tdpservice/parsers/aggregates.py
@@ -1,9 +1,10 @@
"""Aggregate methods for the parsers."""
from .row_schema import SchemaManager
from .models import ParserError
from .models import ParserError, ParserErrorCategoryChoices
from .util import month_to_int, \
transform_to_months, fiscal_to_calendar, get_prog_from_section
from .schema_defs.utils import get_program_models, get_text_from_df
from django.db.models import Q as Query


def case_aggregates_by_month(df, dfs_status):
@@ -39,22 +40,25 @@ def case_aggregates_by_month(df, dfs_status):
if isinstance(schema_model, SchemaManager):
    schema_model = schema_model.schemas[0]

-curr_case_numbers = set(schema_model.document.Django.model.objects.filter(datafile=df)
-                        .filter(RPT_MONTH_YEAR=rpt_month_year)
+curr_case_numbers = set(schema_model.document.Django.model.objects.filter(datafile=df,
+                                                                          RPT_MONTH_YEAR=rpt_month_year)
                        .distinct("CASE_NUMBER").values_list("CASE_NUMBER", flat=True))
case_numbers = case_numbers.union(curr_case_numbers)

total += len(case_numbers)
-cases_with_errors += ParserError.objects.filter(file=df).filter(
-    case_number__in=case_numbers).distinct('case_number').count()
+cases_with_errors += ParserError.objects.filter(file=df, case_number__in=case_numbers)\
+    .distinct('case_number').count()
accepted = total - cases_with_errors

aggregate_data['months'].append({"month": month,
                                 "accepted_without_errors": accepted,
                                 "accepted_with_errors": cases_with_errors})

-aggregate_data['rejected'] = ParserError.objects.filter(file=df).filter(case_number=None).distinct("row_number")\
-    .exclude(row_number=0).count()
+error_type_query = Query(error_type=ParserErrorCategoryChoices.PRE_CHECK) | \
+                   Query(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
+
+aggregate_data['rejected'] = ParserError.objects.filter(error_type_query, file=df)\
+    .distinct("row_number").exclude(row_number=0).count()

return aggregate_data
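
For readers unfamiliar with Django `Q` objects, the reworked `rejected` count above ORs two error-type filters instead of filtering on a missing case number. A rough sketch of the same pattern as a standalone helper; the function name is hypothetical, while the model and enum imports follow the paths shown in the diff:

```python
# Rough illustration of the Q-object OR filter used by the new `rejected` count.
# The helper name is illustrative; the models come from the diff above.
from django.db.models import Q

from tdpservice.parsers.models import ParserError, ParserErrorCategoryChoices


def rejected_row_count(datafile):
    """Count distinct rows with pre-check or case-consistency errors."""
    error_type_query = (
        Q(error_type=ParserErrorCategoryChoices.PRE_CHECK)
        | Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
    )
    return (
        ParserError.objects.filter(error_type_query, file=datafile)
        .distinct("row_number")   # DISTINCT ON a field requires PostgreSQL
        .exclude(row_number=0)    # mirrors the exclusion in the query above
        .count()
    )
```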

