Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/2646 total errors aggregates #2800

Merged
merged 25 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
58ddd8d
total_errors_by_month aggregator
jtimpe Jan 5, 2024
b86e66f
impl total_errors aggregator
jtimpe Jan 5, 2024
15be219
submission history component structure
jtimpe Jan 5, 2024
525c18c
total errors frontend impl
jtimpe Jan 5, 2024
52f1025
Merge branch 'develop' into feat/2646-total-errors-aggregates
jtimpe Jan 5, 2024
da3f2b0
add section 3/4 aggregates to tests
jtimpe Jan 17, 2024
f3a20ed
Merge branch 'develop' into feat/2646-total-errors-aggregates
jtimpe Jan 19, 2024
6b6410c
lint
jtimpe Jan 19, 2024
6d4f282
fix null coalescing
jtimpe Jan 19, 2024
dd039b1
fix+add aggregates tests
jtimpe Jan 19, 2024
cd5f733
add tribal tests, positive case test
jtimpe Jan 23, 2024
b1889aa
remove unused code from total_errors aggregate func
jtimpe Jan 23, 2024
850bbd0
remove unnecessary saves
jtimpe Jan 23, 2024
d671e2a
Merge branch 'develop' into feat/2646-total-errors-aggregates
jtimpe Jan 23, 2024
1c2ce4f
move total_errors to aggregates.py
jtimpe Jan 23, 2024
7c3180c
extra blank line
jtimpe Jan 23, 2024
54e98e5
undo formatter changes
jtimpe Jan 23, 2024
1124d5a
extra space
jtimpe Jan 23, 2024
579b526
Merge branch 'develop' into feat/2646-total-errors-aggregates
jtimpe Feb 13, 2024
12c3d6b
make the query more efficient
jtimpe Feb 13, 2024
51a5a09
Merge branch 'develop' into feat/2646-total-errors-aggregates
ADPennington Feb 21, 2024
13a7c18
Merge branch 'develop' into feat/2646-total-errors-aggregates
ADPennington Feb 22, 2024
94f2477
Merge branch 'develop' into feat/2646-total-errors-aggregates
jtimpe Feb 26, 2024
f836320
rm unused code
jtimpe Feb 26, 2024
b0e063a
fix error report url
jtimpe Feb 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 76 additions & 5 deletions tdrs-backend/tdpservice/parsers/test/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,10 +887,24 @@ def tanf_section3_file(stt_user, stt):


@pytest.mark.django_db()
def test_parse_tanf_section3_file(tanf_section3_file):
def test_parse_tanf_section3_file(tanf_section3_file, dfs):
"""Test parsing TANF Section 3 submission."""
dfs.datafile = tanf_section3_file
dfs.save()
jtimpe marked this conversation as resolved.
Show resolved Hide resolved

parse.parse_datafile(tanf_section3_file)

dfs.status = dfs.get_status()
dfs.case_aggregates = util.total_errors_by_month(
dfs.datafile, dfs.status)
assert dfs.case_aggregates == {"months": [
elipe17 marked this conversation as resolved.
Show resolved Hide resolved
{"month": "Oct", "total_errors": 0},
{"month": "Nov", "total_errors": 0},
{"month": "Dec", "total_errors": 0}
]}

assert dfs.get_status() == DataFileSummary.Status.ACCEPTED

assert TANF_T6.objects.all().count() == 3

parser_errors = ParserError.objects.filter(file=tanf_section3_file)
Expand Down Expand Up @@ -947,10 +961,24 @@ def tanf_section4_file(stt_user, stt):


@pytest.mark.django_db()
def test_parse_tanf_section4_file(tanf_section4_file):
def test_parse_tanf_section4_file(tanf_section4_file, dfs):
"""Test parsing TANF Section 4 submission."""
dfs.datafile = tanf_section4_file
dfs.save()

parse.parse_datafile(tanf_section4_file)

dfs.status = dfs.get_status()
dfs.case_aggregates = util.total_errors_by_month(
dfs.datafile, dfs.status)
assert dfs.case_aggregates == {"months": [
{"month": "Oct", "total_errors": 0},
{"month": "Nov", "total_errors": 0},
{"month": "Dec", "total_errors": 0}
]}

assert dfs.get_status() == DataFileSummary.Status.ACCEPTED

assert TANF_T7.objects.all().count() == 18

parser_errors = ParserError.objects.filter(file=tanf_section4_file)
Expand All @@ -976,12 +1004,23 @@ def ssp_section4_file(stt_user, stt):
return util.create_test_datafile('ADS.E2J.NDM4.MS24', stt_user, stt, "SSP Stratum Data")

@pytest.mark.django_db()
def test_parse_ssp_section4_file(ssp_section4_file):
def test_parse_ssp_section4_file(ssp_section4_file, dfs):
"""Test parsing SSP Section 4 submission."""
dfs.datafile = ssp_section4_file
dfs.save()
parse.parse_datafile(ssp_section4_file)

m7_objs = SSP_M7.objects.all().order_by('FAMILIES_MONTH')

dfs.status = dfs.get_status()
dfs.case_aggregates = util.total_errors_by_month(
dfs.datafile, dfs.status)
assert dfs.case_aggregates == {"months": [
{"month": "Oct", "total_errors": 0},
{"month": "Nov", "total_errors": 0},
{"month": "Dec", "total_errors": 0}
]}

assert m7_objs.count() == 12

first = m7_objs.first()
Expand All @@ -994,10 +1033,28 @@ def ssp_section2_file(stt_user, stt):
return util.create_test_datafile('ADS.E2J.NDM2.MS24', stt_user, stt, 'SSP Closed Case Data')

@pytest.mark.django_db()
def test_parse_ssp_section2_file(ssp_section2_file):
def test_parse_ssp_section2_file(ssp_section2_file, dfs):
"""Test parsing SSP Section 2 submission."""
dfs.datafile = ssp_section2_file
dfs.save()

parse.parse_datafile(ssp_section2_file)

dfs.status = dfs.get_status()
dfs.case_aggregates = util.case_aggregates_by_month(
dfs.datafile, dfs.status)
assert dfs.case_aggregates == {'rejected': 0,
'months': [
{'accepted_without_errors': 0,
'accepted_with_errors': 58, 'month': 'Oct'},
{'accepted_without_errors': 0,
'accepted_with_errors': 52, 'month': 'Nov'},
{'accepted_without_errors': 0,
'accepted_with_errors': 40, 'month': 'Dec'}
]}

assert dfs.get_status() == DataFileSummary.Status.ACCEPTED_WITH_ERRORS

m4_objs = SSP_M4.objects.all().order_by('id')
m5_objs = SSP_M5.objects.all().order_by('AMOUNT_EARNED_INCOME')

Expand Down Expand Up @@ -1036,10 +1093,24 @@ def ssp_section3_file(stt_user, stt):
return util.create_test_datafile('ADS.E2J.NDM3.MS24', stt_user, stt, "SSP Aggregate Data")

@pytest.mark.django_db()
def test_parse_ssp_section3_file(ssp_section3_file):
def test_parse_ssp_section3_file(ssp_section3_file, dfs):
"""Test parsing TANF Section 3 submission."""
dfs.datafile = ssp_section3_file
dfs.save()

parse.parse_datafile(ssp_section3_file)

dfs.status = dfs.get_status()
dfs.case_aggregates = util.total_errors_by_month(
dfs.datafile, dfs.status)
assert dfs.case_aggregates == {"months": [
{"month": "Oct", "total_errors": 0},
{"month": "Nov", "total_errors": 0},
{"month": "Dec", "total_errors": 0}
]}

assert dfs.get_status() == DataFileSummary.Status.ACCEPTED

m6_objs = SSP_M6.objects.all().order_by('RPT_MONTH_YEAR')
assert m6_objs.count() == 3

Expand Down
68 changes: 61 additions & 7 deletions tdrs-backend/tdpservice/parsers/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,17 @@ def generate_parser_error(datafile, line_number, schema, error_category, error_m
row_number=line_number,
column_number=getattr(field, 'item', None),
item_number=getattr(field, 'item', None),
field_name=getattr(field, 'name', None) if hasattr(field, 'name') else field,
field_name=getattr(field, 'name', None) if hasattr(
field, 'name') else field,
rpt_month_year=getattr(record, 'RPT_MONTH_YEAR', None),
case_number=getattr(record, 'CASE_NUMBER', None),
error_message=error_message,
error_type=error_category,
content_type=ContentType.objects.get_for_model(
model=schema.document.Django.model if schema else None
) if record and not isinstance(record, dict) else None,
object_id=getattr(record, 'id', None) if record and not isinstance(record, dict) else None,
object_id=getattr(record, 'id', None) if record and not isinstance(
record, dict) else None,
fields_json=fields_json
)

Expand Down Expand Up @@ -98,7 +100,8 @@ def parse_and_validate(self, line, generate_error):
records = []

for schema in self.schemas:
record, is_valid, errors = schema.parse_and_validate(line, generate_error)
record, is_valid, errors = schema.parse_and_validate(
line, generate_error)
records.append((record, is_valid, errors))

return records
Expand Down Expand Up @@ -247,22 +250,27 @@ def get_schema_options(program, section, query=None, model=None, model_name=None
text**: input string from the header/file
'''


def get_program_models(str_prog, str_section):
    """Return the models dict for a given program and section.

    Thin wrapper around get_schema_options with query='models'.
    str_prog: program-type string (e.g. the value produced by
        get_prog_from_section) -- TODO confirm exact accepted values.
    str_section: short-hand section string expected by get_schema_options.
    """
    return get_schema_options(program=str_prog, section=str_section, query='models')


def get_program_model(str_prog, str_section, str_model):
    """Return singular model for a given program, section, and name.

    Thin wrapper around get_schema_options with query='models' and an
    explicit model_name, so only the one named model is returned.
    str_model: the model's short name/key (e.g. a record-type code) --
        presumably the same keys as in the get_program_models dict; verify.
    """
    return get_schema_options(program=str_prog, section=str_section, query='models', model_name=str_model)


def get_section_reference(str_prog, str_section):
    """Return the named section reference for a given program and section.

    Thin wrapper around get_schema_options with query='section'; the
    returned value is whatever that lookup stores under 'section'.
    """
    return get_schema_options(program=str_prog, section=str_section, query='section')


def get_text_from_df(df):
    """Return the short-hand text for program, section for a given datafile.

    df: a datafile object exposing a .section attribute. The program
    argument is passed as an empty string, so the lookup is driven by the
    section alone; the result is get_schema_options' 'text' entry.
    """
    return get_schema_options("", section=df.section, query='text')


def get_prog_from_section(str_section):
"""Return the program type for a given section."""
# e.g., 'SSP Closed Case Data'
Expand All @@ -276,20 +284,25 @@ def get_prog_from_section(str_section):
# TODO: if given a datafile (section), we can reverse back to the program b/c the
# section string has "tribal/ssp" in it, then process of elimination we have tanf


def get_schema(line, section, program_type):
    """Return the appropriate schema for the line."""
    # The first two characters of a record line identify its record type.
    record_type = line[:2]
    return get_schema_options(
        program_type, section, query='models', model_name=record_type)


def fiscal_to_calendar(year, fiscal_quarter):
    """Decrement the input quarter text by one."""
    quarters = [1, 2, 3, 4]
    quarter_num = int(fiscal_quarter[1:])  # strip the 'Q': 'Q1' -> 1
    # Fiscal Q1 maps back into the previous calendar year.
    if quarter_num == 1:
        year -= 1
    # Index into the list wraps around, so Q1 -> Q4 of the prior year.
    previous_quarter = quarters[quarters.index(quarter_num) - 1]
    return year, "Q{}".format(previous_quarter)


def transform_to_months(quarter):
"""Return a list of months in a quarter."""
Expand All @@ -305,6 +318,7 @@ def transform_to_months(quarter):
case _:
raise ValueError("Invalid quarter value.")


def month_to_int(month):
    """Return the zero-padded two-digit month number for a short month name.

    NOTE: despite the name, strftime('%m') returns a *string* (e.g.
    'Oct' -> '10'), not an int; callers rely on the zero-padding when
    concatenating it into RPT_MONTH_YEAR values.
    Raises ValueError if `month` is not a valid '%b' abbreviation.
    """
    return datetime.strptime(month, '%b').strftime('%m')
Expand All @@ -313,7 +327,8 @@ def month_to_int(month):
def case_aggregates_by_month(df, dfs_status):
"""Return case aggregates by month."""
section = str(df.section) # section -> text
program_type = get_prog_from_section(section) # section -> program_type -> text
program_type = get_prog_from_section(
section) # section -> program_type -> text
jtimpe marked this conversation as resolved.
Show resolved Hide resolved

# from datafile year/quarter, generate short month names for each month in quarter ala 'Jan', 'Feb', 'Mar'
calendar_year, calendar_qtr = fiscal_to_calendar(df.year, df.quarter)
Expand Down Expand Up @@ -358,6 +373,45 @@ def case_aggregates_by_month(df, dfs_status):
"accepted_without_errors": accepted,
"accepted_with_errors": cases_with_errors})

aggregate_data['rejected'] = ParserError.objects.filter(file=df).filter(case_number=None).count()
aggregate_data['rejected'] = ParserError.objects.filter(
file=df).filter(case_number=None).count()

return aggregate_data


def total_errors_by_month(df, dfs_status):
    """Return the total number of parser errors for each month in the file's quarter.

    df: the datafile whose ParserError rows are being aggregated.
    dfs_status: the DataFileSummary status string; when it is "Rejected"
        per-month counts are not meaningful and are reported as "N/A".

    Returns a dict of the form
        {"months": [{"month": "Oct", "total_errors": 0}, ...]}
    with exactly one entry per month of the file's fiscal quarter.
    """
    # The datafile is keyed by fiscal year/quarter; convert to the calendar
    # year/quarter to derive the three short month names for the period.
    calendar_year, calendar_qtr = fiscal_to_calendar(df.year, df.quarter)
    month_list = transform_to_months(calendar_qtr)

    total_errors_data = {"months": []}

    for month in month_list:
        if dfs_status == "Rejected":
            # A rejected file has no usable per-month counts.
            total_errors_data["months"].append(
                {"month": month, "total_errors": "N/A"})
            continue

        # month_to_int returns a zero-padded two-digit *string*, so this
        # builds e.g. 2023 + '10' -> 202310 to match RPT_MONTH_YEAR.
        month_int = month_to_int(month)
        rpt_month_year = int(f"{calendar_year}{month_int}")

        # The error count depends only on the file and the month, so it is
        # computed exactly once per month. (The previous implementation
        # looped over the section's schema models, re-running the identical
        # query and appending a duplicate month entry per model, and also
        # derived program/section/model lookups it never used.)
        error_count = ParserError.objects.filter(
            file=df, rpt_month_year=rpt_month_year).count()
        total_errors_data["months"].append(
            {"month": month, "total_errors": error_count})

    return total_errors_data
4 changes: 3 additions & 1 deletion tdrs-backend/tdpservice/scheduling/parser_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tdpservice.data_files.models import DataFile
from tdpservice.parsers.parse import parse_datafile
from tdpservice.parsers.models import DataFileSummary
from tdpservice.parsers.util import case_aggregates_by_month
from tdpservice.parsers.util import case_aggregates_by_month, total_errors_by_month


logger = logging.getLogger(__name__)
Expand All @@ -27,6 +27,8 @@ def parse(data_file_id):

if "Case Data" in data_file.section:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not related to this PR but we should have had "Case Data" as a CONSTANT, and I would put it in .DataFile.py

dfs.case_aggregates = case_aggregates_by_month(data_file, dfs.status)
else:
dfs.case_aggregates = total_errors_by_month(data_file, dfs.status)

dfs.save()

Expand Down
Loading
Loading