Skip to content

Commit

Permalink
- Correct document
Browse files Browse the repository at this point in the history
- functionize prioritized queries
- Update group check function to allow multiple groups
- add new admin property to user class
  • Loading branch information
elipe17 committed Oct 23, 2024
1 parent fba33f3 commit bf33db1
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 80 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Current requirements from OFA do not require category two errors to be queryable
Given the current OFA requirements, we can implement prioritized errors, and memory efficient report generation without too much work. OFA has provided [this OneNote](https://gorafttech.sharepoint.com/:o:/s/TDRSResearchDesign/EnIa1Mn4v7pOskW7BLomXhIBxUMlYLRU_f1C0dxemW7dWw?e=m0rNyI) document which outlines the error types, errors, and fields that are most important/prioritized for STTs to see.

### Memory Efficient Report Generation
As previously mentioned in the #background section, the `get_xls_serialized_file` introduces a method to serialize parser errors into a XLSX that requires the entire queryset of parser errors to be brought into memory. Because these querysets can be very large, having them in memory regularly kills Celery workers with an OOM error. To remedy the issue, this tech memo suggests updating `get_xls_serialized_file` to not use Django model serializers and instead leverage the power of Django querysets and pagination. To accomplish this, instead of passing a JSON serialized querset to `get_xls_serialized_file`, a standard (un-evaluated) queryset should be passed. Then, the body of the `get_xls_serialized_file` function should be updated appropriately to use a queryset object instead of a JSON object to generate the XLSX spreadsheet. The updates should also include paginating the queryset to avoid bringing the entirety of the queryset into memory at any one time. The code snippet below provides an example of paginating the queryset and writing the appropriate fields of each entry to the XLSX report.
As previously mentioned in the #background section, the `get_xls_serialized_file` function introduces a method to serialize parser errors into an XLSX that requires the entire queryset of parser errors to be brought into memory. Because these querysets can be very large, having them in memory regularly kills Gunicorn workers with an OOM error. To remedy the issue, this tech memo suggests updating `get_xls_serialized_file` to not use Django model serializers and instead leverage the power of Django querysets and pagination. To accomplish this, instead of passing a JSON serialized queryset to `get_xls_serialized_file`, a standard (un-evaluated) queryset should be passed. Then, the body of the `get_xls_serialized_file` function should be updated appropriately to use a queryset object instead of a JSON object to generate the XLSX spreadsheet. The updates should also include paginating the queryset to avoid bringing the entirety of the queryset into memory at any one time. The code snippet below provides an example of paginating the queryset and writing the appropriate fields of each entry to the XLSX report.

```python
paginator = Paginator(parser_errors, settings.BULK_CREATE_BATCH_SIZE)
Expand Down
112 changes: 39 additions & 73 deletions tdrs-backend/tdpservice/data_files/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ class DataFileViewSet(ModelViewSet):
# Ref: https://github.com/raft-tech/TANF-app/issues/1007
queryset = DataFile.objects.all()

# Prioritized category 2 (field value) error field groups. Each inner tuple
# is matched against ParserError.field_name via `field_name__in`, so a single
# tuple means "any error on any one of these fields".
PRIORITIZED_CAT2 = (
("FAMILY_AFFILIATION", "CITIZENSHIP_STATUS", "CLOSURE_REASON"),
)

# Prioritized category 3 (value consistency) error field groups. Each inner
# tuple is matched against ParserError.fields_json["friendly_name"] via the
# JSON `has_keys` lookup, so a tuple means "errors involving ALL of these
# fields together".
PRIORITIZED_CAT3 = (
("FAMILY_AFFILIATION", "SSN"),
("FAMILY_AFFILIATION", "CITIZENSHIP_STATUS"),
("AMT_FOOD_STAMP_ASSISTANCE", "AMT_SUB_CC", "CASH_AMOUNT", "CC_AMOUNT", "TRANSP_AMOUNT"),
("FAMILY_AFFILIATION", "SSN", "CITIZENSHIP_STATUS"),
("FAMILY_AFFILIATION", "PARENT_MINOR_CHILD"),
("FAMILY_AFFILIATION", "EDUCATION_LEVEL"),
("FAMILY_AFFILIATION", "WORK_ELIGIBLE_INDICATOR"),
("CITIZENSHIP_STATUS", "WORK_ELIGIBLE_INDICATOR"),
)

def create(self, request, *args, **kwargs):
"""Override create to upload in case of successful scan."""
logger.debug(f"{self.__class__.__name__}: {request}")
Expand Down Expand Up @@ -143,88 +158,39 @@ def download(self, request, pk=None):
)
return response

def __prioritize_queryset(self, filtered_errors, all_errors):
    """Build the queryset of prioritized ParserErrors from `all_errors`.

    The result is the union of:
      - every category 1 (pre-check) and category 4 (case consistency) error,
      - category 2 (field value) errors on the PRIORITIZED_CAT2 fields,
      - category 3 (value consistency) errors whose fields_json friendly
        names contain every field of a PRIORITIZED_CAT3 group.

    NOTE(review): the `filtered_errors` argument is never read — it is
    rebuilt from `all_errors` immediately. Confirm whether the parameter
    can be dropped at the call site.
    """
    # Cat 1 and cat 4 errors are always included, regardless of field.
    cat1_cat4 = (
        Q(error_type=ParserErrorCategoryChoices.PRE_CHECK)
        | Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
    )
    prioritized = all_errors.filter(cat1_cat4)

    # Cat 2 errors: any error whose field is in a prioritized field group.
    for field_group in self.PRIORITIZED_CAT2:
        cat2_errors = all_errors.filter(
            field_name__in=field_group,
            error_type=ParserErrorCategoryChoices.FIELD_VALUE,
        )
        prioritized = prioritized.union(cat2_errors)

    # Cat 3 errors: errors whose friendly-name keys cover the whole group.
    for field_group in self.PRIORITIZED_CAT3:
        cat3_errors = all_errors.filter(
            fields_json__friendly_name__has_keys=field_group,
            error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY,
        )
        prioritized = prioritized.union(cat3_errors)

    return prioritized

@action(methods=["get"], detail=True)
def download_error_report(self, request, pk=None):
"""Generate and return the parsing error report xlsx."""
datafile = self.get_object()
all_errors = ParserError.objects.filter(file=datafile)
filtered_errors = None
user = self.request.user
is_active = "Active" in datafile.section
is_closed = "Closed" in datafile.section
is_s1_s2 = "Active" in datafile.section or "Closed" in datafile.section

# We only filter Active and Closed submissions. Aggregate and Stratum return all errors.
if not (user.is_ofa_sys_admin or user.is_ofa_admin) and (is_active or is_closed):
# All cat1/4 errors
error_type_query = Q(error_type=ParserErrorCategoryChoices.PRE_CHECK) | \
Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY)
filtered_errors = all_errors.filter(error_type_query)

# All cat2 errors associated with FAMILY_AFFILIATION and (CITIZENSHIP_STATUS or CLOSURE_REASON)
second_field = "CITIZENSHIP_STATUS" if is_active else "CLOSURE_REASON"
field_query = Q(field_name="FAMILY_AFFILIATION") | Q(field_name=second_field)
filtered_errors = filtered_errors.union(all_errors.filter(
field_query,
error_type=ParserErrorCategoryChoices.FIELD_VALUE
))

# All cat3 errors associated with FAMILY_AFFILIATION and SSN
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"SSN"
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with FAMILY_AFFILIATION and CITIZENSHIP_STATUS
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"CITIZENSHIP_STATUS"
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

if is_active:
# All cat3 errors associated with summed fields: AMT_FOOD_STAMP_ASSISTANCE, AMT_SUB_CC, CASH_AMOUNT,
# CC_AMOUNT, TRANSP_AMOUNT
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"AMT_FOOD_STAMP_ASSISTANCE", "AMT_SUB_CC", "CASH_AMOUNT", "CC_AMOUNT", "TRANSP_AMOUNT"
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with FAMILY_AFFILIATION and SSN and CITIZENSHIP_STATUS
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"SSN",
"CITIZENSHIP_STATUS"
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with FAMILY_AFFILIATION and PARENT_MINOR_CHILD
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"PARENT_MINOR_CHILD",
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with FAMILY_AFFILIATION and EDUCATION_LEVEL
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"EDUCATION_LEVEL",
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with FAMILY_AFFILIATION and WORK_ELIGIBLE_INDICATOR
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"FAMILY_AFFILIATION",
"WORK_ELIGIBLE_INDICATOR",
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))

# All cat3 errors associated with CITIZENSHIP_STATUS and WORK_ELIGIBLE_INDICATOR
filtered_errors = filtered_errors.union(all_errors.filter(fields_json__friendly_name__has_keys=[
"CITIZENSHIP_STATUS",
"WORK_ELIGIBLE_INDICATOR",
],
error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY))
if not user.is_an_admin and is_s1_s2:
filtered_errors = self.__prioritize_queryset(filtered_errors, all_errors)
else:
filtered_errors = all_errors

Expand Down
14 changes: 8 additions & 6 deletions tdrs-backend/tdpservice/users/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,11 @@ def __str__(self):
"""Return the username as the string representation of the object."""
return self.username

def is_in_group(self, group_name: str) -> bool:
"""Return whether or not the user is a member of the specified Group."""
return self.groups.filter(name=group_name).exists()
def is_in_group(self, group_names: list) -> bool:
    """Return whether the user is a member of any of the specified Group(s).

    Args:
        group_names: a Group name or a list of Group names. A bare string
            is treated as a single-element list for backwards compatibility
            with callers that pass one group name.

    Returns:
        True if the user belongs to at least one of the named groups.
    """
    # isinstance (not `type(...) == str`) also accepts str subclasses.
    if isinstance(group_names, str):
        group_names = [group_names]
    return self.groups.filter(name__in=group_names).exists()

def validate_location(self):
"""Throw a validation error if a user has a location type incompatable with their role."""
Expand Down Expand Up @@ -181,9 +183,9 @@ def is_ocio_staff(self) -> bool:
return self.is_in_group("ACF OCIO")

@property
def is_ofa_admin(self) -> bool:
"""Return whether or not the user is in the OFA Admin Group."""
return self.is_in_group("OFA Admin")
def is_an_admin(self) -> bool:
    """Return whether the user is in the OFA Admin or OFA System Admin Group."""
    return self.is_in_group(["OFA Admin", "OFA System Admin"])

@property
def is_ofa_sys_admin(self) -> bool:
Expand Down

0 comments on commit bf33db1

Please sign in to comment.