Merge branch 'develop' of https://github.com/raft-tech/TANF-app into 1612-case-aggregates-frontend
elipe17 committed Sep 19, 2023
2 parents 12dd45c + 4d5ed4b commit 221f25f
Showing 10 changed files with 200 additions and 8 deletions.
51 changes: 51 additions & 0 deletions docs/Sprint-Review/sprint-80-summary.md
@@ -0,0 +1,51 @@
# Sprint 80 Summary

08/16/23 - 08/29/23

Velocity: Dev (20)

## Sprint Goal
* Continue parsing engine development for TANF Sections (01-04), complete the decoupling backend application spike, and continue the integration test epic (#2282).
* UX to continue regional staff research and the service design blueprint (.1 and .2), and bridge onboarding to >85% of total users.
* DevOps to investigate nightly scan issues and resolve utility images for CircleCI and the container registry.


## Tickets
### Completed/Merged
* [#2369 As tech lead, we need the parsing engine to run quality checks across TANF section 1](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/2369)
* [#1110 TANF (03) Parsing and Validation](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1110)
* [#2282 As tech lead, I want a file upload integration test](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/2282)
* [#1784 - Email Relay](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1784)

### Ready to Merge
* N/A

### Submitted (QASP Review, OCIO Review)
* [#1109 TANF (02) Parsing and Validation](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1109)

### Closed (not merged)
* N/A

## Moved to Next Sprint (Blocked, Raft Review, In Progress)
### In Progress
* [#2116 Container Registry Creation](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/2116)
* [#2429 Singular ClamAV scanner](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/2429)
* [#1111 TANF (04) Parsing and Validation](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1111)


### Blocked
* N/A


### Raft Review
* [#1610 As a user, I need information about the acceptance of my data and a link for the error report](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1610)
* [#1612 Detailed case level metadata](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/1612)
* [#1613 As a developer, I need parsed file meta data (TANF Section 1)](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/board)
* [#2626 (Spike) improve parsing logging](https://app.zenhub.com/workspaces/sprint-board-5f18ab06dfd91c000f7e682e/issues/gh/raft-tech/tanf-app/2626)

### Demo
* Internal:
  * 2369 / 1110 - TANF Sections (01 and 03) Parsing and Validation
* External:
  * 2369 / 1110 - TANF Sections (01 and 03) Parsing and Validation

1 change: 1 addition & 0 deletions tdrs-backend/docker-compose.yml
@@ -100,6 +100,7 @@ services:
./gunicorn_start.sh && celery -A tdpservice.settings worker -l info"
ports:
- "5555:5555"
tty: true
depends_on:
- clamav-rest
- localstack
38 changes: 38 additions & 0 deletions tdrs-backend/tdpservice/core/logger.py
@@ -0,0 +1,38 @@
"""Contains core logging functionality for TDP."""

import logging

class ColorFormatter(logging.Formatter):
"""Simple formatter class to add color to log messages based on log level."""

BLACK = '\033[0;30m'
RED = '\033[0;31m'
GREEN = '\033[0;32m'
BROWN = '\033[0;33m'
BLUE = '\033[0;34m'
PURPLE = '\033[0;35m'
CYAN = '\033[0;36m'
GREY = '\033[0;37m'

DARK_GREY = '\033[1;30m'
LIGHT_RED = '\033[1;31m'
LIGHT_GREEN = '\033[1;32m'
YELLOW = '\033[1;33m'
LIGHT_BLUE = '\033[1;34m'
LIGHT_PURPLE = '\033[1;35m'
LIGHT_CYAN = '\033[1;36m'
WHITE = '\033[1;37m'

RESET = "\033[0m"

def __init__(self, *args, **kwargs):
self._colors = {logging.DEBUG: self.CYAN,
logging.INFO: self.GREEN,
logging.WARNING: self.YELLOW,
logging.ERROR: self.LIGHT_RED,
logging.CRITICAL: self.RED}
super(ColorFormatter, self).__init__(*args, **kwargs)

def format(self, record):
"""Format the record to be colored based on the log level."""
return self._colors.get(record.levelno, self.WHITE) + super().format(record) + self.RESET
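For context, a minimal sketch of how a formatter like this gets attached to a handler when used standalone; the handler and logger names below are illustrative, and the project's actual wiring lives in the `tdrs-backend/tdpservice/settings/common.py` changes later in this diff.

```python
import logging

from tdpservice.core.logger import ColorFormatter

# Illustrative wiring only; in the project this is configured through the
# Django LOGGING dict in tdpservice/settings/common.py (see below).
handler = logging.StreamHandler()
handler.setFormatter(ColorFormatter("%(asctime)s %(levelname)s %(message)s"))

demo_logger = logging.getLogger("color_demo")
demo_logger.addHandler(handler)
demo_logger.setLevel(logging.DEBUG)

demo_logger.info("parsed 10 records")    # wrapped in green (INFO)
demo_logger.error("bulk create failed")  # wrapped in light red (ERROR)
```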
9 changes: 9 additions & 0 deletions tdrs-backend/tdpservice/data_files/models.py
@@ -221,6 +221,15 @@ def find_latest_version(self, year, quarter, section, stt):
version=version, year=year, quarter=quarter, section=section, stt=stt,
).first()

def __repr__(self):
"""Return a string representation of the model."""
return f"{{id: {self.id}, filename: {self.original_filename}, STT: {self.stt}, S3 location: " + \
f"{self.s3_location}}}"

def __str__(self):
"""Return a string representation of the model."""
return f"filename: {self.original_filename}"

class LegacyFileTransferManager(models.Manager):
"""Extends object manager functionality for LegacyFileTransfer model."""

4 changes: 4 additions & 0 deletions tdrs-backend/tdpservice/data_files/views.py
@@ -66,6 +66,10 @@ def create(self, request, *args, **kwargs):
data_file_id = response.data.get('id')
data_file = DataFile.objects.get(id=data_file_id)

logger.info(f"Preparing parse task: User META -> user: {request.user}, stt: {data_file.stt}. " +
f"Datafile META -> datafile: {data_file_id}, section: {data_file.section}, " +
f"quarter {data_file.quarter}, year {data_file.year}.")

parser_task.parse.delay(data_file_id)
logger.info("Submitted parse task to queue for datafile %s.", data_file_id)

9 changes: 9 additions & 0 deletions tdrs-backend/tdpservice/parsers/fields.py
@@ -1,5 +1,9 @@
"""Datafile field representations."""

import logging

logger = logging.getLogger(__name__)

def value_is_empty(value, length):
"""Handle 'empty' values as field inputs."""
empty_values = [
@@ -36,6 +40,7 @@ def parse_value(self, line):
value = line[self.startIndex:self.endIndex]

if value_is_empty(value, self.endIndex-self.startIndex):
logger.debug(f"Field: '{self.name}' at position: [{self.startIndex}, {self.endIndex}) is empty.")
return None

match self.type:
@@ -44,9 +49,13 @@ def parse_value(self, line):
value = int(value)
return value
except ValueError:
logger.error(f"Error parsing field value: {value} to integer.")
return None
case 'string':
return value
case _:
logger.warn(f"Unknown field type: {self.type}.")
return None

class TransformField(Field):
"""Represents a field that requires some transformation before serializing."""
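As a rough illustration of the flow the new log statements instrument — slice the fixed-width field, short-circuit on empty values, then cast by type — here is a simplified, self-contained stand-in. It is not the real `Field` API; the positions, the `number` type name, and the notion of "empty" are assumptions made for the example.

```python
# Simplified stand-in for Field.parse_value; illustrative only, not the project API.
def parse_value_sketch(line, start, end, field_type, name="EXAMPLE_FIELD"):
    value = line[start:end]
    # Rough stand-in for value_is_empty(): treat all-spaces as empty.
    if value.strip() == "":
        print(f"Field: '{name}' at position: [{start}, {end}) is empty.")
        return None
    if field_type == "number":
        try:
            return int(value)
        except ValueError:
            print(f"Error parsing field value: {value} to integer.")
            return None
    if field_type == "string":
        return value
    print(f"Unknown field type: {field_type}.")
    return None


parse_value_sketch("T12020", 2, 6, "number")  # -> 2020
parse_value_sketch("T1    ", 2, 6, "number")  # empty field -> None
parse_value_sketch("T1ABCD", 2, 6, "number")  # ValueError -> None
```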
5 changes: 3 additions & 2 deletions tdrs-backend/tdpservice/parsers/models.py
@@ -57,11 +57,12 @@ def rpt_month_name(self):

def __repr__(self):
"""Return a string representation of the model."""
return f"ParserError {self.id} for file {self.file} and object key {self.object_id}"
return f"{{id: {self.id}, file: {self.file.id}, row: {self.row_number}, column: {self.column_number}, " + \
f"error message: {self.error_message}}}"

def __str__(self):
"""Return a string representation of the model."""
return f"ParserError {self.__dict__}"
return f"error_message: {self.error_message}"

def _get_error_message(self):
"""Return the error message."""
76 changes: 72 additions & 4 deletions tdrs-backend/tdpservice/parsers/parse.py
@@ -23,15 +23,18 @@ def parse_datafile(datafile):
util.make_generate_parser_error(datafile, 1)
)
if not header_is_valid:
logger.info(f"Preparser Error: {len(header_errors)} header errors encountered.")
errors['header'] = header_errors
bulk_create_errors({1: header_errors}, 1, flush=True)
return errors

is_encrypted = util.contains_encrypted_indicator(header_line, schema_defs.header.get_field_by_name("encryption"))
logger.debug(f"Datafile has encrypted fields: {is_encrypted}.")

# ensure file section matches upload section
program_type = header['program_type']
section = header['type']
logger.debug(f"Program type: {program_type}, Section: {section}.")

section_is_valid, section_error = validators.validate_header_section_matches_submission(
datafile,
@@ -40,6 +43,7 @@ def parse_datafile(datafile):
)

if not section_is_valid:
logger.info(f"Preparser Error -> Section is not valid: {section_error.error_message}")
errors['document'] = [section_error]
unsaved_parser_errors = {1: [section_error]}
bulk_create_errors(unsaved_parser_errors, 1, flush=True)
@@ -55,12 +59,17 @@ def parse_datafile(datafile):
def bulk_create_records(unsaved_records, line_number, header_count, batch_size=10000, flush=False):
"""Bulk create passed in records."""
if (line_number % batch_size == 0 and header_count > 0) or flush:
logger.debug("Bulk creating records.")
try:
num_created = 0
num_expected = 0
for model, records in unsaved_records.items():
num_expected += len(records)
num_created += len(model.objects.bulk_create(records))
if num_created != num_expected:
logger.error(f"Bulk create only created {num_created}/{num_expected}!")
else:
logger.info(f"Created {num_created}/{num_expected} records.")
return num_created == num_expected, {}
except DatabaseError as e:
logger.error(f"Encountered error while creating datafile records: {e}")
@@ -70,7 +79,10 @@ def bulk_create_records(unsaved_records, line_number, header_count, batch_size=10000, flush=False):
def bulk_create_errors(unsaved_parser_errors, num_errors, batch_size=5000, flush=False):
"""Bulk create all ParserErrors."""
if flush or (unsaved_parser_errors and num_errors >= batch_size):
ParserError.objects.bulk_create(list(itertools.chain.from_iterable(unsaved_parser_errors.values())))
logger.debug("Bulk creating ParserErrors.")
num_created = len(ParserError.objects.bulk_create(list(itertools.chain.from_iterable(
unsaved_parser_errors.values()))))
logger.info(f"Created {num_created}/{num_errors} ParserErrors.")
return {}, 0
return unsaved_parser_errors, num_errors

@@ -94,12 +106,16 @@ def evaluate_trailer(datafile, trailer_count, multiple_trailer_errors, is_last_l

def rollback_records(unsaved_records, datafile):
"""Delete created records in the event of a failure."""
logger.info("Rolling back created records.")
for model in unsaved_records:
model.objects.filter(datafile=datafile).delete()
num_deleted, models = model.objects.filter(datafile=datafile).delete()
logger.debug(f"Deleted {num_deleted} records of type: {model}.")

def rollback_parser_errors(datafile):
"""Delete created errors in the event of a failure."""
ParserError.objects.filter(file=datafile).delete()
logger.info("Rolling back created parser errors.")
num_deleted, models = ParserError.objects.filter(file=datafile).delete()
logger.debug(f"Deleted {num_deleted} {ParserError}.")

def parse_datafile_lines(datafile, program_type, section, is_encrypted):
"""Parse lines with appropriate schema and return errors."""
@@ -134,13 +150,16 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted):
is_last, line, line_number)

if trailer_errors is not None:
logger.debug(f"{len(trailer_errors)} trailer error(s) detected for file " +
f"'{datafile.original_filename}' on line {line_number}.")
errors['trailer'] = trailer_errors
unsaved_parser_errors.update({"trailer": trailer_errors})
num_errors += len(trailer_errors)

generate_error = util.make_generate_parser_error(datafile, line_number)

if header_count > 1:
logger.info(f"Preparser Error -> Multiple headers found for file: {datafile.id} on line: {line_number}.")
errors.update({'document': ['Multiple headers found.']})
err_obj = generate_error(
schema=None,
@@ -170,6 +189,9 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted):
record_number += 1
record, record_is_valid, record_errors = r
if not record_is_valid:
logger.debug(f"Record #{i} from line {line_number} is invalid.")
line_errors = errors.get(f"{line_number}_{i}", {})
line_errors.update({record_number: record_errors})
errors.update({f"{line_number}_{i}": record_errors})
unsaved_parser_errors.update({f"{line_number}_{i}": record_errors})
num_errors += len(record_errors)
@@ -182,6 +204,7 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted):
unsaved_parser_errors, num_errors = bulk_create_errors(unsaved_parser_errors, num_errors)

if header_count == 0:
logger.info(f"Preparser Error -> No headers found for file: {datafile.id}.")
errors.update({'document': ['No headers found.']})
err_obj = generate_error(
schema=None,
@@ -200,6 +223,7 @@ def parse_datafile_lines(datafile, program_type, section, is_encrypted):
# successfully create the records.
all_created, unsaved_records = bulk_create_records(unsaved_records, line_number, header_count, flush=True)
if not all_created:
logger.error(f"Not all parsed records created for file: {datafile.id}!")
rollback_records(unsaved_records, datafile)
bulk_create_errors(unsaved_parser_errors, num_errors, flush=True)
return errors
@@ -227,7 +251,51 @@ def manager_parse_line(line, schema_manager, generate_error, is_encrypted=False)
)
])]

def get_schema_manager(line, section, program_type):

def get_schema_manager_options(program_type):
"""Return the allowed schema options."""
match program_type:
case 'TAN':
return {
'A': {
'T1': schema_defs.tanf.t1,
'T2': schema_defs.tanf.t2,
'T3': schema_defs.tanf.t3,
},
'C': {
'T4': schema_defs.tanf.t4,
'T5': schema_defs.tanf.t5,
},
'G': {
'T6': schema_defs.tanf.t6,
},
'S': {
# 'T7': schema_options.t7,
},
}
case 'SSP':
return {
'A': {
'M1': schema_defs.ssp.m1,
'M2': schema_defs.ssp.m2,
'M3': schema_defs.ssp.m3,
},
'C': {
# 'M4': schema_options.m4,
# 'M5': schema_options.m5,
},
'G': {
# 'M6': schema_options.m6,
},
'S': {
# 'M7': schema_options.m7,
},
}
# case tribal?
return None


def get_schema_manager(line, section, schema_options):
"""Return the appropriate schema for the line."""
line_type = line[0:2]
return util.get_program_model(program_type, section, line_type)
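A quick sketch of how the new lookup table is meant to be consumed — narrow by program type, then by section letter, then by the two-character record type at the start of each line. The call site and the exact lookup expression below are assumptions for illustration; they are not part of this diff.

```python
# Illustrative only: resolving a schema for a hypothetical TANF Active Case record.
program_type = 'TAN'   # taken from the parsed header
section = 'A'          # Active Case Data
line = "T12020..."     # hypothetical raw record; first two characters are the record type

schema_options = get_schema_manager_options(program_type)
# {'A': {'T1': ..., 'T2': ..., 'T3': ...}, 'C': {...}, 'G': {...}, 'S': {...}}

line_type = line[0:2]                                    # 'T1'
schema = schema_options.get(section, {}).get(line_type)  # -> schema_defs.tanf.t1
```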
4 changes: 4 additions & 0 deletions tdrs-backend/tdpservice/parsers/row_schema.py
@@ -1,6 +1,9 @@
"""Row schema for datafile."""
from .models import ParserErrorCategoryChoices
from .fields import Field, value_is_empty
import logging

logger = logging.getLogger(__name__)


class RowSchema:
@@ -45,6 +48,7 @@ def parse_and_validate(self, line, generate_error):
if not preparsing_is_valid:
if self.quiet_preparser_errors:
return None, True, []
logger.info(f"{len(preparsing_errors)} preparser error(s) encountered.")
return None, False, preparsing_errors

# parse line to model
11 changes: 9 additions & 2 deletions tdrs-backend/tdpservice/settings/common.py
@@ -205,10 +205,12 @@ class Common(Configuration):
},
"verbose": {
"format": (
"[%(asctime)s %(levelname)s %(filename)s::%(funcName)s:L%(lineno)d : %(message)s"
"%(asctime)s %(levelname)s %(filename)s::%(funcName)s:L%(lineno)d : %(message)s"
)
},
"simple": {"format": "%(levelname)s %(message)s"},
"color": {"()": "tdpservice.core.logger.ColorFormatter",
"format": "%(asctime)s %(levelname)s %(filename)s::%(funcName)s:L%(lineno)d : %(message)s"}
},
"filters": {"require_debug_true": {"()": "django.utils.log.RequireDebugTrue"}},
"handlers": {
@@ -224,7 +226,7 @@
},
"application": {
"class": "logging.StreamHandler",
"formatter": "verbose",
"formatter": "color",
},
},
"loggers": {
@@ -233,6 +235,11 @@
"propagate": True,
"level": LOGGING_LEVEL
},
"tdpservice.parsers": {
"handlers": ["application"],
"propagate": False,
"level": LOGGING_LEVEL
},
"django": {"handlers": ["console"], "propagate": True},
"django.server": {
"handlers": ["django.server"],
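With the new `tdpservice.parsers` logger entry, any module under that package that calls the standard `logging.getLogger(__name__)` (as `parse.py`, `fields.py`, and `row_schema.py` now do) is routed to the `application` handler, and therefore the new `color` formatter, without propagating to the root handlers. A small sketch of that behavior; the log message itself is illustrative.

```python
# Sketch only: how a parsers module picks up the new logging config.
import logging

logger = logging.getLogger(__name__)  # e.g. "tdpservice.parsers.parse"

# Routed to the "application" StreamHandler, formatted by ColorFormatter,
# and not propagated further ("propagate": False in the config above).
logger.info("Submitted parse task to queue for datafile 42.")
```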
