From 542823fc0dc01722f6c29645764244c9bca53307 Mon Sep 17 00:00:00 2001 From: Andrew <84722778+andrew-jameson@users.noreply.github.com> Date: Tue, 8 Oct 2024 09:08:27 -0400 Subject: [PATCH] Feat/2965 db seed impl (#3134) * mgmt cmd structure * Pushing after pair w/ Jan * Pushing latest prior to lunch * ignore for research/grep results * End of day commit, successful run w/o syntax issues. Need to do something with files * just the useful changes, sorry for the mess * latest work for RPY to get around preparser blocker * Latest changes, removing comments/prints mostly. * functional again after a header rework * Cleaning up comments/prints for PR * Linter clean-up * linter pt2 * Improvements to header, linting, and Eric's PR feedback * Fixed quarter preparing issue * Comment clean up, linting fix --------- Co-authored-by: andrew-jameson --- .gitignore | 3 +- Taskfile.yml | 17 ++ .../tdpservice/parsers/management/__init__.py | 0 .../parsers/management/commands/__init__.py | 0 .../parsers/management/commands/seed_db.py | 181 ++++++++++++++++++ 5 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 tdrs-backend/tdpservice/parsers/management/__init__.py create mode 100644 tdrs-backend/tdpservice/parsers/management/commands/__init__.py create mode 100644 tdrs-backend/tdpservice/parsers/management/commands/seed_db.py diff --git a/.gitignore b/.gitignore index 2fee3eca0..6be3a5017 100644 --- a/.gitignore +++ b/.gitignore @@ -108,4 +108,5 @@ tfapply cypress.env.json # Patches -*.patch \ No newline at end of file +*.patch +tdrs-backend/*.pg diff --git a/Taskfile.yml b/Taskfile.yml index ac4812394..8f1731fe9 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -114,6 +114,23 @@ tasks: cmds: - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py shell" + backend-exec: + desc: Execute a command in the backend container + dir: tdrs-backend + vars: + CMD: '{{.CMD}}' + cmds: + - docker-compose -f docker-compose.yml exec web sh -c "python manage.py 
{{.CMD}}" + + backend-exec-seed-db: + desc: Execute seed_db command in the backend container + dir: tdrs-backend + vars: + CMD: '{{.CMD}}' + cmds: + - docker-compose -f docker-compose.yml up -d + - docker-compose -f docker-compose.yml exec web sh -c "python manage.py populate_stts; python ./manage.py seed_db" + backend-pytest: desc: 'Run pytest in the backend container E.g: task backend-pytest PYTEST_ARGS="tdpservice/test/ -s -vv"' dir: tdrs-backend diff --git a/tdrs-backend/tdpservice/parsers/management/__init__.py b/tdrs-backend/tdpservice/parsers/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tdrs-backend/tdpservice/parsers/management/commands/__init__.py b/tdrs-backend/tdpservice/parsers/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tdrs-backend/tdpservice/parsers/management/commands/seed_db.py b/tdrs-backend/tdpservice/parsers/management/commands/seed_db.py new file mode 100644 index 000000000..e8b8f6136 --- /dev/null +++ b/tdrs-backend/tdpservice/parsers/management/commands/seed_db.py @@ -0,0 +1,181 @@ +"""`seed_db` command.""" + +from django.core.management import BaseCommand +from django.core.files.base import ContentFile +from django.db.utils import IntegrityError +from tdpservice.parsers.schema_defs.header import header +from tdpservice.parsers.schema_defs.trailer import trailer +from tdpservice.parsers.schema_defs.utils import get_schema_options, get_program_models +from tdpservice.parsers.util import fiscal_to_calendar +# all models should be referenced by using the utils.py get_schema_options wrappers +from tdpservice.data_files.models import DataFile +# from tdpservice.parsers import parse +from tdpservice.parsers.test.factories import DataFileSummaryFactory +from tdpservice.scheduling.parser_task import parse as parse_task +from tdpservice.stts.models import STT +from tdpservice.users.models import User +from tdpservice.parsers.row_schema import RowSchema +from faker 
from faker import Faker
import logging
import random

fake = Faker()
logger = logging.getLogger(__name__)

# https://faker.readthedocs.io/en/stable/providers/baseprovider.html#faker.providers.BaseProvider
# """ class FieldFaker(faker.providers.BaseProvider):..."""

# Substrings of the long section name for which record lines are generated.
_RECORD_SECTION_KEYWORDS = ('Active Case', 'Closed Case', 'Aggregate', 'Stratum')


def build_datafile(stt, year, quarter, original_filename, file_name, section, file_data):
    """Create a DataFile owned by the 'system' user and attach its content.

    Parameters
    ----------
    stt, year, quarter, original_filename, section:
        Passed straight through to ``DataFile.objects.create``.
    file_name:
        Name under which the content is stored on the ``file`` field.
    file_data:
        Raw bytes of the generated file.

    Returns
    -------
    The created DataFile, or ``None`` when an ``IntegrityError`` prevented
    creation (the error is logged).
    """
    try:
        datafile = DataFile.objects.create(
            user=User.objects.get_or_create(username='system')[0],
            stt=stt,
            year=year,
            quarter=quarter,
            original_filename=original_filename,
            section=section,
            # Random version; a clash is expected to surface as the
            # IntegrityError handled below.
            version=random.randint(1, 1993415),
        )
        datafile.file.save(file_name, ContentFile(file_data))
    except IntegrityError as e:
        logger.error("Error creating datafile: %s", e)
        # Previously fell through to `return d` with `d` unbound, which
        # raised UnboundLocalError and masked the real failure.
        return None
    return datafile


def validValues(schemaMgr, field, year, qtr):
    """Return a randomly generated value for one schema field.

    Parameters
    ----------
    schemaMgr:
        Object exposing ``record_type``; used verbatim for 'RecordType' fields.
    field:
        Field with ``name``, ``startIndex`` and ``endIndex`` attributes.
    year:
        Year written into 'RPT_MONTH_YEAR' values.
    qtr:
        Quarter string such as ``'Q1'``; the leading character is dropped
        and the remainder is read as the quarter number.

    Returns
    -------
    The record type for 'RecordType' fields, otherwise a string produced by
    ``fake.bothify``.
    """
    if field.name == 'RecordType':
        return schemaMgr.record_type

    field_len = field.endIndex - field.startIndex

    if field.name == 'SSN':
        # only used by recordtypes 2,3,5
        # TODO: reverse the TransformField logic to 'encrypt' a random number
        field_format = '?' * field_len
    elif field.name == 'RPT_MONTH_YEAR':  # previously had CALENDAR_QUARTER
        # `in ('RPT_MONTH_YEAR')` was a substring test against a plain
        # string, so an exact comparison is used instead.
        # Given a quarter, pick a month within that quarter's three months.
        upper = int(qtr[1:]) * 3
        lower = upper - 2
        month = random.randint(lower, upper)
        field_format = f'{year}{month:02d}'
    else:
        field_format = '#' * field_len

    # bothify replaces '?' with random letters and '#' with random digits;
    # any other character (e.g. the year/month digits) is kept as-is.
    return fake.bothify(text=field_format)


def make_line(schemaMgr, section, year, qtr):
    """Build one newline-terminated record line from a schema manager.

    Only the first schema in ``schemaMgr.schemas`` is used; multi-schema
    record types (the original comment mentions T6) are not handled yet.
    ``section`` is accepted for interface compatibility and is not used.
    """
    row_schema = schemaMgr.schemas[0]

    values = []
    for field in row_schema.fields:
        value = validValues(row_schema, field, year, qtr)
        logger.debug(
            "Field: %s, field length %s Value: %s",
            field.name, field.endIndex - field.startIndex, value,
        )
        values.append(value)
    return ''.join(values) + '\n'


def make_HT(schemaMgr, prog_type, section, year, quarter, stt):
    """Build the header or trailer line for a generated file.

    Parameters
    ----------
    schemaMgr:
        The header or trailer RowSchema; its ``record_type`` selects the line.
    prog_type, section, year, quarter:
        Values embedded in the header line; ``quarter`` is a string such as
        ``'Q1'`` whose first character is dropped.
    stt:
        STT whose codes fill the state and tribe positions of the header.

    Returns
    -------
    The newline-terminated line, a bare newline when ``schemaMgr`` is not a
    RowSchema, or ``None`` for a RowSchema with an unrecognised record type.
    """
    line = ''

    if isinstance(schemaMgr, RowSchema):
        if schemaMgr.record_type == 'HEADER':
            # e.g. HEADER20201CAL000TAN1ED
            # When the STT has a parent state (per the original comment,
            # this means it is a tribe) the state's code is used.
            parent = stt.state if stt.state is not None else stt
            state_fips = str(parent.stt_code).zfill(2)
            tribe_code = str(stt.stt_code) if stt.type == 'tribe' else '000'

            line = f"HEADER{year}{quarter[1:]}{section}{state_fips}{tribe_code}{prog_type}1ED"
        elif schemaMgr.record_type == 'TRAILER':
            line = 'TRAILER' + '1' * 16
        else:
            logger.error("Invalid record type: %s", schemaMgr.record_type)
            return None

    return line + '\n'


def make_files(stt, sub_year, sub_quarter):
    """Generate and store one data file per section configured on an STT.

    Parameters
    ----------
    stt:
        STT whose ``filenames`` keys name the sections to generate.
    sub_year, sub_quarter:
        Fiscal submission year and quarter number (e.g. ``2020``, ``1``).

    Returns
    -------
    dict mapping the short section text to the DataFile created for it.
    Sections whose DataFile could not be created are skipped.
    """
    files_for_quarter = {}

    # The calendar period depends only on the submission period, so it is
    # computed once instead of once per section.
    cal_year, cal_quarter = fiscal_to_calendar(sub_year, 'Q{}'.format(sub_quarter))

    for long_section in stt.filenames.keys():
        text_dict = get_schema_options("", section=long_section, query='text')
        prog_type = text_dict['program_type']  # TAN
        section = text_dict['section']  # A
        models_in_section = get_program_models(prog_type, section)

        lines = [make_HT(header, prog_type, section, cal_year, cal_quarter, stt)]

        # 'contains' check of the long section name against the known kinds
        generates_records = any(
            keyword in long_section for keyword in _RECORD_SECTION_KEYWORDS
        )
        if generates_records:
            for model in models_in_section.values():
                for _ in range(random.randint(1, 3)):
                    lines.append(make_line(model, section, cal_year, cal_quarter))
        # TODO: Aggregate/Stratum sections should probably get fewer lines,
        # ideally derived from the active/closed case data.

        # make trailer line
        lines.append(make_HT(trailer, prog_type, section, cal_year, cal_quarter, stt))

        datafile = build_datafile(
            stt=stt,
            year=sub_year,  # fiscal submission year
            quarter=f"Q{sub_quarter}",  # fiscal submission quarter
            original_filename=f'{stt}-{section}-{sub_year}Q{sub_quarter}.txt',
            file_name=f'{stt}-{section}-{sub_year}Q{sub_quarter}',
            section=long_section,
            file_data=''.join(lines).rstrip().encode('utf-8'),
        )
        if datafile is None:
            logger.warning("Skipping section %s: datafile was not created", long_section)
            continue

        datafile.save()
        # NOTE(review): keyed by the short section text; if two long sections
        # map to the same short text the earlier file is overwritten here.
        # Confirm against get_schema_options before relying on the keys.
        files_for_quarter[section] = datafile

    return files_for_quarter


def make_seed():
    """Invoke scheduling/management/commands/backup_db management command."""
    # Imported here rather than at module level, as in the original.
    from tdpservice.scheduling.management.commands.backup_db import Command as BackupCommand
    backup = BackupCommand()
    backup.handle(file='/tdpapp/tdrs_db_seed.pg')


class Command(BaseCommand):
    """Management command that seeds the database with generated data files."""

    help = "Populate datafiles, records, summaries, and errors for all STTs."

    def handle(self, *args, **options):
        """Generate files, run the parser on each, then dump the database.

        NOTE(review): despite the help text, the query below is currently
        limited to the STT with id 1, fiscal year 2020 and quarters 1-2.
        """
        for stt in STT.objects.filter(id__in=range(1, 2)):  # .all():
            for yr in range(2020, 2021):
                for qtr in [1, 2]:  # , 3, 4]:
                    files_for_qtr = make_files(stt, yr, qtr)
                    self.stdout.write(str(files_for_qtr))
                    for df in files_for_qtr.values():
                        # Built (not saved) summary, kept from the original;
                        # nothing below reads it.
                        dfs = DataFileSummaryFactory.build()
                        dfs.datafile = df
                        parse_task(df.id, False)

        # dump db in full using `make_seed` func
        make_seed()