forked from HHS/TANF-app
* mgmt cmd structure
* Pushing after pair w/ Jan
* Pushing latest prior to lunch
* ignore for research/grep results
* End of day commit, successful run w/o syntax issues. Need to do something with files
* just the useful changes, sorry for the mess
* latest work for RPY to get around preparser blocker
* Latest changes, removing comments/prints mostly.
* functional again after a header rework
* Cleaning up comments/prints for PR
* Linter clean-up
* linter pt2
* Improvements to header, linting, and Eric's PR feedback
* Fixed quarter preparing issue
* Comment clean up, linting fix

---------

Co-authored-by: andrew-jameson <[email protected]>
Commit 542823f (1 parent: 30513b6). Showing 5 changed files with 200 additions and 1 deletion.
.gitignore

@@ -108,4 +108,5 @@ tfapply
 cypress.env.json

 # Patches
-*.patch
+*.patch
+tdrs-backend/*.pg
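The new ignore pattern lines up with the seed command added below, which dumps the database to a .pg file. A small illustrative check of the glob follows; fnmatch only approximates gitignore matching, and the assumption that the dump ends up under tdrs-backend/ in the working tree is inferred from the hard-coded /tdpapp/tdrs_db_seed.pg path, not stated by this commit:

from fnmatch import fnmatch

# seed_db's make_seed() writes /tdpapp/tdrs_db_seed.pg; if that file lands
# under tdrs-backend/ in the checkout, the new pattern keeps it out of git.
print(fnmatch('tdrs-backend/tdrs_db_seed.pg', 'tdrs-backend/*.pg'))  # True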
Two new empty files.
tdrs-backend/tdpservice/parsers/management/commands/seed_db.py (181 additions, 0 deletions)
@@ -0,0 +1,181 @@
"""`seed_db` command."""

from django.core.management import BaseCommand
from django.core.files.base import ContentFile
from django.db.utils import IntegrityError
from tdpservice.parsers.schema_defs.header import header
from tdpservice.parsers.schema_defs.trailer import trailer
from tdpservice.parsers.schema_defs.utils import get_schema_options, get_program_models
from tdpservice.parsers.util import fiscal_to_calendar
# all models should be referenced by using the utils.py get_schema_options wrappers
from tdpservice.data_files.models import DataFile
# from tdpservice.parsers import parse
from tdpservice.parsers.test.factories import DataFileSummaryFactory
from tdpservice.scheduling.parser_task import parse as parse_task
from tdpservice.stts.models import STT
from tdpservice.users.models import User
from tdpservice.parsers.row_schema import RowSchema
from faker import Faker
import logging
import random

fake = Faker()
logger = logging.getLogger(__name__)

# https://faker.readthedocs.io/en/stable/providers/baseprovider.html#faker.providers.BaseProvider
# """ class FieldFaker(faker.providers.BaseProvider):..."""


def build_datafile(stt, year, quarter, original_filename, file_name, section, file_data):
    """Build a datafile."""
    try:
        d = DataFile.objects.create(
            user=User.objects.get_or_create(username='system')[0],
            stt=stt,
            year=year,
            quarter=quarter,
            original_filename=original_filename,
            section=section,
            version=random.randint(1, 1993415),
        )

        d.file.save(file_name, ContentFile(file_data))
    except IntegrityError as e:
        logger.error(f"Error creating datafile: {e}")
        return None
    return d


def validValues(schemaMgr, field, year, qtr):
    """Take in a field and return a line of valid values."""
    field_len = field.endIndex - field.startIndex

    if field.name == 'RecordType':
        return schemaMgr.record_type
    if field.name == 'SSN':
        # only used by recordtypes 2,3,5
        # TODO: reverse the TransformField logic to 'encrypt' a random number
        field_format = '?' * field_len
    elif field.name == 'RPT_MONTH_YEAR':  # previously had CALENDAR_QUARTER
        # given a quarter, set upper/lower bounds for the month
        qtr = qtr[1:]
        upper = int(qtr) * 3
        lower = upper - 2

        month = '{}'.format(random.randint(lower, upper)).zfill(2)
        field_format = '{}{}'.format(year, str(month))
    else:
        if field.friendly_name == 'Family Affiliation':
            print('Family Affiliation')
        field_format = '#' * field_len
    return fake.bothify(text=field_format)


def make_line(schemaMgr, section, year, qtr):
    """Take in a schema manager and return a line of data."""
    line = ''

    # for row_schema in schemaMgr.schemas:  # this is to handle multi-schema like T6
    # if len(schemaMgr.schemas) > 1:
    row_schema = schemaMgr.schemas[0]

    for field in row_schema.fields:
        line += validValues(row_schema, field, year, qtr)
        print(f"Field: {field.name}, field length {field.endIndex - field.startIndex} Value: {line}")
    return line + '\n'

def make_HT(schemaMgr, prog_type, section, year, quarter, stt):
    """Handle special case of header/trailer lines."""
    line = ''

    if type(schemaMgr) is RowSchema:
        if schemaMgr.record_type == 'HEADER':
            # e.g. HEADER20201CAL000TAN1ED

            if stt.state is not None:  # this is a tribe
                my_stt = stt.state
            else:
                my_stt = stt
            state_fips = '{}'.format(my_stt.stt_code).zfill(2)
            # state_fips = stt.state.stt_code if stt.state is not None else stt.stt_code
            tribe_code = '{}'.format(stt.stt_code) if stt.type == 'tribe' else '000'

            line = f"HEADER{year}{quarter[1:]}{section}{state_fips}{tribe_code}{prog_type}1ED"

        elif schemaMgr.record_type == 'TRAILER':
            line += 'TRAILER' + '1' * 16
        else:
            print('Invalid record type')
            return None

    return line + '\n'

def make_files(stt, sub_year, sub_quarter):
    """Given a STT, parameterize calls to build_datafile and make_line."""
    sections = stt.filenames.keys()
    files_for_quarter = {}

    for long_section in sections:
        text_dict = get_schema_options("", section=long_section, query='text')
        prog_type = text_dict['program_type']  # TAN
        section = text_dict['section']  # A
        models_in_section = get_program_models(prog_type, section)
        temp_file = ''

        cal_year, cal_quarter = fiscal_to_calendar(sub_year, 'Q{}'.format(sub_quarter))
        temp_file += make_HT(header, prog_type, section, cal_year, cal_quarter, stt)

        # iterate over models and generate lines
        for _, model in models_in_section.items():
            # below is equivalent to 'contains' for the tuple
            if any(section in long_section for section in ('Active Case', 'Closed Case', 'Aggregate', 'Stratum')):
                for i in range(random.randint(1, 3)):
                    temp_file += make_line(model, section, cal_year, cal_quarter)
            # elif section in ['Aggregate Data', 'Stratum Data']:
            #     # we should generate a smaller count of lines...maybe leave this as a TODO
            #     # shouldn't this be based on the active/closed case data?
            #     pass

        # make trailer line
        temp_file += make_HT(trailer, prog_type, section, cal_year, cal_quarter, stt)
        # print(temp_file)

        datafile = build_datafile(
            stt=stt,
            year=sub_year,  # fiscal submission year
            quarter=f"Q{sub_quarter}",  # fiscal submission quarter
            original_filename=f'{stt}-{section}-{sub_year}Q{sub_quarter}.txt',
            file_name=f'{stt}-{section}-{sub_year}Q{sub_quarter}',
            section=long_section,
            file_data=bytes(temp_file.rstrip(), 'utf-8'),
        )
        datafile.save()
        files_for_quarter[section] = datafile

    return files_for_quarter

def make_seed():
    """Invoke scheduling/management/commands/backup_db management command."""
    from tdpservice.scheduling.management.commands.backup_db import Command as BackupCommand
    backup = BackupCommand()
    backup.handle(file='/tdpapp/tdrs_db_seed.pg')


class Command(BaseCommand):
    """Command class."""

    help = "Populate datafiles, records, summaries, and errors for all STTs."

    def handle(self, *args, **options):
        """Populate datafiles, records, summaries, and errors for all STTs."""
        for stt in STT.objects.filter(id__in=range(1, 2)):  # .all():
            for yr in range(2020, 2021):
                for qtr in [1, 2]:  # , 3, 4]:
                    files_for_qtr = make_files(stt, yr, qtr)
                    print(files_for_qtr)
                    for f in files_for_qtr.keys():
                        df = files_for_qtr[f]
                        dfs = DataFileSummaryFactory.build()
                        dfs.datafile = df
                        parse_task(df.id, False)

        # dump db in full using `make_seed` func
        make_seed()
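For anyone reviewing this locally, a minimal usage sketch of the new command follows. It assumes a configured Django settings module for tdrs-backend; the invocation details are illustrative and not part of this commit.

# Hedged sketch: exercise the new seed_db command programmatically.
# `python manage.py seed_db` would be the equivalent CLI form.
from django.core.management import call_command
from faker import Faker

# Builds fake datafiles for the STT/year/quarter ranges hard-coded in
# Command.handle, runs parse_task on each, then dumps the database to
# /tdpapp/tdrs_db_seed.pg via make_seed().
call_command('seed_db')

# The per-field values come from Faker's bothify(): '#' placeholders become
# random digits and '?' placeholders become random letters, matching the
# field_format strings built in validValues(). Output values are random.
fake = Faker()
print(fake.bothify(text='####'))       # e.g. '4821'
print(fake.bothify(text='?????????'))  # e.g. 'kqzvbnmla'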