Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/raft-tech/TANF-app into …
Browse files Browse the repository at this point in the history
…3046-plg-cloud
  • Loading branch information
elipe17 committed Oct 9, 2024
2 parents a8cb835 + 542823f commit a7450d6
Show file tree
Hide file tree
Showing 5 changed files with 201 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,6 @@ cypress.env.json

# Logs
*.log

# DB seeds
tdrs-backend/*.pg
17 changes: 17 additions & 0 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,23 @@ tasks:
cmds:
- docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py shell"

# Run an arbitrary Django management command inside the running `web` container.
# The command (and its arguments) is supplied through the CMD variable,
# e.g. `task backend-exec CMD="migrate"`.
backend-exec:
desc: Execute a command in the backend container
dir: tdrs-backend
vars:
# Re-exposes the CMD variable to this task's templates.
CMD: '{{.CMD}}'
cmds:
# Expands to `python manage.py <CMD>` executed via `sh -c` in the `web` service.
- docker-compose -f docker-compose.yml exec web sh -c "python manage.py {{.CMD}}"

# Bring the backend stack up and seed the database with generated data.
# This task takes no variables: the two management commands are fixed.
backend-exec-seed-db:
  desc: Execute seed_db command in the backend container
  dir: tdrs-backend
  cmds:
    # Make sure the containers are running before exec'ing into `web`.
    - docker-compose -f docker-compose.yml up -d
    # `populate_stts` runs before `seed_db`. The commands are joined with `;`,
    # so `seed_db` still runs if `populate_stts` exits non-zero.
    - docker-compose -f docker-compose.yml exec web sh -c "python manage.py populate_stts; python ./manage.py seed_db"

backend-pytest:
desc: 'Run pytest in the backend container E.g: task backend-pytest PYTEST_ARGS="tdpservice/test/ -s -vv"'
dir: tdrs-backend
Expand Down
Empty file.
Empty file.
181 changes: 181 additions & 0 deletions tdrs-backend/tdpservice/parsers/management/commands/seed_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""`seed_db` command."""

from django.core.management import BaseCommand
from django.core.files.base import ContentFile
from django.db.utils import IntegrityError
from tdpservice.parsers.schema_defs.header import header
from tdpservice.parsers.schema_defs.trailer import trailer
from tdpservice.parsers.schema_defs.utils import get_schema_options, get_program_models
from tdpservice.parsers.util import fiscal_to_calendar
# all models should be referenced by using the utils.py get_schema_options wrappers
from tdpservice.data_files.models import DataFile
# from tdpservice.parsers import parse
from tdpservice.parsers.test.factories import DataFileSummaryFactory
from tdpservice.scheduling.parser_task import parse as parse_task
from tdpservice.stts.models import STT
from tdpservice.users.models import User
from tdpservice.parsers.row_schema import RowSchema
from faker import Faker
import logging
import random

fake = Faker()
logger = logging.getLogger(__name__)

# TODO: consider a custom Faker provider (a `FieldFaker` subclass of
# `faker.providers.BaseProvider`) for generating field values. See
# https://faker.readthedocs.io/en/stable/providers/baseprovider.html#faker.providers.BaseProvider

def build_datafile(stt, year, quarter, original_filename, file_name, section, file_data):
    """Create a DataFile record and attach the generated file contents to it.

    The record is owned by the `system` user (created on first use) and is
    given a random `version` between 1 and 1993415.

    Args:
        stt: the STT the datafile belongs to.
        year: submission year stored on the record.
        quarter: submission quarter stored on the record (e.g. "Q1").
        original_filename: value stored in `original_filename`.
        file_name: name used when saving the file contents.
        section: section stored on the record.
        file_data: raw contents wrapped in a `ContentFile` and saved.

    Returns:
        The created DataFile.

    Raises:
        IntegrityError: if the DataFile row itself cannot be created. The
            error is logged and re-raised so callers see the real cause
            instead of an UnboundLocalError from returning an unset variable.
    """
    try:
        datafile = DataFile.objects.create(
            user=User.objects.get_or_create(username='system')[0],
            stt=stt,
            year=year,
            quarter=quarter,
            original_filename=original_filename,
            section=section,
            version=random.randint(1, 1993415),
        )
    except IntegrityError as e:
        # Without a row there is nothing meaningful to return.
        logger.error(f"Error creating datafile: {e}")
        raise

    try:
        datafile.file.save(file_name, ContentFile(file_data))
    except IntegrityError as e:
        # Best effort: the row exists, so report the failure and still
        # hand the record back to the caller.
        logger.error(f"Error creating datafile: {e}")
    return datafile


def validValues(schemaMgr, field, year, qtr):
    """Return a generated value for a single schema field.

    Args:
        schemaMgr: object exposing `record_type`; used for `RecordType` fields.
        field: field definition exposing `name`, `startIndex` and `endIndex`.
        year: year used as the prefix of `RPT_MONTH_YEAR` values.
        qtr: quarter string such as "Q2"; everything after the first
            character is parsed as the quarter number.

    Returns:
        - the schema's record type for the `RecordType` field;
        - `<year><MM>` for `RPT_MONTH_YEAR`, where MM is a random month
          inside the given quarter;
        - random letters for `SSN`, random digits for every other field,
          `endIndex - startIndex` characters long.
    """
    if field.name == 'RecordType':
        return schemaMgr.record_type

    # Exact comparison: `name in ('RPT_MONTH_YEAR')` is a substring test on a
    # plain string, so names such as 'MONTH' or 'YEAR' would match as well.
    if field.name == 'RPT_MONTH_YEAR':
        # Quarter N spans months 3N-2 through 3N.
        last_month = int(qtr[1:]) * 3
        month = random.randint(last_month - 2, last_month)
        # The value has no '#'/'?' placeholders, so it is returned as-is.
        return '{}{:02d}'.format(year, month)

    field_len = field.endIndex - field.startIndex
    # TODO: reverse the TransformField logic to 'encrypt' a random number.
    # SSN is only used by record types 2, 3 and 5; '?' yields letters and
    # '#' yields digits when passed through `bothify`.
    placeholder = '?' if field.name == 'SSN' else '#'
    return fake.bothify(text=placeholder * field_len)


def make_line(schemaMgr, section, year, qtr):
    """Build one newline-terminated record line from a schema manager.

    A value is generated for every field of the manager's first schema and
    the values are concatenated in field order. `section` is accepted but
    not used here.
    """
    # Only the first schema is used; multi-schema managers (e.g. T6) are not
    # iterated yet.
    schema = schemaMgr.schemas[0]

    line = ''
    for field in schema.fields:
        line = ''.join((line, validValues(schema, field, year, qtr)))
        # Progress trace: prints the line accumulated so far after each field.
        print(f"Field: {field.name}, field length {field.endIndex - field.startIndex} Value: {line}")

    return ''.join((line, '\n'))

def make_HT(schemaMgr, prog_type, section, year, quarter, stt):
    """Build the header or trailer line for a generated file.

    Returns the line terminated by a newline. A `schemaMgr` that is not
    exactly a `RowSchema` produces just the newline. A `RowSchema` whose
    record type is neither HEADER nor TRAILER prints a message and
    returns None.
    """
    if type(schemaMgr) is not RowSchema:
        return '' + '\n'

    if schemaMgr.record_type == 'HEADER':
        # e.g. HEADER20201CAL000TAN1ED
        # An STT with a parent state is a tribe: take the state code from the
        # parent, otherwise from the STT itself.
        my_stt = stt.state if stt.state is not None else stt
        state_fips = '{}'.format(my_stt.stt_code).zfill(2)
        if stt.type == 'tribe':
            tribe_code = '{}'.format(stt.stt_code)
        else:
            tribe_code = '000'
        return f"HEADER{year}{quarter[1:]}{section}{state_fips}{tribe_code}{prog_type}1ED" + '\n'

    if schemaMgr.record_type == 'TRAILER':
        return 'TRAILER' + '1' * 16 + '\n'

    print('Invalid record type')
    return None

def make_files(stt, sub_year, sub_quarter):
    """Generate and store one datafile per section the STT reports.

    For every key of `stt.filenames` a file is assembled from a header line,
    one to three generated lines per model in the section, and a trailer
    line. The text is then stored through `build_datafile`.

    Returns a dict mapping the short section code to the saved datafile.
    """
    files_for_quarter = {}

    for long_section in stt.filenames.keys():
        text_dict = get_schema_options("", section=long_section, query='text')
        prog_type = text_dict['program_type']  # TAN
        section = text_dict['section']  # A

        # Header and record lines carry calendar values, while the datafile
        # record below keeps the fiscal submission year and quarter.
        cal_year, cal_quarter = fiscal_to_calendar(sub_year, 'Q{}'.format(sub_quarter))

        temp_file = ''
        temp_file += make_HT(header, prog_type, section, cal_year, cal_quarter, stt)

        for model in get_program_models(prog_type, section).values():
            # Substring match of the long section name against known kinds.
            known_kind = any(
                marker in long_section
                for marker in ('Active Case', 'Closed Case', 'Aggregate', 'Stratum')
            )
            if not known_kind:
                continue
            for _ in range(random.randint(1, 3)):
                temp_file += make_line(model, section, cal_year, cal_quarter)

        temp_file += make_HT(trailer, prog_type, section, cal_year, cal_quarter, stt)

        datafile = build_datafile(
            stt=stt,
            year=sub_year,  # fiscal submission year
            quarter=f"Q{sub_quarter}",  # fiscal submission quarter
            original_filename=f'{stt}-{section}-{sub_year}Q{sub_quarter}.txt',
            file_name=f'{stt}-{section}-{sub_year}Q{sub_quarter}',
            section=long_section,
            file_data=temp_file.rstrip().encode('utf-8'),
        )
        datafile.save()
        files_for_quarter[section] = datafile

    return files_for_quarter

def make_seed():
    """Dump the database by running the scheduling `backup_db` command.

    The command's `handle` is called directly with the output path
    `/tdpapp/tdrs_db_seed.pg`.
    """
    # Imported inside the function, as in the original.
    from tdpservice.scheduling.management.commands import backup_db

    backup_db.Command().handle(file='/tdpapp/tdrs_db_seed.pg')

class Command(BaseCommand):
    """Management command that seeds the database with generated datafiles."""

    help = "Populate datafiles, records, summaries, and errors for all STTs."

    def handle(self, *args, **options):
        """Generate datafiles, parse them, then dump the database.

        The run is currently limited to the STT with id 1, fiscal year 2020
        and quarters 1 and 2.
        """
        seed_stts = STT.objects.filter(id__in=range(1, 2))  # .all():
        seed_years = range(2020, 2021)
        seed_quarters = [1, 2]  # , 3, 4]:

        for stt in seed_stts:
            for yr in seed_years:
                for qtr in seed_quarters:
                    files_for_qtr = make_files(stt, yr, qtr)
                    print(files_for_qtr)
                    for df in files_for_qtr.values():
                        # NOTE(review): the summary is built and linked to the
                        # datafile but never used afterwards; confirm intent.
                        dfs = DataFileSummaryFactory.build()
                        dfs.datafile = df
                        parse_task(df.id, False)

        # dump db in full using `make_seed` func
        make_seed()

0 comments on commit a7450d6

Please sign in to comment.