diff --git a/.github/workflows/deploy-on-label.yml b/.github/workflows/deploy-on-label.yml index 20fbfd8b3..8d4720f07 100644 --- a/.github/workflows/deploy-on-label.yml +++ b/.github/workflows/deploy-on-label.yml @@ -58,6 +58,17 @@ jobs: STATE=${{ fromJson(steps.get-pr-checks.outputs.data).state }} echo "::set-output name=STATE::$STATE" echo "Current PR state: $STATE" + + - name: Live Environment Badge + uses: schneegans/dynamic-badges-action@v1.7.0 + with: + auth: ${{ secrets.LIVE_ENV_GIST }} + gistID: ded3a260ed8245a5b231ba726b3039df + filename: Live-Environments-${{steps.extract-deploy-env.outputs.DEPLOY_ENV}}.json + label: ${{steps.extract-deploy-env.outputs.DEPLOY_ENV}} + message: "${{ github.head_ref }} - ${{ github.sha }}" + color: blue + - name: Circle CI Deployment Trigger id: curl-circle-ci if: steps.get-pr-state.outputs.STATE == 'success' diff --git a/README.md b/README.md index 424accaa6..4e8241331 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,8 @@ Our vision is to build a new, secure, web-based data reporting system to improve |**Frontend Coverage**| [![Codecov-Frontend-Dev](https://codecov.io/gh/raft-tech/TANF-app/branch/develop/graph/badge.svg?flag=dev-frontend)](https://codecov.io/gh/raft-tech/TANF-app?flag=dev-frontend) | [![Codeco-Frontend-HHS](https://codecov.io/gh/HHS/TANF-app/branch/main/graph/badge.svg?flag=main-frontend)](https://codecov.io/gh/HHS/TANF-app?flag=main-frontend) | [![Codeco-Frontend-HHS](https://codecov.io/gh/HHS/TANF-app/branch/master/graph/badge.svg?flag=master-frontend)](https://codecov.io/gh/HHS/TANF-app?flag=master-frontend) |**Backend Coverage**| [![Codecov-Backend-Dev](https://codecov.io/gh/raft-tech/TANF-app/branch/develop/graph/badge.svg?flag=dev-backend)](https://codecov.io/gh/raft-tech/TANF-app/branch/develop?flag=dev-backend)| [![Codecov-Backend-HHS]( https://codecov.io/gh/HHS/TANF-app/branch/main/graph/badge.svg?flag=main-backend)](https://codecov.io/gh/HHS/TANF-app/branch/main?flag=main-backend) | 
[![Codecov-Backend-HHS]( https://codecov.io/gh/HHS/TANF-app/branch/master/graph/badge.svg?flag=master-backend)](https://codecov.io/gh/HHS/TANF-app/branch/master?flag=master-backend) +[Link to Current Development Deployments](https://github.com/raft-tech/TANF-app/blob/feat/1860/docs/Technical-Documentation/TDP-environments-README.md#development) + *Due to limitations imposed by Github and occasional slow server response times, some badges may require a page refresh to load.* *TDP is subject to the **[ACF Privacy Policy](https://www.acf.hhs.gov/privacy-policy)** and **[HHS Vulnerability Disclosure Policy](https://www.hhs.gov/vulnerability-disclosure-policy/index.html)***. diff --git a/Taskfile.yml b/Taskfile.yml new file mode 100644 index 000000000..74f3e9c7c --- /dev/null +++ b/Taskfile.yml @@ -0,0 +1,196 @@ +version: '3' + +tasks: + + create-network: + desc: Create the external network + cmds: + - docker network create external-net + + init-backend: + desc: Initialize the backend project + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml up -d --build + - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py makemigrations" + - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py migrate" + - docker-compose -f docker-compose.yml down + + drop-db: + desc: Drop the backend database + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml down + - docker volume rm tdrs-backend_postgres_data + + backend-up: + desc: Start backend web server + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml up -d + + backend-down: + desc: Stop backend web server + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml down + + backend-logs: + desc: Show and follow backend web server logs + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml logs -f + + backend-restart: + desc: Restart backend web server + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml 
restart -d + + backend-bash: + desc: Open a shell in the backend container + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml exec web sh + + backend-shell: + desc: Open a Django shell in the backend container + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py shell" + + backend-pytest: + desc: 'Run pytest in the backend container E.g: task backend-pytest PYTEST_ARGS="tdpservice/test/ -s -vv"' + dir: tdrs-backend + vars: + PYTEST_ARGS: '{{.PYTEST_ARGS | default "."}}' + cmds: + - task backend-up + - docker-compose -f docker-compose.yml exec web sh -c "pytest {{.PYTEST_ARGS}}" + + backend-remove-volumes: + desc: Remove the backend volumes + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml down -v + + backend-lint: + desc: Run flake8 in the backend container + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml exec web sh -c "flake8 . && if [ $? -eq 0 ]; then echo 'Flake8 linter found no issues'; fi" + + backend-pip-lock: + #TODO: Add a task to lock the pip dependencies + desc: Lock the pip dependencies + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml exec web sh -c "pipenv lock" + + psql: + desc: Open a psql shell in the backend container + dir: tdrs-backend + cmds: + - task create-network || true + - docker-compose -f docker-compose.yml up -d postgres + - sleep 5 + - docker-compose -f docker-compose.yml exec postgres sh -c "psql -U tdpuser -d tdrs_test" + + clean: + desc: Remove all containers, networks, and volumes + cmds: + - docker-compose -f tdrs-backend/docker-compose.yml down -v + - docker-compose -f tdrs-frontend/docker-compose.yml down -v + - docker system prune -f -a + + clamav-up: + desc: Start clamav service + dir: tdrs-backend + cmds: + - docker-compose -f docker-compose.yml up -d clamav-rest + + frontend-up: + desc: Start frontend web server + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml up -d + + 
frontend-down: + desc: Stop frontend web server + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml down + + frontend-restart: + desc: Restart frontend web server + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml restart -d + + frontend-av: + desc: Start frontend with optional clamav service + dir: tdrs-frontend + cmds: + - task: frontend-up + - task: clamav-up + + + # need more work + frontend-init: + desc: Initialize the frontend project + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml up -d --build + - docker-compose -f docker-compose.yml exec tdp-frontend sh -c "apk add nodejs npm" + - docker-compose -f docker-compose.yml exec tdp-frontend sh -c "npm install" + - docker-compose -f docker-compose.yml down + + frontend-test: + desc: Run frontend tests + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.local.yml up tdp-frontend-test -d + - docker-compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test" + + frontend-test-cov: + desc: Run frontend tests with coverage + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.local.yml up tdp-frontend-test -d + - docker-compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test:cov" + + frontend-lint: + desc: Run eslint in the frontend container + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.local.yml up -d tdp-frontend-test --quiet-pull + - docker-compose -f docker-compose.yml exec tdp-frontend-test sh -c "npm run lint" + + frontend-logs: + desc: Show and follow frontend web server logs + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml logs -f + + frontend-bash: + desc: Open a shell in the frontend container + dir: tdrs-frontend + cmds: + - docker-compose -f docker-compose.yml exec tdp-frontend sh + + up: + desc: Start both frontend and backend web servers + cmds: + - task: backend-up + - task: frontend-up + + down: + desc: Stop both frontend and backend 
web servers + cmds: + - task: backend-down + - task: frontend-down + + help: + desc: Show this help message + cmds: + - task --list \ No newline at end of file diff --git a/commands.sh b/commands.sh deleted file mode 100644 index 0ecd0fe91..000000000 --- a/commands.sh +++ /dev/null @@ -1,237 +0,0 @@ -#!/usr/bin/sh -# You will need to set this variable to match your local directory structure -# TDRS_HOME="$HOME/Where/Ever/You/Want/TANF-app" - -# navigate terminal to tdrs home if $TDRS_HOME is set -alias cd-tdrs='cd "$TDRS_HOME"' - -# navigate terminal to tdrs frontend if $TDRS_HOME is set -alias cd-tdrs-frontend='cd "$TDRS_HOME/tdrs-frontend"' - -# navigate terminal to tdrs backend if $TDRS_HOME is set -alias cd-tdrs-backend='cd "$TDRS_HOME/tdrs-backend"' - -# shortcut for applying all relavent compose files for local development -# I.E. `cd-tdrs-frontend && tdrs-compose-local up` -alias tdrs-compose-local='docker-compose -f docker-compose.local.yml' - -# Stop tdrs backend entirely, then start it up again -alias tdrs-backend-hard-restart='tdrs-stop-backend && tdrs-start-backend' - -# shortcut for running bash commands in backend container -alias tdrs-backend-exec='tdrs-compose-backend exec web /bin/bash' - -# Open shell_plus for django backend inside of container -alias tdrs-django-shell='tdrs-compose-backend run --rm web bash -c "python manage.py shell_plus"' - -# start both the frontend and backend -alias tdrs-start='tdrs-start-backend && tdrs-start-frontend' - -# Stop both the frontend and the backend -alias tdrs-stop='tdrs-stop-frontend && tdrs-stop-backend' - -# Restart frontend and backend -alias tdrs-restart='tdrs-restart-backend && tdrs-restart-frontend' - -# start all backend containers -alias tdrs-start-backend='tdrs-compose-backend up -d' - -# run npm install updating all dependencies and start the dev server -alias tdrs-start-frontend='tdrs-compose-frontend up -d' - -# Stop all containers for the backend -alias tdrs-stop-backend='tdrs-compose-backend 
down' - -# stop the frontend development server -alias tdrs-stop-frontend='tdrs-compose-frontend down' - -# restart the frontends, mainly to rebuild dependencies -alias tdrs-restart-frontend='tdrs-compose-frontend restart' - -# restart all containers for the backend -alias tdrs-restart-backend='tdrs-compose-backend restart' - -# to restart just django, keeping the other containers intact. -alias tdrs-restart-django='tdrs-compose-backend restart web' - -# starts containers with the optional clamav image -alias tdrs-start-av='tdrs-start-frontend --remove-orphans && cd-tdrs-backend && tdrs-compose-local up -d --remove-orphans && docker-compose up -d clamav-rest && cd ..' - -# Run frontend unit tests through jest -alias tdrs-run-jest='tdrs-npm-run test' - -# Run frontend unit tests through jest with coverage report -alias tdrs-run-jest-cov='tdrs-npm-run test:cov' - -# run any new migrations for django backend -alias tdrs-run-migrations='tdrs-compose-backend run web python manage.py migrate' - -# Generate new migrations from changes to models for django backend -alias tdrs-make-migrations='tdrs-compose-backend run --rm web python manage.py makemigrations' - -# Nuke all non running docker data -alias tdrs-prune-all-docker-data='docker system prune -a && docker system prune --volumes' - -# Run eslint against frontend source from frontend container -alias tdrs-lint-frontend='tdrs-npm-run lint' - -# Opens up logs for backend -alias tdrs-logs-backend='docker logs tdrs-backend-web-1 -f' - -# Update backend lockfile -alias tdrs-piplock="tdrs-compose-backend run --rm web pipenv lock" - -# I had to deal with the following issue: -# https://stackoverflow.com/questions/27093746/django-stops-working-with-runtimeerror-populate-isnt-reentrant -# So let's create an alias to ssh into container and sed the file -alias tdrs-fix-django-populate='tdrs-backend-exec && sed -i "s/raise Runtime..populate.. 
isn.t reentrant../self.app_configs = {}/g" /usr/local/lib/python3.10/site-packages/django/apps/registry.py' - -# A recurring pattern I was doing to get a fresh setup -alias tdrs-fresh-start='tdrs-stop && docker system prune --volumes && tdrs-start' - -# Deploy current branch to the given environment -# See comments in main function for to set up Circle CI token -alias tdrs-deploy='tdrs-run-deploy' - -# run flake8 against backend source from inside of web container -tdrs-lint-backend() { - tdrs-compose-backend run --rm web bash -c "flake8 . && if [ $? -eq 0 ]; then echo 'Flake8 linter found no issues'; fi" -} - -# create docker network for tdrs if it doesn't exist - tdrs-docker-net() { - docker network inspect external-net >/dev/null 2>&1 \ - || docker network create external-net - } - -# short cut for running compose sub commands on backend -tdrs-compose-backend() { - cd-tdrs - tdrs-docker-net - cd tdrs-backend && tdrs-compose-local $@ - cd .. -} - -# short cut for running compose sub commands on backend -tdrs-compose-frontend() { - cd-tdrs - cd tdrs-frontend && tdrs-compose-local $@ - cd .. -} - -# Stop the backend if its running and rebuild the docker container for django -tdrs-rebuild-backend() { - cd-tdrs - tdrs-stop-backend - cd tdrs-backend && tdrs-compose-local up --build -d web - cd .. -} - -# Fix all automatically fixable linting errors for the frontend -tdrs-fix-lint-frontend() { - cd-tdrs-frontend - eslint --fix ./src - cd .. -} - -# Shortcut for running npm scripts for the frontend -tdrs-npm-run() { - cd-tdrs - cd tdrs-frontend/ && npm run $@ - cd .. -} - -# Run pa11y tests on frontend -tdrs-run-pa11y() { - cd tdrs-frontend; mkdir pa11y-screenshots/; npm run test:accessibility - cd .. 
-} - - -# Spin up backend services and run pytest in docker -tdrs-run-pytest () { - - cd-tdrs - tdrs-start-av - cd tdrs-backend/ - - # to escape quoted arguements that would be passed to docker inside of a quote - if [ "$#" -lt 1 ]; then - quoted_args="" - else - quoted_args="$(printf " %q" "${@}")" - fi - tdrs-compose-local run --rm web bash -c "./wait_for_services.sh && pytest ${quoted_args}" - cd .. -} - - -# Run owasp scan for backend assuming circle ci environment -tdrs-run-backend-owasp() { - if [[ $(docker network inspect external-net 2>&1 | grep -c Scope) == 0 ]]; then - docker network create external-net - fi - cd-tdrs-backend - - # We don't need to use the local compose file - # because we are trying to simulate a production environment - - docker-compose up -d --build - docker-compose run --rm zaproxy bash -c \ - "PATH=$PATH:/home/zap/.local/bin && - pip install wait-for-it && - wait-for-it --service http://web:8080 \ - --timeout 60 \ - -- echo \"Django is ready\"" - cd .. - cd-tdrs-frontend - docker-compose up -d --build - cd .. - ./scripts/zap-scanner.sh backend circle -} - -# Run owasp scan for frontend assuming circle ci environment -tdrs-run-frontend-owasp() { - if [[ $(docker network inspect external-net 2>&1 | grep -c Scope) == 0 ]]; then - docker network create external-net - fi - cd-tdrs-backend - - # We don't need to use the local compose file - # because we are trying to simulate a production environment - - docker-compose up -d --build - docker-compose run --rm zaproxy bash -c \ - "PATH=$PATH:/home/zap/.local/bin && - pip install wait-for-it && - wait-for-it --service http://web:8080 \ - --timeout 60 \ - -- echo \"Django is ready\"" - cd .. - cd-tdrs-frontend - docker-compose up -d --build - cd .. 
- ./scripts/zap-scanner.sh frontend circle -} - -tdrs-run-deploy() { - # Circle CI token can be generated here: https://app.circleci.com/settings/user/tokens - # Once generated add it to your shell profile as CIRCLE_CI_TOKEN - # Use like: tdrs-deploy sandbox - TARGET_ENV=$1 - BRANCH=$(git rev-parse --abbrev-ref HEAD) - - echo "Deploying branch $BRANCH to $TARGET_ENV" - - curl --request POST \ - --url https://circleci.com/api/v2/project/github/raft-tech/TANF-app/pipeline \ - --header 'Circle-Token: '$CIRCLE_CI_TOKEN \ - --header 'content-type: application/json' \ - --data '{"parameters":{"triggered": true, "run_dev_deployment": true, "target_env":"'$TARGET_ENV'"}, "branch":"'$BRANCH'"}' -} - -# List all aliases and functions associated with tdrs -alias tdrs-functions='declare -F|grep tdrs && alias|grep tdrs|cut -d" " -f1 --complement' - -# Get logs on backend -alias tdrs-backend-log="docker logs $(docker ps|grep web|awk '{print $1}')" diff --git a/docs/Security-Compliance/Security-Controls/cm-7-2/images/clamavcm7.PNG b/docs/Security-Compliance/Security-Controls/cm-7-2/images/clamavcm7.PNG index 7dd04cadd..d3a599839 100644 Binary files a/docs/Security-Compliance/Security-Controls/cm-7-2/images/clamavcm7.PNG and b/docs/Security-Compliance/Security-Controls/cm-7-2/images/clamavcm7.PNG differ diff --git a/docs/Technical-Documentation/TDP-environments-README.md b/docs/Technical-Documentation/TDP-environments-README.md index 162ac7275..214c5183a 100644 --- a/docs/Technical-Documentation/TDP-environments-README.md +++ b/docs/Technical-Documentation/TDP-environments-README.md @@ -2,11 +2,18 @@ ## Development -| Dev Site | Frontend URL | Backend URL | Purpose | -| -------- | -------- | -------- | -------- | -| A11y | https://tdp-frontend-a11y.app.cloud.gov | https://tdp-frontend-a11y.app.cloud.gov/admin/ | Space for accessibility testing | -| QASP | https://tdp-frontend-qasp.app.cloud.gov | https://tdp-frontend-qasp.app.cloud.gov/admin/ | Space for QASP review | -| raft | 
https://tdp-frontend-raft.app.cloud.gov | https://tdp-frontend-raft.app.cloud.gov/admin/ | Space for raft review | +| Dev Site | Frontend URL | Backend URL | Branch/Commit | Purpose | +| -------- | -------- | -------- | -------- | -------- | +| A11y | https://tdp-frontend-a11y.app.cloud.gov | https://tdp-frontend-a11y.app.cloud.gov/admin/ | | Space for accessibility testing | +| QASP | https://tdp-frontend-qasp.app.cloud.gov | https://tdp-frontend-qasp.app.cloud.gov/admin/ | | Space for QASP review | +| raft | https://tdp-frontend-raft.app.cloud.gov | https://tdp-frontend-raft.app.cloud.gov/admin/ | + | Space for raft review | + +![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/andrew-jameson/ded3a260ed8245a5b231ba726b3039df/raw/Live-Environments-raft.json) + +![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/andrew-jameson/ded3a260ed8245a5b231ba726b3039df/raw/Live-Environments-a11y.json) + +![badge](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/andrew-jameson/ded3a260ed8245a5b231ba726b3039df/raw/Live-Environments-qasp.json) ### Dependencies diff --git a/docs/Technical-Documentation/clean-and-reparse.md b/docs/Technical-Documentation/clean-and-reparse.md new file mode 100644 index 000000000..92175ab02 --- /dev/null +++ b/docs/Technical-Documentation/clean-and-reparse.md @@ -0,0 +1,233 @@ +# Clean and Re-parse DataFiles + +## Background +As TDP has evolved so has it's validation mechanisms, messages, and expansiveness. As such, many of the datafiles locked in the database and S3 +have not undergone TDP's latest and most stringent validation processes. Because data quality is so important to all TDP stakeholders +we wanted to introduce a way to re-parse and subsequently re-validate datafiles that have already been submitted to TDP to enhance the integrity +and the quality of the submissions. 
The following lays out the process TDP takes to automate and execute this process, and how this process can +be tested locally and in our deployed environments. + +# Clean and Re-parse Flow +As a safety measure, this process must ALWAYS be executed manually by a system administrator. Once executed, all processes thereafter are completely +automated. The steps below outline how this process executes. + +1. System admin logs in to the appropriate backend application. E.g. `tdp-backend-raft`. + - See [OFA Admin Backend App Login](#OFA-Admin-Backend-App-Login) instructions below +2. System admin executes the `clean_and_reparse` Django command. E.g. `python manage.py clean_and_reparse ...options`. +3. System admin validates the command is selecting the appropriate set of datafiles to reparse and executes the command. +4. `clean_and_reparse` collects the appropriate datafiles that match the system admin's command choices. +5. `clean_and_reparse` executes a backup of the Postgres database. +6. `clean_and_reparse` creates/deletes appropriate Elastic indices pending the system admin's command choices. +7. `clean_and_reparse` deletes documents from appropriate Elastic indices pending the system admin's command choices. +8. `clean_and_reparse` deletes all Postgres rows associated to all selected datafiles. +9. `clean_and_reparse` deletes `DataFileSummary` and `ParserError` objects associated with the selected datafiles. +10. `clean_and_reparse` re-saves the selected datafiles to the database. +11. `clean_and_reparse` pushes a new `parser_task` onto the Redis queue for each of the selected datafiles. + +## Local Clean and Re-parse +Make sure you have submitted a few datafiles, ideally across program types and fiscal timeframes. + +1. Browse the [indices](http://localhost:9200/_cat/indices/?pretty&v&s=index) and the DAC and verify the indices reflect the document counts you expect and the DAC reflects the record counts you expect. +2. Exec into the backend container. +3. 
Execute `python manage.py clean_and_reparse -h` to get an idea of what options you might want to specify. +4. Execute the `clean_and_reparse` command with your selected options. +5. Verify in the above URL that Elastic is consistent with the options you selected. +6. Verify the DAC has the same number of records as in step 1. + +### Local Examples +This section assumes that you have submitted the following files: `ADS.E2J.FTP1.TS06`, `cat_4_edge_case.txt`, and `small_ssp_section1.txt`. After submitting, your indices should match the indices below: +``` +index docs.count +.kibana_1 1 +dev_ssp_m1_submissions 5 +dev_ssp_m2_submissions 6 +dev_ssp_m3_submissions 8 +dev_tanf_t1_submissions 817 +dev_tanf_t2_submissions 884 +dev_tanf_t3_submissions 1380 +``` +All tests are considered to have been run INDEPENDENTLY. For each test, your Elastic and DAC state should match the initial conditions above. The commands in the section below should be run in between each test if you want to match the expected output. + +#### Some Useful Commands to Reset Elastic State +The commands should ALWAYS be executed in the order they appear below. +1. curl -X DELETE 'http://localhost:9200/dev*' +2. python manage.py search_index --rebuild + +#### Clean and Re-parse All with New Indices and Keeping Old Indices +1. Execute `python manage.py clean_and_reparse -a -n` + - If this is the first time you're executing a command with new indices, because we have to create an alias in Elastic with the same name as the + original index i.e. (`dev_tanf_t1_submissions`), the old indices no matter whether you specified `-d` or not will be deleted. Thereafter, + the command will always respect the `-d` switch. +2. Expected Elastic results. + - If this is the first time you have run the command, the [indices](http://localhost:9200/_cat/indices/?pretty&v&s=index) url should reflect 21 indices prefixed with `dev` and they should contain the same number of documents as the original indices did. 
The new indices will also have a datetime suffix indicating when the re-parse occurred. + - If this is the second time running this command the [indices](http://localhost:9200/_cat/indices/?pretty&v&s=index) url should reflect 42 indices prefixed with `dev` and they should each contain the same number of documents as the original indices did. The latest indices will have a new datetime suffix delineating them from the other indices. +3. Expected DAC results. + - The DAC record counts should be exactly the same no matter how many times the command is run. + - The primary key for all reparsed datafiles should no longer be the same. + - `ParserError` and `DataFileSummary` objects should be consistent with the file. + +#### Clean and Re-parse All with New Indices and Deleting Old Indices +1. Execute `python manage.py clean_and_reparse -a -n -d` +2. The expected results for this command will be exactly the same as above. The only difference is that no matter how many times you execute this command, you should only see 21 indices in Elastic with the `dev` prefix. + +#### Clean and Re-parse All with Same Indices +1. Execute `python manage.py clean_and_reparse -a` +2. The expected results for this command will match the initial result from above. + +``` +health status index uuid pri rep docs.count docs.deleted store.size pri.store.size +green open .kibana_1 VKeA-BPcSQmJJl_AbZr8gQ 1 0 1 0 4.9kb 4.9kb +yellow open dev_ssp_m1_submissions mDIiQxJrRdq0z7W9H_QUYg 1 1 5 0 24kb 24kb +yellow open dev_ssp_m2_submissions OUrgAN1XRKOJgJHwr4xm7w 1 1 6 0 33.6kb 33.6kb +yellow open dev_ssp_m3_submissions 60fCBXHGTMK31MyWw4t2gQ 1 1 8 0 32.4kb 32.4kb +yellow open dev_tanf_t1_submissions 19f_lawWQKSeuwejo2Qgvw 1 1 817 0 288.2kb 288.2kb +yellow open dev_tanf_t2_submissions dPj2BdNtSJyAxCqnMaV2aw 1 1 884 0 414.4kb 414.4kb +yellow open dev_tanf_t3_submissions e7bEl0AURPmcZ5kiFwclcA 1 1 1380 0 355.2kb 355.2kb +``` + +#### Clean and Re-parse FY 2024 New Indices and Keep Old Indices +1. 
Execute `python manage.py clean_and_reparse -y 2024 -n` +2. The expected results here are much different with respect to Elastic. Again, Postgres is the ground truth and its counts should never change. Because this is the first time we execute this command and therefore are creating our Elastic aliases, the result returned from the [indices](http://localhost:9200/_cat/indices/?pretty&v&s=index) url might be confusing. See below. + +``` +index docs.count +.kibana_1 2 +dev_ssp_m1_submissions_2024-07-05_17.26.26 5 +dev_ssp_m2_submissions_2024-07-05_17.26.26 6 +dev_ssp_m3_submissions_2024-07-05_17.26.26 8 +dev_tanf_t1_submissions_2024-07-05_17.26.26 2 +dev_tanf_t2_submissions_2024-07-05_17.26.26 2 +dev_tanf_t3_submissions_2024-07-05_17.26.26 4 +``` + +- While the DAC reports the correct number of records for all submitted types, Elastic does not. This is because we only reparsed a subset of the entire collection of datafiles for the first time we executed the `clean_and_reparse` command. Therefore, Elastic only has documents for the subset of resubmitted files. If we had already executed the command: `python manage.py clean_and_reparse -a -n` and then executed `python manage.py clean_and_reparse -y 2024 -n`, we would see what you might have initially expected to see. 
+ +``` +index docs.count +.kibana_1 2 +dev_ssp_m1_submissions_2024-07-05_17.34.34 5 +dev_ssp_m1_submissions_2024-07-05_17.35.26 5 +dev_ssp_m2_submissions_2024-07-05_17.34.34 6 +dev_ssp_m2_submissions_2024-07-05_17.35.26 6 +dev_ssp_m3_submissions_2024-07-05_17.34.34 8 +dev_ssp_m3_submissions_2024-07-05_17.35.26 8 +dev_tanf_t1_submissions_2024-07-05_17.34.34 817 +dev_tanf_t1_submissions_2024-07-05_17.35.26 2 +dev_tanf_t2_submissions_2024-07-05_17.34.34 884 +dev_tanf_t2_submissions_2024-07-05_17.35.26 2 +dev_tanf_t3_submissions_2024-07-05_17.34.34 1380 +dev_tanf_t3_submissions_2024-07-05_17.35.26 4 +``` + +## Cloud.gov Examples +Running the `clean_and_reparse` command in a Cloud.gov environment will require the executor to do some exploratory data analysis for the environment to verify things are running correctly. With that said, the logic and general expected results for the local example commands above will be a one-to-one match with the same command executed in Cloud.gov. Below are the general steps a system admin will follow to execute a desired command and also verify the results of the command. + +1. System admin logs in to the appropriate backend application. E.g. `tdp-backend-raft`. +2. System admin has the DAC open and verifies the counts of records and other models before executing the command. +3. System admin logs into the environment's Elastic proxy. E.g. `cf ssh tdp-elastic-proxy-dev`. +4. System admin queries the indices for their counts from the Elastic proxy: `curl 'http://localhost:8080/_cat/indices/?pretty&v&s=index'` +5. System admin executes the `clean_and_reparse` Django command from the backend app. E.g. `python manage.py clean_and_reparse -a -n`. +6. System admin verifies the DAC is consistent and the Elastic indices match their expectations. + +## OFA Admin Backend App Login + +### 0. Disconnect from VPN. + +### 1. 
Authenticate with Cloud.gov +API endpoint: api.fr.cloud.gov +```bash +$ cf login -a api.fr.cloud.gov --sso + +Temporary Authentication Code ( Get one at https://login.fr.cloud.gov/passcode ): + +Authenticating... +OK + + +Select an org: +1. hhs-acf-ofa +2. sandbox-hhs + +Org (enter to skip): 1 +1 +Targeted org hhs-acf-ofa. + +Select a space: +1. tanf-dev +2. tanf-prod +3. tanf-staging + +Space (enter to skip): 1 +1 +Targeted space tanf-dev. + +API endpoint: https://api.fr.cloud.gov +API version: 3.170.0 +user: +org: hhs-acf-ofa +space: tanf-dev +``` + +### 2. SSH into Backend App +1. Get the app GUID + ```bash + $ cf curl v3/apps/$(cf app tdp-backend-qasp --guid)/processes | jq --raw-output '.resources | .[]? | select(.type == "web").guid' + + + ``` + +2. Get the SSH code + ```bash + $ cf ssh-code + + + ``` + +3. SSH into the App + ```bash + $ ssh -p 2222 cf:/0@ssh.fr.cloud.gov + + The authenticity of host '[ssh.fr.cloud.gov]:2222 ([2620:108:d00f::fcd:e8d8]:2222)' can't be established. + RSA key fingerprint is . + This key is not known by any other names + Please type 'yes', 'no' or the fingerprint: yes + Could not create directory '/u/.ssh' (No such file or directory). + Failed to add the host to the list of known hosts (/u/.ssh/known_hosts). + cf:/0@ssh.fr.cloud.gov's password: + ``` + +### 3. Activate Interactive Shell +```bash +$ /tmp/lifecycle/shell +``` + +### 4. Display Help for Re-parse Command +```bash +$ python manage.py clean_and_reparse -h + +usage: manage.py clean_and_parse [-h] [-q {Q1,Q2,Q3,Q4}] [-y FISCAL_YEAR] [-a] [-n] [-d] [--configuration CONFIGURATION] [--version] [-v {0,1,2,3}] [--settings SETTINGS] [--pythonpath PYTHONPATH] [--traceback] [--no-color] [--force-color] [--skip-checks] + +Delete and re-parse a set of datafiles. All re-parsed data will be moved into a new set of Elastic indexes. 
+ +options: + -h, --help show this help message and exit + -q {Q1,Q2,Q3,Q4}, --fiscal_quarter {Q1,Q2,Q3,Q4} + Re-parse all files in the fiscal quarter, e.g. Q1. + -y FISCAL_YEAR, --fiscal_year FISCAL_YEAR + Re-parse all files in the fiscal year, e.g. 2021. + -a, --all Clean and re-parse all datafiles. If selected, fiscal_year/quarter aren't necessary. + -n, --new_indices Move re-parsed data to new Elastic indices. + -d, --delete_indices Requires new_indices. Delete the current Elastic indices. + --configuration CONFIGURATION + The name of the configuration class to load, e.g. "Development". If this isn't provided, the DJANGO_CONFIGURATION environment variable will be used. + --version show program's version number and exit + -v {0,1,2,3}, --verbosity {0,1,2,3} + Verbosity level; 0=minimal output, 1=normal output, 2=verbose output, 3=very verbose output + --settings SETTINGS The Python path to a settings module, e.g. "myproject.settings.main". If this isn't provided, the DJANGO_SETTINGS_MODULE environment variable will be used. + --pythonpath PYTHONPATH + A directory to add to the Python path, e.g. "/home/djangoprojects/myproject". + --traceback Raise on CommandError exceptions + --no-color Don't colorize the command output. + --force-color Force colorization of the command output. + --skip-checks Skip system checks. 
+``` \ No newline at end of file diff --git a/scripts/deploy-frontend.sh b/scripts/deploy-frontend.sh index 07d47e980..cbdd32245 100755 --- a/scripts/deploy-frontend.sh +++ b/scripts/deploy-frontend.sh @@ -57,6 +57,7 @@ update_frontend() unlink .env.production mkdir deployment + cp -r build deployment/public cp nginx/cloud.gov/buildpack.nginx.conf deployment/nginx.conf cp nginx/cloud.gov/locations.conf deployment/locations.conf diff --git a/tdrs-backend/Dockerfile b/tdrs-backend/Dockerfile index dcf0178d2..f09622854 100644 --- a/tdrs-backend/Dockerfile +++ b/tdrs-backend/Dockerfile @@ -14,10 +14,13 @@ WORKDIR /tdpapp/ RUN apt-get -y update # Upgrade already installed packages: RUN apt-get -y upgrade -# Install a new package: -RUN apt-get install -y gcc && apt-get install -y graphviz && apt-get install -y graphviz-dev -RUN apt-get install postgresql-client -y -RUN apt-get install -y libpq-dev python3-dev +# Postgres client setup +RUN apt install -y postgresql-common curl ca-certificates && install -d /usr/share/postgresql-common/pgdg && \ +curl -o /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc --fail https://www.postgresql.org/media/keys/ACCC4CF8.asc && \ +sh -c 'echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' && \ +apt -y update && apt install postgresql-client-15 -y +# Install packages: +RUN apt install -y gcc graphviz graphviz-dev libpq-dev python3-dev # Install pipenv RUN pip install --upgrade pip pipenv RUN pipenv install --dev --system --deploy diff --git a/tdrs-backend/README.md b/tdrs-backend/README.md index 2cb44b007..83a7d2f60 100644 --- a/tdrs-backend/README.md +++ b/tdrs-backend/README.md @@ -8,6 +8,7 @@ Backend API Service for TDP. 
Deployed to Cloud.gov at https://tdp-backend.app.cl - [Login.gov Account](https://login.gov/) - [Cloud.gov Account](https://cloud.gov/) - [Cloud Foundry CLI](https://docs.cloudfoundry.org/cf-cli/install-go-cli.html) +- [Task](https://taskfile.dev/installation/) (runs the commands defined in `Taskfile.yml`) # Contents @@ -25,6 +26,8 @@ This project uses a Pipfile for dependency management. **Commands are to be executed from within the `tdrs-backend` directory** +Note: The first step is to install [Task](https://taskfile.dev/installation/), which provides the `task` command used in the steps below. + 1.) Configure your local environment by copying over the .env.example file ```bash $ cp .env.example .env @@ -41,7 +44,7 @@ with the email you use to login to [login.gov](https://login.gov) ```bash # Merge in local overrides for docker-compose by using -f flag and specifying both # This allows environment variables to be passed in from .env files locally. -$ docker-compose -f docker-compose.yml -f docker-compose.local.yml up --build -d +$ task backend-up ``` This command will start the following containers: @@ -59,7 +62,7 @@ a64c18db30ed localstack/localstack:0.12.9 "docker-entrypoint.sh" 2 hour 6.) To `exec` into the PostgreSQL database in the container. ```bash -$ docker exec -it tdrs-backend_postgres_1 psql -U tdpuser -d tdrs_test +$ task psql ``` 7.) For configuration of a superuser for admin tasks please refer to the [user_role_management.md](../docs/user_role_management.md) guide. @@ -67,13 +70,19 @@ $ docker exec -it tdrs-backend_postgres_1 psql -U tdpuser -d tdrs_test 8.) Backend project tear down: ```bash - $ docker-compose down --remove-orphans + $ task backend-down ``` 9.) The `postgres` and `localstack` containers use [Docker Named Volumes](https://spin.atomicobject.com/2019/07/11/docker-volumes-explained/) to persist container data between tear down and restart of containers. To clear all stored data and reset to an initial state, pass the `-v` flag when tearing down the containers: ```bash - $ docker-compose down -v + $ task backend-remove-volume +``` + +10.) 
To remove all volumes, containers and images, we can run the following command. Note that this will remove all containers including containers outside of this project. + +```bash +$ task clean ``` ---- @@ -116,13 +125,13 @@ s3_client.generate_presigned_url(**params) 1. Run local unit tests by executing the following command. ```bash -$ docker-compose run --rm web bash -c "./wait_for_services.sh && pytest" +$ task backend-pytest ``` 2. Run local linting tests by executing the following command: ```bash -$ docker-compose run --rm web bash -c "flake8 ." +$ task backend-lint ``` The [flake8](https://flake8.pycqa.org/en/latest/) linter is configured to check the formatting of the source against this [setup.cfg](./setup.cfg#L20-L34) file. diff --git a/tdrs-backend/docker-compose.local.yml b/tdrs-backend/docker-compose.local.yml index 2de355c9c..89cc54f96 100644 --- a/tdrs-backend/docker-compose.local.yml +++ b/tdrs-backend/docker-compose.local.yml @@ -3,7 +3,7 @@ version: "3.4" services: postgres: - image: postgres:11.6 + image: postgres:15.7 environment: - PGDATA=/var/lib/postgresql/data/ - POSTGRES_DB=tdrs_test diff --git a/tdrs-backend/gunicorn_start.sh b/tdrs-backend/gunicorn_start.sh index 02108ef9f..9224f9de3 100755 --- a/tdrs-backend/gunicorn_start.sh +++ b/tdrs-backend/gunicorn_start.sh @@ -12,16 +12,17 @@ else fi # Collect static files. 
This is needed for swagger to work in local environment -if [[ $DISABLE_COLLECTSTATIC ]]; then +if [[ $DISABLE_COLLECTSTATIC ]]; then echo "DISABLE_COLLECTSTATIC is set to true, skipping collectstatic" else echo "Collecting static files" python manage.py collectstatic --noinput fi - -celery -A tdpservice.settings worker -c 1 & +# Celery worker config can be found here: https://docs.celeryq.dev/en/stable/userguide/workers.html#:~:text=The-,hostname,-argument%20can%20expand +celery -A tdpservice.settings worker --loglevel=WARNING --concurrency=1 -n worker1@%h & sleep 5 + # TODO: Uncomment the following line to add flower service when memory limitation is resolved celery -A tdpservice.settings --broker=$REDIS_URI flower & celery -A tdpservice.settings beat -l info --scheduler django_celery_beat.schedulers:DatabaseScheduler & diff --git a/tdrs-backend/tdpservice/parsers/parse.py b/tdrs-backend/tdpservice/parsers/parse.py index b60aad1cf..6b1f1a338 100644 --- a/tdrs-backend/tdpservice/parsers/parse.py +++ b/tdrs-backend/tdpservice/parsers/parse.py @@ -2,7 +2,6 @@ from django.conf import settings -from django.db import DatabaseError from django.contrib.admin.models import LogEntry, ADDITION from django.contrib.contenttypes.models import ContentType import itertools @@ -12,7 +11,6 @@ from . 
import row_schema from .schema_defs.utils import get_section_reference, get_program_model from .case_consistency_validator import CaseConsistencyValidator -from elasticsearch.helpers.errors import BulkIndexError from elasticsearch.exceptions import ElasticsearchException from tdpservice.data_files.models import DataFile @@ -108,42 +106,40 @@ def bulk_create_records(unsaved_records, line_number, header_count, datafile, df batch_size = settings.BULK_CREATE_BATCH_SIZE if (line_number % batch_size == 0 and header_count > 0) or flush: logger.debug("Bulk creating records.") - try: - num_db_records_created = 0 - num_expected_db_records = 0 - num_elastic_records_created = 0 - for document, records in unsaved_records.items(): + num_db_records_created = 0 + num_expected_db_records = 0 + num_elastic_records_created = 0 + for document, records in unsaved_records.items(): + try: num_expected_db_records += len(records) created_objs = document.Django.model.objects.bulk_create(records) num_db_records_created += len(created_objs) - - try: - num_elastic_records_created += document.update(created_objs)[0] - except BulkIndexError as e: - logger.error(f"Encountered error while indexing datafile documents: {e}") - LogEntry.objects.log_action( - user_id=datafile.user.pk, - content_type_id=ContentType.objects.get_for_model(DataFile).pk, - object_id=datafile, - object_repr=f"Datafile id: {datafile.pk}; year: {datafile.year}, quarter: {datafile.quarter}", - action_flag=ADDITION, - change_message=f"Encountered error while indexing datafile documents: {e}", - ) - continue - - dfs.total_number_of_records_created += num_db_records_created - if num_db_records_created != num_expected_db_records: - logger.error(f"Bulk Django record creation only created {num_db_records_created}/" + - f"{num_expected_db_records}!") - elif num_elastic_records_created != num_expected_db_records: - logger.error(f"Bulk Elastic document creation only created {num_elastic_records_created}/" + - 
f"{num_expected_db_records}!") - else: - logger.info(f"Created {num_db_records_created}/{num_expected_db_records} records.") - return num_db_records_created == num_expected_db_records, {} - except DatabaseError as e: - logger.error(f"Encountered error while creating datafile records: {e}") - return False + num_elastic_records_created += document.update(created_objs)[0] + except ElasticsearchException as e: + logger.error(f"Encountered error while indexing datafile documents: {e}") + LogEntry.objects.log_action( + user_id=datafile.user.pk, + content_type_id=ContentType.objects.get_for_model(DataFile).pk, + object_id=datafile, + object_repr=f"Datafile id: {datafile.pk}; year: {datafile.year}, quarter: {datafile.quarter}", + action_flag=ADDITION, + change_message=f"Encountered error while indexing datafile documents: {e}", + ) + continue + except Exception as e: + logger.error(f"Encountered error while creating datafile records: {e}") + return False + + dfs.total_number_of_records_created += num_db_records_created + if num_db_records_created != num_expected_db_records: + logger.error(f"Bulk Django record creation only created {num_db_records_created}/" + + f"{num_expected_db_records}!") + elif num_elastic_records_created != num_expected_db_records: + logger.error(f"Bulk Elastic document creation only created {num_elastic_records_created}/" + + f"{num_expected_db_records}!") + else: + logger.info(f"Created {num_db_records_created}/{num_expected_db_records} records.") + return num_db_records_created == num_expected_db_records return False def bulk_create_errors(unsaved_parser_errors, num_errors, batch_size=5000, flush=False): diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/header.py b/tdrs-backend/tdpservice/parsers/schema_defs/header.py index 9738c43ee..67475fd5f 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/header.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/header.py @@ -122,7 +122,11 @@ startIndex=22, endIndex=23, required=True, - 
validators=[validators.matches("D")], + validators=[validators.matches("D", + error_func=lambda eargs: ("HEADER Update Indicator must be set to D " + f"instead of {eargs.value}. Please review " + "Exporting Complete Data Using FTANF in the " + "Knowledge Center."))], ), ], ) diff --git a/tdrs-backend/tdpservice/parsers/test/test_header.py b/tdrs-backend/tdpservice/parsers/test/test_header.py index 78ffdfa21..18079bc68 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_header.py +++ b/tdrs-backend/tdpservice/parsers/test/test_header.py @@ -37,3 +37,37 @@ def test_header_cleanup(test_datafile): assert header_is_valid assert header_errors == [] + +@pytest.mark.parametrize("header_line, is_valid, error", [ + # Title error + (" 20204A06 TAN1ED", False, "Your file does not begin with a HEADER record."), + # quarter error + ("HEADER20205A06 TAN1ED", False, "HEADER Item 5 (quarter): 5 is not in [1, 2, 3, 4]."), + # Type error + ("HEADER20204E06 TAN1ED", False, "HEADER Item 6 (type): E is not in [A, C, G, S]."), + # Fips error + ("HEADER20204A07 TAN1ED", False, ("HEADER Item 1 (state fips): 07 is not in [00, 01, 02, 04, 05, 06, 08, 09, " + "10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, " + "30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, " + "50, 51, 53, 54, 55, 56, 66, 72, 78].")), + # Tribe error + ("HEADER20204A00 -1TAN1ED", False, "HEADER Item 3 (tribe code): -1 is not in range [0, 999]."), + # Program type error + ("HEADER20204A06 BAD1ED", False, "HEADER Item 7 (program type): BAD is not in [TAN, SSP]."), + # Edit error + ("HEADER20204A06 TAN3ED", False, "HEADER Item 8 (edit): 3 is not in [1, 2]."), + # Encryption error + ("HEADER20204A06 TAN1AD", False, "HEADER Item 9 (encryption): A is not in [ , E]."), + # Update error + ("HEADER20204A06 TAN1EA", False, ("HEADER Update Indicator must be set to D instead of A. 
Please review " + "Exporting Complete Data Using FTANF in the Knowledge Center.")), +]) +@pytest.mark.django_db +def test_header_fields(test_datafile, header_line, is_valid, error): + """Test validate all header fields.""" + generate_error = util.make_generate_parser_error(test_datafile, 1) + header, header_is_valid, header_errors = schema_defs.header.parse_and_validate(header_line, + generate_error) + + assert is_valid == header_is_valid + assert error == header_errors[0].error_message diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse.py b/tdrs-backend/tdpservice/parsers/test/test_parse.py index d82124894..fd24bb8fe 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse.py @@ -1492,7 +1492,7 @@ def test_bulk_create_returns_rollback_response_on_bulk_index_exception(small_cor documents.tanf.TANF_T3DataSubmissionDocument(): [TANF_T3()] } - all_created, unsaved_records = parse.bulk_create_records( + all_created = parse.bulk_create_records( records, line_number=1, header_count=1, @@ -1507,7 +1507,6 @@ def test_bulk_create_returns_rollback_response_on_bulk_index_exception(small_cor assert log.change_message == "Encountered error while indexing datafile documents: indexing exception" assert all_created is True - assert len(unsaved_records.items()) == 0 assert TANF_T1.objects.all().count() == 1 assert TANF_T2.objects.all().count() == 1 assert TANF_T3.objects.all().count() == 1 @@ -1616,7 +1615,10 @@ def test_parse_tanf_section_1_file_with_bad_update_indicator(tanf_section_1_file error = parser_errors.first() assert error.error_type == ParserErrorCategoryChoices.FIELD_VALUE - assert error.error_message == "HEADER Item 10 (update indicator): U does not match D." + assert error.error_message == ("HEADER Update Indicator must be set to D " + "instead of U. 
Please review " + "Exporting Complete Data Using FTANF in the " + "Knowledge Center.") @pytest.mark.django_db() diff --git a/tdrs-backend/tdpservice/parsers/test/test_util.py b/tdrs-backend/tdpservice/parsers/test/test_util.py index dd4465e9c..4d379e741 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_util.py +++ b/tdrs-backend/tdpservice/parsers/test/test_util.py @@ -4,8 +4,13 @@ from datetime import datetime from ..fields import Field from ..row_schema import RowSchema, SchemaManager -from ..util import make_generate_parser_error, create_test_datafile, get_years_apart, clean_options_string - +from ..util import ( + make_generate_parser_error, + create_test_datafile, + get_years_apart, + clean_options_string, + generate_t2_t3_t5_hashes) +import logging def passing_validator(): """Fake validator that always returns valid.""" @@ -553,3 +558,21 @@ def test_clean_options_string(options, expected): """Test `clean_options_string` util func.""" result = clean_options_string(options) assert result == expected + + +@pytest.mark.django_db() +def test_empty_SSN_DOB_space_filled(caplog): + """Test empty_SSN_DOB_space_filled.""" + line = 'fake_line' + + class record: + CASE_NUMBER = 'fake_case_number' + SSN = None + DATE_OF_BIRTH = None + FAMILY_AFFILIATION = 'fake_family_affiliation' + RPT_MONTH_YEAR = '202310' + RecordType = 'T2' + + with caplog.at_level(logging.ERROR): + generate_t2_t3_t5_hashes(line, record) + assert caplog.text == '' diff --git a/tdrs-backend/tdpservice/parsers/util.py b/tdrs-backend/tdpservice/parsers/util.py index 7e6ac239d..72a2850df 100644 --- a/tdrs-backend/tdpservice/parsers/util.py +++ b/tdrs-backend/tdpservice/parsers/util.py @@ -242,7 +242,7 @@ def add_record(self, case_hash, record_doc_pair, line_num): logger.error(f"Error: Case hash for record at line #{line_num} was None!") def get_bulk_create_struct(self): - """Return dict of form {document: Iterable(records)} for bulk_create_records to consume.""" + """Return dict of form {document: 
{record: None}} for bulk_create_records to consume.""" return self.cases def clear(self, all_created): @@ -250,7 +250,12 @@ def clear(self, all_created): if all_created: self.serialized_cases.update(set(self.hash_sorted_cases.keys())) self.hash_sorted_cases = dict() - self.cases = dict() + + # We don't want to re-assign self.cases here because we lose the keys of the record/doc types we've already + # made. If we don't maintain that state we might not delete everything if we need to roll the records back + # at the end of, or during parsing. + for key in self.cases.keys(): + self.cases[key] = {} def remove_case_due_to_errors(self, should_remove, case_hash): """Remove all records from memory given the hash.""" @@ -276,14 +281,15 @@ def remove_case_due_to_errors(self, should_remove, case_hash): def generate_t1_t4_hashes(line, record): """Return hashes for duplicate and partial duplicate detection for T1 & T4 records.""" logger.debug(f"Partial Hash Field Values: {record.RecordType} {str(record.RPT_MONTH_YEAR)} {record.CASE_NUMBER}") - return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR) + record.CASE_NUMBER) + return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR or '') + str(record.CASE_NUMBER or '')) def generate_t2_t3_t5_hashes(line, record): """Return hashes for duplicate and partial duplicate detection for T2 & T3 & T5 records.""" logger.debug(f"Partial Hash Field Values: {record.RecordType} {str(record.RPT_MONTH_YEAR)} {record.CASE_NUMBER} " + f"{str(record.FAMILY_AFFILIATION)} {record.DATE_OF_BIRTH} {record.SSN}") - return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR) + record.CASE_NUMBER + - str(record.FAMILY_AFFILIATION) + record.DATE_OF_BIRTH + record.SSN) + return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR or '') + str(record.CASE_NUMBER or '') + + str(record.FAMILY_AFFILIATION or '') + str(record.DATE_OF_BIRTH or '') + + str(record.SSN or '')) def get_t1_t4_partial_hash_members(): 
"""Return field names used to generate t1/t4 partial hashes.""" diff --git a/tdrs-backend/tdpservice/parsers/validators.py b/tdrs-backend/tdpservice/parsers/validators.py index e8320055a..2d4ac7b34 100644 --- a/tdrs-backend/tdpservice/parsers/validators.py +++ b/tdrs-backend/tdpservice/parsers/validators.py @@ -317,7 +317,7 @@ def matches(option, error_func=None): """Validate that value is equal to option.""" return make_validator( lambda value: value == option, - lambda eargs: error_func(option) + lambda eargs: error_func(eargs) if error_func else f"{format_error_context(eargs)} {eargs.value} does not match {option}.", ) diff --git a/tdrs-backend/tdpservice/scheduling/BACKUP_README.md b/tdrs-backend/tdpservice/scheduling/management/BACKUP_README.md similarity index 100% rename from tdrs-backend/tdpservice/scheduling/BACKUP_README.md rename to tdrs-backend/tdpservice/scheduling/management/BACKUP_README.md diff --git a/tdrs-backend/tdpservice/scheduling/management/__init__.py b/tdrs-backend/tdpservice/scheduling/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tdrs-backend/tdpservice/scheduling/management/commands/backup_db.py b/tdrs-backend/tdpservice/scheduling/management/commands/backup_db.py new file mode 100644 index 000000000..ccd683884 --- /dev/null +++ b/tdrs-backend/tdpservice/scheduling/management/commands/backup_db.py @@ -0,0 +1,53 @@ +"""Command to facilitate backup of the Postgres DB.""" + +import os +from django.core.management.base import BaseCommand +from django.conf import settings +from tdpservice.scheduling.management.db_backup import main, get_system_values +from tdpservice.users.models import User +import logging + +logger = logging.getLogger(__name__) + +class Command(BaseCommand): + """Command class.""" + + help = "Backup the Postgres DB to a file locally or to S3 if in cloud.gov." 
+ + def add_arguments(self, parser): + """Add arguments to the management command.""" + parser.add_argument("-b", "--backup", required=True, action='store_true', + help="Backup the databse to the file.") + parser.add_argument("-f", "--file", required=True, type=str, action='store', + help="The FQP of the file.") + + def handle(self, *args, **options): + """Backup the Postgres DB.""" + file = options["file"] + if not settings.USE_LOCALSTACK: + system_user, created = User.objects.get_or_create(username='system') + if created: + logger.debug('Created reserved system user.') + try: + main(['-b', '-f', f'{file}'], sys_values=get_system_values(), system_user=system_user) + except Exception as e: + logger.error(f"Exception occured while executing backup/restore: {e}") + raise e + logger.info("Cloud backup/restore job complete.") + else: + db_host = settings.DATABASES['default']['HOST'] + db_port = settings.DATABASES['default']['PORT'] + db_name = settings.DATABASES['default']['NAME'] + db_user = settings.DATABASES['default']['USER'] + + export_password = f"export PGPASSWORD={settings.DATABASES['default']['PASSWORD']}" + try: + cmd = (f"{export_password} && pg_dump -h {db_host} -p {db_port} -d {db_name} -U {db_user} -F c " + f"--no-password --no-acl --no-owner -f {file}") + os.system(cmd) + if os.path.getsize(file) == 0: + raise Exception("DB backup failed! 
Backup file size is 0 bytes!") + logger.info(f"Local backup saved to: {file}.") + logger.info("Local backup job complete.") + except Exception as e: + raise e diff --git a/tdrs-backend/tdpservice/scheduling/db_backup.py b/tdrs-backend/tdpservice/scheduling/management/db_backup.py similarity index 87% rename from tdrs-backend/tdpservice/scheduling/db_backup.py rename to tdrs-backend/tdpservice/scheduling/management/db_backup.py index 48d0da749..2ee42c14a 100644 --- a/tdrs-backend/tdpservice/scheduling/db_backup.py +++ b/tdrs-backend/tdpservice/scheduling/management/db_backup.py @@ -74,7 +74,7 @@ def get_system_values(): with open('/home/vcap/.pgpass', 'w') as f: f.write(sys_values['DATABASE_HOST'] + ":" + sys_values['DATABASE_PORT'] + ":" - + sys_values['DATABASE_DB_NAME'] + ":" + + settings.DATABASES['default']['NAME'] + ":" + sys_values['DATABASE_USERNAME'] + ":" + sys_values['DATABASE_PASSWORD']) os.environ['PGPASSFILE'] = '/home/vcap/.pgpass' @@ -94,7 +94,17 @@ def backup_database(file_name, pg_dump -F c --no-acl --no-owner -f backup.pg postgresql://${USERNAME}:${PASSWORD}@${HOST}:${PORT}/${NAME} """ try: - cmd = postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri + # TODO: This is a bandaid until the correct logic is determined for the system values with respect to the + # correct database name. + # cmd = postgres_client + "pg_dump -Fc --no-acl -f " + file_name + " -d " + database_uri + db_host = settings.DATABASES['default']['HOST'] + db_port = settings.DATABASES['default']['PORT'] + db_name = settings.DATABASES['default']['NAME'] + db_user = settings.DATABASES['default']['USER'] + + export_password = f"export PGPASSWORD={settings.DATABASES['default']['PASSWORD']}" + cmd = (f"{export_password} && {postgres_client}pg_dump -h {db_host} -p {db_port} -d {db_name} -U {db_user} " + f"-F c --no-password --no-acl --no-owner -f {file_name}") logger.info(f"Executing backup command: {cmd}") os.system(cmd) msg = "Successfully executed backup. 
Wrote pg dumpfile to {}".format(file_name) @@ -140,32 +150,32 @@ def restore_database(file_name, postgres_client, database_uri, system_user): change_message=msg ) logger.info(msg) - except Exception as e: - logger.error(f"Caught exception while creating the database. Exception: {e}.") - return False - # write .pgpass - with open('/home/vcap/.pgpass', 'w') as f: - f.write(DATABASE_HOST+":"+DATABASE_PORT+":"+DATABASE_DB_NAME+":"+DATABASE_USERNAME+":"+DATABASE_PASSWORD) - os.environ['PGPASSFILE'] = '/home/vcap/.pgpass' - os.system('chmod 0600 /home/vcap/.pgpass') + # write .pgpass + with open('/home/vcap/.pgpass', 'w') as f: + f.write(DATABASE_HOST+":"+DATABASE_PORT+":"+DATABASE_DB_NAME+":"+DATABASE_USERNAME+":"+DATABASE_PASSWORD) + os.environ['PGPASSFILE'] = '/home/vcap/.pgpass' + os.system('chmod 0600 /home/vcap/.pgpass') - logger.info("Begining database restoration.") - cmd = (postgres_client + "pg_restore" + " -p " + DATABASE_PORT + " -h " + - DATABASE_HOST + " -U " + DATABASE_USERNAME + " -d " + DATABASE_DB_NAME + " " + file_name) - logger.info(f"Executing restore command: {cmd}") - os.system(cmd) - msg = "Completed database restoration." - LogEntry.objects.log_action( - user_id=system_user.pk, - content_type_id=content_type.pk, - object_id=None, - object_repr="Executed Database restore", - action_flag=ADDITION, - change_message=msg - ) - logger.info(msg) - return True + logger.info("Begining database restoration.") + cmd = (postgres_client + "pg_restore" + " -p " + DATABASE_PORT + " -h " + + DATABASE_HOST + " -U " + DATABASE_USERNAME + " -d " + DATABASE_DB_NAME + " " + file_name) + logger.info(f"Executing restore command: {cmd}") + os.system(cmd) + msg = "Completed database restoration." 
+ LogEntry.objects.log_action( + user_id=system_user.pk, + content_type_id=content_type.pk, + object_id=None, + object_repr="Executed Database restore", + action_flag=ADDITION, + change_message=msg + ) + logger.info(msg) + return True + except Exception as e: + logger.error(f"Caught exception while restoring the database. Exception: {e}.") + raise e def upload_file(file_name, bucket, sys_values, system_user, object_name=None, region='us-gov-west-1'): diff --git a/tdrs-backend/tdpservice/scheduling/parser_task.py b/tdrs-backend/tdpservice/scheduling/parser_task.py index c1dec5c99..732d6fbe6 100644 --- a/tdrs-backend/tdpservice/scheduling/parser_task.py +++ b/tdrs-backend/tdpservice/scheduling/parser_task.py @@ -15,7 +15,7 @@ @shared_task -def parse(data_file_id): +def parse(data_file_id, should_send_submission_email=True): """Send data file for processing.""" # passing the data file FileField across redis was rendering non-serializable failures, doing the below lookup # to avoid those. I suppose good practice to not store/serializer large file contents in memory when stored in redis @@ -37,10 +37,11 @@ def parse(data_file_id): logger.info(f"Parsing finished for file -> {repr(data_file)} with status {dfs.status} and {len(errors)} errors.") - recipients = User.objects.filter( - stt=data_file.stt, - account_approval_status=AccountApprovalStatusChoices.APPROVED, - groups=Group.objects.get(name='Data Analyst') - ).values_list('username', flat=True).distinct() + if should_send_submission_email is True: + recipients = User.objects.filter( + stt=data_file.stt, + account_approval_status=AccountApprovalStatusChoices.APPROVED, + groups=Group.objects.get(name='Data Analyst') + ).values_list('username', flat=True).distinct() - send_data_submitted_email(dfs, recipients) + send_data_submitted_email(dfs, recipients) diff --git a/tdrs-backend/tdpservice/scheduling/tasks.py b/tdrs-backend/tdpservice/scheduling/tasks.py index 24690d8c8..251dad7c2 100644 --- 
a/tdrs-backend/tdpservice/scheduling/tasks.py +++ b/tdrs-backend/tdpservice/scheduling/tasks.py @@ -3,7 +3,7 @@ from __future__ import absolute_import from celery import shared_task import logging -from .db_backup import run_backup +from tdpservice.scheduling.management.db_backup import run_backup logger = logging.getLogger(__name__) diff --git a/tdrs-backend/tdpservice/search_indexes/admin/filters.py b/tdrs-backend/tdpservice/search_indexes/admin/filters.py index 36e1e66da..1d8caf0f8 100644 --- a/tdrs-backend/tdpservice/search_indexes/admin/filters.py +++ b/tdrs-backend/tdpservice/search_indexes/admin/filters.py @@ -51,7 +51,7 @@ def __init__(self, field, request, params, model, model_admin, field_path): self.lookup_choices = self._get_lookup_choices(request) def _get_lookup_choices(self, request): - """Filter queryset to guarentee lookup_choices only has STTs associated with the record type.""" + """Filter queryset to guarantee lookup_choices only has STTs associated with the record type.""" record_type = str(request.path).split('/')[-2] queryset = STT.objects.all() if 'tribal' in record_type: diff --git a/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py new file mode 100644 index 000000000..f6cf2c930 --- /dev/null +++ b/tdrs-backend/tdpservice/search_indexes/management/commands/clean_and_reparse.py @@ -0,0 +1,272 @@ +"""Delete and re-parse a set of datafiles.""" + +from django.core.management.base import BaseCommand +from django.core.management import call_command +from django.db.utils import DatabaseError +from elasticsearch.exceptions import ElasticsearchException +from tdpservice.data_files.models import DataFile +from tdpservice.parsers.models import ParserError +from tdpservice.scheduling import parser_task +from tdpservice.search_indexes.documents import tanf, ssp, tribal +from tdpservice.core.utils import log +from django.contrib.admin.models 
class Command(BaseCommand):
    """Management command that deletes parsed data for a set of datafiles and queues them for re-parsing.

    The command, in order:
      1. selects DataFile rows by fiscal year and/or quarter (or all of them),
      2. asks for interactive confirmation,
      3. runs the `backup_db` management command,
      4. optionally creates new Elastic indices through `tdp_search_index`,
      5. raw-deletes ParserErrors and parsed records for the selected files,
      6. deletes and re-saves each DataFile and queues `parser_task.parse` for it.

    Every failure is written through `log(...)` with a note on whether Postgres and Elastic are still
    consistent with each other, and is then re-raised.
    """

    help = "Delete and re-parse a set of datafiles. All re-parsed data will be moved into a new set of Elastic indexes."

    def add_arguments(self, parser):
        """Add arguments to the management command."""
        parser.add_argument("-q", "--fiscal_quarter", type=str, choices=["Q1", "Q2", "Q3", "Q4"],
                            help="Re-parse all files in the fiscal quarter, e.g. Q1.")
        parser.add_argument("-y", "--fiscal_year", type=int, help="Re-parse all files in the fiscal year, e.g. 2021.")
        parser.add_argument("-a", "--all", action='store_true', help="Clean and re-parse all datafiles. If selected, "
                            "fiscal_year/quarter aren't necessary.")
        parser.add_argument("-n", "--new_indices", action='store_true', help="Move re-parsed data to new Elastic "
                            "indices.")
        parser.add_argument("-d", "--delete_indices", action='store_true', help="Requires new_indices. Delete the "
                            "current Elastic indices.")

    def __get_log_context(self, system_user):
        """Return the context dict passed as `logger_context` to every `log(...)` call of this command."""
        return {
            'user_id': system_user.id,
            'action_flag': ADDITION,
            'object_repr': "Clean and Re-parse",
        }

    def __backup(self, backup_file_name, log_context):
        """Run the `backup_db` management command, writing to `backup_file_name`.

        Any exception is logged at error level and re-raised so that nothing is deleted without a backup.
        """
        try:
            logger.info("Beginning re-parse DB Backup.")
            call_command('backup_db', '-b', '-f', f'{backup_file_name}')
            logger.info("Backup complete! Commencing clean and re-parse.")

            log("Database backup complete.",
                logger_context=log_context,
                level='info')
        except Exception:
            log("Database backup FAILED. Clean and re-parse NOT executed. Database and Elastic are CONSISTENT!",
                logger_context=log_context,
                level='error')
            raise

    def __handle_elastic(self, new_indices, delete_indices, log_context):
        """Create new aliased Elastic indices when `new_indices` is set; otherwise do nothing.

        When `delete_indices` is false, `--use-alias-keep-index` is added to the `tdp_search_index` call.
        Failures are logged at error level and re-raised.
        """
        if not new_indices:
            return

        index_args = ['--create', '-f', '--use-alias']
        if not delete_indices:
            index_args.append('--use-alias-keep-index')

        try:
            call_command('tdp_search_index', *index_args)
            log("Index creation complete.",
                logger_context=log_context,
                level='info')
        except ElasticsearchException:
            log("Elastic index creation FAILED. Clean and re-parse NOT executed. "
                "Database is CONSISTENT, Elastic is INCONSISTENT!",
                logger_context=log_context,
                level='error')
            raise
        except Exception:
            log("Caught generic exception in __handle_elastic. Clean and re-parse NOT executed. "
                "Database is CONSISTENT, Elastic is INCONSISTENT!",
                logger_context=log_context,
                level='error')
            raise

    def __delete_records(self, docs, file_ids, new_indices, log_context):
        """Delete records, errors, and documents from Postgres and Elastic.

        `docs` is an iterable of Elastic document classes; each one's `Django.model` is the model whose rows
        are removed for the given `file_ids`. Returns the total number of rows counted for deletion.
        Failures are logged at critical level and re-raised.
        """
        total_deleted = 0
        self.__delete_errors(file_ids, log_context)
        for doc in docs:
            # Resolved outside the try so the handlers below can always reference `model` in their message.
            model = doc.Django.model
            try:
                qset = model.objects.filter(datafile_id__in=file_ids)
                total_deleted += qset.count()
                if not new_indices:
                    # If we aren't creating new indices, then we don't want duplicate data in the existing indices.
                    doc().update(qset, refresh=True, action='delete')
                # NOTE(review): _raw_delete is a private QuerySet method; presumably chosen to bypass
                # cascade collection for speed -- confirm against the Django version in use.
                qset._raw_delete(qset.db)
            except ElasticsearchException:
                log(f'Elastic document delete failed for type {model}. The database and Elastic are INCONSISTENT! '
                    'Restore the DB from the backup as soon as possible!',
                    logger_context=log_context,
                    level='critical')
                raise
            except DatabaseError:
                log(f'Encountered a DatabaseError while deleting records of type {model} from Postgres. The database '
                    'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!',
                    logger_context=log_context,
                    level='critical')
                raise
            except Exception:
                log(f'Caught generic exception while deleting records of type {model}. The database and Elastic are '
                    'INCONSISTENT! Restore the DB from the backup as soon as possible!',
                    logger_context=log_context,
                    level='critical')
                raise
        return total_deleted

    def __delete_errors(self, file_ids, log_context):
        """Raw delete all ParserErrors for each file ID; log at critical level and re-raise on failure."""
        try:
            qset = ParserError.objects.filter(file_id__in=file_ids)
            qset._raw_delete(qset.db)
        except DatabaseError:
            log('Encountered a DatabaseError while deleting ParserErrors from Postgres. The database '
                'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!',
                logger_context=log_context,
                level='critical')
            raise
        except Exception:
            log('Caught generic exception while deleting ParserErrors. The database and Elastic are INCONSISTENT! '
                'Restore the DB from the backup as soon as possible!',
                logger_context=log_context,
                level='critical')
            raise

    def __handle_datafiles(self, files, log_context):
        """Delete, re-save, and re-parse selected datafiles.

        Each file is deleted and saved again (the PK is logged before and after), then a parse task is
        queued with the submission email disabled.
        """
        for file in files:
            try:
                logger.info(f"Deleting file with PK: {file.pk}")
                file.delete()
                file.save()
                logger.info(f"New file PK: {file.pk}")
                # latest version only? -> possible new ticket
                parser_task.parse.delay(file.pk, should_send_submission_email=False)
            except DatabaseError:
                log('Encountered a DatabaseError while re-creating datafiles. The database '
                    'and Elastic are INCONSISTENT! Restore the DB from the backup as soon as possible!',
                    logger_context=log_context,
                    level='critical')
                raise
            except Exception:
                log('Caught generic exception in __handle_datafiles. Database and Elastic are INCONSISTENT! '
                    'Restore the DB from the backup as soon as possible!',
                    logger_context=log_context,
                    level='critical')
                raise

    def handle(self, *args, **options):
        """Delete and re-parse datafiles matching a query.

        Reads `fiscal_year`, `fiscal_quarter`, `all`, `new_indices` and `delete_indices` from `options`.
        Returns early (without touching any data) when no selection argument is given, when the user does
        not confirm, or when the selection matches no files.
        """
        fiscal_year = options.get('fiscal_year', None)
        fiscal_quarter = options.get('fiscal_quarter', None)
        reparse_all = options.get('all', False)
        new_indices = options.get('new_indices', False)
        delete_indices = options.get('delete_indices', False)

        args_passed = fiscal_year is not None or fiscal_quarter is not None or reparse_all

        if not args_passed:
            logger.warning("No arguments supplied.")
            self.print_help("manage.py", "clean_and_reparse")
            return

        if delete_indices and not new_indices:
            # __handle_elastic only acts when new_indices is set, so the old indices would never be deleted.
            # Clear the flag so the confirmation prompt below describes what will really happen.
            logger.warning("--delete_indices requires --new_indices; the current Elastic indices will NOT be deleted.")
            delete_indices = False

        # Labels describe the filter that is actually applied; --all overrides any year/quarter supplied.
        fy_label = "All"
        q_label = "Q1-4"
        files = DataFile.objects.all()
        if not reparse_all:
            if not fiscal_year and not fiscal_quarter:
                print(
                    'Options --fiscal_year and --fiscal_quarter not set. '
                    'Provide either option to continue, or --all to wipe all submissions.'
                )
                return
            if fiscal_year is not None:
                files = files.filter(year=fiscal_year)
                fy_label = fiscal_year
            if fiscal_quarter is not None:
                files = files.filter(quarter=fiscal_quarter)
                q_label = fiscal_quarter

        backup_file_name = f"/tmp/reparsing_backup_FY_{fy_label}_{q_label}"

        continue_msg = "You have selected to re-parse datafiles for FY {fy} and {q}. The re-parsed files ".format(
            fy=fy_label, q=q_label)

        fmt_str = "be" if new_indices else "NOT be"
        continue_msg += "will {new_index} stored in new indices and the old indices ".format(new_index=fmt_str)

        fmt_str = "be" if delete_indices else "NOT be"
        continue_msg += "will {old_index} deleted.".format(old_index=fmt_str)

        prompt_count = files.count()
        fmt_str = f"ALL ({prompt_count})" if reparse_all else f"({prompt_count})"
        continue_msg += "\nThese options will delete and re-parse {0} datafiles.".format(fmt_str)

        c = input(f'\n{continue_msg}\nContinue [y/n]? ').strip().lower()
        if c not in ['y', 'yes']:
            print('Cancelled.')
            return

        system_user, created = User.objects.get_or_create(username='system')
        if created:
            logger.debug('Created reserved system user.')
        log_context = self.__get_log_context(system_user)

        log(f"Starting clean and re-parse command for FY {fy_label} and {q_label}",
            logger_context=log_context,
            level='info')

        # Counted again after the (possibly long) confirmation wait and reused for the messages below.
        num_files = files.count()
        if num_files == 0:
            log(f"No files available for the selected Fiscal Year: {fy_label} and "
                f"Quarter: {q_label}. Nothing to do.",
                logger_context=log_context,
                level='warn')
            return

        # Backup the Postgres DB
        pattern = "%Y-%m-%d_%H.%M.%S"
        backup_file_name += f"_{datetime.now().strftime(pattern)}.pg"
        self.__backup(backup_file_name, log_context)

        # Create and delete Elastic indices if necessary
        self.__handle_elastic(new_indices, delete_indices, log_context)

        # Delete records from Postgres and Elastic if necessary
        file_ids = files.values_list('id', flat=True).distinct()
        docs = [
            tanf.TANF_T1DataSubmissionDocument, tanf.TANF_T2DataSubmissionDocument,
            tanf.TANF_T3DataSubmissionDocument, tanf.TANF_T4DataSubmissionDocument,
            tanf.TANF_T5DataSubmissionDocument, tanf.TANF_T6DataSubmissionDocument,
            tanf.TANF_T7DataSubmissionDocument,

            ssp.SSP_M1DataSubmissionDocument, ssp.SSP_M2DataSubmissionDocument, ssp.SSP_M3DataSubmissionDocument,
            ssp.SSP_M4DataSubmissionDocument, ssp.SSP_M5DataSubmissionDocument, ssp.SSP_M6DataSubmissionDocument,
            ssp.SSP_M7DataSubmissionDocument,

            tribal.Tribal_TANF_T1DataSubmissionDocument, tribal.Tribal_TANF_T2DataSubmissionDocument,
            tribal.Tribal_TANF_T3DataSubmissionDocument, tribal.Tribal_TANF_T4DataSubmissionDocument,
            tribal.Tribal_TANF_T5DataSubmissionDocument, tribal.Tribal_TANF_T6DataSubmissionDocument,
            tribal.Tribal_TANF_T7DataSubmissionDocument
        ]
        total_deleted = self.__delete_records(docs, file_ids, new_indices, log_context)
        logger.info(f"Deleted a total of {total_deleted} records accross {num_files} files.")

        # Delete and re-save datafiles to handle cascading dependencies
        logger.info(f'Deleting and re-parsing {num_files} files')
        self.__handle_datafiles(files, log_context)

        log("Database cleansing complete and all files have been re-scheduling for parsing and validation.",
            logger_context=log_context,
            level='info')
        log(f"Clean and re-parse command completed. All files for FY {fy_label} and "
            f"{q_label} have been queued for parsing.",
            logger_context=log_context,
            level='info')
        logger.info('Done. All tasks have been queued to parse the selected datafiles.')
alias_exists, options + ) + + log(f"Aliased index creation complete. Newest suffix: {index_suffix}", + logger_context=log_context, + level='info') + def _populate(self, models, options): parallel = options['parallel'] for doc in registry.get_documents(models): diff --git a/tdrs-backend/tdpservice/settings/common.py b/tdrs-backend/tdpservice/settings/common.py index 954f0906a..05542a561 100644 --- a/tdrs-backend/tdpservice/settings/common.py +++ b/tdrs-backend/tdpservice/settings/common.py @@ -366,6 +366,8 @@ class Common(Configuration): KIBANA_BASE_URL = os.getenv('KIBANA_BASE_URL', 'http://kibana:5601') BYPASS_KIBANA_AUTH = os.getenv("BYPASS_KIBANA_AUTH", False) ELASTIC_INDEX_PREFIX = APP_NAME + '_' + es_logger = logging.getLogger('elasticsearch') + es_logger.setLevel(logging.WARNING) s3_src = "s3-us-gov-west-1.amazonaws.com" diff --git a/tdrs-frontend/docker-compose.local.yml b/tdrs-frontend/docker-compose.local.yml index 5664b16d8..5f868bd23 100644 --- a/tdrs-frontend/docker-compose.local.yml +++ b/tdrs-frontend/docker-compose.local.yml @@ -16,6 +16,18 @@ services: - ./:/home/node/app networks: - local + + tdp-frontend-test: + stdin_open: true # docker run -i + tty: true # docker run -t + build: + context: . + target: localdev + command: sleep infinity + volumes: + - ./:/home/node/app + networks: + - local networks: local: driver: bridge