diff --git a/.circleci/deployment/commands.yml b/.circleci/deployment/commands.yml index d1aa82b7d..992f6440d 100644 --- a/.circleci/deployment/commands.yml +++ b/.circleci/deployment/commands.yml @@ -226,15 +226,8 @@ default: CF_APP steps: - checkout - - run: - name: Install dependencies - command: | - apk update - apk add jq - apk add curl - # TODO: Add Signature check - curl -L "https://packages.cloudfoundry.org/stable?release=linux64-binary&version=v7&source=github" | tar -zx - mv cf7 /usr/local/bin/cf + - sudo-check + - cf-check - login-cloud-dot-gov: cf-password: <> cf-username: <> @@ -285,16 +278,7 @@ type: string steps: - checkout - - run: - name: Install dependencies - command: | - sudo apt update - sudo apt install jq - sudo apt install curl - # TODO: Add Signature check - curl -L "https://packages.cloudfoundry.org/stable?release=linux64-binary&version=v7&source=github" | tar -zx - sudo mv cf7 /usr/local/bin/cf - sudo chmod +x /usr/local/bin/cf + - cf-check - login-cloud-dot-gov: cf-password: <> cf-username: <> diff --git a/.gitconfig b/.gitconfig index f70bcd581..b3cc6696c 100644 --- a/.gitconfig +++ b/.gitconfig @@ -1,17 +1,20 @@ [secrets] providers = git secrets --aws-provider - patterns = (A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16} - patterns = (\"|')?(AWS|aws|Aws)?_?(SECRET|secret|Secret)?_?(ACCESS|access|Access)?_?(KEY|key|Key)(\"|')?\\s*(:|=>|=)\\s*(\"|')?[A-Za-z0-9/\\+=]{40}(\"|')? - patterns = (\"|')?(AWS|aws|Aws)?_?(ACCOUNT|account|Account)_?(ID|id|Id)?(\"|')?\\s*(:|=>|=)\\s*(\"|')?[0-9]{4}\\-?[0-9]{4}\\-?[0-9]{4}(\"|')? - patterns = .+_KEY=.+ allowed = [A-Z]+_KEY=..echo \".{S3_CREDENTIALS}\" [|] jq -r .+ allowed = ./tdrs-backend/.env.example:.* allowed = ./tdrs-backend/docker-compose.yml:57:.* - allowed = ./tdrs-backend/manifest.proxy.yml:* + + allowed = ./tdrs-frontend/node_modules* allowed = regexes.json:.* allowed = ./scripts/copy-login-gov-keypair.sh:14:JWT_KEY=.* allowed = scripts/deploy-backend.sh:.+:DJANGO_SECRET_KEY=..python -c .from secrets import token_urlsafe. print.token_urlsafe..* allowed = .git/config:.* allowed = .gitconfig:.* - allowed = .*DJANGO_SECRET_KEY=.* + allowed = .*DJANGO_SECRET_KEY=.* #this is auto-generated in deployed environments + allowed = ./tdrs-backend/manifest.proxy.yml:* allowed = ./tdrs-backend/plg/loki/manifest.yml:* + patterns = (A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16} + patterns = (\"|')?(AWS|aws|Aws)?_?(SECRET|secret|Secret)?_?(ACCESS|access|Access)?_?(KEY|key|Key)(\"|')?\\s*(:|=>|=)\\s*(\"|')?[A-Za-z0-9/\\+=]{40}(\"|')? + patterns = (\"|')?(AWS|aws|Aws)?_?(ACCOUNT|account|Account)_?(ID|id|Id)?(\"|')?\\s*(:|=>|=)\\s*(\"|')?[0-9]{4}\\-?[0-9]{4}\\-?[0-9]{4}(\"|')? + patterns = .+_KEY=.+ + patterns = .+smtp_auth_password: .[^{]+ diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 000000000..7da1e7bb0 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +zsh ./scripts/git-secrets-check.sh local diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 000000000..51e4e28ff --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +task frontend-lint 2>/dev/null +if [ $? != "0" ]; then + echo "Frontend lint failed" + exit 1 +fi + +task backend-lint 2>/dev/null +if [ $? 
!= "0" ]; then + echo "Backend lint failed" + exit 1 +fi \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/design-deliverable-issue-template.md b/.github/ISSUE_TEMPLATE/design-deliverable-issue-template.md index 35eb23ed7..509198d3e 100644 --- a/.github/ISSUE_TEMPLATE/design-deliverable-issue-template.md +++ b/.github/ISSUE_TEMPLATE/design-deliverable-issue-template.md @@ -24,7 +24,7 @@ assignees: '' - [ ] Documentation work for the following has occurred: - [ ] Relevant User stories. - [ ] Recommended pa11y checks. - - [ ] Updating living UX documents, e.g. User Flows or Personas(if relevant). + - [ ] Updating living UX documents, e.g. User Flows, Personas, [Service Blueprint](https://www.figma.com/design/irgQPLTrajxCXNiYBTEnMV/TDP-Mockups-For-Feedback?node-id=9080-4762) (if relevant). - [ ] Internal Raft Review has occurred to ensure DoD standards and QA - [ ] Dev/Design sync has occurred; resulting tickets created - [ ] The design is usable and accessible, meaning it adheres to definition of done standards for design work. diff --git a/.github/ISSUE_TEMPLATE/research-synthesis-issue-template.md b/.github/ISSUE_TEMPLATE/research-synthesis-issue-template.md index 208bfe2c3..81892f210 100644 --- a/.github/ISSUE_TEMPLATE/research-synthesis-issue-template.md +++ b/.github/ISSUE_TEMPLATE/research-synthesis-issue-template.md @@ -13,7 +13,8 @@ assignees: '' **AC:** -- [ ] A hack.md with the drafted synthesis has been reviewed. +- [ ] A Gitbook with the drafted synthesis has been reviewed. +- [ ] [TDP Service Blueprint](https://www.figma.com/design/irgQPLTrajxCXNiYBTEnMV/TDP-Mockups-For-Feedback?node-id=9080-4762) has been updated, as appplicable - [ ] PR has been opened containing the final draft of the synthesis. - [ ] Internal Raft Review has occurred to ensure DoD standards and QA - [ ] The content is usable and accessible, meaning it adheres to definition of done standards for design work. @@ -35,4 +36,4 @@ assignees: '' **Supporting Documentation:** -- --Link to hack.md-- +- --Link to the gitbook page-- diff --git a/.gitignore b/.gitignore index f6766031a..7d693b2c7 100644 --- a/.gitignore +++ b/.gitignore @@ -115,3 +115,6 @@ cypress.env.json # DB seeds tdrs-backend/*.pg + +# Log files +*.log diff --git a/README.md b/README.md index c7ed080a9..ce86a895b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Temporary Assistance for Needy Families (TANF) Data Portal - TDP + # Temporary Assistance for Needy Families (TANF) Data Portal - TDP Welcome to the project for the New TANF Data Portal, which will replace the legacy TANF Data Reporting System! 
diff --git a/Taskfile.yml b/Taskfile.yml index 9495d3a2d..93de45d5c 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -2,6 +2,11 @@ version: '3' tasks: + gitcfg: + desc: Configure git + cmds: + - git config core.hooksPath .githooks + create-network: desc: Create the external network cmds: @@ -12,10 +17,10 @@ tasks: dir: tdrs-backend cmds: - task: create-network - - docker-compose -f docker-compose.yml up -d --build - - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py makemigrations" - - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py migrate" - - docker-compose -f docker-compose.yml down + - docker compose -f docker-compose.yml up -d --build + - docker compose -f docker-compose.yml exec web sh -c "python ./manage.py makemigrations" + - docker compose -f docker-compose.yml exec web sh -c "python ./manage.py migrate" + - docker compose -f docker-compose.yml down - task: sentry-down clone-sentry-repo: @@ -43,7 +48,7 @@ tasks: - docker cp .env sentry:/self-hosted/.env - docker exec sentry bash -c "cd self-hosted && ./install.sh --skip-user-creation --no-report-self-hosted-issues" # create a new user - - docker exec sentry bash -c "cd self-hosted && docker-compose run --rm web createuser --email admin@tanf.com --password admin --superuser" + - docker exec sentry bash -c "cd self-hosted && docker compose run --rm web createuser --email admin@tanf.com --password admin --superuser" # copy backup.json file to sentry - docker cp backup.json sentry:/self-hosted/sentry/backup.json # restore backup @@ -58,56 +63,56 @@ tasks: desc: Start sentry service dir: sentry cmds: - - docker exec sentry bash -c "cd self-hosted && docker-compose up -d" + - docker exec sentry bash -c "cd self-hosted && docker compose up -d" sentry-down: desc: Stop sentry service dir: sentry cmds: - - docker exec sentry bash -c "cd self-hosted && docker-compose down" + - docker exec sentry bash -c "cd self-hosted && docker compose down" drop-db: desc: Drop the backend database dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml down + - docker compose -f docker-compose.yml down - docker volume rm tdrs-backend_postgres_data backend-up: desc: Start backend web server dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml up -d + - docker compose -f docker-compose.yml up -d backend-down: desc: Stop backend web server dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml down + - docker compose -f docker-compose.yml down backend-logs: desc: Show and follow backend web server logs dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml logs -f + - docker compose -f docker-compose.yml logs -f backend-restart: desc: Restart backend web server dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml restart + - docker compose -f docker-compose.yml restart backend-bash: desc: Open a shell in the backend container dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml exec web sh + - docker compose -f docker-compose.yml exec web sh backend-shell: desc: Open a Django shell in the backend container dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml exec web sh -c "python ./manage.py shell" + - docker compose -f docker-compose.yml exec web sh -c "python ./manage.py shell" backend-exec: desc: Execute a command in the backend container @@ -115,7 +120,7 @@ tasks: vars: CMD: '{{.CMD}}' cmds: - - docker-compose -f docker-compose.yml exec web sh -c "python manage.py {{.CMD}}" + - docker compose -f docker-compose.yml exec web sh -c 
"python manage.py {{.CMD}}" backend-exec-seed-db: desc: Execute seed_db command in the backend container @@ -123,8 +128,8 @@ tasks: vars: CMD: '{{.CMD}}' cmds: - - docker-compose -f docker-compose.yml up -d - - docker-compose -f docker-compose.yml exec web sh -c "python manage.py populate_stts; python ./manage.py seed_db" + - docker compose -f docker-compose.yml up -d + - docker compose -f docker-compose.yml exec web sh -c "python manage.py populate_stts; python ./manage.py seed_db" backend-pytest: desc: 'Run pytest in the backend container E.g: task backend-pytest PYTEST_ARGS="tdpservice/test/ -s -vv"' @@ -133,20 +138,20 @@ tasks: PYTEST_ARGS: '{{.PYTEST_ARGS | default "."}}' cmds: - task backend-up - - docker-compose -f docker-compose.yml exec web sh -c "pytest {{.PYTEST_ARGS}}" + - docker compose -f docker-compose.yml exec web sh -c "pytest {{.PYTEST_ARGS}}" backend-remove-volumes: desc: Remove the backend volumes dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml down -v + - docker compose -f docker-compose.yml down -v backend-lint: desc: Run flake8 in the backend container dir: tdrs-backend cmds: - task backend-up - - docker-compose -f docker-compose.yml exec web sh -c "flake8 . && if [ $? -eq 0 ]; then echo 'Flake8 linter found no issues'; fi" + - docker compose -f docker-compose.yml exec -T web sh -c "flake8 . && if [ $? -eq 0 ]; then echo 'Flake8 linter found no issues'; fi" backend-pip-lock: #TODO: Add a task to lock the pip dependencies @@ -154,16 +159,16 @@ tasks: dir: tdrs-backend cmds: - task: backend-up - - docker-compose -f docker-compose.yml exec web sh -c "pipenv lock" + - docker compose -f docker-compose.yml exec web sh -c "pipenv lock" psql: desc: Open a psql shell in the backend container dir: tdrs-backend cmds: - task create-network || true - - docker-compose -f docker-compose.yml up -d postgres + - docker compose -f docker-compose.yml up -d postgres - sleep 5 - - docker-compose -f docker-compose.yml exec postgres sh -c "psql -U tdpuser -d tdrs_test" + - docker compose -f docker-compose.yml exec postgres sh -c "psql -U tdpuser -d tdrs_test" clean: desc: Remove all containers, networks, and volumes @@ -177,25 +182,25 @@ tasks: desc: Start clamav service dir: tdrs-backend cmds: - - docker-compose -f docker-compose.yml up -d clamav-rest + - docker compose -f docker-compose.yml up -d clamav-rest frontend-up: desc: Start frontend web server dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml up -d + - docker compose -f docker-compose.yml up -d frontend-down: desc: Stop frontend web server dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml down + - docker compose -f docker-compose.yml down frontend-restart: desc: Restart frontend web server dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml restart + - docker compose -f docker-compose.yml restart frontend-av: desc: Start frontend with optional clamav service @@ -210,24 +215,24 @@ tasks: desc: Initialize the frontend project dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml up -d --build - - docker-compose -f docker-compose.yml exec tdp-frontend sh -c "apk add nodejs npm" - - docker-compose -f docker-compose.yml exec tdp-frontend sh -c "npm install" - - docker-compose -f docker-compose.yml down + - docker compose -f docker-compose.yml up -d --build + - docker compose -f docker-compose.yml exec tdp-frontend sh -c "apk add nodejs npm" + - docker compose -f docker-compose.yml exec tdp-frontend sh -c "npm install" + - docker compose -f docker-compose.yml 
down frontend-test: desc: Run frontend tests dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.local.yml up tdp-frontend-test -d - - docker-compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test" + - docker compose -f docker-compose.local.yml up tdp-frontend-test -d + - docker compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test" frontend-test-cov: desc: Run frontend tests with coverage dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.local.yml up tdp-frontend-test -d - - docker-compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test:cov" + - docker compose -f docker-compose.local.yml up tdp-frontend-test -d + - docker compose -f docker-compose.local.yml exec tdp-frontend-test sh -c "npm run test:cov" cypress: desc: Run cypress tests @@ -241,20 +246,20 @@ tasks: desc: Run eslint in the frontend container dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.local.yml up -d tdp-frontend-test --quiet-pull - - docker-compose -f docker-compose.yml exec tdp-frontend-test sh -c "npm run lint" + - docker compose -f docker-compose.local.yml up -d tdp-frontend-test --quiet-pull + - docker compose -f docker-compose.yml exec -T tdp-frontend-test sh -c "npm run lint" frontend-logs: desc: Show and follow frontend web server logs dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml logs -f + - docker compose -f docker-compose.yml logs -f frontend-bash: desc: Open a shell in the frontend container dir: tdrs-frontend cmds: - - docker-compose -f docker-compose.yml exec tdp-frontend bash + - docker compose -f docker-compose.yml exec tdp-frontend bash up: desc: Start both frontend and backend web servers diff --git a/docs/Security-Compliance/diagram.png b/docs/Security-Compliance/diagram.png index 95f10ed10..6f367531b 100644 Binary files a/docs/Security-Compliance/diagram.png and b/docs/Security-Compliance/diagram.png differ diff --git a/docs/Technical-Documentation/tech-memos/parsing-log-per-file/parsing-log-per-file.md b/docs/Technical-Documentation/tech-memos/parsing-log-per-file/parsing-log-per-file.md new file mode 100644 index 000000000..5aac5cb82 --- /dev/null +++ b/docs/Technical-Documentation/tech-memos/parsing-log-per-file/parsing-log-per-file.md @@ -0,0 +1,315 @@ +# Parsing log per file upload + +**Audience**: TDP Software Engineers
+**Subject**: Parsing log per file upload
+**Date**: November 4, 2024
+ +## Summary +This technical memorandum discusses the implementation of features to bring more visibility into system behavior during file parsing. This includes: +* Generating and storing a file for the logs produced during the parsing run. The log file should be stored in s3, alongside the submitted datafile. + +This memo discusses various implementations and the benefits/drawbacks of each, then provides a suggested implementation taking these factors into consideration. + +## Background +TDP currently uses python's `logging` utility to capture debug messages sent by the application to the terminal. These logs are captured by Cloud.gov, Prometheus, and Sentry. This approach has several drawbacks: + +* These logs can potentially leak sensitive data +* It is hard to dig back through the logs to find the errors associated with a particular file upload +* There is no visibility into differences in logs between parse runs for the same file + +## Out of Scope +The following items are out of scope for this technical memorandum and should be considered in a different technical memorandum. +* Efficiency - logs must be written to a file on disk and uploaded to s3 at the end of the parser run. + * This will have a memory impact and a disk space impact, as well as increase the run time of the parsing process (network upload). + * The singleton solution explored here will additionally increase memory utilization. + * Some mitigation techniques are mentioned, but not implemented. +* Association of the parse log file with a model in the Django Admin Console. + * An MVP of this feature only includes uploading the resulting file to s3, alongside the datafile submission. + * An exploration of this was done in Method/Design Step 6, but is out of scope for this work. + +## Method/Design + +In general, this solution requires two simple parts: + +1. Capture the log output during parsing of a datafile and write it to a file on disk + * This solution utilizes the `FileHandler` already included as part of python's `logging` utility, which we use extensively throughout TDP (including throughout the parser). `FileHandler` allows log output to be written to a file on disk. + * [Documentation](https://docs.python.org/3/howto/logging.html#logging-to-a-file) + * Example: + ```python + import logging + + + def setup_logger(name, filename): + handler = logging.FileHandler(filename) + logger = logging.getLogger(name) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + return logger, handler + + + logger, handler = setup_logger('test123', 'test123.log') + logger.info('test123') + logger.warning('asdddddddd3') + logger.error('asdfasdf') + handler.close() + ``` + * This step can be accomplished using a custom-built file logger. However, since `logging` is already used extensively throughout the project, we chose to extend this existing functionality rather than have to replace it for a single use-case. + * This comes with the drawback that `logging.FileHandler` doesn't compress the resulting file by default. We could potentially address this by overriding or creating a custom version of `logging.FileHandler`. + +2. Persist the resulting file once parsing completes + * s3 is a popular file storage location, already in use throughout our application. + * s3 provides familiarity for end users, as well as allowing them to access log files alongside the datafile with which they are associated. + * s3 has a 5TB file limit; large datafiles and log files, as well as frequent reparsing, increase our usage.
We may consider implementing an infrequent-access tier bucket for log files if they aren't expected to be downloaded often. + * Once parsing completes and a file containing logs is written to disk, we can use existing tooling in our application to upload the resulting file to s3. An example of this is already implemented in `tdpservice.search_indexes.tasks.export_queryset_to_s3_csv` + * Example + ```python + from botocore.exceptions import ClientError + from tdpservice.data_files.s3_client import S3Client + + s3 = S3Client() + try: + s3.client.upload_file(local_filename, settings.AWS_S3_DATAFILES_BUCKET_NAME, s3_file_path) + except ClientError as e: + logger.error('upload to s3 failed') + ``` + * This simply uploads a local file to s3, using the `boto` library. + * To limit disk usage, we could potentially write logs to s3 on-the-fly as parsing happens using the `smart_open` library. + * https://github.com/piskvorky/smart_open/tree/develop + * This would incur additional network timing/cost as parsing runs, as logs would need to be transmitted to s3 many times during the parse execution, rather than once after. + + +The biggest complicating factor to implementing this feature is allowing functions in multiple modules throughout the app to access the same instance of the logger. In general, instances of `logging` are scoped to the module-level (file-level) via a call to `logging.getLogger(__name__)` (where `__name__` is the module name). This allows all functions within a module to access the same logger instance. In our case, however, an instance of `logging` needs to be scoped to an individual run of the parsing engine: a logger instance is created when a file starts parsing, is used by functions in multiple modules throughout the app, then is destroyed once parsing is completed and the file is transferred to s3. This means a module-scoping of `logging` won't work, and leaves us with a couple options: +* Create a logger instance that gets passed through function parameters to other areas of the parser (functional approach) + ```python + # `tdpservice.scheduling.parser_task.parse` + logger = setup_logger('parse-log', 'parse-log-123.log') + errors = parse_datafile(data_file, dfs, logger) + # `logger` also needs to be passed by `parse_datafile()` to other funcs + ``` + * This requires a lot of passing of variables, creating the opportunity to miss a module or function +* Create a global (singleton) class that holds on to instances of `logging` so that other functions can access them. 
+ ```python + class ParseLogContextManager: + """Caches an instance of logger for use throughout the parse routine.""" + + def __init__(self): + self.logger_instance = None + self.handler_instance = None + + def set_logger_instance(self, logger_name): + """Creates and caches a new instance of logger.""" + logger, handler = setup_logger(logger_name, f'{logger_name}.log') + self.logger_instance = logger + self.handler_instance = handler + + def get_logger_instance(self): + """Returns the cached instance of logger.""" + return self.logger_instance + + def clear_logger_instance(self): + """Closes and clears the parse logger instance.""" + self.handler_instance.close() + # upload to s3 + ``` + * This is easier to implement as it doesn't require modifying every function call in the parser + * This likely has a higher memory impact, though, as an instance of this class will be present in memory for every instance of the application + * More work has to be done to ensure thread-safety if we ever increase the number of worker threads (which could theoretically share an instance of the singleton for multiple concurrent parse runs). This "memoization" is included as part of the implementation details below + +The remainder of this memo will focus on the singleton design pattern as the solution to implement. + +### 1. Create the singleton class and initialize an instance in `settings` + +In `tdpservice.parsers.parse_log_context_manager` (or similar), create the new singleton class + +```python +import logging + +def setup_logger(name, filename): + pass + + +class ParseLogContextManager: + """Caches an instance of logger for use throughout the parse routine.""" + + def __init__(self): + self.loggers = {} +``` + +In `settings.common`, initialize an instance of the singleton. Settings is used because Django already ensures there is only one instance of the settings object per application - it's already a singleton! + +```python +from tdpservice.parsers.parse_log_context_manager import ParseLogContextManager + +PARSE_LOG_CONTEXT_MANAGER = ParseLogContextManager() +``` + + +### 2. Initialize a new logger in `parser_task` + +In `tdpservice.scheduling.parser_task`, import settings +```python +from django.conf import settings +``` + +Then create a new logger instance at the beginning of the `parse` function +```python +settings.PARSE_LOG_CONTEXT_MANAGER.set_logger_instance(datafile_id, reparse_id) +``` + +At the end of the `parse` function, close the instance +```python +settings.PARSE_LOG_CONTEXT_MANAGER.clear_logger_instance(datafile_id, reparse_id) +``` + + +### 3.
Implement the remainder of `ParseLogContextManager` + +Now, in `tdpservice.parsers.parse_log_context_manager`, we need to implement `ParseLogContextManager`'s remaining methods: +```python +class ParseLogContextManager: + """Caches an instance of logger for use throughout the parse routine.""" + + def __init__(self): + self.loggers = {} + + # can utilize this to avoid having to pass both params every time + # (also accept `logger_name` as the param in functions below) + def _get_logger_name(self, datafile_id, reparse_id): + if reparse_id is None: + return f"parse-log-{datafile_id}" + return f"parse-log-{datafile_id}-{reparse_id}" + + # this implements the memoization technique to store one + # instance of logger per datafile_id/reparse_id + def set_logger_instance(self, datafile_id, reparse_id): + """Creates and caches a new instance of logger.""" + logger_name = self._get_logger_name(datafile_id, reparse_id) + logger, handler = setup_logger(logger_name, f'{logger_name}.log') + + self.loggers[logger_name] = { + 'logger': logger, + 'handler': handler + } + + def get_logger_instance(self, datafile_id, reparse_id): + """Returns the cached instance of logger.""" + logger_name = self._get_logger_name(datafile_id, reparse_id) + return self.loggers[logger_name]['logger'] + + def clear_logger_instance(self, datafile_id, reparse_id): + """Closes and clears the parse logger instance.""" + logger_name = self._get_logger_name(datafile_id, reparse_id) + self.loggers[logger_name]['handler'].close() + # upload to s3 +``` + +`setup_logger` can be implemented like so: +```python +def setup_logger(name, filename): + handler = logging.FileHandler(filename) + logger = logging.getLogger(name) + logger.addHandler(handler) + logger.setLevel(logging.DEBUG) # the min level this will accept logs at + return logger, handler +``` + + +### 4. Use `ParseLogContextManager` throughout the parser + +Currently, modules in `tdpservice.parsers` use `logging` scoped at the module level, with a line like this at the top of the file: +```python +logger = logging.getLogger(__name__) +``` +REMOVE this line. + + +Then, import settings +```python +from django.conf import settings +``` + +Then, INSIDE a function definition (with access to `datafile_id` and `reparse_id`), get the logger from the new singleton class +```python +logger = settings.PARSE_LOG_CONTEXT_MANAGER.get_logger_instance(datafile_id, reparse_id) +``` + +This instance of `logger` can be used the same as the previous instance. This change needs to be made in every module and function where logs should be written to the parse log file. This includes: +* `tdpservice.scheduling.parser_task` +* `tdpservice.parsers.parse` +* `tdpservice.parsers.case_consistency_validator` +* `tdpservice.parsers.duplicate_manager` +* `tdpservice.parsers.fields` +* `tdpservice.parsers.row_schema` +* `tdpservice.parsers.util` + + +### 5. Implement the s3 upload + +The s3 upload can be implemented with the following simple code: +```python +from botocore.exceptions import ClientError +from tdpservice.data_files.s3_client import S3Client + +s3 = S3Client() +try: + s3.client.upload_file(local_filename, settings.AWS_S3_DATAFILES_BUCKET_NAME, s3_file_path) +except ClientError as e: + logger.error('upload to s3 failed') +``` + +This just needs to be placed where it makes the most sense. Make sure `handler.close()` has been called first (such as is done in `ParseLogContextManager.clear_logger_instance`). + +### 6.
(Optional) Associate logs to a submission in the Django Admin Console + +To associate the uploaded log file with a submission in the admin console, we can +1. Add the s3 url to the existing `DataFile` or `DataFileSummary` models + * These get wiped out with every reparse, so we would be unable to see log differences between parses of the same submission. +2. Modify `ReparseFileMeta` to be created for every parse run (not just reparses), effectively creating a `ParseFileMeta` + * Addresses reparses wiping out previous logs + * Allows us to track all reparse meta stats for regular parse runs as well + * Requires substantial modification of the reparse command and the parser routine, as well as numerous migrations. +3. Create a new model solely for storing the s3 url of parser logs. + * Easier implementation + * Would have to manually correlate with `ReparseMeta` and `ReparseFileMeta` for reparse visibility. + + +A light exploration of option 2 above was done for this technical memorandum, simply to prove the migrations were allowed. The implementation steps below are incomplete and untested +* Rename the reparse meta models to `ParseMeta` and `ParseFileMeta` (migration is possible) +* Rename the `DataFile` `reparses` to `parses` +* Remove `file.reparses.add(meta_model)` from `tdpservice.search_indexes.management.commands.clean_and_reparse._handle_datafiles` and modify `tdpservice.scheduling.parser_task.parse` to create the meta model instead. + ```python + # `tdpservice.scheduling.parser_task.parse` + file_meta = None + + if reparse_id: + file_meta = ReparseFileMeta.objects.get(data_file_id=data_file_id, reparse_meta_id=reparse_id) + else: + file.reparses.add(meta_model) + file.save() + + file_meta.started_at = timezone.now() + file_meta.save() + ``` + * Add a new field to `ParseFileMeta` to store the s3 logs url + ```python + # end of `tdpservice.scheduling.parser_task.parse` + handler.close() + s3 = S3Client() + try: + s3.client.upload_file(local_filename, settings.AWS_S3_DATAFILES_BUCKET_NAME, f'parsing_logs/{logger_name}.log') + file_meta.logs_uri = f'parsing_logs/{logger_name}.log' + file_meta.save() + # .... the rest + ``` + +## Affected Systems +* Parser routine - everything from the celery task to every called function needs access to `datafile_id` and `reparse_id` (or the generated `logger_name`) so that relevant functions can access the parser context +* Settings - will store a new singleton (global class; has a memory impact) +* Application server will be storing temporary/intermediary log file before it is uploaded to s3 + +## Use and Test cases to consider +* Ensure all types of parser errors are logged to the file + * every level (debug, info, error) + * every class (parser task, parser, case consistency, duplicate management, row schema, fields, transform fields) + * parser exceptions (handled and unhandled) +* Test large file uploads with many errors. Test concurrent uploads of large files. \ No newline at end of file diff --git a/docs/Technical-Documentation/tech-memos/priotitized-errors/prioritized-errors.md b/docs/Technical-Documentation/tech-memos/priotitized-errors/prioritized-errors.md new file mode 100644 index 000000000..931bceb47 --- /dev/null +++ b/docs/Technical-Documentation/tech-memos/priotitized-errors/prioritized-errors.md @@ -0,0 +1,100 @@ +# TDP Prioritized Parser Errors + +**Audience**: TDP Software Engineers
+**Subject**: Prioritized Errors
+**Date**: October 20, 2024
+ +## Summary +This technical memorandum provides a suggested path to implement a set of new requirements OFA has generated to reduce the sheer number of parser errors generated during an STT's data submission. OFA has indicated that some errors are of a lower priority for STTs to review and correct. Thus, the OFA team has requested that a "critical" designation be assigned to parser errors so that the report STTs receive is filtered down to only the critical errors that must be reviewed and fixed. Regardless of how errors are prioritized, STTs will still retain the ability to see a summary of all errors detected in the error report. + +## Background +Currently, error reports are generated in the TDP backend via the `get_xls_serialized_file` function. This function accepts a serialized version of the appropriate `ParserError` queryset. The function then writes an XLSX file and returns it to the user. Apart from the lack of prioritization in the report generated from this function, it also introduces the possibility of an out of memory (OOM) error. This can occur because the Django model serializer brings the entire queryset into memory to serialize it into JSON. Because these ParserError querysets can be very large (hundreds of thousands of records), we will also alleviate the memory pressure `get_xls_serialized_file` introduces by removing the Django model serializer and making use of queryset pagination. + +## Out of Scope +Current requirements from OFA do not require category two errors to be queryable by value and expected value. That feature is out of scope for this tech memo and would require more design and implementation work. + +## Method/Design +Given the current OFA requirements, we can implement prioritized/critical errors and memory-efficient report generation without too much work. OFA has provided [this OneNote](https://gorafttech.sharepoint.com/:o:/s/TDRSResearchDesign/EnIa1Mn4v7pOskW7BLomXhIBxUMlYLRU_f1C0dxemW7dWw?e=m0rNyI) document which outlines the error types, errors, and fields that are most important/prioritized for STTs to see. + +### Memory Efficient Report Generation +As previously mentioned in the Background section, `get_xls_serialized_file` serializes parser errors into an XLSX in a way that requires the entire queryset of parser errors to be brought into memory. Because these querysets can be very large, having them in memory regularly kills Gunicorn workers with an OOM error. To remedy the issue, this tech memo suggests updating `get_xls_serialized_file` to not use Django model serializers and instead leverage the power of Django querysets and pagination. To accomplish this, instead of passing a JSON-serialized queryset to `get_xls_serialized_file`, a standard (un-evaluated) queryset should be passed. Then, the body of the `get_xls_serialized_file` function should be updated appropriately to use a queryset object instead of a JSON object to generate the XLSX spreadsheet. The updates should also include paginating the queryset to avoid bringing the entirety of the queryset into memory at any one time. The code snippet below provides an example of paginating the queryset and writing the appropriate fields of each entry to the XLSX report.
+ +```python +paginator = Paginator(parser_errors, settings.BULK_CREATE_BATCH_SIZE) +row_idx = 6 +for page in paginator: + for record in page.object_list: + rpt_month_year = str(getattr(record, 'rpt_month_year', None)) + fields_json = getattr(record, 'fields_json', {}) + + worksheet.write(row_idx, 0, record.case_number) + worksheet.write(row_idx, 1, rpt_month_year[:4]) + worksheet.write(row_idx, 2, calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None) + worksheet.write(row_idx, 3, format_error_msg(record.error_message, fields_json)) + worksheet.write(row_idx, 4, record.item_number) + worksheet.write(row_idx, 5, friendly_names(fields_json)) + worksheet.write(row_idx, 6, internal_names(fields_json)) + worksheet.write(row_idx, 7, record.row_number) + worksheet.write(row_idx, 8, str(ParserErrorCategoryChoices(record.error_type).label)) +``` + +The three helper functions: `format_error_msg`, `friendly_names`, `internal_names` used to write the appropriate fields can be seen below. + +```python +def format_error_msg(error_msg, fields_json): + """Format error message.""" + for key, value in fields_json['friendly_name'].items(): + error_msg = error_msg.replace(key, value) if value else error_msg + return error_msg + + +def friendly_names(fields_json): + """Return comma separated string of friendly names.""" + return ','.join([i for i in fields_json['friendly_name'].values()]) + + +def internal_names(fields_json): + """Return comma separated string of internal names.""" + return ','.join([i for i in fields_json['friendly_name'].keys()]) +``` + +### Prioritized/Critical Errors +[This OneNote](https://gorafttech.sharepoint.com/:o:/s/TDRSResearchDesign/EnIa1Mn4v7pOskW7BLomXhIBxUMlYLRU_f1C0dxemW7dWw?e=m0rNyI) is invaluable to the implementation of prioritized errors. Prioritizing errors could be a very large and technically challenging feature involving new migrations, validation/validator refactors, etc... However, this can all be avoided by making a key insight for each of the category two and category three validators by way of OFA's requirements for them. For the category two case, the OneNote document generically specifies category two validation surrounding: Family Affiliation, Citizenship and Closure reason. Further discussion with OFA indicated that it is important/a priority for a STT to see all category two errors encompassing these fields in their entirety. That makes prioritizing these category two errors extremely easy because the need to query those fields by specific values and expected values is not required. The queries below provide a complete implementation to query all category two errors encompassing those fields. + +```python +# All cat2 errors associated with FAMILY_AFFILIATION and (CITIZENSHIP_STATUS or CLOSURE_REASON) +second_field = "CITIZENSHIP_STATUS" if is_active else "CLOSURE_REASON" +field_query = Q(field_name="FAMILY_AFFILIATION") | Q(field_name=second_field) +filtered_errors = filtered_errors.union(all_errors.filter( + field_query, + error_type=ParserErrorCategoryChoices.FIELD_VALUE + )) +``` + +The key insight for the category three case is less obvious. Looking at the OneNote, it seems as though we might need to query errors based on field name(s), expected value and actual value. However, for category three errors that information is encoded into the error by its existence. For example, the OneNote indicates that a high priority error a STT should have included in their report is `If fam affil = 1 then SSN must be valid `. 
This exact error and its values (expected and given) can be uniquely found in any of the active or closed case record schemas. E.g.: + +```python +category3.ifThenAlso( + condition_field_name='FAMILY_AFFILIATION', + condition_function=category3.isEqual(1), + result_field_name='SSN', + result_function=category3.validateSSN(), +) +``` + +The existence of this error, with these fields, is uniquely defined in the appropriate schemas. The same can be said for the remaining critical category three errors. Thus, to define the high priority errors we need only know the required field(s) and their error type. Given those pieces of information, queries of the form below can be used to filter STT error reports to only show the highest priority errors. + +```python +errors.filter(fields_json__friendly_name__has_keys=[FIELD_NAME, FIELD_NAME, ETC...], + error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY) +``` + +By unioning the category two queries from above with the remainder of the category three queries, a queryset containing only the critical errors can be generated and subsequently passed to `get_xls_serialized_file` to generate and return the prioritized error report to the requesting STT. + +## Affected Systems +- TDP backend +- TDP frontend: latency time incurred while generating report + +## Use and Test cases to consider +- Admin and STT receive the same report +- Existing tests leveraging ParserError querysets are updated and re-validated for correctness diff --git a/scripts/cf-check.sh b/scripts/cf-check.sh index 5a2c84be7..684e38492 100755 --- a/scripts/cf-check.sh +++ b/scripts/cf-check.sh @@ -3,8 +3,15 @@ set -e if command -v cf /dev/null 2>&1; then echo The command cf is available else - apt-get update - apt-get install wget gnupg2 apt-transport-https + if [[ -f /bin/terraform ]]; then + echo "This is our Terraform executor, Alpine Linux v3.13" + apk update + apk add curl jq + + else + apt-get update + apt-get install curl wget gnupg2 apt-transport-https jq + fi NEXUS_ARCHIVE="cf7-cli_7.7.13_linux_x86-64.tgz" NEXUS_URL="https://tdp-nexus.dev.raftlabs.tech/repository/tdp-bin/cloudfoundry-cli/$NEXUS_ARCHIVE" @@ -12,5 +19,4 @@ else tar xzf $NEXUS_ARCHIVE mv ./cf7 /usr/local/bin/cf cf --version - fi diff --git a/scripts/deploy-backend.sh b/scripts/deploy-backend.sh index c4ab36e2b..8a66babcc 100755 --- a/scripts/deploy-backend.sh +++ b/scripts/deploy-backend.sh @@ -118,17 +118,6 @@ prepare_promtail() { popd } -update_plg_networking() { - # Need to switch the space after deploy since we're not always in dev space to handle specific networking from dev - # PLG apps to the correct backend app.
- cf target -o hhs-acf-ofa -s tanf-dev - cf add-network-policy prometheus "$CGAPPNAME_BACKEND" -s "$CF_SPACE" --protocol tcp --port 8080 - cf target -o hhs-acf-ofa -s "$CF_SPACE" - - # Promtial needs to send logs to Loki - cf add-network-policy "$CGAPPNAME_BACKEND" loki -s "tanf-dev" --protocol tcp --port 8080 -} - update_backend() { cd tdrs-backend || exit @@ -167,9 +156,6 @@ update_backend() # Add network policy to allow frontend to access backend cf add-network-policy "$CGAPPNAME_FRONTEND" "$CGAPPNAME_BACKEND" --protocol tcp --port 8080 - # Add PLG routing - update_plg_networking - if [ "$CF_SPACE" = "tanf-prod" ]; then # Add network policy to allow backend to access tanf-prod services cf add-network-policy "$CGAPPNAME_BACKEND" clamav-rest --protocol tcp --port 9000 diff --git a/scripts/deploy-frontend.sh b/scripts/deploy-frontend.sh index fd7206929..ddb0bcdea 100755 --- a/scripts/deploy-frontend.sh +++ b/scripts/deploy-frontend.sh @@ -13,7 +13,6 @@ CF_SPACE=${5} ENVIRONMENT=${6} env=${CF_SPACE#"tanf-"} -frontend_app_name=$(echo $CGHOSTNAME_FRONTEND | cut -d"-" -f3) # Update the Kibana name to include the environment KIBANA_BASE_URL="${CGAPPNAME_KIBANA}-${env}.apps.internal" diff --git a/scripts/git-secrets-check.sh b/scripts/git-secrets-check.sh index f371f303e..dcfcd7821 100755 --- a/scripts/git-secrets-check.sh +++ b/scripts/git-secrets-check.sh @@ -1,29 +1,57 @@ #!/bin/bash set -e +islocal=$1 -if [ -d /tmp/git-secrets ]; then +if [[ $(uname -s) == "Darwin" ]]; then # Mac OSX check + gs_path="/usr/local/bin" +else # Linux, we're likely running in CircleCI + gs_path="/usr/sbin" +fi + +if [ -f "$gs_path/git-secrets" ]; then echo The command git-secrets is available else echo The command git-secrets is not available, cloning... git clone git@github.com:awslabs/git-secrets.git /tmp/git-secrets/ if [ -f /tmp/git-secrets/git-secrets ]; then - echo "Moving git secrets into PATH" - sudo cp /tmp/git-secrets/git-secrets /usr/sbin/ + + echo "Moving git secrets into PATH" + sudo cp /tmp/git-secrets/git-secrets $gs_path/ + $gs_path/git-secrets --install -f + rm -rf /tmp/git-secrets #cleanup of clone dir else - echo "Git clone failed for git-secrets" + echo "Git clone failed for git-secrets" fi fi # ensure we have correct configs in place -[ -f ../.gitconfig ] -cat .gitconfig >> .git/config -echo "Git-Secrets Config loaded:" -grep -A10 secrets .git/config -# grep will return non-zero code if nothing found, failing the build +if [ -f .gitconfig ]; then + cat .gitconfig >> .git/config + echo "Git-Secrets Config loaded:" + grep -A10 secrets .git/config + # grep will return non-zero code if nothing found, failing the build +fi -echo "git-secrets-check.sh: Scanning repo ..." -git secrets --scan -r ../ -retVal=$? +if [ $islocal ]; then + echo "git-secrets-check.sh: Scanning files staged for commit ..." + setopt shwordsplit + staged_files=$(git diff --cached --name-status | grep -vE "D|^R[0-9]+"| cut -f2 | xargs) + + for filename in $staged_files; do + echo "git-secrets-check.sh: Scanning $filename ..." + git secrets --scan $filename + retVal=$? + if [[ $retVal -ne 0 ]]; then + echo "git-secrets found issues, prevented commit." + return 1 + fi + done + +else + echo "git-secrets-check.sh: Scanning repo ..." + git secrets --scan -r ../ + retVal=$? +fi # if there are issues, they will be listed then script will abort here if [[ $retVal -eq 0 ]]; then @@ -32,4 +60,3 @@ else echo "git-secrets-check.sh: Issues found with return code $retVal, please remediate." 
return 1 fi - diff --git a/scripts/sudo-check.sh b/scripts/sudo-check.sh index 5438bcef9..a604430b7 100755 --- a/scripts/sudo-check.sh +++ b/scripts/sudo-check.sh @@ -4,6 +4,12 @@ if command -v sudo /dev/null 2>&1; then echo The command sudo is available else echo The command sudo is not available installing... - apt-get update && apt-get install -y sudo - ls -al /bin/sh && sudo rm /bin/sh && sudo ln -s /bin/bash /bin/sh && ls -al /bin/sh + if [ -f /bin/terraform ]; then + echo "This is our Terraform executor, Alpine Linux v3.13" + apk update + apk add sudo + else + apt-get update && apt-get install -y sudo + ls -al /bin/sh && sudo rm /bin/sh && sudo ln -s /bin/bash /bin/sh && ls -al /bin/sh + fi fi \ No newline at end of file diff --git a/tdrs-backend/Dockerfile b/tdrs-backend/Dockerfile index 34ef5dd9b..6b908eee6 100644 --- a/tdrs-backend/Dockerfile +++ b/tdrs-backend/Dockerfile @@ -9,7 +9,7 @@ ENV DJANGO_SETTINGS_MODULE=tdpservice.settings.local ENV DJANGO_CONFIGURATION=Local # Allows docker to cache installed dependencies between builds COPY Pipfile Pipfile.lock /tdpapp/ -COPY sources.list /etc/apt/sources.list +# COPY sources.list /etc/apt/sources.list WORKDIR /tdpapp/ # Download latest listing of available packages: RUN apt-get -y update diff --git a/tdrs-backend/docker-compose.yml b/tdrs-backend/docker-compose.yml index a66fed7a5..07d2b502a 100644 --- a/tdrs-backend/docker-compose.yml +++ b/tdrs-backend/docker-compose.yml @@ -101,6 +101,15 @@ services: command: --config /usr/share/grafana/conf/custom.ini depends_on: - grafana-pg + + alertmanager: + restart: always + image: prom/alertmanager:v0.27.0 + ports: + - 9093:9093 + volumes: + - ./plg/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml + command: --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/alertmanager --log.level=debug --web.external-url=http://localhost:3000/alerts --web.route-prefix=/alerts --cluster.listen-address="" prometheus: restart: always @@ -109,12 +118,14 @@ services: - 9090:9090 volumes: - ./plg/prometheus/prometheus.local.yml:/etc/prometheus/prometheus.yml - - ./plg/prometheus/django_rules.yml:/etc/prometheus/django_rules.yml + - ./plg/prometheus/django-rules.yml:/etc/prometheus/django-rules.yml + - ./plg/prometheus/alerts.local.yml:/etc/prometheus/alerts.yml - prometheus_data:/prometheus depends_on: - web - celery-exporter - postgres-exporter + - alertmanager promtail: restart: always diff --git a/tdrs-backend/plg/README.md b/tdrs-backend/plg/README.md new file mode 100644 index 000000000..f0438e8f4 --- /dev/null +++ b/tdrs-backend/plg/README.md @@ -0,0 +1,6 @@ +### Grafana Auth and RBAC Config +Grafana is accessible by any frontend app on a private route to users who have the correct role. The Grafana UI is not be accessible to any user or application unless they are routed to it via a frontend app. Grafana is configured to require user and password authentication. Having the extra layer of authentication is required because the roles defined in Grafana are not in alignment with the roles TDP defines. Assigning users to appropriate role and teams in Grafana allows for least privilege access to any information that Grafana might be able to display. + +Grafana has three roles: `Admin`, `Editor`, and `Viewer`. We have also defined two teams (groups) in Grafana: `OFA` and `Raft` and several users. The teams are how we manage least privilege to Grafana's resources. Upon creation, all users are given one of the base roles. 
All Raft dev user accounts are given read only access (`Viewer`) to Grafana and OFA has a user account(s) associated with each of the roles. All users who are outside of OFA should always be assigned the `Viewer` role to maintain least privilege. All dashboards in Grafana are viewable by team as opposed to individual users/roles. Dashboard permissions are configured per dashboard and each team is given read only access to the appropriate dashboards. The `ofa-admin` user is the only direct user given access to resources. This account is given exclusive admin rights to all of Grafana. + +All Grafana administration is handled under the `Administration` drop down in the hamburger menu which is only accessible to `Admin` users. Users can be created, assigned a role, and then associated with a team. As new dashboards are added to Grafana their permissions need to be configured for least privilege by going to Dashboards->->Settings->Permissions. The admin can use other dashboard permission configurations to help finish the configuration. diff --git a/tdrs-backend/plg/alertmanager/alertmanager.yml b/tdrs-backend/plg/alertmanager/alertmanager.yml new file mode 100644 index 000000000..9414062ae --- /dev/null +++ b/tdrs-backend/plg/alertmanager/alertmanager.yml @@ -0,0 +1,71 @@ +global: + # The smarthost and SMTP sender used for mail notifications. + smtp_smarthost: 'smtp.sendgrid.net:587' + smtp_from: 'no-reply@tanfdata.acf.hhs.gov' + smtp_auth_username: 'apikey' + smtp_auth_password: '{{ sendgrid_api_key }}' + +# The directory from which notification templates are read. +templates: + - '/etc/alertmanager/template/*.tmpl' + +# The root route on which each incoming alert enters. +route: + # The labels by which incoming alerts are grouped together. For example, + # multiple alerts coming in for cluster=A and alertname=LatencyHigh would + # be batched into a single group. + group_by: ['alertname', 'env', 'service'] + + # When a new group of alerts is created by an incoming alert, wait at + # least 'group_wait' to send the initial notification. + # This way ensures that you get multiple alerts for the same group that start + # firing shortly after another are batched together on the first + # notification. + group_wait: 30s + + # When the first notification was sent, wait 'group_interval' to send a batch + # of new alerts that started firing for that group. + group_interval: 5m + + # If an alert has successfully been sent, wait 'repeat_interval' to + # resend them. + repeat_interval: 5m + + # A default receiver + receiver: admin-team-emails + + # All the above attributes are inherited by all child routes and can + # overwritten on each. + + # The child route trees. + routes: + # This routes performs a regular expression match on alert labels to + # catch alerts that are related to a list of services. + - matchers: + - alertname=~"UpTime" + receiver: dev-team-emails + group_wait: 30m + +# Inhibition rules allow to mute a set of alerts given that another alert is +# firing. +# We use this to mute any warning-level notifications if the same alert is +# already critical. +inhibit_rules: + - source_matchers: [severity="CRITICAL"] + target_matchers: [severity="WARNING"] + # Apply inhibition if the alertname is the same. + # CAUTION: + # If all label names listed in `equal` are missing + # from both the source and target alerts, + # the inhibition rule will apply! 
+ equal: [alertname, env, service] + + +receivers: + - name: 'admin-team-emails' + email_configs: + - to: '{{ admin_team_emails }}' + + - name: 'dev-team-emails' + email_configs: + - to: '{{ dev_team_emails }}' diff --git a/tdrs-backend/plg/alertmanager/manifest.yml b/tdrs-backend/plg/alertmanager/manifest.yml new file mode 100644 index 000000000..80067f717 --- /dev/null +++ b/tdrs-backend/plg/alertmanager/manifest.yml @@ -0,0 +1,10 @@ +version: 1 +applications: + - name: alertmanager + memory: 512M + disk_quota: 1G + instances: 1 + command: | + mkdir /tmp + buildpacks: + - https://github.com/cloudfoundry/binary-buildpack diff --git a/tdrs-backend/plg/deploy.sh b/tdrs-backend/plg/deploy.sh index 11adaebdd..c411f5457 100755 --- a/tdrs-backend/plg/deploy.sh +++ b/tdrs-backend/plg/deploy.sh @@ -1,13 +1,21 @@ #!/bin/bash set -e +DEV_BACKEND_APPS=("tdp-backend-raft" "tdp-backend-qasp" "tdp-backend-a11y") +STAGING_BACKEND_APPS=("tdp-backend-develop" "tdp-backend-staging") +PROD_BACKEND="tdp-backend-prod" + +DEV_FRONTEND_APPS=("tdp-frontend-raft" "tdp-frontend-qasp" "tdp-frontend-a11y") +STAGING_FRONTEND_APPS=("tdp-frontend-develop" "tdp-frontend-staging") +PROD_FRONTEND="tdp-frontend-prod" + help() { echo "Deploy the PLG stack or a Postgres exporter to the Cloud Foundry space you're currently authenticated in." echo "Syntax: deploy.sh [-h|a|p|u|d]" echo "Options:" echo "h Print this help message." echo "a Deploy the entire PLG stack." - echo "p Deploy a postgres exporter. Requires -u and -d" + echo "p Deploy a postgres exporter, expects the environment name (dev, staging, production) to be passed with switch. Requires -u and -d" echo "u Requires -p. The database URI the exporter should connect with." echo "d The Cloud Foundry service name of the RDS instance. Should be included with all deployments." echo @@ -19,6 +27,7 @@ deploy_pg_exporter() { cp manifest.yml $MANIFEST APP_NAME="pg-exporter-$1" + EXPORTER_SPACE=$(cf target | grep -Eo "tanf(.*)") yq eval -i ".applications[0].name = \"$APP_NAME\"" $MANIFEST yq eval -i ".applications[0].env.DATA_SOURCE_NAME = \"$2\"" $MANIFEST @@ -27,9 +36,10 @@ deploy_pg_exporter() { cf push --no-route -f $MANIFEST -t 180 --strategy rolling cf map-route $APP_NAME apps.internal --hostname $APP_NAME - # Add policy to allow prometheus to talk to pg-exporter - # TODO: this logic needs to be updated to allow routing accross spaces based on where we want PLG to live. - cf add-network-policy prometheus $APP_NAME -s "tanf-dev" --protocol tcp --port 9187 + # Add policy to allow prometheus to talk to pg-exporter regardless of environment + cf target -o hhs-acf-ofa -s tanf-prod + cf add-network-policy prometheus $APP_NAME -s "$EXPORTER_SPACE" --protocol tcp --port 9187 + cf target -o hhs-acf-ofa -s "$EXPORTER_SPACE" rm $MANIFEST popd } @@ -47,13 +57,21 @@ deploy_grafana() { yq eval -i ".applications[0].services[0] = \"$1\"" $MANIFEST cf push --no-route -f $MANIFEST -t 180 --strategy rolling - # cf map-route $APP_NAME apps.internal --hostname $APP_NAME - # Give Grafana a public route for now. Might be able to swap to internal route later. 
- cf map-route "$APP_NAME" app.cloud.gov --hostname "${APP_NAME}" + cf map-route $APP_NAME apps.internal --hostname $APP_NAME # Add policy to allow grafana to talk to prometheus and loki cf add-network-policy $APP_NAME prometheus --protocol tcp --port 8080 cf add-network-policy $APP_NAME loki --protocol tcp --port 8080 + + # Add network policies to allow grafana to talk to all frontend apps in all environments + for app in ${DEV_FRONTEND_APPS[@]}; do + cf add-network-policy "grafana" $app -s "tanf-dev" --protocol tcp --port 80 + done + for app in ${STAGING_FRONTEND_APPS[@]}; do + cf add-network-policy "grafana" $app -s "tanf-staging" --protocol tcp --port 80 + done + cf add-network-policy "grafana" $PROD_FRONTEND --protocol tcp --port 80 + rm $DATASOURCES rm $MANIFEST popd @@ -63,6 +81,16 @@ deploy_prometheus() { pushd prometheus cf push --no-route -f manifest.yml -t 180 --strategy rolling cf map-route prometheus apps.internal --hostname prometheus + + # Add network policies to allow prometheus to talk to all backend apps in all environments + for app in ${DEV_BACKEND_APPS[@]}; do + cf add-network-policy prometheus $app -s "tanf-dev" --protocol tcp --port 8080 + done + for app in ${STAGING_BACKEND_APPS[@]}; do + cf add-network-policy prometheus $app -s "tanf-staging" --protocol tcp --port 8080 + done + cf add-network-policy prometheus $PROD_BACKEND --protocol tcp --port 8080 + popd } @@ -73,6 +101,25 @@ deploy_loki() { popd } +setup_extra_net_pols() { + # Add network policies to allow frontend/backend to talk to grafana/loki + cf target -o hhs-acf-ofa -s tanf-dev + for i in ${!DEV_BACKEND_APPS[@]}; do + cf add-network-policy ${DEV_FRONTEND_APPS[$i]} grafana -s tanf-prod --protocol tcp --port 8080 + cf add-network-policy ${DEV_BACKEND_APPS[$i]} loki -s tanf-prod --protocol tcp --port 8080 + done + + cf target -o hhs-acf-ofa -s tanf-staging + for i in ${!STAGING_BACKEND_APPS[@]}; do + cf add-network-policy ${STAGING_FRONTEND_APPS[$i]} grafana -s tanf-prod --protocol tcp --port 8080 + cf add-network-policy ${STAGING_BACKEND_APPS[$i]} loki -s tanf-prod --protocol tcp --port 8080 + done + + cf target -o hhs-acf-ofa -s tanf-prod + cf add-network-policy $PROD_FRONTEND grafana -s tanf-prod --protocol tcp --port 8080 + cf add-network-policy $PROD_BACKEND loki -s tanf-prod --protocol tcp --port 8080 +} + err_help_exit() { echo $1 echo @@ -97,6 +144,7 @@ while getopts ":hap:u:d:" option; do DB_SERVICE_NAME=$OPTARG;; \?) # Invalid option echo "Error: Invalid option" + help exit;; esac done @@ -107,13 +155,14 @@ if [ "$#" -eq 0 ]; then fi pushd "$(dirname "$0")" -if [ "$DB_URI" == "" ] || [ "$DB_SERVICE_NAME" == "" ]; then +if [ "$DB_SERVICE_NAME" == "" ]; then err_help_exit "Error: you must include a database service name." 
fi if [ "$DEPLOY" == "plg" ]; then deploy_prometheus deploy_loki - deploy_grafana + deploy_grafana $DB_SERVICE_NAME + setup_extra_net_pols fi if [ "$DEPLOY" == "pg-exporter" ]; then if [ "$DB_URI" == "" ]; then diff --git a/tdrs-backend/plg/grafana/custom.ini b/tdrs-backend/plg/grafana/custom.ini index 7d8be7d57..fef040207 100644 --- a/tdrs-backend/plg/grafana/custom.ini +++ b/tdrs-backend/plg/grafana/custom.ini @@ -40,7 +40,7 @@ http_addr = http_port = 8080 # The public facing domain name used to access grafana from a browser -domain = app.cloud.gov +domain = grafana.apps.internal # Redirect to correct domain if host header does not match domain # Prevents DNS rebinding attacks @@ -553,10 +553,10 @@ login_cookie_name = grafana_session disable_login = false # The maximum lifetime (duration) an authenticated user can be inactive before being required to login at next visit. Default is 7 days (7d). This setting should be expressed as a duration, e.g. 5m (minutes), 6h (hours), 10d (days), 2w (weeks), 1M (month). The lifetime resets at each successful token rotation (token_rotation_interval_minutes). -login_maximum_inactive_lifetime_duration = +login_maximum_inactive_lifetime_duration = 30m # The maximum lifetime (duration) an authenticated user can be logged in since login time before being required to login. Default is 30 days (30d). This setting should be expressed as a duration, e.g. 5m (minutes), 6h (hours), 10d (days), 2w (weeks), 1M (month). -login_maximum_lifetime_duration = +login_maximum_lifetime_duration = 1d # How often should auth tokens be rotated for authenticated users when being active. The default is each 10 minutes. token_rotation_interval_minutes = 10 diff --git a/tdrs-backend/plg/grafana/dashboards/dev_logs_dashboard.json b/tdrs-backend/plg/grafana/dashboards/dev_logs_dashboard.json new file mode 100644 index 000000000..7fc27f8d5 --- /dev/null +++ b/tdrs-backend/plg/grafana/dashboards/dev_logs_dashboard.json @@ -0,0 +1,113 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Dashboard allowing log visualization", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "L8E80F9AEF21F6940" + }, + "gridPos": { + "h": 28, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": true, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "L8E80F9AEF21F6940" + }, + "editorMode": "code", + "expr": "{job=~\"$job\"}", + "queryType": "range", + "refId": "A" + } + ], + "title": "Job Logs", + "type": "logs" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "Django", + "Logs", + "Loki" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "loki", + "uid": "L8E80F9AEF21F6940" + }, + "definition": "", + "description": "Filter logs by job.", + "hide": 0, + "includeAll": true, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "label": "job", + "refId": "LokiVariableQueryEditor-VariableQuery", + "stream": "", + 
"type": 1 + }, + "refresh": 1, + "regex": "^(?!.*[-]prod$).*$", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Dev Logs", + "uid": "cdyz6flmh0ttsy", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/tdrs-backend/plg/grafana/dashboards/logs_dashboard.json b/tdrs-backend/plg/grafana/dashboards/logs_dashboard.json index 6843e5a85..ef2c34f56 100644 --- a/tdrs-backend/plg/grafana/dashboards/logs_dashboard.json +++ b/tdrs-backend/plg/grafana/dashboards/logs_dashboard.json @@ -19,7 +19,6 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 4, "links": [], "panels": [ { @@ -56,7 +55,7 @@ "refId": "A" } ], - "title": "Logs", + "title": "Job Logs", "type": "logs" } ], @@ -102,7 +101,7 @@ ] }, "time": { - "from": "now-3h", + "from": "now-24h", "to": "now" }, "timepicker": {}, diff --git a/tdrs-backend/plg/grafana/manifest.yml b/tdrs-backend/plg/grafana/manifest.yml index 2f796535f..1d6be5f3a 100644 --- a/tdrs-backend/plg/grafana/manifest.yml +++ b/tdrs-backend/plg/grafana/manifest.yml @@ -5,12 +5,12 @@ applications: disk_quota: 2G instances: 1 env: - GF_PATHS_PROVISIONING: "/conf/provisioning" + GF_PATHS_PROVISIONING: "conf/provisioning" GF_PATHS_CONFIG: "/home/vcap/app/custom.ini" GF_PATHS_HOME: "/home/vcap/app/grafana-v11.2.0" GF_PATHS_DATA: "/home/vcap/app/data" GF_PATHS_LOGS: "/home/vcap/app/logs" - GF_PATHS_PLUGINS: "/conf/provisioning/plugins" + GF_PATHS_PLUGINS: "conf/provisioning/plugins" GF_SERVER_HTTP_PORT: 8080 GF_DATABASE_TYPE: postgres GF_DATABASE_SSL_MODE: require diff --git a/tdrs-backend/plg/loki/manifest.yml b/tdrs-backend/plg/loki/manifest.yml index ab0d5d532..3f747daf4 100644 --- a/tdrs-backend/plg/loki/manifest.yml +++ b/tdrs-backend/plg/loki/manifest.yml @@ -1,7 +1,7 @@ version: 1 applications: - name: loki - memory: 512M + memory: 1G disk_quota: 7G instances: 1 command: | diff --git a/tdrs-backend/plg/prometheus/alerts.local.yml b/tdrs-backend/plg/prometheus/alerts.local.yml new file mode 100644 index 000000000..99183c544 --- /dev/null +++ b/tdrs-backend/plg/prometheus/alerts.local.yml @@ -0,0 +1,39 @@ +groups: + - name: database.alerts + rules: + - alert: LocalDatabaseDown + expr: last_over_time(pg_up{job="postgres"}[1m]) == 0 + for: 1m + labels: + severity: CRITICAL + annotations: + summary: "The {{ $labels.service }} service is down." + description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute." + - name: backend.alerts + rules: + - alert: LocalBackendDown + expr: last_over_time(up{job=~"tdp-backend.*"}[1m]) == 0 + for: 1m + labels: + severity: ERROR + annotations: + summary: "The {{ $labels.service }} service is down." + description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute." + - name: plg.alerts + rules: + - alert: LocalLokiDown + expr: last_over_time(up{job="loki"}[1m]) == 0 + labels: + severity: ERROR + annotations: + summary: "The {{ $labels.service }} service is down." + description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute." + - name: app.alerts + rules: + - alert: UpTime + expr: avg_over_time(up[1m]) < 0.95 + labels: + severity: WARNING + annotations: + summary: "The {{ $labels.service }} service has a uptime warning." 
+        description: "The {{ $labels.service }} service in the {{ $labels.env }} environment is not maintaining 95% uptime."
diff --git a/tdrs-backend/plg/prometheus/alerts.yml b/tdrs-backend/plg/prometheus/alerts.yml
new file mode 100644
index 000000000..affe54498
--- /dev/null
+++ b/tdrs-backend/plg/prometheus/alerts.yml
@@ -0,0 +1,73 @@
+groups:
+  - name: database.alerts
+    rules:
+      - alert: DevDatabaseDown
+        expr: last_over_time(pg_up{job="postgres-dev"}[1m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+      - alert: StagingDatabaseDown
+        expr: last_over_time(pg_up{job="postgres-staging"}[1m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+      - alert: ProductionDatabaseDown
+        expr: last_over_time(pg_up{job="postgres-production"}[1m]) == 0
+        labels:
+          severity: CRITICAL
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+  - name: backend.alerts
+    rules:
+      - alert: DevEnvironmentBackendDown
+        expr: last_over_time(up{job=~"tdp-backend.*", job!~".*prod", job!~".*staging"}[5m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 5 minutes."
+      - alert: StagingBackendDown
+        expr: last_over_time(up{job=~"tdp-backend-staging"}[1m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+      - alert: ProductionBackendDown
+        expr: last_over_time(up{job=~"tdp-backend-prod"}[1m]) == 0
+        labels:
+          severity: CRITICAL
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+  - name: plg.alerts
+    rules:
+      - alert: LokiDown
+        expr: last_over_time(up{job="loki"}[1m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+      - alert: GrafanaDown
+        expr: last_over_time(up{job="grafana"}[1m]) == 0
+        labels:
+          severity: ERROR
+        annotations:
+          summary: "The {{ $labels.service }} service is down."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment has been down for more than 1 minute."
+  - name: app.alerts
+    rules:
+      - alert: UpTime
+        expr: avg_over_time(up[1d]) < 0.95
+        for: 30m
+        labels:
+          severity: WARNING
+        annotations:
+          summary: "The {{ $labels.service }} service has an uptime warning."
+          description: "The {{ $labels.service }} service in the {{ $labels.env }} environment is not maintaining 95% uptime."
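Once these rule files are loaded, a quick way to confirm that Prometheus picked up the new groups and their severity labels is the rules API. Below is a minimal sketch, assuming the local stack from prometheus.local.yml is running, Prometheus is reachable at http://localhost:9090, and the requests package is installed (all three are assumptions, not part of this changeset):

```python
import requests

PROM_URL = "http://localhost:9090"  # assumed local Prometheus address; adjust for your setup

# List the alerting rules Prometheus actually loaded, with their severity label and state.
resp = requests.get(f"{PROM_URL}/api/v1/rules", params={"type": "alert"}, timeout=5)
resp.raise_for_status()

for group in resp.json().get("data", {}).get("groups", []):
    for rule in group.get("rules", []):
        severity = rule.get("labels", {}).get("severity", "n/a")
        print(f"{group['name']}: {rule.get('name')} severity={severity} state={rule.get('state')}")
```

Running `promtool check rules` against alerts.yml and alerts.local.yml before deploying will also catch YAML or PromQL syntax mistakes in these files.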
diff --git a/tdrs-backend/plg/prometheus/django_rules.yml b/tdrs-backend/plg/prometheus/django-rules.yml similarity index 100% rename from tdrs-backend/plg/prometheus/django_rules.yml rename to tdrs-backend/plg/prometheus/django-rules.yml diff --git a/tdrs-backend/plg/prometheus/prometheus.local.yml b/tdrs-backend/plg/prometheus/prometheus.local.yml index b9d8256b1..8b0a4517d 100644 --- a/tdrs-backend/plg/prometheus/prometheus.local.yml +++ b/tdrs-backend/plg/prometheus/prometheus.local.yml @@ -7,18 +7,19 @@ global: # Alertmanager configuration alerting: alertmanagers: - - static_configs: + - path_prefix: /alerts + static_configs: - targets: - # - alertmanager:9093 + - alertmanager:9093 # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. rule_files: - - "django_rules.yml" + - "django-rules.yml" + - "alerts.yml" # A scrape configuration containing exactly one endpoint to scrape: # Here it's Prometheus itself. scrape_configs: - # The job name is added as a label `job=` to any timeseries scraped from this config. - job_name: "prometheus" static_configs: - targets: ["localhost:9090"] @@ -27,11 +28,35 @@ scrape_configs: metrics_path: "/prometheus/metrics" static_configs: - targets: ["web:8080"] + labels: + service: "tdp-backend" + env: "local" - job_name: "celery" static_configs: - targets: ["celery-exporter:9540"] + labels: + service: "celery" + env: "local" - job_name: postgres static_configs: - targets: ["postgres-exporter:9187"] + labels: + service: "postgres" + env: "local" + + - job_name: loki + static_configs: + - targets: ["loki:3100"] + labels: + service: "loki" + env: "local" + + - job_name: grafana + metrics_path: /grafana/metrics + static_configs: + - targets: ["grafana:9400"] + labels: + service: "grafana" + env: "local" diff --git a/tdrs-backend/plg/prometheus/prometheus.yml b/tdrs-backend/plg/prometheus/prometheus.yml index a8afaaa38..66e35c519 100644 --- a/tdrs-backend/plg/prometheus/prometheus.yml +++ b/tdrs-backend/plg/prometheus/prometheus.yml @@ -6,24 +6,31 @@ global: # Alertmanager configuration alerting: alertmanagers: - - static_configs: + - path_prefix: /alerts + static_configs: - targets: - # - alertmanager:9093 + # - alertmanager.apps.internal:8080 # Load rules once and periodically evaluate them according to the global 'evaluation_interval'. rule_files: - - "django_rules.yml" + - "django-rules.yml" scrape_configs: # The job name is added as a label `job=` to any timeseries scraped from this config. 
- job_name: "prometheus" static_configs: - targets: ["localhost:8080"] + labels: + service: "prometheus" + env: "production" - job_name: "tdp-backend-raft" metrics_path: "/prometheus/metrics" static_configs: - targets: ["tdp-backend-raft.apps.internal:8080"] + labels: + service: "tdp-backend" + env: "dev" - job_name: "tdp-backend-qasp" metrics_path: "/prometheus/metrics" @@ -34,29 +41,58 @@ scrape_configs: metrics_path: "/prometheus/metrics" static_configs: - targets: ["tdp-backend-develop.apps.internal:8080"] + labels: + service: "tdp-backend" + env: "dev" - job_name: "tdp-backend-staging" metrics_path: "/prometheus/metrics" static_configs: - targets: ["tdp-backend-staging.apps.internal:8080"] + labels: + service: "tdp-backend" + env: "staging" - job_name: "tdp-backend-prod" metrics_path: "/prometheus/metrics" static_configs: - targets: ["tdp-backend-prod.apps.internal:8080"] - - - job_name: "celery-exporter-raft" - static_configs: - - targets: ["celery-exporter-raft.apps.internal:9540"] + labels: + service: "tdp-backend" + env: "production" - job_name: postgres-dev static_configs: - targets: ["pg-exporter-dev.apps.internal:9187"] + labels: + service: "postgres" + env: "dev" - job_name: postgres-staging static_configs: - targets: ["pg-exporter-staging.apps.internal:9187"] + labels: + service: "postgres" + env: "staging" - job_name: postgres-production static_configs: - targets: ["pg-exporter-production.apps.internal:9187"] + labels: + service: "postgres" + env: "production" + + - job_name: loki + static_configs: + - targets: ["loki.apps.internal:3100"] + labels: + service: "loki" + env: "production" + + - job_name: grafana + metrics_path: /grafana/metrics + static_configs: + - targets: ["grafana.app.cloud.gov:9400"] + labels: + service: "grafana" + env: "production" diff --git a/tdrs-backend/plg/promtail/config.local.yml b/tdrs-backend/plg/promtail/config.local.yml index 9cb617c11..dc6eb0da4 100644 --- a/tdrs-backend/plg/promtail/config.local.yml +++ b/tdrs-backend/plg/promtail/config.local.yml @@ -15,19 +15,26 @@ scrape_configs: - targets: - localhost labels: - job: varlogs + job: varlogs-local __path__: /var/log/*log - - job_name: django + - job_name: backend-local static_configs: - targets: - localhost labels: - job: django + job: backend-local __path__: /logs/django.log - - job_name: nginx + - job_name: backend-prod static_configs: - targets: - localhost labels: - job: nginx + job: backend-prod + __path__: /logs/django.log + - job_name: frontend-local + static_configs: + - targets: + - localhost + labels: + job: frontend-local __path__: /var/log/nginx/*log diff --git a/tdrs-backend/tdpservice/data_files/static/admin/js/admin/admin_datafile_model.js b/tdrs-backend/tdpservice/data_files/static/admin/js/admin/admin_datafile_model.js index 31d52ae20..ddde1dbde 100644 --- a/tdrs-backend/tdpservice/data_files/static/admin/js/admin/admin_datafile_model.js +++ b/tdrs-backend/tdpservice/data_files/static/admin/js/admin/admin_datafile_model.js @@ -2,29 +2,44 @@ $(window).on('load', function() { console.log('loaded'); var submitBtn=document.querySelector('button[type=submit]'); // add the first listener var theForm = submitBtn.parentNode.parentNode; + var action = ""; + var number_of_files_line = ""; - for (var i = 0; i < theForm.childNodes.length; i++) { - if (theForm.childNodes[i].className == "actions") { - form_header = theForm.childNodes[i]; + submitBtn.addEventListener('click', function(e) { + e.preventDefault(); + // number of files + action_counter = 
document.querySelector('span.action-counter') + is_action_counter_hidden = action_counter.className === "action-counter hidden" - for (var i = 0; i < form_header.childNodes.length; i++) { - if (form_header.childNodes[i].className == "action-counter") { - number_of_files = form_header.childNodes[i]; - break; - } - } + action_counter_all = document.querySelector('span.all') - break; + if (is_action_counter_hidden) { + number_of_files_line = action_counter_all.innerText; + } else { + number_of_files_line = action_counter.innerText; } - } - submitBtn.addEventListener('click', function(e) { - e.preventDefault(); - if (confirm("You are about to re-parse " + number_of_files.innerHTML.split(/(\s+)/)[0] + " files. Are you sure you want to continue?")) { - console.log('submitting'); - theForm.submit(); + + // what action is selected + action = document.querySelector('select[name=action]').value; + + if (action === "reparse") { + console.log('reparse'); + var splitted_number_of_files = number_of_files_line.split(/(\s+)/); + if (Number(splitted_number_of_files[0]) > 0 ) { + number_of_files = splitted_number_of_files[0]; + } else { + number_of_files = splitted_number_of_files[2]; + } + if (confirm("You are about to re-parse " + number_of_files + " files. Are you sure you want to continue?")) { + console.log('submitting'); + theForm.submit(); + } else { + console.log('not submitting'); + }; } else { - console.log('not submitting'); - }; + console.log('not reparse'); + alert('Please select the "Reparse" action to continue.'); + } }); }); diff --git a/tdrs-backend/tdpservice/data_files/test/test_api.py b/tdrs-backend/tdpservice/data_files/test/test_api.py index 78685b075..5fb3a0a5c 100644 --- a/tdrs-backend/tdpservice/data_files/test/test_api.py +++ b/tdrs-backend/tdpservice/data_files/test/test_api.py @@ -1,4 +1,5 @@ """Tests for DataFiles Application.""" +import os from rest_framework import status import pytest import base64 @@ -82,62 +83,58 @@ def get_spreadsheet(response): """Return error report.""" decoded_response = base64.b64decode(response.data['xls_report']) + if os.path.exists('mycls.xlsx'): + os.remove('mycls.xlsx') + # write the excel file to disk with open('mycls.xlsx', 'wb') as f: f.write(decoded_response) # read the excel file from disk wb = openpyxl.load_workbook('mycls.xlsx') - ws = wb.get_sheet_by_name('Sheet1') - return ws + critical = wb['Critical'] + summary = wb['Summary'] + return critical, summary @staticmethod def assert_error_report_tanf_file_content_matches_with_friendly_names(response): """Assert the error report file contents match expected with friendly names.""" - ws = DataFileAPITestBase.get_spreadsheet(response) + critical, summary = DataFileAPITestBase.get_spreadsheet(response) COL_ERROR_MESSAGE = 4 + COL_NUM_OCCURRENCES = 8 - assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + assert critical.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + "instructions (linked below) when looking up items and allowable values during the data revision process" - assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ( - "Since Item 21A (Cash Amount) is 873, then Item 21B " - "(Cash and Cash Equivalents: Number of Months) 0 must be greater than 0" - ) + assert critical.cell(row=8, column=COL_ERROR_MESSAGE).value == "No records created." 
+ assert summary.cell(row=7, column=COL_NUM_OCCURRENCES).value == 1 @staticmethod def assert_error_report_ssp_file_content_matches_with_friendly_names(response): """Assert the error report file contents match expected with friendly names.""" - ws = DataFileAPITestBase.get_spreadsheet(response) + critical, summary = DataFileAPITestBase.get_spreadsheet(response) COL_ERROR_MESSAGE = 4 + COL_NUM_OCCURRENCES = 8 - assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + assert critical.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + "instructions (linked below) when looking up items and allowable values during the data revision process" - assert ws.cell(row=7, column=COL_ERROR_MESSAGE).value == ("M1 Item 11 (Receives Subsidized Housing): 3 is " - "not in range [1, 2].") + assert critical.cell(row=7, column=COL_ERROR_MESSAGE).value == ("TRAILER: record length is 15 characters " + "but must be 23.") + assert summary.cell(row=7, column=COL_NUM_OCCURRENCES).value == 5 @staticmethod def assert_error_report_file_content_matches_without_friendly_names(response): """Assert the error report file contents match expected without friendly names.""" - decoded_response = base64.b64decode(response.data['xls_report']) - - # write the excel file to disk - with open('mycls.xlsx', 'wb') as f: - f.write(decoded_response) - - # read the excel file from disk - wb = openpyxl.load_workbook('mycls.xlsx') - ws = wb.get_sheet_by_name('Sheet1') + critical, summary = DataFileAPITestBase.get_spreadsheet(response) COL_ERROR_MESSAGE = 4 + COL_NUM_OCCURRENCES = 8 - assert ws.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + assert critical.cell(row=1, column=1).value == "Please refer to the most recent versions of the coding " \ + "instructions (linked below) when looking up items and allowable values during the data revision process" - assert ws.cell(row=8, column=COL_ERROR_MESSAGE).value == ( - "Since Item 21A (Cash Amount) is 873, then Item 21B " - "(Cash and Cash Equivalents: Number of Months) 0 must be greater than 0" - ) + assert critical.cell(row=8, column=COL_ERROR_MESSAGE).value == "No records created." 
+ assert summary.cell(row=7, column=COL_NUM_OCCURRENCES).value == 1 @staticmethod def assert_data_file_exists(data_file_data, version, user): diff --git a/tdrs-backend/tdpservice/data_files/util.py b/tdrs-backend/tdpservice/data_files/util.py index 0d2d7a941..b7cc836b0 100644 --- a/tdrs-backend/tdpservice/data_files/util.py +++ b/tdrs-backend/tdpservice/data_files/util.py @@ -3,54 +3,88 @@ from io import BytesIO import xlsxwriter import calendar -from tdpservice.parsers.models import ParserErrorCategoryChoices +from django.conf import settings +from django.core.paginator import Paginator +from django.db import models +from django.db.models import Count, Q +from django.utils.translation import gettext_lazy as _ -def get_xls_serialized_file(data): - """Return xls file created from the error.""" +class ParserErrorCategoryChoices(models.TextChoices): + """Enum of ParserError error_type.""" + + PRE_CHECK = "1", _("File pre-check") + FIELD_VALUE = "2", _("Record value invalid") + VALUE_CONSISTENCY = "3", _("Record value consistency") + CASE_CONSISTENCY = "4", _("Case consistency") + SECTION_CONSISTENCY = "5", _("Section consistency") + HISTORICAL_CONSISTENCY = "6", _("Historical consistency") + + +def get_prioritized_queryset(parser_errors): + """Generate a prioritized queryset of ParserErrors.""" + PRIORITIZED_CAT2 = ( + ("FAMILY_AFFILIATION", "CITIZENSHIP_STATUS", "CLOSURE_REASON"), + ) + PRIORITIZED_CAT3 = ( + ("FAMILY_AFFILIATION", "SSN"), + ("FAMILY_AFFILIATION", "CITIZENSHIP_STATUS"), + ("AMT_FOOD_STAMP_ASSISTANCE", "AMT_SUB_CC", "CASH_AMOUNT", "CC_AMOUNT", "TRANSP_AMOUNT"), + ("FAMILY_AFFILIATION", "SSN", "CITIZENSHIP_STATUS"), + ("FAMILY_AFFILIATION", "PARENT_MINOR_CHILD"), + ("FAMILY_AFFILIATION", "EDUCATION_LEVEL"), + ("FAMILY_AFFILIATION", "WORK_ELIGIBLE_INDICATOR"), + ("CITIZENSHIP_STATUS", "WORK_ELIGIBLE_INDICATOR"), + ) + + # All cat1/4 errors + error_type_query = Q(error_type=ParserErrorCategoryChoices.PRE_CHECK) | \ + Q(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY) + filtered_errors = parser_errors.filter(error_type_query) + + for fields in PRIORITIZED_CAT2: + filtered_errors = filtered_errors.union(parser_errors.filter( + field_name__in=fields, + error_type=ParserErrorCategoryChoices.FIELD_VALUE + )) + + for fields in PRIORITIZED_CAT3: + filtered_errors = filtered_errors.union(parser_errors.filter( + fields_json__friendly_name__has_keys=fields, + error_type=ParserErrorCategoryChoices.VALUE_CONSISTENCY + )) + + return filtered_errors + + +def format_error_msg(error_msg, fields_json): + """Format error message.""" + for key, value in fields_json['friendly_name'].items(): + error_msg = error_msg.replace(key, value) if value else error_msg + return error_msg + + +def friendly_names(fields_json): + """Return comma separated string of friendly names.""" + return ','.join([i for i in fields_json['friendly_name'].values()]) + + +def internal_names(fields_json): + """Return comma separated string of internal names.""" + return ','.join([i for i in fields_json['friendly_name'].keys()]) - def chk(x): - """Check if fields_json is not None.""" - x['fields_json'] = x['fields_json'] if x.get('fields_json', None) else { - 'friendly_name': { - x['field_name']: x['field_name'] - }, - } - x['fields_json']['friendly_name'] = x['fields_json']['friendly_name'] if x['fields_json'].get( - 'friendly_name', None) else { - x['field_name']: x['field_name'] - } - if None in x['fields_json']['friendly_name'].keys(): - x['fields_json']['friendly_name'].pop(None) - if None in 
x['fields_json']['friendly_name'].values(): - x['fields_json']['friendly_name'].pop() - return x - - def format_error_msg(x): - """Format error message.""" - error_msg = x['error_message'] - for key, value in x['fields_json']['friendly_name'].items(): - error_msg = error_msg.replace(key, value) if value else error_msg - return error_msg +def check_fields_json(fields_json, field_name): + """If fields_json is None, impute field name to avoid NoneType errors.""" + if not fields_json: + child_dict = {field_name: field_name} if field_name else {} + fields_json = {'friendly_name': child_dict} + return fields_json + + +def write_worksheet_banner(worksheet): + """Write worksheet banner.""" row, col = 0, 0 - output = BytesIO() - workbook = xlsxwriter.Workbook(output) - worksheet = workbook.add_worksheet() - - report_columns = [ - ('case_number', lambda x: x['case_number']), - ('year', lambda x: str(x['rpt_month_year'])[0:4] if x['rpt_month_year'] else None), - ('month', lambda x: calendar.month_name[ - int(str(x['rpt_month_year'])[4:]) - ] if x['rpt_month_year'] else None), - ('error_message', lambda x: format_error_msg(chk(x))), - ('item_number', lambda x: x['item_number']), - ('item_name', lambda x: ','.join([i for i in chk(x)['fields_json']['friendly_name'].values()])), - ('internal_variable_name', lambda x: ','.join([i for i in chk(x)['fields_json']['friendly_name'].keys()])), - ('row_number', lambda x: x['row_number']), - ('error_type', lambda x: str(ParserErrorCategoryChoices(x['error_type']).label)), - ] # write beta banner worksheet.write( @@ -81,26 +115,99 @@ def format_error_msg(x): string='Visit the Knowledge Center for further guidance on reviewing error reports' ) - row, col = 5, 0 - # write csv header - bold = workbook.add_format({'bold': True}) +def format_header(header_list: list): + """Format header.""" + return ' '.join([i.capitalize() for i in header_list.split('_')]) + + +def write_prioritized_errors(worksheet, prioritized_errors, bold): + """Write prioritized errors to spreadsheet.""" + row, col = 5, 0 - def format_header(header_list: list): - """Format header.""" - return ' '.join([i.capitalize() for i in header_list.split('_')]) + # We will write the headers in the first row + columns = ['case_number', 'year', 'month', + 'error_message', 'item_number', 'item_name', + 'internal_variable_name', 'row_number', 'error_type', + ] + for idx, col in enumerate(columns): + worksheet.write(row, idx, format_header(col), bold) + + paginator = Paginator(prioritized_errors.order_by('pk'), settings.BULK_CREATE_BATCH_SIZE) + row_idx = 6 + for page in paginator: + for record in page.object_list: + rpt_month_year = getattr(record, 'rpt_month_year', None) + rpt_month_year = str(rpt_month_year) if rpt_month_year else "" + + fields_json = check_fields_json(getattr(record, 'fields_json', {}), record.field_name) + + worksheet.write(row_idx, 0, record.case_number) + worksheet.write(row_idx, 1, rpt_month_year[:4]) + worksheet.write(row_idx, 2, calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None) + worksheet.write(row_idx, 3, format_error_msg(record.error_message, fields_json)) + worksheet.write(row_idx, 4, record.item_number) + worksheet.write(row_idx, 5, friendly_names(fields_json)) + worksheet.write(row_idx, 6, internal_names(fields_json)) + worksheet.write(row_idx, 7, record.row_number) + worksheet.write(row_idx, 8, str(ParserErrorCategoryChoices(record.error_type).label)) + row_idx += 1 + + +def write_aggregate_errors(worksheet, all_errors, bold): + """Aggregate by error 
message and write.""" + row, col = 5, 0 # We will write the headers in the first row - [worksheet.write(row, col, format_header(key[0]), bold) for col, key in enumerate(report_columns)] + columns = ['year', 'month', 'error_message', 'item_number', 'item_name', + 'internal_variable_name', 'error_type', 'number_of_occurrences' + ] + for idx, col in enumerate(columns): + worksheet.write(row, idx, format_header(col), bold) + + aggregates = all_errors.values('rpt_month_year', 'error_message', + 'item_number', 'field_name', + 'fields_json', 'error_type').annotate(num_occurrences=Count('error_message')) + + paginator = Paginator(aggregates.order_by('-num_occurrences'), settings.BULK_CREATE_BATCH_SIZE) + row_idx = 6 + for page in paginator: + for record in page.object_list: + rpt_month_year = record['rpt_month_year'] + rpt_month_year = str(rpt_month_year) if rpt_month_year else "" + + fields_json = check_fields_json(record['fields_json'], record['field_name']) + + worksheet.write(row_idx, 0, rpt_month_year[:4]) + worksheet.write(row_idx, 1, calendar.month_name[int(rpt_month_year[4:])] if rpt_month_year[4:] else None) + worksheet.write(row_idx, 2, format_error_msg(record['error_message'], fields_json)) + worksheet.write(row_idx, 3, record['item_number']) + worksheet.write(row_idx, 4, friendly_names(fields_json)) + worksheet.write(row_idx, 5, internal_names(fields_json)) + worksheet.write(row_idx, 6, str(ParserErrorCategoryChoices(record['error_type']).label)) + worksheet.write(row_idx, 7, record['num_occurrences']) + row_idx += 1 + + +def get_xls_serialized_file(all_errors, prioritized_errors): + """Return xls file created from the error.""" + output = BytesIO() + workbook = xlsxwriter.Workbook(output) + prioritized_sheet = workbook.add_worksheet(name="Critical") + aggregate_sheet = workbook.add_worksheet(name="Summary") - [ - worksheet.write(row + 6, col, key[1](data_i)) for col, key in enumerate(report_columns) - for row, data_i in enumerate(data) - ] + write_worksheet_banner(prioritized_sheet) + write_worksheet_banner(aggregate_sheet) + + bold = workbook.add_format({'bold': True}) + write_prioritized_errors(prioritized_sheet, prioritized_errors, bold) + write_aggregate_errors(aggregate_sheet, all_errors, bold) # autofit all columns except for the first one - worksheet.autofit() - worksheet.set_column(0, 0, 20) + prioritized_sheet.autofit() + prioritized_sheet.set_column(0, 0, 20) + aggregate_sheet.autofit() + aggregate_sheet.set_column(0, 0, 20) workbook.close() - return {"data": data, "xls_report": base64.b64encode(output.getvalue()).decode("utf-8")} + return {"xls_report": base64.b64encode(output.getvalue()).decode("utf-8")} diff --git a/tdrs-backend/tdpservice/data_files/views.py b/tdrs-backend/tdpservice/data_files/views.py index 3f67d7cb3..8263fe62b 100644 --- a/tdrs-backend/tdpservice/data_files/views.py +++ b/tdrs-backend/tdpservice/data_files/views.py @@ -15,13 +15,12 @@ from rest_framework import status from tdpservice.data_files.serializers import DataFileSerializer -from tdpservice.data_files.util import get_xls_serialized_file +from tdpservice.data_files.util import get_xls_serialized_file, get_prioritized_queryset from tdpservice.data_files.models import DataFile, get_s3_upload_path from tdpservice.users.permissions import DataFilePermissions, IsApprovedPermission from tdpservice.scheduling import parser_task from tdpservice.data_files.s3_client import S3Client from tdpservice.parsers.models import ParserError -from tdpservice.parsers.serializers import ParsingErrorSerializer logger 
= logging.getLogger(__name__) @@ -147,9 +146,10 @@ def download(self, request, pk=None): def download_error_report(self, request, pk=None): """Generate and return the parsing error report xlsx.""" datafile = self.get_object() - parser_errors = ParserError.objects.all().filter(file=datafile) - serializer = ParsingErrorSerializer(parser_errors, many=True, context=self.get_serializer_context()) - return Response(get_xls_serialized_file(serializer.data)) + all_errors = ParserError.objects.filter(file=datafile) + filtered_errors = get_prioritized_queryset(all_errors) + + return Response(get_xls_serialized_file(all_errors, filtered_errors)) class GetYearList(APIView): diff --git a/tdrs-backend/tdpservice/parsers/fields.py b/tdrs-backend/tdpservice/parsers/fields.py index d26c27bb4..11872d2bb 100644 --- a/tdrs-backend/tdpservice/parsers/fields.py +++ b/tdrs-backend/tdpservice/parsers/fields.py @@ -53,7 +53,7 @@ def parse_value(self, line): value = int(value) return value except ValueError: - logger.error(f"Error parsing field value: {value} to integer.") + logger.error(f"Error parsing field {self.name} value to integer.") return None case "string": return value @@ -83,4 +83,8 @@ def __init__(self, transform_func, item, name, friendly_name, type, startIndex, def parse_value(self, line): """Parse and transform the value for a field given a line, startIndex, endIndex, and field type.""" value = super().parse_value(line) - return self.transform_func(value, **self.kwargs) + try: + return_value = self.transform_func(value, **self.kwargs) + return return_value + except Exception: + raise ValueError(f"Error transforming field value for field: {self.name}.") diff --git a/tdrs-backend/tdpservice/parsers/models.py b/tdrs-backend/tdpservice/parsers/models.py index f9c5f3c63..f1e470e6e 100644 --- a/tdrs-backend/tdpservice/parsers/models.py +++ b/tdrs-backend/tdpservice/parsers/models.py @@ -2,24 +2,15 @@ import datetime from django.db import models -from django.utils.translation import gettext_lazy as _ from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType from tdpservice.data_files.models import DataFile +from tdpservice.data_files.util import ParserErrorCategoryChoices + import logging logger = logging.getLogger(__name__) -class ParserErrorCategoryChoices(models.TextChoices): - """Enum of ParserError error_type.""" - - PRE_CHECK = "1", _("File pre-check") - FIELD_VALUE = "2", _("Record value invalid") - VALUE_CONSISTENCY = "3", _("Record value consistency") - CASE_CONSISTENCY = "4", _("Case consistency") - SECTION_CONSISTENCY = "5", _("Section consistency") - HISTORICAL_CONSISTENCY = "6", _("Historical consistency") - class ParserError(models.Model): """Model representing a parser error.""" @@ -139,7 +130,7 @@ def get_status(self): return DataFileSummary.Status.REJECTED elif errors.count() == 0: return DataFileSummary.Status.ACCEPTED - elif row_precheck_errors.count() > 0 or case_consistency_errors.count() > 0: + elif (row_precheck_errors.count() > 0 or case_consistency_errors.count()): return DataFileSummary.Status.PARTIALLY_ACCEPTED else: return DataFileSummary.Status.ACCEPTED_WITH_ERRORS diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py index 82d5c2c46..20edf6fdb 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/ssp/m2.py @@ -317,7 +317,7 @@ startIndex=48, endIndex=49, required=False, - 
validators=[category2.isGreaterThan(0)] + validators=[category2.isGreaterThan(0, inclusive=True)] ), Field( item="32E", diff --git a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py index 8f9aba575..9dc92acd1 100644 --- a/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py +++ b/tdrs-backend/tdpservice/parsers/schema_defs/tanf/t1.py @@ -66,12 +66,6 @@ result_field_name="WORK_REQ_SANCTION", result_function=category3.isOneOf((1, 2)), ), - category3.ifThenAlso( - condition_field_name="SANC_REDUCTION_AMT", - condition_function=category3.isGreaterThan(0), - result_field_name="FAMILY_SANC_ADULT", - result_function=category3.isOneOf((1, 2)), - ), category3.ifThenAlso( condition_field_name="SANC_REDUCTION_AMT", condition_function=category3.isGreaterThan(0), @@ -635,7 +629,7 @@ endIndex=114, required=False, validators=[ - category2.isOneOf(["9", " "]), + category2.isOneOf(["9", "0", " "]), category2.isAlphaNumeric(), ], ), @@ -658,7 +652,7 @@ endIndex=117, required=False, validators=[ - category2.isOneOf([1, 2]), + category2.isOneOf([0, 1, 2]), ], ), Field( diff --git a/tdrs-backend/tdpservice/parsers/util.py b/tdrs-backend/tdpservice/parsers/util.py index 69a53dadd..571261221 100644 --- a/tdrs-backend/tdpservice/parsers/util.py +++ b/tdrs-backend/tdpservice/parsers/util.py @@ -284,13 +284,12 @@ def remove_case_due_to_errors(self, should_remove, case_hash): def generate_t1_t4_hashes(line, record): """Return hashes for duplicate and partial duplicate detection for T1 & T4 records.""" - logger.debug(f"Partial Hash Field Values: {record.RecordType} {str(record.RPT_MONTH_YEAR)} {record.CASE_NUMBER}") + logger.debug(f"Partial Hash Field Values: for T1/T4: {record.RecordType} {str(record.RPT_MONTH_YEAR)} ") return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR or '') + str(record.CASE_NUMBER or '')) def generate_t2_t3_t5_hashes(line, record): """Return hashes for duplicate and partial duplicate detection for T2 & T3 & T5 records.""" - logger.debug(f"Partial Hash Field Values: {record.RecordType} {str(record.RPT_MONTH_YEAR)} {record.CASE_NUMBER} " + - f"{str(record.FAMILY_AFFILIATION)} {record.DATE_OF_BIRTH} {record.SSN}") + logger.debug(f"Partial Hash Field Values: for T2/T3/T5: {record.RecordType} {str(record.RPT_MONTH_YEAR)} ") return hash(line), hash(record.RecordType + str(record.RPT_MONTH_YEAR or '') + str(record.CASE_NUMBER or '') + str(record.FAMILY_AFFILIATION or '') + str(record.DATE_OF_BIRTH or '') + str(record.SSN or '')) diff --git a/tdrs-backend/tdpservice/parsers/validators/category3.py b/tdrs-backend/tdpservice/parsers/validators/category3.py index 89f9547c8..d8ed34e76 100644 --- a/tdrs-backend/tdpservice/parsers/validators/category3.py +++ b/tdrs-backend/tdpservice/parsers/validators/category3.py @@ -376,17 +376,15 @@ def validate(record, row_schema): return true_case else: return true_case - except Exception as e: + except Exception: vals = { "WORK_ELIGIBLE_INDICATOR": WORK_ELIGIBLE_INDICATOR, "RELATIONSHIP_HOH": RELATIONSHIP_HOH, - "DOB": DOB } logger.debug( "Caught exception in validator: validate__WORK_ELIGIBLE_INDICATOR__HOH__AGE. " + f"With field values: {vals}." ) - logger.debug(f'Exception: {e}') # Per conversation with Alex on 03/26/2024, returning the true case during exception handling to avoid # confusing the STTs. 
return true_case diff --git a/tdrs-backend/tdpservice/stts/models.py b/tdrs-backend/tdpservice/stts/models.py index b883ded74..b960d0e55 100644 --- a/tdrs-backend/tdpservice/stts/models.py +++ b/tdrs-backend/tdpservice/stts/models.py @@ -4,6 +4,9 @@ from django.db.models import constraints +DEFAULT_NUMBER_OF_SECTIONS = 4 + + class Region(models.Model): """A model representing a US region.""" @@ -39,6 +42,14 @@ class EntityType(models.TextChoices): ssp = models.BooleanField(default=False, null=True) sample = models.BooleanField(default=False, null=True) + @property + def num_sections(self): + """The number of sections this STT submits.""" + if self.filenames is None: + return DEFAULT_NUMBER_OF_SECTIONS + divisor = int(self.ssp) + 1 + return len(self.filenames) // divisor + class Meta: """Metadata.""" diff --git a/tdrs-backend/tdpservice/stts/serializers.py b/tdrs-backend/tdpservice/stts/serializers.py index be2ec88b6..7774e87ab 100644 --- a/tdrs-backend/tdpservice/stts/serializers.py +++ b/tdrs-backend/tdpservice/stts/serializers.py @@ -14,7 +14,7 @@ class Meta: """Metadata.""" model = STT - fields = ["id", "type", "postal_code", "name", "region", "filenames", "stt_code", "ssp",] + fields = ["id", "type", "postal_code", "name", "region", "filenames", "stt_code", "ssp", "num_sections"] def get_postal_code(self, obj): """Return the state postal_code.""" diff --git a/tdrs-backend/tdpservice/urls.py b/tdrs-backend/tdpservice/urls.py index eb91ffe48..e6b22e876 100755 --- a/tdrs-backend/tdpservice/urls.py +++ b/tdrs-backend/tdpservice/urls.py @@ -11,7 +11,7 @@ from rest_framework.permissions import AllowAny -from .users.api.authorization_check import AuthorizationCheck, KibanaAuthorizationCheck, GrafanaAuthorizationCheck +from .users.api.authorization_check import AuthorizationCheck, KibanaAuthorizationCheck, PlgAuthorizationCheck from .users.api.login import TokenAuthorizationLoginDotGov, TokenAuthorizationAMS from .users.api.login import CypressLoginDotGovAuthenticationOverride from .users.api.login_redirect_oidc import LoginRedirectAMS, LoginRedirectLoginDotGov @@ -54,7 +54,7 @@ path("admin/", admin.site.urls, name="admin"), path("prometheus/", include("django_prometheus.urls")), path("kibana_auth_check/", KibanaAuthorizationCheck.as_view(), name="kibana-authorization-check"), - path("grafana_auth_check/", GrafanaAuthorizationCheck.as_view(), name="grafana-authorization-check"), + path("plg_auth_check/", PlgAuthorizationCheck.as_view(), name="plg-authorization-check"), ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) # TODO: Supply `terms_of_service` argument in OpenAPI Info once implemented diff --git a/tdrs-backend/tdpservice/users/api/authorization_check.py b/tdrs-backend/tdpservice/users/api/authorization_check.py index 60da3a17b..1309d5a51 100644 --- a/tdrs-backend/tdpservice/users/api/authorization_check.py +++ b/tdrs-backend/tdpservice/users/api/authorization_check.py @@ -66,28 +66,26 @@ def get(self, request, *args, **kwargs): user_in_valid_group = user.is_ofa_sys_admin or user.is_digit_team if (user.hhs_id is not None and user_in_valid_group) or settings.BYPASS_OFA_AUTH: - logger.debug(f"User: {user} has correct authentication credentials. Allowing access to Kibana.") return HttpResponse(status=200) else: - logger.debug(f"User: {user} has incorrect authentication credentials. Not allowing access to Kibana.") + logger.warning(f"User: {user} has incorrect authentication credentials. 
Not allowing access to Kibana.") return HttpResponse(status=401) -class GrafanaAuthorizationCheck(APIView): +class PlgAuthorizationCheck(APIView): """Check if user is authorized to view Grafana.""" query_string = False - pattern_name = "grafana-authorization-check" + pattern_name = "plg-authorization-check" permission_classes = [IsAuthenticated] def get(self, request, *args, **kwargs): - """Handle get request and verify user is authorized to access grafana.""" + """Handle get request and verify user is authorized to access plg apps.""" user = request.user user_in_valid_group = user.is_ofa_sys_admin or user.is_developer if user_in_valid_group: - logger.debug(f"User: {user} has correct authentication credentials. Allowing access to Grafana.") return HttpResponse(status=200) else: - logger.debug(f"User: {user} has incorrect authentication credentials. Not allowing access to Grafana.") + logger.warning(f"User: {user} has incorrect authentication credentials. Not allowing access to Grafana.") return HttpResponse(status=401) diff --git a/tdrs-backend/tdpservice/users/models.py b/tdrs-backend/tdpservice/users/models.py index 40f8dc900..3cf094264 100644 --- a/tdrs-backend/tdpservice/users/models.py +++ b/tdrs-backend/tdpservice/users/models.py @@ -118,9 +118,11 @@ def __str__(self): """Return the username as the string representation of the object.""" return self.username - def is_in_group(self, group_name: str) -> bool: - """Return whether or not the user is a member of the specified Group.""" - return self.groups.filter(name=group_name).exists() + def is_in_group(self, group_names: list) -> bool: + """Return whether or not the user is a member of the specified Group(s).""" + if type(group_names) == str: + group_names = [group_names] + return self.groups.filter(name__in=group_names).exists() def validate_location(self): """Throw a validation error if a user has a location type incompatable with their role.""" @@ -180,6 +182,11 @@ def is_ocio_staff(self) -> bool: """Return whether or not the user is in the ACF OCIO Group.""" return self.is_in_group("ACF OCIO") + @property + def is_an_admin(self) -> bool: + """Return whether or not the user is in the OFA Admin Group or OFA System Admin.""" + return self.is_in_group(["OFA Admin", "OFA System Admin"]) + @property def is_ofa_sys_admin(self) -> bool: """Return whether or not the user is in the OFA System Admin Group.""" diff --git a/tdrs-frontend/docker-compose.yml b/tdrs-frontend/docker-compose.yml index 13094148b..4a1a41fac 100644 --- a/tdrs-frontend/docker-compose.yml +++ b/tdrs-frontend/docker-compose.yml @@ -32,11 +32,12 @@ services: - LOCAL_DEV=true - KIBANA=kibana - GRAFANA=grafana + - ALERTS=alertmanager - REACT_APP_DEVAUTH=${REACT_APP_DEVAUTH} command: > /bin/sh -c "echo 'starting nginx' && - envsubst '$${BACK_END} $${KIBANA} $${GRAFANA}' < /etc/nginx/locations.conf > /etc/nginx/locations_.conf && + envsubst '$${BACK_END} $${KIBANA} $${GRAFANA} $${ALERTS}' < /etc/nginx/locations.conf > /etc/nginx/locations_.conf && rm /etc/nginx/locations.conf && cp /etc/nginx/locations_.conf /etc/nginx/locations.conf && envsubst ' diff --git a/tdrs-frontend/nginx/cloud.gov/locations.conf b/tdrs-frontend/nginx/cloud.gov/locations.conf index 592063439..2e14fc69f 100644 --- a/tdrs-frontend/nginx/cloud.gov/locations.conf +++ b/tdrs-frontend/nginx/cloud.gov/locations.conf @@ -60,6 +60,42 @@ location = /kibana_auth_check { send_timeout 900; } +location /grafana/ { + auth_request /plg_auth_check; + auth_request_set $auth_status $upstream_status; + + set $grafana 
http://grafana.apps.internal:8080$request_uri; + proxy_pass $grafana; + proxy_set_header Host $host:3000; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + + proxy_connect_timeout 300; + proxy_read_timeout 300; + proxy_send_timeout 300; + send_timeout 900; + proxy_buffer_size 4k; +} + +location = /plg_auth_check { + internal; + set $endpoint http://{{env "BACKEND_HOST"}}.apps.internal:8080/plg_auth_check/; + proxy_pass $endpoint$1$is_args$args; + proxy_set_header Host $host:3000; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header Content-Length ""; + proxy_set_header X-Original-URI $request_uri; + + proxy_connect_timeout 300; + proxy_read_timeout 300; + proxy_send_timeout 300; + send_timeout 900; + proxy_pass_header x-csrftoken; +} + if ($request_method ~ ^(TRACE|OPTION)$) { return 405; } diff --git a/tdrs-frontend/nginx/local/locations.conf b/tdrs-frontend/nginx/local/locations.conf index e25dad318..29ec9dec3 100644 --- a/tdrs-frontend/nginx/local/locations.conf +++ b/tdrs-frontend/nginx/local/locations.conf @@ -62,7 +62,7 @@ location = /kibana_auth_check { } location /grafana/ { - auth_request /grafana_auth_check; + auth_request /plg_auth_check; auth_request_set $auth_status $upstream_status; set $grafana http://${GRAFANA}:9400$request_uri; @@ -79,9 +79,27 @@ location /grafana/ { proxy_buffer_size 4k; } -location = /grafana_auth_check { +location /alerts/ { + auth_request /plg_auth_check; + auth_request_set $auth_status $upstream_status; + + set $alerts http://${ALERTS}:9093$request_uri; + proxy_pass $alerts; + proxy_set_header Host $host:3000; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto https; + + proxy_connect_timeout 300; + proxy_read_timeout 300; + proxy_send_timeout 300; + send_timeout 900; + proxy_buffer_size 4k; +} + +location = /plg_auth_check { internal; - set $endpoint http://${BACK_END}:8080/grafana_auth_check/; + set $endpoint http://${BACK_END}:8080/plg_auth_check/; proxy_pass $endpoint$1$is_args$args; proxy_set_header Host $host:3000; proxy_set_header X-Real-IP $remote_addr; diff --git a/tdrs-frontend/src/actions/reports.js b/tdrs-frontend/src/actions/reports.js index 8ecb8839e..766aafc7f 100644 --- a/tdrs-frontend/src/actions/reports.js +++ b/tdrs-frontend/src/actions/reports.js @@ -4,6 +4,7 @@ import axios from 'axios' import axiosInstance from '../axios-instance' import { logErrorToServer } from '../utils/eventLogger' import removeFileInputErrorState from '../utils/removeFileInputErrorState' +import { fileUploadSections } from '../reducers/reports' const BACKEND_URL = process.env.REACT_APP_BACKEND_URL diff --git a/tdrs-frontend/src/actions/reports.test.js b/tdrs-frontend/src/actions/reports.test.js index 40593f3bb..294e31c9a 100644 --- a/tdrs-frontend/src/actions/reports.test.js +++ b/tdrs-frontend/src/actions/reports.test.js @@ -241,6 +241,18 @@ describe('actions/reports', () => { }) }) + it('should dispatch SET_SELECTED_STT with empty stt', async () => { + const store = mockStore() + + await store.dispatch(setStt('')) + + const actions = store.getActions() + expect(actions[0].type).toBe(SET_SELECTED_STT) + expect(actions[0].payload).toStrictEqual({ + stt: '', + }) + }) + it('should dispatch SET_SELECTED_QUARTER', async () => { const store = mockStore() 
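For context on the auth_request wiring in the locations.conf changes above: nginx fires an internal subrequest to the backend's /plg_auth_check/ endpoint, forwarding the user's session cookie, and only proxies /grafana/ (and, locally, /alerts/) when that subrequest returns 200. The same round trip can be exercised directly; a rough sketch, where the backend address and the session cookie value are placeholders rather than anything defined in this changeset:

```python
import requests

BACKEND = "http://localhost:8080"            # assumed local backend address
COOKIES = {"sessionid": "<session-cookie>"}  # placeholder: cookie of an already logged-in user

# This mirrors the internal subrequest nginx makes before proxying /grafana/ or /alerts/.
resp = requests.get(f"{BACKEND}/plg_auth_check/", cookies=COOKIES, timeout=5)

# 200: PlgAuthorizationCheck accepted the user (OFA sys admin or developer), so nginx proxies the request.
# 401: the user lacks the required group, so nginx blocks access to the PLG apps.
print(resp.status_code)
```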
diff --git a/tdrs-frontend/src/components/Header/Header.jsx b/tdrs-frontend/src/components/Header/Header.jsx index cd25c3930..602a2a613 100644 --- a/tdrs-frontend/src/components/Header/Header.jsx +++ b/tdrs-frontend/src/components/Header/Header.jsx @@ -8,7 +8,7 @@ import { accountIsInReview, accountCanViewAdmin, accountCanViewKibana, - accountCanViewGrafana, + accountCanViewPlg, } from '../../selectors/auth' import NavItem from '../NavItem/NavItem' @@ -32,7 +32,7 @@ function Header() { const userAccessRequestApproved = useSelector(accountStatusIsApproved) const userIsAdmin = useSelector(accountCanViewAdmin) const userViewKibana = useSelector(accountCanViewKibana) - const userViewGrafana = useSelector(accountCanViewGrafana) + const userViewPlg = useSelector(accountCanViewPlg) const menuRef = useRef() @@ -148,12 +148,19 @@ function Header() { href={`${process.env.REACT_APP_BACKEND_HOST}/kibana/`} /> )} - {userViewGrafana && ( - + {userViewPlg && ( + <> + + + )} )} diff --git a/tdrs-frontend/src/components/Reports/Reports.jsx b/tdrs-frontend/src/components/Reports/Reports.jsx index a22ae4fb1..0ac0f3d98 100644 --- a/tdrs-frontend/src/components/Reports/Reports.jsx +++ b/tdrs-frontend/src/components/Reports/Reports.jsx @@ -455,7 +455,7 @@ function Reports() { {selectedSubmissionTab === 1 && ( { setIsToggled(false) resetPreviousValues() diff --git a/tdrs-frontend/src/components/SiteMap/SiteMap.jsx b/tdrs-frontend/src/components/SiteMap/SiteMap.jsx index e7355842c..84e38dda0 100644 --- a/tdrs-frontend/src/components/SiteMap/SiteMap.jsx +++ b/tdrs-frontend/src/components/SiteMap/SiteMap.jsx @@ -4,14 +4,14 @@ import { accountStatusIsApproved, accountCanViewAdmin, accountCanViewKibana, - accountCanViewGrafana, + accountCanViewPlg, } from '../../selectors/auth' const SiteMap = ({ user }) => { const userIsApproved = useSelector(accountStatusIsApproved) const userIsAdmin = useSelector(accountCanViewAdmin) const userViewKibana = useSelector(accountCanViewKibana) - const userViewGrafana = useSelector(accountCanViewGrafana) + const userViewPlg = useSelector(accountCanViewPlg) return (
@@ -43,11 +43,17 @@ const SiteMap = ({ user }) => { /> )} - {userViewGrafana && ( - + {userViewPlg && ( + <> + + + )}
) diff --git a/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.jsx b/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.jsx index a1e28b7c0..b768fb7cd 100644 --- a/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.jsx +++ b/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.jsx @@ -1,10 +1,9 @@ import React from 'react' import PropTypes from 'prop-types' -import classNames from 'classnames' import { useDispatch, useSelector } from 'react-redux' -import { fileUploadSections } from '../../reducers/reports' import Paginator from '../Paginator' import { getAvailableFileList } from '../../actions/reports' +import { fileUploadSections } from '../../reducers/reports' import { useEffect } from 'react' import { useState } from 'react' import { CaseAggregatesTable } from './CaseAggregatesTable' @@ -64,6 +63,7 @@ const SubmissionHistory = ({ filterValues }) => { const dispatch = useDispatch() const [hasFetchedFiles, setHasFetchedFiles] = useState(false) const { files } = useSelector((state) => state.reports) + const num_sections = filterValues.stt.num_sections useEffect(() => { if (!hasFetchedFiles) { @@ -87,15 +87,17 @@ const SubmissionHistory = ({ filterValues }) => {
- {fileUploadSections.map((section, index) => ( - f.section.includes(section))} - /> - ))} + {fileUploadSections.slice(0, num_sections).map((section, index) => { + return ( + f.section.includes(section))} + /> + ) + })}
) diff --git a/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.test.js b/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.test.js index 325c7d898..eda2d13b8 100644 --- a/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.test.js +++ b/tdrs-frontend/src/components/SubmissionHistory/SubmissionHistory.test.js @@ -18,7 +18,7 @@ describe('SubmissionHistory', () => { const defaultFilterValues = { quarter: 'Q1', year: '2023', - stt: { id: 4 }, + stt: { id: 5 }, file_type: 'TANF', } @@ -324,7 +324,7 @@ describe('SubmissionHistory', () => { setup(store, { ...defaultFilterValues, - stt: { id: 48 }, + stt: { id: 5 }, file_type: 'SSP', }) diff --git a/tdrs-frontend/src/components/UploadReport/UploadReport.jsx b/tdrs-frontend/src/components/UploadReport/UploadReport.jsx index 9e51c11a7..a2348fe65 100644 --- a/tdrs-frontend/src/components/UploadReport/UploadReport.jsx +++ b/tdrs-frontend/src/components/UploadReport/UploadReport.jsx @@ -7,8 +7,8 @@ import Button from '../Button' import FileUpload from '../FileUpload' import { submit } from '../../actions/reports' -import { useEventLogger } from '../../utils/eventLogger' import { fileUploadSections } from '../../reducers/reports' +import { useEventLogger } from '../../utils/eventLogger' function UploadReport({ handleCancel, stt }) { // The currently selected year from the reportingYears dropdown @@ -20,9 +20,14 @@ function UploadReport({ handleCancel, stt }) { // The set of uploaded files in our Redux state const files = useSelector((state) => state.reports.submittedFiles) + // The logged in user in our Redux state const user = useSelector((state) => state.auth.user) + // The number of sections this stt submits data for and it's ID + const stt_id = stt?.id + const num_sections = stt === undefined ? 4 : stt.num_sections + // TODO: Move this to Redux state so we can modify this value outside of // this component without having to pass the setter function around const [localAlert, setLocalAlertState] = useState({ @@ -70,7 +75,7 @@ function UploadReport({ handleCancel, stt }) { formattedSections, logger, setLocalAlertState, - stt, + stt: stt_id, uploadedFiles, user, ssp: selectedFileType === 'ssp-moe', @@ -105,13 +110,15 @@ function UploadReport({ handleCancel, stt }) { )}
- {fileUploadSections.map((name, index) => ( - - ))} + {fileUploadSections.slice(0, num_sections).map((section, index) => { + return ( + + ) + })}
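The slice(0, num_sections) calls above rely on the new num_sections value that the STT serializer now exposes. As a reference for how the backend derives it, here is a small standalone Python mirror of the STT.num_sections property added in tdrs-backend/tdpservice/stts/models.py (the sample filename list is invented for illustration):

```python
DEFAULT_NUMBER_OF_SECTIONS = 4

def num_sections(filenames, ssp):
    """Mirror of STT.num_sections: count the filename slots, halved when the STT also submits SSP data."""
    if filenames is None:
        return DEFAULT_NUMBER_OF_SECTIONS
    divisor = int(ssp) + 1
    return len(filenames) // divisor

# An STT with four filename slots that also submits SSP data reports two sections,
# so UploadReport and SubmissionHistory render only the first two fileUploadSections entries.
print(num_sections(["Active Cases", "Closed Cases", "SSP Active Cases", "SSP Closed Cases"], ssp=True))  # 2
print(num_sections(None, ssp=False))  # 4
```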