From eac7e505eac1596af557608760fffad0833742e6 Mon Sep 17 00:00:00 2001 From: raftmsohani <97037188+raftmsohani@users.noreply.github.com> Date: Wed, 29 Nov 2023 15:53:45 -0500 Subject: [PATCH 1/4] add file size to clamav-nginx (#2760) --- tdrs-backend/clamav-router/nginx.conf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tdrs-backend/clamav-router/nginx.conf b/tdrs-backend/clamav-router/nginx.conf index 50cc6395b..142657ffb 100644 --- a/tdrs-backend/clamav-router/nginx.conf +++ b/tdrs-backend/clamav-router/nginx.conf @@ -5,6 +5,7 @@ events { worker_connections 1024; http{ server { listen {{port}}; + client_max_body_size 100m; location /scan { proxy_pass http://tanf-prod-clamav-rest.apps.internal:9000/scan; proxy_pass_request_headers on; @@ -12,6 +13,7 @@ http{ } server { listen 9000; + client_max_body_size 100m; location /scan { proxy_pass http://tanf-prod-clamav-rest.apps.internal:9000/scan; proxy_pass_request_headers on; From eb16d8b3d4bd868d508203038ffb3fdc33002598 Mon Sep 17 00:00:00 2001 From: raftmsohani <97037188+raftmsohani@users.noreply.github.com> Date: Fri, 1 Dec 2023 10:55:28 -0500 Subject: [PATCH 2/4] 2683-zap-CORS-misconfig (#2727) * added cloud.gov back to scan urls * temp * revert changes on zap.conf * revert change on zap.conf * revert changes on zap-hook.conf * revert changes on nginx.conf * revert changes on middleware.py * linting * revert change on docker-compose file * revert changes on .env file * remove file not needed * linting * revert changes on deploy-backend * revert changes on zap-scanner.py * revrt some changes on zap-scanner * Added CORS settings * increase max_file allow large file clamav scans --------- Co-authored-by: Alex P <63075587+ADPennington@users.noreply.github.com> --- scripts/deploy-backend.sh | 1 - scripts/deploy-frontend.sh | 0 scripts/zap-scanner.sh | 7 +----- tdrs-backend/clamav-router/nginx.conf | 2 ++ tdrs-backend/tdpservice/settings/cloudgov.py | 25 ++++++++++++++++++-- tdrs-frontend/nginx/cloud.gov/locations.conf | 7 ++++++ 6 files changed, 33 insertions(+), 9 deletions(-) mode change 100644 => 100755 scripts/deploy-frontend.sh diff --git a/scripts/deploy-backend.sh b/scripts/deploy-backend.sh index ec372396a..3547debb7 100755 --- a/scripts/deploy-backend.sh +++ b/scripts/deploy-backend.sh @@ -100,7 +100,6 @@ update_backend() if [ "$1" = "rolling" ] ; then set_cf_envs - # Do a zero downtime deploy. This requires enough memory for # two apps to exist in the org/space at one time. cf push "$CGAPPNAME_BACKEND" --no-route -f manifest.buildpack.yml -t 180 --strategy rolling || exit 1 diff --git a/scripts/deploy-frontend.sh b/scripts/deploy-frontend.sh old mode 100644 new mode 100755 diff --git a/scripts/zap-scanner.sh b/scripts/zap-scanner.sh index c3f534b84..d03259221 100755 --- a/scripts/zap-scanner.sh +++ b/scripts/zap-scanner.sh @@ -40,7 +40,7 @@ cd "$TARGET_DIR" || exit 2 if [[ $(docker network inspect external-net 2>&1 | grep -c Scope) == 0 ]]; then - docker network create external-net + docker network create external-net fi # Ensure the APP_URL is reachable from the zaproxy container @@ -112,10 +112,6 @@ ZAP_CLI_OPTIONS="\ -config globalexcludeurl.url_list.url\(14\).description='Site - FontAwesome.com' \ -config globalexcludeurl.url_list.url\(14\).enabled=true \ - -config globalexcludeurl.url_list.url\(15\).regex='^https:\/\/.*\.cloud.gov\/.*$' \ - -config globalexcludeurl.url_list.url\(15\).description='Site - Cloud.gov' \ - -config globalexcludeurl.url_list.url\(15\).enabled=true \ - -config globalexcludeurl.url_list.url\(16\).regex='^https:\/\/.*\.googletagmanager.com\/.*$' \ -config globalexcludeurl.url_list.url\(16\).description='Site - googletagmanager.com' \ -config globalexcludeurl.url_list.url\(16\).enabled=true \ @@ -140,7 +136,6 @@ ZAP_CLI_OPTIONS="\ -config globalexcludeurl.url_list.url\(21\).description='Site - IdentitySandbox.gov' \ -config globalexcludeurl.url_list.url\(21\).enabled=true \ -config spider.postform=true" - # How long ZAP will crawl the app with the spider process ZAP_SPIDER_MINS=10 diff --git a/tdrs-backend/clamav-router/nginx.conf b/tdrs-backend/clamav-router/nginx.conf index 142657ffb..35e95e7a7 100644 --- a/tdrs-backend/clamav-router/nginx.conf +++ b/tdrs-backend/clamav-router/nginx.conf @@ -4,6 +4,7 @@ events { worker_connections 1024; # This opens a route to clamav prod http{ server { + client_max_body_size 100m; listen {{port}}; client_max_body_size 100m; location /scan { @@ -12,6 +13,7 @@ http{ } } server { + client_max_body_size 100m; listen 9000; client_max_body_size 100m; location /scan { diff --git a/tdrs-backend/tdpservice/settings/cloudgov.py b/tdrs-backend/tdpservice/settings/cloudgov.py index b7def9383..b00f76fa9 100644 --- a/tdrs-backend/tdpservice/settings/cloudgov.py +++ b/tdrs-backend/tdpservice/settings/cloudgov.py @@ -155,7 +155,14 @@ class Development(CloudGov): # https://docs.djangoproject.com/en/2.0/ref/settings/#allowed-hosts ALLOWED_HOSTS = ['.app.cloud.gov'] - + CORS_ORIGIN_ALLOW_ALL = False + CORS_ALLOWED_ORIGINS = ['https://*.app.cloud.gov'] + CORS_ALLOW_CREDENTIALS = True + CORS_ALLOW_METHODS = ( + "GET", + "PATCH", + "POST", + ) class Staging(CloudGov): """Settings for applications deployed in the Cloud.gov staging space.""" @@ -164,7 +171,14 @@ class Staging(CloudGov): 'tdp-frontend-staging.acf.hhs.gov', 'tdp-frontend-develop.acf.hhs.gov' ] - + CORS_ALLOWED_ORIGINS = ['https://*.acf.hhs.gov'] + CORS_ORIGIN_ALLOW_ALL = False + CORS_ALLOW_CREDENTIALS = True + CORS_ALLOW_METHODS = ( + "GET", + "PATCH", + "POST", + ) LOGIN_GOV_CLIENT_ID = os.getenv( 'OIDC_RP_CLIENT_ID', 'urn:gov:gsa:openidconnect.profiles:sp:sso:hhs:tanf-proto-staging' @@ -189,3 +203,10 @@ class Production(CloudGov): # CORS allowed origins CORS_ALLOWED_ORIGINS = ['https://tanfdata.acf.hhs.gov'] + CORS_ORIGIN_ALLOW_ALL = False + CORS_ALLOW_CREDENTIALS = True + CORS_ALLOW_METHODS = ( + "GET", + "PATCH", + "POST", + ) diff --git a/tdrs-frontend/nginx/cloud.gov/locations.conf b/tdrs-frontend/nginx/cloud.gov/locations.conf index 779dc9f2a..1bccf800e 100644 --- a/tdrs-frontend/nginx/cloud.gov/locations.conf +++ b/tdrs-frontend/nginx/cloud.gov/locations.conf @@ -16,9 +16,16 @@ location ~ ^/(v1|admin|static/admin|swagger|redocs) { proxy_buffer_size 4k; proxy_temp_file_write_size 64k; + limit_except GET HEAD POST { deny all; + } + add_header Access-Control-Allow-Origin 's3-us-gov-west-1.amazonaws.com'; } +if ($request_method ~ ^(PATCH|TRACE|OPTION)$) { + return 405; +} + location = /profile { index index.html index.htm; try_files $uri $uri/ /index.html; From 11aef55904828519c9abb510f69f781105e49b15 Mon Sep 17 00:00:00 2001 From: jtimpe <111305129+jtimpe@users.noreply.github.com> Date: Wed, 6 Dec 2023 09:46:08 -0500 Subject: [PATCH 3/4] Fix parser/preparser validation of empty strings (#2748) * add validator tests * add notEmpty test * add blank string to value_is_empty values * replace removed text * fix partial oob string indexing --------- Co-authored-by: Alex P <63075587+ADPennington@users.noreply.github.com> --- tdrs-backend/tdpservice/parsers/fields.py | 14 ++----- tdrs-backend/tdpservice/parsers/row_schema.py | 3 +- .../tdpservice/parsers/test/test_util.py | 30 +------------- .../parsers/test/test_validators.py | 40 +++++++++++++++++++ tdrs-backend/tdpservice/parsers/validators.py | 24 ++++++++++- 5 files changed, 69 insertions(+), 42 deletions(-) diff --git a/tdrs-backend/tdpservice/parsers/fields.py b/tdrs-backend/tdpservice/parsers/fields.py index 1487cf498..c1b610821 100644 --- a/tdrs-backend/tdpservice/parsers/fields.py +++ b/tdrs-backend/tdpservice/parsers/fields.py @@ -1,19 +1,10 @@ """Datafile field representations.""" import logging +from .validators import value_is_empty logger = logging.getLogger(__name__) -def value_is_empty(value, length): - """Handle 'empty' values as field inputs.""" - empty_values = [ - ' '*length, # ' ' - '#'*length, # '#####' - '_'*length, # '_____' - ] - - return value is None or value in empty_values - class Field: """Provides a mapping between a field name and its position.""" @@ -38,8 +29,9 @@ def __repr__(self): def parse_value(self, line): """Parse the value for a field given a line, startIndex, endIndex, and field type.""" value = line[self.startIndex:self.endIndex] + value_length = self.endIndex-self.startIndex - if value_is_empty(value, self.endIndex-self.startIndex): + if len(value) < value_length or value_is_empty(value, value_length): logger.debug(f"Field: '{self.name}' at position: [{self.startIndex}, {self.endIndex}) is empty.") return None diff --git a/tdrs-backend/tdpservice/parsers/row_schema.py b/tdrs-backend/tdpservice/parsers/row_schema.py index 83885042c..b7fafcdd8 100644 --- a/tdrs-backend/tdpservice/parsers/row_schema.py +++ b/tdrs-backend/tdpservice/parsers/row_schema.py @@ -1,6 +1,7 @@ """Row schema for datafile.""" from .models import ParserErrorCategoryChoices -from .fields import Field, value_is_empty +from .fields import Field +from .validators import value_is_empty import logging logger = logging.getLogger(__name__) diff --git a/tdrs-backend/tdpservice/parsers/test/test_util.py b/tdrs-backend/tdpservice/parsers/test/test_util.py index 03997597a..d7b2afc0e 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_util.py +++ b/tdrs-backend/tdpservice/parsers/test/test_util.py @@ -1,7 +1,7 @@ """Test the methods of RowSchema to ensure parsing and validation work in all individual cases.""" import pytest -from ..fields import Field, value_is_empty +from ..fields import Field from ..row_schema import RowSchema from ..util import SchemaManager @@ -224,6 +224,7 @@ class TestModel: @pytest.mark.parametrize('first,second', [ + ('', ''), (' ', ' '), ('#', '##'), (None, None), @@ -308,33 +309,6 @@ def test_run_postparsing_validators_returns_invalid_and_errors(): assert errors == ['Value is not valid.'] -@pytest.mark.parametrize("value,length", [ - (None, 0), - (None, 10), - (' ', 5), - ('###', 3) -]) -def test_value_is_empty_returns_true(value, length): - """Test value_is_empty returns valid.""" - result = value_is_empty(value, length) - assert result is True - - -@pytest.mark.parametrize("value,length", [ - (0, 1), - (1, 1), - (10, 2), - ('0', 1), - ('0000', 4), - ('1 ', 5), - ('##3', 3) -]) -def test_value_is_empty_returns_false(value, length): - """Test value_is_empty returns invalid.""" - result = value_is_empty(value, length) - assert result is False - - def test_multi_record_schema_parses_and_validates(): """Test SchemaManager parse_and_validate.""" line = '12345' diff --git a/tdrs-backend/tdpservice/parsers/test/test_validators.py b/tdrs-backend/tdpservice/parsers/test/test_validators.py index db7153c41..79e52518b 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_validators.py +++ b/tdrs-backend/tdpservice/parsers/test/test_validators.py @@ -6,6 +6,35 @@ from tdpservice.parsers.test.factories import SSPM5Factory +@pytest.mark.parametrize("value,length", [ + (None, 0), + (None, 10), + (' ', 5), + ('###', 3), + ('', 0), + ('', 10), +]) +def test_value_is_empty_returns_true(value, length): + """Test value_is_empty returns valid.""" + result = validators.value_is_empty(value, length) + assert result is True + + +@pytest.mark.parametrize("value,length", [ + (0, 1), + (1, 1), + (10, 2), + ('0', 1), + ('0000', 4), + ('1 ', 5), + ('##3', 3), +]) +def test_value_is_empty_returns_false(value, length): + """Test value_is_empty returns invalid.""" + result = validators.value_is_empty(value, length) + assert result is False + + def test_or_validators(): """Test `or_validators` gives a valid result.""" value = "2" @@ -295,6 +324,17 @@ def test_notEmpty_returns_invalid_substring(): assert is_valid is False assert error == "111 333 contains blanks between positions 3 and 5." + +def test_notEmpty_returns_nonexistent_substring(): + """Test `notEmpty` gives an invalid result for a nonexistent substring.""" + value = '111 333' + + validator = validators.notEmpty(start=10, end=12) + is_valid, error = validator(value) + + assert is_valid is False + assert error == "111 333 contains blanks between positions 10 and 12." + @pytest.mark.usefixtures('db') class TestCat3ValidatorsBase: """A base test class for tests that evaluate category three validators.""" diff --git a/tdrs-backend/tdpservice/parsers/validators.py b/tdrs-backend/tdpservice/parsers/validators.py index e516a6530..eb32c666e 100644 --- a/tdrs-backend/tdpservice/parsers/validators.py +++ b/tdrs-backend/tdpservice/parsers/validators.py @@ -6,6 +6,18 @@ logger = logging.getLogger(__name__) + +def value_is_empty(value, length): + """Handle 'empty' values as field inputs.""" + empty_values = [ + '', + ' '*length, # ' ' + '#'*length, # '#####' + '_'*length, # '_____' + ] + + return value is None or value in empty_values + # higher order validator func def make_validator(validator_func, error_func): @@ -191,17 +203,25 @@ def isStringLargerThan(val): lambda value: f'{value} is not larger than {val}.' ) + +def _is_empty(value, start, end): + end = end if end else len(str(value)) + vlen = end - start + subv = str(value)[start:end] + return value_is_empty(subv, vlen) or len(subv) < vlen + + def notEmpty(start=0, end=None): """Validate that string value isn't only blanks.""" return make_validator( - lambda value: not str(value)[start:end if end else len(str(value))].isspace(), + lambda value: not _is_empty(value, start, end), lambda value: f'{str(value)} contains blanks between positions {start} and {end if end else len(str(value))}.' ) def isEmpty(start=0, end=None): """Validate that string value is only blanks.""" return make_validator( - lambda value: value[start:end if end else len(value)].isspace(), + lambda value: _is_empty(value, start, end), lambda value: f'{value} is not blank between positions {start} and {end if end else len(value)}.' ) From 25376871c98f77e79936860f43ca487a0a66b0ed Mon Sep 17 00:00:00 2001 From: George Hudson Date: Wed, 6 Dec 2023 09:12:18 -0700 Subject: [PATCH 4/4] added needed space into logic (#2765) * added needed space into logic * updated security controls documentation to mention how NGINX is used by lower envs to connect to prod clamav server --------- Co-authored-by: George Hudson Co-authored-by: Andrew <84722778+andrew-jameson@users.noreply.github.com> --- docs/Security-Compliance/boundary-diagram.md | 6 +++++- scripts/deploy-backend.sh | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/Security-Compliance/boundary-diagram.md b/docs/Security-Compliance/boundary-diagram.md index e58c88ad4..d548a41b0 100644 --- a/docs/Security-Compliance/boundary-diagram.md +++ b/docs/Security-Compliance/boundary-diagram.md @@ -6,7 +6,11 @@ ### Data flow -Users with `OFA Admin` and (STT) `Data Analyst` roles can upload data on upload data files locally into the web application which will store the files in cloud.gov AWS S3 buckets only after the files are successfully scanned for viruses via [ClamAV](../Technical-Documentation/Architecture-Decision-Record/012-antivirus-strategy.md). Developers will deploy new code through GitHub, initiating the continuous integration process through Circle CI. +Users with `OFA Admin` and (STT) `Data Analyst` roles can upload data on upload data files locally into the web application which will store the files in cloud.gov AWS S3 buckets only after the files are successfully scanned for viruses via [ClamAV](../Technical-Documentation/Architecture-Decision-Record/012-antivirus-strategy.md). For lower environments, we use an NGINX server to function as a proxy, routing to the ClamAV-rest server in the production space. The NGINX server also functions as a gatekeeper, allowing documents for scanning to only come from backend servers, and only able to route them directly to the ClamAV-rest server. + +### Code Repository and CI Pipeline + +Developers will deploy new code through GitHub, initiating the continuous integration process through Circle CI. ### Environments/Spaces diff --git a/scripts/deploy-backend.sh b/scripts/deploy-backend.sh index 3547debb7..6e46fe93a 100755 --- a/scripts/deploy-backend.sh +++ b/scripts/deploy-backend.sh @@ -91,7 +91,7 @@ update_backend() cd tdrs-backend || exit cf unset-env "$CGAPPNAME_BACKEND" "AV_SCAN_URL" - if ["$CF_SPACE" = "tanf-prod" ]; then + if [ "$CF_SPACE" = "tanf-prod" ]; then cf set-env "$CGAPPNAME_BACKEND" AV_SCAN_URL "http://tanf-prod-clamav-rest.apps.internal:9000/scan" else # Add environment varilables for clamav @@ -120,7 +120,7 @@ update_backend() # Add network policy to allow frontend to access backend cf add-network-policy "$CGAPPNAME_FRONTEND" "$CGAPPNAME_BACKEND" --protocol tcp --port 8080 - if ["$CF_SPACE" = "tanf-prod" ]; then + if [ "$CF_SPACE" = "tanf-prod" ]; then # Add network policy to allow backend to access tanf-prod services cf add-network-policy "$CGAPPNAME_BACKEND" clamav-rest --protocol tcp --port 9000 else