Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/3222-plg-prod' into 3254-deploy-…
Browse files Browse the repository at this point in the history
…alertmanager
  • Loading branch information
elipe17 committed Nov 15, 2024
2 parents fa5f15c + e69df5c commit 6b2760e
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 42 deletions.
14 changes: 0 additions & 14 deletions scripts/deploy-backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -118,17 +118,6 @@ prepare_promtail() {
popd
}

update_plg_networking() {
  # Opens the two network paths between the deployed backend and the PLG apps
  # (prometheus, loki), which are addressed in the tanf-dev space.
  # Globals read: CGAPPNAME_BACKEND (backend app name), CF_SPACE (deploy space).
  # Side effect: the CF CLI ends up targeted at "$CF_SPACE" again.
  local org="hhs-acf-ofa"
  local plg_space="tanf-dev"

  # The deploy may be running in a space other than dev, so hop into the PLG
  # space first and let prometheus reach the backend that lives in "$CF_SPACE".
  cf target -o "$org" -s "$plg_space"
  cf add-network-policy prometheus "$CGAPPNAME_BACKEND" \
    -s "$CF_SPACE" --protocol tcp --port 8080

  # Return to the deploy space before adding the backend -> loki policy.
  cf target -o "$org" -s "$CF_SPACE"

  # Promtail needs to send logs to Loki.
  cf add-network-policy "$CGAPPNAME_BACKEND" loki \
    -s "$plg_space" --protocol tcp --port 8080
}

update_backend()
{
cd tdrs-backend || exit
Expand Down Expand Up @@ -167,9 +156,6 @@ update_backend()
# Add network policy to allow frontend to access backend
cf add-network-policy "$CGAPPNAME_FRONTEND" "$CGAPPNAME_BACKEND" --protocol tcp --port 8080

# Add PLG routing
update_plg_networking

if [ "$CF_SPACE" = "tanf-prod" ]; then
# Add network policy to allow backend to access tanf-prod services
cf add-network-policy "$CGAPPNAME_BACKEND" clamav-rest --protocol tcp --port 9000
Expand Down
1 change: 0 additions & 1 deletion scripts/deploy-frontend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ CF_SPACE=${5}
ENVIRONMENT=${6}

env=${CF_SPACE#"tanf-"}
frontend_app_name=$(echo $CGHOSTNAME_FRONTEND | cut -d"-" -f3)

# Update the Kibana name to include the environment
KIBANA_BASE_URL="${CGAPPNAME_KIBANA}-${env}.apps.internal"
Expand Down
67 changes: 58 additions & 9 deletions tdrs-backend/plg/deploy.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
#!/bin/bash
# Abort the script as soon as any command exits non-zero.
set -e

# Backend app names grouped by the Cloud Foundry space they are addressed in
# by the network-policy commands below (dev -> tanf-dev, staging -> tanf-staging).
# NOTE: setup_extra_net_pols walks the backend list by index and uses the same
# index into the matching frontend list, so each backend/frontend pair of
# arrays must keep the same length and ordering.
DEV_BACKEND_APPS=("tdp-backend-raft" "tdp-backend-qasp" "tdp-backend-a11y")
STAGING_BACKEND_APPS=("tdp-backend-develop" "tdp-backend-staging")
# Single production backend app (plain string, not an array).
PROD_BACKEND="tdp-backend-prod"

# Frontend app names, index-aligned with the backend arrays above.
DEV_FRONTEND_APPS=("tdp-frontend-raft" "tdp-frontend-qasp" "tdp-frontend-a11y")
STAGING_FRONTEND_APPS=("tdp-frontend-develop" "tdp-frontend-staging")
# Single production frontend app (plain string, not an array).
PROD_FRONTEND="tdp-frontend-prod"

help() {
echo "Deploy the PLG stack or a Postgres exporter to the Cloud Foundry space you're currently authenticated in."
echo "Syntax: deploy.sh [-h|a|p|u|d]"
echo "Options:"
echo "h Print this help message."
echo "a Deploy the entire PLG stack."
echo "p Deploy a postgres exporter. Requires -u and -d"
echo "p Deploy a postgres exporter, expects the environment name (dev, staging, production) to be passed with switch. Requires -u and -d"
echo "u Requires -p. The database URI the exporter should connect with."
echo "d The Cloud Foundry service name of the RDS instance. Should be included with all deployments."
echo
Expand All @@ -19,6 +27,7 @@ deploy_pg_exporter() {
cp manifest.yml $MANIFEST

APP_NAME="pg-exporter-$1"
EXPORTER_SPACE=$(cf target | grep -Eo "tanf(.*)")

yq eval -i ".applications[0].name = \"$APP_NAME\"" $MANIFEST
yq eval -i ".applications[0].env.DATA_SOURCE_NAME = \"$2\"" $MANIFEST
Expand All @@ -27,9 +36,10 @@ deploy_pg_exporter() {
cf push --no-route -f $MANIFEST -t 180 --strategy rolling
cf map-route $APP_NAME apps.internal --hostname $APP_NAME

# Add policy to allow prometheus to talk to pg-exporter
# TODO: this logic needs to be updated to allow routing across spaces based on where we want PLG to live.
cf add-network-policy prometheus $APP_NAME -s "tanf-dev" --protocol tcp --port 9187
# Add policy to allow prometheus to talk to pg-exporter regardless of environment
cf target -o hhs-acf-ofa -s tanf-prod
cf add-network-policy prometheus $APP_NAME -s "$EXPORTER_SPACE" --protocol tcp --port 9187
cf target -o hhs-acf-ofa -s "$EXPORTER_SPACE"
rm $MANIFEST
popd
}
Expand All @@ -47,13 +57,21 @@ deploy_grafana() {
yq eval -i ".applications[0].services[0] = \"$1\"" $MANIFEST

cf push --no-route -f $MANIFEST -t 180 --strategy rolling
# cf map-route $APP_NAME apps.internal --hostname $APP_NAME
# Give Grafana a public route for now. Might be able to swap to internal route later.
cf map-route "$APP_NAME" app.cloud.gov --hostname "${APP_NAME}"
cf map-route $APP_NAME apps.internal --hostname $APP_NAME

# Add policy to allow grafana to talk to prometheus and loki
cf add-network-policy $APP_NAME prometheus --protocol tcp --port 8080
cf add-network-policy $APP_NAME loki --protocol tcp --port 8080

# Add network policies to allow grafana to talk to all frontend apps in all environments
for app in ${DEV_FRONTEND_APPS[@]}; do
cf add-network-policy "grafana" $app -s "tanf-dev" --protocol tcp --port 80
done
for app in ${STAGING_FRONTEND_APPS[@]}; do
cf add-network-policy "grafana" $app -s "tanf-staging" --protocol tcp --port 80
done
cf add-network-policy "grafana" $PROD_FRONTEND --protocol tcp --port 80

rm $DATASOURCES
rm $MANIFEST
popd
Expand All @@ -63,6 +81,16 @@ deploy_prometheus() {
pushd prometheus
cf push --no-route -f manifest.yml -t 180 --strategy rolling
cf map-route prometheus apps.internal --hostname prometheus

# Add network policies to allow prometheus to talk to all backend apps in all environments
for app in ${DEV_BACKEND_APPS[@]}; do
cf add-network-policy prometheus $app -s "tanf-dev" --protocol tcp --port 8080
done
for app in ${STAGING_BACKEND_APPS[@]}; do
cf add-network-policy prometheus $app -s "tanf-staging" --protocol tcp --port 8080
done
cf add-network-policy prometheus $PROD_BACKEND --protocol tcp --port 8080

popd
}

Expand All @@ -73,6 +101,25 @@ deploy_loki() {
popd
}

setup_extra_net_pols() {
  # Adds the network policies that let every frontend reach grafana and every
  # backend reach loki, both addressed in the tanf-prod space on TCP port 8080.
  #
  # Globals read:
  #   DEV_BACKEND_APPS / DEV_FRONTEND_APPS          (index-aligned arrays)
  #   STAGING_BACKEND_APPS / STAGING_FRONTEND_APPS  (index-aligned arrays)
  #   PROD_BACKEND / PROD_FRONTEND                  (single app names)
  # Side effect: re-targets the CF CLI several times and leaves it targeted
  # at tanf-prod when it returns.
  #
  # All expansions are quoted so app names are passed to cf as single,
  # unmodified arguments regardless of IFS or glob characters.
  local -r org="hhs-acf-ofa"
  local -r plg_space="tanf-prod"
  local i

  # The source app of each policy is looked up in the currently targeted
  # space, so switch into each environment's space before adding its policies.
  cf target -o "$org" -s tanf-dev
  for i in "${!DEV_BACKEND_APPS[@]}"; do
    cf add-network-policy "${DEV_FRONTEND_APPS[$i]}" grafana -s "$plg_space" --protocol tcp --port 8080
    cf add-network-policy "${DEV_BACKEND_APPS[$i]}" loki -s "$plg_space" --protocol tcp --port 8080
  done

  cf target -o "$org" -s tanf-staging
  for i in "${!STAGING_BACKEND_APPS[@]}"; do
    cf add-network-policy "${STAGING_FRONTEND_APPS[$i]}" grafana -s "$plg_space" --protocol tcp --port 8080
    cf add-network-policy "${STAGING_BACKEND_APPS[$i]}" loki -s "$plg_space" --protocol tcp --port 8080
  done

  cf target -o "$org" -s "$plg_space"
  cf add-network-policy "$PROD_FRONTEND" grafana -s "$plg_space" --protocol tcp --port 8080
  cf add-network-policy "$PROD_BACKEND" loki -s "$plg_space" --protocol tcp --port 8080
}

err_help_exit() {
echo $1
echo
Expand All @@ -97,6 +144,7 @@ while getopts ":hap:u:d:" option; do
DB_SERVICE_NAME=$OPTARG;;
\?) # Invalid option
echo "Error: Invalid option"
help
exit;;
esac
done
Expand All @@ -107,13 +155,14 @@ if [ "$#" -eq 0 ]; then
fi

pushd "$(dirname "$0")"
if [ "$DB_URI" == "" ] || [ "$DB_SERVICE_NAME" == "" ]; then
if [ "$DB_SERVICE_NAME" == "" ]; then
err_help_exit "Error: you must include a database service name."
fi
if [ "$DEPLOY" == "plg" ]; then
deploy_prometheus
deploy_loki
deploy_grafana
deploy_grafana $DB_SERVICE_NAME
setup_extra_net_pols
fi
if [ "$DEPLOY" == "pg-exporter" ]; then
if [ "$DB_URI" == "" ]; then
Expand Down
6 changes: 3 additions & 3 deletions tdrs-backend/plg/grafana/custom.ini
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ http_addr =
http_port = 8080

# The public facing domain name used to access grafana from a browser
domain = app.cloud.gov
domain = grafana.apps.internal

# Redirect to correct domain if host header does not match domain
# Prevents DNS rebinding attacks
Expand Down Expand Up @@ -553,10 +553,10 @@ login_cookie_name = grafana_session
disable_login = false

# The maximum lifetime (duration) an authenticated user can be inactive before being required to login at next visit. Default is 7 days (7d). This setting should be expressed as a duration, e.g. 5m (minutes), 6h (hours), 10d (days), 2w (weeks), 1M (month). The lifetime resets at each successful token rotation (token_rotation_interval_minutes).
login_maximum_inactive_lifetime_duration =
login_maximum_inactive_lifetime_duration = 30m

# The maximum lifetime (duration) an authenticated user can be logged in since login time before being required to login. Default is 30 days (30d). This setting should be expressed as a duration, e.g. 5m (minutes), 6h (hours), 10d (days), 2w (weeks), 1M (month).
login_maximum_lifetime_duration =
login_maximum_lifetime_duration = 1d

# How often should auth tokens be rotated for authenticated users when being active. The default is each 10 minutes.
token_rotation_interval_minutes = 10
Expand Down
4 changes: 2 additions & 2 deletions tdrs-backend/plg/grafana/manifest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ applications:
disk_quota: 2G
instances: 1
env:
GF_PATHS_PROVISIONING: "/conf/provisioning"
GF_PATHS_PROVISIONING: "conf/provisioning"
GF_PATHS_CONFIG: "/home/vcap/app/custom.ini"
GF_PATHS_HOME: "/home/vcap/app/grafana-v11.2.0"
GF_PATHS_DATA: "/home/vcap/app/data"
GF_PATHS_LOGS: "/home/vcap/app/logs"
GF_PATHS_PLUGINS: "/conf/provisioning/plugins"
GF_PATHS_PLUGINS: "conf/provisioning/plugins"
GF_SERVER_HTTP_PORT: 8080
GF_DATABASE_TYPE: postgres
GF_DATABASE_SSL_MODE: require
Expand Down
2 changes: 1 addition & 1 deletion tdrs-backend/plg/loki/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
version: 1
applications:
- name: loki
memory: 512M
memory: 1G
disk_quota: 7G
instances: 1
command: |
Expand Down
9 changes: 1 addition & 8 deletions tdrs-backend/plg/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,6 @@ scrape_configs:
service: "tdp-backend"
env: "production"

- job_name: "celery-exporter-raft"
static_configs:
- targets: ["celery-exporter-raft.apps.internal:9540"]
labels:
service: "celery"
env: "dev"

- job_name: postgres-dev
static_configs:
- targets: ["pg-exporter-dev.apps.internal:9187"]
Expand Down Expand Up @@ -95,7 +88,7 @@ scrape_configs:
labels:
service: "loki"
env: "production"

- job_name: grafana
metrics_path: /grafana/metrics
static_configs:
Expand Down
6 changes: 2 additions & 4 deletions tdrs-backend/tdpservice/users/api/authorization_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,9 @@ def get(self, request, *args, **kwargs):
user_in_valid_group = user.is_ofa_sys_admin or user.is_digit_team

if (user.hhs_id is not None and user_in_valid_group) or settings.BYPASS_OFA_AUTH:
logger.debug(f"User: {user} has correct authentication credentials. Allowing access to Kibana.")
return HttpResponse(status=200)
else:
logger.debug(f"User: {user} has incorrect authentication credentials. Not allowing access to Kibana.")
logger.warning(f"User: {user} has incorrect authentication credentials. Not allowing access to Kibana.")
return HttpResponse(status=401)

class PlgAuthorizationCheck(APIView):
Expand All @@ -88,8 +87,7 @@ def get(self, request, *args, **kwargs):
print("\n\nINSIDE AUTH CHECK\n\n")

if user_in_valid_group:
logger.debug(f"User: {user} has correct authentication credentials. Allowing access to plg.")
return HttpResponse(status=200)
else:
logger.debug(f"User: {user} has incorrect authentication credentials. Not allowing access to plg.")
logger.warning(f"User: {user} has incorrect authentication credentials. Not allowing access to Grafana.")
return HttpResponse(status=401)
36 changes: 36 additions & 0 deletions tdrs-frontend/nginx/cloud.gov/locations.conf
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,42 @@ location = /kibana_auth_check {
send_timeout 900;
}

# Proxies every request under /grafana/ to the internal Grafana app, but only
# after the /grafana_auth_check subrequest has approved it.
location /grafana/ {
# Gate the request on the result of the internal auth subrequest.
auth_request /grafana_auth_check;
# Keep the auth subrequest's upstream status in $auth_status.
auth_request_set $auth_status $upstream_status;

# Upstream URL is built in a variable and includes the original request URI,
# so the /grafana/ prefix is forwarded to Grafana unchanged.
set $grafana http://grafana.apps.internal:8080$request_uri;
proxy_pass $grafana;
# NOTE(review): the Host header is sent with port 3000 appended while the
# upstream listens on 8080 — confirm this is intentional.
proxy_set_header Host $host:3000;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# Always reports https to the upstream, regardless of the incoming scheme.
proxy_set_header X-Forwarded-Proto https;

# Timeouts are unit-less, i.e. seconds.
proxy_connect_timeout 300;
proxy_read_timeout 300;
proxy_send_timeout 300;
send_timeout 900;
proxy_buffer_size 4k;
}

# Internal-only endpoint used as the auth_request target for /grafana/.
# Forwards the check to the backend's /grafana_auth_check/ endpoint.
location = /grafana_auth_check {
# Not reachable by external clients; only via internal requests.
internal;
# {{env "BACKEND_HOST"}} is a template placeholder — presumably substituted
# with the backend hostname when this config is rendered; verify.
set $endpoint http://{{env "BACKEND_HOST"}}.apps.internal:8080/grafana_auth_check/;
# NOTE(review): $1 is a regex capture variable, but this is an exact-match
# location with no regex in view — it presumably expands to empty; confirm.
proxy_pass $endpoint$1$is_args$args;
# NOTE(review): Host is sent with port 3000 appended — confirm intent.
proxy_set_header Host $host:3000;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
# Send an empty Content-Length header with the auth check request.
proxy_set_header Content-Length "";
# Tell the backend which URI the client originally requested.
proxy_set_header X-Original-URI $request_uri;

# Timeouts are unit-less, i.e. seconds.
proxy_connect_timeout 300;
proxy_read_timeout 300;
proxy_send_timeout 300;
send_timeout 900;
# Let the backend's x-csrftoken response header pass through.
proxy_pass_header x-csrftoken;
}

if ($request_method ~ ^(TRACE|OPTION)$) {
return 405;
}
Expand Down

0 comments on commit 6b2760e

Please sign in to comment.