From d89db1cc7bce3223cc0199314e04640e4bceb3dd Mon Sep 17 00:00:00 2001 From: nimalank7 Date: Fri, 5 Jul 2024 14:14:33 +0100 Subject: [PATCH] Load static assets from S3 directly Description: - Static assets (such as CSS, images and JavaScript) are stored in an S3 bucket. Currently, to retrieve a static asset, Fastly makes a request to an nginx pod running in Router. nginx then retrieves the static asset from S3 before passing it to Fastly. - This commit makes Fastly talk directly to S3, without going through nginx and Router, by adding a dedicated S3 backend and routing /assets requests to it. - Errors from this backend are handled in the if block above, which triggers a restart causing Fastly to poll the S3 mirror instead. - If the static asset doesn't exist in S3 (e.g. www.gov.uk/assets/non-existent.jpg) then S3 returns an XML document and a 403. Adding a condition to display a prettified error page for this would make the VCL code difficult to read, and would also need a special case to handle integration, which has no mirrors. Since this error can only be seen if someone looks in the Chrome dev tools, it's agreed that it is permissible to just return the S3 403 error instead. 
- See the diagram in https://github.com/alphagov/govuk-fastly/issues/73 for the previous method and the proposed new implementation --- modules/www/main.tf | 3 +++ modules/www/www.vcl.tftpl | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/modules/www/main.tf b/modules/www/main.tf index d962281..67d8473 100644 --- a/modules/www/main.tf +++ b/modules/www/main.tf @@ -14,6 +14,9 @@ locals { ssl_ciphers = "ECDHE-RSA-AES256-GCM-SHA384" basic_authentication = null + s3_static_assets_port = 443 + s3_static_assets_hostname = null + # these values are needed even if mirrors aren't enabled in an environment s3_mirror_hostname = null s3_mirror_prefix = null diff --git a/modules/www/www.vcl.tftpl b/modules/www/www.vcl.tftpl index 361aab7..bb01499 100644 --- a/modules/www/www.vcl.tftpl +++ b/modules/www/www.vcl.tftpl @@ -36,6 +36,32 @@ backend F_origin { %{ endif ~} } +backend F_staticAssetsS3 { + .connect_timeout = 1s; + .dynamic = true; + .port = "${s3_static_assets_port}"; + .host = "${s3_static_assets_hostname}"; + .first_byte_timeout = 15s; + .max_connections = 200; + .between_bytes_timeout = 10s; + + .ssl = true; + .ssl_check_cert = always; + .min_tls_version = "${minimum_tls_version}"; + .ssl_ciphers = "${ssl_ciphers}"; + .ssl_cert_hostname = "${s3_static_assets_hostname}"; + .ssl_sni_hostname = "${s3_static_assets_hostname}"; + + .probe = { + .dummy = ${probe_dns_only}; + .threshold = 1; + .window = 2; + .timeout = 5s; + .initial = 1; + .interval = ${probe_interval}; + } +} + %{ if contains(["staging", "production"], environment) ~} # Mirror backend for S3 backend F_mirrorS3 { @@ -342,6 +368,12 @@ sub vcl_recv { unset req.http.Cookie; } + if (req.url.path ~ "^\/assets(\/.*)?\z") { + set req.backend = F_staticAssetsS3; + set req.http.host = "${s3_static_assets_hostname}"; + set req.http.Fastly-Backend-Name = "staticAssetsS3"; + } + return(lookup); } @@ -433,7 +465,22 @@ sub vcl_fetch { set beresp.ttl = 900s; set 
beresp.http.Cache-Control = "max-age=900"; } + + # The static assets S3 bucket does not set cache headers by default. Override the TTL and set Cache-Control to 31536000s (one year). + # Strip out common S3 response headers. NOTE(review): vcl_recv sets Fastly-Backend-Name on req.http, but this test reads beresp.http - confirm the header is present on the backend response, otherwise this block never runs. + if (beresp.http.Fastly-Backend-Name ~ "^staticAssetsS3$") { + set beresp.ttl = 31536000s; + set beresp.http.Cache-Control = "max-age=31536000, public, immutable"; + set beresp.http.Access-Control-Allow-Origin = "*"; + + unset beresp.http.x-amz-id-2; + unset beresp.http.x-amz-meta-server-side-encryption; + unset beresp.http.x-amz-request-id; + unset beresp.http.x-amz-version-id; + unset beresp.http.x-amz-server-side-encryption; + } } + # Strip cookies from outbound requests. Corresponding rule in vcl_recv{} if (req.url !~ "^/(apply-for-a-licence|email|sign-in/callback)") { unset beresp.http.Set-Cookie;