From 0968844a0c07af4c2c46195678506a744334d1d9 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Tue, 3 Dec 2024 12:44:05 +0200 Subject: [PATCH 01/13] install etcdctl, add api6 snapshot script, move common logic to own file --- ewc/cron-jobs/Dockerfile | 44 ++++++++++++++++++++++++++++++ ewc/cron-jobs/apisix-snapshot.sh | 36 +++++++++++++++++++++++++ ewc/cron-jobs/common-functions.sh | 24 +++++++++++++++++ ewc/cron-jobs/vault-snapshot.sh | 45 +++++++++++++++++++++++++++++++ 4 files changed, 149 insertions(+) create mode 100644 ewc/cron-jobs/Dockerfile create mode 100644 ewc/cron-jobs/apisix-snapshot.sh create mode 100644 ewc/cron-jobs/common-functions.sh create mode 100644 ewc/cron-jobs/vault-snapshot.sh diff --git a/ewc/cron-jobs/Dockerfile b/ewc/cron-jobs/Dockerfile new file mode 100644 index 0000000..7a6e64b --- /dev/null +++ b/ewc/cron-jobs/Dockerfile @@ -0,0 +1,44 @@ +FROM alpine:3.20 + +# Set arguments for product and version +ARG PRODUCT=vault +ARG VAULT_VERSION=1.18.0 +ARG ETCD_VERSION=v3.5.17 + +# Install dependencies and Vault +# Vault installation snippet from https://www.hashicorp.com/blog/installing-hashicorp-tools-in-alpine-linux-containers +# Vault binary is enormous ~436MB, might need to think some other option.. 
+# Someone else also not happy about it https://github.com/hashicorp/vault/issues/22893 +# aws-cli is around ~100MB +RUN apk add --update --virtual .deps --no-cache gnupg wget unzip && \ + apk add --no-cache aws-cli bash && \ + cd /tmp && \ + # Install Vault + wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_linux_amd64.zip && \ + wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_SHA256SUMS && \ + wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_SHA256SUMS.sig && \ + wget -qO- https://www.hashicorp.com/.well-known/pgp-key.txt | gpg --import && \ + gpg --verify ${PRODUCT}_${VAULT_VERSION}_SHA256SUMS.sig ${PRODUCT}_${VAULT_VERSION}_SHA256SUMS && \ + grep ${PRODUCT}_${VAULT_VERSION}_linux_amd64.zip ${PRODUCT}_${VAULT_VERSION}_SHA256SUMS | sha256sum -c && \ + unzip /tmp/${PRODUCT}_${VAULT_VERSION}_linux_amd64.zip -d /tmp && \ + mv /tmp/${PRODUCT} /usr/local/bin/${PRODUCT} && \ + # Install etcd + wget https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-amd64.tar.gz -O /tmp/etcd-${ETCD_VERSION}-linux-amd64.tar.gz && \ + tar xzvf /tmp/etcd-${ETCD_VERSION}-linux-amd64.tar.gz && \ + mv /tmp/etcd-${ETCD_VERSION}-linux-amd64/etcdctl /usr/local/bin/etcdctl && \ + # Remove temporary files & build dependencies + rm -rf /tmp/* && \ + rm -f ${PRODUCT}_${VAULT_VERSION}_SHA256SUMS ${PRODUCT}_${VAULT_VERSION}_SHA256SUMS.sig && \ + apk del .deps + +# Verify installations +RUN etcdctl version && vault version && aws --version + +# Add scripts and make them executable +COPY common-functions.sh /usr/local/bin/common-functions.sh +COPY vault-snapshot.sh /usr/local/bin/vault-snapshot.sh +COPY apisix-snapshot.sh /usr/local/bin/apisix-snapshot.sh +RUN chmod +x /usr/local/bin/common-functions.sh /usr/local/bin/vault-snapshot.sh /usr/local/bin/apisix-snapshot.sh + + +CMD ["/usr/local/bin/vault-snapshot.sh"] diff --git 
a/ewc/cron-jobs/apisix-snapshot.sh b/ewc/cron-jobs/apisix-snapshot.sh new file mode 100644 index 0000000..0c6f7de --- /dev/null +++ b/ewc/cron-jobs/apisix-snapshot.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Source common functions +source /usr/local/bin/common-functions.sh + +# Variables +ETCD_ENDPOINT=${ETCD_ENDPOINT} +S3_BUCKET_BASE_PATH=${S3_BUCKET_BASE_PATH} +AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} +AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} +AWS_REGION=${AWS_REGION:-"eu-north-1"} + +# Check required variables +check_var "ETCD_ENDPOINT" "$ETCD_ENDPOINT" +check_var "S3_BUCKET_BASE_PATH" "$S3_BUCKET_BASE_PATH" +check_var "AWS_ACCESS_KEY_ID" "$AWS_ACCESS_KEY_ID" +check_var "AWS_SECRET_ACCESS_KEY" "$AWS_SECRET_ACCESS_KEY" + +# Generate ISO 8601 compliant timestamp +TIMESTAMP_ISO_8601=$(generate_iso_8601_timestamp) + +SNAPSHOT_NAME="snapshot-$TIMESTAMP_ISO_8601.db" + +# Take the etcd snapshot +ETCDCTL_API=3 etcdctl --endpoints=${ETCD_ENDPOINT} snapshot save /tmp/$SNAPSHOT_NAME + +# Upload to S3 +aws s3 cp /tmp/$SNAPSHOT_NAME s3://${S3_BUCKET_BASE_PATH}${SNAPSHOT_NAME} --region "${AWS_REGION}" + +if [ $? -ne 0 ]; then + echo "Error: Failed to upload snapshot to S3" + exit 1 +fi + +# Clean up +rm /tmp/$SNAPSHOT_NAME diff --git a/ewc/cron-jobs/common-functions.sh b/ewc/cron-jobs/common-functions.sh new file mode 100644 index 0000000..d4ef81a --- /dev/null +++ b/ewc/cron-jobs/common-functions.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Function to check if a variable is set +check_var() { + local var_name=$1 + local var_value=$2 + if [ -z "$var_value" ]; then + echo "Error: $var_name is not set." 
+ exit 1 + fi +} + +# Function to generate ISO 8601 compliant timestamp +generate_iso_8601_timestamp() { + local timezone_offset=$(date +%z) + local timestamp=$(date +%Y-%m-%dT%H:%M:%S) + + if [ "$timezone_offset" == "+0000" ]; then + echo "${timestamp}Z" + else + # (need to use sed as couldn't make it work with '%:z' in date command) + echo "${timestamp}$(echo $timezone_offset | sed 's/\(..\)$/:\1/')" + fi +} \ No newline at end of file diff --git a/ewc/cron-jobs/vault-snapshot.sh b/ewc/cron-jobs/vault-snapshot.sh new file mode 100644 index 0000000..d49d4f3 --- /dev/null +++ b/ewc/cron-jobs/vault-snapshot.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# Source common functions +source /usr/local/bin/common-functions.sh + +# Variables +VAULT_ADDR=${VAULT_ADDR} +S3_BUCKET_BASE_PATH=${S3_BUCKET_BASE_PATH} +AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} +AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} +AWS_REGION=${AWS_REGION:-"eu-north-1"} + +# Check required variables +check_var "VAULT_ADDR" "$VAULT_ADDR" +check_var "S3_BUCKET_BASE_PATH" "$S3_BUCKET_BASE_PATH" +check_var "AWS_ACCESS_KEY_ID" "$AWS_ACCESS_KEY_ID" +check_var "AWS_SECRET_ACCESS_KEY" "$AWS_SECRET_ACCESS_KEY" + +# Retrieve the provided service account token +SA_TOKEN=$(cat /var/run/secrets/kubernetes.io/serviceaccount/token) + +# Authenticate with Vault using the Kubernetes auth method to obtain a Vault token +export VAULT_TOKEN=$(vault write -field=token auth/kubernetes/login \ + role=backup-cron-job \ + jwt=$SA_TOKEN) + +# Generate ISO 8601 compliant timestamp +TIMESTAMP_ISO_8601=$(generate_iso_8601_timestamp) + +SNAPSHOT_NAME="snapshot-$TIMESTAMP_ISO_8601.snap" + +# Take the snapshot +# https://developer.hashicorp.com/vault/tutorials/standard-procedures/sop-backup +vault operator raft snapshot save /tmp/$SNAPSHOT_NAME + +# Upload to S3 +aws s3 cp /tmp/$SNAPSHOT_NAME s3://${S3_BUCKET_BASE_PATH}${SNAPSHOT_NAME} --region "${AWS_REGION}" + +if [ $? 
-ne 0 ]; then + echo "Error: Failed to upload snapshot to S3" + exit 1 +fi + +# Clean up +rm /tmp/$SNAPSHOT_NAME From faef6fe4c64fd6ff9056fc8dd45072a6e77bc46f Mon Sep 17 00:00:00 2001 From: aaltonja Date: Mon, 9 Dec 2024 15:30:03 +0200 Subject: [PATCH 02/13] add apisix backup job, use common namespace for shared secret and use role and role binding to read the values --- ewc/cron_jobs.tf | 251 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 251 insertions(+) create mode 100644 ewc/cron_jobs.tf diff --git a/ewc/cron_jobs.tf b/ewc/cron_jobs.tf new file mode 100644 index 0000000..4f2594c --- /dev/null +++ b/ewc/cron_jobs.tf @@ -0,0 +1,251 @@ +################################################################################ +# Backups +################################################################################ + +# TODO consider using a service account for assuming AWS role - no need to use access key and secret key +# E.g. using service account with self-hosted k8s cluster requires own OIDC provider, keycloak, s3, dex etc. 
+ + +################################################################################ +# Common +################################################################################ + +resource "kubernetes_namespace" "backup_cron_jobs" { + metadata { + annotations = { + "field.cattle.io/projectId" = rancher2_project.gateway.id + } + + name = "backup-cron-jobs" + } +} + +resource "kubernetes_secret" "backup_cron_job_secrets" { + metadata { + name = "backup-cron-jobs" + namespace = kubernetes_namespace.backup_cron_jobs.metadata.0.name + } + + data = { + AWS_ACCESS_KEY_ID = var.s3_bucket_access_key + AWS_SECRET_ACCESS_KEY = var.s3_bucket_secret_key + } + + type = "Opaque" +} + +# Role that allows read access to the secret defined above +resource "kubernetes_role" "secret_access_role" { + metadata { + name = "secret-access-role" + namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name + } + + rule { + api_groups = [""] # secrets are part of core API group + resources = ["secrets"] + resource_names = [kubernetes_secret.backup_cron_job_secrets.metadata.0.name] + verbs = ["get"] + } +} + + +################################################################################ +# Vault +################################################################################ +resource "kubernetes_service_account" "vault_backup_cron_job_service_account" { + metadata { + name = "vault-backup-cron-job-sa" + namespace = module.ewc-vault-init.vault_namespace_name + } + + depends_on = [module.ewc-vault-init] + +} + +# Role binding that allows the service account to access the common secret in different namespace +resource "kubernetes_role_binding" "vault_backup_secret_access_binding" { + metadata { + name = "vault-backup-secret-access-binding" + namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name + } + + subject { + kind = "ServiceAccount" + name = kubernetes_service_account.vault_backup_cron_job_service_account.metadata[0].name + namespace = 
module.ewc-vault-init.vault_namespace_name + } + + role_ref { + kind = "Role" + name = kubernetes_role.secret_access_role.metadata[0].name + api_group = "rbac.authorization.k8s.io" + } +} + +resource "kubernetes_cron_job_v1" "vault_backup" { + metadata { + name = "vault-backup" + namespace = module.ewc-vault-init.vault_namespace_name + } + + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 # Keep the latest 3 failed jobs + schedule = "1 0 * * *" + timezone = "Etc/UTC" + starting_deadline_seconds = 43200 # 12 hours + successful_jobs_history_limit = 1 # Keep the latest + + job_template { + metadata {} + spec { + backoff_limit = 6 # This the default value + template { + metadata {} + spec { + restart_policy = "OnFailure" + service_account_name = kubernetes_service_account.vault_backup_cron_job_service_account.metadata.0.name + container { + name = "vault-backup" + image = "ghcr.io/eurodeo/femdi-gateway-iac/vault-snapshot:latest" + image_pull_policy = "Always" # TODO change to IfNotPresent once tested out to be working + command = ["/bin/sh", "-c", "/usr/local/bin/vault-snapshot.sh"] + + env { + name = "VAULT_ADDR" + value = local.vault_host + } + + env { + name = "S3_BUCKET_BASE_PATH" + value = var.vault_backup_bucket_base_path + } + + env { + name = "AWS_ACCESS_KEY_ID" + value_from { + secret_key_ref { + name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + key = "AWS_ACCESS_KEY_ID" + } + } + } + + env { + name = "AWS_SECRET_ACCESS_KEY" + value_from { + secret_key_ref { + name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + key = "AWS_SECRET_ACCESS_KEY" + } + } + } + } + } + } + } + } + } + + depends_on = [module.ewc-vault-init] + +} + + +################################################################################ +# APISIX backup +################################################################################ +resource "kubernetes_service_account" "apisix_backup_cron_job_service_account" { + metadata { + name = 
"apisix-backup-cron-job-sa" + namespace = kubernetes_namespace.apisix.metadata.0.name + } + +} + +resource "kubernetes_role_binding" "apisix_backup_secret_access_binding" { + metadata { + name = "apisix-backup-secret-access-binding" + namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name + } + + subject { + kind = "ServiceAccount" + name = kubernetes_service_account.apisix_backup_cron_job_service_account.metadata[0].name + namespace = kubernetes_namespace.apisix.metadata[0].name + } + + role_ref { + kind = "Role" + name = kubernetes_role.secret_access_role.metadata[0].name + api_group = "rbac.authorization.k8s.io" + } +} + +resource "kubernetes_cron_job_v1" "apisix_backup" { + metadata { + name = "apisix-backup" + namespace = kubernetes_namespace.apisix.metadata.0.name + } + + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 # Keep the latest 3 failed jobs + schedule = "1 0 * * *" + timezone = "Etc/UTC" + starting_deadline_seconds = 43200 # 12 hours + successful_jobs_history_limit = 1 # Keep the latest + + job_template { + metadata {} + spec { + backoff_limit = 6 # This the default value + template { + metadata {} + spec { + restart_policy = "OnFailure" + service_account_name = kubernetes_service_account.apisix_backup_cron_job_service_account.metadata.0.name + container { + name = "apisix-backup" + image = "ghcr.io/eurodeo/femdi-gateway-iac/vault-snapshot:latest" + image_pull_policy = "Always" # TODO change to IfNotPresent once tested out to be working + command = ["/bin/sh", "-c", "/usr/local/bin/apisix-snapshot.sh"] + + env { + name = "ETCD_ENDPOINT" + value = local.etcd_host + } + + env { + name = "S3_BUCKET_BASE_PATH" + value = var.apisix_backup_bucket_base_path + } + + env { + name = "AWS_ACCESS_KEY_ID" + value_from { + secret_key_ref { + name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + key = "AWS_ACCESS_KEY_ID" + } + } + } + + env { + name = "AWS_SECRET_ACCESS_KEY" + value_from { + secret_key_ref { + name 
= kubernetes_secret.backup_cron_job_secrets.metadata.0.name + key = "AWS_SECRET_ACCESS_KEY" + } + } + } + } + } + } + } + } + } + +} From c3ba2844ff3c5b5e60628caf25285e566f20267b Mon Sep 17 00:00:00 2001 From: aaltonja Date: Mon, 9 Dec 2024 15:31:00 +0200 Subject: [PATCH 03/13] vault backup changes, local value for etcd cluster host address --- ewc/ewc-vault-init/outuputs.tf | 5 ++++ ewc/main.tf | 51 ++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/ewc/ewc-vault-init/outuputs.tf b/ewc/ewc-vault-init/outuputs.tf index b892cc4..1cb6916 100644 --- a/ewc/ewc-vault-init/outuputs.tf +++ b/ewc/ewc-vault-init/outuputs.tf @@ -39,3 +39,8 @@ output "vault_pod_ready_statuses_after_init" { ] ]) } + +output "vault_namespace_name" { + description = "Name of the namespace where Vault is running" + value = kubernetes_namespace.vault.metadata[0].name +} diff --git a/ewc/main.tf b/ewc/main.tf index c7113b9..d6aa0ce 100644 --- a/ewc/main.tf +++ b/ewc/main.tf @@ -55,6 +55,11 @@ module "ewc-vault-init" { } +locals { + vault_host = "http://vault-active.vault.svc.cluster.local:8200" + etcd_host = "http://apisix-etcd.apisix.svc.cluster.local:2379" +} + ################################################################################ # Install gateway apps ################################################################################ @@ -203,6 +208,25 @@ resource "vault_jwt_auth_backend" "github" { depends_on = [module.ewc-vault-init] } +resource "vault_auth_backend" "kubernetes" { + type = "kubernetes" + description = "Kubernetes auth backend" + + depends_on = [module.ewc-vault-init] +} + +resource "vault_kubernetes_auth_backend_config" "k8s_auth_config" { + backend = vault_auth_backend.kubernetes.path + + # Use the internal Kubernetes API server URL for communication within the cluster. + # This URL is automatically resolved by the Kubernetes DNS service to the internal IP address of the Kubernetes API server. 
+ kubernetes_host = "https://kubernetes.default.svc.cluster.local" + + # We can omit rest of params, e.g. CA certificate and token reviewer JWT as long as + # Vault and calling service are run in same k8s cluster + # https://developer.hashicorp.com/vault/docs/auth/kubernetes#use-local-service-account-token-as-the-reviewer-jwt +} + resource "vault_policy" "apisix-global" { name = "apisix-global" @@ -240,6 +264,18 @@ EOT depends_on = [module.ewc-vault-init] } +resource "vault_policy" "backup-cron-job" { + name = "backup-cron-job" + + policy = < Date: Mon, 9 Dec 2024 15:37:52 +0200 Subject: [PATCH 04/13] add vault, and apisix related variables --- ewc/variables.tf | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/ewc/variables.tf b/ewc/variables.tf index facae5e..17aee48 100644 --- a/ewc/variables.tf +++ b/ewc/variables.tf @@ -91,6 +91,12 @@ variable "apisix_ip_list" { } } +variable "apisix_backup_bucket_base_path" { + description = "AWS S3 bucket base path for APISIX backup files" + type = string + default = "dev-rodeo-ewc-vault/apisix/" +} + variable "keycloak_admin_password" { description = "Password for keycloak admin" type = string @@ -133,6 +139,11 @@ variable "vault_token" { sensitive = true } +variable "vault_backup_bucket_base_path" { + description = "AWS S3 bucket base path for vault backup files" + type = string + default = "dev-rodeo-ewc-vault/vault/" +} variable "dev-portal_subdomain" { description = "subdomain where devportal will be hosted" @@ -157,3 +168,15 @@ variable "github_idp_client_secret" { type = string sensitive = true } + +variable "s3_bucket_access_key" { + description = "AWS access key for S3 bucket for backups" + type = string + sensitive = true +} + +variable "s3_bucket_secret_key" { + description = "AWS secret key for S3 bucket for backups" + type = string + sensitive = true +} From 34307e4333d4f5376cb6cfc136223eccc2670913 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Tue, 10 Dec 2024 10:07:54 +0200 Subject: 
[PATCH 05/13] use general image ment for all cron jobs --- ewc/cron_jobs.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ewc/cron_jobs.tf b/ewc/cron_jobs.tf index 4f2594c..cbf53e5 100644 --- a/ewc/cron_jobs.tf +++ b/ewc/cron_jobs.tf @@ -108,7 +108,7 @@ resource "kubernetes_cron_job_v1" "vault_backup" { service_account_name = kubernetes_service_account.vault_backup_cron_job_service_account.metadata.0.name container { name = "vault-backup" - image = "ghcr.io/eurodeo/femdi-gateway-iac/vault-snapshot:latest" + image = "ghcr.io/eurodeo/femdi-gateway-iac/cron-jobs:latest" image_pull_policy = "Always" # TODO change to IfNotPresent once tested out to be working command = ["/bin/sh", "-c", "/usr/local/bin/vault-snapshot.sh"] @@ -208,7 +208,7 @@ resource "kubernetes_cron_job_v1" "apisix_backup" { service_account_name = kubernetes_service_account.apisix_backup_cron_job_service_account.metadata.0.name container { name = "apisix-backup" - image = "ghcr.io/eurodeo/femdi-gateway-iac/vault-snapshot:latest" + image = "ghcr.io/eurodeo/femdi-gateway-iac/cron-jobs:latest" image_pull_policy = "Always" # TODO change to IfNotPresent once tested out to be working command = ["/bin/sh", "-c", "/usr/local/bin/apisix-snapshot.sh"] From 29c7cf3c907addec2709f3fbf6781b680c34e0c3 Mon Sep 17 00:00:00 2001 From: Jani Aaltonen <122603649+aaltonja@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:39:20 +0200 Subject: [PATCH 06/13] fix vault service account reference --- ewc/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ewc/main.tf b/ewc/main.tf index 0dd243d..cfc4b35 100644 --- a/ewc/main.tf +++ b/ewc/main.tf @@ -169,7 +169,7 @@ resource "vault_jwt_auth_backend_role" "api-management-tool-gha" { resource "vault_kubernetes_auth_backend_role" "backup-cron-job" { backend = vault_auth_backend.kubernetes.path role_name = "backup-cron-job" - bound_service_account_names = [kubernetes_service_account.backup_cron_job_service_account.metadata.0.name] + 
bound_service_account_names = [kubernetes_service_account.vault_backup_cron_job_service_account.metadata.0.name] bound_service_account_namespaces = [module.ewc-vault-init.vault_namespace_name] token_policies = [vault_policy.backup-cron-job.name] token_ttl = 300 From 563ee9366a72f3e258caeffdf8fb8227f1f51229 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Wed, 11 Dec 2024 14:46:23 +0200 Subject: [PATCH 07/13] revert to namespace specific secrets, common one with roles and bindings not worked as expected --- ewc/cron_jobs.tf | 105 +++++++++++------------------------------------ 1 file changed, 23 insertions(+), 82 deletions(-) diff --git a/ewc/cron_jobs.tf b/ewc/cron_jobs.tf index cbf53e5..a56181e 100644 --- a/ewc/cron_jobs.tf +++ b/ewc/cron_jobs.tf @@ -1,56 +1,14 @@ ################################################################################ + # Backups + ################################################################################ # TODO consider using a service account for assuming AWS role - no need to use access key and secret key # E.g. using service account with self-hosted k8s cluster requires own OIDC provider, keycloak, s3, dex etc. 
- -################################################################################ -# Common ################################################################################ -resource "kubernetes_namespace" "backup_cron_jobs" { - metadata { - annotations = { - "field.cattle.io/projectId" = rancher2_project.gateway.id - } - - name = "backup-cron-jobs" - } -} - -resource "kubernetes_secret" "backup_cron_job_secrets" { - metadata { - name = "backup-cron-jobs" - namespace = kubernetes_namespace.backup_cron_jobs.metadata.0.name - } - - data = { - AWS_ACCESS_KEY_ID = var.s3_bucket_access_key - AWS_SECRET_ACCESS_KEY = var.s3_bucket_secret_key - } - - type = "Opaque" -} - -# Role that allows read access to the secret defined above -resource "kubernetes_role" "secret_access_role" { - metadata { - name = "secret-access-role" - namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name - } - - rule { - api_groups = [""] # secrets are part of core API group - resources = ["secrets"] - resource_names = [kubernetes_secret.backup_cron_job_secrets.metadata.0.name] - verbs = ["get"] - } -} - - -################################################################################ # Vault ################################################################################ resource "kubernetes_service_account" "vault_backup_cron_job_service_account" { @@ -59,28 +17,24 @@ resource "kubernetes_service_account" "vault_backup_cron_job_service_account" { namespace = module.ewc-vault-init.vault_namespace_name } + automount_service_account_token = true + depends_on = [module.ewc-vault-init] } -# Role binding that allows the service account to access the common secret in different namespace -resource "kubernetes_role_binding" "vault_backup_secret_access_binding" { +resource "kubernetes_secret" "vault_backup_cron_job_secrets" { metadata { - name = "vault-backup-secret-access-binding" - namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name - } - - subject { - kind = 
"ServiceAccount" - name = kubernetes_service_account.vault_backup_cron_job_service_account.metadata[0].name + name = "vault-backup-cron-jobs" namespace = module.ewc-vault-init.vault_namespace_name } - role_ref { - kind = "Role" - name = kubernetes_role.secret_access_role.metadata[0].name - api_group = "rbac.authorization.k8s.io" + data = { + AWS_ACCESS_KEY_ID = var.s3_bucket_access_key + AWS_SECRET_ACCESS_KEY = var.s3_bucket_secret_key } + + type = "Opaque" } resource "kubernetes_cron_job_v1" "vault_backup" { @@ -126,7 +80,7 @@ resource "kubernetes_cron_job_v1" "vault_backup" { name = "AWS_ACCESS_KEY_ID" value_from { secret_key_ref { - name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + name = kubernetes_secret.vault_backup_cron_job_secrets.metadata.0.name key = "AWS_ACCESS_KEY_ID" } } @@ -136,7 +90,7 @@ resource "kubernetes_cron_job_v1" "vault_backup" { name = "AWS_SECRET_ACCESS_KEY" value_from { secret_key_ref { - name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + name = kubernetes_secret.vault_backup_cron_job_secrets.metadata.0.name key = "AWS_SECRET_ACCESS_KEY" } } @@ -154,33 +108,21 @@ resource "kubernetes_cron_job_v1" "vault_backup" { ################################################################################ + # APISIX backup ################################################################################ -resource "kubernetes_service_account" "apisix_backup_cron_job_service_account" { +resource "kubernetes_secret" "apisix_backup_cron_job_secrets" { metadata { - name = "apisix-backup-cron-job-sa" + name = "apisix-backup-cron-jobs" namespace = kubernetes_namespace.apisix.metadata.0.name } -} - -resource "kubernetes_role_binding" "apisix_backup_secret_access_binding" { - metadata { - name = "apisix-backup-secret-access-binding" - namespace = kubernetes_namespace.backup_cron_jobs.metadata[0].name - } - - subject { - kind = "ServiceAccount" - name = 
kubernetes_service_account.apisix_backup_cron_job_service_account.metadata[0].name - namespace = kubernetes_namespace.apisix.metadata[0].name + data = { + AWS_ACCESS_KEY_ID = var.s3_bucket_access_key + AWS_SECRET_ACCESS_KEY = var.s3_bucket_secret_key } - role_ref { - kind = "Role" - name = kubernetes_role.secret_access_role.metadata[0].name - api_group = "rbac.authorization.k8s.io" - } + type = "Opaque" } resource "kubernetes_cron_job_v1" "apisix_backup" { @@ -204,8 +146,7 @@ resource "kubernetes_cron_job_v1" "apisix_backup" { template { metadata {} spec { - restart_policy = "OnFailure" - service_account_name = kubernetes_service_account.apisix_backup_cron_job_service_account.metadata.0.name + restart_policy = "OnFailure" container { name = "apisix-backup" image = "ghcr.io/eurodeo/femdi-gateway-iac/cron-jobs:latest" @@ -226,7 +167,7 @@ resource "kubernetes_cron_job_v1" "apisix_backup" { name = "AWS_ACCESS_KEY_ID" value_from { secret_key_ref { - name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + name = kubernetes_secret.apisix_backup_cron_job_secrets.metadata.0.name key = "AWS_ACCESS_KEY_ID" } } @@ -236,7 +177,7 @@ resource "kubernetes_cron_job_v1" "apisix_backup" { name = "AWS_SECRET_ACCESS_KEY" value_from { secret_key_ref { - name = kubernetes_secret.backup_cron_job_secrets.metadata.0.name + name = kubernetes_secret.apisix_backup_cron_job_secrets.metadata.0.name key = "AWS_SECRET_ACCESS_KEY" } } From 04f1052df937575e96d731ca939983f56c25f229 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Wed, 11 Dec 2024 14:46:57 +0200 Subject: [PATCH 08/13] change backup bucket paths --- ewc/variables.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ewc/variables.tf b/ewc/variables.tf index c883a41..274c475 100644 --- a/ewc/variables.tf +++ b/ewc/variables.tf @@ -94,7 +94,7 @@ variable "apisix_ip_list" { variable "apisix_backup_bucket_base_path" { description = "AWS S3 bucket base path for APISIX backup files" type = string - default = 
"dev-rodeo-ewc-vault/apisix/" + default = "dev-rodeo-backups/ewc/apisix/" } variable "keycloak_admin_password" { @@ -142,7 +142,7 @@ variable "vault_token" { variable "vault_backup_bucket_base_path" { description = "AWS S3 bucket base path for vault backup files" type = string - default = "dev-rodeo-ewc-vault/vault/" + default = "dev-rodeo-backups/ewc/vault/" } variable "install_dev-portal" { From e2ca5844e6b4166d22d0ba6e06e191bdcb7ac260 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Wed, 11 Dec 2024 14:56:51 +0200 Subject: [PATCH 09/13] change kubernetes host to match cluster DNS search paths --- ewc/main.tf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ewc/main.tf b/ewc/main.tf index cfc4b35..8440aa8 100644 --- a/ewc/main.tf +++ b/ewc/main.tf @@ -95,7 +95,11 @@ resource "vault_kubernetes_auth_backend_config" "k8s_auth_config" { # Use the internal Kubernetes API server URL for communication within the cluster. # This URL is automatically resolved by the Kubernetes DNS service to the internal IP address of the Kubernetes API server. - kubernetes_host = "https://kubernetes.default.svc.cluster.local" + # If the provided host doesn't work (403 response) in future you can check correct DNS search paths using: + # kubectl run -it --rm --restart=Never busybox --image=busybox -- sh + # cat /etc/resolv.conf + + kubernetes_host = "https://kubernetes.default.svc.kubernetes.local" # We can omit rest of params, e.g. 
CA certificate and token reviewer JWT as long as # Vault and calling service are run in same k8s cluster From da00aeff779b253c436a40d06a10ee86ecd7c871 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Thu, 12 Dec 2024 09:01:28 +0200 Subject: [PATCH 10/13] add postgresql-client --- ewc/cron-jobs/Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ewc/cron-jobs/Dockerfile b/ewc/cron-jobs/Dockerfile index 7a6e64b..fd3fba7 100644 --- a/ewc/cron-jobs/Dockerfile +++ b/ewc/cron-jobs/Dockerfile @@ -5,15 +5,15 @@ ARG PRODUCT=vault ARG VAULT_VERSION=1.18.0 ARG ETCD_VERSION=v3.5.17 -# Install dependencies and Vault -# Vault installation snippet from https://www.hashicorp.com/blog/installing-hashicorp-tools-in-alpine-linux-containers -# Vault binary is enormous ~436MB, might need to think some other option.. -# Someone else also not happy about it https://github.com/hashicorp/vault/issues/22893 +# Install dependencies and AWS CLI & PostgreSQL client # aws-cli is around ~100MB RUN apk add --update --virtual .deps --no-cache gnupg wget unzip && \ - apk add --no-cache aws-cli bash && \ + apk add --no-cache aws-cli postgresql-client bash && \ cd /tmp && \ # Install Vault + # Vault installation snippet from https://www.hashicorp.com/blog/installing-hashicorp-tools-in-alpine-linux-containers + # Vault binary is enormous ~436MB, might need to think some other option.. 
+ # Someone else also not happy about it https://github.com/hashicorp/vault/issues/22893 wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_linux_amd64.zip && \ wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_SHA256SUMS && \ wget https://releases.hashicorp.com/${PRODUCT}/${VAULT_VERSION}/${PRODUCT}_${VAULT_VERSION}_SHA256SUMS.sig && \ @@ -32,7 +32,7 @@ RUN apk add --update --virtual .deps --no-cache gnupg wget unzip && \ apk del .deps # Verify installations -RUN etcdctl version && vault version && aws --version +RUN etcdctl version && vault version && aws --version && pg_dump --version # Add scripts and make them executable COPY common-functions.sh /usr/local/bin/common-functions.sh From 2a668bcace8c64f2bd21847a1e73ae6dc3f5af25 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Thu, 12 Dec 2024 09:17:49 +0200 Subject: [PATCH 11/13] add keycloak snapshot script --- ewc/cron-jobs/Dockerfile | 7 +++-- ewc/cron-jobs/keycloak-snapshot.sh | 42 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 ewc/cron-jobs/keycloak-snapshot.sh diff --git a/ewc/cron-jobs/Dockerfile b/ewc/cron-jobs/Dockerfile index fd3fba7..63be040 100644 --- a/ewc/cron-jobs/Dockerfile +++ b/ewc/cron-jobs/Dockerfile @@ -38,7 +38,10 @@ RUN etcdctl version && vault version && aws --version && pg_dump --version COPY common-functions.sh /usr/local/bin/common-functions.sh COPY vault-snapshot.sh /usr/local/bin/vault-snapshot.sh COPY apisix-snapshot.sh /usr/local/bin/apisix-snapshot.sh -RUN chmod +x /usr/local/bin/common-functions.sh /usr/local/bin/vault-snapshot.sh /usr/local/bin/apisix-snapshot.sh - +COPY keycloak-snapshot.sh /usr/local/bin/keycloak-snapshot.sh +RUN chmod +x /usr/local/bin/common-functions.sh \ + /usr/local/bin/vault-snapshot.sh \ + /usr/local/bin/apisix-snapshot.sh \ + /usr/local/bin/keycloak-snapshot.sh CMD ["/usr/local/bin/vault-snapshot.sh"] diff --git 
a/ewc/cron-jobs/keycloak-snapshot.sh b/ewc/cron-jobs/keycloak-snapshot.sh new file mode 100644 index 0000000..83dad65 --- /dev/null +++ b/ewc/cron-jobs/keycloak-snapshot.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Source common functions +source /usr/local/bin/common-functions.sh + +# Variables +POSTGRES_HOST=${POSTGRES_HOST} +POSTGRES_DB=${POSTGRES_DB} +POSTGRES_USER=${POSTGRES_USER} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD} +S3_BUCKET_BASE_PATH=${S3_BUCKET_BASE_PATH} +AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} +AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} +AWS_REGION=${AWS_REGION:-"eu-north-1"} + +# Check required variables +check_var "POSTGRES_HOST" "$POSTGRES_HOST" +check_var "POSTGRES_DB" "$POSTGRES_DB" +check_var "POSTGRES_USER" "$POSTGRES_USER" +check_var "POSTGRES_PASSWORD" "$POSTGRES_PASSWORD" +check_var "S3_BUCKET_BASE_PATH" "$S3_BUCKET_BASE_PATH" +check_var "AWS_ACCESS_KEY_ID" "$AWS_ACCESS_KEY_ID" +check_var "AWS_SECRET_ACCESS_KEY" "$AWS_SECRET_ACCESS_KEY" + +# Generate ISO 8601 compliant timestamp +TIMESTAMP_ISO_8601=$(generate_iso_8601_timestamp) + +SNAPSHOT_NAME="snapshot-$TIMESTAMP_ISO_8601.sql" + +# Take the db snapshot +PGPASSWORD=$POSTGRES_PASSWORD pg_dump -h $POSTGRES_HOST -U $POSTGRES_USER -d $POSTGRES_DB -F c -b -v -f /tmp/$SNAPSHOT_NAME + +# Upload to S3 +aws s3 cp /tmp/$SNAPSHOT_NAME s3://${S3_BUCKET_BASE_PATH}${SNAPSHOT_NAME} --region "${AWS_REGION}" + +if [ $? 
-ne 0 ]; then + echo "Error: Failed to upload snapshot to S3" + exit 1 +fi + +# Clean up +rm /tmp/$SNAPSHOT_NAME From 9db9593edc7795b842ed06df8818d44900f6cc62 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Fri, 13 Dec 2024 10:12:28 +0200 Subject: [PATCH 12/13] add keycloak backup cron job to dev-portal module --- ewc/dev-portal-init/cron_jobs.tf | 106 +++++++++++++++++++++++++++++++ ewc/dev-portal-init/main.tf | 20 ++++++ ewc/dev-portal-init/variables.tf | 19 ++++++ ewc/variables.tf | 6 ++ 4 files changed, 151 insertions(+) create mode 100644 ewc/dev-portal-init/cron_jobs.tf diff --git a/ewc/dev-portal-init/cron_jobs.tf b/ewc/dev-portal-init/cron_jobs.tf new file mode 100644 index 0000000..970ead8 --- /dev/null +++ b/ewc/dev-portal-init/cron_jobs.tf @@ -0,0 +1,106 @@ +################################################################################ + +# Keycloak backup +################################################################################ +resource "kubernetes_secret" "keycloak_backup_cron_job_secrets" { + metadata { + name = "keycloak-backup-cron-job" + namespace = kubernetes_namespace.keycloak.metadata.0.name + } + + data = { + AWS_ACCESS_KEY_ID = var.s3_bucket_access_key + AWS_SECRET_ACCESS_KEY = var.s3_bucket_secret_key + } + + type = "Opaque" + +} + +resource "kubernetes_cron_job_v1" "keycloak_backup" { + metadata { + name = "keycloak-backup" + namespace = kubernetes_namespace.keycloak.metadata.0.name + } + + spec { + concurrency_policy = "Replace" + failed_jobs_history_limit = 3 # Keep the latest 3 failed jobs + schedule = "1 0 * * *" + timezone = "Etc/UTC" + starting_deadline_seconds = 43200 # 12 hours + successful_jobs_history_limit = 1 # Keep the latest + + job_template { + metadata {} + spec { + backoff_limit = 6 # This is the default value + template { + metadata {} + spec { + restart_policy = "OnFailure" + container { + name = "keycloak-backup" + image = "ghcr.io/eurodeo/femdi-gateway-iac/cron-jobs:latest" + image_pull_policy = "Always" # TODO 
change to IfNotPresent once tested out to be working + command = ["/bin/sh", "-c", "/usr/local/bin/keycloak-snapshot.sh"] + + env { + name = "POSTGRES_HOST" + value = local.postgres_host + } + + env { + name = "POSTGRES_DB" + value = local.postgres_db_name + } + + env { + name = "POSTGRES_USER" + value = local.postgres_db_user + } + + # A bit magic here to get the password from Keycloak Helm chart generated secret + # Reference dev-portal-init/main.tf resource "helm_release" "keycloak" for more info + env { + name = "POSTGRES_PASSWORD" + value_from { + secret_key_ref { + name = "keycloak-postgresql" + key = "password" + } + } + } + + env { + name = "S3_BUCKET_BASE_PATH" + value = var.keycloak_backup_bucket_base_path + } + + env { + name = "AWS_ACCESS_KEY_ID" + value_from { + secret_key_ref { + name = kubernetes_secret.keycloak_backup_cron_job_secrets.metadata.0.name + key = "AWS_ACCESS_KEY_ID" + } + } + } + + env { + name = "AWS_SECRET_ACCESS_KEY" + value_from { + secret_key_ref { + name = kubernetes_secret.keycloak_backup_cron_job_secrets.metadata.0.name + key = "AWS_SECRET_ACCESS_KEY" + } + } + } + } + } + } + } + } + } + +} \ No newline at end of file diff --git a/ewc/dev-portal-init/main.tf b/ewc/dev-portal-init/main.tf index 80dd93c..ffeca16 100644 --- a/ewc/dev-portal-init/main.tf +++ b/ewc/dev-portal-init/main.tf @@ -1,6 +1,13 @@ ################################################################################ # Install Keycloak ################################################################################ + +locals { + postgres_host = "keycloak-postgresql.keycloak.svc.cluster.local" + postgres_db_name = "bitnami_keycloak" # Default from Helm chart + postgres_db_user = "bn_keycloak" # default from Helm chart +} + resource "kubernetes_namespace" "keycloak" { metadata { annotations = { @@ -32,6 +39,8 @@ resource "kubernetes_config_map" "realm-json" { } #TODO: Add HPA +#TODO: Consider managing the secrets in self managed kubernetes_secret instead of using Helm 
chart generated secret +# Could not make a self-managed secret work reliably. Possible cause: https://github.com/bitnami/charts/issues/18014 resource "helm_release" "keycloak" { name = "keycloak" repository = "https://charts.bitnami.com/bitnami" @@ -65,6 +74,16 @@ resource "helm_release" "keycloak" { value = var.keycloak_admin_password } + set { + name = "postgresql.auth.username" + value = local.postgres_db_user + } + + set { + name = "postgresql.auth.database" + value = local.postgres_db_name + } + # Needed for configmap realm import # See: https://github.com/bitnami/charts/issues/5178#issuecomment-765361901 set { @@ -117,6 +136,7 @@ resource "helm_release" "keycloak" { } + ################################################################################ # Install Dev-portal diff --git a/ewc/dev-portal-init/variables.tf b/ewc/dev-portal-init/variables.tf index fe350b1..ca3f997 100644 --- a/ewc/dev-portal-init/variables.tf +++ b/ewc/dev-portal-init/variables.tf @@ -37,6 +37,13 @@ variable "keycloak_subdomain" { description = "subdomain where keycloak will be hosted" type = string } + +variable "keycloak_backup_bucket_base_path" { + description = "AWS S3 bucket base path for Keycloak backup files" + type = string + default = "dev-rodeo-backups/ewc/keycloak/" +} + variable "dev-portal_subdomain" { description = "subdomain where devportal will be hosted" type = string @@ -76,3 +83,15 @@ variable "github_idp_client_secret" { type = string sensitive = true } + +variable "s3_bucket_access_key" { + description = "AWS access key for S3 bucket for backups" + type = string + sensitive = true +} + +variable "s3_bucket_secret_key" { + description = "AWS secret key for S3 bucket for backups" + type = string + sensitive = true +} diff --git a/ewc/variables.tf b/ewc/variables.tf index 274c475..60a4dfa 100644 --- a/ewc/variables.tf +++ b/ewc/variables.tf @@ -109,6 +109,12 @@ variable "keycloak_subdomain" { default = "keycloak" } +variable "keycloak_backup_bucket_base_path" { 
+ description = "AWS S3 bucket base path for Keycloak backup files" + type = string + default = "dev-rodeo-backups/ewc/keycloak/" +} + variable "vault_subdomain" { description = "subdomain where vault will be hosted" type = string From e2343b2f4badd57dfdcac464d27996abdd54b730 Mon Sep 17 00:00:00 2001 From: aaltonja Date: Fri, 13 Dec 2024 10:26:17 +0200 Subject: [PATCH 13/13] pass new vars to dev-portal module --- ewc/main.tf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ewc/main.tf b/ewc/main.tf index 8440aa8..bcb3f8b 100644 --- a/ewc/main.tf +++ b/ewc/main.tf @@ -225,8 +225,9 @@ module "dev-portal-init" { rancher_project_id = rancher2_project.gateway.id - keycloak_subdomain = var.keycloak_subdomain - keycloak_admin_password = var.keycloak_admin_password + keycloak_subdomain = var.keycloak_subdomain + keycloak_admin_password = var.keycloak_admin_password + keycloak_backup_bucket_base_path = var.keycloak_backup_bucket_base_path dev-portal_subdomain = var.dev-portal_subdomain dev-portal_registry_password = var.dev-portal_registry_password @@ -238,6 +239,8 @@ module "dev-portal-init" { google_idp_client_secret = var.google_idp_client_secret github_idp_client_secret = var.github_idp_client_secret + s3_bucket_access_key = var.s3_bucket_access_key + s3_bucket_secret_key = var.s3_bucket_secret_key }