From 5d49429c26196f9f1552c374ee7694e91b7b40b8 Mon Sep 17 00:00:00 2001
From: Conor Schaefer
Date: Mon, 29 Jan 2024 15:16:00 -0800
Subject: [PATCH] ci: workflow for standalone pd

We want to exercise the pd https logic, but we can't naively run it from
scratch on every deploy, because that'd be far too many API requests to
reissue certs from ACME. Instead, let's preserve the ACME directory
before wiping state, and reuse it before bouncing the service.

This setup requires always-on boxes provisioned out of band.

Still TK:

  * use dedicated `ci` shell account
  * add GHA secrets for key material
  * use --acme-staging arg for first few runs
  * add dedicated workflow for ad-hoc runs

Refs #3336.
---
 .../scripts/ci-fullnode-redeploy-via-remote   | 80 +++++++++++++++++++++
 .../scripts/ci-fullnode-redeploy-via-runner   | 42 +++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 deployments/scripts/ci-fullnode-redeploy-via-remote
 create mode 100644 deployments/scripts/ci-fullnode-redeploy-via-runner

diff --git a/deployments/scripts/ci-fullnode-redeploy-via-remote b/deployments/scripts/ci-fullnode-redeploy-via-remote
new file mode 100644
index 0000000000..ea36c54330
--- /dev/null
+++ b/deployments/scripts/ci-fullnode-redeploy-via-remote
@@ -0,0 +1,80 @@
+#!/bin/bash
+# CI script to manage a standalone fullnode, created in order to exercise
+# direct serving of pd. This script is intended to be executed on the remote host
+# that serves `pd`, triggered from a CI runner over SSH.
+set -euo pipefail
+
+# Unpack args.
+if [[ $# -lt 2 ]] ; then
+    >&2 echo "ERROR: required arguments not specified."
+    >&2 echo "Usage: $0 <PENUMBRA_VERSION> <PENUMBRA_ENVIRONMENT>"
+    exit 1
+fi
+PENUMBRA_VERSION="${1:-}"
+PENUMBRA_ENVIRONMENT="${2:-}"
+shift 2
+
+# Additional sanity-check to ensure we're running in the proper CI context.
+# Look the account up by name: `getent` exits non-zero when it doesn't exist.
+if ! getent passwd penumbra > /dev/null ; then
+    >&2 echo "ERROR: 'penumbra' user not found."
+    >&2 echo "This script should only be run within a dedicated CI box."
+    exit 2
+fi
+
+
+if [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-preview" ]] ; then
+    pd_bootstrap_url="https://rpc.testnet-preview.penumbra.zone"
+elif [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-testnet" ]] ; then
+    pd_bootstrap_url="https://rpc.testnet.penumbra.zone"
+else
+    >&2 echo "ERROR: unsupported PENUMBRA_ENVIRONMENT: '$PENUMBRA_ENVIRONMENT'"
+    exit 3
+fi
+
+# Take down running service prior to maintenance.
+sudo systemctl stop cometbft penumbra
+
+# Pluck out recently built `pd` from packaged container.
+# We reuse existing build artifacts to ensure what's deployed is what was built,
+# and it has the nice benefit of being faster, because we don't have to rebuild
+# the same gitref on a slower remote host.
+container_img="ghcr.io/penumbra-zone/penumbra:${PENUMBRA_VERSION}"
+podman pull "$container_img"
+# Run detached, so that `podman run` prints the container id and returns,
+# rather than blocking on `sleep infinity` forever.
+container_id="$(podman run --detach "$container_img" sleep infinity)"
+f="$(mktemp)"
+podman cp "${container_id}:/usr/bin/pd" "$f"
+podman kill "$container_id"
+# Ensure unprivileged (i.e. non-root) user account can bind to 443 for HTTPS.
+sudo setcap 'cap_net_bind_service=+ep' "$f"
+sudo mv -v -f "$f" /usr/local/bin/pd
+
+# Back up ACME dir, so we don't hit ratelimit requesting new certs.
+acme_cache="/home/penumbra/.penumbra/testnet_data/node0/pd/tokio_rustls_acme_cache"
+acme_backup="/opt/penumbra-ci/$(basename "$acme_cache")"
+if [[ -d "$acme_cache" ]]; then
+    sudo rm -rf /opt/penumbra-ci
+    sudo mkdir -p /opt/penumbra-ci
+    sudo mv "$acme_cache" /opt/penumbra-ci/
+fi
+
+# Nuke state, rejoin.
+pd testnet unsafe-reset-all
+pd testnet join "$pd_bootstrap_url"
+# Restore ACME dir prior to service start. There's nothing to restore when no
+# backup was taken (e.g. on a first deploy), so skip the move in that case.
+if [[ -d "$acme_backup" ]] ; then
+    sudo mkdir -p "$(dirname "$acme_cache")"
+    sudo mv -v "$acme_backup" "$acme_cache"
+fi
+sudo chown -R penumbra: /home/penumbra/.penumbra
+
+# Bring service back up.
+sudo systemctl daemon-reload
+sudo systemctl restart penumbra cometbft
+# Verify that the services are in fact running, else exit non-zero.
+sleep 5
+sudo systemctl is-active penumbra
+sudo systemctl is-active cometbft
diff --git a/deployments/scripts/ci-fullnode-redeploy-via-runner b/deployments/scripts/ci-fullnode-redeploy-via-runner
new file mode 100644
index 0000000000..90fc1ddc17
--- /dev/null
+++ b/deployments/scripts/ci-fullnode-redeploy-via-runner
@@ -0,0 +1,42 @@
+#!/bin/bash
+# CI script to manage a standalone fullnode, created in order to exercise
+# direct serving of pd. This script is intended to be run from CI,
+# communicating with a remote node over SSH and munging its state.
+set -euo pipefail
+set -x
+
+# Unpack args. Different CI workflows can override these settings,
+# to determine whether we're targeting testnet or preview.
+PENUMBRA_VERSION="${PENUMBRA_VERSION:-main}"
+PENUMBRA_ENVIRONMENT="${PENUMBRA_ENVIRONMENT:-penumbra-preview}"
+
+if [[ -z "$PENUMBRA_VERSION" || -z "$PENUMBRA_ENVIRONMENT" ]] ; then
+    >&2 echo "ERROR: required env vars not set: PENUMBRA_VERSION, PENUMBRA_ENVIRONMENT"
+    exit 1
+fi
+
+if [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-preview" ]] ; then
+    ci_ssh_host="solo-pd.testnet-preview.plinfra.net"
+elif [[ "$PENUMBRA_ENVIRONMENT" = "penumbra-testnet" ]] ; then
+    ci_ssh_host="solo-pd.testnet.plinfra.net"
+else
+    >&2 echo "ERROR: unsupported PENUMBRA_ENVIRONMENT: '$PENUMBRA_ENVIRONMENT'"
+    exit 2
+fi
+
+# Communicate with target host over SSH, run the script.
+# The remote box has been provisioned with:
+#
+#   1) an ssh keypair assigned to admin user `ci`
+#   2) a normal user account `penumbra` for running services
+#   3) systemd service files for pd & cometbft
+#
+# As for the script that's being executed on the target, we'll copy that up from local context.
+scp ./deployments/scripts/ci-fullnode-redeploy-via-remote "${ci_ssh_host}:"
+ssh "$ci_ssh_host" sudo mv ci-fullnode-redeploy-via-remote /usr/local/bin/ci-full-node-redeploy-via-remote
+# The script is checked in without the executable bit, so set the mode explicitly.
+ssh "$ci_ssh_host" sudo chmod 0755 /usr/local/bin/ci-full-node-redeploy-via-remote
+# The remote script requires the version and environment as positional args.
+# ssh hands its arguments to a remote shell, so quote them for re-parsing.
+ssh "$ci_ssh_host" sudo /usr/local/bin/ci-full-node-redeploy-via-remote \
+    "$(printf '%q' "$PENUMBRA_VERSION")" "$(printf '%q' "$PENUMBRA_ENVIRONMENT")"