From 5ab68dc43c43fcb747d8e76472ebcc5acbf7b713 Mon Sep 17 00:00:00 2001
From: Conor Schaefer
Date: Sat, 4 May 2024 23:41:23 -0700
Subject: [PATCH] feat(tests): automated migration testing

Building on the smoke-test rewrite to use process-compose,
let's script the migration process, so that we can test
current HEAD of the monorepo against a prior tagged version,
and validate that necessary migrations are in place.

One possible approach is to fetch prebuilt binaries from uploaded
artifacts on GitHub. That's fine for `pd`, but doesn't work for running
the smoke tests, due to client/server incompatibility. Therefore we'll
clone the entire repo in a git-ignored subdir, and build the old
binaries there. Heavy, but reliable.

Updated to use the concise `pd migrate` UX from #4339.
Previously, there were missing AuctionParams, resolved by #4338.
Still seeing some proto incompat post-migration, via the test runs,
which appears to match the report in #4340.
---
 .github/workflows/migration.yml               |  31 +++++
 .gitignore                                    |   3 +
 .../process-compose-migration-test-1.yml      |  57 ++++++++
 .../process-compose-migration-test-2.yml      |  28 ++++
 deployments/compose/process-compose.yml       |  45 ++++++
 deployments/scripts/migration-test            | 131 ++++++++++++++++++
 justfile                                      |   8 ++
 7 files changed, 303 insertions(+)
 create mode 100644 .github/workflows/migration.yml
 create mode 100644 deployments/compose/process-compose-migration-test-1.yml
 create mode 100644 deployments/compose/process-compose-migration-test-2.yml
 create mode 100644 deployments/compose/process-compose.yml
 create mode 100755 deployments/scripts/migration-test

diff --git a/.github/workflows/migration.yml b/.github/workflows/migration.yml
new file mode 100644
index 0000000000..e6bcd773e9
--- /dev/null
+++ b/.github/workflows/migration.yml
@@ -0,0 +1,31 @@
+---
+name: Migration test
+on:
+  pull_request:
+
+jobs:
+  smoke_test:
+    runs-on: buildjet-16vcpu-ubuntu-2204
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.ref }}
+      cancel-in-progress: true
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Load rust cache
+        uses: astriaorg/buildjet-rust-cache@v2.5.1
+
+      - name: Install cometbft binary
+        run: ./deployments/scripts/install-cometbft
+
+      - name: Install process-compose
+        run: >-
+          sh -c "$(curl --location https://raw.githubusercontent.com/F1bonacc1/process-compose/main/scripts/get-pc.sh)" --
+          -d -b ~/bin
+
+      - name: Run migration test
+        run: |
+          export PATH="$HOME/bin:$PATH"
+          ./deployments/scripts/migration-test v0.73.1
diff --git a/.gitignore b/.gitignore
index 0ea14d1815..b1dbbe011d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,8 +50,11 @@ deployments/relayer/configs/penumbra-local.json
 
 # Logs, and other files from smoke tests
 deployments/logs/
+deployments/worktrees/
+deployments/bin/
 crates/bin/pcli/proposal.toml
 
+
 # Memory profiler, via bytehound or otherwise
 *.dat
 
diff --git a/deployments/compose/process-compose-migration-test-1.yml b/deployments/compose/process-compose-migration-test-1.yml
new file mode 100644
index 0000000000..3403216ae7
--- /dev/null
+++ b/deployments/compose/process-compose-migration-test-1.yml
@@ -0,0 +1,57 @@
+---
+# A process-compose configuration for running penumbra migration-tests.
+
+# Interleave logs from all services in single file, so it's greppable.
+log_location: deployments/logs/migration-test-1-combined.log
+is_strict: true
+
+processes:
+  build-code:
+    working_dir: deployments/worktrees/v0.73.1
+
+  # Create network configuration, for running a pd validator locally.
+  network-generate:
+    working_dir: deployments/worktrees/v0.73.1
+    command: >
+      cargo run --quiet --release --bin pd --
+      testnet generate --unbonding-delay 50
+      --epoch-duration 50 --timeout-commit 500ms
+
+  # Run pd validator based on generated network.
+  pd:
+    working_dir: deployments/worktrees/v0.73.1
+
+  # Run `pclientd` integration tests.
+  test-pclientd:
+    working_dir: deployments/worktrees/v0.73.1
+    log_location: deployments/logs/migration-test-1-pclientd.log
+
+  # Run `pcli` integration tests.
+  test-pcli:
+    working_dir: deployments/worktrees/v0.73.1
+    log_location: deployments/logs/migration-test-1-pcli.log
+    # We add `--skip delegate_and_undelegate` because the old smoke-test has an unreliable
+    # regex, that's since been fixed.
+    command: >-
+      cargo test --release --features sct-divergence-check,download-proving-keys --package pcli --
+      --ignored --test-threads 1 --nocapture
+      --skip delegate_and_undelegate
+    depends_on:
+      pd:
+        condition: process_healthy
+      cometbft:
+        condition: process_started
+      test-pclientd:
+        condition: process_completed
+    availability:
+      restart: exit_on_failure
+
+  test-pd:
+    working_dir: deployments/worktrees/v0.73.1
+
+  # Finalizer task, which will wait until all test suites have finished.
+  # This allows us to ensure that every suite ran to completion.
+  summary:
+    # The `command` only runs if all tests were successful,
+    # otherwise the process exits due to dep failure.
+    command: echo "migration tests phase 1 finished"
diff --git a/deployments/compose/process-compose-migration-test-2.yml b/deployments/compose/process-compose-migration-test-2.yml
new file mode 100644
index 0000000000..c45426a4a1
--- /dev/null
+++ b/deployments/compose/process-compose-migration-test-2.yml
@@ -0,0 +1,28 @@
+---
+# A process-compose configuration for running penumbra migration-tests.
+# This series of commands represents performing the actual migration,
+# then starting up the network again.
+
+# Interleave logs from all services in single file, so it's greppable.
+log_location: deployments/logs/migration-test-2-combined.log
+is_strict: true
+
+processes:
+  # Don't generate, since we already did that on the old tag.
+  network-generate:
+    command: echo "skipping network generation, deferring to migration..."
+
+  # Run `pclientd` integration tests.
+  test-pclientd:
+    log_location: deployments/logs/migration-test-2-pclientd.log
+
+  # Run `pcli` integration tests.
+  test-pcli:
+    log_location: deployments/logs/migration-test-2-pcli.log
+
+  # Finalizer task, which will wait until all test suites have finished.
+  # This allows us to ensure that every suite ran to completion.
+  summary:
+    # The `command` only runs if all tests were successful,
+    # otherwise the process exits due to dep failure.
+    command: echo "migration tests phase 2 finished"
diff --git a/deployments/compose/process-compose.yml b/deployments/compose/process-compose.yml
new file mode 100644
index 0000000000..669e4cdffb
--- /dev/null
+++ b/deployments/compose/process-compose.yml
@@ -0,0 +1,45 @@
+---
+# A process-compose configuration for running a local Penumbra devnet.
+# This isn't used in scripts anywhere (yet?) but serves as a reference point.
+# Potentially could be integrated with e.g. https://www.jetify.com/devbox later on.
+#
+version: "0.5"
+
+# Env vars set here will be accessible to all processes.
+environment:
+  - "RUST_LOG=info,network_integration=debug,pclientd=debug,pcli=info,pd=info,penumbra=info"
+
+log_level: info
+is_strict: true
+# Interleave logs from all services in single file, so it's greppable.
+log_location: deployments/logs/dev-env-combined.log
+
+processes:
+  # Build latest version of local code. We do this once, up front,
+  # so that each test suite runs immediately when ready, without iterative building.
+  build-code:
+    command: |-
+      printf "Building source code before running tests..."
+      cargo --quiet build --release --all-targets
+      printf " OK"
+
+  # Run pd validator based on generated network.
+  pd:
+    command: "cargo run --release --bin pd -- start"
+    readiness_probe:
+      http_get:
+        host: 127.0.0.1
+        scheme: http
+        path: "/"
+        port: 8080
+      period_seconds: 5
+    depends_on:
+      build-code:
+        condition: process_completed_successfully
+
+  # Run CometBFT for pd p2p.
+  cometbft:
+    command: "cometbft --home ~/.penumbra/testnet_data/node0/cometbft start"
+    depends_on:
+      pd:
+        condition: process_healthy
diff --git a/deployments/scripts/migration-test b/deployments/scripts/migration-test
new file mode 100755
index 0000000000..f05f0dbb50
--- /dev/null
+++ b/deployments/scripts/migration-test
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# CI script to test migration compatibility. Orchestrates the following:
+#
+# * [x] checks out prior tag and builds its binaries and tests
+# * [x] creates local devnet via that target version
+# * [ ] make sure devnet has very fast voting
+# * [x] runs smoke tests against devnet, to generate txs
+# * [ ] submit governance proposal for chain halt
+# * [ ] vote for proposal to pass
+# * [ ] wait for halt
+# * [x] stops devnet ~ ends phase 1
+# * [x] migrates devnet via latest (i.e. HEAD) pd version ~ begins phase 2
+# * [x] restarts devnet
+# * [ ] re-runs smoke tests, from latest version, to validate
+#
+# After all that's done, we only know that the migration didn't badly
+# break things. We should also have dedicated integration tests
+# for pre- and post-migration checks.
+
+set -euo pipefail
+
+
+# By default, look up the most recently released version of `pd` and use that to test against.
+# Otherwise, accept an override version and pull that.
+get_latest_penumbra_version() {
+    curl -sSfL https://api.github.com/repos/penumbra-zone/penumbra/releases/latest \
+        | jq -r .tag_name
+}
+
+target_version="${1:-}"
+if [[ -z "$target_version" ]] ; then
+    target_version="$(get_latest_penumbra_version)"
+    >&2 echo "No target version specified, using latest release: ${target_version}"
+fi
+
+>&2 echo "Beginning migration test from '$target_version' -> '$(git rev-parse HEAD) (HEAD)'"
+
+repo_root="$(git rev-parse --show-toplevel)"
+# The compose configs and log paths used below are relative to the repo root.
+cd "$repo_root"
+
+# Download prebuilt artifact from a prior release.
+# Unlikely to use this, since we need compatible tests, too.
+download_historical_binary() {
+    target_version_bin_dir="deployments/bin/${target_version}"
+    pd_pre_migration_bin="${target_version_bin_dir}/pd"
+    if [[ ! -e "$pd_pre_migration_bin" ]] ; then
+        >&2 echo "Installing pd ${target_version}..."
+        # Download jawn.
+        # N.B. the one-liner script only exists >=0.73.1.
+        curl --proto '=https' --tlsv1.2 -LsSf "https://github.com/penumbra-zone/penumbra/releases/download/${target_version}/pd-installer.sh" | sh
+        mkdir -p "$target_version_bin_dir"
+        cp -v ~/.cargo/bin/pd "$pd_pre_migration_bin"
+    else
+        >&2 echo "pd ${target_version} already present locally, skipping download..."
+    fi
+}
+
+worktree_dir="${repo_root}/deployments/worktrees/${target_version}"
+# Create a local git-worktree so that we can check out a prior
+# tag and build its version of tests and suchwhat.
+prepare_local_worktree() {
+    if [[ ! -d "$worktree_dir" ]] ; then
+        >&2 echo "Creating new git worktree: $worktree_dir"
+        git worktree add "$worktree_dir" "$target_version"
+    fi
+}
+
+prepare_local_worktree
+
+function run_migration_test_phase_1() {
+    >&2 echo "Running smoke-tests against pre-migration devnet, phase 1..."
+    # Override the pc API port 8080 -> 9191, to avoid conflict with pd.
+    if ! process-compose \
+        --config deployments/compose/process-compose-smoke-test.yml \
+        --config deployments/compose/process-compose-migration-test-1.yml \
+        --port 9191 \
+        -t=true \
+        ; then
+        >&2 echo "ERROR: migration tests phase 1 failed"
+        >&2 echo "Review logs in: deployments/logs/migration-*.log"
+        find "${repo_root}/deployments/logs/migration-"*".log" | sort >&2
+        exit 1
+    else
+        echo "SUCCESS! Migration test phase 1 complete."
+    fi
+}
+
+
+# Post-migration, restart the network, and rerun the smoke tests against it.
+function run_migration_test_phase_2() {
+    >&2 echo "Running smoke-tests against post-migration devnet, phase 2..."
+    if ! process-compose \
+        --config deployments/compose/process-compose-smoke-test.yml \
+        --config deployments/compose/process-compose-migration-test-2.yml \
+        --port 9191 \
+        -t=true \
+        ; then
+        >&2 echo "ERROR: migration tests phase 2 failed"
+        >&2 echo "Review logs in: deployments/logs/migration-*.log"
+        find "${repo_root}/deployments/logs/migration-"*".log" | sort >&2
+        exit 1
+    else
+        echo "SUCCESS! Migration test phase 2 complete."
+    fi
+}
+
+# Perform most recent migration against local state, according to steps in
+# https://guide.penumbra.zone/main/node/pd/chain-upgrade.html
+function perform_migration() {
+    node0_dir="${HOME}/.penumbra/testnet_data/node0"
+
+    # TEMPORARY: backup entire state during development so it's easy to recover
+    tarball_backup="${node0_dir}/../node0-state-backup.tar"
+    if [[ ! -e "$tarball_backup" ]] ; then
+        tar -cf "$tarball_backup" "$node0_dir"
+    fi
+    # Avoid lock contention
+    sync
+    sleep 5
+    cargo run --quiet --release --bin pd -- migrate
+}
+
+function main() {
+    run_migration_test_phase_1
+    >&2 echo PERFORMING MIGRATION
+    perform_migration
+    run_migration_test_phase_2
+}
+
+main
diff --git a/justfile b/justfile
index e63d30dea8..823c43fe65 100644
--- a/justfile
+++ b/justfile
@@ -1,4 +1,12 @@
+migration-test:
+    # resetting network state
+    cargo run --release --bin pd -- testnet unsafe-reset-all || true
+    ./deployments/scripts/migration-test v0.73.1
+
 smoke:
     # resetting network state
     cargo run --release --bin pd -- testnet unsafe-reset-all || true
     ./deployments/scripts/smoke-test.sh
+
+dev:
+    process-compose up --port 9191 --config ./deployments/compose/process-compose.yml