From 1024ec615e93b1fcde8404182ab4b0646f4ee9f7 Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Fri, 26 Apr 2024 09:55:09 -0700 Subject: [PATCH] feat(tests): use process-compose for smoke tests Containers are out, process orchestration is in. A while back we ditched using containers for the smoke tests, mostly because the caching on the container-build story was atrocious, so test re-runs took a really long time. And frankly, container ergonomics on dev workstations, particularly macOS, are not awesome. Instead, let's assume the dev env can run processes for cargo, pd, and cometbft. If so, that's all we need to wire up our integration testing. Enter process-compose [0]. The new smoke test setup ditches the bash script and delegates to process-compose for orchestrating processes. Benchmarking via hyperfine shows a decrease of over 2x in runtime. There's one substantive change to the integration test logic, in the pcli suite, that reduces the sleep time between tests, refining it to be more precisely the duration necessary for claiming an undelegation. 
[0] https://github.com/F1bonacc1/process-compose --- .github/workflows/smoke.yml | 24 +--- crates/bin/pcli/tests/network_integration.rs | 10 +- .../compose/process-compose-smoke-test.yml | 126 ++++++++++++++++++ deployments/scripts/smoke-test.sh | 80 ++--------- docs/guide/src/dev/devnet-quickstart.md | 5 + justfile | 4 + 6 files changed, 160 insertions(+), 89 deletions(-) create mode 100644 deployments/compose/process-compose-smoke-test.yml create mode 100644 justfile diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index cceb802326..f14a098f5a 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -23,26 +23,16 @@ jobs: - name: Install cometbft binary run: ./deployments/scripts/install-cometbft + - name: Install process-compose + run: >- + sh -c "$(curl --location https://raw.githubusercontent.com/F1bonacc1/process-compose/main/scripts/get-pc.sh)" -- + -d -b ~/bin + - name: Run the smoke test suite run: | export PATH="$HOME/bin:$PATH" ./deployments/scripts/smoke-test.sh - - name: Display comet logs - if: always() - run: cat deployments/logs/comet.log - - name: Display pd runtime logs - if: always() - run: cat deployments/logs/pd.log - - name: Display pd test logs + - name: Display smoke-test logs if: always() - run: cat deployments/logs/pd-tests.log - - name: Display pclientd logs - if: always() - run: cat deployments/logs/pclientd.log - - name: Display pcli logs - if: always() - run: cat deployments/logs/pcli.log - env: - TESTNET_RUNTIME: 2m - + run: cat deployments/logs/smoke-*.log diff --git a/crates/bin/pcli/tests/network_integration.rs b/crates/bin/pcli/tests/network_integration.rs index 4e37f59a8c..b84a660413 100644 --- a/crates/bin/pcli/tests/network_integration.rs +++ b/crates/bin/pcli/tests/network_integration.rs @@ -36,14 +36,16 @@ const TEST_ASSET: &str = "1020test_usd"; const TIMEOUT_COMMAND_SECONDS: u64 = 20; // The time to wait before attempting to perform an undelegation claim. 
-// By default the epoch duration is 100 blocks, the block time is ~500 ms, -// and the number of unbonding epochs is 2. +// The "unbonding_delay" value is specified in blocks, and in the smoke tests, +// block time is set to ~500ms, so we'll take the total number of blocks +// that must elapse and sleep half that many seconds. static UNBONDING_DURATION: Lazy<Duration> = Lazy::new(|| { - let blocks: f64 = std::env::var("EPOCH_DURATION") + let blocks: f64 = std::env::var("UNBONDING_DELAY") .unwrap_or("100".to_string()) .parse() .unwrap(); - Duration::from_secs((1.5 * blocks) as u64) + // 0.5 -> 0.6 for comfort, since 500ms is only an estimate. + Duration::from_secs((0.6 * blocks) as u64) }); /// Import the wallet from seed phrase into a temporary directory. diff --git a/deployments/compose/process-compose-smoke-test.yml b/deployments/compose/process-compose-smoke-test.yml new file mode 100644 index 0000000000..c4594fe403 --- /dev/null +++ b/deployments/compose/process-compose-smoke-test.yml @@ -0,0 +1,126 @@ +--- +# A process-compose configuration for running penumbra smoke-tests. +# +# https://github.com/F1bonacc1/process-compose/ +# +version: "0.5" + +# Env vars set here will be accessible to all processes. +environment: + - "PENUMBRA_NODE_PD_URL=http://127.0.0.1:8080" + - "PCLI_UNLEASH_DANGER=yes" + - "EPOCH_DURATION=50" + - "UNBONDING_DELAY=50" + - "RUST_LOG=info,network_integration=debug,pclientd=debug,pcli=info,pd=info,penumbra=info" + +log_level: info +is_strict: true +# Interleave logs from all services in single file, so it's greppable. +log_location: deployments/logs/smoke-combined.log + +processes: + # Build latest version of local code. We do this once, up front, + # so that each test suite runs immediately when ready, without iterative building. + build-code: + command: |- + echo "Building source code before running tests..." 
+ cargo --quiet build --release --all-targets + cargo --quiet test --release --no-run + cargo --quiet test --release --no-run -- --ignored + cargo --quiet test --release --features sct-divergence-check --package pclientd --no-run -- \ + --ignored --test-threads 1 --nocapture + cargo --quiet test --release --features sct-divergence-check,download-proving-keys --package pcli --no-run -- \ + --ignored --test-threads 1 --nocapture + cargo --quiet test --release --package pd --no-run -- \ + --ignored --test-threads 1 --nocapture + + # Create network configuration, for running a pd validator locally. + network-generate: + command: > + cargo run --quiet --release --bin pd -- + testnet generate --unbonding-delay 50 + --epoch-duration 50 --timeout-commit 500ms --gas-price-simple 1000 + depends_on: + build-code: + condition: process_completed_successfully + + # Run pd validator based on generated network. + pd: + command: "cargo run --release --bin pd -- start" + readiness_probe: + http_get: + host: 127.0.0.1 + scheme: http + path: "/" + port: 8080 + period_seconds: 5 + depends_on: + network-generate: + condition: process_completed_successfully + + # Run CometBFT for pd p2p. + cometbft: + command: "cometbft --home ~/.penumbra/testnet_data/node0/cometbft start" + depends_on: + pd: + condition: process_healthy + + # Run `pd` integration tests. + test-pd: + command: >- + cargo test --release --package pd -- --ignored --test-threads 1 --nocapture + depends_on: + pd: + condition: process_healthy + cometbft: + condition: process_started + availability: + restart: exit_on_failure + + # Run `pclientd` integration tests. 
+ test-pclientd: + command: >- + cargo test --release --features sct-divergence-check --package pclientd -- + --ignored --test-threads 1 --nocapture + log_location: deployments/logs/smoke-test-pclientd.log + depends_on: + pd: + condition: process_healthy + cometbft: + condition: process_started + test-pd: + condition: process_completed + availability: + restart: exit_on_failure + + # Run `pcli` integration tests. + test-pcli: + command: >- + cargo test --release --features sct-divergence-check,download-proving-keys --package pcli -- + --ignored --test-threads 1 --nocapture + log_location: deployments/logs/smoke-test-pcli.log + depends_on: + pd: + condition: process_healthy + cometbft: + condition: process_started + test-pclientd: + condition: process_completed + availability: + restart: exit_on_failure + + # Finalizer task, which will wait until all test suites have finished. + # This allows us to ensure that the run ends only after every suite has completed. + summary: + # The `command` only runs if all tests were successful, + # otherwise the process exits due to dep failure. + command: echo tests finished + depends_on: + test-pd: + condition: process_completed_successfully + test-pclientd: + condition: process_completed_successfully + test-pcli: + condition: process_completed_successfully + availability: + exit_on_end: true diff --git a/deployments/scripts/smoke-test.sh b/deployments/scripts/smoke-test.sh index a619c347e3..7e65cce2d3 100755 --- a/deployments/scripts/smoke-test.sh +++ b/deployments/scripts/smoke-test.sh @@ -1,15 +1,5 @@ #!/usr/bin/env bash -# Wrapper script to bottle up logic for running "smoke tests" in CI, -# supporting backgrounding tasks and checking on their status later. -# The execution plan is: -# -# 1. Start the network -# 2. Wait ~10s -# 3. Run integration tests (fail here if non-zero) -# 4. Continue running network ~5m -# -# The goal is to fail fast if an integration test exits, but permit -# a slightly longer runtime for the suite to find more errors. 
+# Run smoke test suite, via process-compose config. set -euo pipefail @@ -27,66 +17,20 @@ if ! hash cometbft > /dev/null 2>&1 ; then exit 1 fi -# If the action is running in debugging mode, then show me *everything* -if [ -n "${RUNNER_DEBUG:-}" ]; then - export RUST_LOG=debug +# Check for interactive terminal session, enable TUI if yes. +if [[ -t 1 ]] ; then + use_tui="true" else - export RUST_LOG="info,network_integration=debug,pclientd=debug,pcli=info,pd=info,penumbra=info" + use_tui="false" fi -# Duration that the network will be left running before script exits. -TESTNET_RUNTIME="${TESTNET_RUNTIME:-120}" -# Duration that the network will run before integration tests are run. -TESTNET_BOOTTIME="${TESTNET_BOOTTIME:-20}" - -# Directory to store log output, useful for debugging; is git-ignored. -SMOKE_LOG_DIR="deployments/logs" - -echo "Building latest version of pd from source..." -cargo build --quiet --release --bin pd - -echo "Generating testnet config..." -EPOCH_DURATION="${EPOCH_DURATION:-50}" -UNBONDING_DELAY="${UNBONDING_DELAY:-50}" -cargo run --quiet --release --bin pd -- testnet generate --unbonding-delay "$UNBONDING_DELAY" --epoch-duration "$EPOCH_DURATION" --timeout-commit 500ms --gas-price-simple=1000 - -echo "Starting CometBFT..." -cometbft start --log_level=error --home "${HOME}/.penumbra/testnet_data/node0/cometbft" > "${SMOKE_LOG_DIR}/comet.log" & -cometbft_pid="$!" - -echo "Starting pd..." -cargo run --release --bin pd -- start --home "${HOME}/.penumbra/testnet_data/node0/pd" > "${SMOKE_LOG_DIR}/pd.log" & -pd_pid="$!" - -# Ensure processes are cleaned up after script exits, regardless of status. -trap 'kill -9 "$cometbft_pid" "$pd_pid"' EXIT - -echo "Waiting $TESTNET_BOOTTIME seconds for network to boot..." 
-sleep "$TESTNET_BOOTTIME" - -echo "Running pd integration tests against running pd binary" - cargo test --release --package pd -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pd-tests.log" - -echo "Running pclientd integration tests against network" -PENUMBRA_NODE_PD_URL="http://127.0.0.1:8080" \ - PCLI_UNLEASH_DANGER="yes" \ - cargo test --release --features sct-divergence-check --package pclientd -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pclientd.log" - -echo "Running pcli integration tests against network" -PENUMBRA_NODE_PD_URL="http://127.0.0.1:8080" \ - PCLI_UNLEASH_DANGER="yes" \ - cargo test --release --features sct-divergence-check,download-proving-keys --package pcli -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pcli.log" - -echo "Waiting another $TESTNET_RUNTIME seconds while network runs..." -sleep "$TESTNET_RUNTIME" -# `kill -0` checks existence of pid, i.e. whether the process is still running. -# It doesn't inspect errors, but the only reason the process would be stopped -# is if it failed, so it's good enough for our needs. -if ! kill -0 "$cometbft_pid" || ! kill -0 "$pd_pid" ; then - >&2 echo "ERROR: smoke test process exited early" - >&2 echo "Review logs in: ${SMOKE_LOG_DIR}/" +repo_root="$(git rev-parse --show-toplevel)" +# Override the pc API port 8080 -> 9191, to avoid conflict with pd. +if ! process-compose --config deployments/compose/process-compose-smoke-test.yml --port 9191 -t="$use_tui" ; then + >&2 echo "ERROR: smoke tests failed" + >&2 echo "Review logs in: deployments/logs/smoke-*.log" + find "${repo_root}/deployments/logs/smoke-"*".log" | sort >&2 exit 1 else - echo "SUCCESS! Smoke test complete. Ran for $TESTNET_RUNTIME, found no errors." + echo "SUCCESS! Smoke test complete." 
fi -exit 0 diff --git a/docs/guide/src/dev/devnet-quickstart.md b/docs/guide/src/dev/devnet-quickstart.md index 07216b3a27..b5f4f169f6 100644 --- a/docs/guide/src/dev/devnet-quickstart.md +++ b/docs/guide/src/dev/devnet-quickstart.md @@ -93,3 +93,8 @@ To run the smoke tests: ```shell PENUMBRA_NODE_PD_URL=http://127.0.0.1:8080 PCLI_UNLEASH_DANGER=yes cargo test --package pcli -- --ignored --test-threads 1 ``` + +Find the exact commands for each binary's smoke tests in `deployments/compose/process-compose-smoke-test.yml`. +You can also run the entire smoke test suite end-to-end via `just smoke`, including setup and teardown of the network. +If you want to execute the tests against an already-running devnet, however, use manual invocations like +the `cargo test` example above. diff --git a/justfile b/justfile new file mode 100644 index 0000000000..e63d30dea8 --- /dev/null +++ b/justfile @@ -0,0 +1,4 @@ +smoke: + # resetting network state + cargo run --release --bin pd -- testnet unsafe-reset-all || true + ./deployments/scripts/smoke-test.sh