Skip to content

Commit

Permalink
feat(tests): use process-compose for smoke tests
Browse files Browse the repository at this point in the history
Containers are out, process orchestration is in.

A while back we ditched using containers for the smoke tests,
mostly because the caching on the container-build story was
atrocious, so test re-runs took a really long time. And frankly,
container ergonomics on dev workstations, particularly macOS,
are not awesome. Instead, let's assume the dev env can run
processes for cargo, pd, and cometbft. If so, that's all
we need to wire up our integration testing. Enter process-compose [0].

The new smoke test setup ditches the bash script and delegates
to process-compose for orchestrating processes.

Benchmarking via hyperfine shows the new setup runs more than 2x faster.

There's one substantive change to the integration test logic,
in the pcli suite, that reduces the sleep time between tests,
refining it to be more precisely the duration necessary for
claiming an undelegation.

[0] https://github.com/F1bonacc1/process-compose
  • Loading branch information
conorsch committed May 6, 2024
1 parent ac169d7 commit 7cb1e2f
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 89 deletions.
24 changes: 7 additions & 17 deletions .github/workflows/smoke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,26 +23,16 @@ jobs:
- name: Install cometbft binary
run: ./deployments/scripts/install-cometbft

- name: Install process-compose
run: >-
sh -c "$(curl --location https://raw.githubusercontent.com/F1bonacc1/process-compose/main/scripts/get-pc.sh)" --
-d -b ~/bin
- name: Run the smoke test suite
run: |
export PATH="$HOME/bin:$PATH"
./deployments/scripts/smoke-test.sh
- name: Display comet logs
if: always()
run: cat deployments/logs/comet.log
- name: Display pd runtime logs
if: always()
run: cat deployments/logs/pd.log
- name: Display pd test logs
- name: Display smoke-test logs
if: always()
run: cat deployments/logs/pd-tests.log
- name: Display pclientd logs
if: always()
run: cat deployments/logs/pclientd.log
- name: Display pcli logs
if: always()
run: cat deployments/logs/pcli.log
env:
TESTNET_RUNTIME: 2m

run: cat deployments/logs/smoke-*.log
10 changes: 6 additions & 4 deletions crates/bin/pcli/tests/network_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,16 @@ const TEST_ASSET: &str = "1020test_usd";
const TIMEOUT_COMMAND_SECONDS: u64 = 20;

// The time to wait before attempting to perform an undelegation claim.
// By default the epoch duration is 100 blocks, the block time is ~500 ms,
// and the number of unbonding epochs is 2.
// The "unbonding_delay" value is specified in blocks, and in the smoke tests,
// block time is set to ~500ms, so we'll take the total number of blocks
// that must elapse and sleep half that many seconds.
static UNBONDING_DURATION: Lazy<Duration> = Lazy::new(|| {
let blocks: f64 = std::env::var("EPOCH_DURATION")
let blocks: f64 = std::env::var("UNBONDING_DELAY")
.unwrap_or("100".to_string())
.parse()
.unwrap();
Duration::from_secs((1.5 * blocks) as u64)
// 0.5 -> 0.6 for comfort, since 500ms is only an estimate.
Duration::from_secs((0.6 * blocks) as u64)
});

/// Import the wallet from seed phrase into a temporary directory.
Expand Down
128 changes: 128 additions & 0 deletions deployments/compose/process-compose-smoke-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
---
# process-compose configuration that drives the penumbra smoke tests.
#
# Upstream project: https://github.com/F1bonacc1/process-compose/
#
version: '0.5'

# Variables declared here are exported to every process in this file.
environment:
  - 'PENUMBRA_NODE_PD_URL=http://127.0.0.1:8080'
  - 'PCLI_UNLEASH_DANGER=yes'
  - 'EPOCH_DURATION=50'
  - 'UNBONDING_DELAY=50'
  - 'RUST_LOG=info,network_integration=debug,pclientd=debug,pcli=info,pd=info,penumbra=info'

log_level: info
is_strict: true
# One shared log file with output from all services interleaved, so a single
# grep covers the whole run.
log_location: deployments/logs/smoke-combined.log

processes:
  # Build latest version of local code. We do this once, up front,
  # so that each test suite runs immediately when ready, without iterative building.
  build-code:
    # `set -e` aborts the script on the first failing cargo invocation, so an
    # early build failure cannot be masked by the exit status of a later step.
    command: |-
      set -e
      echo "Building source code before running tests..."
      cargo --quiet build --release --all-targets
      cargo --quiet test --release --no-run
      cargo --quiet test --release --no-run -- --ignored
      cargo --quiet test --release --features sct-divergence-check --package pclientd --no-run -- \
        --ignored --test-threads 1 --nocapture
      cargo --quiet test --release --features sct-divergence-check,download-proving-keys --package pcli --no-run -- \
        --ignored --test-threads 1 --nocapture
      cargo --quiet test --release --package pd --no-run -- \
        --ignored --test-threads 1 --nocapture

  # Create network configuration, for running a pd validator locally.
  # NOTE: keep --unbonding-delay and --epoch-duration in sync with the
  # UNBONDING_DELAY and EPOCH_DURATION entries in the global `environment`
  # list; the pcli integration tests read UNBONDING_DELAY to decide how long
  # to wait before claiming an undelegation.
  network-generate:
    command: >
      cargo run --quiet --release --bin pd --
      testnet generate --unbonding-delay 50
      --epoch-duration 50 --timeout-commit 500ms --gas-price-simple 1000
    depends_on:
      build-code:
        condition: process_completed_successfully

  # Run pd validator based on generated network.
  pd:
    command: "cargo run --release --bin pd -- start"
    # Probe http://127.0.0.1:8080/ every 5 seconds; dependents that wait on
    # `process_healthy` are gated on this probe.
    readiness_probe:
      http_get:
        host: 127.0.0.1
        scheme: http
        path: "/"
        port: 8080
      period_seconds: 5
    depends_on:
      network-generate:
        condition: process_completed_successfully

  # Run CometBFT for pd p2p.
  cometbft:
    command: "cometbft --home ~/.penumbra/testnet_data/node0/cometbft start"
    depends_on:
      pd:
        condition: process_healthy

  # Run `pd` integration tests.
  test-pd:
    command: >-
      cargo test --release --package pd -- --ignored --test-threads 1 --nocapture
    # Dedicated log file, matching the other test suites, so CI can display it
    # via the `deployments/logs/smoke-*.log` glob.
    log_location: deployments/logs/smoke-test-pd.log
    depends_on:
      pd:
        condition: process_healthy
      cometbft:
        condition: process_started
    # A failing suite shuts down the whole process-compose run.
    availability:
      restart: exit_on_failure

  # Run `pclientd` integration tests.
  test-pclientd:
    command: >-
      cargo test --release --features sct-divergence-check --package pclientd --
      --ignored --test-threads 1 --nocapture
    log_location: deployments/logs/smoke-test-pclientd.log
    depends_on:
      pd:
        condition: process_healthy
      cometbft:
        condition: process_started
      # Wait for the pd suite to finish so the suites run one at a time.
      test-pd:
        condition: process_completed
    availability:
      restart: exit_on_failure

  # Run `pcli` integration tests.
  test-pcli:
    command: >-
      cargo test --release --features sct-divergence-check,download-proving-keys --package pcli --
      --ignored --test-threads 1 --nocapture
    log_location: deployments/logs/smoke-test-pcli.log
    depends_on:
      pd:
        condition: process_healthy
      cometbft:
        condition: process_started
      # Wait for the pclientd suite to finish so the suites run one at a time.
      test-pclientd:
        condition: process_completed
    availability:
      restart: exit_on_failure

  # Finalizer task, which will wait until all test suites have finished.
  # This allows us to ensure that process-compose only exits once every
  # suite has completed, rather than leaving pd and cometbft running forever.
  summary:
    # The `command` only runs if all tests were successful,
    # otherwise the process exits due to dep failure.
    command: echo tests finished
    depends_on:
      test-pd:
        condition: process_completed_successfully
      test-pclientd:
        condition: process_completed_successfully
      test-pcli:
        condition: process_completed_successfully
    availability:
      exit_on_end: true
80 changes: 12 additions & 68 deletions deployments/scripts/smoke-test.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,5 @@
#!/usr/bin/env bash
# Wrapper script to bottle up logic for running "smoke tests" in CI,
# supporting backgrounding tasks and checking on their status later.
# The execution plan is:
#
# 1. Start the network
# 2. Wait ~10s
# 3. Run integration tests (fail here if non-zero)
# 4. Continue running network ~5m
#
# The goal is to fail fast if an integration test exits, but permit
# a slightly longer runtime for the suite to find more errors.
# Run smoke test suite, via process-compose config.
set -euo pipefail


Expand All @@ -27,66 +17,20 @@ if ! hash cometbft > /dev/null 2>&1 ; then
exit 1
fi

# If the action is running in debugging mode, then show me *everything*
if [ -n "${RUNNER_DEBUG:-}" ]; then
export RUST_LOG=debug
# Check for interactive terminal session, enable TUI if yes.
if [[ -t 1 ]] ; then
use_tui="true"
else
export RUST_LOG="info,network_integration=debug,pclientd=debug,pcli=info,pd=info,penumbra=info"
use_tui="false"
fi

# Duration that the network will be left running before script exits.
TESTNET_RUNTIME="${TESTNET_RUNTIME:-120}"
# Duration that the network will run before integration tests are run.
TESTNET_BOOTTIME="${TESTNET_BOOTTIME:-20}"

# Directory to store log output, useful for debugging; is git-ignored.
SMOKE_LOG_DIR="deployments/logs"

echo "Building latest version of pd from source..."
cargo build --quiet --release --bin pd

echo "Generating testnet config..."
EPOCH_DURATION="${EPOCH_DURATION:-50}"
UNBONDING_DELAY="${UNBONDING_DELAY:-50}"
cargo run --quiet --release --bin pd -- testnet generate --unbonding-delay "$UNBONDING_DELAY" --epoch-duration "$EPOCH_DURATION" --timeout-commit 500ms --gas-price-simple=1000

echo "Starting CometBFT..."
cometbft start --log_level=error --home "${HOME}/.penumbra/testnet_data/node0/cometbft" > "${SMOKE_LOG_DIR}/comet.log" &
cometbft_pid="$!"

echo "Starting pd..."
cargo run --release --bin pd -- start --home "${HOME}/.penumbra/testnet_data/node0/pd" > "${SMOKE_LOG_DIR}/pd.log" &
pd_pid="$!"

# Ensure processes are cleaned up after script exits, regardless of status.
trap 'kill -9 "$cometbft_pid" "$pd_pid"' EXIT

echo "Waiting $TESTNET_BOOTTIME seconds for network to boot..."
sleep "$TESTNET_BOOTTIME"

echo "Running pd integration tests against running pd binary"
cargo test --release --package pd -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pd-tests.log"

echo "Running pclientd integration tests against network"
PENUMBRA_NODE_PD_URL="http://127.0.0.1:8080" \
PCLI_UNLEASH_DANGER="yes" \
cargo test --release --features sct-divergence-check --package pclientd -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pclientd.log"

echo "Running pcli integration tests against network"
PENUMBRA_NODE_PD_URL="http://127.0.0.1:8080" \
PCLI_UNLEASH_DANGER="yes" \
cargo test --release --features sct-divergence-check,download-proving-keys --package pcli -- --ignored --test-threads 1 --nocapture | tee "${SMOKE_LOG_DIR}/pcli.log"

echo "Waiting another $TESTNET_RUNTIME seconds while network runs..."
sleep "$TESTNET_RUNTIME"
# `kill -0` checks existence of pid, i.e. whether the process is still running.
# It doesn't inspect errors, but the only reason the process would be stopped
# is if it failed, so it's good enough for our needs.
if ! kill -0 "$cometbft_pid" || ! kill -0 "$pd_pid" ; then
>&2 echo "ERROR: smoke test process exited early"
>&2 echo "Review logs in: ${SMOKE_LOG_DIR}/"
repo_root="$(git rev-parse --show-toplevel)"
# Override the pc API port 8080 -> 9191, to avoid conflict with pd.
if ! process-compose --config deployments/compose/process-compose-smoke-test.yml --port 9191 -t="$use_tui" ; then
>&2 echo "ERROR: smoke tests failed"
>&2 echo "Review logs in: deployments/logs/smoke-*.log"
find "${repo_root}/deployments/logs/smoke-"*".log" | sort >&2
exit 1
else
echo "SUCCESS! Smoke test complete. Ran for $TESTNET_RUNTIME, found no errors."
echo "SUCCESS! Smoke test complete."
fi
exit 0
4 changes: 4 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Reset local network state, then run the smoke test suite.
smoke:
    # resetting network state
    cargo run --release --bin pd -- testnet unsafe-reset-all || true
    ./deployments/scripts/smoke-test.sh

0 comments on commit 7cb1e2f

Please sign in to comment.