Skip to content

Nightly -- Full Network Tests #446

Nightly -- Full Network Tests

Nightly -- Full Network Tests #446

Workflow file for this run

name: Nightly -- Full Network Tests
on:
schedule:
- cron: "0 0 * * *"
workflow_dispatch:
env:
CARGO_INCREMENTAL: 0 # bookkeeping for incremental builds has overhead, not useful in CI.
WORKFLOW_URL: https://github.com/maidsafe/stableset_net/actions/runs
jobs:
e2e:
name: E2E tests
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
continue-on-error: true
- name: Build binaries
run: cargo build --release --bin safenode --bin safe --bin faucet
timeout-minutes: 30
- name: Start a local network
uses: maidsafe/sn-local-testnet-action@main
with:
action: start
interval: 2000
node-path: target/release/safenode
faucet-path: target/release/faucet
platform: ${{ matrix.os }}
build: true
- name: Check contact peer
shell: bash
run: echo "Peer is $SAFE_PEERS"
# only these unit tests require a network, the rest are run above in unit test section
- name: Run sn_client --tests
run: cargo test --package sn_client --release --tests
env:
SN_LOG: "all"
# only set the target dir for windows to bypass the linker issue.
# happens if we build the node manager via testnet action
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 15
- name: Create and fund a wallet to pay for files storage
run: |
cargo run --bin faucet --release -- --log-output-dest=data-dir send 1000000 $(cargo run --bin safe --release -- --log-output-dest=data-dir wallet address | tail -n 1) | tail -n 1 > transfer_hex
cargo run --bin safe --release -- --log-output-dest=data-dir wallet receive --file transfer_hex
env:
SN_LOG: "all"
timeout-minutes: 2
- name: Start a client to carry out chunk actions
run: cargo run --bin safe --release -- --log-output-dest=data-dir files upload "./resources" --retry-strategy quick
env:
SN_LOG: "all"
timeout-minutes: 2
# Client FoldersApi tests against local network
- name: Client FoldersApi tests against local network
run: cargo test --release --package sn_client --test folders_api
env:
SN_LOG: "all"
timeout-minutes: 10
# CLI Acc-Packet files and folders tests against local network
- name: CLI Acc-Packet files and folders tests
run: cargo test --release -p sn_cli test_acc_packet -- --nocapture
env:
SN_LOG: "all"
timeout-minutes: 10
- name: Start a client to create a register
run: cargo run --bin safe --release -- --log-output-dest=data-dir register create -n baobao
env:
SN_LOG: "all"
timeout-minutes: 2
- name: Start a client to get a register
run: cargo run --bin safe --release -- --log-output-dest=data-dir register get -n baobao
env:
SN_LOG: "all"
timeout-minutes: 2
- name: Start a client to edit a register
run: cargo run --bin safe --release -- --log-output-dest=data-dir register edit -n baobao wood
env:
SN_LOG: "all"
timeout-minutes: 2
- name: Stop the local network and upload logs
if: always()
uses: maidsafe/sn-local-testnet-action@main
with:
action: stop
log_file_prefix: safe_test_logs_e2e
platform: ${{ matrix.os }}
- name: post notification to slack on failure
if: ${{ failure() }}
uses: bryannice/[email protected]
env:
SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
SLACK_TITLE: "Nightly E2E Test Run Failed"
full_unit:
name: Full Unit Tests (including proptests)
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
continue-on-error: true
- name: Build unit tests before running
run: cargo test --release --lib --bins --no-run
timeout-minutes: 30
- name: Run CLI tests
timeout-minutes: 25
run: cargo test --release --package sn_cli -- --skip test_acc_packet_
- name: Run client tests
timeout-minutes: 25
# we do not run the `--tests` here are they are run in the e2e job
# as they rquire a network
run: |
cargo test --release --package sn_client --doc
cargo test --release --package sn_client --lib
cargo test --release --package sn_client --bins
cargo test --release --package sn_client --examples
- name: Run node tests
timeout-minutes: 25
run: cargo test --release --package sn_node --lib
- name: Run network tests
timeout-minutes: 25
run: cargo test --release -p sn_networking
- name: Run protocol tests
timeout-minutes: 25
run: cargo test --release -p sn_protocol
- name: Run transfers tests
timeout-minutes: 25
run: cargo test --release --package sn_transfers
- name: Run logging tests
timeout-minutes: 25
run: cargo test --release --package sn_logging
- name: Run register tests
shell: bash
timeout-minutes: 50
env:
PROPTEST_CASES: 512
run: cargo test --release -p sn_registers
- name: post notification to slack on failure
if: ${{ failure() }}
uses: bryannice/[email protected]
env:
SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
SLACK_TITLE: "Nightly Unit Test Run Failed"
spend_test:
name: spend tests against network
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
continue-on-error: true
- name: Build binaries
run: cargo build --release --features=local-discovery --bin safenode --bin faucet
timeout-minutes: 30
- name: Build testing executable
run: cargo test --release -p sn_node --features=local-discovery --test sequential_transfers --test storage_payments --no-run
env:
# only set the target dir for windows to bypass the linker issue.
# happens if we build the node manager via testnet action
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 30
- name: Start a local network
uses: maidsafe/sn-local-testnet-action@main
with:
action: start
interval: 2000
node-path: target/release/safenode
faucet-path: target/release/faucet
platform: ${{ matrix.os }}
build: true
- name: execute the spend test
run: cargo test --release -p sn_node --features="local-discovery" --test sequential_transfers -- --nocapture
env:
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
SN_LOG: "all"
timeout-minutes: 10
- name: execute the storage payment tests
run: cargo test --release -p sn_node --features="local-discovery" --test storage_payments -- --nocapture --test-threads=1
env:
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
SN_LOG: "all"
timeout-minutes: 10
- name: Small wait to allow reward receipt
run: sleep 30
timeout-minutes: 1
- name: Stop the local network and upload logs
if: always()
uses: maidsafe/sn-local-testnet-action@main
with:
action: stop
log_file_prefix: safe_test_logs_spend
platform: ${{ matrix.os }}
- name: post notification to slack on failure
if: ${{ failure() }}
uses: bryannice/[email protected]
env:
SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
SLACK_TITLE: "Nightly Spend Test Run Failed"
token_distribution_test:
if: "!startsWith(github.event.head_commit.message, 'chore(release):')"
name: token distribution test
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
- name: Build binaries
run: cargo build --release --features=local-discovery,distribution --bin safenode --bin faucet
timeout-minutes: 30
- name: Build testing executable
run: cargo test --release --features=local-discovery,distribution --no-run
env:
# only set the target dir for windows to bypass the linker issue.
# happens if we build the node manager via testnet action
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 30
- name: Start a local network
uses: maidsafe/sn-local-testnet-action@main
with:
action: start
interval: 2000
node-path: target/release/safenode
faucet-path: target/release/faucet
platform: ${{ matrix.os }}
build: true
- name: Check SAFE_PEERS was set
shell: bash
run: |
if [[ -z "$SAFE_PEERS" ]]; then
echo "The SAFE_PEERS variable has not been set"
exit 1
else
echo "SAFE_PEERS has been set to $SAFE_PEERS"
fi
- name: execute token_distribution tests
run: cargo test --release --features=local-discovery,distribution token_distribution -- --nocapture --test-threads=1
env:
SN_LOG: "all"
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 25
- name: Stop the local network and upload logs
if: always()
uses: maidsafe/sn-local-testnet-action@main
with:
action: stop
log_file_prefix: safe_test_logs_token_distribution
platform: ${{ matrix.os }}
churn:
name: Network churning tests
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
node_data_path: /home/runner/.local/share/safe/node
safe_path: /home/runner/.local/share/safe
- os: windows-latest
node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node
safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe
- os: macos-latest
node_data_path: /Users/runner/Library/Application Support/safe/node
safe_path: /Users/runner/Library/Application Support/safe
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
continue-on-error: true
- name: Build binaries
run: cargo build --release --features local-discovery --bin safenode --bin faucet
timeout-minutes: 30
- name: Build churn tests
run: cargo test --release -p sn_node --features=local-discovery --test data_with_churn --no-run
env:
# only set the target dir for windows to bypass the linker issue.
# happens if we build the node manager via testnet action
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 30
- name: Start a local network
uses: maidsafe/sn-local-testnet-action@main
with:
action: start
interval: 2000
node-path: target/release/safenode
faucet-path: target/release/faucet
platform: ${{ matrix.os }}
build: true
- name: Chunks data integrity during nodes churn (during 10min) (in theory)
run: cargo test --release -p sn_node --features="local-discovery" --test data_with_churn -- --nocapture
env:
TEST_DURATION_MINS: 60
TEST_CHURN_CYCLES: 6
SN_LOG: "all"
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 90
- name: Verify restart of nodes using rg
shell: bash
timeout-minutes: 1
# get the counts, then the specific line, and then the digit count only
# then check we have an expected level of restarts
# TODO: make this use an env var, or relate to testnet size
run: |
restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "Restart $restart_count nodes"
peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "PeerRemovedFromRoutingTable $peer_removed times"
if [ $peer_removed -lt $restart_count ]; then
echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count"
exit 1
fi
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
echo "Node dir count is $node_count"
# TODO: reenable this once the testnet dir creation is tidied up to avoid a large count here
# if [ $restart_count -lt $node_count ]; then
# echo "Restart count of: $restart_count is less than the node count of: $node_count"
# exit 1
# fi
- name: Verify data replication using rg
shell: bash
timeout-minutes: 1
# get the counts, then the specific line, and then the digit count only
# then check we have an expected level of replication
# TODO: make this use an env var, or relate to testnet size
run: |
fetching_attempt_count=$(rg "FetchingKeysForReplication" "${{ matrix.node_data_path }}" -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "Carried out $fetching_attempt_count fetching attempts"
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
if [ $fetching_attempt_count -lt $node_count ]; then
echo "Replication fetching attempts of: $fetching_attempt_count is less than the node count of: $node_count"
exit 1
fi
- name: Stop the local network and upload logs
if: always()
uses: maidsafe/sn-local-testnet-action@main
with:
action: stop
log_file_prefix: safe_test_logs_churn
platform: ${{ matrix.os }}
- name: post notification to slack on failure
if: ${{ failure() }}
uses: bryannice/[email protected]
env:
SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
SLACK_TITLE: "Nightly Churn Test Run Failed"
# Only error out after uploading the logs
- name: Don't log raw data
if: matrix.os != 'windows-latest' # causes error
shell: bash
timeout-minutes: 10
run: |
if ! rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }'
then
echo "We are logging an extremely large data"
exit 1
fi
verify_data_location_routing_table:
name: Verify data location and Routing Table
runs-on: ${{ matrix.os }}
strategy:
matrix:
include:
- os: ubuntu-latest
node_data_path: /home/runner/.local/share/safe/node
safe_path: /home/runner/.local/share/safe
- os: windows-latest
node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node
safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe
- os: macos-latest
node_data_path: /Users/runner/Library/Application Support/safe/node
safe_path: /Users/runner/Library/Application Support/safe
steps:
- uses: actions/checkout@v4
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
continue-on-error: true
- name: Build binaries
run: cargo build --release --features local-discovery --bin safenode --bin faucet
timeout-minutes: 30
- name: Build data location and routing table tests
run: cargo test --release -p sn_node --features=local-discovery --test verify_data_location --test verify_routing_table --no-run
env:
# only set the target dir for windows to bypass the linker issue.
# happens if we build the node manager via testnet action
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 30
- name: Start a local network
uses: maidsafe/sn-local-testnet-action@main
with:
action: start
interval: 2000
node-path: target/release/safenode
faucet-path: target/release/faucet
platform: ${{ matrix.os }}
build: true
- name: Verify the Routing table of the nodes
run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture
env:
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 5
- name: Verify the location of the data on the network
run: cargo test --release -p sn_node --features="local-discovery" --test verify_data_location -- --nocapture
env:
SN_LOG: "all"
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 90
- name: Verify the routing tables of the nodes
run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture
env:
CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }}
timeout-minutes: 5
- name: Verify restart of nodes using rg
shell: bash
timeout-minutes: 1
# get the counts, then the specific line, and then the digit count only
# then check we have an expected level of restarts
# TODO: make this use an env var, or relate to testnet size
run: |
restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "Restart $restart_count nodes"
peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "PeerRemovedFromRoutingTable $peer_removed times"
if [ $peer_removed -lt $restart_count ]; then
echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count"
exit 1
fi
node_count=$(ls "${{ matrix.node_data_path }}" | wc -l)
echo "Node dir count is $node_count"
- name: Stop the local network and upload logs
if: always()
uses: maidsafe/sn-local-testnet-action@main
with:
action: stop
log_file_prefix: safe_test_logs_data_location
platform: ${{ matrix.os }}
- name: post notification to slack on failure
if: ${{ failure() }}
uses: bryannice/[email protected]
env:
SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }}
SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}"
SLACK_TITLE: "Nightly Data Location Test Run Failed"
# Only error out after uploading the logs
- name: Don't log raw data
if: matrix.os != 'windows-latest' # causes error
shell: bash
timeout-minutes: 10
run: |
if ! rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }'
then
echo "We are logging an extremely large data"
exit 1
fi