Skip to content

Commit

Permalink
ci: add test to memcheck to restart a node and check for reloaded chunks
Browse files Browse the repository at this point in the history
  • Loading branch information
joshuef committed Feb 20, 2024
1 parent 9427454 commit 89bcce5
Showing 1 changed file with 30 additions and 7 deletions.
37 changes: 30 additions & 7 deletions .github/workflows/memcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ env:
CLIENT_DATA_PATH: /home/runner/.local/share/safe/client
NODE_DATA_PATH: /home/runner/.local/share/safe/node
BOOTSTRAP_NODE_DATA_PATH: /home/runner/.local/share/safe/bootstrap_node
RESTART_TEST_NODE_DATA_PATH: /home/runner/.local/share/safe/restart_node

jobs:
memory-check:
Expand Down Expand Up @@ -59,6 +60,15 @@ jobs:
shell: bash
run: echo "The SAFE_PEERS variable has been set to $SAFE_PEERS"

# Launches one safenode in the background, with both its root dir and its log
# output pointed at RESTART_TEST_NODE_DATA_PATH; the later "Stop the restart
# node" / "Start the restart node again" steps act on this same directory.
- name: Start a node instance to be restarted
  env:
    SN_LOG: "all"
  # The trailing sleep holds the step for a fixed 10s after the node has been
  # backgrounded, before the next step runs.
  run: |
    mkdir -p $RESTART_TEST_NODE_DATA_PATH
    ./target/release/safenode \
      --root-dir $RESTART_TEST_NODE_DATA_PATH \
      --log-output-dest $RESTART_TEST_NODE_DATA_PATH \
      --local &
    sleep 10

- name: Start a local network
env:
SN_LOG: "all"
Expand Down Expand Up @@ -102,7 +112,7 @@ jobs:
# The resources file we upload may change, and with it mem consumption.
# Be aware!
- name: Start a client to upload files
# -p makes files public
# -p makes files public
run: |
ls -l
cargo run --bin safe --release -- --log-output-dest=data-dir files upload "./the-test-data.zip" --retry-strategy quick -p
Expand Down Expand Up @@ -135,6 +145,20 @@ jobs:
SN_LOG: "all"
timeout-minutes: 25

# Stops the node that was started against RESTART_TEST_NODE_DATA_PATH, using
# the pid recorded in safenode.pid inside that directory.
# `kill` only delivers the signal and returns immediately, so this step also
# waits (bounded) until the process is really gone; otherwise the following
# step could relaunch safenode on the same root dir while the old instance is
# still shutting down.
- name: Stop the restart node
  run: |
    pid="$(cat "$RESTART_TEST_NODE_DATA_PATH/safenode.pid")"
    kill "$pid"
    # The node was started by an earlier step's shell, so `wait` cannot be
    # used here; poll with signal 0 instead.
    for _ in $(seq 1 30); do
      if ! kill -0 "$pid" 2>/dev/null; then
        echo "safenode (pid $pid) has exited"
        exit 0
      fi
      sleep 1
    done
    echo "safenode (pid $pid) did not exit within 30s" >&2
    exit 1

# Relaunches safenode in the background against the same
# RESTART_TEST_NODE_DATA_PATH (root dir and log destination) used by the
# "Start a node instance to be restarted" step.
- name: Start the restart node again
  env:
    SN_LOG: "all"
  # The trailing sleep holds the step for a fixed 10s after the node has been
  # backgrounded, before the next step runs.
  run: |
    ./target/release/safenode \
      --root-dir $RESTART_TEST_NODE_DATA_PATH \
      --log-output-dest $RESTART_TEST_NODE_DATA_PATH \
      --local &
    sleep 10

# Searches everything under RESTART_TEST_NODE_DATA_PATH for the text
# "Existing record loaded". rg exits non-zero when nothing matches, which
# fails this step.
- name: Assert we've reloaded some chunks
  run: |
    rg "Existing record loaded" $RESTART_TEST_NODE_DATA_PATH

- name: Chunks data integrity during nodes churn
run: cargo test --release -p sn_node --test data_with_churn -- --nocapture
env:
Expand All @@ -147,7 +171,7 @@ jobs:
run: ls -la
# Debug aid: lists the release build output directory.
# GITHUB_WORKSPACE is the runner's checkout directory, so this resolves to the
# same location as the previously hard-coded
# /home/runner/work/safe_network/safe_network path while staying correct if
# the repository is renamed or the job runs on a different runner layout.
- name: Check safenode file
  run: ls "$GITHUB_WORKSPACE/target/release"

- name: Check there was no restart issues
run: |
if rg 'Failed to execute hard-restart command' $NODE_DATA_PATH; then
Expand All @@ -157,9 +181,8 @@ jobs:
echo "No restart issues detected"
fi
- name: Verify the routing tables of the nodes
run: cargo test --release -p sn_node --test verify_routing_table -- --nocapture
run: cargo test --release -p sn_node --test verify_routing_table -- --nocapture
env:
SLEEP_BEFORE_VERIFICATION: 300
timeout-minutes: 10
Expand All @@ -170,7 +193,7 @@ jobs:
# get the counts, then the specific line, and then the digit count only
# then check we have an expected level of restarts
# TODO: make this use an env var, or relate to testnet size
run : |
run: |
restart_count=$(rg "Node is restarting in" $NODE_DATA_PATH -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "Restart $restart_count nodes"
Expand All @@ -197,7 +220,7 @@ jobs:
# TODO: make this use an env var, or relate to testnet size
# The bootstrap_node uses a separate folder for logging,
# so the folder input to rg needs to cover that as well.
run : |
run: |
sending_list_count=$(rg "Sending a replication list" $NODE_DATA_PATH -c --stats | \
rg "(\d+) matches" | rg "\d+" -o)
echo "Sent $sending_list_count replication lists"
Expand Down Expand Up @@ -291,7 +314,7 @@ jobs:
run: |
client_peak_mem_limit_mb="1024" # mb
client_avg_mem_limit_mb="512" # mb
peak_mem_usage=$(
rg '"memory_used_mb":[^,]*' $CLIENT_DATA_PATH/logs --glob safe.* -o --no-line-number --no-filename |
awk -F':' '/"memory_used_mb":/{print $2}' |
Expand Down

0 comments on commit 89bcce5

Please sign in to comment.