From 42c3067fa78c2ff82a5301849e2e46b8601ead15 Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Mon, 9 Dec 2024 15:52:16 +0100 Subject: [PATCH 1/2] Add cth_validate_nodes The hook would check if MIM is running before init_per_suite for each big test It would check it for nodes requested by setting TEST_HOSTS variable --one-node flag still works as expected To disable this check use new --skip-validate-nodes flag It is compatible with cth_surefire --- big_tests/default.spec | 12 +++++----- big_tests/dynamic_domains.spec | 10 ++++---- big_tests/src/ct_mongoose_hook.erl | 6 +++-- big_tests/src/cth_validate_nodes.erl | 35 ++++++++++++++++++++++++++++ test/common/distributed_helper.erl | 32 ++++++++++++++++++++++++- tools/test-runner.sh | 12 ++++++++++ 6 files changed, 93 insertions(+), 14 deletions(-) create mode 100644 big_tests/src/cth_validate_nodes.erl diff --git a/big_tests/default.spec b/big_tests/default.spec index 6df21abda9c..5cf76ca2c71 100644 --- a/big_tests/default.spec +++ b/big_tests/default.spec @@ -130,13 +130,13 @@ %% ct_mongoose_hook will: %% * ensure preset & mim_data_dir values are passed to ct Config %% * check server's purity after SUITE -{ct_hooks, [ct_groups_summary_hook, ct_tty_hook, +{ct_hooks, [cth_validate_nodes, + ct_groups_summary_hook, + ct_tty_hook, ct_mongoose_hook, + ct_mongoose_log_hook, {ct_mongoose_log_hook, [{host, mim2}, {print_init_and_done_for_testcases, false}]}, {ct_mongoose_log_hook, [{host, mim3}, {print_init_and_done_for_testcases, false}]}, ct_progress_hook, - ct_markdown_errors_hook, ct_mongoose_log_hook]}. - -%% since test-runner.sh can be executed with the --one-node option, -%% log collection is enabled by default for host mim1 only. -% {ct_hooks, [{ct_mongoose_log_hook,[{host, mim2}, {log, []}]}]}. + ct_markdown_errors_hook +]}. 
diff --git a/big_tests/dynamic_domains.spec b/big_tests/dynamic_domains.spec index 7ed9ad920b0..0df35ae5d33 100644 --- a/big_tests/dynamic_domains.spec +++ b/big_tests/dynamic_domains.spec @@ -173,12 +173,12 @@ %% ct_mongoose_hook will: %% * ensure preset & mim_data_dir values are passed to ct Config %% * check server's purity after SUITE -{ct_hooks, [ct_groups_summary_hook, ct_tty_hook, ct_mongoose_hook, ct_progress_hook, +{ct_hooks, [cth_validate_nodes, + ct_groups_summary_hook, + ct_tty_hook, + ct_mongoose_hook, + ct_progress_hook, ct_markdown_errors_hook, ct_mongoose_log_hook, {ct_mongoose_log_hook, [{host, mim2}, {print_init_and_done_for_testcases, false}]}, {ct_mongoose_log_hook, [{host, mim3}, {print_init_and_done_for_testcases, false}]}]}. - -%% since test-runner.sh can be executed with the --one-node option, -%% log collection is enabled by default for host mim1 only. -% {ct_hooks, [{ct_mongoose_log_hook,[{host, mim2}, {log, []}]}]}. diff --git a/big_tests/src/ct_mongoose_hook.erl b/big_tests/src/ct_mongoose_hook.erl index 921adfe111b..e02fc68b2aa 100644 --- a/big_tests/src/ct_mongoose_hook.erl +++ b/big_tests/src/ct_mongoose_hook.erl @@ -41,14 +41,16 @@ init(_Id, _Opts) -> {ok, #{}}. %% @doc Called before init_per_suite is called. -pre_init_per_suite(Suite,Config,State) -> +pre_init_per_suite(Suite, [_|_] = Config, State) -> Preset = case application:get_env(common_test, test_label) of {ok, Value} -> Value; _ -> undefined end, DataDir = path_helper:data_dir(Suite, Config), NewConfig = [{preset, Preset}, {mim_data_dir, DataDir} | Config], - {NewConfig, State}. + {NewConfig, State}; +pre_init_per_suite(_Suite, SkipOrFail, State) -> + {SkipOrFail, State}. %% @doc Called after init_per_suite. 
post_init_per_suite(_Suite, _Config, Return, State) -> diff --git a/big_tests/src/cth_validate_nodes.erl b/big_tests/src/cth_validate_nodes.erl new file mode 100644 index 00000000000..aa4f5ce8f0d --- /dev/null +++ b/big_tests/src/cth_validate_nodes.erl @@ -0,0 +1,35 @@ +-module(cth_validate_nodes). + +%% Callbacks +-export([id/1]). +-export([init/2]). + +-export([pre_init_per_suite/3]). +-export([terminate/1]). + +-record(state, {}). + +%% CT callbacks + +id(_Opts) -> + "cth_validate_nodes_001". + +init(_Id, _Opts) -> + {ok, #state{}}. + +pre_init_per_suite(_Suite, Config, State) -> + case distributed_helper:validate_nodes() of + ok -> + {Config, State}; + {error, Reason} -> + case os:getenv("SKIP_VALIDATE_NODES") of + "true" -> + ct:pal("Skip failing with ~p in ct_check_rpc_nodes", [Reason]), + {Config, State}; + _ -> + {{fail, Reason}, State} + end + end. + +terminate(_State) -> + ok. diff --git a/test/common/distributed_helper.erl b/test/common/distributed_helper.erl index 7f1ffe86726..5bb856b5e97 100644 --- a/test/common/distributed_helper.erl +++ b/test/common/distributed_helper.erl @@ -39,7 +39,7 @@ add_node_to_cluster(Node, Config) -> end, Config. -add_node_to_mnesia_cluster(Node, Config) -> +add_node_to_mnesia_cluster(Node, _Config) -> ClusterMemberNode = maps:get(node, mim()), ok = rpc(Node#{timeout => cluster_op_timeout()}, mongoose_cluster, join, [ClusterMemberNode]), @@ -205,3 +205,33 @@ subhost_pattern(SubhostTemplate) -> lookup_config_opt(Key) -> rpc(mim(), mongoose_config, lookup_opt, [Key]). + +%% @doc Checks if MongooseIM nodes are running +validate_nodes() -> + Results = [validate_node(Node) || Node <- get_node_keys()], + Errors = [Res || Res <- Results, Res =/= ok], + case Errors of + [] -> ok; + _ -> {error, Errors} + end. 
+ +get_node_keys() -> + case os:getenv("TEST_HOSTS") of + false -> + [NodeKey || {NodeKey, _Opts} <- ct:get_config(hosts)]; + EnvValue -> %% EnvValue examples are "mim" or "mim mim2" + BinHosts = binary:split(iolist_to_binary(EnvValue), <<" ">>, [global]), + [binary_to_atom(Node, utf8) || Node <- BinHosts] + end. + +validate_node(NodeKey) -> + Spec = #{node := Node} = rpc_spec(NodeKey), + try rpc(Spec, application, which_applications, []) of + Loaded -> + case lists:keymember(mongooseim, 1, Loaded) of + true -> ok; + false -> {validate_node_failed, mongooseim_not_running, Node} + end + catch error:{badrpc, Reason} -> + {validate_node_failed, {badrpc, Reason}, Node} + end. diff --git a/tools/test-runner.sh b/tools/test-runner.sh index 1828d829e67..46730446f21 100755 --- a/tools/test-runner.sh +++ b/tools/test-runner.sh @@ -37,6 +37,7 @@ Options: --skip-start-nodes -- do not start nodes before big tests --skip-stop-nodes -- do not stop nodes after big tests --skip-setup-db -- do not start any databases, the same as "--db --" option +--skip-validate-nodes -- do not check if MongooseIM is running --no-parallel -- run most commands in sequence --tls-dist -- enable encryption between nodes in big tests --verbose -- print script output @@ -115,6 +116,9 @@ Script examples: Sets dev-nodes and test-hosts to empty lists Reruns mam_SUITE +./tools/test-runner.sh --skip-small-tests --skip-start-nodes --skip-validate-nodes -- connect + Continues test execution even if MongooseIM is not running on all nodes + ./tools/test-runner.sh --rerun-big-tests -- mam The same command as above @@ -309,6 +313,7 @@ SELECTED_TESTS=() STOP_SCRIPT=false SKIP_DB_SETUP=false DB_FROM_PRESETS=true +SKIP_VALIDATE_NODES=false # Parse command line arguments # Prefer arguments to env variables @@ -353,6 +358,11 @@ case $key in DB_FROM_PRESETS=false ;; + --skip-validate-nodes) + shift # past argument + SKIP_VALIDATE_NODES=true + ;; + # Similar how we parse --db option --preset) shift # past argument @@ 
-635,6 +645,7 @@ export TESTSPEC="auto_big_tests.spec" export START_NODES="$START_NODES" export STOP_NODES="$STOP_NODES" export PAUSE_BEFORE_BIG_TESTS="$PAUSE_BEFORE_BIG_TESTS" +export SKIP_VALIDATE_NODES="$SKIP_VALIDATE_NODES" # Debug printing echo "Variables:" @@ -653,6 +664,7 @@ echo " TESTSPEC=$TESTSPEC" echo " TLS_DIST=$TLS_DIST" echo " START_NODES=$START_NODES" echo " STOP_NODES=$STOP_NODES" +echo " SKIP_VALIDATE_NODES=$SKIP_VALIDATE_NODES" echo "" From fa9a11e40f03744a9091d07319b6a71523f819ce Mon Sep 17 00:00:00 2001 From: Mikhail Uvarov Date: Mon, 9 Dec 2024 16:58:24 +0100 Subject: [PATCH 2/2] Handle race-condition of node not being validated If it is restarting at the end of the test suite If MIM is restarted asynchronously, simple checking if application is running is not enough. We first need to wait for MIM to actually fully restart. The best place to do it is in end_per_suite. So, if some suite crashes MIM, the rest of suites would quickly fail in init_per_suite (and we need to wait only once) --- big_tests/src/cth_validate_nodes.erl | 13 ++++++++++--- test/common/distributed_helper.erl | 14 ++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/big_tests/src/cth_validate_nodes.erl b/big_tests/src/cth_validate_nodes.erl index aa4f5ce8f0d..3e91fe6d9da 100644 --- a/big_tests/src/cth_validate_nodes.erl +++ b/big_tests/src/cth_validate_nodes.erl @@ -5,9 +5,10 @@ -export([init/2]). -export([pre_init_per_suite/3]). +-export([post_end_per_suite/4]). -export([terminate/1]). --record(state, {}). +-record(state, {node_keys = []}). %% CT callbacks @@ -19,8 +20,8 @@ init(_Id, _Opts) -> pre_init_per_suite(_Suite, Config, State) -> case distributed_helper:validate_nodes() of - ok -> - {Config, State}; + {ok, NodeKeys} -> + {Config, State#state{node_keys = NodeKeys}}; {error, Reason} -> case os:getenv("SKIP_VALIDATE_NODES") of "true" ->
+post_end_per_suite(_SuiteName, _Config, Return, State = #state{node_keys = NodeKeys}) -> + %% In case a suite is restarting the node at the end of the suite execution + %% ensure we wait long enough for it to actually start + distributed_helper:wait_for_nodes_to_start(NodeKeys), + {Return, State#state{node_keys = []}}. + terminate(_State) -> ok. diff --git a/test/common/distributed_helper.erl b/test/common/distributed_helper.erl index 5bb856b5e97..9ace65deb7d 100644 --- a/test/common/distributed_helper.erl +++ b/test/common/distributed_helper.erl @@ -208,13 +208,23 @@ lookup_config_opt(Key) -> %% @doc Checks if MongooseIM nodes are running validate_nodes() -> - Results = [validate_node(Node) || Node <- get_node_keys()], + validate_nodes(get_node_keys()). + +validate_nodes(NodeKeys) -> + Results = [validate_node(Node) || Node <- NodeKeys], Errors = [Res || Res <- Results, Res =/= ok], case Errors of - [] -> ok; + [] -> {ok, NodeKeys}; _ -> {error, Errors} end. +wait_for_nodes_to_start([]) -> ok; +wait_for_nodes_to_start(NodeKeys) -> + wait_helper:wait_until(fun() -> validate_nodes(NodeKeys) end, {ok, NodeKeys}, + #{time_left => timer:seconds(20), + sleep_time => 1000, + name => wait_for_nodes_to_start}). + get_node_keys() -> case os:getenv("TEST_HOSTS") of false ->