diff --git a/src/ejabberd_app.erl b/src/ejabberd_app.erl index 877d82ebe1f..165276037cf 100644 --- a/src/ejabberd_app.erl +++ b/src/ejabberd_app.erl @@ -40,7 +40,10 @@ start(normal, _Args) -> try - do_start() + %% We want to print the starting progress if starting is too slow + %% (i.e. it would print the current stacktrace each several seconds + %% during the startup) + mongoose_task:run_tracked(#{task => start_mongooseim}, fun do_start/0) catch Class:Reason:StackTrace -> %% Log a stacktrace because while proc_lib:crash_report/4 would report a crash reason, %% it would not report the stacktrace diff --git a/src/ejabberd_sup.erl b/src/ejabberd_sup.erl index f231133210c..922ba019c3c 100644 --- a/src/ejabberd_sup.erl +++ b/src/ejabberd_sup.erl @@ -32,6 +32,9 @@ -export([start_child/1, start_child/2, stop_child/1]). -export([create_ets_table/2]). +-export([start_linked_child/2]). +-ignore_xref([start_linked_child/2]). + -include("mongoose_logger.hrl"). start_link() -> @@ -121,7 +124,17 @@ worker_spec(Mod) -> worker_spec(Mod, []). worker_spec(Mod, Args) -> - {Mod, {Mod, start_link, Args}, permanent, timer:seconds(5), worker, [Mod]}. + %% We use `start_linked_child' wrapper to log delays + %% in the slow init worker functions. + MFA = {?MODULE, start_linked_child, [Mod, Args]}, + {Mod, MFA, permanent, timer:seconds(5), worker, [Mod]}. + +%% In case one of the workers takes long time to start +%% we want the logging progress (to know which child got stuck). +%% This could happend on CI during the node restarts. +start_linked_child(Mod, Args) -> + F = fun() -> erlang:apply(Mod, start_link, Args) end, + mongoose_task:run_tracked(#{task => start_linked_child, child_module => Mod}, F). -spec create_ets_table(atom(), list()) -> ok. create_ets_table(TableName, TableOpts) -> diff --git a/src/mongoose_cluster_id.erl b/src/mongoose_cluster_id.erl index bf523aaad30..2ac20af3f60 100644 --- a/src/mongoose_cluster_id.erl +++ b/src/mongoose_cluster_id.erl @@ -24,8 +24,9 @@ start() -> PersistentBackend = which_persistent_backend_enabled(), VolatileBackend = which_volatile_backend_available(), maybe_prepare_queries(PersistentBackend), - cets_long:run_tracked(#{task => wait_for_any_backend, - backend => PersistentBackend, volatile_backend => VolatileBackend}, + mongoose_task:run_tracked(#{task => wait_for_any_backend, + backend => PersistentBackend, + volatile_backend => VolatileBackend}, fun() -> wait_for_any_backend(PersistentBackend, VolatileBackend) end), CachedRes = get_cached_cluster_id(VolatileBackend), BackendRes = get_backend_cluster_id(PersistentBackend), @@ -80,7 +81,8 @@ wait_for_backend_promise(cets, Alias) -> end)]; wait_for_backend_promise(rdbms, Alias) -> [spawn(fun() -> - cets_long:run_tracked(#{task => wait_for_rdbms}, fun() -> wait_for_rdbms() end), + mongoose_task:run_tracked(#{task => wait_for_rdbms}, + fun wait_for_rdbms/0), Alias ! {ready, Alias} end)]; wait_for_backend_promise(_, Alias) -> diff --git a/src/mongoose_start_node_id.erl b/src/mongoose_start_node_id.erl index 85be91312a3..1dd3ba11fe3 100644 --- a/src/mongoose_start_node_id.erl +++ b/src/mongoose_start_node_id.erl @@ -41,6 +41,9 @@ start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). init(_) -> + mongoose_task:run_tracked(#{task => mongoose_start_node_id_init}, fun do_init/0). + +do_init() -> net_kernel:monitor_nodes(true), StartId = mongoose_bin:gen_from_crypto(), persistent_term:put(mongoose_start_node_id, StartId), @@ -93,8 +96,15 @@ code_change(_OldVsn, State, _Extra) -> {ok, State}. register_on_remote_node(RemoteNode, StartId) -> + Info = #{task => register_on_remote_node, + remote_node => RemoteNode, + start_id => StartId}, + F = fun() -> do_register_on_remote_node(RemoteNode, StartId) end, + mongoose_task:run_tracked(Info, F). + +do_register_on_remote_node(RemoteNode, StartId) -> Res = rpc:call(RemoteNode, ?MODULE, register_on_remote_node_rpc, - [node(), StartId, self()]), + [node(), StartId, self()], timer:seconds(10)), case Res of ok -> ok; diff --git a/src/mongoose_task.erl b/src/mongoose_task.erl new file mode 100644 index 00000000000..857cb39ae0a --- /dev/null +++ b/src/mongoose_task.erl @@ -0,0 +1,12 @@ +%% @doc Helper module for slow tasks. +-module(mongoose_task). +-export([run_tracked/2]). + +%% @doc Run a task, log if it takes too long +-spec run_tracked(Info :: map(), F :: fun(() -> Res)) -> Res + when Res :: term(). +run_tracked(Info, F) -> + %% Reuse code from CETS. + %% `cets_long' module does not use any CETS functionality and + %% can be used, even if CETS tables are not used. + cets_long:run_tracked(Info, F). diff --git a/src/wpool/mongoose_wpool_mgr.erl b/src/wpool/mongoose_wpool_mgr.erl index 14f7ddb25a3..251e7d77780 100644 --- a/src/wpool/mongoose_wpool_mgr.erl +++ b/src/wpool/mongoose_wpool_mgr.erl @@ -71,10 +71,10 @@ start_link(Type) -> start(Type, Host, Tag, PoolOpts, ConnOpts) -> ok = ensure_started(Type), - gen_server:call(name(Type), {start_pool, Host, Tag, PoolOpts, ConnOpts}). + gen_server:call(name(Type), {start_pool, Host, Tag, PoolOpts, ConnOpts}, timer:seconds(30)). stop(Type, Host, Tag) -> - gen_server:call(name(Type), {stop_pool, Host, Tag}). + gen_server:call(name(Type), {stop_pool, Host, Tag}, timer:seconds(30)). -spec name(mongoose_wpool:pool_type()) -> mongoose_wpool:proc_name(). name(Type) -> @@ -93,7 +93,11 @@ handle_call({start_pool, Host, Tag, WpoolOpts, ConnOpts}, _From, #state{type = Type, pools = Pools, monitors = Monitors} = State) -> ?LOG_INFO(#{what => pool_starting, pool_type => Type, tag => Tag, server => Host, pool_opts => WpoolOpts}), - case mongoose_wpool:call_start_callback(Type, [Host, Tag, WpoolOpts, ConnOpts]) of + F = fun() -> + mongoose_wpool:call_start_callback(Type, [Host, Tag, WpoolOpts, ConnOpts]) + end, + Info = #{task => start_pool, tag => Tag}, + case mongoose_task:run_tracked(Info, F) of {_, Pid} = OkReply when is_pid(Pid) -> Ref = erlang:monitor(process, Pid), Key = {Type, Host, Tag},