Skip to content

Commit

Permalink
failover: add failover_primary_timeout option
Browse files Browse the repository at this point in the history
This was previously hardcoded to 31 seconds (hardcoded retry_timout +
1). This may be too short period under some circumstances.

When we retry primary server we drop connection to the backup server and
if the primary server is not yet available (and there are many
unavailable primary servers) we may go through a long timeout cycle
every half minute.

This patch makes the value configurable.

:config: Added `failover_primary_timout` configuration option. This
  can be used to configure how often SSSD tries to reconnect to a
  primary server after a successful connection to a backup server.
  This was previously hardcoded to 31 seconds which is kept as
  the default value.
  • Loading branch information
pbrezina committed May 13, 2024
1 parent 31bd16f commit 5b1b680
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/config/SSSDConfig/sssdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ def __init__(self):
'dns_resolver_op_timeout': _('How long should keep trying to resolve single DNS query (seconds)'),
'dns_resolver_timeout': _('How long to wait for replies from DNS when resolving servers (seconds)'),
'dns_discovery_domain': _('The domain part of service discovery DNS query'),
'failover_primary_timeout': _('How long should we stay connected to backup server before retrying '
'primary server').
'override_gid': _('Override GID value from the identity provider with this value'),
'case_sensitive': _('Treat usernames as case sensitive'),
'entry_cache_user_timeout': _('Entry cache timeout length (seconds)'),
Expand Down
2 changes: 2 additions & 0 deletions src/config/SSSDConfigTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ def testListOptions(self):
'dns_resolver_op_timeout',
'dns_resolver_timeout',
'dns_discovery_domain',
'failover_primary_timeout',
'dyndns_update',
'dyndns_ttl',
'dyndns_iface',
Expand Down Expand Up @@ -938,6 +939,7 @@ def testRemoveProvider(self):
'dns_resolver_op_timeout',
'dns_resolver_timeout',
'dns_discovery_domain',
'failover_primary_timeout',
'dyndns_update',
'dyndns_ttl',
'dyndns_iface',
Expand Down
1 change: 1 addition & 0 deletions src/config/cfg_rules.ini
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,7 @@ option = dns_resolver_op_timeout
option = dns_resolver_timeout
option = dns_resolver_use_search_list
option = dns_discovery_domain
option = failover_primary_timeout
option = override_gid
option = case_sensitive
option = override_homedir
Expand Down
1 change: 1 addition & 0 deletions src/config/etc/sssd.api.conf
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ dns_resolver_server_timeout = int, None, false
dns_resolver_op_timeout = int, None, false
dns_resolver_timeout = int, None, false
dns_discovery_domain = str, None, false
failover_primary_timeout = int, None, false
override_gid = int, None, false
case_sensitive = str, None, false
override_homedir = str, None, false
Expand Down
19 changes: 19 additions & 0 deletions src/man/sssd.conf.5.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3779,6 +3779,25 @@ pam_gssapi_indicators_map = sudo:pkinit, sudo-i:pkinit
</listitem>
</varlistentry>

<varlistentry>
<term>failover_primary_timeout (integer)</term>
<listitem>
<para>
When no primary server is currently available,
SSSD fail overs to a backup server. This option
defines the amount of time (in seconds) to
wait before SSSD tries to reconnect to a primary
server again.
</para>
<para>
Note: The minimum value is 31.
</para>
<para>
Default: 31
</para>
</listitem>
</varlistentry>

<varlistentry>
<term>override_gid (integer)</term>
<listitem>
Expand Down
1 change: 1 addition & 0 deletions src/providers/data_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ enum dp_res_opts {
DP_RES_OPT_RESOLVER_SERVER_TIMEOUT,
DP_RES_OPT_RESOLVER_USE_SEARCH_LIST,
DP_RES_OPT_DNS_DOMAIN,
DP_RES_OPT_FAILOVER_PRIMARY_TIMEOUT,

DP_RES_OPTS /* attrs counter */
};
Expand Down
14 changes: 12 additions & 2 deletions src/providers/data_provider_fo.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,20 @@ static int be_fo_get_options(struct be_ctx *ctx,
DP_RES_OPT_RESOLVER_TIMEOUT);
opts->use_search_list = dp_opt_get_bool(ctx->be_res->opts,
DP_RES_OPT_RESOLVER_USE_SEARCH_LIST);
opts->primary_timeout = dp_opt_get_int(ctx->be_res->opts,
DP_RES_OPT_FAILOVER_PRIMARY_TIMEOUT);

opts->retry_timeout = 30;
opts->srv_retry_neg_timeout = 15;
opts->family_order = ctx->be_res->family_order;

if (opts->primary_timeout <= opts->retry_timeout) {
opts->primary_timeout = opts->retry_timeout + 1;
DEBUG(SSSDBG_CONF_SETTINGS,
"Warning: failover_primary_timeout is too low, using %lu "
"seconds instead\n", opts->primary_timeout);
}

return EOK;
}

Expand Down Expand Up @@ -551,7 +561,7 @@ static void be_resolve_server_done(struct tevent_req *subreq)
struct tevent_req);
struct be_resolve_server_state *state = tevent_req_data(req,
struct be_resolve_server_state);
time_t timeout = fo_get_service_retry_timeout(state->svc->fo_service) + 1;
time_t timeout = fo_get_primary_retry_timeout(state->svc->fo_service);
int ret;

ret = be_resolve_server_process(subreq, state, &new_subreq);
Expand All @@ -564,7 +574,6 @@ static void be_resolve_server_done(struct tevent_req *subreq)
}

if (!fo_is_server_primary(state->srv)) {
/* FIXME: make the timeout configurable */
ret = be_primary_server_timeout_activate(state->ctx, state->ev,
state->ctx, state->svc,
timeout);
Expand Down Expand Up @@ -871,6 +880,7 @@ static struct dp_option dp_res_default_opts[] = {
{ "dns_resolver_server_timeout", DP_OPT_NUMBER, { .number = 1000 }, NULL_NUMBER },
{ "dns_resolver_use_search_list", DP_OPT_BOOL, BOOL_TRUE, BOOL_TRUE },
{ "dns_discovery_domain", DP_OPT_STRING, NULL_STRING, NULL_STRING },
{ "failover_primary_timeout", DP_OPT_NUMBER, { .number = 31 }, NULL_NUMBER },
DP_OPTION_TERMINATOR
};

Expand Down
10 changes: 10 additions & 0 deletions src/providers/fail_over.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ fo_context_init(TALLOC_CTX *mem_ctx, struct fo_options *opts)

ctx->opts->srv_retry_neg_timeout = opts->srv_retry_neg_timeout;
ctx->opts->retry_timeout = opts->retry_timeout;
ctx->opts->primary_timeout = opts->primary_timeout;
ctx->opts->family_order = opts->family_order;
ctx->opts->service_resolv_timeout = opts->service_resolv_timeout;
ctx->opts->use_search_list = opts->use_search_list;
Expand Down Expand Up @@ -1740,6 +1741,15 @@ time_t fo_get_service_retry_timeout(struct fo_service *svc)
return svc->ctx->opts->retry_timeout;
}

time_t fo_get_primary_retry_timeout(struct fo_service *svc)
{
if (svc == NULL || svc->ctx == NULL || svc->ctx->opts == NULL) {
return 0;
}

return svc->ctx->opts->primary_timeout;
}

bool fo_get_use_search_list(struct fo_server *server)
{
if (
Expand Down
3 changes: 3 additions & 0 deletions src/providers/fail_over.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ struct fo_server;
struct fo_options {
time_t srv_retry_neg_timeout;
time_t retry_timeout;
time_t primary_timeout;
int service_resolv_timeout;
bool use_search_list;
enum restrict_family family_order;
Expand Down Expand Up @@ -211,6 +212,8 @@ int fo_is_srv_lookup(struct fo_server *s);

time_t fo_get_service_retry_timeout(struct fo_service *svc);

time_t fo_get_primary_retry_timeout(struct fo_service *svc);

bool fo_get_use_search_list(struct fo_server *server);

void fo_reset_services(struct fo_ctx *fo_ctx);
Expand Down

0 comments on commit 5b1b680

Please sign in to comment.