From 5b1b6805e76b764f459eaf5d3d4ace63c34eeb94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavel=20B=C5=99ezina?= Date: Tue, 30 Apr 2024 12:28:53 +0200 Subject: [PATCH] failover: add failover_primary_timeout option This was previously hardcoded to 31 seconds (hardcoded retry_timeout + 1). This may be too short a period under some circumstances. When we retry primary server we drop connection to the backup server and if the primary server is not yet available (and there are many unavailable primary servers) we may go through a long timeout cycle every half minute. This patch makes the value configurable. :config: Added `failover_primary_timeout` configuration option. This can be used to configure how often SSSD tries to reconnect to a primary server after a successful connection to a backup server. This was previously hardcoded to 31 seconds which is kept as the default value. --- src/config/SSSDConfig/sssdoptions.py | 2 ++ src/config/SSSDConfigTest.py | 2 ++ src/config/cfg_rules.ini | 1 + src/config/etc/sssd.api.conf | 1 + src/man/sssd.conf.5.xml | 19 +++++++++++++++++++ src/providers/data_provider.h | 1 + src/providers/data_provider_fo.c | 14 ++++++++++++-- src/providers/fail_over.c | 10 ++++++++++ src/providers/fail_over.h | 3 +++ 9 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/config/SSSDConfig/sssdoptions.py b/src/config/SSSDConfig/sssdoptions.py index ec0e276cbda..1789d2d0b4f 100644 --- a/src/config/SSSDConfig/sssdoptions.py +++ b/src/config/SSSDConfig/sssdoptions.py @@ -184,6 +184,8 @@ def __init__(self): 'dns_resolver_op_timeout': _('How long should keep trying to resolve single DNS query (seconds)'), 'dns_resolver_timeout': _('How long to wait for replies from DNS when resolving servers (seconds)'), 'dns_discovery_domain': _('The domain part of service discovery DNS query'), + 'failover_primary_timeout': _('How long should we stay connected to backup server before retrying ' + 'primary server'),
'override_gid': _('Override GID value from the identity provider with this value'), 'case_sensitive': _('Treat usernames as case sensitive'), 'entry_cache_user_timeout': _('Entry cache timeout length (seconds)'), diff --git a/src/config/SSSDConfigTest.py b/src/config/SSSDConfigTest.py index 52485810865..ef4dcd295a7 100755 --- a/src/config/SSSDConfigTest.py +++ b/src/config/SSSDConfigTest.py @@ -578,6 +578,7 @@ def testListOptions(self): 'dns_resolver_op_timeout', 'dns_resolver_timeout', 'dns_discovery_domain', + 'failover_primary_timeout', 'dyndns_update', 'dyndns_ttl', 'dyndns_iface', @@ -938,6 +939,7 @@ def testRemoveProvider(self): 'dns_resolver_op_timeout', 'dns_resolver_timeout', 'dns_discovery_domain', + 'failover_primary_timeout', 'dyndns_update', 'dyndns_ttl', 'dyndns_iface', diff --git a/src/config/cfg_rules.ini b/src/config/cfg_rules.ini index 836b74bd8c9..9bb3d8a53cb 100644 --- a/src/config/cfg_rules.ini +++ b/src/config/cfg_rules.ini @@ -404,6 +404,7 @@ option = dns_resolver_op_timeout option = dns_resolver_timeout option = dns_resolver_use_search_list option = dns_discovery_domain +option = failover_primary_timeout option = override_gid option = case_sensitive option = override_homedir diff --git a/src/config/etc/sssd.api.conf b/src/config/etc/sssd.api.conf index d4d79c96966..28f057978db 100644 --- a/src/config/etc/sssd.api.conf +++ b/src/config/etc/sssd.api.conf @@ -171,6 +171,7 @@ dns_resolver_server_timeout = int, None, false dns_resolver_op_timeout = int, None, false dns_resolver_timeout = int, None, false dns_discovery_domain = str, None, false +failover_primary_timeout = int, None, false override_gid = int, None, false case_sensitive = str, None, false override_homedir = str, None, false diff --git a/src/man/sssd.conf.5.xml b/src/man/sssd.conf.5.xml index f53fcb35df0..803a74d4c5b 100644 --- a/src/man/sssd.conf.5.xml +++ b/src/man/sssd.conf.5.xml @@ -3779,6 +3779,25 @@ pam_gssapi_indicators_map = sudo:pkinit, sudo-i:pkinit + + 
failover_primary_timeout (integer) + + + When no primary server is currently available, + SSSD fails over to a backup server. This option + defines the amount of time (in seconds) to + wait before SSSD tries to reconnect to a primary + server again. + + + Note: The minimum value is 31. + + + Default: 31 + + + + override_gid (integer) diff --git a/src/providers/data_provider.h b/src/providers/data_provider.h index 36a82b84d7d..def35e491a3 100644 --- a/src/providers/data_provider.h +++ b/src/providers/data_provider.h @@ -267,6 +267,7 @@ enum dp_res_opts { DP_RES_OPT_RESOLVER_SERVER_TIMEOUT, DP_RES_OPT_RESOLVER_USE_SEARCH_LIST, DP_RES_OPT_DNS_DOMAIN, + DP_RES_OPT_FAILOVER_PRIMARY_TIMEOUT, DP_RES_OPTS /* attrs counter */ }; diff --git a/src/providers/data_provider_fo.c b/src/providers/data_provider_fo.c index b0aed54e97b..c23f92e3556 100644 --- a/src/providers/data_provider_fo.c +++ b/src/providers/data_provider_fo.c @@ -48,10 +48,20 @@ static int be_fo_get_options(struct be_ctx *ctx, DP_RES_OPT_RESOLVER_TIMEOUT); opts->use_search_list = dp_opt_get_bool(ctx->be_res->opts, DP_RES_OPT_RESOLVER_USE_SEARCH_LIST); + opts->primary_timeout = dp_opt_get_int(ctx->be_res->opts, + DP_RES_OPT_FAILOVER_PRIMARY_TIMEOUT); + opts->retry_timeout = 30; opts->srv_retry_neg_timeout = 15; opts->family_order = ctx->be_res->family_order; + if (opts->primary_timeout <= opts->retry_timeout) { + opts->primary_timeout = opts->retry_timeout + 1; + DEBUG(SSSDBG_CONF_SETTINGS, + "Warning: failover_primary_timeout is too low, using %lu " + "seconds instead\n", opts->primary_timeout); + } + return EOK; } @@ -551,7 +561,7 @@ static void be_resolve_server_done(struct tevent_req *subreq) struct tevent_req); struct be_resolve_server_state *state = tevent_req_data(req, struct be_resolve_server_state); - time_t timeout = fo_get_service_retry_timeout(state->svc->fo_service) + 1; + time_t timeout = fo_get_primary_retry_timeout(state->svc->fo_service); int ret; ret = be_resolve_server_process(subreq, state, 
&new_subreq); @@ -564,7 +574,6 @@ static void be_resolve_server_done(struct tevent_req *subreq) } if (!fo_is_server_primary(state->srv)) { - /* FIXME: make the timeout configurable */ ret = be_primary_server_timeout_activate(state->ctx, state->ev, state->ctx, state->svc, timeout); @@ -871,6 +880,7 @@ static struct dp_option dp_res_default_opts[] = { { "dns_resolver_server_timeout", DP_OPT_NUMBER, { .number = 1000 }, NULL_NUMBER }, { "dns_resolver_use_search_list", DP_OPT_BOOL, BOOL_TRUE, BOOL_TRUE }, { "dns_discovery_domain", DP_OPT_STRING, NULL_STRING, NULL_STRING }, + { "failover_primary_timeout", DP_OPT_NUMBER, { .number = 31 }, NULL_NUMBER }, DP_OPTION_TERMINATOR }; diff --git a/src/providers/fail_over.c b/src/providers/fail_over.c index 7cb64244877..7f94407c538 100644 --- a/src/providers/fail_over.c +++ b/src/providers/fail_over.c @@ -158,6 +158,7 @@ fo_context_init(TALLOC_CTX *mem_ctx, struct fo_options *opts) ctx->opts->srv_retry_neg_timeout = opts->srv_retry_neg_timeout; ctx->opts->retry_timeout = opts->retry_timeout; + ctx->opts->primary_timeout = opts->primary_timeout; ctx->opts->family_order = opts->family_order; ctx->opts->service_resolv_timeout = opts->service_resolv_timeout; ctx->opts->use_search_list = opts->use_search_list; @@ -1740,6 +1741,15 @@ time_t fo_get_service_retry_timeout(struct fo_service *svc) return svc->ctx->opts->retry_timeout; } +time_t fo_get_primary_retry_timeout(struct fo_service *svc) +{ + if (svc == NULL || svc->ctx == NULL || svc->ctx->opts == NULL) { + return 0; + } + + return svc->ctx->opts->primary_timeout; +} + bool fo_get_use_search_list(struct fo_server *server) { if ( diff --git a/src/providers/fail_over.h b/src/providers/fail_over.h index 36021ad6ffb..924a09970b1 100644 --- a/src/providers/fail_over.h +++ b/src/providers/fail_over.h @@ -83,6 +83,7 @@ struct fo_server; struct fo_options { time_t srv_retry_neg_timeout; time_t retry_timeout; + time_t primary_timeout; int service_resolv_timeout; bool use_search_list; enum 
restrict_family family_order; @@ -211,6 +212,8 @@ int fo_is_srv_lookup(struct fo_server *s); time_t fo_get_service_retry_timeout(struct fo_service *svc); +time_t fo_get_primary_retry_timeout(struct fo_service *svc); + bool fo_get_use_search_list(struct fo_server *server); void fo_reset_services(struct fo_ctx *fo_ctx);