Skip to content

Commit

Permalink
Merge pull request #2043 from gao-yan/priority-fencing-delay-1.1
Browse files Browse the repository at this point in the history
[1.1] Feature: priority-fencing-delay
  • Loading branch information
kgaillot authored Apr 27, 2020
2 parents 63d2d79 + 7367658 commit 2b620fa
Show file tree
Hide file tree
Showing 25 changed files with 1,799 additions and 64 deletions.
12 changes: 8 additions & 4 deletions crmd/te_actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
const char *uuid = NULL;
const char *target = NULL;
const char *type = NULL;
const char *priority_delay = NULL;
gboolean invalid_action = FALSE;
enum stonith_call_options options = st_opt_none;

Expand All @@ -182,9 +183,11 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
return FALSE;
}

priority_delay = crm_meta_value(action->params, XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);

crm_notice("Requesting fencing (%s) of node %s "
CRM_XS " action=%s timeout=%d",
type, target, id, transition_graph->stonith_timeout);
CRM_XS " action=%s timeout=%u priority_delay=%s",
type, target, id, transition_graph->stonith_timeout, priority_delay);

/* Passing NULL means block until we can connect... */
te_connect_stonith(NULL);
Expand All @@ -193,8 +196,9 @@ te_fence_node(crm_graph_t * graph, crm_action_t * action)
options |= st_opt_allow_suicide;
}

rc = stonith_api->cmds->fence(stonith_api, options, target, type,
transition_graph->stonith_timeout / 1000, 0);
rc = stonith_api->cmds->fence_with_delay(stonith_api, options, target, type,
(int) (transition_graph->stonith_timeout / 1000),
0, crm_atoi(priority_delay, "0"));

stonith_api->cmds->register_callback(stonith_api, rc, transition_graph->stonith_timeout / 1000,
st_opt_timeout_updates,
Expand Down
16 changes: 16 additions & 0 deletions doc/Pacemaker_Explained/en-US/Ch-Options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,22 @@ indexterm:[Cluster,Option,concurrent-fencing]
Is the cluster allowed to initiate multiple fence actions concurrently?
'(since 1.1.15)'

| priority-fencing-delay | 0 |
indexterm:[priority-fencing-delay,Cluster Option]
indexterm:[Cluster,Option,priority-fencing-delay]
Apply the specified delay to fencing actions that target the lost
nodes with the highest total resource priority when our cluster
partition does not have a majority of the nodes, so that the more
significant nodes are more likely to win any fencing match. This
is especially meaningful in a split-brain of a 2-node cluster.
When priority is calculated, a promoted resource instance counts
as its base priority + 1 if the base priority is not 0. Any
static/random delays introduced by `pcmk_delay_base/max` configured
for the corresponding fencing resources are added to this delay.
This delay should be significantly greater than (to be safe, twice)
the maximum `pcmk_delay_base/max`. By default, priority fencing
delay is disabled. '(since 1.1.23)'

| cluster-delay | 60s |
indexterm:[cluster-delay,Cluster Option]
indexterm:[Cluster,Option,cluster-delay]
Expand Down
33 changes: 24 additions & 9 deletions fencing/admin.c
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ static struct crm_option long_options[] = {
"Operation timeout in seconds (default 120;\n"
"\t\t\tused with most commands)."
},
{ "delay", required_argument, NULL, 'y',
"Apply a fencing delay in seconds. Any static/random delays from\n"
"\t\t\tpcmk_delay_base/max will be added, otherwise all\n"
"\t\t\tdisabled with the value -1\n"
"\t\t\t(default 0; with --fence, --reboot, --unfence)."
},
{ "as-node-id", no_argument, NULL, 'n',
"(Advanced) The supplied node is the corosync node ID\n"
"\t\t\t(with --last)."
Expand All @@ -201,6 +207,7 @@ struct {
char *name;
int timeout;
int tolerance;
int delay;
int rc;
} async_fence_data;

Expand Down Expand Up @@ -265,11 +272,13 @@ async_fence_helper(gpointer user_data)

st->cmds->register_notification(st, T_STONITH_NOTIFY_FENCE, notify_callback);

call_id = st->cmds->fence(st,
st_opt_allow_suicide,
async_fence_data.target,
async_fence_data.action,
async_fence_data.timeout, async_fence_data.tolerance);
call_id = st->cmds->fence_with_delay(st,
st_opt_allow_suicide,
async_fence_data.target,
async_fence_data.action,
async_fence_data.timeout,
async_fence_data.tolerance,
async_fence_data.delay);

if (call_id < 0) {
g_main_loop_quit(mainloop);
Expand All @@ -285,7 +294,8 @@ async_fence_helper(gpointer user_data)
}

static int
mainloop_fencing(stonith_t * st, const char *target, const char *action, int timeout, int tolerance)
mainloop_fencing(stonith_t * st, const char *target, const char *action,
int timeout, int tolerance, int delay)
{
crm_trigger_t *trig;

Expand All @@ -294,6 +304,7 @@ mainloop_fencing(stonith_t * st, const char *target, const char *action, int tim
async_fence_data.action = action;
async_fence_data.timeout = timeout;
async_fence_data.tolerance = tolerance;
async_fence_data.delay = delay;
async_fence_data.rc = -1;

trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
Expand Down Expand Up @@ -492,6 +503,7 @@ main(int argc, char **argv)
int verbose = 0;
int argerr = 0;
int timeout = 120;
int delay = 0;
int option_index = 0;
int fence_level = 0;
int no_connect = 0;
Expand Down Expand Up @@ -574,6 +586,9 @@ main(int argc, char **argv)
case 't':
timeout = crm_atoi(optarg, NULL);
break;
case 'y':
delay = crm_atoi(optarg, NULL);
break;
case 'B':
case 'F':
case 'U':
Expand Down Expand Up @@ -760,13 +775,13 @@ main(int argc, char **argv)
rc = st->cmds->confirm(st, st_opts, target);
break;
case 'B':
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance);
rc = mainloop_fencing(st, target, "reboot", timeout, tolerance, delay);
break;
case 'F':
rc = mainloop_fencing(st, target, "off", timeout, tolerance);
rc = mainloop_fencing(st, target, "off", timeout, tolerance, delay);
break;
case 'U':
rc = mainloop_fencing(st, target, "on", timeout, tolerance);
rc = mainloop_fencing(st, target, "on", timeout, tolerance, delay);
break;
case 'h':
show_last_fenced(as_nodeid, target);
Expand Down
45 changes: 28 additions & 17 deletions fencing/commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ typedef struct async_command_s {
int default_timeout; /* seconds */
int timeout; /* seconds */

int start_delay; /* milliseconds */
int start_delay; /* seconds */
int delay_id;

char *op;
Expand Down Expand Up @@ -123,36 +123,36 @@ static int
get_action_delay_max(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_max_ms = 0;
int delay_max = 0;

if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
}

value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_MAX);
if (value) {
delay_max_ms = crm_get_msec(value);
delay_max = crm_get_msec(value) / 1000;
}

return delay_max_ms;
return delay_max;
}

/*!
 * Look up the STONITH_ATTR_DELAY_BASE parameter of a fencing device for the
 * given action.
 *
 * \param device  Device whose params table is searched.
 * \param action  Name of the requested action.
 *
 * \return 0 when the action is neither "off" nor "reboot" (assuming
 *         safe_str_neq() reports string inequality -- TODO confirm) or when
 *         the parameter is not set; otherwise the value parsed by
 *         crm_get_msec().
 *
 * NOTE(review): this listing appears to be a rendered diff in which the
 * removed (`delay_base_ms`, milliseconds) and added (`delay_base`, divided by
 * 1000) lines are interleaved. Read literally, the first `return` wins and
 * the second is unreachable -- confirm which variant is intended.
 */
static int
get_action_delay_base(stonith_device_t * device, const char * action)
{
const char *value = NULL;
int delay_base_ms = 0;
int delay_base = 0;

/* Only "off" and "reboot" are considered; anything else gets no delay. */
if (safe_str_neq(action, "off") && safe_str_neq(action, "reboot")) {
return 0;
}

value = g_hash_table_lookup(device->params, STONITH_ATTR_DELAY_BASE);
if (value) {
delay_base_ms = crm_get_msec(value);
/* Integer division: any remainder below 1000 is discarded. */
delay_base = crm_get_msec(value) / 1000;
}

return delay_base_ms;
return delay_base;
}

/*!
Expand Down Expand Up @@ -243,6 +243,8 @@ create_async_command(xmlNode * msg)
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
cmd->timeout = cmd->default_timeout;
// Value -1 means disable any static/random fencing delays
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));

cmd->origin = crm_element_value_copy(msg, F_ORIG);
cmd->remote_op_id = crm_element_value_copy(msg, F_STONITH_REMOTE_OP_ID);
Expand Down Expand Up @@ -349,7 +351,7 @@ stonith_device_execute(stonith_device_t * device)

if (pending_op && pending_op->delay_id) {
crm_trace
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %dms",
("Operation '%s'%s%s on %s was asked to run too early, waiting for start_delay timeout of %ds",
pending_op->action, pending_op->victim ? " targeting " : "",
pending_op->victim ? pending_op->victim : "",
device->id, pending_op->start_delay);
Expand Down Expand Up @@ -464,6 +466,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
{
int delay_max = 0;
int delay_base = 0;
int requested_delay = cmd->start_delay;

CRM_CHECK(cmd != NULL, return);
CRM_CHECK(device != NULL, return);
Expand Down Expand Up @@ -496,30 +499,38 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
device->pending_ops = g_list_append(device->pending_ops, cmd);
mainloop_set_trigger(device->work);

// Value -1 means disable any static/random fencing delays
if (requested_delay < 0) {
return;
}

delay_max = get_action_delay_max(device, cmd->action);
delay_base = get_action_delay_base(device, cmd->action);
if (delay_max == 0) {
delay_max = delay_base;
}
if (delay_max < delay_base) {
crm_warn("Base-delay (%dms) is larger than max-delay (%dms) "
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
"for %s on %s - limiting to max-delay",
delay_base, delay_max, cmd->action, device->id);
delay_base = delay_max;
}
if (delay_max > 0) {
// coverity[dont_call] We're not using rand() for security
cmd->start_delay =
cmd->start_delay +=
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
+ delay_base;
crm_notice("Delaying '%s' action%s%s on %s for %dms (timeout=%ds, base=%dms, "
"max=%dms)",
cmd->action,
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->start_delay, cmd->timeout,
delay_base, delay_max);
}

if (cmd->start_delay > 0) {
crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, "
"requested_delay=%ds, base=%ds, max=%ds)",
cmd->action,
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
device->id, cmd->start_delay, cmd->timeout,
requested_delay, delay_base, delay_max);
cmd->delay_id =
g_timeout_add(cmd->start_delay, start_delay_helper, cmd);
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
}
}

Expand Down
4 changes: 4 additions & 0 deletions fencing/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ typedef struct remote_fencing_op_s {
* values associated with the devices this fencing operation may call */
gint total_timeout;

/*! Requested fencing delay.
* Value -1 means disable any static/random fencing delays. */
int delay;

/*! Delegate is the node being asked to perform a fencing action
* on behalf of the node that owns the remote operation. Some operations
* will involve multiple delegates. This value represents the final delegate
Expand Down
30 changes: 30 additions & 0 deletions fencing/regression.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,36 @@ class Tests(object):
test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)")

# make sure requested fencing delay is applied only for the first device in the first level
# make sure static delay from pcmk_delay_base is added
for test_type in test_types:
if test_type["use_cpg"] == 0:
continue

test = self.new_test("%s_topology_delay" % test_type["prefix"],
"Verify requested fencing delay is applied only for the first device in the first level and pcmk_delay_base is added.",
test_type["use_cpg"])
test.add_cmd("stonith_admin",
"-R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
test.add_cmd("stonith_admin",
"-R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
test.add_cmd("stonith_admin",
"-R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin",
"-R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")

test.add_cmd("stonith_admin", "-r node3 -i 1 -v true1")
test.add_cmd("stonith_admin", "-r node3 -i 1 -v false1")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true2")
test.add_cmd("stonith_admin", "-r node3 -i 2 -v true3")

test.add_cmd("stonith_admin", "-F node3 --delay 1")

test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on true1 for 2s (timeout=120s, requested_delay=1s, base=1s, max=1s)")
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on false1 for 1s (timeout=120s, requested_delay=0s, base=1s, max=1s)")
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true2")
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true3")

def build_nodeid_tests(self):
""" Register tests that use a corosync node id """

Expand Down
14 changes: 14 additions & 0 deletions fencing/remote.c
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,11 @@ stonith_topology_next(remote_fencing_op_t * op)
op->client_name, op->originator, op->id);
set_op_device_list(op, tp->levels[op->level]);

// The requested delay has been applied for the first fencing level
if (op->level > 1 && op->delay > 0) {
op->delay = 0;
}

if (g_list_next(op->devices_list) && safe_str_eq(op->action, "reboot")) {
/* A reboot has been requested for a topology level with multiple
* devices. Instead of rebooting the devices sequentially, we will
Expand Down Expand Up @@ -992,6 +997,8 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
op = calloc(1, sizeof(remote_fencing_op_t));

crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
// Value -1 means disable any static/random fencing delays
crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));

if (peer && dev) {
op->id = crm_element_value_copy(dev, F_STONITH_REMOTE_OP_ID);
Expand Down Expand Up @@ -1440,6 +1447,12 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
/* Necessary devices remain, so execute the next one */
crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
op->target, op->originator, op->client_name, rc);

// The requested delay has been applied for the first device
if (op->delay > 0) {
op->delay = 0;
}

call_remote_stonith(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
Expand Down Expand Up @@ -1494,6 +1507,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);

if (device) {
timeout_one = TIMEOUT_MULTIPLY_FACTOR *
Expand Down
1 change: 1 addition & 0 deletions include/crm/fencing/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ xmlNode *create_device_registration_xml(const char *id,
/*! Timeout period per a device execution */
# define F_STONITH_TIMEOUT "st_timeout"
# define F_STONITH_TOLERANCE "st_tolerance"
# define F_STONITH_DELAY "st_delay"
/*! Action specific timeout period returned in query of fencing devices. */
# define F_STONITH_ACTION_TIMEOUT "st_action_timeout"
/*! Host in query result is not allowed to run this action */
Expand Down
1 change: 1 addition & 0 deletions include/crm/msg_xml.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@
# define XML_CONFIG_ATTR_FORCE_QUIT "shutdown-escalation"
# define XML_CONFIG_ATTR_RECHECK "cluster-recheck-interval"
# define XML_CONFIG_ATTR_FENCE_REACTION "fence-reaction"
# define XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY "priority-fencing-delay"

# define XML_ALERT_ATTR_PATH "path"
# define XML_ALERT_ATTR_TIMEOUT "timeout"
Expand Down
4 changes: 2 additions & 2 deletions include/crm/pengine/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ typedef struct op_digest_cache_s {
op_digest_cache_t *rsc_action_digest_cmp(resource_t * rsc, xmlNode * xml_op, node_t * node,
pe_working_set_t * data_set);

action_t *pe_fence_op(node_t * node, const char *op, bool optional, const char *reason, pe_working_set_t * data_set);
pe_action_t *pe_fence_op(pe_node_t * node, const char *op, bool optional, const char *reason, bool priority_delay, pe_working_set_t * data_set);
void trigger_unfencing(
resource_t * rsc, node_t *node, const char *reason, action_t *dependency, pe_working_set_t * data_set);

Expand All @@ -346,7 +346,7 @@ gboolean add_tag_ref(GHashTable * tags, const char * tag_name, const char * obj

void print_rscs_brief(GListPtr rsc_list, const char * pre_text, long options,
void * print_data, gboolean print_all);
void pe_fence_node(pe_working_set_t * data_set, node_t * node, const char *reason);
void pe_fence_node(pe_working_set_t * data_set, pe_node_t * node, const char *reason, bool priority_delay);

node_t *pe_create_node(const char *id, const char *uname, const char *type,
const char *score, pe_working_set_t * data_set);
Expand Down
Loading

0 comments on commit 2b620fa

Please sign in to comment.