Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support starting corosync-notifyd at cluster startup (draft) #296

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pcs/cli/common/lib_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ def load_module(env, middleware_factory, name):
"setup_local": cluster.setup_local,
"update_link": cluster.update_link,
"verify": cluster.verify,
"enable_corosync_notifyd": cluster.enable_corosync_notifyd,
"disable_corosync_notifyd": cluster.disable_corosync_notifyd,
},
)

Expand Down
2 changes: 2 additions & 0 deletions pcs/cli/routing/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@
"unstandby": lambda lib, argv, modifiers: raise_command_replaced(
"pcs node unstandby"
),
"enable-corosync-notifyd": cluster.corosync_notifyd_enable_cmd,
"disable-corosync-notifyd": cluster.corosync_notifyd_disable_cmd,
},
["cluster"],
)
15 changes: 14 additions & 1 deletion pcs/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ def start_cluster(argv):
service_list = ["corosync"]
if utils.need_to_handle_qdevice_service():
service_list.append("corosync-qdevice")
if utils.get_enable_corosync_notifyd() == "yes":
service_list.append("corosync-notifyd")
service_list.append("pacemaker")
for service in service_list:
output, retval = utils.start_service(service)
Expand Down Expand Up @@ -703,6 +705,8 @@ def stop_cluster_corosync():
service_list = []
if utils.need_to_handle_qdevice_service():
service_list.append("corosync-qdevice")
if utils.get_enable_corosync_notifyd() == "yes":
service_list.append("corosync-notifyd")
service_list.append("corosync")
for service in service_list:
output, retval = utils.stop_service(service)
Expand Down Expand Up @@ -746,6 +750,7 @@ def kill_local_cluster_services():
"gfs_controld",
# Corosync daemons
"corosync-qdevice",
"corosync-notifyd",
"corosync",
]
return utils.run([settings.killall_executable, "-9"] + all_cluster_daemons)
Expand Down Expand Up @@ -1341,7 +1346,7 @@ def cluster_destroy(lib, argv, modifiers):
destroy_cluster(corosync_nodes)
else:
print("Shutting down pacemaker/corosync services...")
for service in ["pacemaker", "corosync-qdevice", "corosync"]:
for service in ["pacemaker", "corosync-qdevice", "corosync-notifyd", "corosync"]:
# Returns an error if a service is not running. It is safe to
# ignore it since we want it not to be running anyways.
utils.stop_service(service)
Expand Down Expand Up @@ -2181,3 +2186,11 @@ def link_update(lib, argv, modifiers):
parse_args.prepare_options(parsed["options"]),
force_flags=force_flags,
)


def corosync_notifyd_disable_cmd(lib, argv, modifiers):
    """
    CLI handler for "pcs cluster disable-corosync-notifyd"

    lib -- library wrapper exposing lib.cluster.disable_corosync_notifyd
    argv -- remaining command line arguments, handed over unchanged as the
        node selection
    modifiers -- parsed command line options; not consulted by this handler
    """
    node_names = argv
    lib.cluster.disable_corosync_notifyd(node_names)


def corosync_notifyd_enable_cmd(lib, argv, modifiers):
    """
    CLI handler for "pcs cluster enable-corosync-notifyd"

    lib -- library wrapper exposing lib.cluster.enable_corosync_notifyd
    argv -- remaining command line arguments, handed over unchanged as the
        node selection
    modifiers -- parsed command line options; not consulted by this handler
    """
    node_names = argv
    lib.cluster.enable_corosync_notifyd(node_names)
2 changes: 2 additions & 0 deletions pcs/common/reports/codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@
SBD_WATCHDOG_TEST_ERROR = M("SBD_WATCHDOG_TEST_ERROR")
SBD_WATCHDOG_TEST_MULTIPLE_DEVICES = M("SBD_WATCHDOG_TEST_MULTIPLE_DEVICES")
SBD_WATCHDOG_TEST_FAILED = M("SBD_WATCHDOG_TEST_FAILED")
# Codes for report messages about distributing the PCS configuration to nodes
PCS_CONFIG_ACCEPTED_BY_NODE = M("PCS_CONFIG_ACCEPTED_BY_NODE")
PCS_CONFIG_DISTRIBUTION_STARTED = M("PCS_CONFIG_DISTRIBUTION_STARTED")
SERVICE_ACTION_STARTED = M("SERVICE_ACTION_STARTED")
SERVICE_ACTION_FAILED = M("SERVICE_ACTION_FAILED")
SERVICE_ACTION_SUCCEEDED = M("SERVICE_ACTION_SUCCEEDED")
Expand Down
29 changes: 29 additions & 0 deletions pcs/common/reports/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -6582,3 +6582,32 @@ class CibNvsetAmbiguousProvideNvsetId(ReportItemMessage):
@property
def message(self) -> str:
return "Several options sets exist, please specify an option set ID"


@dataclass(frozen=True)
class PcsConfigDistributionStarted(ReportItemMessage):
    """
    Sending of the PCS configuration to nodes has begun
    """

    _code = codes.PCS_CONFIG_DISTRIBUTION_STARTED

    @property
    def message(self) -> str:
        text = "Distributing PCS config..."
        return text


@dataclass(frozen=True)
class PcsConfigAcceptedByNode(ReportItemMessage):
    """
    A node reported that it stored the PCS configuration sent to it

    node -- name of the node which saved the configuration
    """

    node: str
    _code = codes.PCS_CONFIG_ACCEPTED_BY_NODE

    @property
    def message(self) -> str:
        text = f"{self.node}: PCS config saved"
        return text
63 changes: 63 additions & 0 deletions pcs/lib/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
get_resources,
)
from pcs.lib.communication import cluster
from pcs.lib.communication.cluster import (
SetPcsConfig,
GetPcsConfig,
)
from pcs.lib.tools import dict_to_environment_file
from pcs.lib.communication.corosync import (
CheckCorosyncOffline,
DistributeCorosyncConf,
Expand Down Expand Up @@ -2179,3 +2184,61 @@ def corosync_authkey_change(
com_cmd = ReloadCorosyncConf(env.report_processor)
com_cmd.set_targets(online_cluster_target_list)
run_and_raise(env.get_node_communicator(), com_cmd)


def _set_corosync_notifyd_enabled(env, node, enabled):
    """
    Write the ENABLE_COROSYNC_NOTIFYD setting to the PCS config of online nodes

    env -- library environment
    node -- names of nodes to configure; when empty or None, all nodes named
        in corosync.conf are used
    enabled -- True stores "yes", False stores "no"

    Raises LibraryError when no target nodes can be determined or when
    reading node names from corosync.conf produced an error report.
    """
    # Imported here so the function does not rely on a module-level import
    # being present.
    from pcs.lib.errors import LibraryError

    node_list, report_list = get_existing_nodes_names(env.get_corosync_conf())
    # corosync.conf nodes are only needed when the caller did not pick any.
    if not node and not node_list:
        report_list.append(
            ReportItem.error(reports.messages.CorosyncConfigNoNodesDefined())
        )
    # The collected reports must reach the user; an error means there is
    # nothing sensible to send the configuration to.
    if env.report_processor.report_list(report_list).has_errors:
        raise LibraryError()

    target_list = env.get_node_target_factory().get_target_list(
        node if node else node_list,
    )

    # Only nodes which are reachable right now get the new configuration.
    com_cmd = GetOnlineTargets(env.report_processor)
    com_cmd.set_targets(target_list)
    online_targets = run_and_raise(env.get_node_communicator(), com_cmd)

    # Every node receives the same text, so it is rendered only once.
    config_text = dict_to_environment_file(
        {"ENABLE_COROSYNC_NOTIFYD": "yes" if enabled else "no"}
    )
    com_cmd = SetPcsConfig(env.report_processor)
    for target in online_targets:
        com_cmd.add_request(target, config_text)
    run_and_raise(env.get_node_communicator(), com_cmd)


def enable_corosync_notifyd(env, node=None):
    """
    Store ENABLE_COROSYNC_NOTIFYD="yes" in the PCS config of online nodes

    env -- library environment
    node -- names of nodes to configure; all corosync.conf nodes if not set
    """
    _set_corosync_notifyd_enabled(env, node, True)


def disable_corosync_notifyd(env, node=None):
    """
    Store ENABLE_COROSYNC_NOTIFYD="no" in the PCS config of online nodes

    env -- library environment
    node -- names of nodes to configure; all corosync.conf nodes if not set
    """
    _set_corosync_notifyd_enabled(env, node, False)
1 change: 1 addition & 0 deletions pcs/lib/commands/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ def _get_local_services_status(runner: CommandRunner) -> List[_ServiceStatus]:
service_def = [
# (service name, display even if not enabled nor running)
("corosync", True),
("corosync-notifyd", True),
("pacemaker", True),
("pacemaker_remote", False),
("pcsd", True),
Expand Down
76 changes: 75 additions & 1 deletion pcs/lib/communication/cluster.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from pcs.common import reports
from pcs.common.node_communicator import RequestData

from pcs.common.node_communicator import (
Request,
RequestData,
)
from pcs.lib.tools import environment_file_to_dict

from pcs.common.reports.item import ReportItem
from pcs.lib.corosync import live as corosync_live
from pcs.lib.communication.tools import (
Expand Down Expand Up @@ -124,3 +130,71 @@ def _process_response(self, response):

def on_complete(self):
return self._has_failure, self._quorum_status


class SetPcsConfig(
    SimpleResponseProcessingMixin, AllAtOnceStrategyMixin, RunRemotelyBase
):
    """
    Send PCS configuration text to nodes via "remote/set_pcs_config"

    Each target is paired with its own configuration text by add_request.
    """

    def __init__(self, report_processor):
        super().__init__(report_processor)
        # (target, config text) pairs queued by add_request
        self._request_data_list = []

    def add_request(self, target, config):
        """
        Queue a configuration to be sent to a target

        target -- node target the configuration is meant for
        config -- configuration text sent as the "config" parameter
        """
        self._request_data_list.append((target, config))

    def before(self):
        started = reports.messages.PcsConfigDistributionStarted()
        self._report(ReportItem.info(started))

    def _prepare_initial_requests(self):
        # One request per queued (target, config) pair, in queueing order.
        request_list = []
        for target, config in self._request_data_list:
            payload = RequestData(
                "remote/set_pcs_config", [("config", config)]
            )
            request_list.append(Request(target, payload))
        return request_list

    def _get_success_report(self, node_label):
        accepted = reports.messages.PcsConfigAcceptedByNode(node_label)
        return ReportItem.info(accepted)


class GetPcsConfig(AllSameDataMixin, AllAtOnceStrategyMixin, RunRemotelyBase):
    """
    Fetch the PCS configuration from nodes via "remote/get_pcs_config"

    on_complete returns a list of {"node": <label>, "config": <dict or None>}
    with one entry per target; "config" is None for targets whose
    configuration could not be obtained.
    """

    def __init__(self, report_processor):
        super().__init__(report_processor)
        # results collected from nodes which answered successfully
        self._config_list = []
        # labels of the nodes present in self._config_list
        self._successful_target_list = []

    def _get_request_data(self):
        return RequestData("remote/get_pcs_config")

    def _process_response(self, response):
        report_item = response_to_report_item(
            response, severity=reports.ReportItemSeverity.WARNING
        )
        if report_item is not None:
            # Always pass on the report built from the response so the reason
            # of the failure is not lost. NOTE(review): no PCS-config-specific
            # "unable to get" message is visible in reports.messages, so none
            # is referenced here -- confirm before adding one.
            self._report(report_item)
            return
        node_label = response.request.target.label
        self._config_list.append(
            {
                "node": node_label,
                "config": environment_file_to_dict(response.data),
            }
        )
        self._successful_target_list.append(node_label)

    def on_complete(self):
        # Targets without a successful response are listed with no config so
        # that the result covers every requested node.
        successful_labels = set(self._successful_target_list)
        for node in self._target_list:
            if node.label not in successful_labels:
                self._config_list.append({"node": node.label, "config": None})
        return self._config_list
1 change: 1 addition & 0 deletions pcs/settings.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pcsd_exec_location = "@LIB_DIR@/pcsd/"
pcsd_log_location = "@LOCALSTATEDIR@/log/pcsd/pcsd.log"
pcsd_default_port = 2224
pcsd_config = "@CONF_DIR@/pcsd"
# KEY=value style file with pcs options; ENABLE_COROSYNC_NOTIFYD is read from it
pcs_config = "@CONF_DIR@/pcs"
cib_dir = "@PCMK_CIB_DIR@"
pacemaker_uname = "@PCMK_USER@"
pacemaker_gname = "@PCMK_GROUP@"
Expand Down
12 changes: 12 additions & 0 deletions pcs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
timeout_to_seconds as get_timeout_seconds,
validate_id,
)
from pcs.lib.communication.nodes import GetOnlineTargets
import configparser

# pylint: disable=invalid-name
# pylint: disable=too-many-branches
Expand Down Expand Up @@ -2899,3 +2901,13 @@ def get_token_from_file(file_name: str) -> str:
except OSError as e:
err(f"Unable to read file '{file_name}': {e}", exit_after_error=False)
raise SystemExit(1) from e

def get_enable_corosync_notifyd():
    """
    Return the ENABLE_COROSYNC_NOTIFYD value from the pcs config file

    The value is returned as a string with surrounding whitespace and quotes
    removed, so both ENABLE_COROSYNC_NOTIFYD=yes and
    ENABLE_COROSYNC_NOTIFYD="yes" give "yes". When the file does not exist or
    does not define the option, "no" is returned: callers consult this on
    every cluster start and stop, so an unconfigured node must not fail.
    """
    default = "no"
    try:
        with open(settings.pcs_config, "r", encoding="utf-8") as f:
            # The file holds bare KEY=value lines; configparser insists on a
            # section header, so a throwaway one is put in front.
            config_str = "[dummy]\n" + f.read()
    except FileNotFoundError:
        # The option has never been set on this node.
        return default
    except OSError as e:
        err("Unable to read %s: %s" % (settings.pcs_config, e.strerror))
        # Reached only if err() returns instead of terminating the program.
        return default

    # Interpolation is turned off so a "%" in any value is taken literally.
    config = configparser.ConfigParser(interpolation=None)
    try:
        config.read_string(config_str)
    except configparser.Error as e:
        err("Unable to parse %s: %s" % (settings.pcs_config, e))
        return default

    value = config.get("dummy", "ENABLE_COROSYNC_NOTIFYD", fallback=default)
    # Environment files commonly quote values; callers compare against a bare
    # "yes", so the quotes are stripped here.
    return value.strip().strip("\"'")
48 changes: 48 additions & 0 deletions pcsd/remote.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ def remote(params, request, auth_user)
:sbd_enable => method(:sbd_enable),
:remove_stonith_watchdog_timeout=> method(:remove_stonith_watchdog_timeout),
:set_stonith_watchdog_timeout_to_zero => method(:set_stonith_watchdog_timeout_to_zero),
:set_pcs_config => method(:set_pcs_config),
:get_pcs_config => method(:get_pcs_config),
# lib api:
# /api/v1/sbd-enable-sbd/v1
:remote_enable_sbd => method(:remote_enable_sbd),
Expand Down Expand Up @@ -3077,3 +3079,49 @@ def remove_nodes_from_cib(params, request, auth_user)
return 400, "Invalid input data format: #{e.message}"
end
end

# Replace the content of the PCS_CONFIG file with the text in param[:config].
#
# Requires write permission for the local cluster. Returns 403 when the
# permission is missing, 400 when the "config" parameter is not present, the
# result of pcsd_error when the file cannot be written and the result of
# pcsd_success otherwise.
def set_pcs_config(param, request, auth_user)
  unless allowed_for_local_cluster(auth_user, Permissions::WRITE)
    return 403, 'Permission denied'
  end
  config = param[:config]
  unless config
    return [400, 'Parameter "config" required']
  end

  begin
    # The file is opened without truncation; mode 'w' would empty it before
    # the exclusive lock is held, letting a concurrent reader see no content.
    File.open(PCS_CONFIG, File::WRONLY | File::CREAT) do |file|
      file.flock(File::LOCK_EX)
      file.truncate(0)
      file.write(config)
      # Leaving the block flushes and closes the file; the lock is dropped by
      # the close, so buffered data is never written after unlocking.
    end
  rescue => e
    return pcsd_error("Unable to save PCS configuration: #{e}")
  end
  return pcsd_success('PCS configuration saved.')
end

# Return the content of the PCS_CONFIG file.
#
# Requires read permission for the local cluster. Returns 403 when the
# permission is missing, the result of pcsd_error when the file cannot be read
# and [200, <file content>] otherwise.
def get_pcs_config(param, request, auth_user)
  unless allowed_for_local_cluster(auth_user, Permissions::READ)
    return 403, 'Permission denied'
  end
  content = ''
  begin
    # The block form of File.open closes the file on every path.
    content = File.open(PCS_CONFIG, 'r') do |config_file|
      begin
        # A shared lock keeps writers out while the file is being read.
        config_file.flock(File::LOCK_SH)
        config_file.readlines().join('')
      ensure
        config_file.flock(File::LOCK_UN)
      end
    end
  rescue => e
    return pcsd_error("Unable to get PCS configuration: #{e}")
  end
  return [200, content]
end
2 changes: 2 additions & 0 deletions pcsd/settings.rb.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ SBD_CONFIG = '@SBDCONFDIR@/sbd'

BOOTH_CONFIG_DIR='@BOOTHCONFDIR@'

# File written by set_pcs_config and read by get_pcs_config
PCS_CONFIG = '@CONF_DIR@/pcs'

SUPERUSER = '@PCMK_USER@'
ADMIN_GROUP = '@PCMK_GROUP@'

Expand Down