diff --git a/ocs_ci/ocs/node.py b/ocs_ci/ocs/node.py
index c594472dd711..bc85693e833b 100644
--- a/ocs_ci/ocs/node.py
+++ b/ocs_ci/ocs/node.py
@@ -2947,3 +2947,64 @@ def verify_crypt_device_present_onnode(node, vol_handle):
     log.info(f"Crypt device for volume handle {vol_handle} present on the node: {node}")
     return True
+
+
+def get_worker_node_where_ceph_toolbox_not_running():
+    """
+    Get the worker nodes on which the ceph toolbox pod is not running.
+
+    Returns:
+        list: Names of the worker nodes other than the node on which the
+            ceph toolbox pod is currently running
+
+    """
+    ct_pod = pod.get_ceph_tools_pod()
+    # Identify the node on which the ceph toolbox pod is currently running
+    ct_pod_running_node_name = ct_pod.data["spec"].get("nodeName")
+
+    worker_nodes = get_worker_nodes()
+    log.info(f"Worker nodes: {worker_nodes}")
+
+    other_nodes = [node for node in worker_nodes if node != ct_pod_running_node_name]
+    return other_nodes
+
+
+def apply_node_affinity_for_ceph_toolbox(node_name):
+    """
+    Apply node affinity to the ceph toolbox pod so that it is scheduled only
+    on the given node.
+
+    Args:
+        node_name (str): Name of the node to be set in the node affinity
+
+    Returns:
+        bool: True if the ceph toolbox pod is running on the given node after
+            the node affinity is applied, False otherwise
+
+    """
+    resource_name = constants.DEFAULT_CLUSTERNAME
+    if config.DEPLOYMENT["external_mode"]:
+        resource_name = constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE
+
+    storagecluster_obj = ocp.OCP(
+        resource_name=resource_name,
+        namespace=config.ENV_DATA["cluster_namespace"],
+        kind=constants.STORAGECLUSTER,
+    )
+    nodeaffinity = (
+        f'{{"toolbox": {{"nodeAffinity": {{"requiredDuringSchedulingIgnoredDuringExecution": '
+        f'{{"nodeSelectorTerms": [{{"matchExpressions": [{{"key": "kubernetes.io/hostname",'
+        f'"operator": "In",'
+        f'"values": ["{node_name}"]}}]}}]}}}}}}}}'
+    )
+    param = f'{{"spec": {{"placement": {nodeaffinity}}}}}'
+    storagecluster_obj.patch(params=param, format_type="merge")
+    log.info(
+        f"Successfully applied node affinity for the ceph toolbox pod with node {node_name}"
+    )
+
+    ct_new_pod = pod.get_ceph_tools_pod()
+    # Identify the node on which the ceph toolbox pod is running after it is
+    # rescheduled due to the node affinity
+    ct_new_pod_running_node_name = ct_new_pod.data["spec"].get("nodeName")
+    if node_name == ct_new_pod_running_node_name:
+        log.info(
+            f"Ceph toolbox pod failed over to the node {ct_new_pod_running_node_name} "
+            f"given in the node affinity"
+        )
+        return True
+    return False
diff --git a/tests/functional/pod_and_daemons/test_cephtoolbox_pod_nodeaffinity.py b/tests/functional/pod_and_daemons/test_cephtoolbox_pod_nodeaffinity.py
new file mode 100644
index 000000000000..ff96cf60b850
--- /dev/null
+++ b/tests/functional/pod_and_daemons/test_cephtoolbox_pod_nodeaffinity.py
@@ -0,0 +1,70 @@
+import logging
+import pytest
+import time
+
+from ocs_ci.ocs import node
+from ocs_ci.ocs.resources import pod
+from ocs_ci.framework.pytest_customization.marks import bugzilla, magenta_squad
+from ocs_ci.framework.testlib import tier1
+from ocs_ci.helpers.sanity_helpers import Sanity
+from ocs_ci.ocs.node import (
+    unschedule_nodes,
+    drain_nodes,
+    schedule_nodes,
+)
+
+log = logging.getLogger(__name__)
+
+
+@tier1
+@magenta_squad
+@bugzilla("2249640")
+class TestCephtoolboxPod:
+    @pytest.fixture(autouse=True)
+    def init_sanity(self):
+        """
+        Initialize Sanity instance
+
+        """
+        self.sanity_helpers = Sanity()
+
+    def test_node_affinity_to_ceph_toolbox_pod(self):
+        # This test verifies that the ceph toolbox pod fails over to the
+        # selected node after node affinity is applied
+        other_nodes = node.get_worker_node_where_ceph_toolbox_not_running()
+        # Apply node affinity with a node other than the one the pod is currently running on
+        assert node.apply_node_affinity_for_ceph_toolbox(other_nodes[0])
+
+    def test_reboot_node_affinity_node(self):
+        # This test verifies that the ceph toolbox pod runs only on the node given in the node affinity.
+        # The node is cordoned, drained and then made schedulable again after the node affinity is applied.
+        # The expectation is that the pod comes up only on the node mentioned in the affinity.
+
+        other_nodes = node.get_worker_node_where_ceph_toolbox_not_running()
+        node_name = other_nodes[0]
+        assert node.apply_node_affinity_for_ceph_toolbox(node_name)
+
+        # Unschedule the node on which the ceph toolbox pod is running
+        unschedule_nodes([node_name])
+        log.info(f"Node {node_name} unscheduled successfully")
+
+        # Drain the node
+        drain_nodes([node_name])
+        log.info(f"Node {node_name} drained successfully")
+
+        # Make the node schedulable again
+        schedule_nodes([node_name])
+        log.info(f"Scheduled the node {node_name}")
+        log.info(
+            "Sleeping for 3 minutes before validating the node on which the ceph toolbox pod is running"
+        )
+        time.sleep(180)
+
+        ct_pod = pod.get_ceph_tools_pod()
+        # Identify the node on which the ceph toolbox pod is running after the node drain
+        ct_pod_running_node_name = ct_pod.data["spec"].get("nodeName")
+        assert node_name == ct_pod_running_node_name, (
+            f"Ceph toolbox pod is running on node {ct_pod_running_node_name} "
+            f"instead of the node {node_name} given in the node affinity"
+        )
+        log.info(
+            f"Ceph toolbox pod is running only on node {ct_pod_running_node_name}, "
+            f"which is the node given in the node affinity"
+        )
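
Note for reviewers: the brace-escaped f-string in apply_node_affinity_for_ceph_toolbox() is hard to read at a glance, so below is a minimal sketch (not part of the change) of the merge patch it renders against the StorageCluster spec, written as a plain dict for readability; "worker-1" is a placeholder node name used only for illustration.

# Illustrative sketch only: the JSON merge patch that
# apply_node_affinity_for_ceph_toolbox() builds and passes to
# storagecluster_obj.patch(params=..., format_type="merge").
import json

node_name = "worker-1"  # placeholder node name, for illustration only

toolbox_placement_patch = {
    "spec": {
        "placement": {
            "toolbox": {
                "nodeAffinity": {
                    "requiredDuringSchedulingIgnoredDuringExecution": {
                        "nodeSelectorTerms": [
                            {
                                "matchExpressions": [
                                    {
                                        "key": "kubernetes.io/hostname",
                                        "operator": "In",
                                        "values": [node_name],
                                    }
                                ]
                            }
                        ]
                    }
                }
            }
        }
    }
}

# The param string built in the helper is equivalent to this JSON document
print(json.dumps(toolbox_placement_patch, indent=2))

Pinning on kubernetes.io/hostname with an "In" operator restricts the toolbox pod to exactly the listed node, which is what both tests rely on when they later drain the node and check where the pod comes back up.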