From afd6b4093f80aea6d945944a4274493c4ee789c8 Mon Sep 17 00:00:00 2001 From: vavuthu Date: Wed, 27 Nov 2024 12:09:38 +0530 Subject: [PATCH 01/44] RBD namespace in external cluster Signed-off-by: vavuthu --- conf/README.md | 2 + .../external_rhcs_with_rbd_namespace.yaml | 4 ++ .../helpers/external_cluster_helpers.py | 38 +++++++++++++++++-- ocs_ci/ocs/exceptions.py | 4 ++ 4 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 conf/ocsci/external_rhcs_with_rbd_namespace.yaml diff --git a/conf/README.md b/conf/README.md index daf241d91fe..6f157de3ed7 100644 --- a/conf/README.md +++ b/conf/README.md @@ -416,6 +416,8 @@ Configuration specific to external Ceph cluster * `external_cluster_details` - base64 encoded data of json output from exporter script * `rgw_secure` - boolean parameter which defines if external Ceph cluster RGW is secured using SSL * `rgw_cert_ca` - url pointing to CA certificate used to sign certificate for RGW with SSL +* `use_rbd_namespace` - boolean parameter to use RBD namespace in pool +* `rbd_namespace` - Name of RBD namespace to use in pool ##### login diff --git a/conf/ocsci/external_rhcs_with_rbd_namespace.yaml b/conf/ocsci/external_rhcs_with_rbd_namespace.yaml new file mode 100644 index 00000000000..c88a261137d --- /dev/null +++ b/conf/ocsci/external_rhcs_with_rbd_namespace.yaml @@ -0,0 +1,4 @@ +# Config file to use RBD namespace in a pool +--- +EXTERNAL_MODE: + use_rbd_namespace: True diff --git a/ocs_ci/deployment/helpers/external_cluster_helpers.py b/ocs_ci/deployment/helpers/external_cluster_helpers.py index ebd1370a677..b6dce48f683 100644 --- a/ocs_ci/deployment/helpers/external_cluster_helpers.py +++ b/ocs_ci/deployment/helpers/external_cluster_helpers.py @@ -7,17 +7,19 @@ import logging import re import tempfile +import uuid from ocs_ci.framework import config from ocs_ci.ocs import defaults from ocs_ci.ocs.exceptions import ( + ExternalClusterCephfsMissing, + ExternalClusterCephSSHAuthDetailsMissing, ExternalClusterExporterRunFailed, + ExternalClusterRBDNamespaceCreationFailed, ExternalClusterRGWEndPointMissing, ExternalClusterRGWEndPointPortMissing, - ExternalClusterCephSSHAuthDetailsMissing, - ExternalClusterObjectStoreUserCreationFailed, - ExternalClusterCephfsMissing, ExternalClusterNodeRoleNotFound, + ExternalClusterObjectStoreUserCreationFailed, ) from ocs_ci.ocs.resources import pod from ocs_ci.ocs.resources.csv import get_csv_name_start_with_prefix @@ -132,6 +134,12 @@ def get_external_cluster_details(self): ceph_user = config.EXTERNAL_MODE["run_as_user"] params = f"{params} --run-as-user {ceph_user}" + if config.EXTERNAL_MODE.get("use_rbd_namespace"): + rbd_namespace = config.EXTERNAL_MODE.get( + "rbd_namespace" + ) or self.create_rbd_namespace(rbd=rbd_name) + params = f"{params} --rados-namespace {rbd_namespace}" + out = self.run_exporter_script(params=params) # encode the exporter script output to base64 @@ -475,6 +483,30 @@ def is_rgw_user_exists(self, user): logger.debug(f"RGW users: {rgw_user_list}") return True if user in rgw_user_list else False + def create_rbd_namespace(self, rbd, namespace=None): + """ + Create RBD namespace + + Args: + rbd (str): RBD pool name where namespace has to create + namespace (str): Name of RBD namespace + + Returns: + str: RBD Namepsace name + + Raises: + ExternalClusterRBDNamespaceCreationFailed: In case fails to create RBD namespace + + """ + namespace = namespace or f"rbd_namespace_{uuid.uuid4().hex[:8]}" + logger.info(f"creating RBD namespace {namespace}") + cmd = f"rbd namespace create 
{rbd}/{namespace}" + retcode, out, err = self.rhcs_conn.exec_cmd(cmd) + if retcode != 0: + logger.error(f"Failed to create RBD namespace in {rbd}. Error: {err}") + raise ExternalClusterRBDNamespaceCreationFailed + return namespace + def generate_exporter_script(): """ diff --git a/ocs_ci/ocs/exceptions.py b/ocs_ci/ocs/exceptions.py index d0c1efe6918..3c2e0b6dcd3 100644 --- a/ocs_ci/ocs/exceptions.py +++ b/ocs_ci/ocs/exceptions.py @@ -288,6 +288,10 @@ class ExternalClusterCephSSHAuthDetailsMissing(Exception): pass +class ExternalClusterRBDNamespaceCreationFailed(Exception): + pass + + class CredReqSecretNotFound(Exception): pass From cd62ebd861ef63099bfbb89a5b1849d0f8cfb352 Mon Sep 17 00:00:00 2001 From: Petr Balogh Date: Wed, 27 Nov 2024 17:16:27 +0100 Subject: [PATCH 02/44] Add new production configuration for job intransit encryption qe-trigger-vsphere-ipi-intransit-encryption-1az-rhcos-vsan-3m-6w Related ticket: https://issues.redhat.com/browse/OCSQE-2963 Signed-off-by: Petr Balogh --- ...az_rhcos_vsan_3m_6w_intransit_encryption.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 conf/deployment/vsphere/ipi_1az_rhcos_vsan_3m_6w_intransit_encryption.yaml diff --git a/conf/deployment/vsphere/ipi_1az_rhcos_vsan_3m_6w_intransit_encryption.yaml b/conf/deployment/vsphere/ipi_1az_rhcos_vsan_3m_6w_intransit_encryption.yaml new file mode 100644 index 00000000000..c4c864311f6 --- /dev/null +++ b/conf/deployment/vsphere/ipi_1az_rhcos_vsan_3m_6w_intransit_encryption.yaml @@ -0,0 +1,16 @@ +--- +# This config is suppose to work on most of DCs we have. +DEPLOYMENT: + allow_lower_instance_requirements: false +ENV_DATA: + platform: 'vsphere' + deployment_type: 'ipi' + worker_replicas: 6 + master_replicas: 3 + master_num_cpus: '16' + master_memory: '65536' + fio_storageutilization_min_mbps: 10.0 + in_transit_encryption: true +REPORTING: + polarion: + deployment_id: 'OCS-4690' From d4731813739c27e85f290730bd32f853777a5db9 Mon Sep 17 00:00:00 2001 From: vavuthu Date: Thu, 28 Nov 2024 11:19:20 +0530 Subject: [PATCH 03/44] remove gather_common_ceph_resources in external mg script Signed-off-by: vavuthu --- scripts/bash/mg_external.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/bash/mg_external.sh b/scripts/bash/mg_external.sh index 968bad726c6..30a18c99c0b 100644 --- a/scripts/bash/mg_external.sh +++ b/scripts/bash/mg_external.sh @@ -40,7 +40,6 @@ if [ -z "$TOOL_POD_NAME" ]; then fi -gather_common_ceph_resources "${BASE_COLLECTION_PATH}" CEPH_COLLECTION_PATH="${BASE_COLLECTION_PATH}/ceph" COMMAND_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands COMMAND_JSON_OUTPUT_DIR=${CEPH_COLLECTION_PATH}/must_gather_commands_json_output From e1487bb7c5c69eceefee86b094705c16ddb083e6 Mon Sep 17 00:00:00 2001 From: vavuthu Date: Thu, 28 Nov 2024 15:32:24 +0530 Subject: [PATCH 04/44] redirect debugging output to stdout for mg external script Signed-off-by: vavuthu --- ocs_ci/ocs/utils.py | 3 ++- scripts/bash/mg_external.sh | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ocs_ci/ocs/utils.py b/ocs_ci/ocs/utils.py index f67d2701e8b..9212f467f23 100644 --- a/ocs_ci/ocs/utils.py +++ b/ocs_ci/ocs/utils.py @@ -1010,7 +1010,8 @@ def collect_ceph_external(path): current_dir = Path(__file__).parent.parent.parent script_path = os.path.join(current_dir, "scripts", "bash", "mg_external.sh") run_cmd( - f"sh {script_path} {os.path.join(path, 'ceph_external')} {kubeconfig_path}", + f"sh {script_path} {os.path.join(path, 'ceph_external')} {kubeconfig_path} " + 
f"{ocsci_config.ENV_DATA['cluster_namespace']}", timeout=140, ) except Exception as ex: diff --git a/scripts/bash/mg_external.sh b/scripts/bash/mg_external.sh index 968bad726c6..b5cfec607ce 100644 --- a/scripts/bash/mg_external.sh +++ b/scripts/bash/mg_external.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash + +# redirect the debugging output to stdout +exec 2>&1 set -x # Function to print usage information From 7d25c9954253a5d77a606b71e8d693ee2a91fa44 Mon Sep 17 00:00:00 2001 From: Nagendra Reddy Date: Mon, 2 Dec 2024 17:38:46 +0530 Subject: [PATCH 05/44] deprecated an amq test (#10965) Signed-off-by: nagendra202 --- .../functional/workloads/app/amq/test_amq_streamer_creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/functional/workloads/app/amq/test_amq_streamer_creation.py b/tests/functional/workloads/app/amq/test_amq_streamer_creation.py index 35827ea3320..646169969b5 100644 --- a/tests/functional/workloads/app/amq/test_amq_streamer_creation.py +++ b/tests/functional/workloads/app/amq/test_amq_streamer_creation.py @@ -35,7 +35,7 @@ class TestAMQBasics(E2ETest): ) ], ) - def test_install_and_run_amq_benchmark( + def deprecated_test_install_and_run_amq_benchmark( self, interface, test_fixture_amq, From 4588eaca346d780b42d1ffbc845b48f259b68a70 Mon Sep 17 00:00:00 2001 From: Jilju Joy Date: Tue, 3 Dec 2024 12:40:55 +0530 Subject: [PATCH 06/44] Remove unwanted tests that are already disabled (#10950) Signed-off-by: Jilju Joy --- ...ph_daemon_kill_during_resource_creation.py | 351 ---------------- ...ph_daemon_kill_during_resource_deletion.py | 345 ---------------- ...mon_kill_during_pvc_pod_creation_and_io.py | 373 ------------------ 3 files changed, 1069 deletions(-) delete mode 100644 tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_creation.py delete mode 100644 tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_deletion.py delete mode 100644 tests/functional/pv/pv_services/test_daemon_kill_during_pvc_pod_creation_and_io.py diff --git a/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_creation.py b/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_creation.py deleted file mode 100644 index 4908a53c12e..00000000000 --- a/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_creation.py +++ /dev/null @@ -1,351 +0,0 @@ -import logging -from concurrent.futures import ThreadPoolExecutor -import pytest -from functools import partial - -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest -from ocs_ci.framework import config -from ocs_ci.ocs import constants, node -from ocs_ci.ocs.resources.pvc import get_all_pvcs -from ocs_ci.ocs.resources import pod -from ocs_ci.utility.utils import TimeoutSampler, ceph_health_check -from ocs_ci.helpers import helpers, disruption_helpers - -log = logging.getLogger(__name__) - - -@green_squad -@pytest.mark.skip( - reason="This test is disabled because this scenario is covered in the " - "test test_daemon_kill_during_pvc_pod_creation_deletion_and_io.py" -) -@pytest.mark.parametrize( - argnames=["interface", "operation_to_disrupt", "resource_to_delete"], - argvalues=[ - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pvc", "mgr"], - marks=pytest.mark.polarion_id("OCS-1131"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pod", "mgr"], - marks=pytest.mark.polarion_id("OCS-1130"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "run_io", "mgr"], - 
marks=pytest.mark.polarion_id("OCS-1132"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pvc", "mon"], - marks=pytest.mark.polarion_id("OCS-1117"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pod", "mon"], - marks=pytest.mark.polarion_id("OCS-1116"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "run_io", "mon"], - marks=pytest.mark.polarion_id("OCS-1118"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pvc", "osd"], - marks=pytest.mark.polarion_id("OCS-1124"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "create_pod", "osd"], - marks=pytest.mark.polarion_id("OCS-1123"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "run_io", "osd"], - marks=pytest.mark.polarion_id("OCS-1125"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pvc", "mgr"], - marks=pytest.mark.polarion_id("OCS-1103"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pod", "mgr"], - marks=pytest.mark.polarion_id("OCS-1102"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "run_io", "mgr"], - marks=pytest.mark.polarion_id("OCS-1106"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pvc", "mon"], - marks=pytest.mark.polarion_id("OCS-1089"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pod", "mon"], - marks=pytest.mark.polarion_id("OCS-1087"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "run_io", "mon"], - marks=pytest.mark.polarion_id("OCS-1092"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pvc", "osd"], - marks=pytest.mark.polarion_id("OCS-1096"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pod", "osd"], - marks=pytest.mark.polarion_id("OCS-1095"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "run_io", "osd"], - marks=pytest.mark.polarion_id("OCS-1099"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pvc", "mds"], - marks=pytest.mark.polarion_id("OCS-1110"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "create_pod", "mds"], - marks=pytest.mark.polarion_id("OCS-1109"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "run_io", "mds"], - marks=pytest.mark.polarion_id("OCS-1113"), - ), - ], -) -class TestDaemonKillDuringResourceCreation(ManageTest): - """ - Base class for ceph daemon kill related disruption tests - """ - - @pytest.fixture(autouse=True) - def setup(self, project_factory): - """ - Create Project for the test - - Returns: - OCP: An OCP instance of project - """ - self.proj_obj = project_factory() - - def test_ceph_daemon_kill_during_resource_creation( - self, - interface, - operation_to_disrupt, - resource_to_delete, - multi_pvc_factory, - pod_factory, - ): - """ - Base function for ceph daemon kill disruptive tests. - Deletion of 'resource_to_delete' daemon will be introduced while - 'operation_to_disrupt' is progressing. 
- """ - disruption = disruption_helpers.Disruptions() - pod_functions = { - "mds": partial(pod.get_mds_pods), - "mon": partial(pod.get_mon_pods), - "mgr": partial(pod.get_mgr_pods), - "osd": partial(pod.get_osd_pods), - "rbdplugin": partial(pod.get_plugin_pods, interface=interface), - "cephfsplugin": partial(pod.get_plugin_pods, interface=interface), - "cephfsplugin_provisioner": partial(pod.get_cephfsplugin_provisioner_pods), - "rbdplugin_provisioner": partial(pod.get_rbdfsplugin_provisioner_pods), - "operator": partial(pod.get_operator_pods), - } - - # Get number of pods of type 'resource_to_delete' - num_of_resource_to_delete = len(pod_functions[resource_to_delete]()) - - namespace = self.proj_obj.namespace - - # Fetch the number of Pods and PVCs - initial_num_of_pods = len(pod.get_all_pods(namespace=namespace)) - initial_num_of_pvc = len(get_all_pvcs(namespace=namespace)["items"]) - - disruption.set_resource(resource=resource_to_delete) - disruption.select_daemon() - - access_modes = [constants.ACCESS_MODE_RWO] - if interface == constants.CEPHFILESYSTEM: - access_modes.append(constants.ACCESS_MODE_RWX) - num_of_pvc = 8 - access_mode_dist_ratio = [6, 2] - - # Modify access_modes list to create rbd `block` type volume with - # RWX access mode. RWX is not supported in non-block type rbd - if interface == constants.CEPHBLOCKPOOL: - access_modes.extend( - [ - f"{constants.ACCESS_MODE_RWO}-Block", - f"{constants.ACCESS_MODE_RWX}-Block", - ] - ) - num_of_pvc = 9 - access_mode_dist_ratio = [4, 3, 2] - - executor = ThreadPoolExecutor(max_workers=(2 * num_of_pvc)) - - # Start creation of PVCs - bulk_pvc_create = executor.submit( - multi_pvc_factory, - interface=interface, - project=self.proj_obj, - size=8, - access_modes=access_modes, - access_modes_selection="distribute_random", - access_mode_dist_ratio=access_mode_dist_ratio, - status=constants.STATUS_BOUND, - num_of_pvc=num_of_pvc, - wait_each=False, - timeout=90, - ) - - if operation_to_disrupt == "create_pvc": - # Ensure PVCs are being created before deleting the resource - ret = helpers.wait_for_resource_count_change( - get_all_pvcs, initial_num_of_pvc, namespace, "increase" - ) - assert ret, "Wait timeout: PVCs are not being created." - log.info("PVCs creation has started.") - disruption.kill_daemon() - - pvc_objs = bulk_pvc_create.result() - - # Confirm that PVCs are Bound - for pvc_obj in pvc_objs: - helpers.wait_for_resource_state( - resource=pvc_obj, state=constants.STATUS_BOUND, timeout=120 - ) - pvc_obj.reload() - log.info("Verified: PVCs are Bound.") - - # Start creating pods - bulk_pod_create = executor.submit( - helpers.create_pods, - pvc_objs, - pod_factory, - interface, - 2, - nodes=node.get_worker_nodes(), - ) - - if operation_to_disrupt == "create_pod": - # Ensure that pods are being created before deleting the resource - ret = helpers.wait_for_resource_count_change( - pod.get_all_pods, initial_num_of_pods, namespace, "increase" - ) - assert ret, "Wait timeout: Pods are not being created." 
- log.info("Pods creation has started.") - disruption.kill_daemon() - - pod_objs = bulk_pod_create.result() - - # Verify pods are Running - for pod_obj in pod_objs: - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING, timeout=180 - ) - pod_obj.reload() - log.info("Verified: All pods are Running.") - - # Do setup on pods for running IO - log.info("Setting up pods for running IO.") - for pod_obj in pod_objs: - pvc_info = pod_obj.pvc.get() - if pvc_info["spec"]["volumeMode"] == "Block": - storage_type = "block" - else: - storage_type = "fs" - executor.submit(pod_obj.workload_setup, storage_type=storage_type) - - # Wait for setup on pods to complete - for pod_obj in pod_objs: - log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}") - for sample in TimeoutSampler(360, 2, getattr, pod_obj, "wl_setup_done"): - if sample: - log.info( - f"Setup for running IO is completed on pod " f"{pod_obj.name}." - ) - break - log.info("Setup for running IO is completed on all pods.") - - # Start IO on each pod - for pod_obj in pod_objs: - pvc_info = pod_obj.pvc.get() - if pvc_info["spec"]["volumeMode"] == "Block": - storage_type = "block" - else: - storage_type = "fs" - pod_obj.run_io( - storage_type=storage_type, - size="2G", - runtime=30, - fio_filename=f"{pod_obj.name}_io_file1", - ) - log.info("FIO started on all pods.") - - if operation_to_disrupt == "run_io": - disruption.kill_daemon() - - log.info("Fetching FIO results.") - for pod_obj in pod_objs: - fio_result = pod_obj.get_fio_results() - err_count = fio_result.get("jobs")[0].get("error") - assert ( - err_count == 0 - ), f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}" - log.info(f"FIO is success on pod {pod_obj.name}") - log.info("Verified FIO result on pods.") - - # Delete pods - for pod_obj in pod_objs: - pod_obj.delete(wait=True) - for pod_obj in pod_objs: - pod_obj.ocp.wait_for_delete(pod_obj.name) - - # Verify that PVCs are reusable by creating new pods - pod_objs = helpers.create_pods( - pvc_objs, pod_factory, interface, 2, nodes=node.get_worker_nodes() - ) - - # Verify new pods are Running - for pod_obj in pod_objs: - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING - ) - pod_obj.reload() - log.info("Verified: All new pods are Running.") - - # Run IO on each of the new pods - for pod_obj in pod_objs: - pvc_info = pod_obj.pvc.get() - if pvc_info["spec"]["volumeMode"] == "Block": - storage_type = "block" - else: - storage_type = "fs" - pod_obj.run_io( - storage_type=storage_type, - size="1G", - runtime=10, - fio_filename=f"{pod_obj.name}_io_file2", - ) - - log.info("Fetching FIO results from new pods") - for pod_obj in pod_objs: - fio_result = pod_obj.get_fio_results() - err_count = fio_result.get("jobs")[0].get("error") - assert ( - err_count == 0 - ), f"FIO error on pod {pod_obj.name}. FIO result: {fio_result}" - log.info(f"FIO is success on pod {pod_obj.name}") - log.info("Verified FIO result on new pods.") - - # Verify number of pods of type 'resource_to_delete' - final_num_resource_to_delete = len(pod_functions[resource_to_delete]()) - assert final_num_resource_to_delete == num_of_resource_to_delete, ( - f"Total number of {resource_to_delete} pods is not matching with " - f"initial value. Total number of pods before deleting a pod: " - f"{num_of_resource_to_delete}. 
Total number of pods present now: " - f"{final_num_resource_to_delete}" - ) - - # Check ceph status - ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"]) - log.info("Ceph cluster health is OK") diff --git a/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_deletion.py b/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_deletion.py deleted file mode 100644 index 1b37a481473..00000000000 --- a/tests/functional/pv/pv_services/test_ceph_daemon_kill_during_resource_deletion.py +++ /dev/null @@ -1,345 +0,0 @@ -import logging -from concurrent.futures import ThreadPoolExecutor -import pytest -from functools import partial - -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest -from ocs_ci.framework import config -from ocs_ci.ocs import constants -from ocs_ci.ocs.resources.pvc import get_all_pvcs, delete_pvcs -from ocs_ci.ocs.resources.pod import get_all_pods -from ocs_ci.utility.utils import ceph_health_check, run_cmd -from ocs_ci.ocs.resources.pod import ( - get_mds_pods, - get_mon_pods, - get_mgr_pods, - get_osd_pods, - get_plugin_pods, - get_rbdfsplugin_provisioner_pods, - get_cephfsplugin_provisioner_pods, - get_operator_pods, - delete_pods, -) -from ocs_ci.helpers.helpers import ( - verify_volume_deleted_in_backend, - wait_for_resource_count_change, - default_ceph_block_pool, -) -from ocs_ci.helpers import disruption_helpers - -log = logging.getLogger(__name__) - - -@green_squad -@pytest.mark.skip( - reason="This test is disabled because this scenario is covered in the " - "test test_daemon_kill_during_pvc_pod_creation_deletion_and_io.py" -) -@pytest.mark.parametrize( - argnames=["interface", "operation_to_disrupt", "resource_name"], - argvalues=[ - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pvcs", "mgr"], - marks=pytest.mark.polarion_id("OCS-1134"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pods", "mgr"], - marks=pytest.mark.polarion_id("OCS-1133"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pvcs", "mon"], - marks=pytest.mark.polarion_id("OCS-1120"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pods", "mon"], - marks=pytest.mark.polarion_id("OCS-1119"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pvcs", "osd"], - marks=pytest.mark.polarion_id("OCS-1127"), - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "delete_pods", "osd"], - marks=pytest.mark.polarion_id("OCS-1126"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pvcs", "mgr"], - marks=pytest.mark.polarion_id("OCS-1105"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pods", "mgr"], - marks=pytest.mark.polarion_id("OCS-1104"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pvcs", "mon"], - marks=pytest.mark.polarion_id("OCS-1091"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pods", "mon"], - marks=pytest.mark.polarion_id("OCS-1090"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pvcs", "osd"], - marks=pytest.mark.polarion_id("OCS-1098"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pods", "osd"], - marks=pytest.mark.polarion_id("OCS-1097"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pvcs", "mds"], - marks=pytest.mark.polarion_id("OCS-1112"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "delete_pods", "mds"], - marks=pytest.mark.polarion_id("OCS-1111"), - ), - ], -) -class TestDaemonKillDuringPodPvcDeletion(ManageTest): - """ - Delete ceph daemon while 
deletion of PVCs/pods is progressing - """ - - num_of_pvcs = 12 - pvc_size = 3 - - @pytest.fixture() - def setup_base(self, interface, multi_pvc_factory, pod_factory): - """ - Create PVCs and pods - """ - access_modes = [constants.ACCESS_MODE_RWO] - if interface == constants.CEPHFILESYSTEM: - access_modes.append(constants.ACCESS_MODE_RWX) - - # Modify access_modes list to create rbd `block` type volume with - # RWX access mode. RWX is not supported in filesystem type rbd - if interface == constants.CEPHBLOCKPOOL: - access_modes.extend( - [ - f"{constants.ACCESS_MODE_RWO}-Block", - f"{constants.ACCESS_MODE_RWX}-Block", - ] - ) - - pvc_objs = multi_pvc_factory( - interface=interface, - project=None, - storageclass=None, - size=self.pvc_size, - access_modes=access_modes, - status=constants.STATUS_BOUND, - num_of_pvc=self.num_of_pvcs, - wait_each=False, - ) - - pod_objs = [] - - # Create one pod using each RWO PVC and two pods using each RWX PVC - for pvc_obj in pvc_objs: - pvc_info = pvc_obj.get() - if pvc_info["spec"]["volumeMode"] == "Block": - pod_dict = constants.CSI_RBD_RAW_BLOCK_POD_YAML - raw_block_pv = True - else: - raw_block_pv = False - pod_dict = "" - if pvc_obj.access_mode == constants.ACCESS_MODE_RWX: - pod_obj = pod_factory( - interface=interface, - pvc=pvc_obj, - status=constants.STATUS_RUNNING, - pod_dict_path=pod_dict, - raw_block_pv=raw_block_pv, - ) - pod_objs.append(pod_obj) - pod_obj = pod_factory( - interface=interface, - pvc=pvc_obj, - status=constants.STATUS_RUNNING, - pod_dict_path=pod_dict, - raw_block_pv=raw_block_pv, - ) - pod_objs.append(pod_obj) - - log.info(f"Created {len(pod_objs)} pods.") - return pvc_objs, pod_objs - - def test_ceph_daemon_kill_during_pod_pvc_deletion( - self, interface, operation_to_disrupt, resource_name, setup_base - ): - """ - Kill 'resource_name' daemon while deletion of PVCs/pods is progressing - """ - pvc_objs, self.pod_objs = setup_base - self.namespace = pvc_objs[0].project.namespace - pod_functions = { - "mds": partial(get_mds_pods), - "mon": partial(get_mon_pods), - "mgr": partial(get_mgr_pods), - "osd": partial(get_osd_pods), - "rbdplugin": partial(get_plugin_pods, interface=interface), - "cephfsplugin": partial(get_plugin_pods, interface=interface), - "cephfsplugin_provisioner": partial(get_cephfsplugin_provisioner_pods), - "rbdplugin_provisioner": partial(get_rbdfsplugin_provisioner_pods), - "operator": partial(get_operator_pods), - } - disruption = disruption_helpers.Disruptions() - disruption.set_resource(resource=resource_name) - executor = ThreadPoolExecutor(max_workers=1) - - # Get number of pods of type 'resource_name' - num_of_resource_pods = len(pod_functions[resource_name]()) - - # Fetch the number of Pods and PVCs - initial_num_of_pods = len(get_all_pods(namespace=self.namespace)) - initial_num_of_pvc = len(get_all_pvcs(namespace=self.namespace)["items"]) - - # Fetch PV names - pv_objs = [] - for pvc_obj in pvc_objs: - pvc_obj.reload() - pv_objs.append(pvc_obj.backed_pv_obj) - - # Fetch volume details from pods for the purpose of verification - node_pv_dict = {} - for pod_obj in self.pod_objs: - pod_info = pod_obj.get() - node = pod_info["spec"]["nodeName"] - pvc = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"]["claimName"] - for pvc_obj in pvc_objs: - if pvc_obj.name == pvc: - pvc_obj.reload() - pv = pvc_obj.backed_pv - break - if node in node_pv_dict: - node_pv_dict[node].append(pv) - else: - node_pv_dict[node] = [pv] - - # Do setup for running IO on pods - log.info("Setting up pods for running IO") - for 
pod_obj in self.pod_objs: - pvc_info = pod_obj.pvc.get() - if pvc_info["spec"]["volumeMode"] == "Block": - pod_obj.pvc.storage_type = "block" - else: - pod_obj.pvc.storage_type = "fs" - pod_obj.workload_setup(storage_type=pod_obj.pvc.storage_type) - log.info("Setup for running IO is completed on pods") - - # Start IO on each pod. RWX PVC will be used on two pods. So split the - # size accordingly - log.info("Starting IO on pods") - for pod_obj in self.pod_objs: - if pod_obj.pvc.access_mode == constants.ACCESS_MODE_RWX: - io_size = int((self.pvc_size - 1) / 2) - else: - io_size = self.pvc_size - 1 - pod_obj.run_io( - storage_type=pod_obj.pvc.storage_type, - size=f"{io_size}G", - fio_filename=f"{pod_obj.name}_io", - end_fsync=1, - ) - log.info("IO started on all pods.") - - # Set the daemon to be killed - disruption.select_daemon() - - # Start deleting pods - pod_bulk_delete = executor.submit(delete_pods, self.pod_objs, wait=False) - - if operation_to_disrupt == "delete_pods": - ret = wait_for_resource_count_change( - get_all_pods, initial_num_of_pods, self.namespace, "decrease", 1, 60 - ) - assert ret, "Wait timeout: Pods are not being deleted." - log.info("Pods deletion has started.") - disruption.kill_daemon() - - pod_bulk_delete.result() - - # Verify pods are deleted - for pod_obj in self.pod_objs: - assert pod_obj.ocp.wait_for_delete( - pod_obj.name, 180 - ), f"Pod {pod_obj.name} is not deleted" - log.info("Verified: Pods are deleted.") - - # Verify that the mount point is removed from nodes after deleting pod - for node, pvs in node_pv_dict.items(): - cmd = f"oc debug nodes/{node} --to-namespace={config.ENV_DATA['cluster_namespace']} -- df" - df_on_node = run_cmd(cmd) - for pv in pvs: - assert pv not in df_on_node, ( - f"{pv} is still present on node {node} after " f"deleting the pods." - ) - log.info( - "Verified: mount points are removed from nodes after deleting " "the pods." - ) - - # Fetch image uuid associated with PVCs - pvc_uuid_map = {} - for pvc_obj in pvc_objs: - pvc_uuid_map[pvc_obj.name] = pvc_obj.image_uuid - log.info("Fetched image uuid associated with each PVC") - - # Start deleting PVCs - pvc_bulk_delete = executor.submit(delete_pvcs, pvc_objs) - - if operation_to_disrupt == "delete_pvcs": - ret = wait_for_resource_count_change( - get_all_pvcs, initial_num_of_pvc, self.namespace, "decrease" - ) - assert ret, "Wait timeout: PVCs are not being deleted." - log.info("PVCs deletion has started.") - disruption.kill_daemon() - - pvcs_deleted = pvc_bulk_delete.result() - - assert pvcs_deleted, "Deletion of PVCs failed." - - # Verify PVCs are deleted - for pvc_obj in pvc_objs: - assert pvc_obj.ocp.wait_for_delete( - pvc_obj.name - ), f"PVC {pvc_obj.name} is not deleted" - log.info("Verified: PVCs are deleted.") - - # Verify PVs are deleted - for pv_obj in pv_objs: - assert pv_obj.ocp.wait_for_delete( - pv_obj.name, 120 - ), f"PV {pv_obj.name} is not deleted" - log.info("Verified: PVs are deleted.") - - # Verify PV using ceph toolbox. Image/Subvolume should be deleted. 
- pool_name = default_ceph_block_pool() - for pvc_name, uuid in pvc_uuid_map.items(): - if interface == constants.CEPHBLOCKPOOL: - ret = verify_volume_deleted_in_backend( - interface=interface, image_uuid=uuid, pool_name=pool_name - ) - if interface == constants.CEPHFILESYSTEM: - ret = verify_volume_deleted_in_backend( - interface=interface, image_uuid=uuid - ) - assert ret, ( - f"Volume associated with PVC {pvc_name} still exists " f"in backend" - ) - - # Verify number of pods of type 'resource_name' - final_num_of_resource_pods = len(pod_functions[resource_name]()) - assert final_num_of_resource_pods == num_of_resource_pods, ( - f"Total number of {resource_name} pods is not matching with " - f"initial value. Total number of pods before daemon kill: " - f"{num_of_resource_pods}. Total number of pods present now: " - f"{final_num_of_resource_pods}" - ) - - # Check ceph status - ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"]) - log.info("Ceph cluster health is OK") diff --git a/tests/functional/pv/pv_services/test_daemon_kill_during_pvc_pod_creation_and_io.py b/tests/functional/pv/pv_services/test_daemon_kill_during_pvc_pod_creation_and_io.py deleted file mode 100644 index c10a1c2277f..00000000000 --- a/tests/functional/pv/pv_services/test_daemon_kill_during_pvc_pod_creation_and_io.py +++ /dev/null @@ -1,373 +0,0 @@ -import logging -from concurrent.futures import ThreadPoolExecutor -import pytest -from functools import partial - -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest -from ocs_ci.ocs import constants -from ocs_ci.ocs.resources.pod import ( - get_mds_pods, - get_mon_pods, - get_mgr_pods, - get_osd_pods, - get_plugin_pods, - get_rbdfsplugin_provisioner_pods, - get_cephfsplugin_provisioner_pods, - get_operator_pods, -) -from ocs_ci.utility.utils import TimeoutSampler -from ocs_ci.helpers import helpers, disruption_helpers - -log = logging.getLogger(__name__) - - -@green_squad -@pytest.mark.skip( - reason="This test is disabled because this scenario is covered in the " - "test test_daemon_kill_during_pvc_pod_creation_deletion_and_io.py" -) -@pytest.mark.parametrize( - argnames=["interface", "resource_name"], - argvalues=[ - pytest.param( - *[constants.CEPHBLOCKPOOL, "mgr"], marks=pytest.mark.polarion_id("OCS-1135") - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "mon"], marks=pytest.mark.polarion_id("OCS-1121") - ), - pytest.param( - *[constants.CEPHBLOCKPOOL, "osd"], marks=pytest.mark.polarion_id("OCS-1128") - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "mgr"], - marks=pytest.mark.polarion_id("OCS-1107"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "mon"], - marks=pytest.mark.polarion_id("OCS-1094"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "osd"], - marks=pytest.mark.polarion_id("OCS-1100"), - ), - pytest.param( - *[constants.CEPHFILESYSTEM, "mds"], - marks=pytest.mark.polarion_id("OCS-1114"), - ), - ], -) -class TestDaemonKillDuringCreationOperations(ManageTest): - """ - This class consists of tests which verifies ceph daemon kill during - multiple operations - pods creation, PVC creation and IO - """ - - num_of_pvcs = 6 - pvc_size = 5 - - @pytest.fixture() - def setup(self, interface, multi_pvc_factory, pod_factory): - """ - Create PVCs and pods - """ - access_modes = [constants.ACCESS_MODE_RWO] - if interface == constants.CEPHFILESYSTEM: - access_modes.append(constants.ACCESS_MODE_RWX) - - # Modify access_modes list to create rbd `block` type volume with - # RWX access 
mode. RWX is not supported in filesystem type rbd - if interface == constants.CEPHBLOCKPOOL: - access_modes.extend( - [ - f"{constants.ACCESS_MODE_RWO}-Block", - f"{constants.ACCESS_MODE_RWX}-Block", - ] - ) - - pvc_objs = multi_pvc_factory( - interface=interface, - project=None, - storageclass=None, - size=self.pvc_size, - access_modes=access_modes, - status=constants.STATUS_BOUND, - num_of_pvc=self.num_of_pvcs, - wait_each=False, - ) - - # Set volume mode on PVC objects - for pvc_obj in pvc_objs: - pvc_info = pvc_obj.get() - setattr(pvc_obj, "volume_mode", pvc_info["spec"]["volumeMode"]) - - rwo_pvcs = [ - pvc_obj - for pvc_obj in pvc_objs - if (pvc_obj.access_mode == constants.ACCESS_MODE_RWO) - ] - rwx_pvcs = [ - pvc_obj - for pvc_obj in pvc_objs - if (pvc_obj.access_mode == constants.ACCESS_MODE_RWX) - ] - - num_of_rwo_pvc = len(rwo_pvcs) - num_of_rwx_pvc = len(rwx_pvcs) - - block_rwo_pvcs = [] - for pvc_obj in rwo_pvcs[:]: - if pvc_obj.volume_mode == "Block": - block_rwo_pvcs.append(pvc_obj) - rwo_pvcs.remove(pvc_obj) - - log.info( - f"Created {num_of_rwo_pvc} RWO PVCs in which " - f"{len(block_rwo_pvcs)} are rbd block type." - ) - log.info(f"Created {num_of_rwx_pvc} RWX PVCs.") - - # Select 3 PVCs for IO pods and the remaining PVCs to create new pods - if block_rwo_pvcs: - pvc_objs_for_io_pods = rwo_pvcs[0:1] + rwx_pvcs[0:1] + block_rwo_pvcs[0:1] - pvc_objs_new_pods = rwo_pvcs[1:] + rwx_pvcs[1:] + block_rwo_pvcs[1:] - else: - pvc_objs_for_io_pods = rwo_pvcs[0:2] + rwx_pvcs[0:1] - pvc_objs_new_pods = rwo_pvcs[2:] + rwx_pvcs[1:] - - # Create one pod using each RWO PVC and two pods using each RWX PVC - # for running IO - io_pods = helpers.create_pods(pvc_objs_for_io_pods, pod_factory, interface, 2) - - # Wait for pods to be in Running state - for pod_obj in io_pods: - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING, timeout=90 - ) - pod_obj.reload() - log.info(f"Created {len(io_pods)} pods for running IO.") - - return pvc_objs, io_pods, pvc_objs_new_pods, access_modes - - def test_daemon_kill_during_pvc_pod_creation_and_io( - self, interface, resource_name, setup, multi_pvc_factory, pod_factory - ): - """ - Kill 'resource_name' daemon while PVCs creation, pods - creation and IO operation are progressing. 
- """ - num_of_new_pvcs = 5 - pvc_objs, io_pods, pvc_objs_new_pods, access_modes = setup - proj_obj = pvc_objs[0].project - storageclass = pvc_objs[0].storageclass - - pod_functions = { - "mds": partial(get_mds_pods), - "mon": partial(get_mon_pods), - "mgr": partial(get_mgr_pods), - "osd": partial(get_osd_pods), - "rbdplugin": partial(get_plugin_pods, interface=interface), - "cephfsplugin": partial(get_plugin_pods, interface=interface), - "cephfsplugin_provisioner": partial(get_cephfsplugin_provisioner_pods), - "rbdplugin_provisioner": partial(get_rbdfsplugin_provisioner_pods), - "operator": partial(get_operator_pods), - } - - executor = ThreadPoolExecutor(max_workers=len(io_pods)) - - disruption = disruption_helpers.Disruptions() - disruption.set_resource(resource=resource_name) - - # Get number of pods of type 'resource_name' - resource_pods_num = len(pod_functions[resource_name]()) - - # Do setup for running IO on pods - log.info("Setting up pods for running IO") - for pod_obj in io_pods: - if pod_obj.pvc.volume_mode == "Block": - storage_type = "block" - else: - storage_type = "fs" - executor.submit(pod_obj.workload_setup, storage_type=storage_type) - - # Wait for setup on pods to complete - for pod_obj in io_pods: - log.info(f"Waiting for IO setup to complete on pod {pod_obj.name}") - for sample in TimeoutSampler(360, 2, getattr, pod_obj, "wl_setup_done"): - if sample: - log.info( - f"Setup for running IO is completed on pod " f"{pod_obj.name}." - ) - break - log.info("Setup for running IO is completed on pods") - - # Set daemon to be killed - disruption.select_daemon() - - # Start creating new pods - log.info("Start creating new pods.") - bulk_pod_create = executor.submit( - helpers.create_pods, pvc_objs_new_pods, pod_factory, interface, 2 - ) - - # Start creation of new PVCs - log.info("Start creating new PVCs.") - bulk_pvc_create = executor.submit( - multi_pvc_factory, - interface=interface, - project=proj_obj, - storageclass=storageclass, - size=self.pvc_size, - access_modes=access_modes, - access_modes_selection="distribute_random", - status="", - num_of_pvc=num_of_new_pvcs, - wait_each=False, - ) - - # Start IO on each pod - log.info("Start IO on pods") - for pod_obj in io_pods: - if pod_obj.pvc.volume_mode == "Block": - storage_type = "block" - else: - storage_type = "fs" - pod_obj.run_io( - storage_type=storage_type, - size="1G", - runtime=10, - fio_filename=f"{pod_obj.name}_io_file1", - ) - log.info("IO started on all pods.") - - # Kill daemon - disruption.kill_daemon() - - # Getting result of PVC creation as list of PVC objects - pvc_objs_new = bulk_pvc_create.result() - - # Confirm PVCs are Bound - for pvc_obj in pvc_objs_new: - helpers.wait_for_resource_state( - resource=pvc_obj, state=constants.STATUS_BOUND, timeout=180 - ) - pvc_obj.reload() - log.info("Verified: New PVCs are Bound.") - - # Getting result of pods creation as list of Pod objects - pod_objs_new = bulk_pod_create.result() - - # Verify new pods are Running - for pod_obj in pod_objs_new: - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING, timeout=90 - ) - pod_obj.reload() - log.info("Verified: All new pods are Running.") - - # Verify IO - log.info("Fetching IO results from IO pods.") - for pod_obj in io_pods: - fio_result = pod_obj.get_fio_results() - err_count = fio_result.get("jobs")[0].get("error") - assert ( - err_count == 0 - ), f"FIO error on pod {pod_obj.name}. 
FIO result: {fio_result}" - log.info(f"IOPs after FIO on pod {pod_obj.name}:") - log.info(f"Read: {fio_result.get('jobs')[0].get('read').get('iops')}") - log.info(f"Write: {fio_result.get('jobs')[0].get('write').get('iops')}") - log.info("Verified IO result on IO pods.") - - all_pod_objs = io_pods + pod_objs_new - - # Fetch volume details from pods for the purpose of verification - node_pv_dict = {} - for pod in all_pod_objs: - pod_info = pod.get() - node = pod_info["spec"]["nodeName"] - pvc = pod_info["spec"]["volumes"][0]["persistentVolumeClaim"]["claimName"] - for pvc_obj in pvc_objs: - if pvc_obj.name == pvc: - pvc_obj.reload() - pv = pvc_obj.backed_pv - break - if node in node_pv_dict: - node_pv_dict[node].append(pv) - else: - node_pv_dict[node] = [pv] - - # Delete pods - for pod_obj in all_pod_objs: - pod_obj.delete(wait=False) - - # Verify pods are deleted - for pod_obj in all_pod_objs: - pod_obj.ocp.wait_for_delete(resource_name=pod_obj.name) - - # Verify number of 'resource_name' type pods - final_resource_pods_num = len(pod_functions[resource_name]()) - assert final_resource_pods_num == resource_pods_num, ( - f"Total number of {resource_name} pods is not matching with " - f"initial value. Total number of pods before daemon kill: " - f"{resource_pods_num}. Total number of pods present now: " - f"{final_resource_pods_num}" - ) - - # Verify volumes are unmapped from nodes after deleting the pods - node_pv_mounted = helpers.verify_pv_mounted_on_node(node_pv_dict) - for node, pvs in node_pv_mounted.items(): - assert not pvs, ( - f"PVs {pvs} is still present on node {node} after " - f"deleting the pods." - ) - log.info( - "Verified: mount points are removed from nodes after deleting " "the pods" - ) - - # Set volume mode on PVC objects - for pvc_obj in pvc_objs_new: - pvc_info = pvc_obj.get() - setattr(pvc_obj, "volume_mode", pvc_info["spec"]["volumeMode"]) - - # Verify that PVCs are reusable by creating new pods - all_pvc_objs = pvc_objs + pvc_objs_new - pod_objs_re = helpers.create_pods(all_pvc_objs, pod_factory, interface, 2) - - # Verify pods are Running - for pod_obj in pod_objs_re: - helpers.wait_for_resource_state( - resource=pod_obj, state=constants.STATUS_RUNNING, timeout=90 - ) - pod_obj.reload() - log.info("Successfully created new pods using all PVCs.") - - # Select pods from newly created pods list to run IO - pod_objs_re_io = [ - pod_obj - for pod_obj in pod_objs_re - if pod_obj.pvc - in helpers.select_unique_pvcs([pod_obj.pvc for pod_obj in pod_objs_re]) - ] - for pod_obj in pod_objs_re_io: - if pod_obj.pvc.volume_mode == "Block": - storage_type = "block" - else: - storage_type = "fs" - pod_obj.run_io( - storage_type=storage_type, - size="1G", - runtime=10, - fio_filename=f"{pod_obj.name}_io_file2", - ) - - log.info("Fetching IO results from newly created pods") - for pod_obj in pod_objs_re_io: - fio_result = pod_obj.get_fio_results() - err_count = fio_result.get("jobs")[0].get("error") - assert ( - err_count == 0 - ), f"FIO error on pod {pod_obj.name}. 
FIO result: {fio_result}" - log.info(f"IOPs after FIO on pod {pod_obj.name}:") - log.info(f"Read: {fio_result.get('jobs')[0].get('read').get('iops')}") - log.info(f"Write: {fio_result.get('jobs')[0].get('write').get('iops')}") - log.info("Verified IO result on newly created pods.") From ad5bff4992ad3b115d8a9d4beb8d2421116d05cc Mon Sep 17 00:00:00 2001 From: Sagi Hirshfeld Date: Tue, 3 Dec 2024 10:08:09 +0200 Subject: [PATCH 07/44] Set the namespace of the NSFS interface deployment dynamically (#10912) Signed-off-by: Sagi Hirshfeld --- tests/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index da8fc5f43d5..6bbc7b5ec83 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5698,6 +5698,9 @@ def nsfs_interface_deployment_factory(pvc_name, pvc_mount_path="/nsfs"): nsfs_deployment_data["metadata"]["name"] = create_unique_resource_name( "nsfs-interface", "deployment" ) + nsfs_deployment_data["metadata"]["namespace"] = ocsci_config.ENV_DATA[ + "cluster_namespace" + ] uid = nsfs_deployment_data["metadata"]["name"].split("-")[-1] nsfs_deployment_data["spec"]["selector"]["matchLabels"]["app"] += f"-{uid}" nsfs_deployment_data["spec"]["template"]["metadata"]["labels"][ From 2aa4ab304f0d2cf5226d38f10dc85ecab195407c Mon Sep 17 00:00:00 2001 From: Itzhak Kave Date: Tue, 3 Dec 2024 11:43:11 +0200 Subject: [PATCH 08/44] Wait for the ceph command to execute successfully in the rolling nodes terminate and rolling nodes restart tests (#10915) Signed-off-by: Itzhak Kave Co-authored-by: Itzhak Kave --- ocs_ci/ocs/resources/pod.py | 59 ++++++++++++++++++- .../z_cluster/nodes/test_nodes_restart.py | 1 + .../test_rolling_terminate_and_recovery.py | 3 + 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/ocs_ci/ocs/resources/pod.py b/ocs_ci/ocs/resources/pod.py index ddbc2c5e6f3..69e4f626781 100644 --- a/ocs_ci/ocs/resources/pod.py +++ b/ocs_ci/ocs/resources/pod.py @@ -757,7 +757,9 @@ def get_all_pods( return pod_objs -def get_ceph_tools_pod(skip_creating_pod=False, wait=False, namespace=None): +def get_ceph_tools_pod( + skip_creating_pod=False, wait=False, namespace=None, get_running_pods=True +): """ Get the Ceph tools pod @@ -766,6 +768,8 @@ def get_ceph_tools_pod(skip_creating_pod=False, wait=False, namespace=None): if it doesn't exist wait (bool): True if you want to wait for the tool pods to be Running namespace: Namespace of OCS + get_running_pods (bool): If True, get only the ceph tool pods in a Running status. + If False, get the ceph tool pods even if they are not in a Running status. Returns: Pod object: The Ceph tools pod object @@ -839,6 +843,10 @@ def _get_tools_pod_objs(): if not ct_pod_items: raise CephToolBoxNotFoundException + if not get_running_pods: + # Return the ceph tool pod objects even if they are not running + return ct_pod_items + # In the case of node failure, the CT pod will be recreated with the old # one in status Terminated. Therefore, need to filter out the Terminated pod @@ -3580,12 +3588,13 @@ def restart_pods_in_statuses( logger.info("Finish restarting the pods") -def wait_for_ceph_cmd_execute_successfully(timeout=300): +def base_wait_for_ceph_cmd_execute_successfully(timeout=300, sleep=20): """ Wait for a Ceph command to execute successfully Args: timeout (int): The time to wait for a Ceph command to execute successfully + sleep (int): Time to sleep between the iterations Returns: bool: True, if the Ceph command executed successfully. 
False, otherwise @@ -3593,7 +3602,7 @@ def wait_for_ceph_cmd_execute_successfully(timeout=300): """ try: for res in TimeoutSampler( - timeout=timeout, sleep=10, func=check_ceph_cmd_execute_successfully + timeout=timeout, sleep=sleep, func=check_ceph_cmd_execute_successfully ): if res: return True @@ -3957,3 +3966,47 @@ def get_prometheus_pods( pods_with_label_match = get_pods_having_label(prometheus_label, namespace) prometheus_pod_objs = [Pod(**prometheus) for prometheus in pods_with_label_match] return prometheus_pod_objs + + +def wait_for_ceph_cmd_execute_successfully( + timeout=300, sleep=20, num_of_retries=1, restart_tool_pod_before_retry=True +): + """ + Wait for the Ceph command to execute successfully in the given timeout and number of retries. + For, example, if the timeout is 300 and 'num_of_retries' is 2, we will wait 600 seconds + for the ceph command to execute successfully. + + Args: + timeout (int): The time to wait for a Ceph command to execute successfully + sleep (int): Time to sleep between the iterations + num_of_retries (int): The number of retries to wait for the Ceph command to execute successfully. + restart_tool_pod_before_retry (bool): If True, restart the rook-ceph-tool pod before the next retry. + False, otherwise. + + Returns: + bool: True, if the Ceph command executed successfully. False, otherwise + + """ + logger.info("Wait for the ceph command to execute successfully") + + for num_of_retry in range(num_of_retries): + logger.info(f"num of retries = {num_of_retry}") + res = base_wait_for_ceph_cmd_execute_successfully(timeout=timeout, sleep=sleep) + if res: + return True + if num_of_retry < 1: + # Continue to the next iteration if we didn't reach the first retry + continue + + if restart_tool_pod_before_retry: + try: + logger.info("Trying to restart the rook-ceph-tool pods...") + ceph_tool_pod = get_ceph_tools_pod(get_running_pods=False) + delete_pods([ceph_tool_pod], wait=False) + except CommandFailed as ex: + logger.warning(ex) + + logger.warning( + f"The ceph command failed to execute successfully after {num_of_retries} retries" + ) + return False diff --git a/tests/functional/z_cluster/nodes/test_nodes_restart.py b/tests/functional/z_cluster/nodes/test_nodes_restart.py index 5eb6f0b24b3..85164c6716f 100644 --- a/tests/functional/z_cluster/nodes/test_nodes_restart.py +++ b/tests/functional/z_cluster/nodes/test_nodes_restart.py @@ -112,6 +112,7 @@ def test_rolling_nodes_restart( ocp_nodes = get_node_objs() for node in ocp_nodes: nodes.restart_nodes(nodes=[node], wait=False) + pod.wait_for_ceph_cmd_execute_successfully(timeout=420, num_of_retries=2) self.sanity_helpers.health_check(cluster_check=False, tries=60) retry(CommandFailed, tries=8, delay=40, backoff=1)( diff --git a/tests/functional/z_cluster/nodes/test_rolling_terminate_and_recovery.py b/tests/functional/z_cluster/nodes/test_rolling_terminate_and_recovery.py index c3bd13389a1..f3f49c678ee 100644 --- a/tests/functional/z_cluster/nodes/test_rolling_terminate_and_recovery.py +++ b/tests/functional/z_cluster/nodes/test_rolling_terminate_and_recovery.py @@ -29,6 +29,7 @@ ) from ocs_ci.ocs.resources.pod import ( check_pods_after_node_replacement, + wait_for_ceph_cmd_execute_successfully, ) from ocs_ci.helpers.sanity_helpers import SanityManagedService, Sanity from ocs_ci.ocs.cluster import ( @@ -154,6 +155,8 @@ def rolling_terminate_and_recovery_of_ocs_worker_nodes(self, nodes): label_nodes([new_ocs_node]) log.info(f"The new ocs node is: {new_ocs_node.name}") + log.info("Wait for the Ceph health command 
to execute successfully") + wait_for_ceph_cmd_execute_successfully(timeout=420, num_of_retries=2) log.info("Waiting for all the pods to be running") assert check_pods_after_node_replacement(), "Not all the pods are running" From ca27c38144e49c9ad98c539744c6c25e09940b16 Mon Sep 17 00:00:00 2001 From: Filip Balak Date: Tue, 3 Dec 2024 10:44:33 +0100 Subject: [PATCH 09/44] Update strategy for ODF upgrade testing (#10840) * Update strategy before ODF upgrade Signed-off-by: fbalak * fix tox Signed-off-by: fbalak * track daemonset count during upgrade Signed-off-by: fbalak * tox Signed-off-by: fbalak * add test_update_strategy_config_change test Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * update test owner decorator Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * update test_update_strategy_config_change test Signed-off-by: fbalak * add polarion markers Signed-off-by: fbalak * move rook_operator_configmap_cleanup to conftest Signed-off-by: fbalak * fix teardown Signed-off-by: fbalak * fix tox Signed-off-by: fbalak * fix tox Signed-off-by: fbalak * extend measurement part, fix key error Signed-off-by: fbalak * address Elena's comment Signed-off-by: fbalak --------- Signed-off-by: fbalak --- ocs_ci/framework/conf/default_config.yaml | 4 + ocs_ci/ocs/ocs_upgrade.py | 95 ++++++++++++++++++- ocs_ci/ocs/resources/daemonset.py | 43 +++++++++ tests/functional/upgrade/conftest.py | 72 ++++++++++++++ .../functional/upgrade/test_configuration.py | 80 ++++++++++++++++ tests/functional/upgrade/test_upgrade.py | 20 ++-- 6 files changed, 305 insertions(+), 9 deletions(-) create mode 100644 ocs_ci/ocs/resources/daemonset.py diff --git a/ocs_ci/framework/conf/default_config.yaml b/ocs_ci/framework/conf/default_config.yaml index c9f09f01196..e988c7ff40d 100644 --- a/ocs_ci/framework/conf/default_config.yaml +++ b/ocs_ci/framework/conf/default_config.yaml @@ -288,6 +288,10 @@ UPGRADE: ocp_upgrade_path: "quay.io/openshift-release-dev/ocp-release" ocp_arch: "x86_64" upgrade_logging_channel: "4.18" + # None value means that value in Rook operator config is used. + # Otherwise it is changed to the provided value before ODF upgrade. 
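+  # Illustrative example only (not part of the shipped defaults): setting either
+  # value to 2 would let up to two CSI plugin pods of that daemonset be
+  # unavailable at once during the upgrade, e.g.
+  # csi_rbd_plugin_update_strategy_max_unavailable: 2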
+ csi_rbd_plugin_update_strategy_max_unavailable: null + csi_cephfs_plugin_update_strategy_max_unavailable: null # This section stores secret and uploaded from home dir or s3 # for entry into this section, please email ecosystem team diff --git a/ocs_ci/ocs/ocs_upgrade.py b/ocs_ci/ocs/ocs_upgrade.py index 8108c048417..3e30b6ea063 100644 --- a/ocs_ci/ocs/ocs_upgrade.py +++ b/ocs_ci/ocs/ocs_upgrade.py @@ -32,6 +32,7 @@ from ocs_ci.ocs.node import get_nodes from ocs_ci.ocs.resources.catalog_source import CatalogSource, disable_specific_source from ocs_ci.ocs.resources.csv import CSV, check_all_csvs_are_succeeded +from ocs_ci.ocs.resources.daemonset import DaemonSet from ocs_ci.ocs.resources.install_plan import wait_for_install_plan_and_approve from ocs_ci.ocs.resources.pod import get_noobaa_pods, verify_pods_upgraded from ocs_ci.ocs.resources.packagemanifest import ( @@ -573,6 +574,7 @@ def set_upgrade_images(self): def run_ocs_upgrade( operation=None, + upgrade_stats=None, *operation_args, **operation_kwargs, ): @@ -581,6 +583,8 @@ def run_ocs_upgrade( Args: operation: (function): Function to run + upgrade_stats: (dict): Dictionary where can be stored statistics + gathered during the upgrade operation_args: (iterable): Function's arguments operation_kwargs: (map): Function's keyword arguments @@ -604,6 +608,21 @@ def run_ocs_upgrade( f"{upgrade_ocs.version_before_upgrade}" ) + # Update values CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE and CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE + # in rook-ceph-operator-config configmap + set_update_strategy() + if upgrade_stats: + cephfs_daemonset = DaemonSet( + resource_name="csi-cephfsplugin", + namespace=config.ENV_DATA["cluster_namespace"], + ) + rbd_daemonset = DaemonSet( + resource_name="csi-rbdplugin", + namespace=config.ENV_DATA["cluster_namespace"], + ) + upgrade_stats["odf_upgrade"]["rbd_max_unavailable"] = 0 + upgrade_stats["odf_upgrade"]["cephfs_max_unavailable"] = 0 + # create external cluster object if config.DEPLOYMENT["external_mode"]: host, user, password, ssh_key = get_external_cluster_client() @@ -631,6 +650,7 @@ def run_ocs_upgrade( csv_name_pre_upgrade = upgrade_ocs.get_csv_name_pre_upgrade() pre_upgrade_images = upgrade_ocs.get_pre_upgrade_image(csv_name_pre_upgrade) upgrade_ocs.load_version_config_file(upgrade_version) + start_time = time.time() if config.DEPLOYMENT.get("disconnected") and not config.DEPLOYMENT.get( "disconnected_env_skip_image_mirroring" ): @@ -727,12 +747,44 @@ def run_ocs_upgrade( channel=channel, csv_name_pre_upgrade=csv_name_pre_upgrade, ): + if upgrade_stats: + rbd_daemonset_status = rbd_daemonset.get_status() + cephfs_daemonset_status = cephfs_daemonset.get_status() + rbd_unavailable = ( + rbd_daemonset_status["desiredNumberScheduled"] + - rbd_daemonset_status["numberReady"] + ) + cephfs_unavailable = ( + cephfs_daemonset_status["desiredNumberScheduled"] + - cephfs_daemonset_status["numberReady"] + ) + if ( + rbd_unavailable + > upgrade_stats["odf_upgrade"]["rbd_max_unavailable"] + ): + upgrade_stats["odf_upgrade"][ + "rbd_max_unavailable" + ] = rbd_unavailable + if ( + cephfs_unavailable + > upgrade_stats["odf_upgrade"]["cephfs_max_unavailable"] + ): + upgrade_stats["odf_upgrade"][ + "cephfs_max_unavailable" + ] = cephfs_unavailable + log.debug(f"rbd daemonset status: {rbd_daemonset_status}") + log.debug(f"cephfs daemonset status: {cephfs_daemonset_status}") try: if sample: log.info("Upgrade success!") break except TimeoutException: raise TimeoutException("No new CSV found after upgrade!") + 
        stop_time = time.time()
+        time_taken = stop_time - start_time
+        log.info(f"Upgrade took {time_taken} seconds to complete")
+        if upgrade_stats:
+            upgrade_stats["odf_upgrade"]["upgrade_time"] = time_taken
         old_image = upgrade_ocs.get_images_post_upgrade(
             channel, pre_upgrade_images, upgrade_version
         )
@@ -821,7 +873,7 @@ def ocs_odf_upgrade_ui():
     Pass proper versions and upgrade_ui.yaml while running this function for validation to pass
 
     """
-
+    set_update_strategy()
     login_ui()
     val_obj = ValidationUI()
     pagenav_obj = ValidationUI()
@@ -866,3 +918,44 @@ def ocs_odf_upgrade_ui():
     val_obj.take_screenshot()
     pagenav_obj.odf_overview_ui()
     pagenav_obj.odf_storagesystems_ui()
+
+
+def set_update_strategy(rbd_max_unavailable=None, cephfs_max_unavailable=None):
+    """
+    Update the rook-ceph-operator-config configmap with the parameters
+    CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE and CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE.
+    If the values are not provided as parameters of this function, they are taken
+    from the ocs-ci config. If the values are set neither in the ocs-ci config nor
+    in the function parameters, they are not updated.
+
+    Args:
+        rbd_max_unavailable (int, str): Value of CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE
+            to be updated in rook-ceph-operator-config configmap.
+        cephfs_max_unavailable (int, str): Value of CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE
+            to be updated in rook-ceph-operator-config configmap.
+
+    """
+    rbd_max = rbd_max_unavailable or config.ENV_DATA.get(
+        "csi_rbd_plugin_update_strategy_max_unavailable"
+    )
+    cephfs_max = cephfs_max_unavailable or config.ENV_DATA.get(
+        "csi_cephfs_plugin_update_strategy_max_unavailable"
+    )
+    if rbd_max:
+        config_map_patch = f'\'{{"data": {{"CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE": "{rbd_max}"}}}}\''
+        exec_cmd(
+            f"oc patch configmap -n {config.ENV_DATA['cluster_namespace']} "
+            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
+        )
+        logger.info(
+            f"CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE is set to {rbd_max}"
+        )
+    if cephfs_max:
+        config_map_patch = f'\'{{"data": {{"CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE": "{cephfs_max}"}}}}\''
+        exec_cmd(
+            f"oc patch configmap -n {config.ENV_DATA['cluster_namespace']} "
+            f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}"
+        )
+        logger.info(
+            f"CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE is set to {cephfs_max}"
+        )
diff --git a/ocs_ci/ocs/resources/daemonset.py b/ocs_ci/ocs/resources/daemonset.py
new file mode 100644
index 00000000000..6bdd79e076e
--- /dev/null
+++ b/ocs_ci/ocs/resources/daemonset.py
@@ -0,0 +1,43 @@
+"""
+DaemonSet related functionalities
+"""
+import logging
+
+from ocs_ci.ocs import constants
+from ocs_ci.ocs.ocp import OCP
+
+log = logging.getLogger(__name__)
+
+
+class DaemonSet(OCP):
+    """
+    This class represents a DaemonSet and contains methods for operations with
+    DaemonSets.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initializer function for DaemonSet class
+
+        """
+        super(DaemonSet, self).__init__(kind=constants.DAEMONSET, *args, **kwargs)
+
+    def get_status(self):
+        """
+        Get information related to the resource status.
+
+        Returns:
+            dict: DaemonSet resource status
+        """
+        resource_data = self.get()
+        return resource_data["status"]
+
+    def get_update_strategy(self):
+        """
+        Get information related to the update strategy.
+ + Returns: + dict: DaemonSet resource update strategy + """ + resource_data = self.get() + return resource_data["spec"]["updateStrategy"] diff --git a/tests/functional/upgrade/conftest.py b/tests/functional/upgrade/conftest.py index 080de790941..32e96916095 100644 --- a/tests/functional/upgrade/conftest.py +++ b/tests/functional/upgrade/conftest.py @@ -4,6 +4,7 @@ import pytest +from ocs_ci.framework import config from ocs_ci.ocs import constants, ocp from ocs_ci.ocs.bucket_utils import craft_s3_command from ocs_ci.ocs.exceptions import CommandFailed @@ -654,3 +655,74 @@ def fs_md5(fs_pod): ) log.info(f"Ceph FS md5: {md5}") return md5 + + +@pytest.fixture(scope="session") +def upgrade_stats(): + """ + + Returns: + dict: List of statistics gathered during performed upgrade. + + """ + return {"odf_upgrade": {}, "ocp_upgrade": {}} + + +@pytest.fixture(scope="function") +def rook_operator_configmap_cleanup(request): + """ + Restore values of CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE and + CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE parameters in + rook-ceph-operator-config configmap after a test. + """ + configmap = ocp.OCP( + kind=constants.CONFIGMAP, + namespace=config.ENV_DATA["cluster_namespace"], + resource_name=constants.ROOK_OPERATOR_CONFIGMAP, + ) + configmap_data = configmap.get() + rbd_max = configmap_data.get("data", {}).get( + "CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + ) + cephfs_max = configmap_data.get("data", {}).get( + "CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + ) + + def restore_values(): + """ + Restore values of CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE and + CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE to original values. + Remove them if they were not set. + """ + if rbd_max is None: + try: + params = '[{"op": "remove", "path": "/data/CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE"}]' + configmap.patch(params=params, format_type="json") + except CommandFailed as e: + log.warning( + "delete failed - it is possible that " + f"CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE was removed earlier: {e}" + ) + else: + params = f'{{"data": {{"CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE": "{rbd_max}"}}}}' + configmap.patch( + params=params, + format_type="merge", + ) + if cephfs_max is None: + try: + params = '[{"op": "remove", "path": "/data/CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE"}]' + configmap.patch(params=params, format_type="json") + except CommandFailed as e: + log.warning( + "delete failed - it is possible that " + f"CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE was removed earlier: {e}" + ) + else: + params = f'{{"data": {{"CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE": "{cephfs_max}"}}}}' + configmap.patch( + params=params, + format_type="merge", + ) + + request.addfinalizer(restore_values) diff --git a/tests/functional/upgrade/test_configuration.py b/tests/functional/upgrade/test_configuration.py index 05aacfee443..e9edf07780d 100644 --- a/tests/functional/upgrade/test_configuration.py +++ b/tests/functional/upgrade/test_configuration.py @@ -2,12 +2,18 @@ import pytest +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import ( pre_upgrade, post_upgrade, brown_squad, + tier1, ) +from ocs_ci.ocs import constants +from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs.resources import pod +from ocs_ci.ocs.resources.daemonset import DaemonSet +from ocs_ci.utility.utils import exec_cmd log = logging.getLogger(__name__) @@ -58,3 +64,77 @@ def 
test_crush_map_unchanged(pre_upgrade_crush_map): upgrade. """ pre_upgrade_crush_map == get_crush_map() + + +@post_upgrade +@pytest.mark.polarion_id("OCS-6275") +@brown_squad +def test_max_unavaialable_rbd(upgrade_stats): + """ + Test that the number of unavailable RBD daemonset plugin pods during ODF + upgrade corresponds to the value set in rook-ceph-operator-config configmap. + """ + configmap = OCP( + kind=constants.CONFIGMAP, + namespace=config.ENV_DATA["cluster_namespace"], + resource_name=constants.ROOK_OPERATOR_CONFIGMAP, + ).get() + config_value = configmap.get("data").get( + "CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + ) + assert config_value == upgrade_stats["odf_upgrade"]["rbd_max_unavailable"] + + +@post_upgrade +@pytest.mark.polarion_id("OCS-6278") +@brown_squad +def test_max_unavaialable_cephfs(upgrade_stats): + """ + Test that the number of unavailable CephFS daemonset plugin pods during ODF + upgrade corresponds to the value set in rook-ceph-operator-config configmap. + """ + configmap = OCP( + kind=constants.CONFIGMAP, + namespace=config.ENV_DATA["cluster_namespace"], + resource_name=constants.ROOK_OPERATOR_CONFIGMAP, + ).get() + config_value = configmap.get("data").get( + "CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + ) + assert config_value == upgrade_stats["odf_upgrade"]["cephfs_max_unavailable"] + + +@pytest.mark.parametrize( + argnames=["daemonset", "value_to_set", "expected_value"], + argvalues=[ + pytest.param( + "csi-rbdplugin", 2, 2, marks=[tier1, pytest.mark.polarion_id("OCS-6276")] + ), + pytest.param( + "csi-cephfsplugin", 2, 2, marks=[tier1, pytest.mark.polarion_id("OCS-6277")] + ), + ], +) +@brown_squad +def test_update_strategy_config_change( + daemonset, value_to_set, expected_value, rook_operator_configmap_cleanup +): + """ + Test that tested value added to configmap rook-ceph-operator-config is + reflected in respective daemonset. 
+ """ + if daemonset == "csi-rbdplugin": + parameter_name = "CSI_RBD_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + elif daemonset == "csi-cephfsplugin": + parameter_name = "CSI_CEPHFS_PLUGIN_UPDATE_STRATEGY_MAX_UNAVAILABLE" + + config_map_patch = f'\'{{"data": {{"{parameter_name}": "{value_to_set}"}}}}\'' + exec_cmd( + f"oc patch configmap -n {config.ENV_DATA['cluster_namespace']} " + f"{constants.ROOK_OPERATOR_CONFIGMAP} -p {config_map_patch}" + ) + ds_obj = DaemonSet( + resource_name=daemonset, namespace=config.ENV_DATA["cluster_namespace"] + ) + results = ds_obj.get_update_strategy() + assert str(expected_value) == str(results["rollingUpdate"]["maxUnavailable"]) diff --git a/tests/functional/upgrade/test_upgrade.py b/tests/functional/upgrade/test_upgrade.py index 1f891ab0863..ac836b07ed2 100644 --- a/tests/functional/upgrade/test_upgrade.py +++ b/tests/functional/upgrade/test_upgrade.py @@ -28,46 +28,50 @@ def finalizer(): @purple_squad @pytest.mark.polarion_id("OCS-1579") -def test_worker_node_abrupt_shutdown(teardown): +def test_worker_node_abrupt_shutdown(teardown, upgrade_stats): """ Test OCS upgrade with disruption of shutting down worker node, for 5.5 minutes """ log.info("Starting disruptive function: test_worker_node_abrupt_shutdown") - run_ocs_upgrade(operation=worker_node_shutdown, abrupt=True) + run_ocs_upgrade( + operation=worker_node_shutdown, abrupt=True, upgrade_stats=upgrade_stats + ) @purple_squad @pytest.mark.polarion_id("OCS-1575") -def test_worker_node_permanent_shutdown(teardown): +def test_worker_node_permanent_shutdown(teardown, upgrade_stats): """ Test OCS upgrade with disruption of shutting down worker node """ log.info("Starting disruptive function: test_worker_node_permanent_shutdown") - run_ocs_upgrade(operation=worker_node_shutdown, abrupt=False) + run_ocs_upgrade( + operation=worker_node_shutdown, abrupt=False, upgrade_stats=upgrade_stats + ) @purple_squad @pytest.mark.polarion_id("OCS-1558") -def test_osd_reboot(teardown): +def test_osd_reboot(teardown, upgrade_stats): """ OCS Upgrade with node reboot: with 1 OSD going down and back up while upgrade is running """ log.info("Starting disruptive function: test_osd_reboot") - run_ocs_upgrade(operation=osd_node_reboot) + run_ocs_upgrade(operation=osd_node_reboot, upgrade_stats=upgrade_stats) @purple_squad @ocs_upgrade @polarion_id(get_polarion_id(upgrade=True)) -def test_upgrade(): +def test_upgrade(upgrade_stats): """ Tests upgrade procedure of OCS cluster """ - run_ocs_upgrade() + run_ocs_upgrade(upgrade_stats=upgrade_stats) From 72e1d77979b6c71111f08eaffdba5e7a1ace9fe0 Mon Sep 17 00:00:00 2001 From: udaysk23 <54358025+udaysk23@users.noreply.github.com> Date: Tue, 3 Dec 2024 16:01:55 +0530 Subject: [PATCH 10/44] Fixes dynamic namespace issue (#10932) Signed-off-by: Uday Kurundwade --- .../object/mcg/test_custom_credentials_using_mcg_cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/functional/object/mcg/test_custom_credentials_using_mcg_cli.py b/tests/functional/object/mcg/test_custom_credentials_using_mcg_cli.py index 2bf01a69fd0..12ef0186a15 100644 --- a/tests/functional/object/mcg/test_custom_credentials_using_mcg_cli.py +++ b/tests/functional/object/mcg/test_custom_credentials_using_mcg_cli.py @@ -9,6 +9,7 @@ red_squad, mcg, ) +from ocs_ci.framework import config from ocs_ci.ocs import constants from ocs_ci.helpers.helpers import retrieve_cli_binary from ocs_ci.utility.utils import run_cmd, get_random_str @@ -27,11 +28,12 @@ def update_nb_account(self, account_name, 
access_key, secret_key): """ Update noobaa account with custom credential values """ + namespace = config.ENV_DATA["cluster_namespace"] output = run_cmd( cmd=f"{mcg_cli} account credentials {account_name} " + f"--access-key={access_key} " + f"--secret-key={secret_key} " - + "-n openshift-storage", + + f"-n {namespace}", ignore_error=True, ) logger.info(output) From 2d4ed1bd352b38d28efbc2dbd6794729f96a3329 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Tue, 3 Dec 2024 12:38:55 +0200 Subject: [PATCH 11/44] config namespace reference is set to every odf cli call (#10964) Signed-off-by: Daniel Osypenko --- ocs_ci/helpers/odf_cli.py | 13 ++++- .../test_pvc_stale_volume_cleanup_cli.py | 58 ++++++++++--------- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/ocs_ci/helpers/odf_cli.py b/ocs_ci/helpers/odf_cli.py index 51e5d388a92..b2544e4bb9b 100644 --- a/ocs_ci/helpers/odf_cli.py +++ b/ocs_ci/helpers/odf_cli.py @@ -116,10 +116,19 @@ def __init__(self) -> None: self.binary_name = "odf" def run_command(self, command_args: Union[str, list]) -> str: + # by default Operator namespace is set to 'openshift-storage' in ODF CLI, + # when -n is not passed the command will fail if the namespace is not 'openshift-storage' if isinstance(command_args, str): - full_command = str(self.binary_name + command_args) + full_command = str( + self.binary_name + + f' -n {config.ENV_DATA["cluster_namespace"]} ' + + command_args + ) elif isinstance(command_args, list): - full_command = " ".join([self.binary_name] + command_args) + full_command = " ".join( + [self.binary_name, "-n", config.ENV_DATA["cluster_namespace"]] + + command_args + ) output = exec_cmd(full_command) log.info(f"output type: {type(output)}") diff --git a/tests/functional/odf-cli/test_pvc_stale_volume_cleanup_cli.py b/tests/functional/odf-cli/test_pvc_stale_volume_cleanup_cli.py index aefea63f935..410e327be07 100644 --- a/tests/functional/odf-cli/test_pvc_stale_volume_cleanup_cli.py +++ b/tests/functional/odf-cli/test_pvc_stale_volume_cleanup_cli.py @@ -1,4 +1,6 @@ import logging +from subprocess import CompletedProcess + import pytest from ocs_ci.framework.testlib import ( @@ -10,7 +12,6 @@ from ocs_ci.ocs import constants from ocs_ci.helpers import helpers -from ocs_ci.utility.utils import run_cmd from ocs_ci.framework.testlib import ignore_leftovers logger = logging.getLogger(__name__) @@ -20,6 +21,11 @@ @ignore_leftovers @green_squad class TestSubvolumesCommand(ManageTest): + + @pytest.fixture(autouse=True) + def setup(self, odf_cli_setup): + self.odf_cli_runner = odf_cli_setup + @skipif_ocs_version("<4.15") @pytest.mark.polarion_id("OCS-5794") def test_pvc_stale_volume_cleanup_cli(self, storageclass_factory, pvc_factory): @@ -31,11 +37,9 @@ def test_pvc_stale_volume_cleanup_cli(self, storageclass_factory, pvc_factory): 5. Check for stale volumes 6. No stale volumes should be present of the deleted PVC. 
""" - from pathlib import Path - if not Path(constants.CLI_TOOL_LOCAL_PATH).exists(): - helpers.retrieve_cli_binary(cli_type="odf") - output = run_cmd(cmd="odf-cli subvolume ls") + output = self.odf_cli_runner.run_command("subvolume ls") + inital_subvolume_list = self.parse_subvolume_ls_output(output) logger.info(f"{inital_subvolume_list=}") cephfs_sc_obj = storageclass_factory( @@ -49,7 +53,8 @@ def test_pvc_stale_volume_cleanup_cli(self, storageclass_factory, pvc_factory): access_mode=constants.ACCESS_MODE_RWX, status=constants.STATUS_BOUND, ) - output = run_cmd(cmd="odf-cli subvolume ls") + + output = self.odf_cli_runner.run_command("subvolume ls") later_subvolume_list = self.parse_subvolume_ls_output(output) old = set(inital_subvolume_list) new = set(later_subvolume_list) @@ -61,14 +66,19 @@ def test_pvc_stale_volume_cleanup_cli(self, storageclass_factory, pvc_factory): pvc_obj.delete() # Deleteing stale subvolume - run_cmd(cmd=f"odf-cli subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}") + self.odf_cli_runner.run_command( + f"subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" + ) # Checking for stale volumes - output = run_cmd(cmd="odf-cli subvolume ls --stale") + output = self.odf_cli_runner.run_command("subvolume ls --stale") stale_volumes = self.parse_subvolume_ls_output(output) assert len(stale_volumes) == 0 # No stale volumes available def parse_subvolume_ls_output(self, output): + if isinstance(output, CompletedProcess): + output = output.stdout.decode("utf-8") + subvolumes = [] subvolumes_list = output.strip().split("\n")[1:] for item in subvolumes_list: @@ -95,11 +105,8 @@ def test_rox_pvc_stale_volume_cleanup_cli( 5. Check for stale volumes 6. No stale volumes should be present of the deleted PVC. """ - from pathlib import Path - if not Path(constants.CLI_TOOL_LOCAL_PATH).exists(): - helpers.retrieve_cli_binary(cli_type="odf") - output = run_cmd(cmd="odf-cli subvolume ls") + output = self.odf_cli_runner.run_command("subvolume ls") inital_subvolume_list = self.parse_subvolume_ls_output(output) logger.info(f"{inital_subvolume_list=}") cephfs_sc_obj = storageclass_factory( @@ -131,7 +138,7 @@ def test_rox_pvc_stale_volume_cleanup_cli( timeout=300, ) - output = run_cmd(cmd="odf-cli subvolume ls") + output = self.odf_cli_runner.run_command("subvolume ls") later_subvolume_list = self.parse_subvolume_ls_output(output) old = set(inital_subvolume_list) new = set(later_subvolume_list) @@ -149,14 +156,14 @@ def test_rox_pvc_stale_volume_cleanup_cli( ) pv_created_by_original_pvc.delete(wait=True) - # Checking for stale volumes - output = run_cmd(cmd="odf-cli subvolume ls --stale") - + self.odf_cli_runner.run_command("subvolume ls --stale") # Deleteing stale subvolume - run_cmd(cmd=f"odf-cli subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}") + self.odf_cli_runner.run_command( + f"subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" + ) # Checking for stale volumes - output = run_cmd(cmd="odf-cli subvolume ls --stale") + output = self.odf_cli_runner.run_command("subvolume ls --stale") stale_volumes = self.parse_subvolume_ls_output(output) assert len(stale_volumes) == 0 # No stale volumes available @@ -179,11 +186,8 @@ def test_stale_volume_snapshot_cleanup_cli( 7. Run script 8. No stale volumes should be present of the deleted PVC and its snapshot. 
""" - from pathlib import Path - if not Path(constants.CLI_TOOL_LOCAL_PATH).exists(): - helpers.retrieve_cli_binary(cli_type="odf") - output = run_cmd(cmd="odf-cli subvolume ls") + output = self.odf_cli_runner.run_command("subvolume ls") inital_subvolume_list = self.parse_subvolume_ls_output(output) logger.info(f"{inital_subvolume_list=}") cephfs_sc_obj = storageclass_factory( @@ -204,7 +208,7 @@ def test_stale_volume_snapshot_cleanup_cli( snapshot_obj = snapshot_factory(pvc_obj, wait=False) logger.info("Verify snapshots moved from false state to true state") - output = run_cmd(cmd="odf-cli subvolume ls") + output = self.odf_cli_runner.run_command("subvolume ls") later_subvolume_list = self.parse_subvolume_ls_output(output) old = set(inital_subvolume_list) new = set(later_subvolume_list) @@ -221,7 +225,7 @@ def test_stale_volume_snapshot_cleanup_cli( pv_created_by_original_pvc.delete(wait=True) # Checking for stale volumes - output = run_cmd(cmd="odf-cli subvolume ls --stale") + output = self.odf_cli_runner.run_command("subvolume ls --stale") stale_with_snapshot_subvolume = self.parse_subvolume_ls_output(output)[0] logger.info(f"{stale_with_snapshot_subvolume=}") assert stale_with_snapshot_subvolume[3] == "stale-with-snapshot" @@ -230,9 +234,11 @@ def test_stale_volume_snapshot_cleanup_cli( snapshot_obj.delete(wait=True) # Deleteing stale subvolume - run_cmd(cmd=f"odf-cli subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}") + self.odf_cli_runner.run_command( + f"subvolume delete {new_pvc[0]} {new_pvc[1]} {new_pvc[2]}" + ) # Checking for stale volumes - output = run_cmd(cmd="odf-cli subvolume ls --stale") + output = self.odf_cli_runner.run_command("subvolume ls --stale") stale_volumes = self.parse_subvolume_ls_output(output) assert len(stale_volumes) == 0 # No stale volumes available From b98dc50441a72e78a538b41054a43fbf249cffd9 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 4 Dec 2024 10:02:31 +0200 Subject: [PATCH 12/44] add skip_if_rosa_hcp (#10969) Signed-off-by: Daniel Osypenko --- tests/functional/z_cluster/nodes/test_nodes_maintenance.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/functional/z_cluster/nodes/test_nodes_maintenance.py b/tests/functional/z_cluster/nodes/test_nodes_maintenance.py index 4752c9283a8..1521cddc4e3 100644 --- a/tests/functional/z_cluster/nodes/test_nodes_maintenance.py +++ b/tests/functional/z_cluster/nodes/test_nodes_maintenance.py @@ -27,7 +27,7 @@ ) from ocs_ci.ocs.cluster import validate_existence_of_blocking_pdb from ocs_ci.framework import config -from ocs_ci.framework.pytest_customization.marks import brown_squad +from ocs_ci.framework.pytest_customization.marks import brown_squad, skipif_rosa_hcp from ocs_ci.framework.testlib import ( tier1, tier2, @@ -128,7 +128,10 @@ def health_checker(self): argnames=["node_type"], argvalues=[ pytest.param(*["worker"], marks=pytest.mark.polarion_id("OCS-1269")), - pytest.param(*["master"], marks=pytest.mark.polarion_id("OCS-1272")), + pytest.param( + *["master"], + marks=[pytest.mark.polarion_id("OCS-1272"), skipif_rosa_hcp], + ), ], ) def test_node_maintenance( From 45b5877808f467f7d7bad0961c6328f12fe6994c Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 4 Dec 2024 10:04:05 +0200 Subject: [PATCH 13/44] add skipp collecting data on CP for managed_cp (#10948) Signed-off-by: Daniel Osypenko --- ocs_ci/ocs/perftests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ocs_ci/ocs/perftests.py b/ocs_ci/ocs/perftests.py index 
22a2044a7c2..957fd544ef7 100644 --- a/ocs_ci/ocs/perftests.py +++ b/ocs_ci/ocs/perftests.py @@ -96,7 +96,10 @@ def setup(self): self.get_osd_info() - self.get_node_info(node_type="master") + if config.ENV_DATA.get("deployment_type") != constants.MANAGED_CP_DEPL_TYPE: + self.get_node_info(node_type="master") + else: + log.info("No master nodes in a managed control plane clusters") self.get_node_info(node_type="worker") def teardown(self): From 9d1c07b94474847d92ee8608d98cdf5abe25a940 Mon Sep 17 00:00:00 2001 From: Shylesh Kumar Mohan Date: Wed, 4 Dec 2024 16:18:06 +0530 Subject: [PATCH 14/44] Fix black check Signed-off-by: Shylesh Kumar Mohan --- ocs_ci/ocs/resources/daemonset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocs_ci/ocs/resources/daemonset.py b/ocs_ci/ocs/resources/daemonset.py index 6bdd79e076e..5d833995d74 100644 --- a/ocs_ci/ocs/resources/daemonset.py +++ b/ocs_ci/ocs/resources/daemonset.py @@ -1,6 +1,7 @@ """ DaemonSet related functionalities """ + import logging from ocs_ci.ocs import constants From 7efc8f7017b5ea37474b7a6d441ecd225ec78e77 Mon Sep 17 00:00:00 2001 From: Shylesh Kumar Mohan Date: Tue, 29 Oct 2024 22:13:36 +0530 Subject: [PATCH 15/44] Download subctl binary in anycase Signed-off-by: Shylesh Kumar Mohan --- ocs_ci/deployment/acm.py | 3 ++- ocs_ci/ocs/utils.py | 46 ++++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/ocs_ci/deployment/acm.py b/ocs_ci/deployment/acm.py index ad0324e62e6..cd197a1e54f 100644 --- a/ocs_ci/deployment/acm.py +++ b/ocs_ci/deployment/acm.py @@ -86,6 +86,8 @@ def __init__(self): self.dr_only_list = [] def deploy(self): + # Download subctl binary in any case. + self.download_binary() if self.source == "upstream": self.deploy_upstream() elif self.source == "downstream": @@ -94,7 +96,6 @@ def deploy(self): raise Exception(f"The Submariner source: {self.source} is not recognized") def deploy_upstream(self): - self.download_binary() self.submariner_configure_upstream() def deploy_downstream(self): diff --git a/ocs_ci/ocs/utils.py b/ocs_ci/ocs/utils.py index 9212f467f23..494253dcbbc 100644 --- a/ocs_ci/ocs/utils.py +++ b/ocs_ci/ocs/utils.py @@ -1294,22 +1294,36 @@ def _collect_ocs_logs( cluster_config=cluster_config, ) - submariner_log_path = os.path.join( - log_dir_path, - "submariner", - ) - run_cmd(f"mkdir -p {submariner_log_path}") - cwd = os.getcwd() - run_cmd(f"chmod -R 777 {submariner_log_path}") - os.chdir(submariner_log_path) - submariner_log_collect = ( - f"subctl gather --kubeconfig {cluster_config.RUN['kubeconfig']}" - ) - log.info("Collecting submariner logs") - out = run_cmd(submariner_log_collect) - run_cmd(f"chmod -R 777 {submariner_log_path}") - os.chdir(cwd) - log.info(out) + # We want to skip submariner log collection if it's in import clusters phase + if not cluster_config.ENV_DATA.get( + "import_clusters_to_acm", False + ) or cluster_config.ENV_DATA.get("submariner_source", ""): + try: + run_cmd("subctl") + except (CommandFailed, FileNotFoundError): + log.debug("subctl binary not found, downloading now...") + # Importing here to avoid circular import error + from ocs_ci.deployment.acm import Submariner + + submariner = Submariner() + submariner.download_binary() + + submariner_log_path = os.path.join( + log_dir_path, + "submariner", + ) + run_cmd(f"mkdir -p {submariner_log_path}") + cwd = os.getcwd() + run_cmd(f"chmod -R 777 {submariner_log_path}") + os.chdir(submariner_log_path) + submariner_log_collect = ( + f"subctl gather --kubeconfig 
{cluster_config.RUN['kubeconfig']}" + ) + log.info("Collecting submariner logs") + out = run_cmd(submariner_log_collect) + run_cmd(f"chmod -R 777 {submariner_log_path}") + os.chdir(cwd) + log.info(out) def collect_ocs_logs( From 5a821c7d9e1cf6d727d5b1eacf654ce4abffb3a2 Mon Sep 17 00:00:00 2001 From: Sagi Hirshfeld Date: Wed, 4 Dec 2024 15:17:28 +0200 Subject: [PATCH 16/44] Move CA related runtime commands to container startup (#10702) Signed-off-by: Sagi Hirshfeld --- ocs_ci/ocs/awscli_pod.py | 43 ++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/ocs_ci/ocs/awscli_pod.py b/ocs_ci/ocs/awscli_pod.py index 20c2a87b91f..6a1f792ecfa 100644 --- a/ocs_ci/ocs/awscli_pod.py +++ b/ocs_ci/ocs/awscli_pod.py @@ -60,6 +60,8 @@ def create_awscli_pod(scope_name=None, namespace=None, service_account=None): awscli_sts_dict["metadata"]["namespace"] = namespace update_container_with_mirrored_image(awscli_sts_dict) update_container_with_proxy_env(awscli_sts_dict) + _add_startup_commands_to_set_ca(awscli_sts_dict) + s3cli_sts_obj = create_resource(**awscli_sts_dict) log.info("Verifying the AWS CLI StatefulSet is running") @@ -73,16 +75,6 @@ def create_awscli_pod(scope_name=None, namespace=None, service_account=None): )() wait_for_resource_state(awscli_pod_obj, constants.STATUS_RUNNING, timeout=180) - awscli_pod_obj.exec_cmd_on_pod( - f"cp {constants.SERVICE_CA_CRT_AWSCLI_PATH} {constants.AWSCLI_CA_BUNDLE_PATH}" - ) - - if storagecluster_independent_check() and config.EXTERNAL_MODE.get("rgw_secure"): - log.info("Concatenating the RGW CA to the AWS CLI pod's CA bundle") - awscli_pod_obj.exec_cmd_on_pod( - f"bash -c 'wget -O - {config.EXTERNAL_MODE['rgw_cert_ca']} >> {constants.AWSCLI_CA_BUNDLE_PATH}'" - ) - return awscli_pod_obj @@ -118,3 +110,34 @@ def awscli_pod_cleanup(namespace=None): ) if awscli_service_ca_query: ocp_cm.delete(resource_name=awscli_service_ca_query[0]["metadata"]["name"]) + + +def _add_startup_commands_to_set_ca(awscli_sts_dict): + """ + Add container startup commands to ensure the CA is at the expected location + + Args: + awscli_sts_dict (dict): The AWS CLI StatefulSet dict to modify + """ + startup_cmds = [] + + # Copy the CA cert to the expected location + startup_cmds.append( + f"cp {constants.SERVICE_CA_CRT_AWSCLI_PATH} {constants.AWSCLI_CA_BUNDLE_PATH}" + ) + + # Download and concatenate an additional CA cert if needed + if storagecluster_independent_check() and config.EXTERNAL_MODE.get("rgw_secure"): + startup_cmds.append( + f"wget -O - {config.EXTERNAL_MODE['rgw_cert_ca']} >> {constants.AWSCLI_CA_BUNDLE_PATH}" + ) + + # Keep the pod running after the commands + startup_cmds.append("sleep infinity") + + # Set the commands to run on pod startup + awscli_sts_dict["spec"]["template"]["spec"]["containers"][0]["command"] = [ + "/bin/sh", + "-c", + " && ".join(startup_cmds), + ] From c325e43f32ff111aa9766af68cd325c35ea4af5f Mon Sep 17 00:00:00 2001 From: Filip Balak Date: Wed, 4 Dec 2024 14:38:34 +0100 Subject: [PATCH 17/44] Add odf-dependencies to csv check (#10954) Signed-off-by: fbalak --- ocs_ci/deployment/helpers/odf_deployment_helpers.py | 3 +++ ocs_ci/ocs/defaults.py | 1 + 2 files changed, 4 insertions(+) diff --git a/ocs_ci/deployment/helpers/odf_deployment_helpers.py b/ocs_ci/deployment/helpers/odf_deployment_helpers.py index bdb9138fbee..57ae3512c15 100644 --- a/ocs_ci/deployment/helpers/odf_deployment_helpers.py +++ b/ocs_ci/deployment/helpers/odf_deployment_helpers.py @@ -37,4 +37,7 @@ def get_required_csvs(): if ocs_version 
>= version.VERSION_4_17: operators_4_17_additions = [defaults.CEPHCSI_OPERATOR] ocs_operator_names.extend(operators_4_17_additions) + if ocs_version >= version.VERSION_4_18: + operators_4_18_additions = [defaults.ODF_DEPENDENCIES] + ocs_operator_names.extend(operators_4_18_additions) return ocs_operator_names diff --git a/ocs_ci/ocs/defaults.py b/ocs_ci/ocs/defaults.py index 2a98377f194..426e1da7201 100644 --- a/ocs_ci/ocs/defaults.py +++ b/ocs_ci/ocs/defaults.py @@ -57,6 +57,7 @@ LIVE_CONTENT_SOURCE = "redhat-operators" OCS_CLIENT_OPERATOR_NAME = "ocs-client-operator" CEPHCSI_OPERATOR = "cephcsi-operator" +ODF_DEPENDENCIES = "odf-dependencies" # Noobaa S3 bucket website configurations website_config = { From eb9781094802811add138ae02d038bfd44727e61 Mon Sep 17 00:00:00 2001 From: Jilju Joy Date: Thu, 5 Dec 2024 12:18:59 +0530 Subject: [PATCH 18/44] Remove redundant test of basic cephfs and rbd PVC Signed-off-by: Jilju Joy --- .../test_create_storage_class_pvc.py | 131 ------------------ 1 file changed, 131 deletions(-) delete mode 100644 tests/functional/storageclass/test_create_storage_class_pvc.py diff --git a/tests/functional/storageclass/test_create_storage_class_pvc.py b/tests/functional/storageclass/test_create_storage_class_pvc.py deleted file mode 100644 index 225fec85df5..00000000000 --- a/tests/functional/storageclass/test_create_storage_class_pvc.py +++ /dev/null @@ -1,131 +0,0 @@ -import logging -import pytest - -from ocs_ci.ocs import constants -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest -from ocs_ci.helpers import helpers - -log = logging.getLogger(__name__) - - -@pytest.fixture(scope="function") -def test_fixture_rbd(request): - request.addfinalizer(teardown_rbd) - setup_rbd() - - -def setup_rbd(): - """ - Setting up the environment - Creating replicated pool,secret,storageclass for rbd - """ - log.info("Creating CephBlockPool") - global RBD_POOL - RBD_POOL = helpers.create_ceph_block_pool() - global RBD_SECRET_OBJ - RBD_SECRET_OBJ = helpers.create_secret(constants.CEPHBLOCKPOOL) - global RBD_SC_OBJ - log.info("Creating RBD Storage class ") - RBD_SC_OBJ = helpers.create_storage_class( - interface_type=constants.CEPHBLOCKPOOL, - interface_name=RBD_POOL.name, - secret_name=RBD_SECRET_OBJ.name, - ) - - -def teardown_rbd(): - """ - Tearing down the environment - Deleting pod,replicated pool,pvc,storageclass,secret of rbd - """ - global RBD_PVC_OBJ, RBD_POD_OBJ - log.info("deleting rbd pod") - RBD_POD_OBJ.delete() - log.info("Deleting RBD PVC") - RBD_PVC_OBJ.delete() - assert helpers.validate_pv_delete(RBD_PVC_OBJ.backed_pv) - log.info("Deleting CEPH BLOCK POOL") - RBD_POOL.delete() - log.info("Deleting RBD Secret") - RBD_SECRET_OBJ.delete() - log.info("Deleting RBD Storageclass") - RBD_SC_OBJ.delete() - - -@pytest.fixture(scope="function") -def test_fixture_cephfs(request): - - request.addfinalizer(teardown_fs) - setup_fs() - - -def setup_fs(): - log.info("Creating CEPHFS Secret") - global CEPHFS_SECRET_OBJ - CEPHFS_SECRET_OBJ = helpers.create_secret(constants.CEPHFILESYSTEM) - - global CEPHFS_SC_OBJ - log.info("Creating CephFS Storage class ") - CEPHFS_SC_OBJ = helpers.create_storage_class( - constants.CEPHFILESYSTEM, - helpers.get_cephfs_data_pool_name(), - CEPHFS_SECRET_OBJ.name, - ) - - -def teardown_fs(): - global CEPHFS_PVC_OBJ, CEPHFS_POD_OBJ - log.info("deleting cephfs pod") - CEPHFS_POD_OBJ.delete() - log.info("deleting cephfs pvc") - CEPHFS_PVC_OBJ.delete() - assert 
helpers.validate_pv_delete(CEPHFS_PVC_OBJ.backed_pv) - log.info("Deleting CEPHFS Secret") - CEPHFS_SECRET_OBJ.delete() - log.info("Deleting CephFS Storageclass") - CEPHFS_SC_OBJ.delete() - - -@green_squad -class TestOSCBasics(ManageTest): - @pytest.mark.polarion_id("OCS-336") - def test_basics_rbd(self, test_fixture_rbd): - """ - Testing basics: secret creation, - storage class creation,pvc and pod with rbd - """ - global RBD_PVC_OBJ, RBD_POD_OBJ - log.info("creating pvc for RBD ") - pvc_name = helpers.create_unique_resource_name("test-rbd", "pvc") - RBD_PVC_OBJ = helpers.create_pvc(sc_name=RBD_SC_OBJ.name, pvc_name=pvc_name) - helpers.wait_for_resource_state(RBD_PVC_OBJ, constants.STATUS_BOUND) - RBD_PVC_OBJ.reload() - if RBD_PVC_OBJ.backed_pv is None: - RBD_PVC_OBJ.reload() - RBD_POD_OBJ = helpers.create_pod( - interface_type=constants.CEPHBLOCKPOOL, pvc_name=RBD_PVC_OBJ.name - ) - helpers.wait_for_resource_state(RBD_POD_OBJ, constants.STATUS_RUNNING) - RBD_POD_OBJ.reload() - - @pytest.mark.polarion_id("OCS-346") - def test_basics_cephfs(self, test_fixture_cephfs): - """ - Testing basics: secret creation, - storage class creation, pvc and pod with cephfs - """ - global CEPHFS_PVC_OBJ, CEPHFS_POD_OBJ - log.info("creating pvc for CephFS ") - pvc_name = helpers.create_unique_resource_name("test-cephfs", "pvc") - CEPHFS_PVC_OBJ = helpers.create_pvc( - sc_name=CEPHFS_SC_OBJ.name, pvc_name=pvc_name - ) - helpers.wait_for_resource_state(CEPHFS_PVC_OBJ, constants.STATUS_BOUND) - CEPHFS_PVC_OBJ.reload() - log.info("creating cephfs pod") - CEPHFS_POD_OBJ = helpers.create_pod( - interface_type=constants.CEPHFILESYSTEM, pvc_name=CEPHFS_PVC_OBJ.name - ) - helpers.wait_for_resource_state(CEPHFS_POD_OBJ, constants.STATUS_RUNNING) - CEPHFS_POD_OBJ.reload() From 4f07b5040bc557fff9dbb9e572c98b7e7d6c1b1e Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Thu, 5 Dec 2024 09:31:43 +0200 Subject: [PATCH 19/44] add-new-test--test_add_ocs_node_non_default_machinepool (#10854) * add-new-test--test_add_ocs_node_non_default_machinepool * squad_lbl added * add health check Signed-off-by: Daniel Osypenko --- .../framework/pytest_customization/marks.py | 5 + ocs_ci/ocs/machinepool.py | 69 ++++++---- ocs_ci/ocs/node.py | 4 +- ocs_ci/ocs/platform_nodes.py | 27 ++-- .../z_cluster/cluster_expansion/conftest.py | 4 +- .../cluster_expansion/test_nodes_rosa_hcp.py | 123 ++++++++++++++++++ 6 files changed, 192 insertions(+), 40 deletions(-) create mode 100644 tests/functional/z_cluster/cluster_expansion/test_nodes_rosa_hcp.py diff --git a/ocs_ci/framework/pytest_customization/marks.py b/ocs_ci/framework/pytest_customization/marks.py index 55828ced455..7b3734bda08 100644 --- a/ocs_ci/framework/pytest_customization/marks.py +++ b/ocs_ci/framework/pytest_customization/marks.py @@ -384,6 +384,11 @@ reason="Azure KV config required to run the test.", ) +rosa_hcp_required = pytest.mark.skipif( + config.ENV_DATA["platform"].lower() != ROSA_HCP_PLATFORM, + reason="Test runs ONLY on ROSA HCP cluster", +) + external_mode_required = pytest.mark.skipif( config.DEPLOYMENT.get("external_mode") is not True, reason="Test will run on External Mode cluster only", diff --git a/ocs_ci/ocs/machinepool.py b/ocs_ci/ocs/machinepool.py index ae3ddcf93c6..765f4874538 100644 --- a/ocs_ci/ocs/machinepool.py +++ b/ocs_ci/ocs/machinepool.py @@ -25,7 +25,7 @@ class NodeConf: ``` node_conf_data = { "instance_type": "m5.large", - "machinepool": "mypool", + "machinepool_id": "mypool", "multi_availability_zone": "" } node_conf = NodeConf(**node_conf_data) 
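For orientation on the rename above (machinepool becomes machinepool_id), a minimal usage sketch with placeholder names, mirroring how the hunks below and the new ROSA HCP test build and look up a node configuration:

```
node_conf = NodeConf(
    machinepool_id="workers-abc",       # placeholder pool name
    instance_type="m5.4xlarge",
    replicas=1,
)
machine_pools = MachinePools(cluster_name="my-cluster")   # placeholder cluster name
machine_pool = machine_pools.filter(machinepool_id="workers-abc", pick_first=True)
```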
@@ -43,7 +43,7 @@ class NodeConf: None # replicas are historically a separate parameter in node related functions of create_node functions ) instance_type: str = None - machinepool: str = None # machinepool id (machinepool name) + machinepool_id: str = None # machinepool id (machinepool name) subnet: Optional[str] = None availability_zone: Optional[str] = None disk_size: Optional[str] = None # e.g., '300GiB - default value' @@ -68,8 +68,8 @@ def _validate(self): node_conf_data = self._to_dict() if ( - node_conf_data.get("machinepool") - and len(node_conf_data.get("machinepool")) > 14 + node_conf_data.get("machinepool_id") + and len(node_conf_data.get("machinepool_id")) > 15 ): raise ValueError( "Machinepool name must be less than 15 characters or less." @@ -118,7 +118,9 @@ def __new__(cls, *args, **kwargs): @dataclass class MachinePool: cluster_name: str - id: str = field(default="") # machinepool id (machinepool name in NodeConf) + machinepool_id: str = field( + default="" + ) # machinepool id (machinepool name in NodeConf) auto_repair: Optional[bool] = field(default=None) availability_zone: Optional[str] = field(default=None) replicas: int = field(default=0) @@ -138,9 +140,11 @@ class MachinePool: def __post_init__(self): """Automatically populate fields by fetching machine pool details.""" - if self.cluster_name and self.id: + if self.cluster_name and self.machinepool_id: if not self.instance_type or not self.replicas: - details = self.get_machinepool_details(self.cluster_name, self.id) + details = self.get_machinepool_details( + self.cluster_name, self.machinepool_id + ) if details: self.__dict__.update(details.__dict__) self.exist = True @@ -165,7 +169,9 @@ def from_dict(cls, data: dict, cluster_name=None): tags=data.get("aws_node_pool", {}).get("tags", {}), node_drain_grace_period=f"{data.get('node_drain_grace_period', {}).get('value', 0)}" f"{data.get('node_drain_grace_period', {}).get('unit', '')}", - id=data.get("id"), + machinepool_id=data.get( + "id" + ), # this parameter is different in node_conf and data fetched from machinepool cluster_name=cluster_name, ) @@ -176,7 +182,7 @@ def get_machinepool_updated_replicas(self) -> Dict[str, int]: Returns: dict: { "replicas": , "current_replicas": } """ - cmd = f"rosa describe machinepool --cluster {self.cluster_name} --machinepool {self.id} -o json" + cmd = f"rosa describe machinepool --cluster {self.cluster_name} --machinepool {self.machinepool_id} -o json" try: res = exec_cmd(cmd) data = json.loads(res.stdout.strip().decode()) @@ -186,7 +192,7 @@ def get_machinepool_updated_replicas(self) -> Dict[str, int]: } except CommandFailed as ex: logger.error( - f"Failed to get replicas for machinepool '{self.id}' in cluster '{self.cluster_name}': {ex}" + f"Failed to get replicas for machinepool '{self.machinepool_id}' in cluster '{self.cluster_name}': {ex}" ) return {} @@ -299,7 +305,7 @@ def get_machinepool_replicas(self, machinepool_id: str): dict: {replicas: , current_replicas: } """ for machinepool in self.machinepools: - if machinepool.id == machinepool_id: + if machinepool.machinepool_id == machinepool_id: return { "replicas": machinepool.replicas, "current_replicas": machinepool.current_replicas, @@ -311,7 +317,7 @@ def get_machinepool_replicas(self, machinepool_id: str): def filter( self, instance_type: str = None, - id: str = None, + machinepool_id: str = None, availability_zone: str = None, subnet: str = None, version_raw_id: str = None, @@ -322,7 +328,7 @@ def filter( Args: instance_type (str): The instance type to search for. 
- id (str): The machinepool ID to search for. + machinepool_id (str): The machinepool ID to search for. availability_zone (str): The availability zone to search for. subnet (str): The subnet to search for. version_raw_id (str): The version raw ID to search for. @@ -330,13 +336,14 @@ def filter( Returns: + MachinePool | List[MachinePool]: The filtered machine; if pick_first is True, return a single instance. """ machinepools_filtered = [] for machinepool in self.machinepools: if instance_type and machinepool.instance_type != instance_type: continue - if id and machinepool.id != id: + if machinepool_id and machinepool.machinepool_id != machinepool_id: continue if availability_zone and machinepool.availability_zone != availability_zone: continue @@ -349,7 +356,15 @@ def filter( return ( machinepools_filtered[0] if machinepools_filtered - else MachinePool(cluster_name=self.cluster_name) + else MachinePool.from_dict( + { + "id": machinepool_id, + "availability_zone": availability_zone, + "subnet": subnet, + "version_raw_id": version_raw_id, + }, + cluster_name=self.cluster_name, + ) ) else: return machinepools_filtered @@ -365,7 +380,9 @@ def create_machine_pool(self, node_conf): """ run_create_machinepool(self.cluster_name, node_conf) self.load_all_machinepools() - mp = self.filter(id=node_conf.get("machinepool"), pick_first=True) + mp = self.filter( + machinepool_id=node_conf.get("machinepool_id"), pick_first=True + ) mp.wait_replicas_ready(node_conf.get("replicas")) return mp @@ -382,7 +399,9 @@ def edit_machine_pool(self, node_conf, wait_ready=True): """ run_edit_machinepool(self.cluster_name, node_conf) self.load_all_machinepools() - mp = self.filter(id=node_conf.get("machinepool"), pick_first=True) + mp = self.filter( + machinepool_id=node_conf.get("machinepool_id"), pick_first=True + ) if wait_ready: mp.wait_replicas_ready(node_conf.get("replicas")) return mp @@ -452,7 +471,7 @@ def build_machinepool_cmd_base(cluster_name, node_conf, action): cmd = f"rosa {action} machinepool --cluster {cluster_name} " if action == "create": - cmd += f"--name {node_conf.get('machinepool')} --instance-type {node_conf.get('instance_type', '')} --yes " + cmd += f"--name {node_conf.get('machinepool_id')} --instance-type {node_conf.get('instance_type', '')} --yes " if node_conf.get("disk_size", ""): cmd += f"--disk-size {str(node_conf.get('disk_size', ''))} " @@ -489,9 +508,9 @@ def build_machinepool_cmd_base(cluster_name, node_conf, action): # TODO: add unique edit actions by necessity # edit action has another structure, it reacquires name as a last value, without parameter name, e.g. - # rosa edit machinepool --cluster + # rosa edit machinepool_id --cluster if action == "edit": - cmd += f" {node_conf.get('machinepool')} " + cmd += f" {node_conf.get('machinepool_id')} " return cmd @@ -529,13 +548,13 @@ def run_edit_machinepool(cluster_name, node_conf): return exec_cmd(cmd) -def run_delete_machinepool(cluster_name, machinepool_name): +def run_delete_machinepool(cluster_name, machinepool_id): """ Delete a specified machine pool from a ROSA cluster. Args: cluster_name (str): The name or ID of the cluster. - machinepool_name (str): The ID of the machine pool to delete. + machinepool_id (str): The ID of the machine pool to delete. Raises: ValueError: If the cluster name or machine pool name is invalid. 
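The filter() fallback above means a miss with pick_first=True now returns a MachinePool pre-seeded with the requested machinepool_id instead of an empty object, so callers can branch between edit and create. A minimal sketch of that flow with placeholder values; the real consumer is ROSAHCPNodes.create_nodes further below:

```
machine_pools = MachinePools(cluster_name="my-cluster")   # placeholder
mp = machine_pools.filter(machinepool_id="workers-new", pick_first=True)
if mp.exist:
    # pool already there: bump replicas via rosa edit machinepool
    machine_pools.edit_machine_pool(
        NodeConf(machinepool_id="workers-new", replicas=mp.replicas + 1)
    )
else:
    # no such pool: rosa create machinepool with the requested shape
    machine_pools.create_machine_pool(
        NodeConf(machinepool_id="workers-new", instance_type="m5.4xlarge", replicas=1)
    )
```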
@@ -544,16 +563,16 @@ def run_delete_machinepool(cluster_name, machinepool_name): Returns: CompletedProcess: The result of the executed command """ - if not cluster_name or not machinepool_name: + if not cluster_name or not machinepool_id: raise ValueError("Both 'cluster_name' and 'machinepool_name' are required.") - cmd = f"rosa delete machinepool -c {shlex.quote(cluster_name)} {shlex.quote(machinepool_name)} --yes" + cmd = f"rosa delete machinepool -c {shlex.quote(cluster_name)} {shlex.quote(machinepool_id)} --yes" try: return exec_cmd(cmd) except CommandFailed as ex: logger.error( - f"Failed to delete machinepool '{machinepool_name}' from cluster '{cluster_name}': {ex}" + f"Failed to delete machinepool '{machinepool_id}' from cluster '{cluster_name}': {ex}" ) raise diff --git a/ocs_ci/ocs/node.py b/ocs_ci/ocs/node.py index 76964b854e7..fc779f2b410 100644 --- a/ocs_ci/ocs/node.py +++ b/ocs_ci/ocs/node.py @@ -500,7 +500,7 @@ def add_new_nodes_and_label_them_rosa_hcp( node_conf = node_conf or {} if kwargs.get("storage_nodes") and not node_conf: node_conf = { - "machinepool": config.ENV_DATA.get("machine_pool"), + "machinepool_id": config.ENV_DATA.get("machine_pool"), "instance_type": config.ENV_DATA.get("worker_instance_type"), } @@ -1046,7 +1046,7 @@ def delete_and_create_osd_node_managed_cp(osd_node_name): node_util.stop_nodes(osd_node_objs) node_util.terminate_nodes(osd_node_objs) machne_pools = MachinePools(config.ENV_DATA["cluster_name"]) - mp_filtered = machne_pools.filter(id=config.ENV_DATA["machine_pool"]) + mp_filtered = machne_pools.filter(machinepool_id=config.ENV_DATA["machine_pool"]) mp_filtered.wait_replicas_ready( target_replicas=node_num_before_delete, timeout=machine_start_timeout ) diff --git a/ocs_ci/ocs/platform_nodes.py b/ocs_ci/ocs/platform_nodes.py index 6d48030e36a..e3d1402534e 100644 --- a/ocs_ci/ocs/platform_nodes.py +++ b/ocs_ci/ocs/platform_nodes.py @@ -3536,7 +3536,7 @@ def create_and_attach_nodes_to_cluster( Args: node_conf (dict): node configuration - custom dict that we use in rosa create/edit machinepool e.g. - {'machinepool': '', + {'machinepool_id': '', 'instance_type': '', 'subnet': '', 'availability_zone': '', @@ -3556,8 +3556,6 @@ def create_and_attach_nodes_to_cluster( "https://docs.redhat.com/en/documentation/red_hat_openshift_service_on_aws/4/html/" "tutorials/getting-started-with-rosa#underlying-node-operating-system" ) - if num_nodes is None: - raise ValueError("num_nodes must be provided") node_conf = node_conf or {} @@ -3575,15 +3573,18 @@ def create_nodes(self, node_conf, node_type, num_nodes): num_nodes (int): Number of node instances to create. Returns: - list: List of AWSNode objects (no known use case for this for non UPI deploymen, which is ROSA HCP, - implemented due to common create_nodes signature) + list: List of (ROSAHCPNode) objects created by this method. 
""" node_list = [] cluster_name = config.ENV_DATA.get("cluster_name") node_conf = NodeConf(**node_conf) - machine_pool_id = node_conf.get("machinepool") + machinepool_id = node_conf.get("machinepool_id") machine_pools = MachinePools(cluster_name=cluster_name) - machine_pool = machine_pools.filter(id=machine_pool_id, pick_first=True) + machine_pool = machine_pools.filter( + machinepool_id=machinepool_id, + instance_type=node_conf.get("instance_type"), + pick_first=True, + ) if ( machine_pool.exist @@ -3591,30 +3592,32 @@ def create_nodes(self, node_conf, node_type, num_nodes): and machine_pool.instance_type != node_conf.get("instance_type") ): raise UnavailableResourceException( - f"MachinePool '{machine_pool_id}' " + f"MachinePool '{machinepool_id}' " "found with different instance type. " "The test brakes logic, aborting test." ) elif machine_pool.exist and machine_pool.instance_type == node_conf.get( "instance_type" ): - logger.info(f"MachinePool '{machine_pool_id}' found. Updating MachinePool") + logger.info(f"MachinePool '{machinepool_id}' found. Updating MachinePool") node_conf["replicas"] = machine_pool.replicas + num_nodes machine_pools.edit_machine_pool(node_conf, wait_ready=True) elif not machine_pool.exist: logger.info( - f"MachinePool '{machine_pool_id}' not found. Creating new MachinePool" + f"MachinePool '{machinepool_id}' not found. Creating new MachinePool" ) # create random machinepool name if not provided - node_conf["machinepool"] = machine_pool_id or "mp_" + random_string(3) + node_conf["machinepool_id"] = machinepool_id or "mp_" + random_string(3) node_conf["instance_type"] = ( node_conf.get("instance_type") or config.ENV_DATA["worker_instance_type"] ) - machine_pools.create_machine_pool(node_conf, num_nodes) + node_conf["replicas"] = num_nodes + machine_pools.create_machine_pool(node_conf) node_conf["zone"] = self.az.get_zone_number() for _ in range(num_nodes): + # adding created nodes to the list of a nodes returned by this method node_list.append(ROSAHCPNode(node_conf, constants.RHCOS)) return node_list diff --git a/tests/functional/z_cluster/cluster_expansion/conftest.py b/tests/functional/z_cluster/cluster_expansion/conftest.py index 4144f6f51c6..ade298fcfcf 100644 --- a/tests/functional/z_cluster/cluster_expansion/conftest.py +++ b/tests/functional/z_cluster/cluster_expansion/conftest.py @@ -25,12 +25,13 @@ def add_nodes(): Test for adding worker nodes to the cluster while IOs """ - def factory(ocs_nodes=False, node_count=3, taint_label=None): + def factory(ocs_nodes=False, node_count=3, taint_label=None, node_conf=None): """ Args: ocs_nodes (bool): True if new nodes are OCS, False otherwise node_count (int): Number of nodes to be added taint_label (str): Taint label to be added + node_conf (NodeConf): Node configuration """ @@ -55,6 +56,7 @@ def factory(ocs_nodes=False, node_count=3, taint_label=None): num_nodes=node_count, mark_for_ocs_label=ocs_nodes, storage_nodes=True, + node_conf=node_conf, ) ) else: diff --git a/tests/functional/z_cluster/cluster_expansion/test_nodes_rosa_hcp.py b/tests/functional/z_cluster/cluster_expansion/test_nodes_rosa_hcp.py new file mode 100644 index 00000000000..80df5295590 --- /dev/null +++ b/tests/functional/z_cluster/cluster_expansion/test_nodes_rosa_hcp.py @@ -0,0 +1,123 @@ +import random +import logging +import pytest + +from ocs_ci.framework import config +from ocs_ci.framework.pytest_customization.marks import ( + rosa_hcp_required, + tier4a, + polarion_id, + brown_squad, +) +from ocs_ci.framework.testlib import ManageTest 
+from ocs_ci.ocs.cluster import CephCluster +from ocs_ci.ocs.machinepool import NodeConf, MachinePools +from ocs_ci.ocs.node import unschedule_nodes, schedule_nodes, get_node_pods +from ocs_ci.ocs import node +from ocs_ci.ocs.resources.pod import get_osd_pods +from ocs_ci.utility.utils import get_random_str, ceph_health_check + +log = logging.getLogger(__name__) + + +def select_osd_node_name(): + """ + select randomly one of the osd nodes + + Returns: + str: the selected osd node name + + """ + osd_node_names = node.get_osd_running_nodes() + osd_node_name = random.choice(osd_node_names) + log.info(f"Selected OSD is {osd_node_name}") + return osd_node_name + + +def get_osd_pod_name(osd_node_name): + """ + get the osd pod name from the osd node name + + Args: + osd_node_name (str): the osd node name + + Returns: + Pod: the osd pod object + + """ + osd_pods = get_osd_pods() + osd_pod_name = get_node_pods(osd_node_name, osd_pods)[0] + log.info(f"OSD pod name is {osd_pod_name}") + return osd_pod_name + + +class TestAddDifferentInstanceTypeNode(ManageTest): + @pytest.fixture + def setup(self, request): + """ + Method to set test variables + """ + self.osd_node_name = select_osd_node_name() + self.osd_pod = get_osd_pod_name(self.osd_node_name) + self.machine_pool_new = f"workers-{get_random_str(3)}" + log.info(f"New machine pool name is {self.machine_pool_new}") + log.info(f"OSD node name is {self.osd_node_name}") + + def finalizer(): + """ + Teardown function to schedule initial node back + """ + schedule_nodes([self.osd_node_name]) + + request.addfinalizer(finalizer) + + @tier4a + @brown_squad + @rosa_hcp_required + @polarion_id("OCS-6270") + def test_add_ocs_node_non_default_machinepool(self, setup, add_nodes): + """ + Test to add 1 ocs node and wait till rebalance is completed + + Compute nodes minimal requirements are: + Compute node instance type m5.xlarge (4 vCPU 16, GiB RAM) + + Steps: + 1. Run create machinepool with node and label it with "openshif-storage" tag + 2. Select any node with osd and cordon it + 3. delete OSD pod on unscheduled node + 4. verify all OSD pods are running + 5. 
verify data rebalancing complete in reasonable time + """ + + instance_types = ["m5.xlarge", "m5.4xlarge", "m5.8xlarge", "m5.12xlarge"] + cluster_name = config.ENV_DATA["cluster_name"] + namespace = config.ENV_DATA["cluster_namespace"] + ceph_health_tries = 40 + machine_pools = MachinePools(cluster_name=cluster_name) + machine_pool = machine_pools.filter( + machinepool_id=config.ENV_DATA["machine_pool"], pick_first=True + ) + alt_inst_type = random.choice( + ( + [ + i_type + for i_type in instance_types + if i_type != machine_pool.instance_type + ] + ) + ) + + node_conf = NodeConf( + **{"machinepool_id": self.machine_pool_new, "instance_type": alt_inst_type} + ) + add_nodes(ocs_nodes=True, node_count=1, node_conf=node_conf) + + unschedule_nodes([self.osd_node_name]) + self.osd_pod.delete(wait=True) + + ceph_health_check(namespace=namespace, tries=ceph_health_tries, delay=60) + ceph_cluster_obj = CephCluster() + assert ceph_cluster_obj.wait_for_rebalance( + timeout=3600 + ), "Data re-balance failed to complete" From 051625cd4e4f08e01f9308849ae65863ad640414 Mon Sep 17 00:00:00 2001 From: Sidhant Agrawal Date: Thu, 5 Dec 2024 14:12:44 +0530 Subject: [PATCH 20/44] Remove / deprecate redundant Green Squad tests (#10959) Signed-off-by: Sidhant Agrawal --- .../pv/pv_services/test_pv_creation.py | 112 ------------------ .../pv_services/test_pvc_assign_pod_node.py | 8 +- .../test_rwo_pvc_fencing_unfencing.py | 4 +- .../storageclass/test_rbd_csi_default_sc.py | 93 --------------- 4 files changed, 8 insertions(+), 209 deletions(-) delete mode 100644 tests/functional/pv/pv_services/test_pv_creation.py delete mode 100644 tests/functional/storageclass/test_rbd_csi_default_sc.py diff --git a/tests/functional/pv/pv_services/test_pv_creation.py b/tests/functional/pv/pv_services/test_pv_creation.py deleted file mode 100644 index b8bbabb4fe5..00000000000 --- a/tests/functional/pv/pv_services/test_pv_creation.py +++ /dev/null @@ -1,112 +0,0 @@ -""" -A test for creating a PV -""" - -import logging -import os - -import pytest -import yaml - -from ocs_ci.framework import config -from ocs_ci.ocs import exceptions, ocp -from ocs_ci.ocs.constants import TEMPLATE_PV_PVC_DIR -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest -from ocs_ci.utility import templating, utils - -log = logging.getLogger(__name__) - -PV_YAML = os.path.join(TEMPLATE_PV_PVC_DIR, "PersistentVolume.yaml") -TEMP_YAML_FILE = "test.yaml" -VOLUME_DELETED = 'persistentvolume "{volume_name}" deleted' - - -OCP = ocp.OCP(kind="PersistentVolume", namespace=config.ENV_DATA["cluster_namespace"]) - - -@pytest.fixture(scope="class") -def test_fixture(request): - """ - Create disks - """ - self = request.node.cls - - def finalizer(): - teardown(self) - - request.addfinalizer(finalizer) - - -def teardown(self): - """ - Tearing down the environment - """ - if os.path.exists(TEMP_YAML_FILE): - assert delete_pv(self.pv_name) - assert not verify_pv_exist(self.pv_name) - utils.delete_file(TEMP_YAML_FILE) - - -def create_pv(pv_data): - """ - Create a new Persistent Volume - """ - file_y = templating.generate_yaml_from_jinja2_template_with_data(PV_YAML, **pv_data) - with open(TEMP_YAML_FILE, "w") as yaml_file: - yaml.dump(file_y, yaml_file, default_flow_style=False) - log.info("Creating new Persistent Volume") - assert OCP.create(yaml_file=TEMP_YAML_FILE) - return OCP.wait_for_resource( - resource_name=pv_data["pv_name"], condition="Available" - ) - - -def delete_pv(pv_name): - """ - Delete a 
Persistent Volume by given name - """ - log.info(f"Deleting the Persistent Volume {pv_name}") - stat = OCP.delete(TEMP_YAML_FILE) - if stat in VOLUME_DELETED.format(volume_name=pv_name): - return True - return False - - -def verify_pv_exist(pv_name): - """ - Verify a Persistent Volume exists by a given name - """ - try: - OCP.get(pv_name) - except exceptions.CommandFailed: - log.info(f"PV {pv_name} doesn't exist") - return False - log.info(f"PV {pv_name} exist") - return True - - -# @tier1 -# Test case is disabled. -# The Recycle reclaim policy is deprecated in OpenShift Container Platform 4. -# Dynamic provisioning is recommended for equivalent and better functionality. -@green_squad -@pytest.mark.usefixtures( - test_fixture.__name__, -) -class TestPvCreation(ManageTest): - """ - Testing PV creation - """ - - pv_data = {} - pv_name = "my-pv1" - pv_data["pv_name"] = pv_name - pv_data["pv_size"] = "3Gi" - - def test_pv_creation(self): - """ - Test PV creation - """ - assert create_pv(self.pv_data) - assert verify_pv_exist(self.pv_name) diff --git a/tests/functional/pv/pv_services/test_pvc_assign_pod_node.py b/tests/functional/pv/pv_services/test_pvc_assign_pod_node.py index 85776d9045d..aea62d65b9e 100644 --- a/tests/functional/pv/pv_services/test_pvc_assign_pod_node.py +++ b/tests/functional/pv/pv_services/test_pvc_assign_pod_node.py @@ -63,7 +63,9 @@ def verify_access_token_notin_odf_pod_logs(self): ), ], ) - def test_rwo_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory): + def deprecated_test_rwo_pvc_assign_pod_node( + self, interface, pvc_factory, teardown_factory + ): """ Test assign nodeName to a pod using RWO pvc """ @@ -130,7 +132,9 @@ def test_rwo_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory) ), ], ) - def test_rwx_pvc_assign_pod_node(self, interface, pvc_factory, teardown_factory): + def deprecated_test_rwx_pvc_assign_pod_node( + self, interface, pvc_factory, teardown_factory + ): """ Test assign nodeName to a pod using RWX pvc """ diff --git a/tests/functional/pv/pv_services/test_rwo_pvc_fencing_unfencing.py b/tests/functional/pv/pv_services/test_rwo_pvc_fencing_unfencing.py index c668568608c..b6f2e189d30 100644 --- a/tests/functional/pv/pv_services/test_rwo_pvc_fencing_unfencing.py +++ b/tests/functional/pv/pv_services/test_rwo_pvc_fencing_unfencing.py @@ -582,7 +582,7 @@ def verify_multi_attach_error(self, pod_list): ), ], ) - def test_rwo_pvc_fencing_node_short_network_failure( + def deprecated_test_rwo_pvc_fencing_node_short_network_failure( self, nodes, setup, node_restart_teardown ): """ @@ -728,7 +728,7 @@ def test_rwo_pvc_fencing_node_short_network_failure( ), ], ) - def test_rwo_pvc_fencing_node_prolonged_network_failure( + def deprecated_test_rwo_pvc_fencing_node_prolonged_network_failure( self, nodes, setup, node_restart_teardown ): """ diff --git a/tests/functional/storageclass/test_rbd_csi_default_sc.py b/tests/functional/storageclass/test_rbd_csi_default_sc.py deleted file mode 100644 index 3bb0d83fbf0..00000000000 --- a/tests/functional/storageclass/test_rbd_csi_default_sc.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Basic test for creating PVC with default StorageClass - RBD-CSI -""" - -import logging -import pytest - -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import tier1, ManageTest, skipif_external_mode -from ocs_ci.helpers import helpers -from ocs_ci.ocs import constants -from ocs_ci.ocs.exceptions import ResourceLeftoversException -from tests.fixtures import ( - 
create_ceph_block_pool, - create_rbd_secret, -) - -log = logging.getLogger(__name__) - - -@pytest.fixture() -def resources(request): - """ - Delete the resources created during the test - Returns: - tuple: empty lists of resources - """ - pod, pvc, storageclass = ([] for _ in range(3)) - - def finalizer(): - """ - Delete the resources created during the test - """ - failed_to_delete = [] - for resource_type in pod, pvc, storageclass: - for resource in resource_type: - resource.delete() - try: - resource.ocp.wait_for_delete(resource.name) - except TimeoutError: - failed_to_delete.append(resource) - if resource.kind == constants.PVC: - log.info("Checking whether PV is deleted") - assert helpers.validate_pv_delete(resource.backed_pv) - if failed_to_delete: - raise ResourceLeftoversException( - f"Failed to delete resources: {failed_to_delete}" - ) - - request.addfinalizer(finalizer) - - return pod, pvc, storageclass - - -@green_squad -@skipif_external_mode -@tier1 -@pytest.mark.usefixtures( - create_ceph_block_pool.__name__, - create_rbd_secret.__name__, -) -@pytest.mark.polarion_id("OCS-347") -class TestBasicPVCOperations(ManageTest): - """ - Testing default storage class creation and pvc creation - with rbd pool - """ - - def test_ocs_347(self, resources): - pod, pvc, storageclass = resources - - log.info("Creating RBD StorageClass") - storageclass.append( - helpers.create_storage_class( - interface_type=constants.CEPHBLOCKPOOL, - interface_name=self.cbp_obj.name, - secret_name=self.rbd_secret_obj.name, - ) - ) - log.info("Creating a PVC") - pvc.append(helpers.create_pvc(sc_name=storageclass[0].name)) - for pvc_obj in pvc: - helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND) - pvc_obj.reload() - log.info(f"Creating a pod on with pvc {pvc[0].name}") - pod_obj = helpers.create_pod( - interface_type=constants.CEPHBLOCKPOOL, - pvc_name=pvc[0].name, - pod_dict_path=constants.NGINX_POD_YAML, - ) - pod.append(pod_obj) - helpers.wait_for_resource_state(pod_obj, constants.STATUS_RUNNING) - pod_obj.reload() From e17d1e9b3e944eebf9a75aa099901d93b8e7c8a3 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Thu, 5 Dec 2024 11:05:35 +0200 Subject: [PATCH 21/44] fetch storage ns from config (#10951) * fetch storage ns from config Signed-off-by: Daniel Osypenko --- ocs_ci/deployment/hosted_cluster.py | 8 ++++---- .../provider_client/storage_client_deployment.py | 2 +- ocs_ci/helpers/helpers.py | 2 +- ocs_ci/ocs/bucket_utils.py | 2 +- ocs_ci/ocs/replica_one.py | 7 ++++--- ocs_ci/ocs/resources/pod.py | 8 ++++---- ocs_ci/ocs/resources/stretchcluster.py | 3 ++- ocs_ci/ocs/ui/validation_ui.py | 6 +++++- ocs_ci/utility/tests/test_prometheus.py | 5 +++-- tests/conftest.py | 8 ++++---- tests/cross_functional/ui/test_odf_topology.py | 4 +--- .../test_nfs_feature_enable_for_ODF_clusters.py | 4 ++-- .../object/mcg/test_bucket_delete_using_obc_creds.py | 4 ++-- tests/functional/object/mcg/test_multi_region.py | 3 ++- .../functional/object/mcg/test_noobaa_db_pg_expansion.py | 5 +++-- tests/functional/object/mcg/test_pv_pool.py | 2 +- tests/functional/object/mcg/test_s3_regenerate_creds.py | 6 +++--- .../functional/object/mcg/test_virtual_hosted_buckets.py | 5 ++--- .../functional/pod_and_daemons/test_csi_logs_rotation.py | 4 ++-- .../functional/pv/add_metadata_feature/test_metadata.py | 8 +++++--- .../pv/pv_services/test_cr_resources_validation.py | 4 +++- tests/functional/storageclass/test_replica1.py | 9 ++++++--- 22 files changed, 61 insertions(+), 48 deletions(-) diff --git 
a/ocs_ci/deployment/hosted_cluster.py b/ocs_ci/deployment/hosted_cluster.py index 066ea0b292f..1d9f76e6ef8 100644 --- a/ocs_ci/deployment/hosted_cluster.py +++ b/ocs_ci/deployment/hosted_cluster.py @@ -917,7 +917,7 @@ def get_onboarding_key(self): str: onboarding token key """ secret_ocp_obj = ocp.OCP( - kind=constants.SECRET, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE + kind=constants.SECRET, namespace=config.ENV_DATA["cluster_namespace"] ) key = ( @@ -1160,7 +1160,7 @@ def get_provider_address(self): """ Get the provider address """ - ocp = OCP(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) + ocp = OCP(namespace=config.ENV_DATA["cluster_namespace"]) storage_provider_endpoint = ocp.exec_oc_cmd( ( "get storageclusters.ocs.openshift.io -o jsonpath={'.items[*].status.storageProviderEndpoint'}" @@ -1210,7 +1210,7 @@ def storage_claim_exists_cephfs(self): else: ocp = OCP( kind=constants.STORAGECLAIM, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], cluster_kubeconfig=self.cluster_kubeconfig, ) @@ -1297,7 +1297,7 @@ def storage_claim_exists_rbd(self): else: ocp = OCP( kind=constants.STORAGECLAIM, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], cluster_kubeconfig=self.cluster_kubeconfig, ) diff --git a/ocs_ci/deployment/provider_client/storage_client_deployment.py b/ocs_ci/deployment/provider_client/storage_client_deployment.py index 84cf6bd118e..2af97a2205d 100644 --- a/ocs_ci/deployment/provider_client/storage_client_deployment.py +++ b/ocs_ci/deployment/provider_client/storage_client_deployment.py @@ -271,7 +271,7 @@ def provider_and_native_client_installation( if self.ocs_version >= version.VERSION_4_16: # Validate native client is created in openshift-storage namespace self.deployment.wait_for_csv( - self.ocs_client_operator, constants.OPENSHIFT_STORAGE_NAMESPACE + self.ocs_client_operator, config.ENV_DATA["cluster_namespace"] ) # Verify native storageclient is created successfully diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index 86f17363e18..e3fca3a7176 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -615,7 +615,7 @@ def create_ceph_block_pool( def create_ceph_file_system( - cephfs_name=None, label=None, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE + cephfs_name=None, label=None, namespace=config.ENV_DATA["cluster_namespace"] ): """ Create a Ceph file system diff --git a/ocs_ci/ocs/bucket_utils.py b/ocs_ci/ocs/bucket_utils.py index 19236b8bdcd..9b6089f110b 100644 --- a/ocs_ci/ocs/bucket_utils.py +++ b/ocs_ci/ocs/bucket_utils.py @@ -1090,7 +1090,7 @@ def check_pv_backingstore_status( def check_pv_backingstore_type( backingstore_name=constants.DEFAULT_NOOBAA_BACKINGSTORE, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], ): """ check if existing pv backing store is in READY state diff --git a/ocs_ci/ocs/replica_one.py b/ocs_ci/ocs/replica_one.py index 91b310d72be..5b6c050a9ce 100644 --- a/ocs_ci/ocs/replica_one.py +++ b/ocs_ci/ocs/replica_one.py @@ -13,7 +13,6 @@ from ocs_ci.ocs.constants import ( DEFAULT_CEPHBLOCKPOOL, DEFAULT_STORAGE_CLUSTER, - OPENSHIFT_STORAGE_NAMESPACE, OSD_APP_LABEL, CEPHBLOCKPOOL, STORAGECLASS, @@ -141,7 +140,9 @@ def scaledown_deployment(deployment_names: list[str]) -> None: """ log.info("Starts Scaledown deployments") - deployment_obj = OCP(kind=DEPLOYMENT, namespace=OPENSHIFT_STORAGE_NAMESPACE) + deployment_obj = OCP( + kind=DEPLOYMENT, 
namespace=config.ENV_DATA["cluster_namespace"] + ) for deployment in deployment_names: deployment_obj.exec_oc_cmd(f"scale deployment {deployment} --replicas=0") log.info(f"scaling to 0: {deployment}") @@ -221,7 +222,7 @@ def modify_replica1_osd_count(new_osd_count): """ storage_cluster = OCP(kind=STORAGECLUSTER, name=DEFAULT_STORAGE_CLUSTER) storage_cluster.exec_oc_cmd( - f"patch storagecluster {DEFAULT_STORAGE_CLUSTER} -n {OPENSHIFT_STORAGE_NAMESPACE} " + f"patch storagecluster {DEFAULT_STORAGE_CLUSTER} -n {config.ENV_DATA['cluster_namespace']} " f'--type json --patch \'[{{"op": "replace", "path": ' f'"/spec/managedResources/cephNonResilientPools/count", "value": {new_osd_count} }}]\'' ) diff --git a/ocs_ci/ocs/resources/pod.py b/ocs_ci/ocs/resources/pod.py index 69e4f626781..5e9398a1eb5 100644 --- a/ocs_ci/ocs/resources/pod.py +++ b/ocs_ci/ocs/resources/pod.py @@ -796,7 +796,7 @@ def get_ceph_tools_pod( cluster_kubeconfig = config.ENV_DATA.get("provider_kubeconfig", "") if cluster_kubeconfig: - namespace = constants.OPENSHIFT_STORAGE_NAMESPACE + namespace = config.ENV_DATA["cluster_namespace"] else: namespace = namespace or config.ENV_DATA["cluster_namespace"] @@ -1574,7 +1574,7 @@ def run_io_and_verify_mount_point(pod_obj, bs="10M", count="950"): def get_pods_having_label( label, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], retry=0, cluster_config=None, statuses=None, @@ -3727,7 +3727,7 @@ def get_mon_pod_by_pvc_name(pvc_name: str): return Pod(**mon_pod_ocp) -def get_debug_pods(debug_nodes, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE): +def get_debug_pods(debug_nodes, namespace=config.ENV_DATA["cluster_namespace"]): """ Get debug pods created for the nodes in debug @@ -3752,7 +3752,7 @@ def get_debug_pods(debug_nodes, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) def wait_for_pods_deletion( - label, timeout=120, sleep=5, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE + label, timeout=120, sleep=5, namespace=config.ENV_DATA["cluster_namespace"] ): """ Wait for the pods with particular label to be deleted diff --git a/ocs_ci/ocs/resources/stretchcluster.py b/ocs_ci/ocs/resources/stretchcluster.py index 91273f8a5ff..0d7f52b8d13 100644 --- a/ocs_ci/ocs/resources/stretchcluster.py +++ b/ocs_ci/ocs/resources/stretchcluster.py @@ -5,6 +5,7 @@ from datetime import timedelta +from ocs_ci.framework import config from ocs_ci.ocs.resources import pod from ocs_ci.ocs.node import get_nodes_having_label, get_ocs_nodes, get_node_objs from ocs_ci.ocs.resources.ocs import OCS @@ -542,7 +543,7 @@ def reset_conn_score(self): Reset connection scores for all the mon's """ - mon_pods = get_mon_pods(namespace=constants.OPENSHIFT_STORAGE_NAMESPACE) + mon_pods = get_mon_pods(namespace=config.ENV_DATA["cluster_namespace"]) for pod_obj in mon_pods: mon_pod_id = get_mon_pod_id(pod_obj) cmd = f"ceph daemon mon.{mon_pod_id} connection scores reset" diff --git a/ocs_ci/ocs/ui/validation_ui.py b/ocs_ci/ocs/ui/validation_ui.py index ccfc28ead72..c6d2819bc6f 100644 --- a/ocs_ci/ocs/ui/validation_ui.py +++ b/ocs_ci/ocs/ui/validation_ui.py @@ -98,7 +98,11 @@ def verify_ocs_operator_tabs(self): ) logger.info("Verify Details tab on OCS operator") - strings_details_tab = ["Description", "Succeeded", "openshift-storage"] + strings_details_tab = [ + "Description", + "Succeeded", + config.ENV_DATA["cluster_namespace"], + ] self.verify_page_contain_strings( strings_on_page=strings_details_tab, page_name="details_tab" ) diff --git 
a/ocs_ci/utility/tests/test_prometheus.py b/ocs_ci/utility/tests/test_prometheus.py index 971f95f75ff..41c8130ef18 100644 --- a/ocs_ci/utility/tests/test_prometheus.py +++ b/ocs_ci/utility/tests/test_prometheus.py @@ -2,6 +2,7 @@ import pytest +from ocs_ci.framework import config from ocs_ci.utility.prometheus import check_query_range_result_enum @@ -22,7 +23,7 @@ def query_range_result_ok(): "endpoint": "http-metrics", "instance": "10.131.0.36:9283", "job": "rook-ceph-mgr", - "namespace": "openshift-storage", + "namespace": config.ENV_DATA["cluster_namespace"], "pod": "rook-ceph-mgr-a-66df496d9d-snssn", "service": "rook-ceph-mgr", }, @@ -52,7 +53,7 @@ def query_range_result_ok(): "endpoint": "http-metrics", "instance": "10.131.0.36:9283", "job": "rook-ceph-mgr", - "namespace": "openshift-storage", + "namespace": config.ENV_DATA["cluster_namespace"], "pod": "rook-ceph-mgr-a-66df496d9d-snssn", "service": "rook-ceph-mgr", }, diff --git a/tests/conftest.py b/tests/conftest.py index 6bbc7b5ec83..8820bac797b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,7 @@ from ocs_ci.deployment import factory as dep_factory from ocs_ci.deployment.helpers.hypershift_base import HyperShiftBase from ocs_ci.deployment.hosted_cluster import HostedClients -from ocs_ci.framework import config as ocsci_config, Config +from ocs_ci.framework import config as ocsci_config, Config, config import ocs_ci.framework.pytest_customization.marks from ocs_ci.framework.pytest_customization.marks import ( deployment, @@ -8029,7 +8029,7 @@ def factory(min_ep_count=3, max_ep_count=3, cpu=6, memory="10Gi"): storagecluster_obj = OCP( kind=constants.STORAGECLUSTER, resource_name=constants.DEFAULT_STORAGE_CLUSTER, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], ) scale_endpoint_pods_param = ( @@ -8346,7 +8346,7 @@ def factory(pv_size="50"): get_pods_having_label( label=label, retry=5, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], ) ) @@ -8371,7 +8371,7 @@ def finalizer(): get_pods_having_label( label=label, retry=5, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], ) ) diff --git a/tests/cross_functional/ui/test_odf_topology.py b/tests/cross_functional/ui/test_odf_topology.py index b99a2adee04..0945675d717 100644 --- a/tests/cross_functional/ui/test_odf_topology.py +++ b/tests/cross_functional/ui/test_odf_topology.py @@ -123,9 +123,7 @@ def test_validate_topology_configuration( interface=constants.CEPHBLOCKPOOL, access_mode=constants.ACCESS_MODE_RWO, status=constants.STATUS_BOUND, - project=OCP( - kind="Project", namespace=constants.OPENSHIFT_STORAGE_NAMESPACE - ), + project=OCP(kind="Project", namespace=config.ENV_DATA["cluster_namespace"]), ) pod_obj = helpers.create_pod( interface_type=constants.CEPHBLOCKPOOL, diff --git a/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py b/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py index a84eeea14d4..9e198aa0d85 100644 --- a/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py +++ b/tests/functional/nfs_feature/test_nfs_feature_enable_for_ODF_clusters.py @@ -64,7 +64,7 @@ def test_nfs_not_enabled_by_default(self): """ storage_cluster_obj = ocp.OCP( - kind="Storagecluster", namespace="openshift-storage" + kind="Storagecluster", namespace=config.ENV_DATA["cluster_namespace"] ) # Checks cephnfs resources not available by default cephnfs_resource = 
storage_cluster_obj.exec_oc_cmd("get cephnfs") @@ -125,7 +125,7 @@ def setup_teardown(self, request): """ self = request.node.cls log.info("-----Setup-----") - self.namespace = "openshift-storage" + self.namespace = config.ENV_DATA["cluster_namespace"] self.storage_cluster_obj = ocp.OCP( kind="Storagecluster", namespace=self.namespace ) diff --git a/tests/functional/object/mcg/test_bucket_delete_using_obc_creds.py b/tests/functional/object/mcg/test_bucket_delete_using_obc_creds.py index 9e9ba73b2f1..59d5d68a724 100644 --- a/tests/functional/object/mcg/test_bucket_delete_using_obc_creds.py +++ b/tests/functional/object/mcg/test_bucket_delete_using_obc_creds.py @@ -2,6 +2,7 @@ import boto3 import logging +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import ( tier2, bugzilla, @@ -15,7 +16,6 @@ from ocs_ci.ocs.ocp import OCP import botocore.exceptions as boto3exception from ocs_ci.ocs.constants import ( - OPENSHIFT_STORAGE_NAMESPACE, SECRET, ) from ocs_ci.ocs.exceptions import UnexpectedBehaviour @@ -41,7 +41,7 @@ def test_bucket_delete_using_obc_creds(mcg_obj, bucket_factory): logger.info("Creating OBC") bucket = bucket_factory(amount=1, interface="OC")[0].name # Fetch OBC credentials - secret_ocp_obj = OCP(kind=SECRET, namespace=OPENSHIFT_STORAGE_NAMESPACE) + secret_ocp_obj = OCP(kind=SECRET, namespace=config.ENV_DATA["cluster_namespace"]) obc_secret_obj = secret_ocp_obj.get(bucket) obc_access_key = base64.b64decode( obc_secret_obj.get("data").get("AWS_ACCESS_KEY_ID") diff --git a/tests/functional/object/mcg/test_multi_region.py b/tests/functional/object/mcg/test_multi_region.py index 9a7fb7544d2..c05d1d3da45 100644 --- a/tests/functional/object/mcg/test_multi_region.py +++ b/tests/functional/object/mcg/test_multi_region.py @@ -2,6 +2,7 @@ import pytest +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import ( tier1, tier4a, @@ -200,7 +201,7 @@ def test_multiregion_spread_to_mirror( bucket = bucket_factory(1, "OC", bucketclass=bucket_class)[0] bucketclass_obj = ocp.OCP( kind=constants.BUCKETCLASS, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], resource_name=bucket.bucketclass.name, ) # Patch bucket class to update placement from "Spread" to "Mirror" diff --git a/tests/functional/object/mcg/test_noobaa_db_pg_expansion.py b/tests/functional/object/mcg/test_noobaa_db_pg_expansion.py index 4a753d1010b..01d41b8697a 100644 --- a/tests/functional/object/mcg/test_noobaa_db_pg_expansion.py +++ b/tests/functional/object/mcg/test_noobaa_db_pg_expansion.py @@ -1,5 +1,6 @@ import logging +from ocs_ci.framework import config from ocs_ci.utility import utils from ocs_ci.framework.pytest_customization.marks import ( vsphere_platform_required, @@ -40,7 +41,7 @@ def test_noobaa_db_pg_expansion(self, scale_noobaa_db_pod_pv_size): try: ceph_toolbox = get_ceph_tools_pod( - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE + namespace=config.ENV_DATA["cluster_namespace"] ) except (AssertionError, CephToolBoxNotFoundException) as ex: raise CommandFailed(ex) @@ -74,7 +75,7 @@ def test_noobaa_db_pg_expansion(self, scale_noobaa_db_pod_pv_size): # Verify default backingstore is in ready state or not default_bs = OCP( - kind=constants.BACKINGSTORE, namespace=constants.OPENSHIFT_STORAGE_NAMESPACE + kind=constants.BACKINGSTORE, namespace=config.ENV_DATA["cluster_namespace"] ).get(resource_name=constants.DEFAULT_NOOBAA_BACKINGSTORE) assert ( default_bs["status"]["phase"] == 
constants.STATUS_READY diff --git a/tests/functional/object/mcg/test_pv_pool.py b/tests/functional/object/mcg/test_pv_pool.py index a838d0dd3fe..88416e74e73 100644 --- a/tests/functional/object/mcg/test_pv_pool.py +++ b/tests/functional/object/mcg/test_pv_pool.py @@ -373,7 +373,7 @@ def test_pvpool_bs_in_fips(self, backingstore_factory): # the backingstore has reached Rejected state pv_bs_obj = OCP( kind=constants.BACKINGSTORE, - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], resource_name=pv_backingstore.name, ) assert pv_bs_obj.wait_for_resource( diff --git a/tests/functional/object/mcg/test_s3_regenerate_creds.py b/tests/functional/object/mcg/test_s3_regenerate_creds.py index 52a77e7f4c8..56170e72cda 100644 --- a/tests/functional/object/mcg/test_s3_regenerate_creds.py +++ b/tests/functional/object/mcg/test_s3_regenerate_creds.py @@ -1,5 +1,6 @@ import logging +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import ( tier2, bugzilla, @@ -9,7 +10,6 @@ mcg, ) from ocs_ci.ocs.ocp import OCP -from ocs_ci.ocs import constants logger = logging.getLogger(__name__) @@ -37,14 +37,14 @@ def test_s3_regenerate_creds(mcg_obj, project_factory): logger.info(f"Creating OBC {obc_name}") mcg_obj.exec_mcg_cmd( cmd=f"obc create {obc_name} --app-namespace {proj_name}", - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], ) ocp_obj.get(resource_name=obc_name) # regenerate credential mcg_obj.exec_mcg_cmd( cmd=f"obc regenerate {obc_name} --app-namespace {proj_name}", - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], use_yes=True, ) logger.info("Successfully regenerated s3 credentials") diff --git a/tests/functional/object/mcg/test_virtual_hosted_buckets.py b/tests/functional/object/mcg/test_virtual_hosted_buckets.py index 127dafbc8c5..f38a99798ea 100644 --- a/tests/functional/object/mcg/test_virtual_hosted_buckets.py +++ b/tests/functional/object/mcg/test_virtual_hosted_buckets.py @@ -1,9 +1,8 @@ import logging - +from ocs_ci.framework import config from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs.resources.ocs import OCS -from ocs_ci.ocs import constants from ocs_ci.ocs.bucket_utils import ( verify_s3_object_integrity, write_random_objects_in_pod, @@ -45,7 +44,7 @@ def test_virtual_hosted_bucket( # create a route for the bucket create above s3_route_data = OCP( kind="route", - namespace=constants.OPENSHIFT_STORAGE_NAMESPACE, + namespace=config.ENV_DATA["cluster_namespace"], resource_name="s3", ).get() host_base = f'{s3_route_data["spec"]["host"]}' diff --git a/tests/functional/pod_and_daemons/test_csi_logs_rotation.py b/tests/functional/pod_and_daemons/test_csi_logs_rotation.py index afa1160db0b..3823008abe4 100644 --- a/tests/functional/pod_and_daemons/test_csi_logs_rotation.py +++ b/tests/functional/pod_and_daemons/test_csi_logs_rotation.py @@ -2,9 +2,9 @@ import logging import pytest +from ocs_ci.framework import config from ocs_ci.framework.testlib import BaseTest from ocs_ci.ocs.resources import pod -from ocs_ci.ocs.constants import OPENSHIFT_STORAGE_NAMESPACE from ocs_ci.framework.pytest_customization.marks import ( brown_squad, tier2, @@ -161,7 +161,7 @@ def test_pods_csi_log_rotation( """ csi_interface_plugin_pod_objs = pod.get_all_pods( - namespace=OPENSHIFT_STORAGE_NAMESPACE, selector=[pod_selector] + namespace=config.ENV_DATA["cluster_namespace"], selector=[pod_selector] ) # check on the first pod diff --git 
a/tests/functional/pv/add_metadata_feature/test_metadata.py b/tests/functional/pv/add_metadata_feature/test_metadata.py index 22c87a80463..bccd6c60045 100644 --- a/tests/functional/pv/add_metadata_feature/test_metadata.py +++ b/tests/functional/pv/add_metadata_feature/test_metadata.py @@ -68,7 +68,7 @@ def test_metadata_feature_unavailable_for_previous_versions( config_map_obj = ocp.OCP( kind="Configmap", namespace=config.ENV_DATA["cluster_namespace"] ) - pod_obj = ocp.OCP(kind="Pod", namespace="openshift-storage") + pod_obj = ocp.OCP(kind="Pod", namespace=config.ENV_DATA["cluster_namespace"]) toolbox = pod.get_ceph_tools_pod() project_factory_class(project_name="test-metadata") enable_metadata = '{"data":{"CSI_ENABLE_METADATA": "true"}}' @@ -179,8 +179,10 @@ def test_metadata_not_enabled_by_default( fs, sc_name = metadata_utils.update_testdata_for_external_modes( sc_name, fs, external_mode=external_mode ) - config_map_obj = ocp.OCP(kind="Configmap", namespace="openshift-storage") - pod_obj = ocp.OCP(kind="Pod", namespace="openshift-storage") + config_map_obj = ocp.OCP( + kind="Configmap", namespace=config.ENV_DATA["cluster_namespace"] + ) + pod_obj = ocp.OCP(kind="Pod", namespace=config.ENV_DATA["cluster_namespace"]) toolbox = pod.get_ceph_tools_pod() # enable metadata flag not available by default metadata_flag = config_map_obj.exec_oc_cmd( diff --git a/tests/functional/pv/pv_services/test_cr_resources_validation.py b/tests/functional/pv/pv_services/test_cr_resources_validation.py index 8ead04c58f1..1324d769acc 100644 --- a/tests/functional/pv/pv_services/test_cr_resources_validation.py +++ b/tests/functional/pv/pv_services/test_cr_resources_validation.py @@ -4,6 +4,8 @@ import yaml from tempfile import NamedTemporaryFile + +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import bugzilla, green_squad from ocs_ci.framework.testlib import ( skipif_ocp_version, @@ -66,7 +68,7 @@ def cr_resource_not_editable( cr_resource_yaml, non_editable_patches, editable_patches, - namespace="openshift-storage", + namespace=config.ENV_DATA["cluster_namespace"], ): """ Test that cr object is not editable once created diff --git a/tests/functional/storageclass/test_replica1.py b/tests/functional/storageclass/test_replica1.py index ef1e4cfae28..b30a6f23f80 100644 --- a/tests/functional/storageclass/test_replica1.py +++ b/tests/functional/storageclass/test_replica1.py @@ -25,7 +25,6 @@ VOLUME_MODE_BLOCK, CSI_RBD_RAW_BLOCK_POD_YAML, DEFALUT_DEVICE_CLASS, - OPENSHIFT_STORAGE_NAMESPACE, ) from ocs_ci.helpers.helpers import create_pvc from ocs_ci.utility.utils import validate_dict_values, compare_dictionaries @@ -92,7 +91,9 @@ def replica1_setup(self): for osd in osd_names: pod = OCP( - kind=POD, namespace=OPENSHIFT_STORAGE_NAMESPACE, resource_name=osd + kind=POD, + namespace=config.ENV_DATA["cluster_namespace"], + resource_name=osd, ) pod.wait_for_resource(condition=STATUS_RUNNING, column="STATUS") @@ -103,7 +104,9 @@ def replica1_teardown(self, request, replica1_setup): yield log.info("Teardown function called") storage_cluster = replica1_setup - cephblockpools = OCP(kind=CEPHBLOCKPOOL, namespace=OPENSHIFT_STORAGE_NAMESPACE) + cephblockpools = OCP( + kind=CEPHBLOCKPOOL, namespace=config.ENV_DATA["cluster_namespace"] + ) set_non_resilient_pool(storage_cluster, enable=False) delete_replica_1_sc() log.info("StorageClass Deleted") From c402bca85c439a192f17358545854646944b486c Mon Sep 17 00:00:00 2001 From: Sidhant Agrawal Date: Thu, 5 Dec 2024 16:00:00 +0530 Subject: [PATCH 
22/44] Consolidate tests and fix intermittent failures (#10958) - Combined test_run_io_multiple_dc_pods.py and test_run_io_multiple_pods.py - Fixed intermittent failures in test_raw_block_pv and test_create_resize_delete_pvc for improved stability Signed-off-by: Sidhant Agrawal --- .../pv/pv_services/test_raw_block_pv.py | 3 + .../test_run_io_multiple_dc_pods.py | 73 ------------------- .../pv_services/test_run_io_multiple_pods.py | 15 ++-- tests/functional/ui/test_pvc_ui.py | 7 +- 4 files changed, 19 insertions(+), 79 deletions(-) delete mode 100644 tests/functional/pv/pv_services/test_run_io_multiple_dc_pods.py diff --git a/tests/functional/pv/pv_services/test_raw_block_pv.py b/tests/functional/pv/pv_services/test_raw_block_pv.py index 5350e7c7b25..8ea2a465afb 100644 --- a/tests/functional/pv/pv_services/test_raw_block_pv.py +++ b/tests/functional/pv/pv_services/test_raw_block_pv.py @@ -134,6 +134,7 @@ def raw_block_pv(self): storage_type=storage_type, size=f"{random.randint(10,200)}M", invalidate=0, + direct=1, ) for pod in pvc_gb_pods: log.info(f"running io on pod {pod.name}") @@ -142,6 +143,7 @@ def raw_block_pv(self): storage_type=storage_type, size=f"{random.randint(1,5)}G", invalidate=0, + direct=1, ) for pod in pvc_tb_pods: log.info(f"running io on pod {pod.name}") @@ -150,6 +152,7 @@ def raw_block_pv(self): storage_type=storage_type, size=f"{random.randint(10,15)}G", invalidate=0, + direct=1, ) for pod in pods: diff --git a/tests/functional/pv/pv_services/test_run_io_multiple_dc_pods.py b/tests/functional/pv/pv_services/test_run_io_multiple_dc_pods.py deleted file mode 100644 index 66c6804e74b..00000000000 --- a/tests/functional/pv/pv_services/test_run_io_multiple_dc_pods.py +++ /dev/null @@ -1,73 +0,0 @@ -import pytest -from ocs_ci.ocs.resources.pod import get_fio_rw_iops -from ocs_ci.ocs import constants -from ocs_ci.framework.pytest_customization.marks import green_squad -from ocs_ci.framework.testlib import ManageTest, tier2 - - -@green_squad -@tier2 -@pytest.mark.parametrize( - argnames=["interface"], - argvalues=[ - pytest.param( - constants.CEPHBLOCKPOOL, marks=pytest.mark.polarion_id("OCS-1284") - ), - pytest.param( - constants.CEPHFILESYSTEM, marks=pytest.mark.polarion_id("OCS-1285") - ), - ], -) -class TestRunIOMultipleDcPods(ManageTest): - """ - Run IO on multiple dc pods in parallel - - Steps: - 1:- Create project - 2:- Create serviceaccount - 3:- Add serviceaccount user to privileged policy - 4:- Create storageclass - 5:- Create PVC - 6:- Create pod with kind deploymentconfig - 7:- Add serviceaccount in yaml - 8:- Add privileged as True under securityContext - 9:- Deploy yaml using oc create -f yaml_name - 10:- oc get pods -n namespace - 11:- 2 pods will be Running for 1 deploymentconfig first will be deploy pod which actual deploys dc - and second pod will be actual deployed pod - 12:- For Deletion - 13:- oc get deploymentconfig -n namespace - 14:- get dc name and delete using oc delete deploymentconfig -n namespace - - Note:- Step 1,2,3,7 are not required if we deploy dc in openshift-storage namespace - """ - - num_of_pvcs = 10 - pvc_size = 5 - - @pytest.fixture() - def dc_pods(self, interface, multi_pvc_factory, dc_pod_factory): - """ - Prepare multiple dc pods for the test - - Returns: - list: Pod instances - """ - pvc_objs = multi_pvc_factory( - interface=interface, size=self.pvc_size, num_of_pvc=self.num_of_pvcs - ) - - dc_pod_objs = list() - for pvc_obj in pvc_objs: - dc_pod_objs.append(dc_pod_factory(pvc=pvc_obj)) - return dc_pod_objs - - def 
test_run_io_multiple_dc_pods(self, dc_pods): - """ - Run IO on multiple dc pods in parallel - """ - for dc_pod in dc_pods: - dc_pod.run_io("fs", f"{self.pvc_size - 1}G") - - for dc_pod in dc_pods: - get_fio_rw_iops(dc_pod) diff --git a/tests/functional/pv/pv_services/test_run_io_multiple_pods.py b/tests/functional/pv/pv_services/test_run_io_multiple_pods.py index 0530f60d120..2e0d7c61a11 100644 --- a/tests/functional/pv/pv_services/test_run_io_multiple_pods.py +++ b/tests/functional/pv/pv_services/test_run_io_multiple_pods.py @@ -1,4 +1,5 @@ import pytest + from ocs_ci.ocs.resources.pod import get_fio_rw_iops from ocs_ci.ocs import constants from ocs_ci.framework.pytest_customization.marks import green_squad, provider_mode @@ -22,11 +23,11 @@ class TestIOMultiplePods(ManageTest): Run IO on multiple pods in parallel """ - num_of_pvcs = 10 + num_of_pvcs = 6 pvc_size = 5 @pytest.fixture() - def pods(self, interface, pod_factory, multi_pvc_factory): + def pods(self, interface, multi_pvc_factory, pod_factory, dc_pod_factory): """ Prepare multiple pods for the test @@ -39,8 +40,12 @@ def pods(self, interface, pod_factory, multi_pvc_factory): ) pod_objs = list() - for pvc_obj in pvc_objs: - pod_objs.append(pod_factory(pvc=pvc_obj)) + for pvc_obj in pvc_objs[: len(pvc_objs) // 2]: + pod_objs.append(dc_pod_factory(interface=interface, pvc=pvc_obj)) + + for pvc_obj in pvc_objs[len(pvc_objs) // 2 :]: + pod_objs.append(pod_factory(interface=interface, pvc=pvc_obj)) + return pod_objs def test_run_io_multiple_pods(self, pods): @@ -48,7 +53,7 @@ def test_run_io_multiple_pods(self, pods): Run IO on multiple pods in parallel """ for pod in pods: - pod.run_io("fs", f"{self.pvc_size - 1}G") + pod.run_io(storage_type="fs", size="1G") for pod in pods: get_fio_rw_iops(pod) diff --git a/tests/functional/ui/test_pvc_ui.py b/tests/functional/ui/test_pvc_ui.py index c25c2ade3eb..247127fc156 100644 --- a/tests/functional/ui/test_pvc_ui.py +++ b/tests/functional/ui/test_pvc_ui.py @@ -200,7 +200,12 @@ def test_create_resize_delete_pvc( else: storage_type = constants.WORKLOAD_STORAGE_TYPE_FS - new_pod.run_io(storage_type, size=(new_size - 1), invalidate=0, rate="1000m") + new_pod.run_io( + storage_type, + size=(new_size - 1), + invalidate=0, + direct=int(storage_type == "block"), + ) get_fio_rw_iops(new_pod) logger.info("FIO execution on Pod successfully completed..!!") From 8333f5194c5e9995ae5af0fa27cde75f0c42dccd Mon Sep 17 00:00:00 2001 From: Avdhoot Sagare Date: Fri, 6 Dec 2024 11:03:26 +0530 Subject: [PATCH 23/44] Added code to select active mgr pod always (#10953) Signed-off-by: Avdhoot --- .../test_monitoring_on_negative_scenarios.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py b/tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py index 060ef5a6f8b..723e35878db 100644 --- a/tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py +++ b/tests/functional/workloads/ocp/monitoring/test_monitoring_on_negative_scenarios.py @@ -465,8 +465,18 @@ def test_monitoring_after_rebooting_node_where_mgr_is_running( # Get the mgr pod obj mgr_pod_obj = pod.get_mgr_pods() + # Get active mgr pod + toolbox = pod.get_ceph_tools_pod() + active_mgr_pod_output = toolbox.exec_cmd_on_pod("ceph mgr stat") + active_mgr_pod_suffix = active_mgr_pod_output.get("active_name") + log.info(f"The active MGR pod is {active_mgr_pod_suffix}") + for obj in mgr_pod_obj: + if 
active_mgr_pod_suffix in obj.name: + active_mgr_pod_obj = obj + log.info(f"The active MGR pod name is {active_mgr_pod_obj.name}") + # Get the node where the mgr pod is hosted - mgr_node_obj = pod.get_pod_node(mgr_pod_obj[0]) + mgr_node_obj = pod.get_pod_node(active_mgr_pod_obj) # Reboot the node where the mgr pod is hosted nodes.restart_nodes([mgr_node_obj]) From 295af66d6e877036764a4aa1896fda4bdbd3c205 Mon Sep 17 00:00:00 2001 From: vavuthu Date: Thu, 5 Dec 2024 16:10:19 +0530 Subject: [PATCH 24/44] delete COS bucket during teardown of cluster Description: Currently we are leaving COS buckets in IBM cloud as leftovers which was created by noobaa. So we are fetching bucket from backingstore and deleting. Signed-off-by: vavuthu --- ocs_ci/deployment/ibmcloud.py | 21 +++++ ocs_ci/ocs/resources/backingstore.py | 14 ++++ ocs_ci/utility/ibmcloud.py | 112 +++++++++++++++++++++++++++ setup.py | 3 +- 4 files changed, 149 insertions(+), 1 deletion(-) diff --git a/ocs_ci/deployment/ibmcloud.py b/ocs_ci/deployment/ibmcloud.py index d847f23a641..a5c5b2996ca 100644 --- a/ocs_ci/deployment/ibmcloud.py +++ b/ocs_ci/deployment/ibmcloud.py @@ -19,6 +19,7 @@ LeftoversExistError, VolumesExistError, ) +from ocs_ci.ocs.resources.backingstore import get_backingstore from ocs_ci.ocs.resources.pvc import ( scale_down_pods_and_remove_pvcs, ) @@ -203,6 +204,7 @@ def destroy_cluster(self, log_level="DEBUG"): resource_group = self.get_resource_group() if resource_group: try: + self.delete_bucket() scale_down_pods_and_remove_pvcs(self.DEFAULT_STORAGECLASS) except Exception as err: logger.warning( @@ -235,6 +237,25 @@ def destroy_cluster(self, log_level="DEBUG"): logger.info("Force cleaning up Service IDs and Account Policies leftovers") ibmcloud.cleanup_policies_and_service_ids(self.cluster_name) + def delete_bucket(self): + """ + Deletes the COS bucket + """ + api_key = config.AUTH["ibmcloud"]["api_key"] + service_instance_id = config.AUTH["ibmcloud"]["cos_instance_crn"] + endpoint_url = constants.IBM_COS_GEO_ENDPOINT_TEMPLATE.format( + config.ENV_DATA.get("region", "us-east").lower() + ) + backingstore = get_backingstore() + bucket_name = backingstore["spec"]["ibmCos"]["targetBucket"] + logger.debug(f"bucket name from backingstore: {bucket_name}") + cos = ibmcloud.IBMCloudObjectStorage( + api_key=api_key, + service_instance_id=service_instance_id, + endpoint_url=endpoint_url, + ) + cos.delete_bucket(bucket_name=bucket_name) + def manually_create_iam_for_vpc(self): """ Manually specify the IAM secrets for the cloud provider diff --git a/ocs_ci/ocs/resources/backingstore.py b/ocs_ci/ocs/resources/backingstore.py index 8dab18b7556..9b4ec268851 100644 --- a/ocs_ci/ocs/resources/backingstore.py +++ b/ocs_ci/ocs/resources/backingstore.py @@ -489,3 +489,17 @@ def clone_bs_dict_from_backingstore( raise UnknownCloneTypeException(prototype_bs_platform_name) return clone_bs_dict + + +def get_backingstore(): + """ + Fetches the backingstore + + Returns: + dict: backingstore details + + """ + backingstore = OCP( + kind=constants.BACKINGSTORE, namespace=config.ENV_DATA["cluster_namespace"] + ) + return backingstore.get(resource_name=constants.DEFAULT_NOOBAA_BACKINGSTORE) diff --git a/ocs_ci/utility/ibmcloud.py b/ocs_ci/utility/ibmcloud.py index cca67d66c16..5d30e0f2ab2 100644 --- a/ocs_ci/utility/ibmcloud.py +++ b/ocs_ci/utility/ibmcloud.py @@ -10,8 +10,11 @@ import re import requests import time +import ibm_boto3 import ipaddress + from copy import copy +from ibm_botocore.client import Config as IBMBotocoreConfig, ClientError 
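Taken together, the teardown change in this patch reduces to three steps: read the target bucket name off the default Noobaa backingstore, build an IBM COS client for the cluster region, and empty-then-delete the bucket. A condensed sketch of that flow, using only the ibm_boto3 calls that appear in this patch; the credentials and endpoint URL are placeholders, and the bucket listing is assumed to fit in a single unpaginated response (as in the patch itself):

import ibm_boto3
from ibm_botocore.client import Config as IBMBotocoreConfig


def delete_cos_bucket(api_key, service_instance_id, endpoint_url, bucket_name):
    # Same client options the new IBMCloudObjectStorage wrapper uses.
    cos = ibm_boto3.client(
        "s3",
        ibm_api_key_id=api_key,
        ibm_service_instance_id=service_instance_id,
        config=IBMBotocoreConfig(signature_version="oauth"),
        endpoint_url=endpoint_url,
    )
    # COS refuses to delete a non-empty bucket, so remove the objects first.
    contents = cos.list_objects(Bucket=bucket_name).get("Contents", [])
    if contents:
        cos.delete_objects(
            Bucket=bucket_name,
            Delete={"Objects": [{"Key": obj["Key"]} for obj in contents]},
        )
    cos.delete_bucket(Bucket=bucket_name)
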
from json import JSONDecodeError from ocs_ci.framework import config from ocs_ci.ocs import constants @@ -1237,3 +1240,112 @@ def find_free_network_subnets(subnet_cidr, network_prefix=27): if is_free: logger.info(f"Free set of subnets found: {possible_subnets}") return possible_subnets + + +class IBMCloudObjectStorage: + """ + IBM Cloud Object Storage (COS) class + """ + + def __init__(self, api_key, service_instance_id, endpoint_url): + """ + Initialize all necessary parameters + + Args: + api_key (str): API key for IBM Cloud Object Storage (COS) + service_instance_id (str): Service instance ID for COS + endpoint_url (str): COS endpoint URL + + """ + self.cos_api_key_id = api_key + self.cos_instance_crn = service_instance_id + self.cos_endpoint = endpoint_url + # create client + self.cos_client = ibm_boto3.client( + "s3", + ibm_api_key_id=self.cos_api_key_id, + ibm_service_instance_id=self.cos_instance_crn, + config=IBMBotocoreConfig(signature_version="oauth"), + endpoint_url=self.cos_endpoint, + ) + + def get_bucket_objects(self, bucket_name): + """ + Fetches the objects in a bucket + + Args: + bucket_name (str): Name of the bucket + + Returns: + list: List of objects in a bucket + + """ + bucket_objects = [] + logger.info(f"Retrieving bucket contents from {bucket_name}") + try: + objects = self.cos_client.list_objects(Bucket=bucket_name) + for obj in objects.get("Contents", []): + bucket_objects.append(obj["Key"]) + except ClientError as ce: + logger.error(f"CLIENT ERROR: {ce}") + except Exception as e: + logger.error(f"Unable to retrieve bucket contents: {e}") + logger.info(f"bucket objects: {bucket_objects}") + return bucket_objects + + def delete_objects(self, bucket_name): + """ + Delete objects in a bucket + + Args: + bucket_name (str): Name of the bucket + + """ + objects = self.get_bucket_objects(bucket_name) + if objects: + try: + # Form the delete request + delete_request = {"Objects": [{"Key": obj} for obj in objects]} + response = self.cos_client.delete_objects( + Bucket=bucket_name, Delete=delete_request + ) + logger.info(f"Deleted items for {bucket_name}") + logger.debug(json.dumps(response.get("Deleted"), indent=4)) + except ClientError as ce: + logger.error(f"CLIENT ERROR: {ce}") + except Exception as e: + logger.error(f"Unable to delete objects: {e}") + + def delete_bucket(self, bucket_name): + """ + Delete the bucket + + Args: + bucket_name (str): Name of the bucket + + """ + logger.info(f"Deleting bucket: {bucket_name}") + try: + self.delete_objects(bucket_name=bucket_name) + self.cos_client.delete_bucket(Bucket=bucket_name) + logger.info(f"Bucket: {bucket_name} deleted!") + except ClientError as ce: + logger.error(f"CLIENT ERROR: {ce}") + except Exception as e: + logger.error(f"Unable to delete bucket: {e}") + + def get_buckets(self): + """ + Fetches the buckets + """ + buckets = [] + logger.info("Retrieving list of buckets") + try: + buckets = self.cos_client.list_buckets() + for bucket in buckets["Buckets"]: + buckets.append(bucket["Name"]) + except ClientError as ce: + logger.error(f"CLIENT ERROR: {ce}") + except Exception as e: + logger.error(f"Unable to retrieve list buckets: {e}") + return buckets diff --git a/setup.py b/setup.py index 16deb7a894e..83012037d59 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ "apache-libcloud==3.1.0", "docopt==0.6.2", "gevent==23.9.1", + "ibm-cos-sdk==2.13.5", "reportportal-client==3.2.3", "requests==2.32.3", "paramiko==3.4.0", @@ -39,7 +40,7 @@ "pytest_marker_bugzilla>=0.9.3", "pyvmomi==7.0", "python-hcl2==3.0.1", - 
"python-dateutil==2.8.2", + "python-dateutil==2.9.0", "pytest-order==1.2.0", "funcy==1.14", "semantic-version==2.8.5", From 14689c85f8c1b4d60174631c01ee01675d4d36c1 Mon Sep 17 00:00:00 2001 From: Sagi Hirshfeld Date: Mon, 9 Dec 2024 06:40:24 +0200 Subject: [PATCH 25/44] [Red Squad] - Remove obsolete and redundant tests (#10852) Signed-off-by: Sagi Hirshfeld --- .../mcg/test_bucket_creation_deletion.py | 36 ----------- .../object/mcg/test_bucket_deletion.py | 8 --- .../functional/object/mcg/test_multicloud.py | 2 +- .../object/mcg/test_namespace_crd.py | 4 +- .../object/mcg/test_object_integrity.py | 7 +-- .../object/mcg/test_write_to_bucket.py | 60 +------------------ tests/functional/upgrade/test_noobaa.py | 2 +- tests/functional/upgrade/test_resources.py | 2 +- 8 files changed, 10 insertions(+), 111 deletions(-) diff --git a/tests/functional/object/mcg/test_bucket_creation_deletion.py b/tests/functional/object/mcg/test_bucket_creation_deletion.py index b60a027b081..acce0bdc05f 100644 --- a/tests/functional/object/mcg/test_bucket_creation_deletion.py +++ b/tests/functional/object/mcg/test_bucket_creation_deletion.py @@ -47,22 +47,6 @@ class TestBucketCreationAndDeletion(MCGTest): acceptance, ], ), - pytest.param( - *[100, "S3", None], - marks=[ - pytest.mark.skip(ERRATIC_TIMEOUTS_SKIP_REASON), - performance, - pytest.mark.polarion_id("OCS-1823"), - ], - ), - pytest.param( - *[1000, "S3", None], - marks=[ - pytest.mark.skip(ERRATIC_TIMEOUTS_SKIP_REASON), - performance, - pytest.mark.polarion_id("OCS-1824"), - ], - ), pytest.param( *[3, "OC", None], marks=[ @@ -103,22 +87,6 @@ class TestBucketCreationAndDeletion(MCGTest): pytest.mark.polarion_id("OCS-1298"), ], ), - pytest.param( - *[100, "CLI", None], - marks=[ - pytest.mark.skip(ERRATIC_TIMEOUTS_SKIP_REASON), - performance, - pytest.mark.polarion_id("OCS-1825"), - ], - ), - pytest.param( - *[1000, "CLI", None], - marks=[ - pytest.mark.skip(ERRATIC_TIMEOUTS_SKIP_REASON), - performance, - pytest.mark.polarion_id("OCS-1828"), - ], - ), pytest.param( *[ 1, @@ -148,15 +116,11 @@ class TestBucketCreationAndDeletion(MCGTest): ], ids=[ "3-S3-DEFAULT-BACKINGSTORE", - "100-S3-DEFAULT-BACKINGSTORE", - "1000-S3-DEFAULT-BACKINGSTORE", "3-OC-DEFAULT-BACKINGSTORE", "10-OC-DEFAULT-BACKINGSTORE", "100-OC-DEFAULT-BACKINGSTORE", "1000-OC-DEFAULT-BACKINGSTORE", "3-CLI-DEFAULT-BACKINGSTORE", - "100-CLI-DEFAULT-BACKINGSTORE", - "1000-CLI-DEFAULT-BACKINGSTORE", "1-OC-PVPOOL", "1-CLI-PVPOOL", ], diff --git a/tests/functional/object/mcg/test_bucket_deletion.py b/tests/functional/object/mcg/test_bucket_deletion.py index 5a8801baee9..c992443a526 100644 --- a/tests/functional/object/mcg/test_bucket_deletion.py +++ b/tests/functional/object/mcg/test_bucket_deletion.py @@ -74,13 +74,6 @@ class TestBucketDeletion(MCGTest): *["OC", {"interface": "OC", "backingstore_dict": {"gcp": [(1, None)]}}], marks=[tier1], ), - pytest.param( - *[ - "OC", - {"interface": "OC", "backingstore_dict": {"ibmcos": [(1, None)]}}, - ], - marks=[tier1], - ), pytest.param( *[ "CLI", @@ -106,7 +99,6 @@ class TestBucketDeletion(MCGTest): "OC-AWS", "OC-AZURE", "OC-GCP", - "OC-IBMCOS", "CLI-IBMCOS", "CLI-AWS-STS", ], diff --git a/tests/functional/object/mcg/test_multicloud.py b/tests/functional/object/mcg/test_multicloud.py index 4858cb0cb35..ca564c618ce 100644 --- a/tests/functional/object/mcg/test_multicloud.py +++ b/tests/functional/object/mcg/test_multicloud.py @@ -84,7 +84,7 @@ def test_multicloud_backingstore_creation( "IBMCOS-OC-1", ], ) - def test_multicloud_backingstore_deletion( + def 
deprecated_test_multicloud_backingstore_deletion( self, backingstore_factory, backingstore_tup ): """ diff --git a/tests/functional/object/mcg/test_namespace_crd.py b/tests/functional/object/mcg/test_namespace_crd.py index 9e4e03d1115..d4029a73ff8 100644 --- a/tests/functional/object/mcg/test_namespace_crd.py +++ b/tests/functional/object/mcg/test_namespace_crd.py @@ -86,7 +86,9 @@ class TestNamespace(MCGTest): ], ) @pytest.mark.polarion_id("OCS-2255") - def test_namespace_store_creation_crd(self, namespace_store_factory, nss_tup): + def deprecated_test_namespace_store_creation_crd( + self, namespace_store_factory, nss_tup + ): """ Test namespace store creation using the MCG CRDs. """ diff --git a/tests/functional/object/mcg/test_object_integrity.py b/tests/functional/object/mcg/test_object_integrity.py index 2bb78fd4c8c..c8e9626748d 100644 --- a/tests/functional/object/mcg/test_object_integrity.py +++ b/tests/functional/object/mcg/test_object_integrity.py @@ -63,10 +63,6 @@ class TestObjectIntegrity(MCGTest): {"interface": "OC", "backingstore_dict": {"ibmcos": [(1, None)]}}, marks=[tier1, skipif_disconnected_cluster], ), - pytest.param( - {"interface": "CLI", "backingstore_dict": {"ibmcos": [(1, None)]}}, - marks=[tier1, skipif_disconnected_cluster], - ), pytest.param( { "interface": "OC", @@ -92,7 +88,6 @@ class TestObjectIntegrity(MCGTest): "AZURE-OC-1", "GCP-OC-1", "IBMCOS-OC-1", - "IBMCOS-CLI-1", "AWS-OC-Cache", ], ) @@ -130,7 +125,7 @@ def test_check_object_integrity( @pytest.mark.polarion_id("OCS-1945") @tier2 - def test_empty_file_integrity( + def deprecated_test_empty_file_integrity( self, mcg_obj, awscli_pod, bucket_factory, test_directory_setup ): """ diff --git a/tests/functional/object/mcg/test_write_to_bucket.py b/tests/functional/object/mcg/test_write_to_bucket.py index de6da1ae840..31ebbff159d 100644 --- a/tests/functional/object/mcg/test_write_to_bucket.py +++ b/tests/functional/object/mcg/test_write_to_bucket.py @@ -18,7 +18,6 @@ MCGTest, tier1, tier2, - acceptance, performance, ) from ocs_ci.utility.utils import exec_nb_db_query @@ -101,45 +100,6 @@ class TestBucketIO(MCGTest): @pytest.mark.parametrize( argnames="interface,bucketclass_dict", argvalues=[ - pytest.param( - *["S3", None], - marks=[tier1, acceptance], - ), - pytest.param( - *[ - "OC", - { - "interface": "OC", - "backingstore_dict": {"aws": [(1, "eu-central-1")]}, - }, - ], - marks=[tier1], - ), - pytest.param( - *[ - "OC", - {"interface": "OC", "backingstore_dict": {"azure": [(1, None)]}}, - ], - marks=[tier1], - ), - pytest.param( - *["OC", {"interface": "OC", "backingstore_dict": {"gcp": [(1, None)]}}], - marks=[tier1], - ), - pytest.param( - *[ - "OC", - {"interface": "OC", "backingstore_dict": {"ibmcos": [(1, None)]}}, - ], - marks=[tier1], - ), - pytest.param( - *[ - "CLI", - {"interface": "CLI", "backingstore_dict": {"ibmcos": [(1, None)]}}, - ], - marks=[tier1], - ), pytest.param( *[ "OC", @@ -156,12 +116,6 @@ class TestBucketIO(MCGTest): ), ], ids=[ - "DEFAULT-BACKINGSTORE", - "AWS-OC-1", - "AZURE-OC-1", - "GCP-OC-1", - "IBMCOS-OC-1", - "IBMCOS-CLI-1", "RGW-OC-1", "RGW-CLI-1", ], @@ -220,10 +174,6 @@ def test_write_file_to_bucket( {"interface": "OC", "backingstore_dict": {"gcp": [(1, None)]}}, marks=[tier1], ), - pytest.param( - {"interface": "OC", "backingstore_dict": {"ibmcos": [(1, None)]}}, - marks=[tier1], - ), pytest.param( {"interface": "CLI", "backingstore_dict": {"ibmcos": [(1, None)]}}, marks=[tier1], @@ -234,7 +184,6 @@ def test_write_file_to_bucket( "AWS-OC-1", "AZURE-OC-1", "GCP-OC-1", - 
"IBMCOS-OC-1", "IBMCOS-CLI-1", ], ) @@ -292,10 +241,6 @@ def test_mcg_data_deduplication( {"interface": "OC", "backingstore_dict": {"ibmcos": [(1, None)]}}, marks=[tier1], ), - pytest.param( - {"interface": "CLI", "backingstore_dict": {"ibmcos": [(1, None)]}}, - marks=[tier1], - ), ], ids=[ "DEFAULT-BACKINGSTORE", @@ -303,7 +248,6 @@ def test_mcg_data_deduplication( "AZURE-OC-1", "GCP-OC-1", "IBMCOS-OC-1", - "IBMCOS-CLI-1", ], ) def test_mcg_data_compression( @@ -333,7 +277,9 @@ def test_mcg_data_compression( @tier2 @performance @skip_inconsistent - def test_data_reduction_performance(self, mcg_obj, awscli_pod, bucket_factory): + def deprecated_test_data_reduction_performance( + self, mcg_obj, awscli_pod, bucket_factory + ): """ Test data reduction performance """ diff --git a/tests/functional/upgrade/test_noobaa.py b/tests/functional/upgrade/test_noobaa.py index e9435d038f7..6b757aad645 100644 --- a/tests/functional/upgrade/test_noobaa.py +++ b/tests/functional/upgrade/test_noobaa.py @@ -183,7 +183,7 @@ def test_start_upgrade_mcg_io(mcg_workload_job): @bugzilla("1874243") @mcg @red_squad -def test_upgrade_mcg_io(mcg_workload_job): +def deprecated_test_upgrade_mcg_io(mcg_workload_job): """ Confirm that there is MCG workload job running after upgrade. """ diff --git a/tests/functional/upgrade/test_resources.py b/tests/functional/upgrade/test_resources.py index 98d9ce07485..9926cb9c26b 100644 --- a/tests/functional/upgrade/test_resources.py +++ b/tests/functional/upgrade/test_resources.py @@ -136,7 +136,7 @@ def test_pod_log_after_upgrade(): @pytest.mark.polarion_id("OCS-2666") @mcg @red_squad -def test_noobaa_service_mon_after_ocs_upgrade(): +def deprecated_test_noobaa_service_mon_after_ocs_upgrade(): """ Verify 'noobaa-service-monitor' does not exist after OCS upgrade. From af67d32dbb54b39fa9caec398e0c3c5ab396b811 Mon Sep 17 00:00:00 2001 From: dahorak Date: Mon, 9 Dec 2024 13:09:42 +0100 Subject: [PATCH 26/44] update list of packages for 4.18 discon. 
deployment (#11014) add `odf-dependencies` to list of mirrored packages for disconnected ODF 4.18 deployment - related to: https://url.corp.redhat.com/c3dedae - discussed here: https://url.corp.redhat.com/a69474b Signed-off-by: Daniel Horak --- ocs_ci/ocs/constants.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 3331cfb3f9a..a419766b34f 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -2196,9 +2196,11 @@ DISCON_CL_REQUIRED_PACKAGES_PER_ODF_VERSION["4.16"] ) -DISCON_CL_REQUIRED_PACKAGES_PER_ODF_VERSION["4.18"] = ( - DISCON_CL_REQUIRED_PACKAGES_PER_ODF_VERSION["4.16"] -) +DISCON_CL_REQUIRED_PACKAGES_PER_ODF_VERSION[ + "4.18" +] = DISCON_CL_REQUIRED_PACKAGES_PER_ODF_VERSION["4.16"] + [ + "odf-dependencies", +] # PSI-openstack constants NOVA_CLNT_VERSION = "2.0" From 95336e760fba9f3b812e3e10300002fad751bac4 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Mon, 9 Dec 2024 15:18:19 +0200 Subject: [PATCH 27/44] update scale tests with fetched noobaa sc to avoid create reference to non-existing sc (#10717) * update scale tests with fetched noobaa ns to avoid create static Signed-off-by: Daniel Osypenko --- ocs_ci/ocs/scale_noobaa_lib.py | 30 +++++++++++++++++-- .../cross_functional/scale/noobaa/conftest.py | 8 +++++ .../noobaa/test_scale_obc_create_delete.py | 3 +- .../scale/noobaa/test_scale_obc_creation.py | 16 ++++++++-- ...t_scale_obc_creation_repsin_noobaa_pods.py | 3 +- ...scale_obc_start_time_respin_noobaa_pods.py | 9 +++++- .../upgrade/test_upgrade_with_scaled_obc.py | 3 +- 7 files changed, 62 insertions(+), 10 deletions(-) create mode 100644 tests/cross_functional/scale/noobaa/conftest.py diff --git a/ocs_ci/ocs/scale_noobaa_lib.py b/ocs_ci/ocs/scale_noobaa_lib.py index 87b04f03f2b..bdf3a6737a5 100644 --- a/ocs_ci/ocs/scale_noobaa_lib.py +++ b/ocs_ci/ocs/scale_noobaa_lib.py @@ -10,9 +10,9 @@ from ocs_ci.ocs.utils import oc_get_all_obc_names from ocs_ci.ocs.resources import pod from ocs_ci.ocs.utils import get_pod_name_by_pattern -from ocs_ci.ocs.exceptions import UnexpectedBehaviour +from ocs_ci.ocs.exceptions import UnexpectedBehaviour, CommandFailed from ocs_ci.ocs.node import get_node_objs, wait_for_nodes_status -from ocs_ci.utility.utils import ceph_health_check, run_cmd +from ocs_ci.utility.utils import ceph_health_check, run_cmd, exec_cmd from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs import hsbench @@ -20,7 +20,9 @@ hsbenchs3 = hsbench.HsBench() -def construct_obc_creation_yaml_bulk_for_kube_job(no_of_obc, sc_name, namespace): +def construct_obc_creation_yaml_bulk_for_kube_job( + no_of_obc, sc_name, namespace, noobaa_storage_class_name +): """ Constructing obc.yaml file to create bulk of obc's using kube_job @@ -28,12 +30,16 @@ def construct_obc_creation_yaml_bulk_for_kube_job(no_of_obc, sc_name, namespace) no_of_obc(int): Bulk obc count sc_name (str): storage class name using for obc creation namespace(str): Namespace uses to create bulk of obc + noobaa_storage_class_name (str): Noobaa storage class name fetched once per session run via fixture + Returns: obc_dict_list (list): List of all obc.yaml dicts """ + sc_name = noobaa_storage_class_name if noobaa_storage_class_name else sc_name + # Construct obc.yaml for the no_of_obc count # append all the obc.yaml dict to obc_dict_list and return the list obc_dict_list = list() @@ -49,6 +55,24 @@ def construct_obc_creation_yaml_bulk_for_kube_job(no_of_obc, sc_name, namespace) return obc_dict_list +def 
fetch_noobaa_storage_class_name(): + """ + Function to fetch noobaa storage class name from the Storage Cluster + """ + try: + sc_name_fetched = exec_cmd( + "oc get storageclass -o json | " + "jq -r '.items[] | " + f'select(.parameters.bucketclass == "{constants.DEFAULT_NOOBAA_BUCKETCLASS}") | ' + ".metadata.name'", + shell=True, + ).stdout.strip() + except CommandFailed as e: + log.error(f"Failed to get noobaa storageclass name: {e}") + raise + return sc_name_fetched + + def check_all_obc_reached_bound_state_in_kube_job( kube_job_obj, namespace, no_of_obc, timeout=60, no_wait_time=20 ): diff --git a/tests/cross_functional/scale/noobaa/conftest.py b/tests/cross_functional/scale/noobaa/conftest.py new file mode 100644 index 00000000000..d0d296f2436 --- /dev/null +++ b/tests/cross_functional/scale/noobaa/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from ocs_ci.ocs.scale_noobaa_lib import fetch_noobaa_storage_class_name + + +@pytest.fixture(scope="session") +def noobaa_storage_class_name(): + return fetch_noobaa_storage_class_name() diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py b/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py index c251b58681d..5fb49d3c976 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py +++ b/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py @@ -37,7 +37,7 @@ class TestScaleOCBCreateDelete(E2ETest): num_obc_batch = 50 @pytest.mark.polarion_id("OCS-2667") - def test_scale_obc_create_delete_time(self, tmp_path): + def test_scale_obc_create_delete_time(self, tmp_path, noobaa_storage_class_name): """ MCG OBC creation and deletion using Noobaa MCG storage class @@ -55,6 +55,7 @@ def test_scale_obc_create_delete_time(self, tmp_path): no_of_obc=self.num_obc_batch, sc_name=constants.NOOBAA_SC, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py b/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py index d9812bfa380..dde751f1249 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py +++ b/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py @@ -41,7 +41,9 @@ class TestScaleOCBCreation(E2ETest): num_obc_batch = 50 @pytest.mark.polarion_id("OCS-2478") - def test_scale_mcg_obc_creation(self, tmp_path, timeout=60): + def test_scale_mcg_obc_creation( + self, tmp_path, noobaa_storage_class_name, timeout=60 + ): """ MCG OBC creation using Noobaa storage class """ @@ -56,6 +58,7 @@ def test_scale_mcg_obc_creation(self, tmp_path, timeout=60): no_of_obc=self.num_obc_batch, sc_name=self.sc_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile @@ -81,7 +84,9 @@ def test_scale_mcg_obc_creation(self, tmp_path, timeout=60): @vsphere_platform_required @pytest.mark.polarion_id("OCS-2479") - def test_scale_rgw_obc_creation(self, tmp_path, timeout=60): + def test_scale_rgw_obc_creation( + self, tmp_path, noobaa_storage_class_name, timeout=60 + ): """ OBC creation using RGW storage class This test case only runs on vSphere cluster deployment @@ -97,6 +102,7 @@ def test_scale_rgw_obc_creation(self, tmp_path, timeout=60): no_of_obc=self.num_obc_batch, sc_name=self.sc_rgw_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile @@ -122,7 +128,9 @@ def test_scale_rgw_obc_creation(self, tmp_path, timeout=60): 
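The helper above resolves the Noobaa storage class by piping oc get storageclass -o json through jq. The same filter can be expressed in Python on the parsed JSON, which avoids the jq dependency on the runner; a rough equivalent is sketched below, where the default bucket class name is an assumption and should be taken from constants.DEFAULT_NOOBAA_BUCKETCLASS in practice:

import json

from ocs_ci.utility.utils import exec_cmd


def find_noobaa_sc_name(bucketclass="noobaa-default-bucket-class"):
    # Mirror the jq filter:
    # select(.parameters.bucketclass == <default bucket class>) | .metadata.name
    storageclasses = json.loads(exec_cmd("oc get storageclass -o json").stdout)
    for item in storageclasses.get("items", []):
        if (item.get("parameters") or {}).get("bucketclass") == bucketclass:
            return item["metadata"]["name"]
    return None
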
@vsphere_platform_required @pytest.mark.polarion_id("OCS-2480") - def test_scale_mcg_rgw_obc_creation(self, tmp_path, timeout=60): + def test_scale_mcg_rgw_obc_creation( + self, tmp_path, noobaa_storage_class_name, timeout=60 + ): """ OBC creation for both MCG and RGW storage class This test case only runs on vSphere cluster deployment @@ -137,6 +145,7 @@ def test_scale_mcg_rgw_obc_creation(self, tmp_path, timeout=60): no_of_obc=int(self.num_obc_batch / 2), sc_name=self.sc_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) obc_dict_list2 = ( @@ -144,6 +153,7 @@ def test_scale_mcg_rgw_obc_creation(self, tmp_path, timeout=60): no_of_obc=int(self.num_obc_batch / 2), sc_name=self.sc_rgw_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_creation_repsin_noobaa_pods.py b/tests/cross_functional/scale/noobaa/test_scale_obc_creation_repsin_noobaa_pods.py index ae42a849ce9..fb09ba0941a 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_creation_repsin_noobaa_pods.py +++ b/tests/cross_functional/scale/noobaa/test_scale_obc_creation_repsin_noobaa_pods.py @@ -76,7 +76,7 @@ class TestScaleOCBCreation(E2ETest): ], ) def test_scale_obc_creation_noobaa_pod_respin( - self, tmp_path, pod_name, sc_name, mcg_job_factory + self, tmp_path, pod_name, sc_name, mcg_job_factory, noobaa_storage_class_name ): """ OBC creation using RGW storage class @@ -97,6 +97,7 @@ def test_scale_obc_creation_noobaa_pod_respin( no_of_obc=self.num_obc_batch, sc_name=sc_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_start_time_respin_noobaa_pods.py b/tests/cross_functional/scale/noobaa/test_scale_obc_start_time_respin_noobaa_pods.py index bdd4483e68d..bbd04de2460 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_start_time_respin_noobaa_pods.py +++ b/tests/cross_functional/scale/noobaa/test_scale_obc_start_time_respin_noobaa_pods.py @@ -51,7 +51,13 @@ class TestScaleOBCStartTime(E2ETest): ], ) def test_scale_obc_start_time_noobaa_pod_respin( - self, tmp_path, pod_name, sc_name, mcg_job_factory, timeout=5 + self, + tmp_path, + pod_name, + sc_name, + mcg_job_factory, + noobaa_storage_class_name, + timeout=5, ): """ Created OBC without I/O running @@ -74,6 +80,7 @@ def test_scale_obc_start_time_noobaa_pod_respin( no_of_obc=self.num_obc_batch, sc_name=sc_name, namespace=self.namespace, + noobaa_storage_class_name=noobaa_storage_class_name, ) ) # Create job profile diff --git a/tests/cross_functional/scale/upgrade/test_upgrade_with_scaled_obc.py b/tests/cross_functional/scale/upgrade/test_upgrade_with_scaled_obc.py index 18cd9153b75..42a30c0a52a 100644 --- a/tests/cross_functional/scale/upgrade/test_upgrade_with_scaled_obc.py +++ b/tests/cross_functional/scale/upgrade/test_upgrade_with_scaled_obc.py @@ -38,7 +38,7 @@ @skipif_bm @skipif_managed_service @pytest.mark.polarion_id("OCS-3987") -def test_scale_obc_pre_upgrade(tmp_path, timeout=60): +def test_scale_obc_pre_upgrade(tmp_path, noobaa_storage_class_name, timeout=60): """ Create scaled MCG OBC using Noobaa storage class before upgrade Save scaled obc data in a file for post upgrade validation @@ -51,6 +51,7 @@ def test_scale_obc_pre_upgrade(tmp_path, timeout=60): no_of_obc=num_obc_batch, sc_name=sc_name, namespace=namespace, + 
noobaa_storage_class_name=noobaa_storage_class_name, ) # Create job profile job_file = ObjectConfFile( From b29f9c133c182b9eec406f886a176139b6e1956c Mon Sep 17 00:00:00 2001 From: dahorak Date: Mon, 9 Dec 2024 16:21:55 +0100 Subject: [PATCH 28/44] remove src/ocp-network-split submodule (#10993) Signed-off-by: Daniel Horak --- src/ocp-network-split | 1 - 1 file changed, 1 deletion(-) delete mode 160000 src/ocp-network-split diff --git a/src/ocp-network-split b/src/ocp-network-split deleted file mode 160000 index d5ea5d042ff..00000000000 --- a/src/ocp-network-split +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d5ea5d042ff1709e10ce92755bbf147d361f253f From 3700d89c573a4568fa2e4aed54dca17f2b6e1326 Mon Sep 17 00:00:00 2001 From: dahorak Date: Mon, 9 Dec 2024 19:09:36 +0100 Subject: [PATCH 29/44] fix typo in retrieve_verification_mode (#11018) this typo breaks some tests when OCS-QE CA Ingress certificate is used Signed-off-by: Daniel Horak --- ocs_ci/ocs/bucket_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocs_ci/ocs/bucket_utils.py b/ocs_ci/ocs/bucket_utils.py index 9b6089f110b..44e5b28bdba 100644 --- a/ocs_ci/ocs/bucket_utils.py +++ b/ocs_ci/ocs/bucket_utils.py @@ -1566,7 +1566,8 @@ def retrieve_verification_mode(): verify = True elif ( config.DEPLOYMENT.get("use_custom_ingress_ssl_cert") - and config.DEPLOYMENT["custom_ssl_cert_provider"] == "ocs-ci-ca" + and config.DEPLOYMENT["custom_ssl_cert_provider"] + == constants.SSL_CERT_PROVIDER_OCS_QE_CA ): verify = get_root_ca_cert() else: From 18a16ec70bc6497ed15d4b42ae40a7562806576b Mon Sep 17 00:00:00 2001 From: Itzhak Kave Date: Tue, 10 Dec 2024 13:08:29 +0200 Subject: [PATCH 30/44] Remove redundant MS tests (#11020) Signed-off-by: Itzhak Kave Co-authored-by: Itzhak Kave --- .../multicluster/test_ms_markers.py | 96 -------------- .../multicluster/test_sanity_ms.py | 124 ------------------ .../test_switch_to_correct_index_at_setup.py | 93 ------------- 3 files changed, 313 deletions(-) delete mode 100644 tests/cross_functional/system_test/multicluster/test_ms_markers.py delete mode 100644 tests/cross_functional/system_test/multicluster/test_sanity_ms.py diff --git a/tests/cross_functional/system_test/multicluster/test_ms_markers.py b/tests/cross_functional/system_test/multicluster/test_ms_markers.py deleted file mode 100644 index d072274fd41..00000000000 --- a/tests/cross_functional/system_test/multicluster/test_ms_markers.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging -import pytest - -from ocs_ci.framework.pytest_customization.marks import yellow_squad -from ocs_ci.framework.testlib import ( - libtest, - ManageTest, - ignore_leftovers, - managed_service_required, - skipif_ms_consumer, - skipif_ms_provider, - runs_on_provider, -) -from ocs_ci.ocs.cluster import ( - is_ms_consumer_cluster, - is_ms_provider_cluster, -) -from ocs_ci.ocs.managedservice import check_and_change_current_index_to_default_index - -logger = logging.getLogger(__name__) - - -@yellow_squad -@libtest -@managed_service_required -@ignore_leftovers -class TestManagedServiceMarkers(ManageTest): - """ - Test that the managed service markers work as expected - """ - - @pytest.mark.first - def test_default_cluster_context_index_equal_to_current_index(self): - """ - Test that the default cluster index is equal to the current cluster index. 
This test should run first - """ - assert ( - check_and_change_current_index_to_default_index() - ), "The default cluster index is different from the current cluster index" - logger.info( - "The default cluster index is equal to the current cluster index as expected" - ) - - @skipif_ms_consumer - def test_marker_skipif_ms_consumer(self): - """ - Test that the 'skipif_ms_consumer' marker work as expected - """ - assert ( - not is_ms_consumer_cluster() - ), "The cluster is a consumer cluster, even though we have the marker 'skipif_ms_consumer'" - logger.info("The cluster is not a consumer cluster as expected") - - assert check_and_change_current_index_to_default_index() - logger.info( - "The default cluster index is equal to the current cluster index as expected" - ) - - @skipif_ms_provider - def test_marker_skipif_ms_provider(self): - """ - Test that the 'skipif_ms_provider' marker work as expected - """ - assert ( - not is_ms_provider_cluster() - ), "The cluster is a provider cluster, even though we have the marker 'skipif_ms_provider'" - logger.info("The cluster is not a provider cluster as expected") - - assert check_and_change_current_index_to_default_index() - logger.info( - "The default cluster index is equal to the current cluster index as expected" - ) - - @runs_on_provider - @pytest.mark.order("second_to_last") - def test_runs_on_provider_marker(self): - """ - Test that the 'runs_on_provider' marker work as expected - """ - assert ( - is_ms_provider_cluster() - ), "The cluster is not a provider cluster, even though we have the marker 'runs_on_provider'" - logger.info("The cluster is a provider cluster as expected") - - @pytest.mark.order("last") - def test_current_index_not_change_after_using_runs_on_provider(self): - """ - Test that the current cluster index didn't change after using the 'runs_on_provider' - marker in the previous test. 
- """ - assert ( - check_and_change_current_index_to_default_index() - ), "The current cluster index has changed after using the 'runs_on_provider' marker" - logger.info( - "The current cluster index didn't change after using the 'runs_on_provider' marker" - ) diff --git a/tests/cross_functional/system_test/multicluster/test_sanity_ms.py b/tests/cross_functional/system_test/multicluster/test_sanity_ms.py deleted file mode 100644 index a0e0bc3168f..00000000000 --- a/tests/cross_functional/system_test/multicluster/test_sanity_ms.py +++ /dev/null @@ -1,124 +0,0 @@ -import logging -from time import sleep -import pytest - -from ocs_ci.helpers.sanity_helpers import SanityManagedService -from ocs_ci.framework import config -from ocs_ci.framework.pytest_customization.marks import yellow_squad -from ocs_ci.framework.testlib import ( - libtest, - ManageTest, - managed_service_required, -) - -log = logging.getLogger(__name__) - - -@yellow_squad -@libtest -@managed_service_required -class TestSanityManagedServiceWithDefaultParams(ManageTest): - """ - Test the usage of the 'SanityManagedService' class when using the default params - """ - - @pytest.fixture(autouse=True) - def setup(self, create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers): - """ - Save the original index, and init the sanity instance - """ - self.orig_index = config.cur_index - # Pass the 'create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers' factory to the - # init method and use the default params - self.sanity_helpers = SanityManagedService( - create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers - ) - - @pytest.fixture(autouse=True) - def teardown(self, request, nodes): - """ - Make sure the original index is equal to the current index - """ - - def finalizer(): - log.info("Switch to the original cluster index") - config.switch_ctx(self.orig_index) - - request.addfinalizer(finalizer) - - def test_sanity_ms(self): - log.info("Start creating resources for the MS consumers") - self.sanity_helpers.create_resources_on_ms_consumers() - timeout = 60 - log.info(f"Waiting {timeout} seconds for the IO to be running") - sleep(timeout) - - log.info("Deleting the resources from the MS consumers") - self.sanity_helpers.delete_resources_on_ms_consumers() - log.info("Check the cluster health") - self.sanity_helpers.health_check_ms() - - assert ( - config.cur_index == self.orig_index - ), "The current index is different from the original index" - log.info("The current index is equal to the original index") - - -@yellow_squad -@libtest -@managed_service_required -class TestSanityManagedServiceWithOptionalParams(ManageTest): - """ - Test the usage of the 'SanityManagedService' class when passing optional params - """ - - @pytest.fixture(autouse=True) - def setup(self, create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers): - """ - Save the original index, and init the sanity instance - """ - self.orig_index = config.cur_index - - first_consumer_i = config.get_consumer_indexes_list()[0] - # Pass the 'create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers' factory to the - # init method and use the optional params - self.sanity_helpers = SanityManagedService( - create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers, - scale_count=40, - pvc_per_pod_count=10, - start_io=True, - io_runtime=600, - max_pvc_size=25, - consumer_indexes=[first_consumer_i], - ) - - @pytest.fixture(autouse=True) - def teardown(self, request, nodes): - """ - Make sure the original index is equal to the current index - """ - - def finalizer(): - 
log.info("Switch to the original cluster index") - config.switch_ctx(self.orig_index) - - request.addfinalizer(finalizer) - - def test_sanity_ms_with_optional_params( - self, create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers - ): - log.info("Start creating resources for the MS consumers") - self.sanity_helpers.create_resources_on_ms_consumers() - timeout = 60 - log.info(f"Waiting {timeout} seconds for the IO to be running") - sleep(timeout) - - log.info("Deleting the resources from the MS consumers") - self.sanity_helpers.delete_resources_on_ms_consumers() - log.info("Check the cluster health") - self.sanity_helpers.health_check_ms() - - assert ( - config.cur_index == self.orig_index - ), "The current index is different from the original index" - log.info("The current index is equal to the original index") diff --git a/tests/cross_functional/system_test/multicluster/test_switch_to_correct_index_at_setup.py b/tests/cross_functional/system_test/multicluster/test_switch_to_correct_index_at_setup.py index 7b4e32b9c6e..269ddbd852d 100644 --- a/tests/cross_functional/system_test/multicluster/test_switch_to_correct_index_at_setup.py +++ b/tests/cross_functional/system_test/multicluster/test_switch_to_correct_index_at_setup.py @@ -5,7 +5,6 @@ from ocs_ci.framework.testlib import ( libtest, ManageTest, - managed_service_required, hci_provider_and_client_required, ) from ocs_ci.ocs.cluster import ( @@ -15,7 +14,6 @@ from ocs_ci.utility.utils import switch_to_correct_cluster_at_setup from ocs_ci.helpers.sanity_helpers import Sanity, SanityManagedService from ocs_ci.ocs.constants import ( - MS_CONSUMER_TYPE, MS_PROVIDER_TYPE, HCI_CLIENT, HCI_PROVIDER, @@ -53,18 +51,6 @@ def setup(self, create_scale_pods_and_pvcs_using_kube_job_on_ms_consumers, reque else: self.sanity_helpers = Sanity() - @managed_service_required - @pytest.mark.parametrize( - "cluster_type", - [MS_PROVIDER_TYPE, MS_CONSUMER_TYPE], - ) - def test_switch_to_correct_cluster_with_ms_cluster_types(self, cluster_type): - """ - Test switch to the correct cluster index at setup, when we have MS cluster types - - """ - check_switch_to_correct_cluster_at_setup(cluster_type) - @hci_provider_and_client_required @pytest.mark.parametrize( "cluster_type", @@ -77,29 +63,6 @@ def test_switch_to_correct_cluster_with_hci_cluster_types(self, cluster_type): """ check_switch_to_correct_cluster_at_setup(cluster_type) - @managed_service_required - @pytest.mark.parametrize( - "cluster_type", - [MS_PROVIDER_TYPE], - ) - def test_switch_to_correct_cluster_with_provider_cluster_type(self, cluster_type): - """ - Test switch to the correct cluster index at setup, when we have MS provider cluster type - - """ - check_switch_to_correct_cluster_at_setup(cluster_type) - - @pytest.mark.parametrize( - "cluster_type", - [MS_PROVIDER_TYPE, MS_CONSUMER_TYPE, NON_MS_CLUSTER_TYPE], - ) - def test_switch_to_correct_cluster_with_all_cluster_types(self, cluster_type): - """ - Test switch to the correct cluster index at setup, when we have all the cluster types - - """ - check_switch_to_correct_cluster_at_setup(cluster_type) - @pytest.mark.parametrize( "cluster_type", [MS_PROVIDER_TYPE, NON_MS_CLUSTER_TYPE], @@ -114,65 +77,9 @@ def test_switch_to_correct_cluster_with_provider_and_non_ms_cluster_types( """ check_switch_to_correct_cluster_at_setup(cluster_type) - @pytest.mark.parametrize( - "cluster_type", - [MS_CONSUMER_TYPE, NON_MS_CLUSTER_TYPE], - ) - def test_switch_to_correct_cluster_with_consumer_and_non_ms_cluster_types( - self, cluster_type - ): - """ - Test switch 
to the correct cluster index at setup, - when we have MS consumer and non-MS cluster types - - """ - check_switch_to_correct_cluster_at_setup(cluster_type) - def test_switch_to_correct_cluster_without_cluster_type_param(self): """ Test switch to the correct cluster index at setup, when we don't pass the cluster type param """ check_switch_to_correct_cluster_at_setup() - - @pytest.mark.parametrize( - argnames=["cluster_type", "additional_param"], - argvalues=[ - pytest.param(*[MS_PROVIDER_TYPE, "common_value"]), - pytest.param(*[MS_CONSUMER_TYPE, "common_value"]), - pytest.param(*[NON_MS_CLUSTER_TYPE, "common_value"]), - pytest.param(*[MS_PROVIDER_TYPE, "provider_value"]), - pytest.param(*[MS_CONSUMER_TYPE, "consumer_value"]), - ], - ) - def test_switch_to_correct_cluster_with_all_cluster_types_with_additional_param( - self, cluster_type, additional_param - ): - """ - Test switch to the correct cluster index at setup when we have all cluster types, and we also pass - an additional parameter. Some param values we use for all the cluster types, and some we use only - for specific clusters. - - """ - logger.info(f"additional value = {additional_param}") - check_switch_to_correct_cluster_at_setup(cluster_type) - - @pytest.mark.parametrize( - argnames=["cluster_type", "additional_param"], - argvalues=[ - pytest.param(*[MS_PROVIDER_TYPE, "common_value"]), - pytest.param(*[MS_CONSUMER_TYPE, "common_value"]), - pytest.param(*[MS_CONSUMER_TYPE, "consumer_value"]), - ], - ) - def test_switch_to_correct_cluster_with_ms_cluster_types_with_additional_param( - self, cluster_type, additional_param - ): - """ - Test switch to the correct cluster index at setup when we have all cluster types, and we also pass - an additional parameter. Some param values we use for all the cluster types, and some we use only - for specific clusters. - - """ - logger.info(f"additional value = {additional_param}") - check_switch_to_correct_cluster_at_setup(cluster_type) From 3014d80e2b1f2493fa3a05cdf22dc4dbb08430ff Mon Sep 17 00:00:00 2001 From: Shrivaibavi Raghaventhiran <30822772+Shrivaibavi@users.noreply.github.com> Date: Tue, 10 Dec 2024 18:46:29 +0530 Subject: [PATCH 31/44] : Test to create SC & CBP from UI and validate pg_num (#10657) Signed-off-by: Shrivaibavi Raghaventhiran --- ocs_ci/ocs/cluster.py | 2 +- .../ui/test_create_pool_block_pool.py | 54 ++++++++++++++++++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/ocs_ci/ocs/cluster.py b/ocs_ci/ocs/cluster.py index 441059f01a7..d4f979a268f 100644 --- a/ocs_ci/ocs/cluster.py +++ b/ocs_ci/ocs/cluster.py @@ -1368,7 +1368,6 @@ def parse_ceph_df_pools(raw_output: str) -> pd.DataFrame: "%USED", "MAX AVAIL", "QUOTA OBJECTS", - "QUOTA OBJECTS", "QUOTA BYTES", "DIRTY", "USED COMPR", @@ -1415,6 +1414,7 @@ def validate_num_of_pgs(expected_pgs: dict[str, int]) -> bool: Returns: bool: True if all pools have the expected number of PGs, False otherwise. 
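# A minimal usage sketch for the new validate_num_of_pgs() helper, mirroring the way
# the UI block-pool test later in this patch calls it; the pool name and PG count are
# illustrative placeholders, not values taken from this change:
from ocs_ci.ocs.cluster import validate_num_of_pgs

expected_pgs = {"ocs-storagecluster-cephblockpool": 32}
assert validate_num_of_pgs(expected_pgs), "pg_num does not match the expected value"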
""" + ceph_df_output = get_ceph_df_detail(format=None, out_yaml_format=False) pools_df = parse_ceph_df_pools(ceph_df_output) pools_dict = ceph_details_df_to_dict(pools_df) diff --git a/tests/cross_functional/ui/test_create_pool_block_pool.py b/tests/cross_functional/ui/test_create_pool_block_pool.py index 76ebfe40797..5f643356854 100644 --- a/tests/cross_functional/ui/test_create_pool_block_pool.py +++ b/tests/cross_functional/ui/test_create_pool_block_pool.py @@ -1,10 +1,13 @@ import logging import pytest + +from ocs_ci.framework import config from ocs_ci.framework.pytest_customization.marks import ( tier1, skipif_ui_not_support, skipif_hci_provider_or_client, green_squad, + bugzilla, ) from ocs_ci.framework.testlib import skipif_ocs_version, ManageTest, ui from ocs_ci.ocs.exceptions import ( @@ -18,8 +21,13 @@ validate_compression, validate_replica_data, check_pool_compression_replica_ceph_level, + validate_num_of_pgs, ) from ocs_ci.ocs.ui.block_pool import BlockPoolUI +from ocs_ci.ocs.resources.pod import get_ceph_tools_pod +from ocs_ci.ocs.ocp import OCP +from ocs_ci.utility.utils import run_cmd + logger = logging.getLogger(__name__) @@ -34,6 +42,7 @@ pytest.param(*[3, False], marks=pytest.mark.polarion_id("OCS-2588")), pytest.param(*[2, True], marks=pytest.mark.polarion_id("OCS-2587")), pytest.param(*[2, False], marks=pytest.mark.polarion_id("OCS-2586")), + pytest.param(*[2, False], marks=pytest.mark.polarion_id("OCS-6255")), ], ) @skipif_hci_provider_or_client @@ -75,7 +84,8 @@ def pod(self, pod_factory): @ui @tier1 - @skipif_ocs_version("<4.8") + @bugzilla("2253013") + @skipif_ocs_version("<4.16") @green_squad def test_create_delete_pool( self, @@ -95,6 +105,9 @@ def test_create_delete_pool( .* Create POD based on the PVC .* Run IO on the POD .* Check replication and compression + .* Check the values of pg_num , it should be equal to osd_pool_default_pg_num + .* Check PG autoscale is ON + .* New pool is having non-blank deviceclass """ @@ -143,3 +156,42 @@ def test_create_delete_pool( raise PoolNotReplicatedAsNeeded( f"Pool {self.pool_name} not replicated to size {replica}" ) + + # Check pg_num and osd_pool_default_pg_num matches + ct_pod = get_ceph_tools_pod() + osd_pool_default_pg_num = ct_pod.exec_ceph_cmd( + ceph_cmd="ceph config get mon osd_pool_default_pg_num" + ) + logger.info(f"The osd pool default pg num value is {osd_pool_default_pg_num}") + expected_pgs = { + self.pool_name: osd_pool_default_pg_num, + } + assert validate_num_of_pgs( + expected_pgs + ), "pg_num is not equal to the osd pool default pg num" + logger.info( + f"pg_num of the new pool {self.pool_name} " + f"is equal to the osd pool default pg num {osd_pool_default_pg_num}" + ) + + # Check if the pg-autoscale is ON + pool_autoscale_status = ct_pod.exec_ceph_cmd( + ceph_cmd="ceph osd pool autoscale-status" + ) + for pool in pool_autoscale_status: + if pool["pool_name"] == self.pool_name: + assert pool["pg_autoscale_mode"] == "on", "PG autoscale mode is off" + logger.info(f"{self.pool_name} autoscale mode is on") + + # Check the pool is not none + oc_obj = OCP(kind=constants.CEPHBLOCKPOOL) + cbp_output = run_cmd( + cmd=f"oc get cephblockpool/{self.pool_name} -n {config.ENV_DATA['cluster_namespace']} -o yaml" + ) + cbp_output = oc_obj.exec_oc_cmd( + command=f"get cephblockpool/{self.pool_name} -n {config.ENV_DATA['cluster_namespace']} -o yaml" + ) + assert cbp_output["spec"]["deviceClass"] is not None, "The Deviceclass is none" + logger.info( + f"The deviceClass of the pool {self.pool_name} is 
{cbp_output['spec']['deviceClass']}" + ) From 122a182f9555bf2a905cb1db8034fb625d57f652 Mon Sep 17 00:00:00 2001 From: Elena Bondarenko Date: Tue, 10 Dec 2024 16:13:09 +0100 Subject: [PATCH 32/44] Deprecate unneeded tests Signed-off-by: Elena Bondarenko --- .../system_test/multicluster/test_post_installation_state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cross_functional/system_test/multicluster/test_post_installation_state.py b/tests/cross_functional/system_test/multicluster/test_post_installation_state.py index 5cdfcb821f7..542079f533a 100644 --- a/tests/cross_functional/system_test/multicluster/test_post_installation_state.py +++ b/tests/cross_functional/system_test/multicluster/test_post_installation_state.py @@ -57,7 +57,7 @@ def test_consumers_ceph_resources(self): @acceptance @pc_or_ms_provider_required @pytest.mark.polarion_id("OCS-3910") - def test_consumers_capacity(self): + def deprecated_test_consumers_capacity(self): """ Test each storageconsumer's capacity and requested capacity. Now only 1Ti value is possible. If more options get added, the test @@ -107,7 +107,7 @@ def test_provider_server_logs(self): @tier1 @pytest.mark.polarion_id("OCS-3918") @runs_on_provider - def test_ceph_clients(self): + def deprecated_test_ceph_clients(self): """ Test that for every consumer there are the following cephclients in the provider cluster: rbd provisioner, rbd node, cephfs provisioner, From 3c94039b1050ec9bd236ebfce0662c1f89995f25 Mon Sep 17 00:00:00 2001 From: udaysk23 <54358025+udaysk23@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:43:56 +0530 Subject: [PATCH 33/44] Removes redundant tests from Orange squad (#10945) Signed-off-by: Uday Kurundwade --- .../scale/noobaa/test_scale_endpoint.py | 11 +-------- .../noobaa/test_scale_obc_create_delete.py | 4 +++- .../scale/noobaa/test_scale_obc_creation.py | 4 ++-- ...scale_12_OCS_worker_nodes_and_6000_PVCs.py | 13 +--------- ..._scale_3_OCS_worker_nodes_and_1500_PVCs.py | 24 ++----------------- .../cross_functional/scale/test_scale_amq.py | 11 +-------- ...test_scale_osds_fill_75%_reboot_workers.py | 13 +--------- .../scale/test_scale_pgsql.py | 2 +- 8 files changed, 12 insertions(+), 70 deletions(-) diff --git a/tests/cross_functional/scale/noobaa/test_scale_endpoint.py b/tests/cross_functional/scale/noobaa/test_scale_endpoint.py index 1b53af51d20..895c29016d0 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_endpoint.py +++ b/tests/cross_functional/scale/noobaa/test_scale_endpoint.py @@ -56,15 +56,6 @@ def teardown(): @orange_squad @scale @skipif_ocs_version("<4.5") -@pytest.mark.parametrize( - argnames="resource_to_delete", - argvalues=[ - pytest.param(*["mgr"], marks=pytest.mark.polarion_id("OCS-2402")), - pytest.param(*["mon"], marks=pytest.mark.polarion_id("OCS-2420")), - pytest.param(*["osd"], marks=pytest.mark.polarion_id("OCS-2446")), - pytest.param(*["mds"], marks=pytest.mark.polarion_id("OCS-2447")), - ], -) class TestScaleEndpointAutoScale(MCGTest): """ Test MCG endpoint auto-scaling @@ -88,7 +79,7 @@ def _assert_endpoint_count(self, desired_count): timeout=900, ) - def test_scale_endpoint_and_respin_ceph_pods( + def deprecated_test_scale_endpoint_and_respin_ceph_pods( self, mcg_job_factory, resource_to_delete, diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py b/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py index 5fb49d3c976..a9fa09ee4d8 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py +++ 
b/tests/cross_functional/scale/noobaa/test_scale_obc_create_delete.py @@ -37,7 +37,9 @@ class TestScaleOCBCreateDelete(E2ETest): num_obc_batch = 50 @pytest.mark.polarion_id("OCS-2667") - def test_scale_obc_create_delete_time(self, tmp_path, noobaa_storage_class_name): + def deprecated_test_scale_obc_create_delete_time( + self, tmp_path, noobaa_storage_class_name + ): """ MCG OBC creation and deletion using Noobaa MCG storage class diff --git a/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py b/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py index dde751f1249..15fec202f53 100644 --- a/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py +++ b/tests/cross_functional/scale/noobaa/test_scale_obc_creation.py @@ -41,7 +41,7 @@ class TestScaleOCBCreation(E2ETest): num_obc_batch = 50 @pytest.mark.polarion_id("OCS-2478") - def test_scale_mcg_obc_creation( + def deprecated_test_scale_mcg_obc_creation( self, tmp_path, noobaa_storage_class_name, timeout=60 ): """ @@ -84,7 +84,7 @@ def test_scale_mcg_obc_creation( @vsphere_platform_required @pytest.mark.polarion_id("OCS-2479") - def test_scale_rgw_obc_creation( + def deprecated_test_scale_rgw_obc_creation( self, tmp_path, noobaa_storage_class_name, timeout=60 ): """ diff --git a/tests/cross_functional/scale/test_scale_12_OCS_worker_nodes_and_6000_PVCs.py b/tests/cross_functional/scale/test_scale_12_OCS_worker_nodes_and_6000_PVCs.py index 3949ab5477b..6e02fb6d9d9 100644 --- a/tests/cross_functional/scale/test_scale_12_OCS_worker_nodes_and_6000_PVCs.py +++ b/tests/cross_functional/scale/test_scale_12_OCS_worker_nodes_and_6000_PVCs.py @@ -192,18 +192,7 @@ def test_respin_ceph_pods(self, resource_to_delete): @ignore_leftovers @pytest.mark.skipif("TestAddNode.skip_all") - @pytest.mark.parametrize( - argnames=["node_type"], - argvalues=[ - pytest.param( - *[constants.MASTER_MACHINE], marks=pytest.mark.polarion_id("OCS-763") - ), - pytest.param( - *[constants.WORKER_MACHINE], marks=pytest.mark.polarion_id("OCS-754") - ), - ], - ) - def test_rolling_reboot_node(self, node_type): + def deprecated_test_rolling_reboot_node(self, node_type): """ Test to rolling reboot of nodes """ diff --git a/tests/cross_functional/scale/test_scale_3_OCS_worker_nodes_and_1500_PVCs.py b/tests/cross_functional/scale/test_scale_3_OCS_worker_nodes_and_1500_PVCs.py index cfa8ed8e7b4..b67c43c0ff8 100644 --- a/tests/cross_functional/scale/test_scale_3_OCS_worker_nodes_and_1500_PVCs.py +++ b/tests/cross_functional/scale/test_scale_3_OCS_worker_nodes_and_1500_PVCs.py @@ -63,21 +63,12 @@ def teardown(): @ignore_leftovers @skipif_external_mode @ipi_deployment_required -@pytest.mark.parametrize( - argnames="resource_to_delete", - argvalues=[ - pytest.param(*["mgr"], marks=[pytest.mark.polarion_id("OCS-766")]), - pytest.param(*["mon"], marks=[pytest.mark.polarion_id("OCS-764")]), - pytest.param(*["osd"], marks=[pytest.mark.polarion_id("OCS-765")]), - pytest.param(*["mds"], marks=[pytest.mark.polarion_id("OCS-613")]), - ], -) class TestScaleRespinCephPods(E2ETest): """ Scale the OCS cluster to reach 1500 PVC+POD """ - def test_pv_scale_out_create_pvcs_and_respin_ceph_pods( + def deprecated_test_pv_scale_out_create_pvcs_and_respin_ceph_pods( self, fioscale, resource_to_delete, @@ -186,23 +177,12 @@ def test_respin_operator_pods( @skipif_external_mode @skipif_vsphere_ipi @ipi_deployment_required -@pytest.mark.parametrize( - argnames=["node_type"], - argvalues=[ - pytest.param( - *[constants.MASTER_MACHINE], marks=pytest.mark.polarion_id("OCS-761") - ), - 
pytest.param( - *[constants.WORKER_MACHINE], marks=pytest.mark.polarion_id("OCS-762") - ), - ], -) class TestRebootNodes(E2ETest): """ Reboot nodes in scaled up cluster """ - def test_rolling_reboot_node(self, node_type): + def deprecated_test_rolling_reboot_node(self, node_type): """ Test to rolling reboot of nodes """ diff --git a/tests/cross_functional/scale/test_scale_amq.py b/tests/cross_functional/scale/test_scale_amq.py index 143adad3fec..9d9e022cc91 100644 --- a/tests/cross_functional/scale/test_scale_amq.py +++ b/tests/cross_functional/scale/test_scale_amq.py @@ -4,7 +4,6 @@ from ocs_ci.framework.pytest_customization.marks import orange_squad from ocs_ci.framework.testlib import E2ETest, scale -from ocs_ci.ocs import constants from ocs_ci.ocs.amq import AMQ from ocs_ci.helpers.helpers import default_storage_class @@ -30,15 +29,7 @@ def teardown(): "in each test-run, priority to fix this issue is lower" ) class TestAMQBasics(E2ETest): - @pytest.mark.parametrize( - argnames=["interface"], - argvalues=[ - pytest.param( - constants.CEPHBLOCKPOOL, marks=pytest.mark.polarion_id("OCS-424") - ) - ], - ) - def test_install_amq_scale(self, interface, test_fixture_amq): + def deprecated_test_install_amq_scale(self, interface, test_fixture_amq): """ Create amq cluster and run open messages on it """ diff --git a/tests/cross_functional/scale/test_scale_osds_fill_75%_reboot_workers.py b/tests/cross_functional/scale/test_scale_osds_fill_75%_reboot_workers.py index 151f08fe9a2..dad91fd0839 100644 --- a/tests/cross_functional/scale/test_scale_osds_fill_75%_reboot_workers.py +++ b/tests/cross_functional/scale/test_scale_osds_fill_75%_reboot_workers.py @@ -35,17 +35,6 @@ reason="Skipped due to failure in 75% filling-up cluster " "which created more PODs and failed for memory issue" ) -@pytest.mark.parametrize( - argnames=["interface"], - argvalues=[ - pytest.param( - constants.CEPHBLOCKPOOL, marks=pytest.mark.polarion_id("OCS-2117") - ), - pytest.param( - constants.CEPHFILESYSTEM, marks=pytest.mark.polarion_id("OCS-2117") - ), - ], -) @skipif_aws_i3 class TestScaleOSDsRebootNodes(E2ETest): """ @@ -57,7 +46,7 @@ class TestScaleOSDsRebootNodes(E2ETest): num_of_pvcs = 10 pvc_size = 5 - def test_scale_osds_reboot_nodes( + def deprecated_test_scale_osds_reboot_nodes( self, interface, project_factory, multi_pvc_factory, dc_pod_factory ): """ diff --git a/tests/cross_functional/scale/test_scale_pgsql.py b/tests/cross_functional/scale/test_scale_pgsql.py index 51a550067a3..db6dc5201a1 100644 --- a/tests/cross_functional/scale/test_scale_pgsql.py +++ b/tests/cross_functional/scale/test_scale_pgsql.py @@ -34,7 +34,7 @@ class TestPgsqlPodScale(E2ETest): Scale test case using PGSQL Pods """ - def test_scale_pgsql(self, pgsql): + def deprecated_test_scale_pgsql(self, pgsql): """ Test case to scale pgsql pods: * Add worker nodes to existing cluster From 293573872e7a0d1d7d4756ea3e8b1119c16ce9c2 Mon Sep 17 00:00:00 2001 From: Aviad P Date: Wed, 11 Dec 2024 14:53:10 +0200 Subject: [PATCH 34/44] Added 4.18 to const (#11030) Signed-off-by: Aviadp --- ocs_ci/ocs/must_gather/const_must_gather.py | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/ocs_ci/ocs/must_gather/const_must_gather.py b/ocs_ci/ocs/must_gather/const_must_gather.py index 39d497534ad..7b4d8fc03ac 100644 --- a/ocs_ci/ocs/must_gather/const_must_gather.py +++ b/ocs_ci/ocs/must_gather/const_must_gather.py @@ -1034,6 +1034,33 @@ - set(GATHER_COMMANDS_OTHERS_EXTERNAL_EXCLUDE) ), }, + 4.18: { + "CEPH": GATHER_COMMANDS_CEPH + 
GATHER_COMMANDS_CEPH_4_7, + "JSON": GATHER_COMMANDS_JSON + GATHER_COMMANDS_JSON_4_7, + "OTHERS": list( + set( + GATHER_COMMANDS_OTHERS + + GATHER_COMMANDS_OTHERS_4_7 + + GATHER_COMMANDS_OTHERS_4_10 + ) + - set( + GATHER_COMMANDS_OTHERS_EXCLUDE_4_11 + + GATHER_COMMANDS_OTHERS_EXCLUDE_4_13 + ) + ), + "OTHERS_MANAGED_SERVICES": list( + set( + GATHER_COMMANDS_OTHERS + + GATHER_COMMANDS_OTHERS_4_7 + + GATHER_COMMANDS_OTHERS_4_10 + ) + - set(GATHER_COMMANDS_OTHERS_MANAGED_SERVICES_EXCLUDE) + ), + "OTHERS_EXTERNAL": list( + set(GATHER_COMMANDS_OTHERS_EXTERNAL + GATHER_COMMANDS_OTHERS_EXTERNAL_4_8) + - set(GATHER_COMMANDS_OTHERS_EXTERNAL_EXCLUDE) + ), + }, } CEPH_ONLY = [ From 71fee55941a52a99dbeac5f3e9cf0bbdfc3c3a27 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 11 Dec 2024 15:18:53 +0200 Subject: [PATCH 35/44] Rosa hcp node labeling (#11024) * label nodes for node stop and autoscaling Signed-off-by: Daniel Osypenko --- conf/README.md | 1 + .../aws/rosa_hcp_1az_3w_m5.12x.yaml | 1 + .../deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml | 1 + ocs_ci/deployment/rosa.py | 5 +++ ocs_ci/framework/conf/default_config.yaml | 3 ++ ocs_ci/ocs/machinepool.py | 13 +++++-- ocs_ci/utility/rosa.py | 39 +++++++++++++++++++ 7 files changed, 59 insertions(+), 4 deletions(-) diff --git a/conf/README.md b/conf/README.md index 09db077e697..4f9e45f04ef 100644 --- a/conf/README.md +++ b/conf/README.md @@ -349,6 +349,7 @@ higher priority). * `continue_upgrade_after_checks_even_if_not_healthy` - if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean. * `upgrade_osd_requires_healthy_pgs` - If set to true OSD upgrade process won't start until PGs are healthy. * `workaround_mark_disks_as_ssd` - WORKAROUND: mark disks as SSD (not rotational - `0` in `/sys/block/*d*/queue/rotational`) +* `node_labels` - Comma-separated labels to be applied to the nodes in the cluster, e.g. 
'cluster.ocs.openshift.io/openshift-storage="",node-role.kubernetes.io/infra=""', default - empty string #### UPGRADE diff --git a/conf/deployment/aws/rosa_hcp_1az_3w_m5.12x.yaml b/conf/deployment/aws/rosa_hcp_1az_3w_m5.12x.yaml index a8438908e9f..33a1f036a3e 100644 --- a/conf/deployment/aws/rosa_hcp_1az_3w_m5.12x.yaml +++ b/conf/deployment/aws/rosa_hcp_1az_3w_m5.12x.yaml @@ -26,3 +26,4 @@ ENV_DATA: ms_env_type: "staging" addon_name: "ocs-converged" persistent-monitoring: false + node_labels: cluster.ocs.openshift.io/openshift-storage="" diff --git a/conf/deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml b/conf/deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml index 960bbde5a56..f2c44d37f9e 100644 --- a/conf/deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml +++ b/conf/deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml @@ -26,3 +26,4 @@ ENV_DATA: ms_env_type: "staging" addon_name: "ocs-converged" persistent-monitoring: false + node_labels: cluster.ocs.openshift.io/openshift-storage="" diff --git a/ocs_ci/deployment/rosa.py b/ocs_ci/deployment/rosa.py index 6b03b58c259..2d803dac102 100644 --- a/ocs_ci/deployment/rosa.py +++ b/ocs_ci/deployment/rosa.py @@ -100,6 +100,11 @@ def deploy(self, log_level=""): machinepool_details.wait_replicas_ready( target_replicas=config.ENV_DATA["worker_replicas"], timeout=1200 ) + if node_labels := config.ENV_DATA.get("node_labels"): + if machinepool_id := config.ENV_DATA.get("machine_pool"): + rosa.label_nodes( + self.cluster_name, machinepool_id, node_labels, rewrite=False + ) logger.info("generate kubeconfig and kubeadmin-password files") if config.ENV_DATA["ms_env_type"] == "staging": diff --git a/ocs_ci/framework/conf/default_config.yaml b/ocs_ci/framework/conf/default_config.yaml index e988c7ff40d..23f01d8feb8 100644 --- a/ocs_ci/framework/conf/default_config.yaml +++ b/ocs_ci/framework/conf/default_config.yaml @@ -279,6 +279,9 @@ ENV_DATA: #RDR Green field rdr_osd_deployment_mode: "greenfield" + # Label nodes with specific labels, used for example fot ODF deployment on ROSA HCP + node_labels: "" + # Assisted Installer related settings # This section is related to upgrade diff --git a/ocs_ci/ocs/machinepool.py b/ocs_ci/ocs/machinepool.py index 765f4874538..bfb66d0840d 100644 --- a/ocs_ci/ocs/machinepool.py +++ b/ocs_ci/ocs/machinepool.py @@ -137,6 +137,7 @@ class MachinePool: exist: bool = field( default=False ) # not a part of the data fetched from the cluster + labels: Dict[str, str] = field(default_factory=dict) def __post_init__(self): """Automatically populate fields by fetching machine pool details.""" @@ -173,8 +174,16 @@ def from_dict(cls, data: dict, cluster_name=None): "id" ), # this parameter is different in node_conf and data fetched from machinepool cluster_name=cluster_name, + labels=data.get("labels", {}), ) + def refresh(self): + """Refresh the machine pool details.""" + details = self.get_machinepool_details(self.cluster_name, self.machinepool_id) + if details: + self.__dict__.update(details.__dict__) + self.exist = True + def get_machinepool_updated_replicas(self) -> Dict[str, int]: """ Retrieve the number of replicas and current replicas for this machine pool. @@ -463,10 +472,6 @@ def build_machinepool_cmd_base(cluster_name, node_conf, action): raise ValueError( "When 'enable_autoscaling' is True, 'min_replicas' and 'max_replicas' are required." ) - elif node_conf.get("replicas") is None: - raise ValueError( - "Parameter 'replicas' is required when autoscaling is disabled." 
- ) cmd = f"rosa {action} machinepool --cluster {cluster_name} " diff --git a/ocs_ci/utility/rosa.py b/ocs_ci/utility/rosa.py index 3e317f6418e..0e35bbb390e 100644 --- a/ocs_ci/utility/rosa.py +++ b/ocs_ci/utility/rosa.py @@ -19,6 +19,7 @@ ResourceWrongStatusException, TimeoutExpiredError, ) +from ocs_ci.ocs.machinepool import MachinePools, NodeConf from ocs_ci.utility import openshift_dedicated as ocm from ocs_ci.utility import utils @@ -1112,3 +1113,41 @@ def get_associated_oidc_config_id(cluster_name): logger.warning(f"Failed to get OIDC config id: {proc.stderr.decode().strip()}") return "" return proc.stdout.decode().strip() + + +def label_nodes(cluster_name, machinepool_id, labels, rewrite=False): + """ + Label nodes of the given cluster. + ! Important + This method rewrites existing behavior of labeling nodes in the cluster, it appends the labels to the existing + labels, but not rewrite them. This prevents the issue of accidental overwriting the existing labels. + + Args: + cluster_name (str): The cluster name + machinepool_id (str): The machinepool id + labels (str): The labels to apply + rewrite (bool): If True, rewrite the labels. False, otherwise. + + Returns: + str: The output of the command + """ + machine_pools = MachinePools(cluster_name) + machine_pool = machine_pools.filter(machinepool_id="workers", pick_first=True) + if not rewrite: + labels_dict = machine_pool.labels + logger.info(f"Existing labels: {labels_dict}") + # convert to comma separated string + if labels_dict: + labels = ( + ",".join([f"{key}={value}" for key, value in labels_dict.items()]) + + "," + + labels + ) + else: + labels = labels + machine_pools.edit_machine_pool( + NodeConf(**{"machinepool_id": machinepool_id, "labels": labels}), + wait_ready=False, + ) + machine_pool.refresh() + return machine_pool.labels From 6ca89967bc10e265408811f26ed172638d795cbd Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 11 Dec 2024 15:19:46 +0200 Subject: [PATCH 36/44] fix error in test 0.2 (#10910) Signed-off-by: Daniel Osypenko --- .../utility/deployment_openshift_logging.py | 54 ++++++++++++++----- tests/conftest.py | 22 ++++---- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/ocs_ci/utility/deployment_openshift_logging.py b/ocs_ci/utility/deployment_openshift_logging.py index a51068c5a15..768de134fbe 100644 --- a/ocs_ci/utility/deployment_openshift_logging.py +++ b/ocs_ci/utility/deployment_openshift_logging.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -def create_namespace(yaml_file): +def create_namespace(yaml_file, skip_resource_exists=False): """ Creation of namespace "openshift-operators-redhat" for Elasticsearch-operator and "openshift-logging" @@ -29,6 +29,7 @@ def create_namespace(yaml_file): Args: yaml_file (str): Path to yaml file to create namespace + skip_resource_exists: Skip the namespace creation if it already exists Example: create_namespace(yaml_file=constants.EO_NAMESPACE_YAML) @@ -38,11 +39,20 @@ def create_namespace(yaml_file): namespaces = ocp.OCP(kind=constants.NAMESPACES) logger.info("Creating Namespace.........") - assert namespaces.create(yaml_file=yaml_file), "Failed to create namespace" + try: + assert namespaces.create(yaml_file=yaml_file), "Failed to create namespace" + except CommandFailed as e: + if "AlreadyExists" in str(e) and skip_resource_exists: + # on Rosa HCP the ns created from the deployment + logger.warning("Namespace already exists") + else: + raise logger.info("Successfully created Namespace") -def 
create_elasticsearch_operator_group(yaml_file, resource_name): +def create_elasticsearch_operator_group( + yaml_file, resource_name, skip_resource_exists=False +): """ Creation of operator-group for Elastic-search operator @@ -51,6 +61,7 @@ def create_elasticsearch_operator_group(yaml_file, resource_name): elastic-search resource_name (str): Name of the operator group to create for elastic-search + skip_resource_exists: Skip the resource creation if it already exists Returns: bool: True if operator group for elastic search is created @@ -69,7 +80,14 @@ def create_elasticsearch_operator_group(yaml_file, resource_name): namespace=constants.OPENSHIFT_OPERATORS_REDHAT_NAMESPACE, ) - es_operator_group.create(yaml_file=yaml_file) + try: + es_operator_group.create(yaml_file=yaml_file) + except CommandFailed as e: + if "AlreadyExists" in str(e) and skip_resource_exists: + logger.warning("Operator group already exists") + return True + else: + raise try: es_operator_group.get(resource_name, out_yaml_format=True) logger.info("The Operator group is created successfully") @@ -79,7 +97,7 @@ def create_elasticsearch_operator_group(yaml_file, resource_name): return True -def set_rbac(yaml_file, resource_name): +def set_rbac(yaml_file, resource_name, skip_resource_exists=False): """ Setting Role Based Access Control to grant Prometheus permission to access the openshift-operators-redhat namespace @@ -89,7 +107,7 @@ def set_rbac(yaml_file, resource_name): (ROLE BASED ACCESS CONTROL) resource_name (str): Name of the resource for which we give RBAC permissions - + skip_resource_exists: Skip the resource creation if it already exists Returns: bool: True if RBAC is set successfully, false otherwise @@ -107,7 +125,14 @@ def set_rbac(yaml_file, resource_name): namespace=constants.OPENSHIFT_OPERATORS_REDHAT_NAMESPACE, ) - rbac_role.create(yaml_file=yaml_file, out_yaml_format=False) + try: + rbac_role.create(yaml_file=yaml_file, out_yaml_format=False) + except CommandFailed as e: + if "AlreadyExists" in str(e) and skip_resource_exists: + logger.warning("RBAC role already exists") + return True + else: + raise try: rbac_role.get(resource_name, out_yaml_format=True) rbac_rolebinding.get(resource_name, out_yaml_format=True) @@ -151,7 +176,7 @@ def get_elasticsearch_subscription(): return bool(es_sub) -def create_clusterlogging_operator_group(yaml_file): +def create_clusterlogging_operator_group(yaml_file, skip_resource_exists=False): """ Creation of operator-group for clusterlogging operator. 
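# A short sketch of how the new skip_resource_exists flag is intended to be used on
# ROSA HCP, where the logging namespaces and operator groups may already exist; the
# calls mirror the conftest.py change later in this patch and are otherwise illustrative:
ocp_logging_obj.create_namespace(
    yaml_file=constants.EO_NAMESPACE_YAML, skip_resource_exists=True
)
assert ocp_logging_obj.create_clusterlogging_operator_group(
    yaml_file=constants.CL_OG_YAML, skip_resource_exists=True
)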
@@ -159,8 +184,7 @@ def create_clusterlogging_operator_group(yaml_file): Args: yaml_file (str): Path to yaml file to create operator group for cluster-logging operator - resource_name (str): Name of the operator group to create for - cluster-logging operator + skip_resource_exists: Skip the resource creation if it already exists Returns: bool: True if operator group for cluster-logging is created @@ -174,8 +198,14 @@ def create_clusterlogging_operator_group(yaml_file): operator_group = ocp.OCP( kind=constants.OPERATOR_GROUP, namespace=constants.OPENSHIFT_LOGGING_NAMESPACE ) - - operator_group.create(yaml_file=yaml_file) + try: + operator_group.create(yaml_file=yaml_file) + except CommandFailed as e: + if "AlreadyExists" in str(e) and skip_resource_exists: + logger.warning("Operator group already exists") + return True + else: + raise try: operator_group.get(out_yaml_format=True) logger.info("The Operator group is created successfully") diff --git a/tests/conftest.py b/tests/conftest.py index 8820bac797b..3e9585d9676 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3268,6 +3268,7 @@ def install_logging(request): * The teardown will uninstall cluster-logging from the cluster """ + rosa_hcp_depl = config.ENV_DATA.get("platform") == constants.ROSA_HCP_PLATFORM def finalizer(): uninstall_cluster_logging() @@ -3291,18 +3292,15 @@ def finalizer(): logging_channel = "stable" if ocp_version >= version.VERSION_4_7 else ocp_version # Creates namespace openshift-operators-redhat - try: - ocp_logging_obj.create_namespace(yaml_file=constants.EO_NAMESPACE_YAML) - except CommandFailed as e: - if "AlreadyExists" in str(e): - # on Rosa HCP the ns created from the deployment - log.info("Namespace openshift-operators-redhat already exists") - else: - raise + ocp_logging_obj.create_namespace( + yaml_file=constants.EO_NAMESPACE_YAML, skip_resource_exists=rosa_hcp_depl + ) # Creates an operator-group for elasticsearch assert ocp_logging_obj.create_elasticsearch_operator_group( - yaml_file=constants.EO_OG_YAML, resource_name="openshift-operators-redhat" + yaml_file=constants.EO_OG_YAML, + resource_name="openshift-operators-redhat", + skip_resource_exists=rosa_hcp_depl, ) # Set RBAC policy on the project @@ -3325,11 +3323,13 @@ def finalizer(): ) # Creates a namespace openshift-logging - ocp_logging_obj.create_namespace(yaml_file=constants.CL_NAMESPACE_YAML) + ocp_logging_obj.create_namespace( + yaml_file=constants.CL_NAMESPACE_YAML, skip_resource_exists=rosa_hcp_depl + ) # Creates an operator-group for cluster-logging assert ocp_logging_obj.create_clusterlogging_operator_group( - yaml_file=constants.CL_OG_YAML + yaml_file=constants.CL_OG_YAML, skip_resource_exists=rosa_hcp_depl ) # Creates subscription for cluster-logging From e2b7268538d0872c07a978c8d34ee7b6ce68873b Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 11 Dec 2024 15:20:14 +0200 Subject: [PATCH 37/44] add jira ticket to prevent cascade failures (#11031) Signed-off-by: Daniel Osypenko --- tests/functional/z_cluster/nodes/test_disk_failures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/functional/z_cluster/nodes/test_disk_failures.py b/tests/functional/z_cluster/nodes/test_disk_failures.py index e8463a9e623..d3332017cf4 100644 --- a/tests/functional/z_cluster/nodes/test_disk_failures.py +++ b/tests/functional/z_cluster/nodes/test_disk_failures.py @@ -4,7 +4,7 @@ from ocs_ci.ocs import node, constants from ocs_ci.framework import config -from ocs_ci.framework.pytest_customization.marks import brown_squad 
+from ocs_ci.framework.pytest_customization.marks import brown_squad, jira from ocs_ci.framework.testlib import ( tier4a, ignore_leftovers, @@ -143,6 +143,7 @@ def init_sanity(self): """ self.sanity_helpers = Sanity() + @jira("DFBUGS-849") @skipif_managed_service @skipif_ibm_cloud @skipif_hci_provider_and_client From db8c52343af1b6e3f0685c0f517e3fab6f813458 Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 11 Dec 2024 15:21:32 +0200 Subject: [PATCH 38/44] add managed platforms to exclusion and stop collect rgw-data for test (#10990) Signed-off-by: Daniel Osypenko --- tests/functional/z_cluster/test_rook_ceph_log_rotate.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/functional/z_cluster/test_rook_ceph_log_rotate.py b/tests/functional/z_cluster/test_rook_ceph_log_rotate.py index 6ae032d1409..54195cc52e1 100644 --- a/tests/functional/z_cluster/test_rook_ceph_log_rotate.py +++ b/tests/functional/z_cluster/test_rook_ceph_log_rotate.py @@ -3,6 +3,7 @@ import pytest import re +from ocs_ci.ocs.constants import MANAGED_SERVICE_PLATFORMS from ocs_ci.ocs.resources.storage_cluster import verify_storage_cluster from ocs_ci.utility.utils import TimeoutSampler from ocs_ci.ocs.cluster import ceph_health_check @@ -134,7 +135,10 @@ def test_rook_ceph_log_rotate(self): pod.get_ceph_daemon_id(pod.get_mon_pods()[0]), "ceph-mon.", ] - if config.ENV_DATA["platform"].lower() in constants.CLOUD_PLATFORMS: + if config.ENV_DATA["platform"].lower() in ( + *constants.CLOUD_PLATFORMS, + *MANAGED_SERVICE_PLATFORMS, + ): self.podtype_id["rgw"] = [ pod.get_rgw_pods, pod.get_ceph_daemon_id(pod.get_rgw_pods()[0]), From f2e60611fe1f09452ccef86bc4af9c30614fe85d Mon Sep 17 00:00:00 2001 From: Daniel Osypenko Date: Wed, 11 Dec 2024 15:21:49 +0200 Subject: [PATCH 39/44] add dynamic noobaa sc name (#11016) Signed-off-by: Daniel Osypenko --- tests/functional/object/mcg/ui/test_mcg_ui.py | 75 +++++++++++-------- 1 file changed, 43 insertions(+), 32 deletions(-) diff --git a/tests/functional/object/mcg/ui/test_mcg_ui.py b/tests/functional/object/mcg/ui/test_mcg_ui.py index 336c3bcb138..c4fc1dfba69 100644 --- a/tests/functional/object/mcg/ui/test_mcg_ui.py +++ b/tests/functional/object/mcg/ui/test_mcg_ui.py @@ -31,6 +31,8 @@ ) from ocs_ci.ocs.ui.page_objects.object_buckets_tab import ObjectBucketsTab from ocs_ci.ocs.ui.page_objects.page_navigator import PageNavigator +from ocs_ci.ocs.scale_noobaa_lib import fetch_noobaa_storage_class_name + logger = logging.getLogger(__name__) @@ -281,6 +283,43 @@ def test_namespace_bc_creation_and_deletion( assert test_bc.check_resource_existence(should_exist=False) +def generate_test_params(): + """ + Generate test parameters for the test_obc_creation_and_deletion - helper function to reuse fixture in parametrize + """ + + noobaa_sc = fetch_noobaa_storage_class_name().decode("utf-8") + return [ + pytest.param( + *[ + noobaa_sc, + "noobaa-default-bucket-class", + "three_dots", + True, + ], + marks=[pytest.mark.polarion_id("OCS-4698"), mcg], + ), + pytest.param( + *[ + noobaa_sc, + "noobaa-default-bucket-class", + "Actions", + True, + ], + marks=[pytest.mark.polarion_id("OCS-2542"), mcg], + ), + pytest.param( + *[ + "ocs-storagecluster-ceph-rgw", + None, + "three_dots", + True, + ], + marks=[pytest.mark.polarion_id("OCS-4845"), on_prem_platform_required], + ), + ] + + @skipif_disconnected_cluster @black_squad @runs_on_provider @@ -299,43 +338,15 @@ def teardown(self): resource_name=obc_name ) + @pytest.mark.parametrize( + argnames=["storageclass", 
"bucketclass", "delete_via", "verify_ob_removal"], + argvalues=generate_test_params(), + ) @provider_mode @ui @tier1 @runs_on_provider @bugzilla("2097772") - @pytest.mark.parametrize( - argnames=["storageclass", "bucketclass", "delete_via", "verify_ob_removal"], - argvalues=[ - pytest.param( - *[ - "openshift-storage.noobaa.io", - "noobaa-default-bucket-class", - "three_dots", - True, - ], - marks=[pytest.mark.polarion_id("OCS-4698"), mcg], - ), - pytest.param( - *[ - "openshift-storage.noobaa.io", - "noobaa-default-bucket-class", - "Actions", - True, - ], - marks=[pytest.mark.polarion_id("OCS-2542"), mcg], - ), - pytest.param( - *[ - "ocs-storagecluster-ceph-rgw", - None, - "three_dots", - True, - ], - marks=[pytest.mark.polarion_id("OCS-4845"), on_prem_platform_required], - ), - ], - ) def test_obc_creation_and_deletion( self, setup_ui_class_factory, From 83fd2988ea241d35fbd71feca7ea2635ccf63c1a Mon Sep 17 00:00:00 2001 From: Jilju Joy Date: Thu, 12 Dec 2024 14:22:39 +0530 Subject: [PATCH 40/44] Provider mode - Skip ACM install based on config value (#11033) Signed-off-by: Jilju Joy --- ocs_ci/deployment/hosted_cluster.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ocs_ci/deployment/hosted_cluster.py b/ocs_ci/deployment/hosted_cluster.py index 1d9f76e6ef8..b855dabaaef 100644 --- a/ocs_ci/deployment/hosted_cluster.py +++ b/ocs_ci/deployment/hosted_cluster.py @@ -402,6 +402,9 @@ def deploy_ocp( if not config.ENV_DATA["platform"].lower() in HCI_PROVIDER_CLIENT_PLATFORMS: raise ProviderModeNotFoundException() + if not config.ENV_DATA.get("deploy_acm_hub_cluster", True): + deploy_acm_hub = False + self.deploy_dependencies( deploy_acm_hub, deploy_cnv, deploy_metallb, download_hcp_binary ) From 8293e2455b8a8e8643882ddb2c22f61f0fde4532 Mon Sep 17 00:00:00 2001 From: Parikshith Date: Thu, 12 Dec 2024 14:26:27 +0530 Subject: [PATCH 41/44] CNV vm pvc/dv/dvt cloning ops (#10922) Signed-off-by: Parikshith --- ocs_ci/helpers/cnv_helpers.py | 34 +- ocs_ci/ocs/cnv/virtual_machine.py | 403 ++++++++++++++---- ocs_ci/ocs/constants.py | 7 + .../templates/cnv-vm-workload/clone_dv.yaml | 11 + .../cnv-vm-workload/dv_cluster_role.yaml | 8 + .../cnv-vm-workload/dv_role_binding.yaml | 13 + ocs_ci/templates/cnv-vm-workload/vm.yaml | 6 +- tests/conftest.py | 47 +- .../workloads/cnv/test_vm_cloning_ops.py | 64 +++ 9 files changed, 497 insertions(+), 96 deletions(-) create mode 100644 ocs_ci/templates/cnv-vm-workload/clone_dv.yaml create mode 100644 ocs_ci/templates/cnv-vm-workload/dv_cluster_role.yaml create mode 100644 ocs_ci/templates/cnv-vm-workload/dv_role_binding.yaml create mode 100644 tests/functional/workloads/cnv/test_vm_cloning_ops.py diff --git a/ocs_ci/helpers/cnv_helpers.py b/ocs_ci/helpers/cnv_helpers.py index f5044516e37..6fcaa376cc8 100644 --- a/ocs_ci/helpers/cnv_helpers.py +++ b/ocs_ci/helpers/cnv_helpers.py @@ -176,31 +176,53 @@ def create_dv( namespace=constants.CNV_NAMESPACE, ): """ - Create a PVC using a specified data source + Create/Clones a DV using a specified data source Args: access_mode (str): The access mode for the volume. Default is `constants.ACCESS_MODE_RWX` sc_name (str): The name of the storage class to use. Default is `constants.DEFAULT_CNV_CEPH_RBD_SC`. pvc_size (str): The size of the PVC. Default is "30Gi". source_url (str): The URL of the vm registry image. Default is `constants.CNV_CENTOS_SOURCE`. - namespace (str, optional): The namespace to create the vm on. - + namespace (str, optional): The namespace to create the DV on. 
Returns: dv_obj: DV object """ - dv_data = templating.load_yaml(constants.CNV_VM_DV_YAML) dv_name = create_unique_resource_name("test", "dv") - dv_data["metadata"]["name"] = dv_name - dv_data["metadata"]["namespace"] = namespace + dv_data = templating.load_yaml(constants.CNV_VM_DV_YAML) dv_data["spec"]["storage"]["accessModes"] = [access_mode] dv_data["spec"]["storage"]["resources"]["requests"]["storage"] = pvc_size dv_data["spec"]["storage"]["storageClassName"] = sc_name dv_data["spec"]["source"]["registry"]["url"] = source_url + dv_data["metadata"]["name"] = dv_name + dv_data["metadata"]["namespace"] = namespace dv_data_obj = create_resource(**dv_data) logger.info(f"Successfully created DV - {dv_data_obj.name}") + return dv_data_obj + +def clone_dv(source_pvc_name, source_pvc_ns, destination_ns): + """ + Clones a DV using a specified data source + + Args: + source_pvc_name (str): PVC name of source vm used for cloning. + source_pvc_ns (str): PVC namespace of source vm used for cloning. + destination_ns (str): Namespace of cloned dv to be created on + + Returns: + dv_obj: Cloned DV object + + """ + dv_name = create_unique_resource_name("clone", "dv") + dv_data = templating.load_yaml(constants.CNV_VM_DV_CLONE_YAML) + dv_data["spec"]["source"]["pvc"]["name"] = source_pvc_name + dv_data["spec"]["source"]["pvc"]["namespace"] = source_pvc_ns + dv_data["metadata"]["name"] = dv_name + dv_data["metadata"]["namespace"] = destination_ns + dv_data_obj = create_resource(**dv_data) + logger.info(f"Successfully created DV - {dv_data_obj.name}") return dv_data_obj diff --git a/ocs_ci/ocs/cnv/virtual_machine.py b/ocs_ci/ocs/cnv/virtual_machine.py index 8107203fdf0..cb94b5949dd 100644 --- a/ocs_ci/ocs/cnv/virtual_machine.py +++ b/ocs_ci/ocs/cnv/virtual_machine.py @@ -10,6 +10,7 @@ create_volume_import_source, create_vm_secret, create_dv, + clone_dv, ) from ocs_ci.helpers.helpers import ( @@ -21,7 +22,8 @@ from ocs_ci.ocs.ocp import OCP from ocs_ci.ocs.cnv.virtctl import Virtctl from ocs_ci.ocs.cnv.virtual_machine_instance import VirtualMachineInstance -from ocs_ci.ocs import constants +from ocs_ci.ocs import constants, ocp +from ocs_ci.ocs.resources import pvc from ocs_ci.utility import templating from ocs_ci.utility.utils import TimeoutSampler from ocs_ci.ocs.exceptions import UsernameNotFoundException, CommandFailed @@ -58,6 +60,13 @@ def __init__( self.ns_obj = None self.pvc_obj = None self.dv_obj = None + self.pvc_name = "" + self.sc_name = "" + self.pvc_size = "" + self.pvc_access_mode = "" + self.source_url = "" + self.source_ns = "" + self.dvt_name = "" self.secret_obj = None self.volumeimportsource_obj = None self.volume_interface = "" @@ -86,7 +95,6 @@ def create_vm_workload( source_url=constants.CNV_CENTOS_SOURCE, ssh=True, verify=True, - vm_dict_path=None, ): """ Create a Virtual Machine (VM) in the specified namespace using a standalone Persistent Volume Claim (PVC) @@ -95,113 +103,168 @@ def create_vm_workload( volume_interface (str): The type of volume interface to use. Default is `constants.VM_VOLUME_PVC`. ssh (bool): If set to True, it adds a statically manged public SSH key during the VM creation verify (bool): Set to True for to verify vm is running and ssh connectivity, False otherwise - vm_dict_path (str): Path to the VM YAML file access_mode (str): The access mode for the volume. Default is `constants.ACCESS_MODE_RWX` sc_name (str): The name of the storage class to use. Default is `constants.DEFAULT_CNV_CEPH_RBD_SC`. pvc_size (str): The size of the PVC. Default is "30Gi". 
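# For reference, a hedged sketch of the clone_dv() helper introduced in cnv_helpers.py
# above; the PVC and namespace names below are placeholders only:
cloned_dv = clone_dv(
    source_pvc_name="source-vm-pvc",
    source_pvc_ns="source-vm-namespace",
    destination_ns="cloned-vm-namespace",
)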
- source_url (str): The URL of the vm registry image. Default is `constants.CNV_CENTOS_SOURCE`. + source_url (str): The URL of the vm registry image. Default is `constants.CNV_CENTOS_SOURCE` - Returns: - vm_obj: The VirtualMachine object + """ + self.volume_interface = volume_interface + self.sc_name = sc_name + self.pvc_size = pvc_size + self.pvc_access_mode = access_mode + self.source_url = source_url - Raises: - CommandFailed: If an error occurs during the creation of the VM + self._create_namespace_if_not_exists() + vm_data = self._prepare_vm_data() + if ssh: + self._add_ssh_key_to_vm(vm_data) + + if volume_interface == constants.VM_VOLUME_PVC: + self._create_vm_pvc(vm_data=vm_data) + elif volume_interface == constants.VM_VOLUME_DV: + self._create_vm_data_volume(vm_data=vm_data) + elif volume_interface == constants.VM_VOLUME_DVT: + self._configure_dvt(vm_data=vm_data) + vm_ocs_obj = create_resource(**vm_data) + logger.info(f"Successfully created VM: {vm_ocs_obj.name}") + + if verify: + self.verify_vm(verify_ssh=True) + + def _prepare_vm_data(self): + """ + Prepares the VM data. + """ + vm_data = templating.load_yaml(constants.CNV_VM_TEMPLATE_YAML) + vm_data["metadata"]["name"] = self._vm_name + vm_data["metadata"]["namespace"] = self.namespace + + return vm_data + + def _create_namespace_if_not_exists(self): + """ + Create a namespace if it doesn't exist. """ - self.volume_interface = volume_interface - # Create namespace if it doesn't exist try: self.ns_obj = create_project(project_name=self.namespace) except CommandFailed as ex: if "(AlreadyExists)" in str(ex): logger.warning(f"The namespace: {self.namespace} already exists!") - vm_dict_path = vm_dict_path if vm_dict_path else constants.CNV_VM_TEMPLATE_YAML - vm_data = templating.load_yaml(vm_dict_path) - vm_data["metadata"]["name"] = self._vm_name - vm_data["metadata"]["namespace"] = self.namespace - if ssh: - self.secret_obj = create_vm_secret(namespace=self.namespace) - ssh_secret_dict = [ - { - "sshPublicKey": { - "propagationMethod": {"noCloud": {}}, - "source": {"secret": {"secretName": f"{self.secret_obj.name}"}}, - } - } - ] - vm_data["spec"]["template"]["spec"]["accessCredentials"] = ssh_secret_dict - if volume_interface == constants.VM_VOLUME_PVC: - self.volumeimportsource_obj = create_volume_import_source(url=source_url) - self.pvc_obj = create_pvc_using_data_source( - source_name=self.volumeimportsource_obj.name, - pvc_size=pvc_size, - sc_name=sc_name, - access_mode=access_mode, - namespace=self.namespace, - ) - vm_data["spec"]["template"]["spec"]["volumes"][0]["persistentVolumeClaim"][ - "claimName" - ] = self.pvc_obj.name - wait_for_resource_state( - resource=self.pvc_obj, state=constants.STATUS_BOUND, timeout=300 - ) - if volume_interface == constants.VM_VOLUME_DV: - self.dv_obj = create_dv( - pvc_size=pvc_size, - sc_name=sc_name, - access_mode=access_mode, - namespace=self.namespace, - source_url=source_url, - ) - del vm_data["spec"]["template"]["spec"]["volumes"][0][ - "persistentVolumeClaim" - ] - vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { - "name": f"{self.dv_obj.name}" - } + def _add_ssh_key_to_vm(self, vm_data): + """ + Add SSH key to VM data. 
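# An end-to-end sketch of the refactored creation flow, relying on the storage defaults
# documented on create_vm_workload(); the VM name here is a placeholder:
vm = VirtualMachine(vm_name="cnv-test-vm", namespace=constants.CNV_NAMESPACE)
vm.create_vm_workload(volume_interface=constants.VM_VOLUME_DVT, ssh=True, verify=True)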
- if volume_interface == constants.VM_VOLUME_DVT: - # Define the dataVolumeTemplates content with parameters - dvt_name = create_unique_resource_name("test", "dvt") - vm_data["spec"]["dataVolumeTemplates"] = [] - metadata = { - "name": dvt_name, - "annotations": {"cdi.kubevirt.io/storage.checkStaticVolume": "true"}, - } - storage_spec = { - "storage": { - "accessModes": [access_mode], - "storageClassName": sc_name, - "resources": {"requests": {"storage": pvc_size}}, - }, - "source": {"registry": {"url": source_url}}, - } + Args: + vm_data (dict): The VM data to modify - vm_data["spec"]["dataVolumeTemplates"].append( - {"metadata": metadata, "spec": storage_spec} - ) - del vm_data["spec"]["template"]["spec"]["volumes"][0][ - "persistentVolumeClaim" - ] - vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { - "name": f"{dvt_name}" + """ + self.secret_obj = create_vm_secret(namespace=self.namespace) + ssh_secret_dict = [ + { + "sshPublicKey": { + "propagationMethod": {"noCloud": {}}, + "source": {"secret": {"secretName": f"{self.secret_obj.name}"}}, + } } + ] + vm_data["spec"]["template"]["spec"]["accessCredentials"] = ssh_secret_dict - vm_ocs_obj = create_resource(**vm_data) - logger.info(f"Successfully created VM: {vm_ocs_obj.name}") + def _create_vm_pvc(self, vm_data): + """ + Creates VolumeSource and PersistentVolumeClaim - if verify: - self.verify_vm(verify_ssh=ssh) + Args: + vm_data (dict): The VM data to modify + + """ + self.volumeimportsource_obj = create_volume_import_source(url=self.source_url) + self.pvc_obj = create_pvc_using_data_source( + source_name=self.volumeimportsource_obj.name, + pvc_size=self.pvc_size, + sc_name=self.sc_name, + access_mode=self.pvc_access_mode, + namespace=self.namespace, + ) + wait_for_resource_state(self.pvc_obj, state=constants.STATUS_BOUND, timeout=300) + self.pvc_name = self.pvc_obj.name + vm_data["spec"]["template"]["spec"]["volumes"][0]["persistentVolumeClaim"] = { + "claimName": self.pvc_obj.name + } + + def _create_vm_data_volume(self, vm_data): + """ + Creates a DataVolume + + Args: + vm_data (dict): The VM data to modify. + + """ + self.dv_obj = create_dv( + pvc_size=self.pvc_size, + sc_name=self.sc_name, + access_mode=self.pvc_access_mode, + namespace=self.namespace, + source_url=self.source_url, + ) + self.pvc_name = self.dv_obj.name + vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { + "name": self.dv_obj.name + } + + def _configure_dvt(self, vm_data): + """ + Configures DataVolumeTemplate on vm template provided. + + Args: + vm_data (dict): The VM data to modify. 
+ + """ + self.dvt_name = create_unique_resource_name("test", "dvt") + storage_spec = { + "storage": { + "accessModes": [self.pvc_access_mode], + "storageClassName": self.sc_name, + "resources": {"requests": {"storage": self.pvc_size}}, + }, + "source": {"registry": {"url": self.source_url}}, + } + metadata = {"name": self.dvt_name} + vm_data["spec"]["dataVolumeTemplates"] = [ + {"metadata": metadata, "spec": storage_spec} + ] + self.pvc_name = self.dvt_name + vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { + "name": self.dvt_name + } def verify_vm(self, verify_ssh=False): """ - Verifies vm status and ssh connectivity if ssh is configured + Verifies vm status, its volume and ssh connectivity if ssh is configured """ + if self.volume_interface in (constants.VM_VOLUME_DV, constants.VM_VOLUME_DVT): + self.verify_dv() self.wait_for_vm_status(status=constants.VM_RUNNING) if verify_ssh: self.wait_for_ssh_connectivity(timeout=1200) + def verify_dv(self): + """ + Verifies DV/DVT based volume is in succeeded state + """ + assert ocp.OCP(kind="dv", namespace=self.namespace).wait_for_resource( + condition="Succeeded", + resource_name=( + self.dv_obj.name + if self.volume_interface == constants.VM_VOLUME_DV + else self.dvt_name + ), + column="PHASE", + timeout=300, + ), "VM Data Volume not in Succeeded state" + def get(self, out_yaml_format=True): """ Get information about the VirtualMachine. @@ -566,8 +629,180 @@ def delete(self): self.vm_ocp_obj.wait_for_delete(resource_name=self._vm_name, timeout=180) if self.volume_interface == constants.VM_VOLUME_PVC: self.pvc_obj.delete() + self.pvc_obj.ocp.wait_for_delete( + resource_name=self.pvc_obj.name, timeout=180 + ) self.volumeimportsource_obj.delete() - if self.volume_interface == constants.VM_VOLUME_DV: + elif self.volume_interface == constants.VM_VOLUME_DV: + self.dv_obj.delete() + self.dv_obj.ocp.wait_for_delete(resource_name=self.dv_obj.name, timeout=180) + if self.ns_obj: + self.ns_obj.delete_project(project_name=self.namespace) + + +class VMCloner(VirtualMachine): + """ + Class for handling cloning of a Virtual Machine. + Inherits from VirtualMachine to have access to its attributes and methods. + """ + + def __init__(self, vm_name, namespace=None): + """ + Initializes cloned vm obj + """ + super().__init__(vm_name=vm_name, namespace=namespace) + self.source_pvc_name = "" + self.dv_cr_data_obj = self.dv_rb_data_obj = None + + def clone_vm(self, source_vm_obj, volume_interface, ssh=True, verify=True): + """ + Clone an existing virtual machine. + + Args: + source_vm_obj (VirtualMachine): The source VM object to clone. + volume_interface (str): The volume interface to use. + ssh (bool): Whether to verify SSH connectivity. 
+ verify (bool): Whether to verify the VM status after cloning + + """ + self.source_pvc_name = source_vm_obj.pvc_name + self.source_ns = source_vm_obj.namespace + self.volume_interface = source_vm_obj.volume_interface + self.sc_name = source_vm_obj.sc_name + self.pvc_size = source_vm_obj.pvc_size + self.pvc_access_mode = source_vm_obj.pvc_access_mode + + # Using methods from the parent class + self._create_namespace_if_not_exists() + vm_data = self._prepare_vm_data() + if ssh: + self._add_ssh_key_to_vm(vm_data) + # Handle cloning based on volume interface + if volume_interface == constants.VM_VOLUME_PVC: + self._clone_vm_pvc(vm_data=vm_data) + elif volume_interface == constants.VM_VOLUME_DV: + self._clone_vm_data_volume(vm_data=vm_data) + elif volume_interface == constants.VM_VOLUME_DVT: + self._configure_dvt_clone(vm_data=vm_data) + + vm_ocs_obj = create_resource(**vm_data) + logger.info(f"Successfully cloned VM: {vm_ocs_obj.name}") + + if verify: + self.verify_vm(verify_ssh=True) + + def _clone_vm_pvc(self, vm_data): + """ + Clone the PVC based on the source VM's PVC details. + + Args: + vm_data (dict): The VM data to modify. + + """ + self.pvc_obj = pvc.create_pvc_clone( + sc_name=self.sc_name, + parent_pvc=self.source_pvc_name, + clone_yaml=constants.CSI_RBD_PVC_CLONE_YAML, + namespace=self.namespace, + storage_size=self.pvc_size, + access_mode=self.pvc_access_mode, + volume_mode=constants.VOLUME_MODE_BLOCK, + ) + wait_for_resource_state(self.pvc_obj, state=constants.STATUS_BOUND, timeout=300) + vm_data["spec"]["template"]["spec"]["volumes"][0]["persistentVolumeClaim"] = { + "claimName": self.pvc_obj.name + } + + def _clone_vm_data_volume(self, vm_data): + """ + Clone the DataVolume for the VM based on the source VM's details. + + Args: + vm_data (dict): The VM data to modify. + + """ + self.dv_obj = clone_dv( + source_pvc_name=self.source_pvc_name, + source_pvc_ns=self.source_ns, + destination_ns=self.namespace, + ) + vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { + "name": self.dv_obj.name + } + + def _configure_dvt_clone(self, vm_data): + """ + Clone the DataVolumeTemplate for the VM based on the source VM's details. + + Args: + vm_data (dict): The VM data to modify. 
+ + """ + self.dvt_name = create_unique_resource_name("clone", "dvt") + self._create_role() + vm_data["spec"]["dataVolumeTemplates"] = [] + metadata = { + "name": self.dvt_name, + } + storage_spec = { + "storage": { + "accessModes": [self.pvc_access_mode], + "resources": {"requests": {"storage": self.pvc_size}}, + }, + "source": { + "pvc": { + "namespace": self.source_ns, + "name": self.source_pvc_name, + } + }, + } + vm_data["spec"]["dataVolumeTemplates"].append( + {"metadata": metadata, "spec": storage_spec} + ) + vm_data["spec"]["template"]["spec"]["volumes"][0]["dataVolume"] = { + "name": self.dvt_name + } + + def _create_role(self): + """ + Creates ClusterRole and RoleBinding for authorizing DVT based cloning + """ + dv_cr_name = create_unique_resource_name("cr", "dvt") + dv_rb_name = create_unique_resource_name("rb", "dvt") + dv_cr_data = templating.load_yaml(constants.CNV_VM_DV_CLUSTER_ROLE_YAML) + dv_cr_data["metadata"]["name"] = dv_cr_name + self.dv_cr_data_obj = create_resource(**dv_cr_data) + logger.info( + f"Successfully created DV cluster role - {self.dv_cr_data_obj.name}" + ) + dv_rb_data = templating.load_yaml(constants.CNV_VM_DV_ROLE_BIND_YAML) + dv_rb_data["metadata"]["name"] = dv_rb_name + dv_rb_data["metadata"]["namespace"] = self.source_ns + dv_rb_data["subjects"][0]["namespace"] = self.namespace + dv_rb_data["roleRef"]["name"] = dv_cr_name + self.dv_rb_data_obj = create_resource(**dv_rb_data) + logger.info( + f"Successfully created DV role binding - {self.dv_rb_data_obj.name}" + ) + + def delete(self): + """ + Delete the cloned VirtualMachine + """ + if self.secret_obj: + self.secret_obj.delete() + self.vm_ocp_obj.delete(resource_name=self._vm_name) + self.vm_ocp_obj.wait_for_delete(resource_name=self._vm_name, timeout=180) + if self.volume_interface == constants.VM_VOLUME_PVC: + self.pvc_obj.delete() + self.pvc_obj.ocp.wait_for_delete( + resource_name=self.pvc_obj.name, timeout=180 + ) + elif self.volume_interface == constants.VM_VOLUME_DV: self.dv_obj.delete() + self.dv_obj.ocp.wait_for_delete(resource_name=self.dv_obj.name, timeout=180) + elif self.volume_interface == constants.VM_VOLUME_DVT: + self.dv_rb_data_obj.delete() + self.dv_cr_data_obj.delete() if self.ns_obj: self.ns_obj.delete_project(project_name=self.namespace) diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index a419766b34f..2c14f51b679 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -987,6 +987,13 @@ CNV_VM_SOURCE_YAML = os.path.join(TEMPLATE_CNV_VM_WORKLOAD_DIR, "source.yaml") CNV_VM_PVC_YAML = os.path.join(TEMPLATE_CNV_VM_WORKLOAD_DIR, "pvc.yaml") CNV_VM_DV_YAML = os.path.join(TEMPLATE_CNV_VM_WORKLOAD_DIR, "dv.yaml") +CNV_VM_DV_CLONE_YAML = os.path.join(TEMPLATE_CNV_VM_WORKLOAD_DIR, "clone_dv.yaml") +CNV_VM_DV_CLUSTER_ROLE_YAML = os.path.join( + TEMPLATE_CNV_VM_WORKLOAD_DIR, "dv_cluster_role.yaml" +) +CNV_VM_DV_ROLE_BIND_YAML = os.path.join( + TEMPLATE_CNV_VM_WORKLOAD_DIR, "dv_role_binding.yaml" +) CNV_VM_TEMPLATE_YAML = os.path.join(TEMPLATE_CNV_VM_WORKLOAD_DIR, "vm.yaml") METALLB = "metallb-operator" diff --git a/ocs_ci/templates/cnv-vm-workload/clone_dv.yaml b/ocs_ci/templates/cnv-vm-workload/clone_dv.yaml new file mode 100644 index 00000000000..75222d081d0 --- /dev/null +++ b/ocs_ci/templates/cnv-vm-workload/clone_dv.yaml @@ -0,0 +1,11 @@ +apiVersion: cdi.kubevirt.io/v1beta1 +kind: DataVolume +metadata: + name: "" + namespace: "" +spec: + source: + pvc: + namespace: "" + name: "" + storage: {} diff --git 
a/ocs_ci/templates/cnv-vm-workload/dv_cluster_role.yaml b/ocs_ci/templates/cnv-vm-workload/dv_cluster_role.yaml new file mode 100644 index 00000000000..d911d2641d4 --- /dev/null +++ b/ocs_ci/templates/cnv-vm-workload/dv_cluster_role.yaml @@ -0,0 +1,8 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: +rules: +- apiGroups: ["cdi.kubevirt.io"] + resources: ["datavolumes/source"] + verbs: ["*"] diff --git a/ocs_ci/templates/cnv-vm-workload/dv_role_binding.yaml b/ocs_ci/templates/cnv-vm-workload/dv_role_binding.yaml new file mode 100644 index 00000000000..03383bc21e3 --- /dev/null +++ b/ocs_ci/templates/cnv-vm-workload/dv_role_binding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: + namespace: +subjects: +- kind: ServiceAccount + name: default + namespace: +roleRef: + kind: ClusterRole + name: datavolume-cloner + apiGroup: rbac.authorization.k8s.io diff --git a/ocs_ci/templates/cnv-vm-workload/vm.yaml b/ocs_ci/templates/cnv-vm-workload/vm.yaml index 1750512a751..a5f34028e3f 100644 --- a/ocs_ci/templates/cnv-vm-workload/vm.yaml +++ b/ocs_ci/templates/cnv-vm-workload/vm.yaml @@ -36,8 +36,6 @@ spec: model: virtio networkInterfaceMultiqueue: true rng: {} - machine: - type: pc-q35-rhel9.2.0 memory: guest: 2Gi resources: {} @@ -47,9 +45,7 @@ spec: pod: {} terminationGracePeriodSeconds: 180 volumes: - - name: rootdisk - persistentVolumeClaim: - claimName: sample-vm-pvc + - name: rootdisk - cloudInitNoCloud: userData: |- #cloud-config diff --git a/tests/conftest.py b/tests/conftest.py index 3e9585d9676..70993c5cb3a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -46,7 +46,7 @@ put_bucket_policy, ) from ocs_ci.ocs.constants import FUSION_CONF_DIR -from ocs_ci.ocs.cnv.virtual_machine import VirtualMachine +from ocs_ci.ocs.cnv.virtual_machine import VirtualMachine, VMCloner from ocs_ci.ocs.dr.dr_workload import ( BusyBox, BusyBox_AppSet, @@ -7095,6 +7095,51 @@ def teardown(): return factory +@pytest.fixture() +def clone_vm_workload(request): + """ + Clones VM workloads + + """ + cloned_vms = [] + + def factory( + vm_obj, + volume_interface=None, + namespace=None, + ): + """ + Args: + vm_obj (VirtualMachine): Object of source vm to clone + volume_interface (str): The type of volume interface to use. Default is `constants.VM_VOLUME_PVC`. + namespace (str, optional): The namespace to create the vm on. Default, creates a unique namespace. 
+ + Returns: + list: objects of VM clone class + + """ + clone_vm_name = create_unique_resource_name("clone", "vm") + clone_vm_obj = VMCloner(vm_name=clone_vm_name, namespace=namespace) + volume_iface = volume_interface if volume_interface else vm_obj.volume_interface + clone_vm_obj.clone_vm( + source_vm_obj=vm_obj, + volume_interface=volume_iface, + ) + cloned_vms.append(clone_vm_obj) + return cloned_vms + + def teardown(): + """ + Cleans up cloned vm workloads + + """ + for vm_wl in cloned_vms: + vm_wl.delete() + + request.addfinalizer(teardown) + return factory + + @pytest.fixture(scope="class") def lvm_storageclass_factory_class(request, storageclass_factory_class): return lvm_storageclass_factory_fixture(request, storageclass_factory_class) diff --git a/tests/functional/workloads/cnv/test_vm_cloning_ops.py b/tests/functional/workloads/cnv/test_vm_cloning_ops.py new file mode 100644 index 00000000000..ce3b2afbe3f --- /dev/null +++ b/tests/functional/workloads/cnv/test_vm_cloning_ops.py @@ -0,0 +1,64 @@ +import logging +import pytest + +from ocs_ci.framework.pytest_customization.marks import magenta_squad, workloads +from ocs_ci.framework.testlib import E2ETest +from ocs_ci.helpers.cnv_helpers import cal_md5sum_vm, run_dd_io +from ocs_ci.ocs import constants + +log = logging.getLogger(__name__) + + +@magenta_squad +class TestVmSnapshotClone(E2ETest): + """ + Tests related VM snapshots and clones + """ + + @workloads + @pytest.mark.polarion_id("OCS-6288") + def test_vm_clone(self, cnv_workload, clone_vm_workload, setup_cnv): + """ + This test performs the VM cloning and IOs created using different volume interfaces(PVC/DV/DVT) + + Test steps: + 1. Create a clone of a VM PVC by following the documented procedure from ODF official docs. + 1.1 Create clone of the pvc associated with VM. + 1.2 Cloned pvc successfully created and listed + 2. Verify the cloned PVc is created. + 3. create vm using cloned pvc. + 4. Verify that the data on VM backed by cloned pvc is same as that in the original VM. + 5. Add additional data to the cloned VM. + 6. Delete the clone by following the documented procedure from ODF official docs + 6.1 Delete clone of the pvc associated with VM. + 6.2 cloned pvc successfully deleted + 7. Repeat the above procedure for all the VMs in the system + 8. 
Delete all the clones created as part of this test + """ + + file_paths = ["/source_file.txt", "/new_file.txt"] + # TODO: Add multi_cnv fixture to configure VMs based on specifications + volume_interface = [ + constants.VM_VOLUME_PVC, + constants.VM_VOLUME_DV, + constants.VM_VOLUME_DVT, + ] + for index, vl_if in enumerate(volume_interface): + vm_obj = cnv_workload( + volume_interface=vl_if, source_url=constants.CNV_FEDORA_SOURCE + )[index] + source_csum = run_dd_io(vm_obj=vm_obj, file_path=file_paths[0], verify=True) + vm_obj.stop() + clone_obj = clone_vm_workload( + vm_obj=vm_obj, + volume_interface=vl_if, + namespace=( + vm_obj.namespace if vl_if == constants.VM_VOLUME_PVC else None + ), + )[index] + new_csum = cal_md5sum_vm(vm_obj=clone_obj, file_path=file_paths[0]) + assert ( + source_csum == new_csum + ), f"Failed: MD5 comparison between source {vm_obj.name} and cloned {clone_obj.name} VMs" + run_dd_io(vm_obj=clone_obj, file_path=file_paths[1]) + clone_obj.stop() From 04aa25a181175fe579ebf52fa84ba92b17f8b9c4 Mon Sep 17 00:00:00 2001 From: Filip Balak Date: Thu, 12 Dec 2024 10:40:10 +0100 Subject: [PATCH 42/44] Provider-client context for in-transit verification tests (#10458) * add provider-client context for in-transit verification tests Signed-off-by: fbalak * remove cloud_platform_required marker for in-transit tests Signed-off-by: fbalak * fix context Signed-off-by: fbalak * fix black Signed-off-by: fbalak * keep test_intransit_encryption_enable_disable_statetransition skipped for provider mode Signed-off-by: fbalak * fix debug message Signed-off-by: fbalak * update markers Signed-off-by: fbalak --------- Signed-off-by: fbalak --- ocs_ci/framework/__init__.py | 4 +- ocs_ci/ocs/resources/storage_cluster.py | 171 ++++++++++-------- .../test_intransit_encryption_sanity.py | 2 - ...est_mon_failure_in_intransit_encryption.py | 2 - 4 files changed, 94 insertions(+), 85 deletions(-) diff --git a/ocs_ci/framework/__init__.py b/ocs_ci/framework/__init__.py index be332687753..7ecd7cedd7e 100644 --- a/ocs_ci/framework/__init__.py +++ b/ocs_ci/framework/__init__.py @@ -493,7 +493,7 @@ def __init__(self): except ClusterNotFoundException: # if no provider is available then set the switch to current index so that # no switch happens and code runs on current cluster - logger.DEBUG("No provider was found - using current cluster") + logger.debug("No provider was found - using current cluster") switch_index = config.cur_index super().__init__(switch_index) @@ -509,7 +509,7 @@ def __init__(self): except ClusterNotFoundException: # if no provider is available then set the switch to current index so that # no switch happens and code runs on current cluster - logger.DEBUG("No Consumer was found - using current cluster") + logger.debug("No Consumer was found - using current cluster") switch_index = config.cur_index super().__init__(switch_index) diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index fe66ee8b216..915ee172e09 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -996,22 +996,25 @@ def verify_storage_cluster(): """ Verify storage cluster status """ - storage_cluster_name = config.ENV_DATA["storage_cluster_name"] - log.info("Verifying status of storage cluster: %s", storage_cluster_name) - storage_cluster = StorageCluster( - resource_name=storage_cluster_name, - namespace=config.ENV_DATA["cluster_namespace"], - ) - log.info(f"Check if StorageCluster: {storage_cluster_name} is in Succeeded phase") - 
if config.ENV_DATA.get("platform") == constants.FUSIONAAS_PLATFORM: - timeout = 1000 - elif storage_cluster.data["spec"].get("resourceProfile") != storage_cluster.data[ - "status" - ].get("lastAppliedResourceProfile"): - timeout = 1800 - else: - timeout = 600 - storage_cluster.wait_for_phase(phase="Ready", timeout=timeout) + with config.RunWithProviderConfigContextIfAvailable(): + storage_cluster_name = config.ENV_DATA["storage_cluster_name"] + log.info("Verifying status of storage cluster: %s", storage_cluster_name) + storage_cluster = StorageCluster( + resource_name=storage_cluster_name, + namespace=config.ENV_DATA["cluster_namespace"], + ) + log.info( + f"Check if StorageCluster: {storage_cluster_name} is in Succeeded phase" + ) + if config.ENV_DATA.get("platform") == constants.FUSIONAAS_PLATFORM: + timeout = 1000 + elif storage_cluster.data["spec"].get( + "resourceProfile" + ) != storage_cluster.data["status"].get("lastAppliedResourceProfile"): + timeout = 1800 + else: + timeout = 600 + storage_cluster.wait_for_phase(phase="Ready", timeout=timeout) # verify storage cluster version if not config.ENV_DATA.get("disable_storage_cluster_version_check"): @@ -1026,28 +1029,29 @@ def verify_storage_cluster_version(storage_cluster): storage_cluster (obj): storage cluster object """ - # verify storage cluster version - if config.RUN["cli_params"].get("deploy") and not config.UPGRADE.get( - "upgrade_ocs_version" - ): - log.info("Verifying storage cluster version") - try: - storage_cluster_version = storage_cluster.get()["status"]["version"] - ocs_csv = get_ocs_csv() - csv_version = ocs_csv.data["spec"]["version"] - assert ( - storage_cluster_version in csv_version - ), f"storage cluster version {storage_cluster_version} is not same as csv version {csv_version}" - except KeyError as e: - if ( - config.ENV_DATA.get("platform", "").lower() - in constants.MANAGED_SERVICE_PLATFORMS - ): - # This is a workaround. The issue for tracking is - # https://github.com/red-hat-storage/ocs-ci/issues/8390 - log.warning(f"Can't get the sc version due to the error: {str(e)}") - else: - raise e + with config.RunWithProviderConfigContextIfAvailable(): + # verify storage cluster version + if config.RUN["cli_params"].get("deploy") and not config.UPGRADE.get( + "upgrade_ocs_version" + ): + log.info("Verifying storage cluster version") + try: + storage_cluster_version = storage_cluster.get()["status"]["version"] + ocs_csv = get_ocs_csv() + csv_version = ocs_csv.data["spec"]["version"] + assert ( + storage_cluster_version in csv_version + ), f"storage cluster version {storage_cluster_version} is not same as csv version {csv_version}" + except KeyError as e: + if ( + config.ENV_DATA.get("platform", "").lower() + in constants.MANAGED_SERVICE_PLATFORMS + ): + # This is a workaround. 
The issue for tracking is + # https://github.com/red-hat-storage/ocs-ci/issues/8390 + log.warning(f"Can't get the sc version due to the error: {str(e)}") + else: + raise e def verify_storage_device_class(device_class): @@ -1395,18 +1399,19 @@ def in_transit_encryption_verification(): intransit_config_state = get_in_transit_encryption_config_state() def search_secure_keys(): - ceph_dump_data = ceph_config_dump() - keys_found = [ - record["name"] - for record in ceph_dump_data - if record["name"] in keys_to_match - ] + with config.RunWithProviderConfigContextIfAvailable(): + ceph_dump_data = ceph_config_dump() + keys_found = [ + record["name"] + for record in ceph_dump_data + if record["name"] in keys_to_match + ] - if (intransit_config_state) and (len(keys_found) != len(keys_to_match)): - raise ValueError("Not all secure keys are present in the config") + if (intransit_config_state) and (len(keys_found) != len(keys_to_match)): + raise ValueError("Not all secure keys are present in the config") - if (not intransit_config_state) and (len(keys_found) > 0): - raise ValueError("Some secure keys are Still in the config") + if (not intransit_config_state) and (len(keys_found) > 0): + raise ValueError("Some secure keys are Still in the config") return keys_found @@ -1440,22 +1445,27 @@ def get_in_transit_encryption_config_state(): bool: True if in-transit encryption is enabled, False if it is disabled, or None if an error occurred. """ - cluster_name = ( - constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE - if storagecluster_independent_check() - else constants.DEFAULT_CLUSTERNAME - ) + with config.RunWithProviderConfigContextIfAvailable(): + cluster_name = ( + constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE + if storagecluster_independent_check() + else constants.DEFAULT_CLUSTERNAME + ) - ocp_obj = StorageCluster( - resource_name=cluster_name, - namespace=config.ENV_DATA["cluster_namespace"], - ) + ocp_obj = StorageCluster( + resource_name=cluster_name, + namespace=config.ENV_DATA["cluster_namespace"], + ) - try: - return ocp_obj.data["spec"]["network"]["connections"]["encryption"]["enabled"] - except KeyError as e: - log.error(f"In-transit Encryption key {e}. not present in the storagecluster.") - return False + try: + return ocp_obj.data["spec"]["network"]["connections"]["encryption"][ + "enabled" + ] + except KeyError as e: + log.error( + f"In-transit Encryption key {e}. not present in the storagecluster." 
+ ) + return False def set_in_transit_encryption(enabled=True): @@ -1477,27 +1487,30 @@ def set_in_transit_encryption(enabled=True): log.info("Existing in-transit encryption state is same as desire state.") return True - cluster_name = ( - constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE - if storagecluster_independent_check() - else constants.DEFAULT_CLUSTERNAME - ) + with config.RunWithProviderConfigContextIfAvailable(): + cluster_name = ( + constants.DEFAULT_CLUSTERNAME_EXTERNAL_MODE + if storagecluster_independent_check() + else constants.DEFAULT_CLUSTERNAME + ) - ocp_obj = StorageCluster( - resource_name=cluster_name, - namespace=config.ENV_DATA["cluster_namespace"], - ) + ocp_obj = StorageCluster( + resource_name=cluster_name, + namespace=config.ENV_DATA["cluster_namespace"], + ) - patch = {"spec": {"network": {"connections": {"encryption": {"enabled": enabled}}}}} - action = "enable" if enabled else "disable" - log.info(f"Patching storage class to {action} in-transit encryption.") + patch = { + "spec": {"network": {"connections": {"encryption": {"enabled": enabled}}}} + } + action = "enable" if enabled else "disable" + log.info(f"Patching storage class to {action} in-transit encryption.") - if not ocp_obj.patch(params=json.dumps(patch), format_type="merge"): - log.error(f"Error {action} in-transit encryption.") - return False + if not ocp_obj.patch(params=json.dumps(patch), format_type="merge"): + log.error(f"Error {action} in-transit encryption.") + return False - log.info(f"In-transit encryption is {action}d successfully.") - ocp_obj.wait_for_phase("Progressing", timeout=60) + log.info(f"In-transit encryption is {action}d successfully.") + ocp_obj.wait_for_phase("Progressing", timeout=60) verify_storage_cluster() return True diff --git a/tests/functional/encryption/test_intransit_encryption_sanity.py b/tests/functional/encryption/test_intransit_encryption_sanity.py index 5e1a43f1b35..fb7525922dc 100644 --- a/tests/functional/encryption/test_intransit_encryption_sanity.py +++ b/tests/functional/encryption/test_intransit_encryption_sanity.py @@ -10,7 +10,6 @@ tier1, skipif_ocs_version, green_squad, - skipif_hci_provider_and_client, cloud_platform_required, ) from ocs_ci.framework import config @@ -19,7 +18,6 @@ @green_squad -@skipif_hci_provider_and_client @cloud_platform_required class TestInTransitEncryptionSanity: @pytest.fixture(autouse=True) diff --git a/tests/functional/encryption/test_mon_failure_in_intransit_encryption.py b/tests/functional/encryption/test_mon_failure_in_intransit_encryption.py index 8701bd003be..294ee8339d3 100644 --- a/tests/functional/encryption/test_mon_failure_in_intransit_encryption.py +++ b/tests/functional/encryption/test_mon_failure_in_intransit_encryption.py @@ -11,7 +11,6 @@ tier4a, skipif_ocs_version, green_squad, - cloud_platform_required, ) from ocs_ci.framework import config from ocs_ci.ocs import constants @@ -29,7 +28,6 @@ @skipif_ocs_version("<4.13") @pytest.mark.polarion_id("OCS-4919") @green_squad -@cloud_platform_required class TestMonFailuresWithIntransitEncryption: @pytest.fixture(autouse=True) def teardown_fixture(self, request): From 1a717e3dff7fa3a5fff6091658cc20114aa824bb Mon Sep 17 00:00:00 2001 From: Filip Balak Date: Thu, 12 Dec 2024 12:32:15 +0100 Subject: [PATCH 43/44] Fix bad file name (#9925) Signed-off-by: fbalak --- - | 80 --------------------- tests/functional/upgrade/test_ms_upgrade.py | 80 +++++++++++++++++++++ 2 files changed, 80 insertions(+), 80 deletions(-) delete mode 100644 - diff --git a/- b/- deleted file mode 100644 
index 8b79894603f..00000000000 --- a/- +++ /dev/null @@ -1,80 +0,0 @@ -import logging - -from ocs_ci.ocs.resources.pod import cal_md5sum -from ocs_ci.helpers.managed_services import verify_provider_topology -from ocs_ci.framework.pytest_customization.marks import yellow_squad -from ocs_ci.framework.testlib import ( - pre_upgrade, - post_upgrade, - ms_consumer_required, - ms_provider_required, -) - -logger = logging.getLogger(name=__file__) - - -@yellow_squad -@pre_upgrade -@ms_consumer_required -def test_prepare_block_md5_before_upgrade(block_md5): - """ - Prepare md5 results for utilized RBD PVC. - - """ - pass - - -@yellow_squad -@pre_upgrade -@ms_consumer_required -def test_prepare_fs_md5_before_upgrade(fs_md5): - """ - Prepare md5 results for utilized Ceph FS PVC. - - """ - pass - - -@yellow_squad -@post_upgrade -@ms_consumer_required -def test_verify_block_md5_after_upgrade(block_md5, block_pod): - """ - Check that md5 checksum of file on RBD PVC did not changed during upgrade. - - """ - md5_after_upgrade = cal_md5sum( - pod_obj=block_pod, - file_name="fio-rand-write", - block=False, - ) - logger.info(f"RBD file md5 after upgrade: {md5_after_upgrade}") - assert md5_after_upgrade == block_md5 - - -@yellow_squad -@post_upgrade -@ms_consumer_required -def test_verify_fs_md5_after_upgrade(fs_md5, fs_pod): - """ - Check that md5 checksum of file on Ceph FS PVC did not changed during upgrade. - - """ - md5_after_upgrade = cal_md5sum( - pod_obj=fs_pod, - file_name="fio-rand-write", - block=False, - ) - logger.info(f"Ceph FS file md5 after upgrade: {md5_after_upgrade}") - assert md5_after_upgrade == fs_md5 - - -@yellow_squad -@post_upgrade -@ms_provider_required -def test_verify_provider_topology_after_upgrade(): - """ - Verify topology in a Managed Services provider cluster after upgrade - - """ - verify_provider_topology() diff --git a/tests/functional/upgrade/test_ms_upgrade.py b/tests/functional/upgrade/test_ms_upgrade.py index e69de29bb2d..8b79894603f 100644 --- a/tests/functional/upgrade/test_ms_upgrade.py +++ b/tests/functional/upgrade/test_ms_upgrade.py @@ -0,0 +1,80 @@ +import logging + +from ocs_ci.ocs.resources.pod import cal_md5sum +from ocs_ci.helpers.managed_services import verify_provider_topology +from ocs_ci.framework.pytest_customization.marks import yellow_squad +from ocs_ci.framework.testlib import ( + pre_upgrade, + post_upgrade, + ms_consumer_required, + ms_provider_required, +) + +logger = logging.getLogger(name=__file__) + + +@yellow_squad +@pre_upgrade +@ms_consumer_required +def test_prepare_block_md5_before_upgrade(block_md5): + """ + Prepare md5 results for utilized RBD PVC. + + """ + pass + + +@yellow_squad +@pre_upgrade +@ms_consumer_required +def test_prepare_fs_md5_before_upgrade(fs_md5): + """ + Prepare md5 results for utilized Ceph FS PVC. + + """ + pass + + +@yellow_squad +@post_upgrade +@ms_consumer_required +def test_verify_block_md5_after_upgrade(block_md5, block_pod): + """ + Check that md5 checksum of file on RBD PVC did not changed during upgrade. + + """ + md5_after_upgrade = cal_md5sum( + pod_obj=block_pod, + file_name="fio-rand-write", + block=False, + ) + logger.info(f"RBD file md5 after upgrade: {md5_after_upgrade}") + assert md5_after_upgrade == block_md5 + + +@yellow_squad +@post_upgrade +@ms_consumer_required +def test_verify_fs_md5_after_upgrade(fs_md5, fs_pod): + """ + Check that md5 checksum of file on Ceph FS PVC did not changed during upgrade. 
+ + """ + md5_after_upgrade = cal_md5sum( + pod_obj=fs_pod, + file_name="fio-rand-write", + block=False, + ) + logger.info(f"Ceph FS file md5 after upgrade: {md5_after_upgrade}") + assert md5_after_upgrade == fs_md5 + + +@yellow_squad +@post_upgrade +@ms_provider_required +def test_verify_provider_topology_after_upgrade(): + """ + Verify topology in a Managed Services provider cluster after upgrade + + """ + verify_provider_topology() From d29c56c4b4f2047c07e06dbe86bb23d279e08446 Mon Sep 17 00:00:00 2001 From: Mahesh Shetty Date: Fri, 13 Dec 2024 12:59:56 +0530 Subject: [PATCH 44/44] [MCG] Assert when objects are not replicated in MCG replication system test and fix replication sync issue (#10924) Signed-off-by: Mahesh Shetty --- .../test_mcg_replication_with_disruptions.py | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/cross_functional/system_test/test_mcg_replication_with_disruptions.py b/tests/cross_functional/system_test/test_mcg_replication_with_disruptions.py index f2c35bfb428..89dfe0aa7f8 100644 --- a/tests/cross_functional/system_test/test_mcg_replication_with_disruptions.py +++ b/tests/cross_functional/system_test/test_mcg_replication_with_disruptions.py @@ -104,8 +104,13 @@ def test_replication_with_disruptions( nodes, ): # check uni bucket replication from multi (aws+azure) namespace bucket to s3-compatible namespace bucket + prefix_site_1 = "site1" target_bucket_name = bucket_factory(bucketclass=target_bucketclass)[0].name - replication_policy = ("basic-replication-rule", target_bucket_name, None) + replication_policy = ( + "basic-replication-rule", + target_bucket_name, + prefix_site_1, + ) source_bucket_name = bucket_factory( bucketclass=source_bucketclass, replication_policy=replication_policy )[0].name @@ -116,18 +121,23 @@ def test_replication_with_disruptions( mcg_obj=mcg_obj_session, amount=5, pattern="first-write-", + prefix=prefix_site_1, ) logger.info(f"Written objects: {written_random_objects}") - compare_bucket_object_list( + assert compare_bucket_object_list( mcg_obj_session, source_bucket_name, target_bucket_name ) logger.info("Uni-directional bucket replication working as expected") # change from uni-directional to bi-directional replication policy logger.info("Changing the replication policy from uni to bi-directional!") + prefix_site_2 = "site2" patch_replication_policy_to_bucket( - target_bucket_name, "basic-replication-rule-2", source_bucket_name + target_bucket_name, + "basic-replication-rule-2", + source_bucket_name, + prefix=prefix_site_2, ) logger.info( "Patch ran successfully! 
Changed the replication policy from uni to bi directional" @@ -142,9 +152,10 @@ def test_replication_with_disruptions( mcg_obj=mcg_obj_session, amount=3, pattern="second-write-", + prefix=prefix_site_2, ) logger.info(f"Written objects: {written_random_objects}") - compare_bucket_object_list( + assert compare_bucket_object_list( mcg_obj_session, source_bucket_name, target_bucket_name ) logger.info("Bi directional bucket replication working as expected") @@ -173,10 +184,11 @@ def test_replication_with_disruptions( mcg_obj=mcg_obj_session, amount=1, pattern="third-write-", + prefix=prefix_site_2, ) logger.info(f"Written objects: {written_random_objects}") - compare_bucket_object_list( + assert compare_bucket_object_list( mcg_obj_session, source_bucket_name, target_bucket_name ) logger.info( @@ -194,6 +206,7 @@ def test_replication_with_disruptions( mcg_obj=mcg_obj_session, amount=1, pattern="fourth-write-", + prefix=prefix_site_2, ) logger.info(f"Written objects: {written_random_objects}") @@ -206,7 +219,7 @@ def test_replication_with_disruptions( pod_names=pod_names, namespace=config.ENV_DATA["cluster_namespace"] ) - compare_bucket_object_list( + assert compare_bucket_object_list( mcg_obj_session, source_bucket_name, target_bucket_name ) logger.info("Object sync works after the RGW pod restarted!!") @@ -220,6 +233,7 @@ def test_replication_with_disruptions( mcg_obj=mcg_obj_session, amount=1, pattern="fifth-write-", + prefix=prefix_site_2, ) logger.info(f"Written objects: {written_random_objects}") @@ -236,7 +250,7 @@ def test_replication_with_disruptions( ) logger.info("Nodes rebooted successfully!!") - compare_bucket_object_list( + assert compare_bucket_object_list( mcg_obj_session, source_bucket_name, target_bucket_name ) logger.info("Objects sync works even when the cluster is rebooted")
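
Note on the dv_cluster_role.yaml and dv_role_binding.yaml templates added for DVT cloning: CDI only admits a DataVolume that clones a PVC from another namespace when the destination namespace's ServiceAccount is allowed to use the datavolumes/source subresource in the source namespace, which is exactly what VMCloner._create_role() wires up. A minimal sketch of the same two objects created directly with the kubernetes Python client; the namespace and resource names are placeholders:

    from kubernetes import client, config as k8s_config

    k8s_config.load_kube_config()
    rbac = client.RbacAuthorizationV1Api()

    # Cluster-scoped role granting use of DataVolumes as a clone source,
    # mirroring dv_cluster_role.yaml.
    rbac.create_cluster_role(
        body={
            "apiVersion": "rbac.authorization.k8s.io/v1",
            "kind": "ClusterRole",
            "metadata": {"name": "datavolume-cloner"},
            "rules": [
                {
                    "apiGroups": ["cdi.kubevirt.io"],
                    "resources": ["datavolumes/source"],
                    "verbs": ["*"],
                }
            ],
        }
    )

    # The binding lives in the *source* namespace and grants the *destination*
    # namespace's default ServiceAccount the clone-source permission,
    # mirroring dv_role_binding.yaml.
    rbac.create_namespaced_role_binding(
        namespace="source-vm-namespace",
        body={
            "apiVersion": "rbac.authorization.k8s.io/v1",
            "kind": "RoleBinding",
            "metadata": {"name": "allow-dv-clone", "namespace": "source-vm-namespace"},
            "subjects": [
                {
                    "kind": "ServiceAccount",
                    "name": "default",
                    "namespace": "cloned-vm-namespace",
                }
            ],
            "roleRef": {
                "kind": "ClusterRole",
                "name": "datavolume-cloner",
                "apiGroup": "rbac.authorization.k8s.io",
            },
        },
    )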
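
The RunWithProviderConfigContextIfAvailable wrapper used throughout the storage_cluster changes follows a switch-and-restore pattern: switch to the provider cluster's config index if a provider exists, otherwise keep running against the current cluster, and always restore the previous index on exit. A standalone sketch of that idea, not the ocs-ci implementation; MultiClusterConfig and its fields below are illustrative stand-ins:

    import contextlib


    class MultiClusterConfig:
        """Toy stand-in for a multicluster config object."""

        def __init__(self, clusters):
            self.clusters = clusters  # list of per-cluster config dicts
            self.cur_index = 0        # index of the currently active cluster

        def find_provider_index(self):
            for index, cluster in enumerate(self.clusters):
                if cluster.get("cluster_type") == "provider":
                    return index
            return None  # no provider found: stay on the current cluster

        @contextlib.contextmanager
        def run_with_provider_if_available(self):
            previous_index = self.cur_index
            provider_index = self.find_provider_index()
            self.cur_index = (
                provider_index if provider_index is not None else previous_index
            )
            try:
                yield self.clusters[self.cur_index]
            finally:
                self.cur_index = previous_index  # always restore the original context


    config = MultiClusterConfig(
        [
            {"name": "client", "cluster_type": "client"},
            {"name": "provider", "cluster_type": "provider"},
        ]
    )
    with config.run_with_provider_if_available() as active_cluster:
        print(f"storage cluster checks would run against: {active_cluster['name']}")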
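
The set_in_transit_encryption change keeps the same merge patch as before, {"spec": {"network": {"connections": {"encryption": {"enabled": <bool>}}}}}, and only moves it into the provider context. For manual verification outside the framework, roughly the same toggle can be applied with the kubernetes Python client; the group, version, plural, namespace, and resource name below are assumptions matching a default ODF install and may need adjusting for the cluster under test:

    from kubernetes import client, config as k8s_config

    k8s_config.load_kube_config()
    custom_api = client.CustomObjectsApi()

    # Merge patch enabling in-transit encryption on the StorageCluster CR.
    patch = {"spec": {"network": {"connections": {"encryption": {"enabled": True}}}}}

    custom_api.patch_namespaced_custom_object(
        group="ocs.openshift.io",        # assumed API group of StorageCluster
        version="v1",
        namespace="openshift-storage",   # assumed ODF namespace
        plural="storageclusters",
        name="ocs-storagecluster",       # assumed default StorageCluster name
        body=patch,                      # dict body is sent as a JSON merge patch
    )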
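
The prefix arguments added to the replication test (prefix_site_1 and prefix_site_2) scope each replication rule to a key prefix, so the two directions of the bi-directional policy do not keep re-replicating each other's objects. The tuple form ("basic-replication-rule", target_bucket_name, prefix) is expanded by the framework into a JSON policy; the field names in the sketch below are an approximation of the NooBaa schema and should be checked against the helper for the MCG version under test:

    import json


    def build_replication_policy(rule_id, destination_bucket, prefix=None):
        """Approximate expansion of the (rule_id, destination_bucket, prefix) tuple."""
        rule = {"rule_id": rule_id, "destination_bucket": destination_bucket}
        if prefix:
            # Only object keys starting with this prefix are replicated by the rule.
            rule["filter"] = {"prefix": prefix}
        return {"rules": [rule]}


    print(
        json.dumps(
            build_replication_policy("basic-replication-rule", "target-bucket", "site1"),
            indent=2,
        )
    )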