From 21101de9bb6f0b08633418c43b1d495c54dfaf5f Mon Sep 17 00:00:00 2001
From: Itzhak Kave
Date: Thu, 28 Mar 2024 20:12:48 +0200
Subject: [PATCH 1/4] - Create the resize osd test and the verification steps
- Create new Polarion test case
- Create new file for the resize osd functions
- Raise exception instead of assert in the relevant two functions

Signed-off-by: Itzhak Kave
---
 ocs_ci/ocs/exceptions.py | 4 +
 ocs_ci/ocs/resources/osd_resize.py | 306 ++++++++++++++++++
 ocs_ci/ocs/resources/pod.py | 61 ++++
 ocs_ci/ocs/resources/storage_cluster.py | 74 +++--
 .../cluster_expansion/test_resize_osd.py | 158 +++++++++
 5 files changed, 583 insertions(+), 20 deletions(-)
 create mode 100644 ocs_ci/ocs/resources/osd_resize.py
 create mode 100644 tests/functional/z_cluster/cluster_expansion/test_resize_osd.py

diff --git a/ocs_ci/ocs/exceptions.py b/ocs_ci/ocs/exceptions.py
index a5d995fce6b..cc749d64f92 100644
--- a/ocs_ci/ocs/exceptions.py
+++ b/ocs_ci/ocs/exceptions.py
@@ -694,3 +694,7 @@ class UsernameNotFoundException(Exception):
 
 class MultiStorageClusterExternalCephHealth(Exception):
     pass
+
+
+class StorageSizeNotReflectedException(Exception):
+    pass
diff --git a/ocs_ci/ocs/resources/osd_resize.py b/ocs_ci/ocs/resources/osd_resize.py
new file mode 100644
index 00000000000..eb7d4ca7e71
--- /dev/null
+++ b/ocs_ci/ocs/resources/osd_resize.py
@@ -0,0 +1,306 @@
+import logging
+
+from ocs_ci.ocs.exceptions import (
+    StorageSizeNotReflectedException,
+    ResourceWrongStatusException,
+    CephHealthException,
+)
+from ocs_ci.ocs.resources.pod import (
+    get_osd_pods,
+    delete_pods,
+    wait_for_pods_to_be_in_statuses,
+    get_ceph_tools_pod,
+)
+from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs
+from ocs_ci.ocs.resources.pv import get_pv_size
+from ocs_ci.ocs.resources.storage_cluster import (
+    get_storage_size,
+    get_device_class,
+    verify_storage_device_class,
+    verify_device_class_in_osd_tree,
+)
+from ocs_ci.ocs.cluster import check_ceph_osd_tree, CephCluster
+from ocs_ci.utility.utils import ceph_health_check, TimeoutSampler, convert_device_size
+from ocs_ci.ocs import constants
+from ocs_ci.ocs.ocp import OCP
+from ocs_ci.framework import config
+
+
+logger = logging.getLogger(__name__)
+
+
+def check_resources_state_post_resize_osd(old_osd_pods, old_osd_pvcs, old_osd_pvs):
+    """
+    Check that the pods, PVCs, and PVs are in the expected state post resizing the osd.
+    It will perform the following steps:
+    1. Check that the old osd pods are in a terminating state or deleted
+    2. Check that the new osd pods are running, and that we have exactly the same number of osd pods as the old ones.
+    3. Check that the PVCs are in a Bound state
+    4. Check that the old PVC and PV names are equal to the current PVC and PV names
+
+    Args:
+        old_osd_pods (list): The old osd pod objects before resizing the osd
+        old_osd_pvcs (list): The old osd PVC objects before resizing the osd
+        old_osd_pvs (list): The old osd PV objects before resizing the osd
+
+    Raises:
+        ResourceWrongStatusException: If the following occurs:
+            1. The OSD pods failed to reach the status Terminated or to be deleted
+            2.
The old PVC and PV names are not equal to the current PVC and PV names + + """ + old_osd_pods_count = len(old_osd_pods) + logger.info("Wait for the OSD pods to reach the status Terminated or to be deleted") + old_osd_pod_names = [p.name for p in old_osd_pods] + res = wait_for_pods_to_be_in_statuses( + expected_statuses=[constants.STATUS_TERMINATING], + pod_names=old_osd_pod_names, + timeout=300, + sleep=20, + ) + if not res: + raise ResourceWrongStatusException( + "The OSD pods failed to reach the status Terminated or to be deleted" + ) + + logger.info("Check that the new OSD pods are running") + ocp_pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"]) + ocp_pod.wait_for_resource( + condition=constants.STATUS_RUNNING, + selector=constants.OSD_APP_LABEL, + resource_count=old_osd_pods_count, + timeout=300, + sleep=20, + ) + + logger.info( + f"Check that the number of the new OSD pods are exactly {old_osd_pods_count}" + ) + for osd_pods in TimeoutSampler(timeout=180, sleep=10, func=get_osd_pods): + osd_pods_count = len(osd_pods) + logger.info(f"number of osd pods = {osd_pods_count}") + if old_osd_pods_count == osd_pods_count: + break + + logger.info("Check that the PVCs are in a Bound state") + ocp_pvc = OCP(kind=constants.PVC, namespace=config.ENV_DATA["cluster_namespace"]) + ocp_pvc.wait_for_resource( + timeout=30, + sleep=5, + condition=constants.STATUS_BOUND, + selector=constants.OSD_PVC_GENERIC_LABEL, + resource_count=len(old_osd_pvcs), + ) + + current_osd_pvcs = get_deviceset_pvcs() + old_pvc_names = [p.name for p in old_osd_pvcs] + current_pvc_names = [p.name for p in current_osd_pvcs] + logger.info(f"Old PVC names = {old_pvc_names}") + logger.info(f"Current PVC names = {current_pvc_names}") + + current_osd_pvs = get_deviceset_pvs() + old_pv_names = [p.name for p in old_osd_pvs] + current_pv_names = [p.name for p in current_osd_pvs] + logger.info(f"Old PV names = {old_pv_names}") + logger.info(f"Current PV names = {current_pv_names}") + + logger.info( + "Check that the old PVC and PV names are equal to the current PVC and PV names" + ) + if not old_pvc_names == current_pvc_names: + raise ResourceWrongStatusException( + f"The old PVC names {old_pvc_names} are not equal to the " + f"current PVC names {current_pvc_names}" + ) + if not old_pv_names == current_pv_names: + raise ResourceWrongStatusException( + f"The old PV names {old_pv_names} are not equal to the " + f"current PV names {current_pv_names}" + ) + + +def check_storage_size_is_reflected(expected_storage_size): + """ + Check that the expected storage size is reflected in the current storage size, PVCs, PVs, + and ceph capacity. 
+ + Args: + expected_storage_size (str): The expected storage size + + Raises: + StorageSizeNotReflectedException: If the current storage size, PVCs, PVs, and ceph capacity + are not in the expected size + + """ + logger.info(f"The expected storage size is {expected_storage_size}") + + current_storage_size = get_storage_size() + logger.info(f"The current storage size is {current_storage_size}") + logger.info( + "Check that the current storage size equal to the expected storage size" + ) + if get_storage_size() != expected_storage_size: + raise StorageSizeNotReflectedException( + f"The current storage size {current_storage_size} is not equal " + f"to the expected size {expected_storage_size}" + ) + + logger.info( + "Check that the PVC and PV sizes are equal to the expected storage size" + ) + current_osd_pvcs = get_deviceset_pvcs() + expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB") + pvc_sizes = [pvc.size for pvc in current_osd_pvcs] + logger.info(f"PVC sizes = {pvc_sizes}") + if not all([p_size == expected_storage_size_in_gb for p_size in pvc_sizes]): + raise StorageSizeNotReflectedException( + f"The PVC sizes are not equal to the expected storage size {expected_storage_size_in_gb}" + ) + + current_osd_pvs = get_deviceset_pvs() + pv_sizes = [get_pv_size(pv.get()) for pv in current_osd_pvs] + logger.info(f"PV sizes {pv_sizes}") + if not all([p_size == expected_storage_size_in_gb for p_size in pv_sizes]): + raise StorageSizeNotReflectedException( + f"The PV sizes are not equal to the expected storage size {expected_storage_size_in_gb}" + ) + + ceph_cluster = CephCluster() + ceph_capacity = ceph_cluster.get_ceph_capacity() + expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB") + logger.info( + f"Check that the Ceph capacity {ceph_capacity} is equal " + f"to the expected storage size {expected_storage_size_in_gb}" + ) + if not int(ceph_capacity) == expected_storage_size_in_gb: + raise StorageSizeNotReflectedException( + f"The Ceph capacity {ceph_capacity} is not equal to the " + f"expected storage size {expected_storage_size_in_gb}" + ) + + +def check_ceph_state_post_resize_osd(): + """ + Check the Ceph state post resize osd. + The function checks the Ceph device classes and osd tree. + + Raises: + CephHealthException: In case the Ceph device classes and osd tree checks + didn't finish successfully + + """ + logger.info("Check the Ceph device classes and osd tree") + device_class = get_device_class() + ct_pod = get_ceph_tools_pod() + try: + verify_storage_device_class(device_class) + verify_device_class_in_osd_tree(ct_pod, device_class) + except AssertionError as ex: + raise CephHealthException(ex) + if not check_ceph_osd_tree(): + raise CephHealthException("The ceph osd tree checks didn't finish successfully") + + +def base_ceph_verification_steps_post_resize_osd( + old_osd_pods, old_osd_pvcs, old_osd_pvs, expected_storage_size +): + """ + Check the Ceph verification steps post resize OSD. + It will perform the following steps: + 1. Check the resources state post resize OSD + 2. Check the resources size post resize OSD + 3. 
Check the Ceph state post resize OSD + + Args: + old_osd_pods (list): The old osd pod objects before resizing the osd + old_osd_pvcs (list): The old osd PVC objects before resizing the osd + old_osd_pvs (list): The old osd PV objects before resizing the osd + expected_storage_size (str): The expected storage size after resizing the osd + + Raises: + StorageSizeNotReflectedException: If the current storage size, PVCs, PVs, and ceph capacity + are not in the expected size + + """ + logger.info("Check the resources state post resize OSD") + check_resources_state_post_resize_osd(old_osd_pods, old_osd_pvcs, old_osd_pvs) + logger.info("Check the resources size post resize OSD") + check_storage_size_is_reflected(expected_storage_size) + logger.info("Check the Ceph state post resize OSD") + check_ceph_state_post_resize_osd() + logger.info("All the Ceph verification steps post resize osd finished successfully") + + +def ceph_verification_steps_post_resize_osd( + old_osd_pods, old_osd_pvcs, old_osd_pvs, expected_storage_size, num_of_tries=6 +): + """ + Try to execute the function 'base_ceph_verification_steps_post_resize_osd' a number of tries + until success, ignoring the exception 'StorageSizeNotReflectedException'. + In every iteration, if we get the exception 'StorageSizeNotReflectedException', it will restart + the osd pods and try again until it reaches the maximum tries. + + Args: + old_osd_pods (list): The old osd pod objects before resizing the osd + old_osd_pvcs (list): The old osd PVC objects before resizing the osd + old_osd_pvs (list): The old osd PV objects before resizing the osd + expected_storage_size (str): The expected storage size after resizing the osd + num_of_tries (int): The number of tries to try executing the + function 'base_ceph_verification_steps_post_resize_osd'. + + Raises: + StorageSizeNotReflectedException: If the current storage size, PVCs, PVs, and ceph capacity + are not in the expected size + + """ + ex = StorageSizeNotReflectedException() + for i in range(1, num_of_tries + 1): + try: + base_ceph_verification_steps_post_resize_osd( + old_osd_pods, old_osd_pvcs, old_osd_pvs, expected_storage_size + ) + return + except StorageSizeNotReflectedException as local_ex: + ex = local_ex + logger.warning( + f"The Ceph verification steps failed due to the error: {str(local_ex)}. " + f"Try to restart the OSD pods before the next iteration" + ) + old_osd_pods = get_osd_pods() + delete_pods(old_osd_pods, wait=False) + + logger.warning( + f"Failed to complete the Ceph verification steps post resize osd after {num_of_tries} tries" + ) + raise ex + + +def check_ceph_health_after_resize_osd( + ceph_health_tries=40, ceph_rebalance_timeout=900 +): + """ + Check Ceph health after resize osd + + Args: + ceph_health_tries (int): The number of tries to wait for the Ceph health to be OK. + ceph_rebalance_timeout (int): The time to wait for the Ceph cluster rebalanced. 
+ + """ + if config.RUN.get("io_in_bg"): + logger.info( + "Increase the time to wait for Ceph health to be health OK, " + "because we run IO in the background" + ) + additional_ceph_health_tries = int(config.RUN.get("io_load") * 1.3) + ceph_health_tries += additional_ceph_health_tries + + additional_ceph_rebalance_timeout = int(config.RUN.get("io_load") * 100) + ceph_rebalance_timeout += additional_ceph_rebalance_timeout + + ceph_health_check( + namespace=config.ENV_DATA["cluster_namespace"], tries=ceph_health_tries + ) + ceph_cluster_obj = CephCluster() + assert ceph_cluster_obj.wait_for_rebalance( + timeout=ceph_rebalance_timeout + ), "Data re-balance failed to complete" diff --git a/ocs_ci/ocs/resources/pod.py b/ocs_ci/ocs/resources/pod.py index 6b319bf10fa..dbe298ebc85 100644 --- a/ocs_ci/ocs/resources/pod.py +++ b/ocs_ci/ocs/resources/pod.py @@ -3583,3 +3583,64 @@ def _check_if_pod_deleted(label, namespace): namespace=namespace, ) sampler.wait_for_func_status(True) + + +def calculate_md5sum_of_pod_files(pods_for_integrity_check, pod_file_name): + """ + Calculate the md5sum of the pod files, and save it in the pod objects + + Args: + pods_for_integrity_check (list): The list of the pod objects to calculate the md5sum + pod_file_name (str): The pod file name to save the md5sum + + """ + # Wait for IO to finish + logger.info("Wait for IO to finish on pods") + for pod_obj in pods_for_integrity_check: + pod_obj.get_fio_results() + logger.info(f"IO finished on pod {pod_obj.name}") + # Calculate md5sum + pod_file_name = ( + pod_file_name + if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM) + else pod_obj.get_storage_path(storage_type="block") + ) + logger.info( + f"Calculate the md5sum of the file {pod_file_name} in the pod {pod_obj.name}" + ) + pod_obj.pvc.md5sum = cal_md5sum( + pod_obj, + pod_file_name, + pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK, + ) + + +def verify_md5sum_on_pod_files(pods_for_integrity_check, pod_file_name): + """ + Verify the md5sum of the pod files + + Args: + pods_for_integrity_check (list): The list of the pod objects to verify the md5sum + pod_file_name (str): The pod file name to verify its md5sum + + Raises: + AssertionError: If file doesn't exist or md5sum mismatch + + """ + for pod_obj in pods_for_integrity_check: + pod_file_name = ( + pod_obj.get_storage_path(storage_type="block") + if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK) + else pod_file_name + ) + verify_data_integrity( + pod_obj, + pod_file_name, + pod_obj.pvc.md5sum, + pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK, + ) + logger.info( + f"Verified: md5sum of {pod_file_name} on pod {pod_obj.name} " + f"matches with the original md5sum" + ) + logger.info("Data integrity check passed on all pods") diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index 94c7b5baffe..fc2ecb58da7 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -1648,26 +1648,7 @@ def get_osd_size(): int: osd size """ - sc = get_storage_cluster() - size = ( - sc.get() - .get("items")[0] - .get("spec") - .get("storageDeviceSets")[0] - .get("dataPVCTemplate") - .get("spec") - .get("resources") - .get("requests") - .get("storage") - ) - if size.isdigit or config.DEPLOYMENT.get("local_storage"): - # In the case of UI deployment of LSO cluster, the value in StorageCluster CR - # is set to 1, so we can not take OSD size from there. For LSO we will return - # the size from PVC. 
- pvc = get_deviceset_pvcs()[0] - return int(pvc.get()["status"]["capacity"]["storage"][:-2]) - else: - return int(size[:-2]) + return get_storage_size()[:-2] def get_deviceset_count(): @@ -2669,3 +2650,56 @@ def validate_serviceexport(): assert mon_count == len( get_mon_pods() ), f"Mon serviceexport count mismatch {mon_count} != {len(get_mon_pods())}" + + +def get_storage_size(): + """ + Get the storagecluster storage size + + Returns: + str: The storagecluster storage size + + """ + sc = get_storage_cluster() + storage = ( + sc.get() + .get("items")[0] + .get("spec") + .get("storageDeviceSets")[0] + .get("dataPVCTemplate") + .get("spec") + .get("resources") + .get("requests") + .get("storage") + ) + if storage.isdigit or config.DEPLOYMENT.get("local_storage"): + # In the case of UI deployment of LSO cluster, the value in StorageCluster CR + # is set to 1, so we can not take OSD size from there. For LSO we will return + # the size from PVC. + pvc = get_deviceset_pvcs()[0] + return pvc.get()["status"]["capacity"]["storage"] + else: + return storage + + +def resize_osd(new_osd_size): + """ + Resize the OSD(e.g., from 512 to 1024, 1024 to 2048, etc.) + + Args: + new_osd_size (str): The new osd size(e.g, 512Gi, 1024Gi, 1Ti, 2Ti, etc.) + + Returns: + bool: True in case if changes are applied. False otherwise + + """ + sc = get_storage_cluster() + # Patch the OSD storage size + path = "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage" + params = f"""[{{ "op": "replace", "path": "{path}", "value": {new_osd_size}}}]""" + res = sc.patch( + resource_name=sc.get()["items"][0]["metadata"]["name"], + params=params.strip("\n"), + format_type="json", + ) + return res diff --git a/tests/functional/z_cluster/cluster_expansion/test_resize_osd.py b/tests/functional/z_cluster/cluster_expansion/test_resize_osd.py new file mode 100644 index 00000000000..e720443e7d5 --- /dev/null +++ b/tests/functional/z_cluster/cluster_expansion/test_resize_osd.py @@ -0,0 +1,158 @@ +import random +import pytest +import logging + +from ocs_ci.framework.pytest_customization.marks import ( + polarion_id, + skipif_aws_i3, + skipif_bm, + skipif_external_mode, + skipif_bmpsi, + skipif_ibm_power, + skipif_lso, + skipif_managed_service, + skipif_hci_provider_and_client, + brown_squad, +) +from ocs_ci.framework.testlib import ( + ignore_leftovers, + ManageTest, + tier1, +) +from ocs_ci.ocs import constants +from ocs_ci.ocs.resources.osd_resize import ( + ceph_verification_steps_post_resize_osd, + check_ceph_health_after_resize_osd, +) +from ocs_ci.ocs.resources.pod import ( + get_osd_pods, + calculate_md5sum_of_pod_files, + verify_md5sum_on_pod_files, +) +from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs +from ocs_ci.ocs.resources.storage_cluster import resize_osd, get_storage_size +from ocs_ci.helpers.sanity_helpers import Sanity + + +logger = logging.getLogger(__name__) + + +@brown_squad +@ignore_leftovers +@skipif_managed_service +@skipif_aws_i3 +@skipif_bm +@skipif_bmpsi +@skipif_lso +@skipif_external_mode +@skipif_ibm_power +@skipif_managed_service +@skipif_hci_provider_and_client +class TestResizeOSD(ManageTest): + """ + Automates the resize OSD test procedure + """ + + @pytest.fixture(autouse=True) + def setup(self, create_pvcs_and_pods): + """ + Init all the data for the resize osd test + + """ + self.create_pvcs_and_pods = create_pvcs_and_pods + + self.old_osd_pods = get_osd_pods() + self.old_storage_size = get_storage_size() + self.old_osd_pvcs = get_deviceset_pvcs() + 
self.old_osd_pvs = get_deviceset_pvs() + self.new_storage_size = None + + self.pod_file_name = "fio_test" + self.sanity_helpers = Sanity() + pvc_size = random.randint(3, 7) + self.pvcs1, self.pods_for_integrity_check = create_pvcs_and_pods( + pvc_size=pvc_size, num_of_rbd_pvc=6, num_of_cephfs_pvc=6 + ) + pvc_size = random.randint(3, 8) + self.pvcs2, self.pods_for_run_io = create_pvcs_and_pods( + pvc_size=pvc_size, num_of_rbd_pvc=5, num_of_cephfs_pvc=5 + ) + + def run_io_on_pods(self, pods, size="1G", runtime=30): + """ + Run IO on the pods + + Args: + pods (list): The list of pods for running the IO + size (str): Size in MB or Gi, e.g. '200M'. Default value is '1G' + runtime (int): The number of seconds IO should run for + + """ + logger.info("Starting IO on all pods") + for pod_obj in pods: + storage_type = ( + "block" + if pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK + else "fs" + ) + rate = f"{random.randint(1, 5)}M" + pod_obj.run_io( + storage_type=storage_type, + size=size, + runtime=runtime, + rate=rate, + fio_filename=self.pod_file_name, + end_fsync=1, + ) + logger.info(f"IO started on pod {pod_obj.name}") + logger.info("Started IO on all pods") + + def prepare_data_before_resize_osd(self): + """ + Prepare the data before resizing the osd + + """ + logger.info("Run IO on the pods for integrity check") + self.run_io_on_pods(self.pods_for_integrity_check) + logger.info("Calculate the md5sum of the pods for integrity check") + calculate_md5sum_of_pod_files(self.pods_for_integrity_check, self.pod_file_name) + runtime = 180 + logger.info(f"Run IO on the pods in the test background for {runtime} seconds") + self.run_io_on_pods(self.pods_for_run_io, size="2G", runtime=runtime) + + def verification_steps_post_resize_osd(self): + ceph_verification_steps_post_resize_osd( + self.old_osd_pods, + self.old_osd_pvcs, + self.old_osd_pvs, + self.new_storage_size, + ) + logger.info("Verify the md5sum of the pods for integrity check") + verify_md5sum_on_pod_files(self.pods_for_integrity_check, self.pod_file_name) + check_ceph_health_after_resize_osd() + + logger.info("Try to create more resources and run IO") + pvc_size = random.randint(3, 7) + self.pvcs3, self.pods_for_run_io = self.create_pvcs_and_pods( + pvc_size=pvc_size, num_of_rbd_pvc=6, num_of_cephfs_pvc=6 + ) + self.run_io_on_pods(self.pods_for_run_io, size="2G") + logger.info("Check the cluster health") + self.sanity_helpers.health_check() + + @tier1 + @polarion_id("OCS-5506") + def test_resize_osd(self): + """ + Test resize OSD + """ + self.prepare_data_before_resize_osd() + + logger.info(f"The current osd size is {self.old_storage_size}") + size = int(self.old_storage_size[0:-2]) + size_type = self.old_storage_size[-2:] + self.new_storage_size = f"{size * 2}{size_type}" + logger.info(f"Increase the osd size to {self.new_storage_size}") + resize_osd(self.new_storage_size) + + self.verification_steps_post_resize_osd() From 0f34a38b041312a5d8e061af900d3b639a974944 Mon Sep 17 00:00:00 2001 From: Itzhak Kave Date: Thu, 11 Apr 2024 15:29:07 +0300 Subject: [PATCH 2/4] Check that the new osd pod names are different than the old ones, add validation check for the 'resize_osd' function Signed-off-by: Itzhak Kave --- ocs_ci/ocs/resources/osd_resize.py | 10 ++++++++++ ocs_ci/ocs/resources/storage_cluster.py | 24 +++++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/ocs_ci/ocs/resources/osd_resize.py b/ocs_ci/ocs/resources/osd_resize.py index eb7d4ca7e71..2ed52d1a48c 100644 --- 
a/ocs_ci/ocs/resources/osd_resize.py +++ b/ocs_ci/ocs/resources/osd_resize.py @@ -82,6 +82,16 @@ def check_resources_state_post_resize_osd(old_osd_pods, old_osd_pvcs, old_osd_pv if old_osd_pods_count == osd_pods_count: break + logger.info("Verify that the new osd pod names are different than the old ones") + osd_pods = get_osd_pods() + new_name_set = {p.name for p in osd_pods} + old_name_set = {p.name for p in old_osd_pods} + if new_name_set.intersection(old_name_set): + raise ResourceWrongStatusException( + f"There are shared values between the new osd pod names and the old osd pod names. " + f"old osd pod names = {old_name_set}, new osd pod names = {new_name_set}" + ) + logger.info("Check that the PVCs are in a Bound state") ocp_pvc = OCP(kind=constants.PVC, namespace=config.ENV_DATA["cluster_namespace"]) ocp_pvc.wait_for_resource( diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index fc2ecb58da7..0e9f71722fe 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -73,7 +73,7 @@ ) from ocs_ci.utility.retry import retry from ocs_ci.utility.rgwutils import get_rgw_count -from ocs_ci.utility.utils import run_cmd, TimeoutSampler +from ocs_ci.utility.utils import run_cmd, TimeoutSampler, convert_device_size from ocs_ci.utility.decorators import switch_to_orig_index_at_last from ocs_ci.helpers.helpers import storagecluster_independent_check from ocs_ci.deployment.helpers.mcg_helpers import check_if_mcg_root_secret_public @@ -2682,17 +2682,35 @@ def get_storage_size(): return storage -def resize_osd(new_osd_size): +def resize_osd(new_osd_size, check_size=True): """ Resize the OSD(e.g., from 512 to 1024, 1024 to 2048, etc.) Args: new_osd_size (str): The new osd size(e.g, 512Gi, 1024Gi, 1Ti, 2Ti, etc.) + check_size (bool): Check that the given osd size is valid Returns: bool: True in case if changes are applied. 
False otherwise - """ + Raises: + ValueError: In case the osd size is not valid(start with digits and follow by string) + or the new osd size is less than the current osd size + + """ + if check_size: + pattern = r"^\d+[a-zA-Z]+$" + if not re.match(pattern, new_osd_size): + raise ValueError(f"The osd size '{new_osd_size}' is not valid") + new_osd_size_in_gb = convert_device_size(new_osd_size, "GB") + current_osd_size = get_storage_size() + current_osd_size_in_gb = convert_device_size(current_osd_size, "GB") + if new_osd_size_in_gb < current_osd_size_in_gb: + raise ValueError( + f"The new osd size {new_osd_size} is less than the " + f"current osd size {current_osd_size}" + ) + sc = get_storage_cluster() # Patch the OSD storage size path = "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage" From 5898af648d181943b698aec4aabfc03b93d0c5b6 Mon Sep 17 00:00:00 2001 From: Itzhak Kave Date: Thu, 11 Apr 2024 19:49:16 +0300 Subject: [PATCH 3/4] Get the storage size, PVC size, and PV size in GB with the convert size 1024 Signed-off-by: Itzhak Kave --- ocs_ci/ocs/resources/osd_resize.py | 11 ++++++----- ocs_ci/ocs/resources/pv.py | 7 ++++--- ocs_ci/ocs/resources/pvc.py | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) diff --git a/ocs_ci/ocs/resources/osd_resize.py b/ocs_ci/ocs/resources/osd_resize.py index 2ed52d1a48c..6baf9cfdd80 100644 --- a/ocs_ci/ocs/resources/osd_resize.py +++ b/ocs_ci/ocs/resources/osd_resize.py @@ -11,7 +11,7 @@ wait_for_pods_to_be_in_statuses, get_ceph_tools_pod, ) -from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs +from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs, get_pvc_size from ocs_ci.ocs.resources.pv import get_pv_size from ocs_ci.ocs.resources.storage_cluster import ( get_storage_size, @@ -143,13 +143,15 @@ def check_storage_size_is_reflected(expected_storage_size): """ logger.info(f"The expected storage size is {expected_storage_size}") - current_storage_size = get_storage_size() logger.info(f"The current storage size is {current_storage_size}") + + expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB", 1024) + current_storage_size_in_gb = convert_device_size(current_storage_size, "GB", 1024) logger.info( "Check that the current storage size equal to the expected storage size" ) - if get_storage_size() != expected_storage_size: + if current_storage_size_in_gb != expected_storage_size_in_gb: raise StorageSizeNotReflectedException( f"The current storage size {current_storage_size} is not equal " f"to the expected size {expected_storage_size}" @@ -159,8 +161,7 @@ def check_storage_size_is_reflected(expected_storage_size): "Check that the PVC and PV sizes are equal to the expected storage size" ) current_osd_pvcs = get_deviceset_pvcs() - expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB") - pvc_sizes = [pvc.size for pvc in current_osd_pvcs] + pvc_sizes = [get_pvc_size(pvc) for pvc in current_osd_pvcs] logger.info(f"PVC sizes = {pvc_sizes}") if not all([p_size == expected_storage_size_in_gb for p_size in pvc_sizes]): raise StorageSizeNotReflectedException( diff --git a/ocs_ci/ocs/resources/pv.py b/ocs_ci/ocs/resources/pv.py index 41a7706da78..a5fb8a051f3 100644 --- a/ocs_ci/ocs/resources/pv.py +++ b/ocs_ci/ocs/resources/pv.py @@ -146,19 +146,20 @@ def delete_released_pvs_in_sc(sc_name): return num_of_deleted_pvs -def get_pv_size(pv_obj): +def get_pv_size(pv_obj, convert_size=1024): """ - Get the size of a pv object + Get the 
size of a pv object in GB
 
     Args:
         pv_obj (dict): A dictionary that represent the pv object
+        convert_size (int): The conversion factor to use, 1024 or 1000
 
     Returns:
         int: The size of the pv object
 
     """
     storage_size = pv_obj.get("spec").get("capacity").get("storage")
-    return convert_device_size(storage_size, "GB")
+    return convert_device_size(storage_size, "GB", convert_size)
 
 
 def check_pvs_present_for_ocs_expansion(sc=constants.LOCALSTORAGE_SC):
diff --git a/ocs_ci/ocs/resources/pvc.py b/ocs_ci/ocs/resources/pvc.py
index b6bbe2441fa..d6a40866285 100644
--- a/ocs_ci/ocs/resources/pvc.py
+++ b/ocs_ci/ocs/resources/pvc.py
@@ -635,3 +635,21 @@ def scale_down_pods_and_remove_pvcs(sc_name):
             time.sleep(10)
 
         delete_pvcs([pvc_obj])
+
+
+def get_pvc_size(pvc_obj, convert_size=1024):
+    """
+    Returns the PVC size in GB
+
+    Args:
+        pvc_obj (ocs_ci.ocs.resources.ocs.OCS): The pvc object
+        convert_size (int): The conversion factor to use, 1024 or 1000
+
+    Returns:
+        int: PVC size
+
+    """
+    unformatted_size = (
+        pvc_obj.data.get("spec").get("resources").get("requests").get("storage")
+    )
+    return convert_device_size(unformatted_size, "GB", convert_size)

From 2b6a77b938126441a1b44f0fe01719deab7cd896 Mon Sep 17 00:00:00 2001
From: Itzhak Kave
Date: Sun, 14 Apr 2024 14:31:02 +0300
Subject: [PATCH 4/4] Remove a redundant line

Signed-off-by: Itzhak Kave
---
 ocs_ci/ocs/resources/osd_resize.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ocs_ci/ocs/resources/osd_resize.py b/ocs_ci/ocs/resources/osd_resize.py
index 6baf9cfdd80..3f746ca1cda 100644
--- a/ocs_ci/ocs/resources/osd_resize.py
+++ b/ocs_ci/ocs/resources/osd_resize.py
@@ -178,7 +178,6 @@ def check_storage_size_is_reflected(expected_storage_size):
     ceph_cluster = CephCluster()
     ceph_capacity = ceph_cluster.get_ceph_capacity()
-    expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB")
     logger.info(
         f"Check that the Ceph capacity {ceph_capacity} is equal "
         f"to the expected storage size {expected_storage_size_in_gb}"
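
For reference, below is a minimal usage sketch (not part of the patch series) showing how the helpers added in these patches fit together, mirroring the flow of test_resize_osd: capture the old OSD pods, PVCs, and PVs, double the OSD size, then run the verification steps and the health check. It assumes a working ocs-ci environment against a deployed cluster, a two-character size unit such as "Gi" or "Ti", and the wrapper name double_osd_size_and_verify is illustrative only.

from ocs_ci.ocs.resources.osd_resize import (
    ceph_verification_steps_post_resize_osd,
    check_ceph_health_after_resize_osd,
)
from ocs_ci.ocs.resources.pod import get_osd_pods
from ocs_ci.ocs.resources.pvc import get_deviceset_pvcs, get_deviceset_pvs
from ocs_ci.ocs.resources.storage_cluster import get_storage_size, resize_osd


def double_osd_size_and_verify():
    # Illustrative wrapper, not part of the patch series.
    # Capture the current resources before patching the StorageCluster CR,
    # so the verification steps can compare the old names against the new ones
    old_osd_pods = get_osd_pods()
    old_osd_pvcs = get_deviceset_pvcs()
    old_osd_pvs = get_deviceset_pvs()

    # e.g. "512Gi" -> size=512, size_type="Gi" -> new size "1024Gi"
    old_storage_size = get_storage_size()
    size, size_type = int(old_storage_size[:-2]), old_storage_size[-2:]
    new_storage_size = f"{size * 2}{size_type}"

    # Patch the OSD size, then run the post-resize verification and health checks
    resize_osd(new_storage_size)
    ceph_verification_steps_post_resize_osd(
        old_osd_pods, old_osd_pvcs, old_osd_pvs, new_storage_size
    )
    check_ceph_health_after_resize_osd()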