Start working on the resize osd test
Signed-off-by: Itzhak Kave <[email protected]>
Itzhak Kave committed Apr 2, 2024
1 parent 819a522 commit 24f62e2
Showing 4 changed files with 375 additions and 21 deletions.
125 changes: 124 additions & 1 deletion ocs_ci/ocs/cluster.py
@@ -50,14 +50,19 @@
from ocs_ci.framework import config
from ocs_ci.ocs import ocp, constants, exceptions
from ocs_ci.ocs.exceptions import PoolNotFound
from ocs_ci.ocs.resources.pvc import get_all_pvc_objs
from ocs_ci.ocs.resources.pvc import (
get_all_pvc_objs,
get_deviceset_pvcs,
get_deviceset_pvs,
)
from ocs_ci.ocs.ocp import OCP, wait_for_cluster_connectivity
from ocs_ci.ocs.resources.ocs import OCS
from ocs_ci.ocs.resources.pvc import PVC
from ocs_ci.utility.connection import Connection
from ocs_ci.utility.lvmo_utils import get_lvm_cluster_name
from ocs_ci.ocs.resources.pod import get_mds_pods, wait_for_pods_to_be_running
from ocs_ci.utility.decorators import switch_to_orig_index_at_last
from ocs_ci.ocs.resources.pv import get_pv_size

logger = logging.getLogger(__name__)

@@ -3268,3 +3273,121 @@ def client_clusters_health_check():
client_cluster_health_check()

logger.info("The client clusters health check passed successfully")


def check_resources_state_post_resize_osd(old_osd_pods, old_osd_pvcs, old_osd_pvs):
    """
    Check that the OSD pods, PVCs, and PVs are in the expected state after
    resizing the OSDs: the old OSD pods should terminate and be replaced by
    new running pods, while the PVC and PV names should stay the same.

    Args:
        old_osd_pods (list): The old OSD pod objects from before the resize
        old_osd_pvcs (list): The old OSD PVC objects from before the resize
        old_osd_pvs (list): The old OSD PV objects from before the resize

    Raises:
        ResourceWrongStatusException: If the current PVC or PV names differ from the old names

    """
    old_osd_pod_names = [p.name for p in old_osd_pods]
pod.wait_for_pods_to_be_in_statuses(
expected_statuses=[constants.STATUS_TERMINATING],
pod_names=old_osd_pod_names,
timeout=300,
sleep=20,
)

ocp_pod = OCP(kind=constants.POD, namespace=config.ENV_DATA["cluster_namespace"])
ocp_pod.wait_for_resource(
condition=constants.STATUS_RUNNING,
selector=constants.OSD_APP_LABEL,
resource_count=len(old_osd_pods),
timeout=300,
sleep=20,
)

ocp_pvc = OCP(kind=constants.PVC, namespace=config.ENV_DATA["cluster_namespace"])
ocp_pvc.wait_for_resource(
timeout=30,
sleep=5,
condition=constants.STATUS_BOUND,
selector=constants.OSD_PVC_GENERIC_LABEL,
resource_count=len(old_osd_pvcs),
)

current_osd_pvcs = get_deviceset_pvcs()
old_pvc_names = [p.name for p in old_osd_pvcs]
current_pvc_names = [p.name for p in current_osd_pvcs]
logger.info(f"Old PVC names = {old_pvc_names}")
logger.info(f"Current PVC names = {current_pvc_names}")

current_osd_pvs = get_deviceset_pvs()
old_pv_names = [p.name for p in old_osd_pvs]
current_pv_names = [p.name for p in current_osd_pvs]
logger.info(f"Old PV names = {old_pv_names}")
logger.info(f"Current PV names = {current_pv_names}")

logger.info(
"Check that the old PVC and PV names are equal to the current PVC and PV names"
)
    # Compare the name lists order-independently, since the deviceset PVCs/PVs
    # may be listed in a different order before and after the resize
    if sorted(old_pvc_names) != sorted(current_pvc_names):
        raise ResourceWrongStatusException(
            f"The old PVC names {old_pvc_names} are not equal to the "
            f"current PVC names {current_pvc_names}"
        )
    if sorted(old_pv_names) != sorted(current_pv_names):
        raise ResourceWrongStatusException(
            f"The old PV names {old_pv_names} are not equal to the "
            f"current PV names {current_pv_names}"
        )


def check_resources_size_post_resize_osd(expected_storage_size):
    """
    Check that the storagecluster storage size and the OSD PVC and PV sizes
    match the expected storage size after resizing the OSDs.

    Args:
        expected_storage_size (str): The expected storage size (e.g. "1024Gi")

    Raises:
        ResourceWrongStatusException: If one of the sizes does not match the expected storage size

    """
    current_storage_size = storage_cluster.get_storage_size()
logger.info(f"The current storage size is {current_storage_size}")
    logger.info(
        "Check that the current storage size is equal to the expected storage size"
    )
    if current_storage_size != expected_storage_size:
raise ResourceWrongStatusException(
f"The current storage size {current_storage_size} is not equal "
f"to the expected size {expected_storage_size}"
)

logger.info(
"Check that the PVC and PV sizes are equal to the expected storage size"
)
current_osd_pvcs = get_deviceset_pvcs()
expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB")
pvc_sizes = [pvc.size for pvc in current_osd_pvcs]
logger.info(f"PVC sizes = {pvc_sizes}")
    if not all(p_size == expected_storage_size_in_gb for p_size in pvc_sizes):
raise ResourceWrongStatusException(
f"The PVC sizes are not equal to the expected storage size {expected_storage_size_in_gb}"
)

current_osd_pvs = get_deviceset_pvs()
pv_sizes = [get_pv_size(pv.get()) for pv in current_osd_pvs]
logger.info(f"PV sizes {pv_sizes}")
if not all([p_size == expected_storage_size_in_gb for p_size in pv_sizes]):
raise ResourceWrongStatusException(
f"The PV sizes are not equal to the expected storage size {expected_storage_size_in_gb}"
)


def check_ceph_state_post_resize_osd(expected_storage_size):
    """
    Check the Ceph state after resizing the OSDs: verify that the Ceph capacity
    matches the expected storage size, and that the device classes and the OSD
    tree are correct.

    Args:
        expected_storage_size (str): The expected storage size (e.g. "1024Gi")

    Raises:
        ResourceWrongStatusException: If the Ceph capacity does not match the expected storage size

    """
    ceph_cluster = CephCluster()
ceph_capacity = ceph_cluster.get_ceph_capacity()
expected_storage_size_in_gb = convert_device_size(expected_storage_size, "GB")
logger.info(
f"Check that the Ceph capacity {ceph_capacity} is equal "
f"to the expected storage size {expected_storage_size_in_gb}"
)
    if int(ceph_capacity) != expected_storage_size_in_gb:
        raise ResourceWrongStatusException(
            f"The Ceph capacity {ceph_capacity} is not equal to the "
f"expected storage size {expected_storage_size_in_gb}"
)

logger.info("Check the Ceph device classes and osd tree")
device_class = storage_cluster.get_device_class()
ct_pod = pod.get_ceph_tools_pod()
storage_cluster.verify_storage_device_class(device_class)
storage_cluster.verify_device_class_in_osd_tree(ct_pod, device_class)
    assert check_ceph_osd_tree(), "The OSD tree is not in the expected state"


def ceph_verification_steps_post_resize_osd(
    old_osd_pods, old_osd_pvcs, old_osd_pvs, expected_storage_size
):
    """
    Run all the Ceph verification steps after resizing the OSDs: the resource
    state checks, the resource size checks, and the Ceph state checks.

    Args:
        old_osd_pods (list): The old OSD pod objects from before the resize
        old_osd_pvcs (list): The old OSD PVC objects from before the resize
        old_osd_pvs (list): The old OSD PV objects from before the resize
        expected_storage_size (str): The expected storage size after the resize (e.g. "1024Gi")

    """
check_resources_state_post_resize_osd(old_osd_pods, old_osd_pvcs, old_osd_pvs)
check_resources_size_post_resize_osd(expected_storage_size)
check_ceph_state_post_resize_osd(expected_storage_size)
logger.info("All the Ceph verification steps post resize osd finished successfully")
61 changes: 61 additions & 0 deletions ocs_ci/ocs/resources/pod.py
@@ -3583,3 +3583,64 @@ def _check_if_pod_deleted(label, namespace):
namespace=namespace,
)
sampler.wait_for_func_status(True)


def calculate_md5sum_of_pod_files(pods_for_integrity_check, pod_file_name):
    """
    Calculate the md5sum of the pod files, and save it in the pod objects

    Args:
        pods_for_integrity_check (list): The list of the pod objects to calculate the md5sum
        pod_file_name (str): The pod file name to save the md5sum

    """
    # Wait for IO to finish
    logger.info("Wait for IO to finish on pods")
    for pod_obj in pods_for_integrity_check:
        pod_obj.get_fio_results()
        logger.info(f"IO finished on pod {pod_obj.name}")
        # For block mode PVCs, calculate the md5sum on the raw block path.
        # Use a local name so the 'pod_file_name' parameter is not overwritten
        # between iterations.
        file_name = (
            pod_file_name
            if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_FILESYSTEM)
            else pod_obj.get_storage_path(storage_type="block")
        )
        logger.info(
            f"Calculate the md5sum of the file {file_name} in the pod {pod_obj.name}"
        )
        pod_obj.pvc.md5sum = cal_md5sum(
            pod_obj,
            file_name,
            pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
        )


def verify_md5sum_on_pod_files(pods_for_integrity_check, pod_file_name):
    """
    Verify the md5sum of the pod files

    Args:
        pods_for_integrity_check (list): The list of the pod objects to verify the md5sum
        pod_file_name (str): The pod file name to verify its md5sum

    Raises:
        AssertionError: If file doesn't exist or md5sum mismatch

    """
    for pod_obj in pods_for_integrity_check:
        # Use a local name so the 'pod_file_name' parameter is not overwritten
        # between iterations
        file_name = (
            pod_obj.get_storage_path(storage_type="block")
            if (pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK)
            else pod_file_name
        )
        verify_data_integrity(
            pod_obj,
            file_name,
            pod_obj.pvc.md5sum,
            pod_obj.pvc.volume_mode == constants.VOLUME_MODE_BLOCK,
        )
        logger.info(
            f"Verified: md5sum of {file_name} on pod {pod_obj.name} "
            f"matches with the original md5sum"
        )
    logger.info("Data integrity check passed on all pods")
74 changes: 54 additions & 20 deletions ocs_ci/ocs/resources/storage_cluster.py
@@ -1648,26 +1648,7 @@ def get_osd_size():
int: osd size
"""
sc = get_storage_cluster()
size = (
sc.get()
.get("items")[0]
.get("spec")
.get("storageDeviceSets")[0]
.get("dataPVCTemplate")
.get("spec")
.get("resources")
.get("requests")
.get("storage")
)
if size.isdigit or config.DEPLOYMENT.get("local_storage"):
# In the case of UI deployment of LSO cluster, the value in StorageCluster CR
# is set to 1, so we can not take OSD size from there. For LSO we will return
# the size from PVC.
pvc = get_deviceset_pvcs()[0]
return int(pvc.get()["status"]["capacity"]["storage"][:-2])
else:
return int(size[:-2])
    # Strip the two-character unit suffix (e.g. "Gi") and return an int,
    # matching the docstring
    return int(get_storage_size()[:-2])


def get_deviceset_count():
@@ -2669,3 +2650,56 @@ def validate_serviceexport():
assert mon_count == len(
get_mon_pods()
), f"Mon serviceexport count mismatch {mon_count} != {len(get_mon_pods())}"


def get_storage_size():
    """
    Get the storagecluster storage size

    Returns:
        str: The storagecluster storage size

    """
    sc = get_storage_cluster()
    storage = (
        sc.get()
        .get("items")[0]
        .get("spec")
        .get("storageDeviceSets")[0]
        .get("dataPVCTemplate")
        .get("spec")
        .get("resources")
        .get("requests")
        .get("storage")
    )
    # Note the parentheses: the bare method reference 'storage.isdigit' is
    # always truthy, which would make this branch run unconditionally
    if storage.isdigit() or config.DEPLOYMENT.get("local_storage"):
        # In the case of UI deployment of LSO cluster, the value in StorageCluster CR
        # is set to 1, so we can not take OSD size from there. For LSO we will return
        # the size from PVC.
        pvc = get_deviceset_pvcs()[0]
        return pvc.get()["status"]["capacity"]["storage"]
    else:
        return storage
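
The returned size keeps its unit suffix (e.g. "512Gi"). Callers that need a numeric value convert it, as cluster.py does with convert_device_size; a small illustration, with example values and the usual ocs-ci import path:

from ocs_ci.ocs.resources.storage_cluster import get_storage_size
from ocs_ci.utility.utils import convert_device_size

size_str = get_storage_size()                  # e.g. "512Gi"
size_gb = convert_device_size(size_str, "GB")  # e.g. 512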


def resize_osd(new_osd_size):
    """
    Resize the OSD (e.g., from 512 to 1024, 1024 to 2048, etc.)

    Args:
        new_osd_size (str): The new osd size (e.g., 512Gi, 1024Gi, 1Ti, 2Ti, etc.)

    Returns:
        bool: True in case if changes are applied. False otherwise

    """
    sc = get_storage_cluster()
    # Patch the OSD storage size. The value must be JSON-quoted, since sizes
    # like "1024Gi" are strings.
    path = "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage"
    params = f"""[{{ "op": "replace", "path": "{path}", "value": "{new_osd_size}"}}]"""
    res = sc.patch(
        resource_name=sc.get()["items"][0]["metadata"]["name"],
        params=params.strip("\n"),
        format_type="json",
    )
    return res
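
An illustrative call, with the JSON patch it renders shown in the comment; the target size is an example:

# resize_osd("1024Gi") patches the storagecluster CR with:
# [{"op": "replace",
#   "path": "/spec/storageDeviceSets/0/dataPVCTemplate/spec/resources/requests/storage",
#   "value": "1024Gi"}]
assert resize_osd("1024Gi"), "Failed to patch the storagecluster storage size"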