From 14cda0cc6d30946da94188892f0b8e51ab452f38 Mon Sep 17 00:00:00 2001
From: Jilju Joy
Date: Mon, 23 Oct 2023 10:14:30 +0530
Subject: [PATCH] Test case OCS-4761 (#8633)

* Test case OCS-4761

Signed-off-by: Jilju Joy
---
 .../regional-dr/test_failover_and_relocate.py | 131 +++++++++++++++++-
 1 file changed, 130 insertions(+), 1 deletion(-)

diff --git a/tests/disaster-recovery/regional-dr/test_failover_and_relocate.py b/tests/disaster-recovery/regional-dr/test_failover_and_relocate.py
index 5b514c5dc11..0adff5700e7 100644
--- a/tests/disaster-recovery/regional-dr/test_failover_and_relocate.py
+++ b/tests/disaster-recovery/regional-dr/test_failover_and_relocate.py
@@ -1,4 +1,6 @@
 import logging
+import os
+from datetime import datetime
 from time import sleep
 
 import pytest
@@ -15,9 +17,11 @@
 from ocs_ci.ocs import constants
 from ocs_ci.ocs.acm.acm import AcmAddClusters
 from ocs_ci.ocs.node import wait_for_nodes_status, get_node_objs
+from ocs_ci.ocs.resources.drpc import DRPC
 from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_running
+from ocs_ci.ocs.utils import get_active_acm_index
 from ocs_ci.utility import version
-from ocs_ci.utility.utils import ceph_health_check
+from ocs_ci.utility.utils import ceph_health_check, TimeoutSampler
 
 logger = logging.getLogger(__name__)
 
@@ -88,6 +92,7 @@ def test_failover_and_relocate(
 
         This test is also compatible to be run from ACM UI,
         pass the yaml conf/ocsci/dr_ui.yaml to trigger it.
+        The value of lastGroupSyncTime is verified in each stage.
 
         """
         if config.RUN.get("rdr_failover_via_ui"):
@@ -101,8 +106,13 @@
         acm_obj = AcmAddClusters()
         if workload_type == constants.SUBSCRIPTION:
            rdr_workload = dr_workload(num_of_subscription=1)[0]
+            drpc_obj = DRPC(namespace=rdr_workload.workload_namespace)
         else:
             rdr_workload = dr_workload(num_of_subscription=0, num_of_appset=1)[0]
+            drpc_obj = DRPC(
+                namespace=constants.GITOPS_CLUSTER_NAMESPACE,
+                resource_name=f"{rdr_workload.appset_placement_name}-drpc",
+            )
 
         primary_cluster_name = dr_helpers.get_current_primary_cluster_name(
             rdr_workload.workload_namespace, workload_type
@@ -121,6 +131,39 @@
 
         logger.info(f"Waiting for {wait_time} minutes to run IOs")
         sleep(wait_time * 60)
+        # Set cluster_kubeconfig value for 'drpc_obj' object to fetch the details without switching the cluster context
+        acm_cluster_kubeconfig = os.path.join(
+            config.clusters[get_active_acm_index()].ENV_DATA["cluster_path"],
+            config.clusters[get_active_acm_index()].RUN.get("kubeconfig_location"),
+        )
+        drpc_obj.cluster_kubeconfig = acm_cluster_kubeconfig
+
+        # Get lastGroupSyncTime before failover
+        drpc_data = drpc_obj.get()
+        last_group_sync_time = drpc_data.get("status").get("lastGroupSyncTime")
+        logger.info(
+            f"The value of lastGroupSyncTime before failover is {last_group_sync_time}."
+        )
+
+        # Verify lastGroupSyncTime before failover
+        time_format = "%Y-%m-%dT%H:%M:%SZ"
+        last_group_sync_time_formatted = datetime.strptime(
+            last_group_sync_time, time_format
+        )
+        current_time = datetime.strptime(
+            datetime.utcnow().strftime(time_format), time_format
+        )
+        time_since_last_sync = (
+            current_time - last_group_sync_time_formatted
+        ).total_seconds() / 60
+        logger.info(
+            f"Before failover - Time in minutes since the last sync is {time_since_last_sync}"
+        )
+        assert (
+            time_since_last_sync < 2 * scheduling_interval
+        ), "Before failover - Time since the last sync is more than two times the scheduling interval."
+ logger.info("Verified lastGroupSyncTime before failover.") + if config.RUN.get("rdr_failover_via_ui"): logger.info("Start the process of Failover from ACM UI") config.switch_acm_ctx() @@ -201,6 +244,46 @@ def test_failover_and_relocate( logger.info(f"Waiting for {wait_time} minutes to run IOs") sleep(wait_time * 60) + # Get lastGroupSyncTime after failover. + # The parameter lastGroupSyncTime may not be present for some time after failover. + for drpc_data in TimeoutSampler(300, 5, drpc_obj.get): + post_failover_last_group_sync_time = drpc_data.get("status").get( + "lastGroupSyncTime" + ) + if post_failover_last_group_sync_time: + logger.info("After failover - Obtained lastGroupSyncTime.") + # Adding an additional check to make sure that the old value is not populated again. + if post_failover_last_group_sync_time != last_group_sync_time: + logger.info( + "After failover - Verified: lastGroupSyncTime after failover is different from initial value." + ) + break + logger.info( + "The value of lastGroupSyncTime in drpc is not updated after failover. Retrying." + ) + logger.info( + f"The value of lastGroupSyncTime after failover is {post_failover_last_group_sync_time}." + ) + + # Verify lastGroupSyncTime after failover + time_format = "%Y-%m-%dT%H:%M:%SZ" + post_failover_last_group_sync_time_formatted = datetime.strptime( + post_failover_last_group_sync_time, time_format + ) + current_time = datetime.strptime( + datetime.utcnow().strftime(time_format), time_format + ) + time_since_last_sync = ( + current_time - post_failover_last_group_sync_time_formatted + ).total_seconds() / 60 + logger.info( + f"After failover - Time in minutes since the last sync is {time_since_last_sync}" + ) + assert ( + time_since_last_sync < 3 * scheduling_interval + ), "After failover - Time since last sync is three times greater than the scheduling interval." + logger.info("Verified lastGroupSyncTime after failover.") + # Relocate action if config.RUN.get("rdr_relocate_via_ui"): logger.info("Start the process of Relocate from ACM UI") @@ -249,3 +332,49 @@ def test_failover_and_relocate( ) # TODO: Add data integrity checks + + # Get lastGroupSyncTime after relocate. The parameter lastGroupSyncTime may not be present in drpc yaml for + # some time after relocate. So the wait time given is more than the scheduling interval. + for drpc_data in TimeoutSampler( + (scheduling_interval * 60) + 300, 15, drpc_obj.get + ): + post_relocate_last_group_sync_time = drpc_data.get("status").get( + "lastGroupSyncTime" + ) + if post_relocate_last_group_sync_time: + logger.info("After relocate - Obtained lastGroupSyncTime.") + # Adding an additional check to make sure that the old value is not populated again. + if ( + post_relocate_last_group_sync_time + != post_failover_last_group_sync_time + ): + logger.info( + "After relocate - Verified: lastGroupSyncTime after relocate is different from the previous " + "value." + ) + break + logger.info( + "The value of lastGroupSyncTime in drpc is not updated after relocate. Retrying." + ) + logger.info( + f"The value of lastGroupSyncTime after relocate is {post_relocate_last_group_sync_time}." 
+ ) + + # Verify lastGroupSyncTime after relocate + time_format = "%Y-%m-%dT%H:%M:%SZ" + post_relocate_last_group_sync_time_formatted = datetime.strptime( + post_relocate_last_group_sync_time, time_format + ) + current_time = datetime.strptime( + datetime.utcnow().strftime(time_format), time_format + ) + time_since_last_sync = ( + current_time - post_relocate_last_group_sync_time_formatted + ).total_seconds() / 60 + logger.info( + f"After relocate - Time in minutes since the last sync is {time_since_last_sync}" + ) + assert ( + time_since_last_sync < 3 * scheduling_interval + ), "After relocate - Time since last sync is three times greater than the scheduling interval." + logger.info("Verified lastGroupSyncTime after relocate.")