Commit

Test case OCS-4761 (#8633)
* Test case OCS-4761

Signed-off-by: Jilju Joy <[email protected]>
jilju authored Oct 23, 2023
1 parent 4388a4d commit 14cda0c
Showing 1 changed file with 130 additions and 1 deletion: tests/disaster-recovery/regional-dr/test_failover_and_relocate.py
@@ -1,4 +1,6 @@
import logging
import os
from datetime import datetime
from time import sleep

import pytest
@@ -15,9 +17,11 @@
from ocs_ci.ocs import constants
from ocs_ci.ocs.acm.acm import AcmAddClusters
from ocs_ci.ocs.node import wait_for_nodes_status, get_node_objs
from ocs_ci.ocs.resources.drpc import DRPC
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_running
from ocs_ci.ocs.utils import get_active_acm_index
from ocs_ci.utility import version
-from ocs_ci.utility.utils import ceph_health_check
+from ocs_ci.utility.utils import ceph_health_check, TimeoutSampler

logger = logging.getLogger(__name__)

@@ -88,6 +92,7 @@ def test_failover_and_relocate(
This test can also be run from the ACM UI;
pass the config conf/ocsci/dr_ui.yaml to trigger it.
The value of lastGroupSyncTime is verified at each stage.
"""
if config.RUN.get("rdr_failover_via_ui"):
@@ -101,8 +106,13 @@
acm_obj = AcmAddClusters()
if workload_type == constants.SUBSCRIPTION:
rdr_workload = dr_workload(num_of_subscription=1)[0]
drpc_obj = DRPC(namespace=rdr_workload.workload_namespace)
else:
rdr_workload = dr_workload(num_of_subscription=0, num_of_appset=1)[0]
drpc_obj = DRPC(
namespace=constants.GITOPS_CLUSTER_NAMESPACE,
resource_name=f"{rdr_workload.appset_placement_name}-drpc",
)

primary_cluster_name = dr_helpers.get_current_primary_cluster_name(
rdr_workload.workload_namespace, workload_type
@@ -121,6 +131,39 @@
logger.info(f"Waiting for {wait_time} minutes to run IOs")
sleep(wait_time * 60)

# Set the cluster_kubeconfig attribute on drpc_obj so that DRPC details
# can be fetched without switching the cluster context
acm_cluster_kubeconfig = os.path.join(
config.clusters[get_active_acm_index()].ENV_DATA["cluster_path"],
config.clusters[get_active_acm_index()].RUN.get("kubeconfig_location"),
)
drpc_obj.cluster_kubeconfig = acm_cluster_kubeconfig
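
Setting cluster_kubeconfig lets the resource object query the hub (ACM) cluster directly. A minimal sketch of the underlying idea, assuming the kubeconfig path is simply forwarded to oc as --kubeconfig (the get_drpc_yaml helper below is hypothetical and illustrative, not part of ocs-ci):

import subprocess

def get_drpc_yaml(kubeconfig_path, namespace, resource_name):
    # Target the hub cluster explicitly instead of switching contexts
    cmd = [
        "oc", "--kubeconfig", kubeconfig_path,
        "get", "drpc", resource_name,
        "-n", namespace, "-o", "yaml",
    ]
    return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout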

# Get lastGroupSyncTime before failover
drpc_data = drpc_obj.get()
last_group_sync_time = drpc_data.get("status").get("lastGroupSyncTime")
logger.info(
f"The value of lastGroupSyncTime before failover is {last_group_sync_time}."
)

# Verify lastGroupSyncTime before failover
time_format = "%Y-%m-%dT%H:%M:%SZ"
last_group_sync_time_formatted = datetime.strptime(
last_group_sync_time, time_format
)
# Truncate the current time to whole seconds so it matches the DRPC timestamp format
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"Before failover - Time in minutes since the last sync {time_since_last_sync}"
)
assert (
time_since_last_sync < 2 * scheduling_interval
), "Before failover - Time since last sync exceeds two times the scheduling interval."
logger.info("Verified lastGroupSyncTime before failover.")

if config.RUN.get("rdr_failover_via_ui"):
logger.info("Start the process of Failover from ACM UI")
config.switch_acm_ctx()
@@ -201,6 +244,46 @@ def test_failover_and_relocate(
logger.info(f"Waiting for {wait_time} minutes to run IOs")
sleep(wait_time * 60)

# Get lastGroupSyncTime after failover.
# The parameter lastGroupSyncTime may not be present for some time after failover.
for drpc_data in TimeoutSampler(300, 5, drpc_obj.get):
post_failover_last_group_sync_time = drpc_data.get("status").get(
"lastGroupSyncTime"
)
if post_failover_last_group_sync_time:
logger.info("After failover - Obtained lastGroupSyncTime.")
# Additional check to make sure the stale pre-failover value is not reported again.
if post_failover_last_group_sync_time != last_group_sync_time:
logger.info(
"After failover - Verified: lastGroupSyncTime after failover is different from initial value."
)
break
logger.info(
"The value of lastGroupSyncTime in drpc is not updated after failover. Retrying."
)
logger.info(
f"The value of lastGroupSyncTime after failover is {post_failover_last_group_sync_time}."
)
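
TimeoutSampler drives the retry loop above. A simplified, self-contained stand-in that illustrates the polling semantics assumed here (the real implementation in ocs_ci.utility.utils raises its own timeout exception and supports more options):

import time

class SimpleTimeoutSampler:
    """Call func every `sleep` seconds and yield each result until
    `timeout` seconds have elapsed, then raise TimeoutError."""

    def __init__(self, timeout, sleep, func, *args, **kwargs):
        self.timeout = timeout
        self.sleep = sleep
        self.func, self.args, self.kwargs = func, args, kwargs

    def __iter__(self):
        deadline = time.monotonic() + self.timeout
        while time.monotonic() < deadline:
            # The caller breaks out of its for-loop once the sampled
            # value satisfies the condition it is waiting for
            yield self.func(*self.args, **self.kwargs)
            time.sleep(self.sleep)
        raise TimeoutError("Sampling timed out before the condition was met")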

# Verify lastGroupSyncTime after failover
time_format = "%Y-%m-%dT%H:%M:%SZ"
post_failover_last_group_sync_time_formatted = datetime.strptime(
post_failover_last_group_sync_time, time_format
)
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - post_failover_last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"After failover - Time in minutes since the last sync is {time_since_last_sync}"
)
assert (
time_since_last_sync < 3 * scheduling_interval
), "After failover - Time since last sync exceeds three times the scheduling interval."
logger.info("Verified lastGroupSyncTime after failover.")

# Relocate action
if config.RUN.get("rdr_relocate_via_ui"):
logger.info("Start the process of Relocate from ACM UI")
@@ -249,3 +332,49 @@ def test_failover_and_relocate(
)

# TODO: Add data integrity checks

# Get lastGroupSyncTime after relocate. The parameter lastGroupSyncTime may not be present in the drpc yaml
# for some time after relocate, so the wait time here is longer than the scheduling interval.
for drpc_data in TimeoutSampler(
(scheduling_interval * 60) + 300, 15, drpc_obj.get
):
post_relocate_last_group_sync_time = drpc_data.get("status").get(
"lastGroupSyncTime"
)
if post_relocate_last_group_sync_time:
logger.info("After relocate - Obtained lastGroupSyncTime.")
# Additional check to make sure the stale post-failover value is not reported again.
if (
post_relocate_last_group_sync_time
!= post_failover_last_group_sync_time
):
logger.info(
"After relocate - Verified: lastGroupSyncTime after relocate is different from the previous "
"value."
)
break
logger.info(
"The value of lastGroupSyncTime in drpc is not updated after relocate. Retrying."
)
logger.info(
f"The value of lastGroupSyncTime after relocate is {post_relocate_last_group_sync_time}."
)

# Verify lastGroupSyncTime after relocate
time_format = "%Y-%m-%dT%H:%M:%SZ"
post_relocate_last_group_sync_time_formatted = datetime.strptime(
post_relocate_last_group_sync_time, time_format
)
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - post_relocate_last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"After relocate - Time in minutes since the last sync is {time_since_last_sync}"
)
assert (
time_since_last_sync < 3 * scheduling_interval
), "After relocate - Time since last sync exceeds three times the scheduling interval."
logger.info("Verified lastGroupSyncTime after relocate.")
