From 6bd7075e3bf3c9c1e39806985c7e24fadac2afca Mon Sep 17 00:00:00 2001
From: Akarsha Rai <akrai@redhat.com>
Date: Thu, 29 Feb 2024 10:12:27 +0530
Subject: [PATCH] Test to verify the hub restore when active hub down following
 failover and relocate of app (#9094)

* Test failover and relocate all apps in a single zone after a zone disruption

Signed-off-by: Akarsha-rai <akrai@redhat.com>
---
 ocs_ci/deployment/deployment.py               |  84 ++---
 ocs_ci/helpers/dr_helpers.py                  | 135 ++++++--
 ocs_ci/ocs/acm/acm.py                         |   5 +-
 ocs_ci/ocs/constants.py                       |   2 +
 ocs_ci/ocs/dr/dr_workload.py                  |  12 +-
 ocs_ci/ocs/node.py                            |  31 +-
 ocs_ci/ocs/utils.py                           |  12 +
 .../ocs-deployment/multicluster/restore.yaml  |  10 +
 tests/conftest.py                             |  10 +-
 .../test_active_hub_down_and_restore.py       | 314 ++++++++++++++++++
 10 files changed, 537 insertions(+), 78 deletions(-)
 create mode 100644 ocs_ci/templates/ocs-deployment/multicluster/restore.yaml
 create mode 100644 tests/functional/disaster-recovery/metro-dr/test_active_hub_down_and_restore.py

diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py
index 5b57ad50c84..e0fd71ec034 100644
--- a/ocs_ci/deployment/deployment.py
+++ b/ocs_ci/deployment/deployment.py
@@ -220,6 +220,52 @@ def do_deploy_submariner(self):
             submariner = Submariner()
             submariner.deploy()
 
+    def deploy_gitops_operator(self, switch_ctx=None):
+        """
+        Deploy GitOps operator on the selected cluster
+
+        Args:
+            switch_ctx (int): Index of the cluster to deploy on, ACM cluster if None
+
+        """
+        if switch_ctx is not None:  # 0 is a valid cluster index
+            config.switch_ctx(switch_ctx)
+        else:
+            config.switch_acm_ctx()
+
+        logger.info("Creating GitOps Operator Subscription")
+        gitops_subscription_yaml_data = templating.load_yaml(
+            constants.GITOPS_SUBSCRIPTION_YAML
+        )
+        package_manifest = PackageManifest(resource_name=constants.GITOPS_OPERATOR_NAME)
+        starting_csv = package_manifest.get_current_csv(
+            channel="latest", csv_pattern=constants.GITOPS_OPERATOR_NAME
+        )
+        gitops_subscription_yaml_data["spec"]["startingCSV"] = starting_csv
+
+        gitops_subscription_manifest = tempfile.NamedTemporaryFile(
+            mode="w+", prefix="gitops_subscription_manifest", delete=False
+        )
+        templating.dump_data_to_temp_yaml(
+            gitops_subscription_yaml_data, gitops_subscription_manifest.name
+        )
+        run_cmd(f"oc create -f {gitops_subscription_manifest.name}")
+
+        self.wait_for_subscription(
+            constants.GITOPS_OPERATOR_NAME, namespace=constants.OPENSHIFT_OPERATORS
+        )
+        logger.info("Sleeping for 90 seconds after subscribing to GitOps Operator")
+        time.sleep(90)
+        subscriptions = ocp.OCP(
+            kind=constants.SUBSCRIPTION_WITH_ACM,
+            resource_name=constants.GITOPS_OPERATOR_NAME,
+            namespace=constants.OPENSHIFT_OPERATORS,
+        ).get()
+        gitops_csv_name = subscriptions["status"]["currentCSV"]
+        csv = CSV(resource_name=gitops_csv_name, namespace=constants.GITOPS_NAMESPACE)
+        csv.wait_for_phase("Succeeded", timeout=720)
+        logger.info("GitOps Operator Deployment Succeeded")
+
     def do_gitops_deploy(self):
         """
         Deploy GitOps operator
@@ -233,43 +279,7 @@ def do_gitops_deploy(self):
         # Multicluster operations
         if config.multicluster:
             config.switch_acm_ctx()
-            logger.info("Creating GitOps Operator Subscription")
-            gitops_subscription_yaml_data = templating.load_yaml(
-                constants.GITOPS_SUBSCRIPTION_YAML
-            )
-            package_manifest = PackageManifest(
-                resource_name=constants.GITOPS_OPERATOR_NAME,
-            )
-            gitops_subscription_yaml_data["spec"][
-                "startingCSV"
-            ] = package_manifest.get_current_csv(
-                channel="latest", csv_pattern=constants.GITOPS_OPERATOR_NAME
-            )
-
-            gitops_subscription_manifest = tempfile.NamedTemporaryFile(
-                mode="w+", prefix="gitops_subscription_manifest", delete=False
-            )
-            templating.dump_data_to_temp_yaml(
-                gitops_subscription_yaml_data, gitops_subscription_manifest.name
-            )
-            run_cmd(f"oc create -f {gitops_subscription_manifest.name}")
-
-            self.wait_for_subscription(
-                constants.GITOPS_OPERATOR_NAME, namespace=constants.OPENSHIFT_OPERATORS
-            )
-            logger.info("Sleeping for 90 seconds after subscribing to GitOps Operator")
-            time.sleep(90)
-            subscriptions = ocp.OCP(
-                kind=constants.SUBSCRIPTION_WITH_ACM,
-                resource_name=constants.GITOPS_OPERATOR_NAME,
-                namespace=constants.OPENSHIFT_OPERATORS,
-            ).get()
-            gitops_csv_name = subscriptions["status"]["currentCSV"]
-            csv = CSV(
-                resource_name=gitops_csv_name, namespace=constants.GITOPS_NAMESPACE
-            )
-            csv.wait_for_phase("Succeeded", timeout=720)
-            logger.info("GitOps Operator Deployment Succeeded")
+            self.deploy_gitops_operator()
 
             logger.info("Creating GitOps CLuster Resource")
             run_cmd(f"oc create -f {constants.GITOPS_CLUSTER_YAML}")
diff --git a/ocs_ci/helpers/dr_helpers.py b/ocs_ci/helpers/dr_helpers.py
index 87dff81d684..b164917493b 100644
--- a/ocs_ci/helpers/dr_helpers.py
+++ b/ocs_ci/helpers/dr_helpers.py
@@ -7,7 +7,7 @@
 
 from ocs_ci.framework import config
 from ocs_ci.ocs import constants, ocp
-from ocs_ci.ocs.exceptions import TimeoutExpiredError
+from ocs_ci.ocs.exceptions import TimeoutExpiredError, UnexpectedBehaviour
 from ocs_ci.ocs.resources.drpc import DRPC
 from ocs_ci.ocs.resources.pod import get_all_pods
 from ocs_ci.ocs.resources.pv import get_all_pvs
@@ -17,8 +17,10 @@
     get_non_acm_cluster_config,
     get_active_acm_index,
     get_primary_cluster_config,
+    get_passive_acm_index,
 )
 from ocs_ci.utility import version, templating
+from ocs_ci.utility.retry import retry
 from ocs_ci.utility.utils import TimeoutSampler, CommandFailed, run_cmd
 
 logger = logging.getLogger(__name__)
@@ -131,6 +133,7 @@ def failover(
     namespace,
     workload_type=constants.SUBSCRIPTION,
     workload_placement_name=None,
+    switch_ctx=None,
 ):
     """
     Initiates Failover action to the specified cluster
@@ -140,10 +143,11 @@ def failover(
         namespace (str): Namespace where workload is running
         workload_type (str): Type of workload, i.e., Subscription or ApplicationSet
         workload_placement_name (str): Placement name
+        switch_ctx (int): The cluster index by the cluster name
 
     """
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
     failover_params = f'{{"spec":{{"action":"{constants.ACTION_FAILOVER}","failoverCluster":"{failover_cluster}"}}}}'
     if workload_type == constants.APPLICATION_SET:
         namespace = constants.GITOPS_CLUSTER_NAMESPACE
@@ -171,6 +175,7 @@ def relocate(
     namespace,
     workload_type=constants.SUBSCRIPTION,
     workload_placement_name=None,
+    switch_ctx=None,
 ):
     """
     Initiates Relocate action to the specified cluster
@@ -180,10 +185,11 @@ def relocate(
         namespace (str): Namespace where workload is running
         workload_type (str): Type of workload, i.e., Subscription or ApplicationSet
         workload_placement_name (str): Placement name
+        switch_ctx (int): The cluster index by the cluster name
 
     """
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
     relocate_params = f'{{"spec":{{"action":"{constants.ACTION_RELOCATE}","preferredCluster":"{preferred_cluster}"}}}}'
     if workload_type == constants.APPLICATION_SET:
         namespace = constants.GITOPS_CLUSTER_NAMESPACE
@@ -835,13 +841,14 @@ def get_managed_cluster_node_ips():
     return cluster_data
 
 
-def enable_fence(drcluster_name):
+def enable_fence(drcluster_name, switch_ctx=None):
     """
     Once the managed cluster is fenced, all communication
     from applications to the ODF external storage cluster will fail
 
     Args:
         drcluster_name (str): Name of the DRcluster which needs to be fenced
+        switch_ctx (int): The cluster index by the cluster name
 
     """
 
@@ -849,7 +856,7 @@ def enable_fence(drcluster_name):
         f"Edit the DRCluster resource for {drcluster_name} cluster on the Hub cluster"
     )
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
     fence_params = f'{{"spec":{{"clusterFence":"{constants.ACTION_FENCE}"}}}}'
     drcluster_obj = ocp.OCP(resource_name=drcluster_name, kind=constants.DRCLUSTER)
     if not drcluster_obj.patch(params=fence_params, format_type="merge"):
@@ -888,13 +895,14 @@ def configure_drcluster_for_fencing():
     config.switch_ctx(old_ctx)
 
 
-def enable_unfence(drcluster_name):
+def enable_unfence(drcluster_name, switch_ctx=None):
     """
     The OpenShift cluster to be Unfenced is the one where applications
     are not currently running and the cluster that was Fenced earlier.
 
     Args:
         drcluster_name (str): Name of the DRcluster which needs to be fenced
+        switch_ctx (int): The cluster index by the cluster name
 
     """
 
@@ -902,7 +910,7 @@ def enable_unfence(drcluster_name):
         f"Edit the DRCluster resource for {drcluster_name} cluster on the Hub cluster"
     )
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
     unfence_params = f'{{"spec":{{"clusterFence":"{constants.ACTION_UNFENCE}"}}}}'
     drcluster_obj = ocp.OCP(resource_name=drcluster_name, kind=constants.DRCLUSTER)
     if not drcluster_obj.patch(params=unfence_params, format_type="merge"):
@@ -911,13 +919,14 @@ def enable_unfence(drcluster_name):
     config.switch_ctx(restore_index)
 
 
-def fence_state(drcluster_name, fence_state):
+def fence_state(drcluster_name, fence_state, switch_ctx=None):
     """
     Sets the specified clusterFence state
 
     Args:
        drcluster_name (str): Name of the DRcluster which needs to be fenced
        fence_state (str): Specify the clusterfence state either constants.ACTION_UNFENCE and ACTION_FENCE
+       switch_ctx (int): The cluster index by the cluster name
 
     """
 
@@ -925,7 +934,7 @@ def fence_state(drcluster_name, fence_state):
         f"Edit the DRCluster {drcluster_name} cluster clusterfence state {fence_state}  "
     )
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
     params = f'{{"spec":{{"clusterFence":"{fence_state}"}}}}'
     drcluster_obj = ocp.OCP(resource_name=drcluster_name, kind=constants.DRCLUSTER)
     if not drcluster_obj.patch(params=params, format_type="merge"):
@@ -936,25 +945,51 @@ def fence_state(drcluster_name, fence_state):
     config.switch_ctx(restore_index)
 
 
-def get_fence_state(drcluster_name):
+def get_fence_state(drcluster_name, switch_ctx=None):
     """
     Returns the clusterfence state of given drcluster
 
     Args:
         drcluster_name (str): Name of the DRcluster
+        switch_ctx (int): Index of the cluster to query, ACM cluster if None
 
     Returns:
         state (str): If drcluster are fenced: Fenced or Unfenced, else None if not defined
 
     """
     restore_index = config.cur_index
-    config.switch_acm_ctx()
+    config.switch_ctx(switch_ctx) if switch_ctx is not None else config.switch_acm_ctx()
     drcluster_obj = ocp.OCP(resource_name=drcluster_name, kind=constants.DRCLUSTER)
-    state = drcluster_obj.get().get("spec").get("clusterFence")
+    state = (drcluster_obj.get().get("status") or {}).get("phase")
     config.switch_ctx(restore_index)
     return state
 
 
+@retry(UnexpectedBehaviour, tries=40, delay=5, backoff=5)
+def verify_fence_state(drcluster_name, state, switch_ctx=None):
+    """
+    Verify that get_fence_state reports the expected state for the given drcluster
+
+    Args:
+        drcluster_name (str): Name of the DRcluster
+        state (str): Expected state, e.g. constants.ACTION_FENCE or constants.ACTION_UNFENCE
+        switch_ctx (int): The cluster index by the cluster name
+
+    Raises:
+        UnexpectedBehaviour: If the specified drcluster is not in the given state
+    """
+    sample = get_fence_state(drcluster_name=drcluster_name, switch_ctx=switch_ctx)
+    if sample == state:
+        logger.info(f"Primary managed cluster {drcluster_name} reached {state} state")
+    else:
+        logger.error(
+            f"Primary managed cluster {drcluster_name} not reached {state} state"
+        )
+        raise UnexpectedBehaviour(
+            f"Primary managed cluster {drcluster_name} not reached {state} state"
+        )
+
+
 def create_backup_schedule():
     """
     Create backupschedule resource only on active hub
@@ -971,25 +1006,79 @@ def create_backup_schedule():
     config.switch_ctx(old_ctx)
 
 
-def gracefully_reboot_ocp_nodes(
-    namespace, drcluster_name, workload_type=constants.SUBSCRIPTION
-):
+def gracefully_reboot_ocp_nodes(drcluster_name):
     """
     Gracefully reboot OpenShift Container Platform
     nodes which was fenced before
 
     Args:
-        namespace (str): Name of the namespace
         drcluster_name (str): Name of the drcluster which needs to be rebooted
-        workload_type (str): Type of workload. ie Subscription(Default) or ApplicationSet
 
     """
+    config.switch_to_cluster_by_name(drcluster_name)
+    gracefully_reboot_nodes()
 
-    primary_cluster_name = get_current_primary_cluster_name(
-        namespace=namespace, workload_type=workload_type
+
+def restore_backup():
+    """
+    Create the Restore resource on the passive hub so the backup is restored there
+
+    """
+
+    restore_index = config.cur_index
+    config.switch_ctx(get_passive_acm_index())
+    restore_data = templating.load_yaml(constants.DR_RESTORE_YAML)
+    restore_manifest = tempfile.NamedTemporaryFile(
+        mode="w+", prefix="restore", delete=False
     )
-    if primary_cluster_name == drcluster_name:
-        set_current_primary_cluster_context(namespace, workload_type)
+    templating.dump_data_to_temp_yaml(restore_data, restore_manifest.name)
+    run_cmd(f"oc create -f {restore_manifest.name}")
+    config.switch_ctx(restore_index)
+
+
+@retry(UnexpectedBehaviour, tries=40, delay=5, backoff=5)
+def verify_restore_is_completed():
+    """
+    Verify that the first Restore listed on the passive hub is in Finished phase
+
+    """
+    restore_index = config.cur_index
+    config.switch_ctx(get_passive_acm_index())
+    restore_obj = ocp.OCP(
+        kind=constants.ACM_HUB_RESTORE, namespace=constants.ACM_HUB_BACKUP_NAMESPACE
+    )
+    cmd_output = restore_obj.exec_oc_cmd(command="get restore -oyaml")
+    config.switch_ctx(restore_index)  # switch back before a possible raise
+    status = cmd_output["items"][0]["status"]["phase"]
+    if status == "Finished":
+        logger.info("Restore completed successfully")
     else:
-        set_current_secondary_cluster_context(namespace, workload_type)
-    gracefully_reboot_nodes()
+        logger.error(f"Restore failed with some errors: {cmd_output}")
+        raise UnexpectedBehaviour("Restore failed with some errors")
+
+
+@retry(UnexpectedBehaviour, tries=60, delay=5, backoff=2)
+def verify_drpolicy_cli(switch_ctx=None):
+    """
+    Function to verify DRPolicy status
+
+    Args:
+        switch_ctx (int): Index of the cluster to query, ACM cluster if None
+
+    Returns:
+        bool: True if the status is in succeed state, else raise exception
+    """
+    restore_index = config.cur_index
+    config.switch_ctx(switch_ctx) if switch_ctx is not None else config.switch_acm_ctx()
+    try:
+        drpolicy_obj = ocp.OCP(kind=constants.DRPOLICY)
+        status = drpolicy_obj.get().get("items")[0].get("status").get("conditions")[0]
+    finally:
+        # Always return to the caller's context, also when the lookup itself fails
+        config.switch_ctx(restore_index)
+    if status.get("reason") == "Succeeded":
+        logger.info("DRPolicy validation succeeded")
+        return True
+    logger.warning(f"DRPolicy is not in succeeded or validated state: {status}")
+    raise UnexpectedBehaviour(
+        f"DRPolicy is not in succeeded or validated state: {status}"
+    )
diff --git a/ocs_ci/ocs/acm/acm.py b/ocs_ci/ocs/acm/acm.py
index 565ca908a7f..516002218f5 100644
--- a/ocs_ci/ocs/acm/acm.py
+++ b/ocs_ci/ocs/acm/acm.py
@@ -466,12 +466,13 @@ def verify_running_acm():
     log.info(f"ACM Version Detected: {acm_version}")
 
 
-def validate_cluster_import(cluster_name):
+def validate_cluster_import(cluster_name, switch_ctx=None):
     """
     Validate ACM status of managed cluster
 
     Args:
         cluster_name: (str): cluster name to validate
+        switch_ctx (int): The cluster index by the cluster name
 
     Assert:
         All conditions of selected managed cluster should be "True", Failed otherwise
@@ -479,7 +480,7 @@ def validate_cluster_import(cluster_name):
     Return:
         True, if not AssertionError
     """
-    config.switch_ctx(0)
+    config.switch_ctx(switch_ctx) if switch_ctx else config.switch_ctx(0)
     oc_obj = OCP(kind=ACM_MANAGED_CLUSTERS)
     conditions = oc_obj.exec_oc_cmd(
         f"get managedclusters {cluster_name} -ojsonpath='{{.status.conditions}}'"
diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py
index e8a817fef4f..db415179f28 100644
--- a/ocs_ci/ocs/constants.py
+++ b/ocs_ci/ocs/constants.py
@@ -917,6 +917,7 @@
 DR_RAMEN_HUB_OPERATOR_CONFIG = "ramen-hub-operator-config"
 DR_RAMEN_CLUSTER_OPERATOR_CONFIG = "ramen-dr-cluster-operator-config"
 ODF_MULTICLUSTER_ORCHESTRATOR_CONTROLLER_MANAGER = "odfmo-controller-manager"
+DR_RESTORE_YAML = os.path.join(TEMPLATE_MULTICLUSTER_DIR, "restore.yaml")
 RDR_MODE = "regional-dr"
 MDR_MODE = "metro-dr"
 MDR_DR_POLICY = "odr-policy-mdr"
@@ -2271,6 +2272,7 @@
     TEMPLATE_DIR, "gitops-deployment", "subscription.yaml"
 )
 ACM_HUB_BACKUP_NAMESPACE = "open-cluster-management-backup"
+ACM_HUB_RESTORE = "Restore"
 
 # Vault encryption KMS types for PV encryption
 VAULT_TOKEN = "vaulttokens"
diff --git a/ocs_ci/ocs/dr/dr_workload.py b/ocs_ci/ocs/dr/dr_workload.py
index 69a183eaeb4..613b7fa43c1 100644
--- a/ocs_ci/ocs/dr/dr_workload.py
+++ b/ocs_ci/ocs/dr/dr_workload.py
@@ -213,13 +213,14 @@ def verify_workload_deployment(self):
             self.workload_pvc_count, self.workload_pod_count, self.workload_namespace
         )
 
-    def delete_workload(self, force=False, rbd_name="rbd"):
+    def delete_workload(self, force=False, rbd_name="rbd", switch_ctx=None):
         """
         Delete busybox workload
 
         Args:
             force (bool): If True, force remove the stuck resources, default False
             rbd_name (str): Name of the pool
+            switch_ctx (int): The cluster index by the cluster name
 
         Raises:
             ResourceNotDeleted: In case workload resources not deleted properly
@@ -238,7 +239,7 @@ def delete_workload(self, force=False, rbd_name="rbd"):
         )
 
         try:
-            config.switch_acm_ctx()
+            config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
             run_cmd(
                 f"oc delete -k {self.workload_subscription_dir}/{self.workload_name}"
             )
@@ -288,7 +289,7 @@ def delete_workload(self, force=False, rbd_name="rbd"):
             raise ResourceNotDeleted(err_msg)
 
         finally:
-            config.switch_acm_ctx()
+            config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
             run_cmd(f"oc delete -k {self.workload_subscription_dir}")
 
 
@@ -444,13 +445,14 @@ def check_pod_pvc_status(self, skip_replication_resources=False):
             skip_replication_resources=skip_replication_resources,
         )
 
-    def delete_workload(self, force=False, rbd_name="rbd"):
+    def delete_workload(self, force=False, rbd_name="rbd", switch_ctx=None):
         """
         Delete busybox workload
 
         Args:
             force (bool): If True, force remove the stuck resources, default False
             rbd_name (str): Name of the pool, default "rbd"
+            switch_ctx (int): The cluster index by the cluster name
 
         Raises:
             ResourceNotDeleted: In case workload resources not deleted properly
@@ -458,7 +460,7 @@ def delete_workload(self, force=False, rbd_name="rbd"):
         """
         image_uuids = dr_helpers.get_image_uuids(self.workload_namespace)
         try:
-            config.switch_acm_ctx()
+            config.switch_ctx(switch_ctx) if switch_ctx else config.switch_acm_ctx()
             run_cmd(cmd=f"oc delete -f {self.appset_yaml_file}", timeout=900)
 
             for cluster in get_non_acm_cluster_config():
diff --git a/ocs_ci/ocs/node.py b/ocs_ci/ocs/node.py
index a339062bebe..8c549befe04 100644
--- a/ocs_ci/ocs/node.py
+++ b/ocs_ci/ocs/node.py
@@ -231,13 +231,14 @@ def schedule_nodes(node_names):
     wait_for_nodes_status(node_names)
 
 
-def drain_nodes(node_names, timeout=1800):
+def drain_nodes(node_names, timeout=1800, disable_eviction=False):
     """
     Drain nodes
 
     Args:
         node_names (list): The names of the nodes
         timeout (int): Time to wait for the drain nodes 'oc' command
+        disable_eviction (bool): On True will delete pod that is protected by PDB, False by default
 
     Raises:
         TimeoutExpired: in case drain command fails to complete in time
@@ -253,11 +254,18 @@ def drain_nodes(node_names, timeout=1800):
             >= version.VERSION_4_7
             else "--delete-local-data"
         )
-        ocp.exec_oc_cmd(
-            f"adm drain {node_names_str} --force=true --ignore-daemonsets "
-            f"{drain_deletion_flag}",
-            timeout=timeout,
-        )
+        if disable_eviction:
+            ocp.exec_oc_cmd(
+                f"adm drain {node_names_str} --force=true --ignore-daemonsets "
+                f"{drain_deletion_flag} --disable-eviction",
+                timeout=timeout,
+            )
+        else:
+            ocp.exec_oc_cmd(
+                f"adm drain {node_names_str} --force=true --ignore-daemonsets "
+                f"{drain_deletion_flag}",
+                timeout=timeout,
+            )
     except TimeoutExpired:
         ct_pod = pod.get_ceph_tools_pod()
         ceph_status = ct_pod.exec_cmd_on_pod("ceph status", out_yaml_format=False)
@@ -2756,11 +2764,14 @@ def generate_nodes_for_provider_worker_node_tests():
     return generated_nodes
 
 
-def gracefully_reboot_nodes():
+def gracefully_reboot_nodes(disable_eviction=False):
     """
 
     Gracefully reboot OpenShift Container Platform nodes
 
+    Args:
+        disable_eviction (bool): On True will delete pod that is protected by PDB, False by default
+
     """
     from ocs_ci.ocs import platform_nodes
 
@@ -2771,12 +2782,14 @@ def gracefully_reboot_nodes():
     for node in node_objs:
         node_name = node.name
         unschedule_nodes([node_name])
-        drain_nodes([node_name])
+        drain_nodes(node_names=[node_name], disable_eviction=disable_eviction)
         nodes.restart_nodes([node], wait=False)
         log.info(f"Waiting for {waiting_time} seconds")
         time.sleep(waiting_time)
         schedule_nodes([node_name])
-    wait_for_nodes_status(status=constants.NODE_READY, timeout=180)
+        wait_for_nodes_status(
+            node_names=[node_name], status=constants.NODE_READY, timeout=1800
+        )
 
 
 def get_num_of_racks():
diff --git a/ocs_ci/ocs/utils.py b/ocs_ci/ocs/utils.py
index 839cd31ffbc..d9231d7bb45 100644
--- a/ocs_ci/ocs/utils.py
+++ b/ocs_ci/ocs/utils.py
@@ -1537,6 +1537,18 @@ def get_active_acm_index():
             return cluster.MULTICLUSTER["multicluster_index"]
 
 
+def get_passive_acm_index():
+    """
+    Get index of the first ACM cluster not flagged active (None if there is none)
+    """
+    for cluster in ocsci_config.clusters:
+        if (
+            cluster.MULTICLUSTER["acm_cluster"]
+            and not cluster.MULTICLUSTER["active_acm_cluster"]
+        ):
+            return cluster.MULTICLUSTER["multicluster_index"]
+
+
 def get_primary_cluster_config():
     """
     Get the primary cluster config object in a DR scenario
diff --git a/ocs_ci/templates/ocs-deployment/multicluster/restore.yaml b/ocs_ci/templates/ocs-deployment/multicluster/restore.yaml
new file mode 100644
index 00000000000..a4d21c1fa6e
--- /dev/null
+++ b/ocs_ci/templates/ocs-deployment/multicluster/restore.yaml
@@ -0,0 +1,10 @@
+apiVersion: cluster.open-cluster-management.io/v1beta1
+kind: Restore
+metadata:
+  name: restore-acm
+  namespace: open-cluster-management-backup
+spec:
+  cleanupBeforeRestore: None
+  veleroManagedClustersBackupName: latest
+  veleroCredentialsBackupName: latest
+  veleroResourcesBackupName: latest
diff --git a/tests/conftest.py b/tests/conftest.py
index fd60070e55f..d541b740bb4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -6429,9 +6429,13 @@ def dr_workload(request):
 
     """
     instances = []
+    ctx = []
 
     def factory(
-        num_of_subscription=1, num_of_appset=0, pvc_interface=constants.CEPHBLOCKPOOL
+        num_of_subscription=1,
+        num_of_appset=0,
+        pvc_interface=constants.CEPHBLOCKPOOL,
+        switch_ctx=None,
     ):
         """
         Args:
@@ -6439,6 +6443,7 @@ def factory(
             num_of_appset (int): Number of ApplicationSet type workload to be created
             pvc_interface (str): 'CephBlockPool' or 'CephFileSystem'.
                 This decides whether a RBD based or CephFS based resource is created. RBD is default.
+            switch_ctx (int): The cluster index by the cluster name
 
         Raises:
             ResourceNotDeleted: In case workload resources not deleted properly
@@ -6482,13 +6487,14 @@ def factory(
                 dr_helpers.wait_for_mirroring_status_ok(
                     replaying_images=total_pvc_count
                 )
+        ctx.append(switch_ctx)
         return instances
 
     def teardown():
         failed_to_delete = False
         for instance in instances:
             try:
-                instance.delete_workload(force=True)
+                instance.delete_workload(switch_ctx=ctx[0], force=True)
             except ResourceNotDeleted:
                 failed_to_delete = True
 
diff --git a/tests/functional/disaster-recovery/metro-dr/test_active_hub_down_and_restore.py b/tests/functional/disaster-recovery/metro-dr/test_active_hub_down_and_restore.py
new file mode 100644
index 00000000000..ee3774ba80a
--- /dev/null
+++ b/tests/functional/disaster-recovery/metro-dr/test_active_hub_down_and_restore.py
@@ -0,0 +1,314 @@
+import logging
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+
+from ocs_ci.framework.pytest_customization.marks import tier4a, turquoise_squad
+from ocs_ci.framework import config
+from ocs_ci.ocs.acm.acm import AcmAddClusters, validate_cluster_import
+from ocs_ci.ocs.dr.dr_workload import validate_data_integrity
+from ocs_ci.ocs import constants
+from ocs_ci.deployment.deployment import Deployment
+from ocs_ci.ocs.node import get_node_objs
+from ocs_ci.helpers.dr_helpers import (
+    enable_fence,
+    enable_unfence,
+    get_fence_state,
+    failover,
+    relocate,
+    restore_backup,
+    create_backup_schedule,
+    set_current_primary_cluster_context,
+    get_current_primary_cluster_name,
+    get_current_secondary_cluster_name,
+    get_passive_acm_index,
+    wait_for_all_resources_creation,
+    wait_for_all_resources_deletion,
+    gracefully_reboot_ocp_nodes,
+    verify_drpolicy_cli,
+    verify_restore_is_completed,
+    verify_fence_state,
+)
+from ocs_ci.helpers.dr_helpers_ui import (
+    check_cluster_status_on_acm_console,
+    failover_relocate_ui,
+)
+from ocs_ci.ocs.exceptions import UnexpectedBehaviour
+from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_running
+from ocs_ci.ocs.utils import get_active_acm_index
+from ocs_ci.utility import version
+from ocs_ci.utility.utils import TimeoutSampler
+
+
+logger = logging.getLogger(__name__)
+
+
+@tier4a
+@turquoise_squad
+class TestActiveHubDownAndRestore:
+    """
+    Test failover and relocate of all apps when the active hub is down and restored
+    """
+
+    @pytest.fixture(autouse=True)
+    def teardown(self, request, dr_workload):
+        """
+        If fenced, unfence the cluster and reboot nodes
+        """
+
+        def finalizer():
+            if (
+                self.primary_cluster_name
+                and get_fence_state(
+                    drcluster_name=self.primary_cluster_name,
+                    switch_ctx=get_passive_acm_index(),
+                )
+                == "Fenced"
+            ):
+                enable_unfence(
+                    drcluster_name=self.primary_cluster_name,
+                    switch_ctx=get_passive_acm_index(),
+                )
+                gracefully_reboot_ocp_nodes(self.primary_cluster_name)
+
+        request.addfinalizer(finalizer)
+
+    @pytest.mark.polarion_id("OCS-XXXX")
+    def test_application_failover_and_relocate(
+        self, setup_acm_ui, nodes_multicluster, dr_workload
+    ):
+
+        """
+        Tests to verify failover and relocate of all apps when the active hub is down and restored
+
+        """
+
+        if config.RUN.get("mdr_failover_via_ui"):
+            ocs_version = version.get_semantic_ocs_version_from_config()
+            if ocs_version <= version.VERSION_4_12:
+                logger.error(
+                    "ODF/ACM version isn't supported for Failover/Relocate operation"
+                )
+                raise NotImplementedError
+
+        acm_obj = AcmAddClusters()
+        # Deploy Subscription based application
+        sub = dr_workload(num_of_subscription=1, switch_ctx=get_passive_acm_index())[0]
+        self.namespace = sub.workload_namespace
+        self.workload_type = sub.workload_type
+
+        # Deploy AppSet based application
+        appset = dr_workload(
+            num_of_subscription=0, num_of_appset=1, switch_ctx=get_passive_acm_index()
+        )[0]
+
+        # Workloads list
+        workloads = [sub, appset]
+
+        # Switch to the primary managed cluster context and record the cluster names
+        set_current_primary_cluster_context(self.namespace)
+        self.primary_cluster_name = get_current_primary_cluster_name(
+            namespace=self.namespace
+        )
+        secondary_cluster_name = get_current_secondary_cluster_name(self.namespace)
+
+        # Create backup-schedule on active hub
+        create_backup_schedule()
+        # ToDo: Find an exact way to verify that all the backups are taken
+        wait_time = 300
+        logger.info(f"Wait {wait_time} until backup is taken ")
+        time.sleep(wait_time)
+
+        # Install gitops operator on passive hub
+        dep_obj = Deployment()
+        dep_obj.deploy_gitops_operator(switch_ctx=get_passive_acm_index())
+
+        # Get the active hub nodes
+        config.switch_ctx(get_active_acm_index())
+        active_hub_index = config.cur_index
+        active_hub_cluster_node_objs = get_node_objs()
+
+        # Shutdown active hub nodes
+        logger.info("Shutting down all the nodes of active hub")
+        nodes_multicluster[active_hub_index].stop_nodes(active_hub_cluster_node_objs)
+        logger.info(
+            "All nodes of active hub zone are powered off, "
+            f"wait {wait_time} seconds before restoring in passive hub"
+        )
+
+        # Restore new hub
+        restore_backup()
+        logger.info(f"Wait {wait_time} until restores are taken ")
+        time.sleep(wait_time)
+        # Verify the restore is completed
+        verify_restore_is_completed()
+
+        # Validate the clusters are imported
+        clusters = [self.primary_cluster_name, secondary_cluster_name]
+        for cluster in clusters:
+            for sample in TimeoutSampler(
+                timeout=1800,
+                sleep=60,
+                func=validate_cluster_import,
+                cluster_name=cluster,
+                switch_ctx=get_passive_acm_index(),
+            ):
+                if sample:
+                    logger.info(
+                        f"Cluster: {cluster} successfully imported post hub recovery"
+                    )
+                    # Validate klusterlet addons are running on managed cluster
+                    config.switch_to_cluster_by_name(cluster)
+                    wait_for_pods_to_be_running(
+                        namespace=constants.ACM_ADDONS_NAMESPACE, timeout=300, sleep=15
+                    )
+                    break
+                else:
+                    logger.error(
+                        f"import of cluster: {cluster} failed post hub recovery"
+                    )
+                    raise UnexpectedBehaviour(
+                        f"import of cluster: {cluster} failed post hub recovery"
+                    )
+
+        # Wait or verify the drpolicy is in validated state
+        verify_drpolicy_cli(switch_ctx=get_passive_acm_index())
+
+        # ToDo: Deploy application in both managed cluster and
+        #  to verify the applications are present in secondary cluster
+
+        # Fence the primary managed cluster
+        enable_fence(
+            drcluster_name=self.primary_cluster_name,
+            switch_ctx=get_passive_acm_index(),
+        )
+        # Verify the primary managed cluster is in Fenced state
+        verify_fence_state(
+            drcluster_name=self.primary_cluster_name,
+            state=constants.ACTION_FENCE,
+            switch_ctx=get_passive_acm_index(),
+        )
+
+        # Application Failover to Secondary managed cluster
+        if (
+            config.RUN.get("mdr_failover_via_ui")
+            and self.workload_type == constants.SUBSCRIPTION
+        ):
+            logger.info("Start the process of Failover from ACM UI")
+            config.switch_ctx(get_passive_acm_index())
+            failover_relocate_ui(
+                acm_obj,
+                workload_to_move=f"{workloads[0].workload_name}-1",
+                policy_name=workloads[0].dr_policy_name,
+                failover_or_preferred_cluster=secondary_cluster_name,
+            )
+        else:
+            failover_results = []
+            with ThreadPoolExecutor() as executor:
+                for wl in workloads:
+                    failover_results.append(
+                        executor.submit(
+                            failover,
+                            failover_cluster=secondary_cluster_name,
+                            namespace=wl.workload_namespace,
+                            switch_ctx=get_passive_acm_index(),
+                        )
+                    )
+                    time.sleep(5)
+
+            # Wait for failover results
+            for fl in failover_results:
+                fl.result()
+
+        # Verify resources creation on secondary cluster (failoverCluster)
+        config.switch_to_cluster_by_name(secondary_cluster_name)
+        for wl in workloads:
+            wait_for_all_resources_creation(
+                wl.workload_pvc_count,
+                wl.workload_pod_count,
+                wl.workload_namespace,
+            )
+
+        # Verify applications are deleted from the old cluster
+        config.switch_to_cluster_by_name(self.primary_cluster_name)
+        for wl in workloads:
+            wait_for_all_resources_deletion(wl.workload_namespace)
+
+        # Validate data integrity
+        config.switch_to_cluster_by_name(secondary_cluster_name)
+        for wl in workloads:
+            validate_data_integrity(wl.workload_namespace)
+
+        # Unfence the managed cluster which was fenced earlier
+        enable_unfence(
+            drcluster_name=self.primary_cluster_name,
+            switch_ctx=get_passive_acm_index(),
+        )
+        # Verify the primary managed cluster is in Unfenced state
+        verify_fence_state(
+            drcluster_name=self.primary_cluster_name,
+            state=constants.ACTION_UNFENCE,
+            switch_ctx=get_passive_acm_index(),
+        )
+
+        # Reboot the nodes of the cluster which was unfenced
+        gracefully_reboot_ocp_nodes(self.primary_cluster_name)
+
+        # Application Relocate to Primary managed cluster
+        secondary_cluster_name = get_current_secondary_cluster_name(self.namespace)
+        if (
+            config.RUN.get("mdr_relocate_via_ui")
+            and self.workload_type == constants.SUBSCRIPTION
+        ):
+            logger.info("Start the process of Relocate from ACM UI")
+            # Relocate via ACM UI
+            config.switch_ctx(get_passive_acm_index())
+            check_cluster_status_on_acm_console(acm_obj)
+            failover_relocate_ui(
+                acm_obj,
+                workload_to_move=f"{workloads[0].workload_name}-1",
+                policy_name=workloads[0].dr_policy_name,
+                failover_or_preferred_cluster=secondary_cluster_name,
+                action=constants.ACTION_RELOCATE,
+            )
+        else:
+            relocate_results = []
+            with ThreadPoolExecutor() as executor:
+                for wl in workloads:
+                    relocate_results.append(
+                        executor.submit(
+                            relocate,
+                            preferred_cluster=secondary_cluster_name,
+                            namespace=wl.workload_namespace,
+                            switch_ctx=get_passive_acm_index(),
+                        )
+                    )
+                    time.sleep(5)
+
+            # Wait for relocate results
+            for rl in relocate_results:
+                rl.result()
+
+        # NOTE(review): meant to verify deletion from the current secondary, but the loop waits for creation
+        config.switch_to_cluster_by_name(secondary_cluster_name)
+        for wl in workloads:
+            wait_for_all_resources_creation(
+                wl.workload_pvc_count,
+                wl.workload_pod_count,
+                wl.workload_namespace,
+            )
+
+        # Verify resources creation on preferredCluster
+        config.switch_to_cluster_by_name(self.primary_cluster_name)
+        for wl in workloads:
+            wait_for_all_resources_creation(
+                wl.workload_pvc_count,
+                wl.workload_pod_count,
+                wl.workload_namespace,
+            )
+
+        # Validate data integrity
+        config.switch_to_cluster_by_name(self.primary_cluster_name)
+        for wl in workloads:
+            validate_data_integrity(wl.workload_namespace)