diff --git a/ocs_ci/helpers/stretchcluster_helper.py b/ocs_ci/helpers/stretchcluster_helper.py
index 3e745511f70..4dede44b723 100644
--- a/ocs_ci/helpers/stretchcluster_helper.py
+++ b/ocs_ci/helpers/stretchcluster_helper.py
@@ -141,7 +141,6 @@ def check_errors_regex(desc_out, err_msgs):
                 replica_count=4,
                 namespace=constants.STRETCH_CLUSTER_NAMESPACE,
             )
-            break
         elif (
             app_label.split("=")[1] in str(pod.get_labels())
@@ -164,7 +163,6 @@ def check_errors_regex(desc_out, err_msgs):
                 count=4,
                 namespace=constants.STRETCH_CLUSTER_NAMESPACE,
             )
-            break
         elif (
             app_label.split("=")[1] in str(pod.get_labels())
@@ -187,7 +185,7 @@ def check_errors_regex(desc_out, err_msgs):
                 replica_count=2,
                 namespace=constants.STRETCH_CLUSTER_NAMESPACE,
             )
-            break
+        break

     # fetch workload pod details now and make sure all of them are running
     logger.info("Checking if the logwriter pods are up and running now")
@@ -261,6 +259,7 @@ def check_errors_regex(desc_out, err_msgs):

         if check_errors_regex(desc_out, error_messages) and not restarted:
+            logger.info(f"{pod.name} description:\n{desc_out}")
             pod_node = get_pod_node(pod)
             logger.info(
                 f"We need to restart the all the nodes in the zone of node {pod_node.name}"
@@ -295,7 +294,11 @@ def check_errors_regex(desc_out, err_msgs):
             "because of known errors and no nodes restart was done."
             "Please check..."
         )
-        raise Exception
+        raise Exception(
+            "Raising exception because none of the pods are failing "
+            "because of known errors and no nodes restart was done. "
+            "Please check..."
+        )

     # fetch workload pod details now and make sure all of them are running
     logger.info("Checking if the logwriter pods are up and running now")
@@ -319,4 +322,4 @@ def recover_from_ceph_stuck(sc_obj):

     """
     sc_obj.reset_conn_score()
-    return sc_obj.check_ceph_accessibility(timeout=30)
+    return sc_obj.check_ceph_accessibility(timeout=120)
diff --git a/ocs_ci/ocs/cnv/virtual_machine.py b/ocs_ci/ocs/cnv/virtual_machine.py
index bcf003f65a1..044dd4a4aba 100644
--- a/ocs_ci/ocs/cnv/virtual_machine.py
+++ b/ocs_ci/ocs/cnv/virtual_machine.py
@@ -646,6 +646,9 @@ def delete(self):
         """
         Delete the VirtualMachine
         """
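+        # Stop the VM first if it is still running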
+        if self.ready():
+            self.stop()
         if self.secret_obj:
             self.secret_obj.delete()
         self.vm_ocp_obj.delete(resource_name=self._vm_name)
diff --git a/ocs_ci/ocs/resources/stretchcluster.py b/ocs_ci/ocs/resources/stretchcluster.py
index e1100cfedbf..5a204e7ce94 100644
--- a/ocs_ci/ocs/resources/stretchcluster.py
+++ b/ocs_ci/ocs/resources/stretchcluster.py
@@ -284,6 +284,7 @@ def check_for_write_pause(self, label, start_time, end_time):
                     failed += 1
                 elif failed <= max_fail_expected:
                     failed += 1
+                    break
                 else:
                     raise
@@ -468,20 +469,24 @@ def check_ceph_accessibility(self, timeout, delay=60, grace=180):
             ceph_out = ceph_tools_pod.exec_sh_cmd_on_pod(
                 command=command, timeout=timeout + grace
             )
-            logger.info(ceph_out)
+            logger.info(f"Ceph status output:\n{ceph_out}")
             if "monclient(hunting): authenticate timed out" in ceph_out:
                 logger.warning("Ceph was hung for sometime.")
                 return False
             return True
         except Exception as err:
-            if "TimeoutExpired" in err.args[0]:
+            if (
+                "TimeoutExpired" in err.args[0]
+                or "monclient(hunting): authenticate timed out" in err.args[0]
+            ):
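+                # Treat a mon authentication timeout the same as a command timeout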
                 logger.error("Ceph status check got timed out. maybe ceph is hung.")
                 return False
             elif (
                 "connect: no route to host" in err.args[0]
                 or "error dialing backend" in err.args[0]
             ):
-                ceph_tools_pod.delete(wait=False)
+                ceph_tools_pod.delete(force=True)
             raise

     def get_out_of_quorum_nodes(self):
diff --git a/tests/functional/disaster-recovery/sc_arbiter/test_mon_osd_failures.py b/tests/functional/disaster-recovery/sc_arbiter/test_mon_osd_failures.py
index 4adb635addc..08ff761150b 100644
--- a/tests/functional/disaster-recovery/sc_arbiter/test_mon_osd_failures.py
+++ b/tests/functional/disaster-recovery/sc_arbiter/test_mon_osd_failures.py
@@ -20,6 +20,8 @@
     wait_for_pods_to_be_in_statuses,
     get_deployment_name,
     wait_for_pods_by_label_count,
+    get_all_pods,
+    get_pod_node,
 )
 from ocs_ci.ocs.resources.pvc import get_pvc_objs
 from ocs_ci.ocs.resources.stretchcluster import StretchCluster
@@ -27,6 +29,8 @@

 logger = logging.getLogger(__name__)

+CNV_WORKLOAD_NAMESPACE = "namespace-cnv-workload"
+

 @pytest.fixture(scope="class")
 def setup_logwriter_workloads(
@@ -141,7 +145,9 @@ def finalizer():
 def setup_cnv_workload(request, cnv_workload_class, setup_cnv):

     logger.info("Setting up CNV workload and creating some data")
-    vm_obj = cnv_workload_class(volume_interface=constants.VM_VOLUME_PVC)[0]
+    vm_obj = cnv_workload_class(
+        volume_interface=constants.VM_VOLUME_PVC, namespace=CNV_WORKLOAD_NAMESPACE
+    )[0]
     vm_obj.run_ssh_cmd(command="dd if=/dev/zero of=/file_1.txt bs=1024 count=102400")
     md5sum_before = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")
@@ -198,8 +204,14 @@ def test_single_mon_failures(self):
         logger.info("testing single mon failures scenario")
         sc_obj = StretchCluster()

-        # get mon-pod of a single zone
-        mon_pods_in_zone = sc_obj.get_mon_pods_in_a_zone("data-1")
+        # get mon-pod of a zone where the cnv workloads
+        # are running
+        pod_objs = get_all_pods(namespace=CNV_WORKLOAD_NAMESPACE)
+        assert len(pod_objs) != 0, "No VMI pod instances are running"
+        node_obj = get_pod_node(pod_objs[0])
+        mon_pods_in_zone = sc_obj.get_mon_pods_in_a_zone(
+            node_obj.get()["metadata"]["labels"][constants.ZONE_LABEL]
+        )
         mon_pod_to_fail = random.choice(mon_pods_in_zone).name

         # get the deployment of the mon-pod
@@ -267,8 +279,14 @@ def test_single_osd_failure(self):
         logger.info("testing single osd failure scenarios")
         sc_obj = StretchCluster()

-        # get osd-pod of a single zone
-        osd_pods_in_zone = sc_obj.get_osd_pods_in_a_zone("data-1")
+        # get osd-pod of a zone where the cnv
+        # workloads are running
+        pod_objs = get_all_pods(namespace=CNV_WORKLOAD_NAMESPACE)
+        assert len(pod_objs) != 0, "No VMI pod instances are running"
+        node_obj = get_pod_node(pod_objs[0])
+        osd_pods_in_zone = sc_obj.get_osd_pods_in_a_zone(
+            node_obj.get()["metadata"]["labels"][constants.ZONE_LABEL]
+        )
         osd_pod_to_fail = random.choice(osd_pods_in_zone).name

         # get the deployment of the osd-pod
diff --git a/tests/functional/disaster-recovery/sc_arbiter/test_netsplit.py b/tests/functional/disaster-recovery/sc_arbiter/test_netsplit.py
index 5cbf9563c0b..c2c77b92562 100644
--- a/tests/functional/disaster-recovery/sc_arbiter/test_netsplit.py
+++ b/tests/functional/disaster-recovery/sc_arbiter/test_netsplit.py
@@ -15,7 +15,7 @@
 )

 from ocs_ci.ocs.resources.stretchcluster import StretchCluster
-from ocs_ci.ocs.exceptions import CephHealthException
+from ocs_ci.ocs.exceptions import CephHealthException, CommandFailed
 from ocs_ci.ocs import constants
 from ocs_ci.ocs.node import get_all_nodes
@@ -26,6 +26,7 @@
     wait_for_pods_to_be_in_statuses,
     get_ceph_tools_pod,
 )
+from ocs_ci.utility.retry import retry

 logger = logging.getLogger(__name__)

@@ -185,29 +186,37 @@ def test_netsplit(
         logger.info(f"Ended netsplit at {end_time}")

         # check vm data written before the failure for integrity
-        logger.info("Waiting for VM SSH connectivity!")
-        vm_obj.wait_for_ssh_connectivity()
-        md5sum_after = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")
-        assert (
-            md5sum_before == md5sum_after
-        ), "Data integrity of the file inside VM is not maintained during the failure"
-        logger.info(
-            "Data integrity of the file inside VM is maintained during the failure"
-        )
+        @retry(CommandFailed, tries=10, delay=10, backoff=1)
+        def _validate_vm_workload():
+            """
+            Validate VM workload post recovery

-        # check if new data can be created
-        vm_obj.run_ssh_cmd(
-            command="dd if=/dev/zero of=/file_2.txt bs=1024 count=103600"
-        )
-        logger.info("Successfully created new data inside VM")
+            """
+            logger.info("Waiting for VM SSH connectivity!")
+            vm_obj.wait_for_ssh_connectivity()
+            md5sum_after = cal_md5sum_vm(vm_obj, file_path="/file_1.txt")
+            assert (
+                md5sum_before == md5sum_after
+            ), "Data integrity of the file inside VM is not maintained during the failure"
+            logger.info(
+                "Data integrity of the file inside VM is maintained during the failure"
+            )
+
+            # check if new data can be created
+            vm_obj.run_ssh_cmd(
+                command="dd if=/dev/zero of=/file_2.txt bs=1024 count=103600"
+            )
+            logger.info("Successfully created new data inside VM")
+
+            # check if the data can be copied back to local machine
+            vm_obj.scp_from_vm(local_path="/tmp", vm_src_path="/file_1.txt")
+            logger.info("VM data is successfully copied back to local machine")

-        # check if the data can be copied back to local machine
-        vm_obj.scp_from_vm(local_path="/tmp", vm_src_path="/file_1.txt")
-        logger.info("VM data is successfully copied back to local machine")
+            # stop the VM
+            vm_obj.stop()
+            logger.info("Stopped the VM successfully")

-        # stop the VM
-        vm_obj.stop()
-        logger.info("Stoped the VM successfully")
+        _validate_vm_workload()

         # get all the running logwriter pods
         sc_obj.get_logwriter_reader_pods(