Commit

Test case OCS-4761 (#8633)
* Test case OCS-4761

Signed-off-by: Jilju Joy <[email protected]>
jilju authored Oct 23, 2023
1 parent 4388a4d commit 14cda0c
Showing 1 changed file with 130 additions and 1 deletion: tests/disaster-recovery/regional-dr/test_failover_and_relocate.py
@@ -1,4 +1,6 @@
import logging
import os
from datetime import datetime
from time import sleep

import pytest
@@ -15,9 +17,11 @@
from ocs_ci.ocs import constants
from ocs_ci.ocs.acm.acm import AcmAddClusters
from ocs_ci.ocs.node import wait_for_nodes_status, get_node_objs
from ocs_ci.ocs.resources.drpc import DRPC
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_running
from ocs_ci.ocs.utils import get_active_acm_index
from ocs_ci.utility import version
-from ocs_ci.utility.utils import ceph_health_check
+from ocs_ci.utility.utils import ceph_health_check, TimeoutSampler

logger = logging.getLogger(__name__)

@@ -88,6 +92,7 @@ def test_failover_and_relocate(
This test can also be run from the ACM UI;
pass the config conf/ocsci/dr_ui.yaml to trigger it.
The value of lastGroupSyncTime is verified at each stage.
"""
if config.RUN.get("rdr_failover_via_ui"):
@@ -101,8 +106,13 @@
acm_obj = AcmAddClusters()
if workload_type == constants.SUBSCRIPTION:
rdr_workload = dr_workload(num_of_subscription=1)[0]
drpc_obj = DRPC(namespace=rdr_workload.workload_namespace)
else:
rdr_workload = dr_workload(num_of_subscription=0, num_of_appset=1)[0]
drpc_obj = DRPC(
namespace=constants.GITOPS_CLUSTER_NAMESPACE,
resource_name=f"{rdr_workload.appset_placement_name}-drpc",
)

primary_cluster_name = dr_helpers.get_current_primary_cluster_name(
rdr_workload.workload_namespace, workload_type
@@ -121,6 +131,39 @@
logger.info(f"Waiting for {wait_time} minutes to run IOs")
sleep(wait_time * 60)

# Set the cluster_kubeconfig attribute on drpc_obj so that DRPC details
# can be fetched without switching the cluster context
acm_cluster_kubeconfig = os.path.join(
config.clusters[get_active_acm_index()].ENV_DATA["cluster_path"],
config.clusters[get_active_acm_index()].RUN.get("kubeconfig_location"),
)
drpc_obj.cluster_kubeconfig = acm_cluster_kubeconfig
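
Setting cluster_kubeconfig lets the resource object query the hub (ACM) cluster directly. A minimal sketch of the underlying idea, assuming the kubeconfig path is simply forwarded to oc as --kubeconfig (the get_drpc_yaml helper below is hypothetical and illustrative, not part of ocs-ci):

import subprocess

def get_drpc_yaml(kubeconfig_path, namespace, resource_name):
    # Target the hub cluster explicitly instead of switching contexts
    cmd = [
        "oc", "--kubeconfig", kubeconfig_path,
        "get", "drpc", resource_name,
        "-n", namespace, "-o", "yaml",
    ]
    return subprocess.run(cmd, capture_output=True, text=True, check=True).stdout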

# Get lastGroupSyncTime before failover
drpc_data = drpc_obj.get()
last_group_sync_time = drpc_data.get("status").get("lastGroupSyncTime")
logger.info(
f"The value of lastGroupSyncTime before failover is {last_group_sync_time}."
)

# Verify lastGroupSyncTime before failover
time_format = "%Y-%m-%dT%H:%M:%SZ"
last_group_sync_time_formatted = datetime.strptime(
last_group_sync_time, time_format
)
# Truncate the current time to whole seconds so it matches the DRPC timestamp format
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"Before failover - Time in minutes since the last sync {time_since_last_sync}"
)
assert (
time_since_last_sync < 2 * scheduling_interval
), "Before failover - Time since last sync exceeds two times the scheduling interval."
logger.info("Verified lastGroupSyncTime before failover.")

if config.RUN.get("rdr_failover_via_ui"):
logger.info("Start the process of Failover from ACM UI")
config.switch_acm_ctx()
@@ -201,6 +244,46 @@ def test_failover_and_relocate(
logger.info(f"Waiting for {wait_time} minutes to run IOs")
sleep(wait_time * 60)

# Get lastGroupSyncTime after failover.
# The parameter lastGroupSyncTime may not be present for some time after failover.
for drpc_data in TimeoutSampler(300, 5, drpc_obj.get):
post_failover_last_group_sync_time = drpc_data.get("status").get(
"lastGroupSyncTime"
)
if post_failover_last_group_sync_time:
logger.info("After failover - Obtained lastGroupSyncTime.")
# Additional check to make sure the stale pre-failover value is not reported again.
if post_failover_last_group_sync_time != last_group_sync_time:
logger.info(
"After failover - Verified: lastGroupSyncTime after failover is different from initial value."
)
break
logger.info(
"The value of lastGroupSyncTime in drpc is not updated after failover. Retrying."
)
logger.info(
f"The value of lastGroupSyncTime after failover is {post_failover_last_group_sync_time}."
)
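
TimeoutSampler drives the retry loop above. A simplified, self-contained stand-in that illustrates the polling semantics assumed here (the real implementation in ocs_ci.utility.utils raises its own timeout exception and supports more options):

import time

class SimpleTimeoutSampler:
    """Call func every `sleep` seconds and yield each result until
    `timeout` seconds have elapsed, then raise TimeoutError."""

    def __init__(self, timeout, sleep, func, *args, **kwargs):
        self.timeout = timeout
        self.sleep = sleep
        self.func, self.args, self.kwargs = func, args, kwargs

    def __iter__(self):
        deadline = time.monotonic() + self.timeout
        while time.monotonic() < deadline:
            # The caller breaks out of its for-loop once the sampled
            # value satisfies the condition it is waiting for
            yield self.func(*self.args, **self.kwargs)
            time.sleep(self.sleep)
        raise TimeoutError("Sampling timed out before the condition was met")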

# Verify lastGroupSyncTime after failover
time_format = "%Y-%m-%dT%H:%M:%SZ"
post_failover_last_group_sync_time_formatted = datetime.strptime(
post_failover_last_group_sync_time, time_format
)
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - post_failover_last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"After failover - Time in minutes since the last sync is {time_since_last_sync}"
)
assert (
time_since_last_sync < 3 * scheduling_interval
), "After failover - Time since last sync exceeds three times the scheduling interval."
logger.info("Verified lastGroupSyncTime after failover.")

# Relocate action
if config.RUN.get("rdr_relocate_via_ui"):
logger.info("Start the process of Relocate from ACM UI")
@@ -249,3 +332,49 @@ def test_failover_and_relocate(
)

# TODO: Add data integrity checks

# Get lastGroupSyncTime after relocate. The parameter lastGroupSyncTime may not be present in the drpc yaml
# for some time after relocate, so the wait time here is longer than the scheduling interval.
for drpc_data in TimeoutSampler(
(scheduling_interval * 60) + 300, 15, drpc_obj.get
):
post_relocate_last_group_sync_time = drpc_data.get("status").get(
"lastGroupSyncTime"
)
if post_relocate_last_group_sync_time:
logger.info("After relocate - Obtained lastGroupSyncTime.")
# Additional check to make sure the stale post-failover value is not reported again.
if (
post_relocate_last_group_sync_time
!= post_failover_last_group_sync_time
):
logger.info(
"After relocate - Verified: lastGroupSyncTime after relocate is different from the previous "
"value."
)
break
logger.info(
"The value of lastGroupSyncTime in drpc is not updated after relocate. Retrying."
)
logger.info(
f"The value of lastGroupSyncTime after relocate is {post_relocate_last_group_sync_time}."
)

# Verify lastGroupSyncTime after relocate
time_format = "%Y-%m-%dT%H:%M:%SZ"
post_relocate_last_group_sync_time_formatted = datetime.strptime(
post_relocate_last_group_sync_time, time_format
)
current_time = datetime.strptime(
datetime.utcnow().strftime(time_format), time_format
)
time_since_last_sync = (
current_time - post_relocate_last_group_sync_time_formatted
).total_seconds() / 60
logger.info(
f"After relocate - Time in minutes since the last sync is {time_since_last_sync}"
)
assert (
time_since_last_sync < 3 * scheduling_interval
), "After relocate - Time since last sync exceeds three times the scheduling interval."
logger.info("Verified lastGroupSyncTime after relocate.")
