diff --git a/conf/ocsci/dr_workload.yaml b/conf/ocsci/dr_workload.yaml index acbe594705a..a4b5154c1ec 100644 --- a/conf/ocsci/dr_workload.yaml +++ b/conf/ocsci/dr_workload.yaml @@ -83,4 +83,16 @@ ENV_DATA: dr_workload_app_pvc_selector: { 'appname': 'kubevirt' }, pod_count: 1, pvc_count: 1 }, ] + dr_workload_discovered_apps_rbd: [ + { name: "busybox-dict-1", workload_dir: "rdr/busybox/app-busybox-1/resources/deployment", + pod_count: 10, pvc_count: 10, + dr_workload_app_pod_selector_key: "workloadpattern", + dr_workload_app_pod_selector_value: "simple_io", + dr_workload_app_pvc_selector_key: "appname", + dr_workload_app_pvc_selector_value: "busybox_app1", + workload_namespace: "busybox-dict-1", + dr_workload_app_placement_name: "busybox-dict-1" + } + ] + # dr_policy_name: PLACEHOLDER diff --git a/ocs_ci/helpers/dr_helpers.py b/ocs_ci/helpers/dr_helpers.py index ee4b9d7d002..490d084e50c 100644 --- a/ocs_ci/helpers/dr_helpers.py +++ b/ocs_ci/helpers/dr_helpers.py @@ -41,13 +41,16 @@ logger = logging.getLogger(__name__) -def get_current_primary_cluster_name(namespace, workload_type=constants.SUBSCRIPTION): +def get_current_primary_cluster_name( + namespace, workload_type=constants.SUBSCRIPTION, discovered_apps=False +): """ Get current primary cluster name based on workload namespace Args: namespace (str): Name of the namespace workload_type (str): Type of workload, i.e., Subscription or ApplicationSet + discovered_apps (bool): If true then deployed workload is discovered_apps Returns: str: Current primary cluster name @@ -56,6 +59,8 @@ def get_current_primary_cluster_name(namespace, workload_type=constants.SUBSCRIP restore_index = config.cur_index if workload_type == constants.APPLICATION_SET: namespace = constants.GITOPS_CLUSTER_NAMESPACE + if discovered_apps: + namespace = constants.DR_OPS_NAMESAPCE drpc_data = DRPC(namespace=namespace).get() if drpc_data.get("spec").get("action") == constants.ACTION_FAILOVER: cluster_name = drpc_data["spec"]["failoverCluster"] @@ -65,13 +70,16 @@ def get_current_primary_cluster_name(namespace, workload_type=constants.SUBSCRIP return cluster_name -def get_current_secondary_cluster_name(namespace, workload_type=constants.SUBSCRIPTION): +def get_current_secondary_cluster_name( + namespace, workload_type=constants.SUBSCRIPTION, discovered_apps=False +): """ Get current secondary cluster name based on workload namespace Args: namespace (str): Name of the namespace workload_type (str): Type of workload, i.e., Subscription or ApplicationSet + discovered_apps (bool): If true then deployed workload is discovered_apps Returns: str: Current secondary cluster name @@ -80,6 +88,8 @@ def get_current_secondary_cluster_name(namespace, workload_type=constants.SUBSCR restore_index = config.cur_index if workload_type == constants.APPLICATION_SET: namespace = constants.GITOPS_CLUSTER_NAMESPACE + if discovered_apps: + namespace = constants.DR_OPS_NAMESAPCE primary_cluster_name = get_current_primary_cluster_name(namespace) drpolicy_data = DRPC(namespace=namespace).drpolicy_obj.get() config.switch_ctx(restore_index) @@ -122,13 +132,16 @@ def set_current_secondary_cluster_context( config.switch_to_cluster_by_name(cluster_name) -def get_scheduling_interval(namespace, workload_type=constants.SUBSCRIPTION): +def get_scheduling_interval( + namespace, workload_type=constants.SUBSCRIPTION, discovered_apps=False +): """ Get scheduling interval for the workload in the given namespace Args: namespace (str): Name of the namespace workload_type (str): Type of workload, i.e., Subscription or 
ApplicationSet + discovered_apps (bool): If true then deployed workload is discovered_apps Returns: int: scheduling interval value from DRPolicy @@ -137,6 +150,8 @@ def get_scheduling_interval(namespace, workload_type=constants.SUBSCRIPTION): restore_index = config.cur_index if workload_type == constants.APPLICATION_SET: namespace = constants.GITOPS_CLUSTER_NAMESPACE + if discovered_apps: + namespace = constants.DR_OPS_NAMESAPCE drpolicy_obj = DRPC(namespace=namespace).drpolicy_obj interval_value = int(drpolicy_obj.get()["spec"]["schedulingInterval"][:-1]) config.switch_ctx(restore_index) @@ -149,6 +164,8 @@ def failover( workload_type=constants.SUBSCRIPTION, workload_placement_name=None, switch_ctx=None, + discovered_apps=False, + old_primary=None, ): """ Initiates Failover action to the specified cluster @@ -159,6 +176,8 @@ def failover( workload_type (str): Type of workload, i.e., Subscription or ApplicationSet workload_placement_name (str): Placement name switch_ctx (int): The cluster index by the cluster name + discovered_apps (bool): True when cluster is failing over DiscoveredApps + old_primary (str): Name of cluster where workload were running """ restore_index = config.cur_index @@ -171,9 +190,16 @@ def failover( resource_name=f"{workload_placement_name}-drpc", switch_ctx=switch_ctx, ) + elif discovered_apps: + failover_params = ( + f'{{"spec":{{"action":"{constants.ACTION_FAILOVER}",' + f'"failoverCluster":"{failover_cluster}",' + f'"preferredCluster":"{old_primary}"}}}}' + ) + namespace = constants.DR_OPS_NAMESAPCE + drpc_obj = DRPC(namespace=namespace, resource_name=f"{workload_placement_name}") else: drpc_obj = DRPC(namespace=namespace, switch_ctx=switch_ctx) - drpc_obj.wait_for_peer_ready_status() logger.info(f"Initiating Failover action with failoverCluster:{failover_cluster}") assert drpc_obj.patch( @@ -183,6 +209,7 @@ def failover( logger.info( f"Wait for {constants.DRPC}: {drpc_obj.resource_name} to reach {constants.STATUS_FAILEDOVER} phase" ) + drpc_obj.wait_for_phase(constants.STATUS_FAILEDOVER) config.switch_ctx(restore_index) @@ -193,6 +220,9 @@ def relocate( workload_type=constants.SUBSCRIPTION, workload_placement_name=None, switch_ctx=None, + discovered_apps=False, + old_primary=None, + workload_instance=None, ): """ Initiates Relocate action to the specified cluster @@ -203,6 +233,10 @@ def relocate( workload_type (str): Type of workload, i.e., Subscription or ApplicationSet workload_placement_name (str): Placement name switch_ctx (int): The cluster index by the cluster name + discovered_apps (bool): If true then deployed workload is discovered_apps + old_primary (str): Name of cluster where workload were running + workload_instance (object): Discovered App instance to get namespace and dir location + """ restore_index = config.cur_index @@ -215,6 +249,14 @@ def relocate( resource_name=f"{workload_placement_name}-drpc", switch_ctx=switch_ctx, ) + elif discovered_apps: + relocate_params = ( + f'{{"spec":{{"action":"{constants.ACTION_RELOCATE}",' + f'"failoverCluster":"{old_primary}",' + f'"preferredCluster":"{preferred_cluster}"}}}}' + ) + namespace = constants.DR_OPS_NAMESAPCE + drpc_obj = DRPC(namespace=namespace, resource_name=f"{workload_placement_name}") else: drpc_obj = DRPC(namespace=namespace, switch_ctx=switch_ctx) drpc_obj.wait_for_peer_ready_status() @@ -226,7 +268,19 @@ def relocate( logger.info( f"Wait for {constants.DRPC}: {drpc_obj.resource_name} to reach {constants.STATUS_RELOCATED} phase" ) - drpc_obj.wait_for_phase(constants.STATUS_RELOCATED) + 
relocate_condition = constants.STATUS_RELOCATED + if discovered_apps: + relocate_condition = constants.STATUS_RELOCATING + drpc_obj.wait_for_phase(relocate_condition) + + if discovered_apps and workload_instance: + logger.info("Doing Cleanup Operations") + do_discovered_apps_cleanup( + drpc_name=workload_placement_name, + old_primary=old_primary, + workload_namespace=workload_instance.workload_namespace, + workload_dir=workload_instance.workload_dir, + ) config.switch_ctx(restore_index) @@ -487,7 +541,9 @@ def check_vrg_state(state, namespace): return False -def wait_for_replication_resources_creation(vr_count, namespace, timeout): +def wait_for_replication_resources_creation( + vr_count, namespace, timeout, discovered_apps=False +): """ Wait for replication resources to be created @@ -496,13 +552,18 @@ def wait_for_replication_resources_creation(vr_count, namespace, timeout): namespace (str): the namespace of the VR or ReplicationSource resources timeout (int): time in seconds to wait for VR or ReplicationSource resources to be created or reach expected state + discovered_apps (bool): If true then deployed workload is discovered_apps + Raises: TimeoutExpiredError: In case replication resources not created """ logger.info("Waiting for VRG to be created") + if discovered_apps: + vrg_namespace = constants.DR_OPS_NAMESAPCE + sample = TimeoutSampler( - timeout=timeout, sleep=5, func=check_vrg_existence, namespace=namespace + timeout=timeout, sleep=5, func=check_vrg_existence, namespace=vrg_namespace ) if not sample.wait_for_func_status(result=True): error_msg = "VRG resource is not created" @@ -516,7 +577,6 @@ def wait_for_replication_resources_creation(vr_count, namespace, timeout): else: resource_kind = constants.VOLUME_REPLICATION count_function = get_vr_count - if config.MULTICLUSTER["multicluster_mode"] != "metro-dr": logger.info(f"Waiting for {vr_count} {resource_kind}s to be created") sample = TimeoutSampler( @@ -549,7 +609,7 @@ def wait_for_replication_resources_creation(vr_count, namespace, timeout): sleep=5, func=check_vrg_state, state="primary", - namespace=namespace, + namespace=vrg_namespace, ) if not sample.wait_for_func_status(result=True): error_msg = "VRG hasn't reached expected state primary within the time limit." 
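For Discovered Apps the VRG is reconciled on the hub side in the openshift-dr-ops namespace rather than in the workload namespace, which is why the creation and state checks above switch to vrg_namespace. A minimal, self-contained sketch of that namespace-resolution-plus-polling pattern follows (illustrative only, not part of this diff); the fallback to the workload namespace when discovered_apps is False is an assumption, since the hunk above only assigns vrg_namespace inside the `if discovered_apps:` branch.

import time

DR_OPS_NAMESPACE = "openshift-dr-ops"  # spelled constants.DR_OPS_NAMESAPCE in the codebase


def resolve_vrg_namespace(workload_namespace, discovered_apps=False):
    """Return the namespace in which the VRG is expected for this workload."""
    # Hub-managed namespace for Discovered Apps; otherwise fall back to the
    # workload namespace (assumed fallback, see the note above).
    return DR_OPS_NAMESPACE if discovered_apps else workload_namespace


def wait_until(predicate, timeout=300, sleep=5):
    """Poll a zero-argument callable until it returns True or the timeout expires."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(sleep)
    return False


# Usage mirroring the TimeoutSampler calls above, with the existing dr_helpers
# check_vrg_existence helper:
#   ns = resolve_vrg_namespace("busybox-dict-1", discovered_apps=True)
#   wait_until(lambda: check_vrg_existence(namespace=ns), timeout=900)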
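Earlier in this file's diff, the discovered-apps branches of failover() and relocate() drive the action by merge-patching the DRPC in openshift-dr-ops instead of patching a per-placement DRPC in the workload or GitOps namespace. Below is a short sketch of what that failover_params f-string renders to, assuming constants.ACTION_FAILOVER evaluates to "Failover" and using hypothetical cluster names; the json.dumps form at the end is only an equivalence check, not a proposed change.

import json

failover_cluster = "cluster-b"   # hypothetical failover target
old_primary = "cluster-a"        # hypothetical cluster the workload ran on

failover_params = (
    '{"spec":{"action":"Failover",'
    f'"failoverCluster":"{failover_cluster}",'
    f'"preferredCluster":"{old_primary}"}}}}'
)
print(json.loads(failover_params))
# {'spec': {'action': 'Failover', 'failoverCluster': 'cluster-b', 'preferredCluster': 'cluster-a'}}

equivalent = json.dumps(
    {
        "spec": {
            "action": "Failover",
            "failoverCluster": failover_cluster,
            "preferredCluster": old_primary,
        }
    }
)
assert json.loads(equivalent) == json.loads(failover_params)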
@@ -631,7 +691,12 @@ def wait_for_replication_resources_deletion(namespace, timeout, check_state=True def wait_for_all_resources_creation( - pvc_count, pod_count, namespace, timeout=900, skip_replication_resources=False + pvc_count, + pod_count, + namespace, + timeout=900, + skip_replication_resources=False, + discovered_apps=False, ): """ Wait for workload and replication resources to be created @@ -642,6 +707,8 @@ def wait_for_all_resources_creation( namespace (str): the namespace of the workload timeout (int): time in seconds to wait for resource creation skip_replication_resources (bool): if true vr status wont't be check + discovered_apps (bool): If true then deployed workload is discovered_apps + """ logger.info(f"Waiting for {pvc_count} PVCs to reach {constants.STATUS_BOUND} state") @@ -660,9 +727,10 @@ def wait_for_all_resources_creation( timeout=timeout, sleep=5, ) - if not skip_replication_resources: - wait_for_replication_resources_creation(pvc_count, namespace, timeout) + wait_for_replication_resources_creation( + pvc_count, namespace, timeout, discovered_apps + ) def wait_for_all_resources_deletion( @@ -1493,3 +1561,46 @@ def replace_cluster(workload, primary_cluster_name, secondary_cluster_name): # Configure DRClusters for fencing automation configure_drcluster_for_fencing() + + +def do_discovered_apps_cleanup( + drpc_name, old_primary, workload_namespace, workload_dir +): + """ + Function to clean up Resources + + Args: + drpc_name (str): Name of DRPC + old_primary (str): Name of old primary where cleanup will happen + workload_namespace (str): Workload namespace + workload_dir (str): Dir location of workload + """ + restore_index = config.cur_index + config.switch_acm_ctx() + drpc_obj = DRPC(namespace=constants.DR_OPS_NAMESAPCE, resource_name=drpc_name) + drpc_obj.wait_for_progression_status(status=constants.STATUS_WAITFORUSERTOCLEANUP) + config.switch_to_cluster_by_name(old_primary) + workload_path = constants.DR_WORKLOAD_REPO_BASE_DIR + "/" + workload_dir + run_cmd(f"oc delete -k {workload_path} -n {workload_namespace} --wait=false") + wait_for_all_resources_deletion(namespace=workload_namespace) + config.switch_acm_ctx() + drpc_obj.wait_for_progression_status(status=constants.STATUS_COMPLETED) + config.switch_ctx(restore_index) + + +def generate_kubeobject_capture_interval(): + """ + Generate KubeObject Capture Interval + + Returns: + int: capture interval value to be used + + """ + capture_interval = int(get_all_drpolicy()[0]["spec"]["schedulingInterval"][:-1]) + + if capture_interval <= 5 and capture_interval != 1: + return capture_interval - 1 + elif capture_interval > 6: + return 5 + else: + return capture_interval diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index f59562c814d..9c9300a6bfa 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -125,7 +125,11 @@ STATUS_FAILED = "Failed" STATUS_FAILEDOVER = "FailedOver" STATUS_RELOCATED = "Relocated" +STATUS_RELOCATING = "Relocating" STATUS_CONTAINER_STATUS_UNKNOWN = "ContainerStatusUnknown" +STATUS_WAITFORUSERTOCLEANUP = "WaitOnUserToCleanUp" +STATUS_POWERON = "ON" +STATUS_POWEROFF = "OFF" # NooBaa statuses BS_AUTH_FAILED = "AUTH_FAILED" @@ -2842,6 +2846,7 @@ # DR DRPC_PATH = os.path.join(TEMPLATE_DIR, "DR", "drpc.yaml") +PLACEMENT_PATH = os.path.join(TEMPLATE_DIR, "DR", "placement.yaml") CLUSTERROLEBINDING_APPSET_PULLMODEL_PATH = os.path.join( TEMPLATE_DIR, "DR", "clusterrolebinding_appset_pullmodel.yaml" ) @@ -2857,6 +2862,9 @@ GITOPS_CLUSTER_NAMESPACE = "openshift-gitops" 
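generate_kubeobject_capture_interval() above derives the kubeObjectProtection captureInterval from the DRPolicy schedulingInterval: it stays one minute below small intervals and is capped at 5 for large ones. The following self-contained sketch only reproduces the branch behaviour, with the DRPolicy lookup replaced by a plain argument; note that an interval of exactly 6 falls through to the final else branch and is returned unchanged as written.

def capture_interval_for(scheduling_interval_minutes):
    """Mirror of generate_kubeobject_capture_interval(), minus the DRPolicy lookup."""
    if scheduling_interval_minutes <= 5 and scheduling_interval_minutes != 1:
        return scheduling_interval_minutes - 1
    elif scheduling_interval_minutes > 6:
        return 5
    else:
        return scheduling_interval_minutes


for minutes in (1, 2, 5, 6, 10):
    print(f"{minutes}m -> {capture_interval_for(minutes)}m")
# 1m -> 1m, 2m -> 1m, 5m -> 4m, 6m -> 6m, 10m -> 5m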
APPLICATION_ARGOCD = "applications.argoproj.io" PLACEMENT_KIND = "placements.cluster.open-cluster-management.io" + +DISCOVERED_APPS = "DiscoveredApps" +DR_OPS_NAMESAPCE = "openshift-dr-ops" DPA_DISCOVERED_APPS_PATH = os.path.join(TEMPLATE_DIR, "DR", "dpa_discovered_apps.yaml") DISABLE_DR_EACH_APP = os.path.join(TEMPLATE_DIR, "DR", "disable_dr_each_app.sh") diff --git a/ocs_ci/ocs/dr/dr_workload.py b/ocs_ci/ocs/dr/dr_workload.py index 5b0d0620807..545a804d3d3 100644 --- a/ocs_ci/ocs/dr/dr_workload.py +++ b/ocs_ci/ocs/dr/dr_workload.py @@ -13,6 +13,7 @@ from ocs_ci.framework import config from ocs_ci.helpers import dr_helpers, helpers from ocs_ci.helpers.cnv_helpers import create_vm_secret, cal_md5sum_vm +from ocs_ci.helpers.dr_helpers import generate_kubeobject_capture_interval from ocs_ci.helpers.helpers import ( create_project, create_unique_resource_name, @@ -499,6 +500,8 @@ def deploy_workload(self): drpc_yaml_data["spec"]["preferredCluster"] = self.preferred_primary_cluster drpc_yaml_data["spec"]["drPolicyRef"]["name"] = self.dr_policy_name drpc_yaml_data["spec"]["placementRef"]["name"] = self.appset_placement_name + del drpc_yaml_data["spec"]["matchExpressions"] + del drpc_yaml_data["spec"]["kubeObjectProtection"] drpc_yaml_data["spec"]["pvcSelector"]["matchLabels"] = self.appset_pvc_selector self.drcp_data_yaml = tempfile.NamedTemporaryFile( mode="w+", prefix="drpc", delete=False @@ -744,6 +747,8 @@ def deploy_workload(self): drpc_yaml_data["metadata"]["name"] = f"{self.cnv_workload_placement_name}-drpc" drpc_yaml_data["spec"]["preferredCluster"] = self.preferred_primary_cluster drpc_yaml_data["spec"]["drPolicyRef"]["name"] = self.dr_policy_name + del drpc_yaml_data["spec"]["matchExpressions"] + del drpc_yaml_data["spec"]["kubeObjectProtection"] drpc_yaml_data["spec"]["placementRef"][ "name" ] = self.cnv_workload_placement_name @@ -1046,6 +1051,221 @@ def validate_data_integrity_vm( ), f"Failed: MD5 comparison after {app_state}" +class BusyboxDiscoveredApps(DRWorkload): + """ + Class handling everything related to busybox workload for Discovered/Imperative Apps + + """ + + def __init__(self, **kwargs): + workload_repo_url = config.ENV_DATA["dr_workload_repo_url"] + log.info(f"Repo used: {workload_repo_url}") + workload_repo_branch = config.ENV_DATA["dr_workload_repo_branch"] + super().__init__("busybox", workload_repo_url, workload_repo_branch) + self.workload_type = kwargs.get("workload_type", constants.DISCOVERED_APPS) + self.workload_namespace = kwargs.get("workload_namespace", None) + self.workload_pod_count = kwargs.get("workload_pod_count") + self.workload_pvc_count = kwargs.get("workload_pvc_count") + self.dr_policy_name = kwargs.get( + "dr_policy_name", config.ENV_DATA.get("dr_policy_name") + ) or (dr_helpers.get_all_drpolicy()[0]["metadata"]["name"]) + self.preferred_primary_cluster = kwargs.get("preferred_primary_cluster") or ( + get_primary_cluster_config().ENV_DATA["cluster_name"] + ) + self.workload_dir = kwargs.get("workload_dir") + self.discovered_apps_placement_name = kwargs.get("workload_placement_name") + self.drpc_yaml_file = os.path.join(constants.DRPC_PATH) + self.placement_yaml_file = os.path.join(constants.PLACEMENT_PATH) + self.kubeobject_capture_interval = f"{generate_kubeobject_capture_interval()}m" + self.protection_type = kwargs.get("protection_type") + self.target_clone_dir = config.ENV_DATA.get( + "target_clone_dir", constants.DR_WORKLOAD_REPO_BASE_DIR + ) + self.discovered_apps_pvc_selector_key = kwargs.get( + "discovered_apps_pvc_selector_key" + ) + 
self.discovered_apps_pvc_selector_value = kwargs.get( + "discovered_apps_pvc_selector_value" + ) + self.discovered_apps_pod_selector_key = kwargs.get( + "discovered_apps_pod_selector_key" + ) + self.discovered_apps_pod_selector_value = kwargs.get( + "discovered_apps_pod_selector_value" + ) + + def deploy_workload(self): + """ + + Deployment specific to busybox workload for Discovered/Imperative Apps + + """ + self._deploy_prereqs() + for cluster in get_non_acm_cluster_config(): + config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"]) + self.create_namespace() + config.switch_to_cluster_by_name(self.preferred_primary_cluster) + self.workload_path = self.target_clone_dir + "/" + self.workload_dir + run_cmd(f"oc create -k {self.workload_path} -n {self.workload_namespace} ") + self.check_pod_pvc_status(skip_replication_resources=True) + config.switch_acm_ctx() + self.create_placement() + self.create_dprc() + self.verify_workload_deployment() + + def _deploy_prereqs(self): + """ + Perform prerequisites + + """ + # Clone workload repo + clone_repo( + url=self.workload_repo_url, + location=self.target_clone_dir, + branch=self.workload_repo_branch, + ) + + def verify_workload_deployment(self): + """ + Verify busybox workload Discovered App + + """ + config.switch_to_cluster_by_name(self.preferred_primary_cluster) + dr_helpers.wait_for_all_resources_creation( + self.workload_pvc_count, + self.workload_pod_count, + self.workload_namespace, + discovered_apps=True, + ) + + def create_placement(self): + """ + Create placement CR for discovered Apps + + """ + + placement_yaml_data = templating.load_yaml(self.placement_yaml_file) + placement_yaml_data["metadata"]["name"] = ( + self.discovered_apps_placement_name + "-placement-1" + ) + placement_yaml_data["metadata"].setdefault("annotations", {}) + placement_yaml_data["metadata"]["annotations"][ + "cluster.open-cluster-management.io/experimental-scheduling-disable" + ] = "true" + placement_yaml_data["metadata"]["namespace"] = constants.DR_OPS_NAMESAPCE + placement_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="drpc", delete=False + ) + templating.dump_data_to_temp_yaml(placement_yaml_data, placement_yaml.name) + log.info(f"Creating Placement for workload {self.workload_name}") + run_cmd(f"oc create -f {placement_yaml.name}") + + def create_dprc(self): + """ + Create DRPC for discovered Apps + + """ + drpc_yaml_data = templating.load_yaml(self.drpc_yaml_file) + drpc_yaml_data["spec"].setdefault("kubeObjectProtection", {}) + drpc_yaml_data["spec"]["kubeObjectProtection"].setdefault("kubeObjectSelector") + drpc_yaml_data["spec"].setdefault("protectedNamespaces", []).append( + self.workload_namespace + ) + del drpc_yaml_data["spec"]["pvcSelector"]["matchLabels"] + + log.info(self.discovered_apps_pvc_selector_key) + drpc_yaml_data["metadata"]["name"] = self.discovered_apps_placement_name + drpc_yaml_data["metadata"]["namespace"] = constants.DR_OPS_NAMESAPCE + drpc_yaml_data["spec"]["preferredCluster"] = self.preferred_primary_cluster + drpc_yaml_data["spec"]["drPolicyRef"]["name"] = self.dr_policy_name + drpc_yaml_data["spec"]["placementRef"]["name"] = ( + self.discovered_apps_placement_name + "-placement-1" + ) + drpc_yaml_data["spec"]["placementRef"]["namespace"] = constants.DR_OPS_NAMESAPCE + drcp_data_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="drpc", delete=False + ) + templating.dump_data_to_temp_yaml(drpc_yaml_data, drcp_data_yaml.name) + log.info(drcp_data_yaml.name) + 
drpc_yaml_data["spec"]["pvcSelector"]["matchExpressions"][0][ + "key" + ] = self.discovered_apps_pvc_selector_key + drpc_yaml_data["spec"]["pvcSelector"]["matchExpressions"][0]["operator"] = "In" + drpc_yaml_data["spec"]["pvcSelector"]["matchExpressions"][0]["values"][ + 0 + ] = self.discovered_apps_pvc_selector_value + drpc_yaml_data["spec"]["protectedNamespaces"][0] = self.workload_namespace + drpc_yaml_data["spec"]["kubeObjectProtection"][ + "captureInterval" + ] = self.kubeobject_capture_interval + drpc_yaml_data["spec"]["kubeObjectProtection"]["kubeObjectSelector"][ + "matchExpressions" + ][0]["key"] = self.discovered_apps_pod_selector_key + drpc_yaml_data["spec"]["kubeObjectProtection"]["kubeObjectSelector"][ + "matchExpressions" + ][0]["operator"] = "In" + drpc_yaml_data["spec"]["kubeObjectProtection"]["kubeObjectSelector"][ + "matchExpressions" + ][0]["values"][0] = self.discovered_apps_pod_selector_value + drcp_data_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="drpc", delete=False + ) + templating.dump_data_to_temp_yaml(drpc_yaml_data, drcp_data_yaml.name) + log.info("Creating DRPC") + run_cmd(f"oc create -f {drcp_data_yaml.name}") + + def check_pod_pvc_status(self, skip_replication_resources=False): + """ + Check for Pod and PVC status + + Args: + skip_replication_resources (bool): Skip Volumereplication check + + """ + config.switch_to_cluster_by_name(self.preferred_primary_cluster) + dr_helpers.wait_for_all_resources_creation( + self.workload_pvc_count, + self.workload_pod_count, + self.workload_namespace, + skip_replication_resources=skip_replication_resources, + ) + + def create_namespace(self): + """ + Create Namespace for Workload's to run + """ + + run_cmd(f"oc create namespace {self.workload_namespace}") + + def delete_workload(self, force=False): + """ + Delete Discovered Apps + + """ + + log.info("Deleting DRPC") + config.switch_acm_ctx() + run_cmd( + f"oc delete drpc -n {constants.DR_OPS_NAMESAPCE} {self.discovered_apps_placement_name}" + ) + log.info("Deleting Placement") + run_cmd( + f"oc delete placement -n {constants.DR_OPS_NAMESAPCE} {self.discovered_apps_placement_name}-placement-1" + ) + + for cluster in get_non_acm_cluster_config(): + log.info(f"Deleting Workload from {cluster}") + config.switch_ctx(cluster.MULTICLUSTER["multicluster_index"]) + run_cmd( + f"oc delete -k {self.workload_path} -n {self.workload_namespace}", + ignore_error=True, + ) + dr_helpers.wait_for_all_resources_deletion( + namespace=self.workload_namespace + ) + run_cmd(f"oc delete project {self.workload_namespace}") + + def validate_data_integrity(namespace, path="/mnt/test/hashfile", timeout=600): """ Verifies the md5sum values of files are OK diff --git a/ocs_ci/ocs/resources/drpc.py b/ocs_ci/ocs/resources/drpc.py index debdadee967..fc7f395b17c 100644 --- a/ocs_ci/ocs/resources/drpc.py +++ b/ocs_ci/ocs/resources/drpc.py @@ -66,6 +66,29 @@ def wait_for_peer_ready_status(self): result=True ), "PeerReady status is not true, failover or relocate action can not be performed" + def get_progression_status(self, status_to_check=None): + logger.info("Getting progression Status") + progression_status = self.get()["status"]["progression"] + if status_to_check: + logger.info(f"Current progression Status {progression_status}") + if progression_status == status_to_check: + return True + else: + return False + return progression_status + + def wait_for_progression_status(self, status): + logger.info(f"Waiting for Progression status to be {status}") + sample = TimeoutSampler( + timeout=300, + 
sleep=10, + func=self.get_progression_status, + status_to_check=status, + ) + assert sample.wait_for_func_status( + result=True + ), f"Progression status is not expected current status {self.get_progression_status()} expected status {status}" + def get_drpc_name(namespace, switch_ctx=None): """ diff --git a/ocs_ci/templates/DR/drpc.yaml b/ocs_ci/templates/DR/drpc.yaml index 3deadcf3fa0..d476c44a778 100644 --- a/ocs_ci/templates/DR/drpc.yaml +++ b/ocs_ci/templates/DR/drpc.yaml @@ -1,8 +1,6 @@ apiVersion: ramendr.openshift.io/v1alpha1 kind: DRPlacementControl metadata: - labels: - cluster.open-cluster-management.io/backup: resource name: PLACEHOLDER namespace: openshift-gitops spec: @@ -16,6 +14,19 @@ spec: name: PLACEHOLDER namespace: openshift-gitops preferredCluster: PLACEHOLDER + kubeObjectProtection: + captureInterval: PLACEHOLDER + kubeObjectSelector: + matchExpressions: + - key: PLACEHOLDER + operator: In + values: + - PLACEHOLDER pvcSelector: matchLabels: PLACEHOLDER + matchExpressions: + - key: PLACEHOLDER + operator: In + values: + - PLACEHOLDER diff --git a/ocs_ci/templates/DR/placement.yaml b/ocs_ci/templates/DR/placement.yaml new file mode 100644 index 00000000000..85c16e6e59a --- /dev/null +++ b/ocs_ci/templates/DR/placement.yaml @@ -0,0 +1,7 @@ +apiVersion: cluster.open-cluster-management.io/v1beta1 +kind: Placement +metadata: + name: PLACEHOLDER + namespace: PLACEHOLDER +spec: + predicates: [] diff --git a/tests/conftest.py b/tests/conftest.py index fa78c98f8d6..d9160a1fecc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -46,7 +46,12 @@ ) from ocs_ci.ocs.constants import FUSION_CONF_DIR from ocs_ci.ocs.cnv.virtual_machine import VirtualMachine -from ocs_ci.ocs.dr.dr_workload import BusyBox, BusyBox_AppSet, CnvWorkload +from ocs_ci.ocs.dr.dr_workload import ( + BusyBox, + BusyBox_AppSet, + CnvWorkload, + BusyboxDiscoveredApps, +) from ocs_ci.ocs.exceptions import ( CommandFailed, TimeoutExpiredError, @@ -6868,6 +6873,74 @@ def teardown(): return factory +@pytest.fixture() +def discovered_apps_dr_workload(request): + """ + Deploys Discovered App based workload for DR setup + + """ + instances = [] + + def factory(kubeobject=1): + """ + Args: + kubeobject (int): Number if Discovered Apps workload with kube object protection to be created + recipe (int): Number if Discovered Apps workload with recipe protection to be created + pvc_interface (str): 'CephBlockPool' or 'CephFileSystem'. + This decides whether a RBD based or CephFS based resource is created. RBD is default. 
+ + Raises: + ResourceNotDeleted: In case workload resources not deleted properly + + Returns: + list: objects of workload class + + """ + total_pvc_count = 0 + workload_key = "dr_workload_discovered_apps_rbd" + # TODO: When cephfs is ready + # if pvc_interface == constants.CEPHFILESYSTEM: + # workload_key = "dr_workload_discovered_apps_cephfs" + for index in range(kubeobject): + workload_details = ocsci_config.ENV_DATA[workload_key][index] + workload = BusyboxDiscoveredApps( + workload_dir=workload_details["workload_dir"], + workload_pod_count=workload_details["pod_count"], + workload_pvc_count=workload_details["pvc_count"], + workload_namespace=workload_details["workload_namespace"], + discovered_apps_pvc_selector_key=workload_details[ + "dr_workload_app_pvc_selector_key" + ], + discovered_apps_pvc_selector_value=workload_details[ + "dr_workload_app_pvc_selector_value" + ], + discovered_apps_pod_selector_key=workload_details[ + "dr_workload_app_pod_selector_key" + ], + discovered_apps_pod_selector_value=workload_details[ + "dr_workload_app_pod_selector_value" + ], + workload_placement_name=workload_details[ + "dr_workload_app_placement_name" + ], + ) + instances.append(workload) + total_pvc_count += workload_details["pvc_count"] + workload.deploy_workload() + + return instances + + def teardown(): + for instance in instances: + try: + instance.delete_workload(force=True) + except ResourceNotDeleted: + raise ResourceNotDeleted("Workload deletion was unsuccessful") + + request.addfinalizer(teardown) + return factory + + @pytest.fixture() def cnv_workload(request): """ diff --git a/tests/functional/disaster-recovery/regional-dr/test_failover_and_relocate_discovered_apps.py b/tests/functional/disaster-recovery/regional-dr/test_failover_and_relocate_discovered_apps.py new file mode 100644 index 00000000000..ae9dd32a5f3 --- /dev/null +++ b/tests/functional/disaster-recovery/regional-dr/test_failover_and_relocate_discovered_apps.py @@ -0,0 +1,103 @@ +import logging +from time import sleep + + +from ocs_ci.framework import config +from ocs_ci.framework.testlib import acceptance, tier1 +from ocs_ci.framework.pytest_customization.marks import turquoise_squad +from ocs_ci.helpers import dr_helpers + + +logger = logging.getLogger(__name__) + + +@acceptance +@tier1 +@turquoise_squad +class TestFailoverAndRelocateWithDiscoveredApps: + """ + Test Failover and Relocate with Discovered Apps + + """ + + def test_failover_and_relocate_discovered_apps(self, discovered_apps_dr_workload): + """ + Tests to verify application failover and Relocate with Discovered Apps + There are two test cases: + 1) Failover to secondary cluster when primary cluster is UP + 2) Relocate back to primary + + """ + + rdr_workload = discovered_apps_dr_workload()[0] + + primary_cluster_name_before_failover = ( + dr_helpers.get_current_primary_cluster_name( + rdr_workload.workload_namespace, discovered_apps=True + ) + ) + config.switch_to_cluster_by_name(primary_cluster_name_before_failover) + secondary_cluster_name = dr_helpers.get_current_secondary_cluster_name( + rdr_workload.workload_namespace, discovered_apps=True + ) + + scheduling_interval = dr_helpers.get_scheduling_interval( + rdr_workload.workload_namespace, discovered_apps=True + ) + wait_time = 2 * scheduling_interval # Time in minutes + logger.info(f"Waiting for {wait_time} minutes to run IOs") + sleep(wait_time * 60) + + dr_helpers.failover( + failover_cluster=secondary_cluster_name, + namespace=rdr_workload.workload_namespace, + discovered_apps=True, + 
workload_placement_name=rdr_workload.discovered_apps_placement_name, + old_primary=primary_cluster_name_before_failover, + ) + logger.info("Doing Cleanup Operations") + dr_helpers.do_discovered_apps_cleanup( + drpc_name=rdr_workload.discovered_apps_placement_name, + old_primary=primary_cluster_name_before_failover, + workload_namespace=rdr_workload.workload_namespace, + workload_dir=rdr_workload.workload_dir, + ) + + # Verify resources creation on secondary cluster (failoverCluster) + config.switch_to_cluster_by_name(secondary_cluster_name) + dr_helpers.wait_for_all_resources_creation( + rdr_workload.workload_pvc_count, + rdr_workload.workload_pod_count, + rdr_workload.workload_namespace, + discovered_apps=True, + ) + + # Doing Relocate + primary_cluster_name_after_failover = ( + dr_helpers.get_current_primary_cluster_name( + rdr_workload.workload_namespace, discovered_apps=True + ) + ) + config.switch_to_cluster_by_name(primary_cluster_name_before_failover) + secondary_cluster_name = dr_helpers.get_current_secondary_cluster_name( + rdr_workload.workload_namespace, discovered_apps=True + ) + + scheduling_interval = dr_helpers.get_scheduling_interval( + rdr_workload.workload_namespace, discovered_apps=True + ) + logger.info("Running Relocate Steps") + wait_time = 2 * scheduling_interval # Time in minutes + logger.info(f"Waiting for {wait_time} minutes to run IOs") + sleep(wait_time * 60) + + dr_helpers.relocate( + preferred_cluster=secondary_cluster_name, + namespace=rdr_workload.workload_namespace, + workload_placement_name=rdr_workload.discovered_apps_placement_name, + discovered_apps=True, + old_primary=primary_cluster_name_after_failover, + workload_instance=rdr_workload, + ) + + # TODO: Add data integrity checks
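One possible shape for the data-integrity TODO above, reusing the test's existing locals and the module-level validate_data_integrity(namespace, path, timeout) helper already defined in ocs_ci/ocs/dr/dr_workload.py (visible as context earlier in this diff). The helper's default hashfile path, and the assumption that the busybox pods write md5-checked data there, are not verified by this change, so treat this as a sketch rather than the final check.

from ocs_ci.ocs.dr.dr_workload import validate_data_integrity

# After relocate completes, the workload should be running on the cluster that
# was passed as preferred_cluster (secondary_cluster_name at this point).
config.switch_to_cluster_by_name(secondary_cluster_name)
dr_helpers.wait_for_all_resources_creation(
    rdr_workload.workload_pvc_count,
    rdr_workload.workload_pod_count,
    rdr_workload.workload_namespace,
    discovered_apps=True,
)
validate_data_integrity(namespace=rdr_workload.workload_namespace)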