diff --git a/ocs_ci/ocs/bucket_utils.py b/ocs_ci/ocs/bucket_utils.py
index 787a218e4cd4..aea375e7d4e3 100644
--- a/ocs_ci/ocs/bucket_utils.py
+++ b/ocs_ci/ocs/bucket_utils.py
@@ -1796,6 +1796,25 @@ def update_replication_policy(bucket_name, replication_policy_dict):
     ).patch(params=json.dumps(replication_policy_patch_dict), format_type="merge")
 
 
+def remove_replication_policy(bucket_name):
+    """
+    Remove replication policy for a bucket
+
+    Args:
+        bucket_name(str): Name of the bucket
+
+    """
+    replication_policy_patch_dict = {
+        "spec": {"additionalConfig": {"replicationPolicy": ""}}
+    }
+
+    OCP(
+        kind="obc",
+        namespace=config.ENV_DATA["cluster_namespace"],
+        resource_name=bucket_name,
+    ).patch(params=json.dumps(replication_policy_patch_dict), format_type="merge")
+
+
 def random_object_round_trip_verification(
     io_pod,
     bucket_name,
@@ -2023,3 +2042,46 @@ def sample_if_objects_expired(mcg_obj, bucket_name, prefix="", timeout=600, slee
 
     assert sampler.wait_for_func_status(result=True), f"{message} are not expired"
     logger.info(f"{message} are expired")
+
+def upload_test_objects_to_source_and_wait_for_replication(
+    mcg_obj, source_bucket, target_bucket, mockup_logger, timeout
+):
+    """
+    Upload a set of objects to the source bucket, logs the operations and wait for the replication to complete.
+
+    """
+    logger.info("Uploading test objects and waiting for replication to complete")
+    mockup_logger.upload_test_objs_and_log(source_bucket.name)
+
+    logger.info(
+        "Resetting the noobaa-core pod to trigger the replication background worker"
+    )
+
+    assert compare_bucket_object_list(
+        mcg_obj,
+        source_bucket.name,
+        target_bucket.name,
+        timeout=timeout,
+    ), f"Standard replication failed to complete in {timeout} seconds"
+
+
+def delete_objects_from_source_and_wait_for_deletion_sync(
+    mcg_obj, source_bucket, target_bucket, mockup_logger, timeout
+):
+    """
+    Delete all objects from the source bucket,logs the operations and wait for the deletion sync to complete.
+
+    """
+    logger.info("Deleting source objects and waiting for deletion sync with target")
+    mockup_logger.delete_all_objects_and_log(source_bucket.name)
+
+    logger.info(
+        "Resetting the noobaa-core pod to trigger the replication background worker"
+    )
+
+    assert compare_bucket_object_list(
+        mcg_obj,
+        source_bucket.name,
+        target_bucket.name,
+        timeout=timeout,
+    ), f"Deletion sync failed to complete in {timeout} seconds"
diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py
index 71a0bd9894d6..f89ddf44a3b9 100644
--- a/ocs_ci/ocs/constants.py
+++ b/ocs_ci/ocs/constants.py
@@ -1019,6 +1019,7 @@
     FUSIONAAS_PLATFORM,
 ]
 BAREMETAL_PLATFORMS = [BAREMETAL_PLATFORM, BAREMETALPSI_PLATFORM]
+DEFAULT_AWS_REGION = "us-east-2"
 
 HCI_PROVIDER_CLIENT_PLATFORMS = [
     HCI_BAREMETAL,
diff --git a/ocs_ci/ocs/resources/mockup_bucket_logger.py b/ocs_ci/ocs/resources/mockup_bucket_logger.py
index ddc532dc1352..8c535493a23f 100644
--- a/ocs_ci/ocs/resources/mockup_bucket_logger.py
+++ b/ocs_ci/ocs/resources/mockup_bucket_logger.py
@@ -56,6 +56,10 @@ def __init__(self, awscli_pod, mcg_obj, bucket_factory, platform, region):
             f"ls -A1 {constants.AWSCLI_TEST_OBJ_DIR}"
         ).split(" ")
 
+    @property
+    def standard_test_obj_list(self):
+        return self._standard_test_obj_list
+
     def upload_test_objs_and_log(self, bucket_name):
         """
         Uploads files from files_dir to the MCG bucket and write matching
@@ -97,6 +101,28 @@ def upload_arbitrary_object_and_log(self, bucket_name):
 
         self._upload_mockup_logs(bucket_name, [obj_name], "PUT")
 
+    def delete_objs_and_log(self, bucket_name, objs):
+        """
+        Delete list of objects from the MCG bucket and write
+        matching mockup logs
+
+        Args:
+            bucket_name(str): Name of the MCG bucket
+            objs(list): List of the objects to delete
+
+        """
+        logger.info(f"Deleting the {objs} from the bucket")
+        obj_list = list_objects_from_bucket(
+            self.awscli_pod,
+            f"s3://{bucket_name}",
+            s3_obj=self.mcg_obj,
+        )
+        if set(objs).issubset(set(obj_list)):
+            for obj in objs:
+                s3cmd = craft_s3_command(f"rm s3://{bucket_name}/{obj}", self.mcg_obj)
+                self.awscli_pod.exec_cmd_on_pod(s3cmd)
+            self._upload_mockup_logs(bucket_name, objs, "DELETE")
+
     def delete_all_objects_and_log(self, bucket_name):
         """
         Deletes all objects from the MCG bucket and write matching mockup logs
diff --git a/ocs_ci/ocs/resources/pod.py b/ocs_ci/ocs/resources/pod.py
index 738b314cf324..19e2f1fb4be9 100644
--- a/ocs_ci/ocs/resources/pod.py
+++ b/ocs_ci/ocs/resources/pod.py
@@ -943,6 +943,22 @@ def get_noobaa_endpoint_pods():
     return noobaa_endpoint_pods
 
 
+def get_noobaa_db_pod():
+    """
+    Get the noobaa db pod
+
+    Returns:
+        Pod: The first pod matching the noobaa db label
+
+    """
+    noobaa_db = get_pods_having_label(
+        label=constants.NOOBAA_DB_LABEL_47_AND_ABOVE,
+        namespace=config.ENV_DATA["cluster_namespace"],
+    )
+    nb_db_pods = [Pod(**pod) for pod in noobaa_db]
+    return nb_db_pods[0]
+
+
 def get_odf_operator_controller_manager(
     ocs_label=constants.ODF_OPERATOR_CONTROL_MANAGER_LABEL, namespace=None
 ):
diff --git a/tests/conftest.py b/tests/conftest.py
index 6b5585307664..57c5626bbcd7 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -55,6 +55,8 @@
 from ocs_ci.ocs.ocp import OCP
 from ocs_ci.ocs.resources import pvc
 from ocs_ci.ocs.resources.bucket_policy import gen_bucket_policy
+from ocs_ci.ocs.resources.mcg_replication_policy import LogBasedReplicationPolicy
+from ocs_ci.ocs.resources.mockup_bucket_logger import MockupBucketLogger
 from ocs_ci.ocs.scale_lib import FioPodScale
 from ocs_ci.ocs.utils import (
     setup_ceph_toolbox,
@@ -6939,3 +6941,83 @@ def factory(interval):
         )
 
     return factory
+
+
+@pytest.fixture(scope="class")
+def reduce_replication_delay_setup(add_env_vars_to_noobaa_core_class):
+    """
+    A fixture to reduce the replication delay to one minute.
+
+    Args:
+        add_env_vars_to_noobaa_core_class (function): A function to add env vars to the noobaa-core pod
+
+    """
+    log.warning("Reducing replication delay")
+
+    def factory(new_delay_in_miliseconds=60 * 1000):
+        new_env_var_tuples = [
+            (constants.BUCKET_REPLICATOR_DELAY_PARAM, new_delay_in_miliseconds),
+            (constants.BUCKET_LOG_REPLICATOR_DELAY_PARAM, new_delay_in_miliseconds),
+        ]
+        add_env_vars_to_noobaa_core_class(new_env_var_tuples)
+
+    return factory
+
+
+@pytest.fixture()
+def log_based_replication_setup(
+    awscli_pod_session, mcg_obj_session, bucket_factory, reduce_replication_delay_setup
+):
+    """
+    A fixture to set up standard log-based replication with deletion sync.
+
+    Args:
+        awscli_pod_session(Pod): A pod running the AWS CLI
+        mcg_obj_session(MCG): An MCG object
+        bucket_factory: A bucket factory fixture
+        reduce_replication_delay_setup: A factory fixture that reduces the replication delay
+
+    Returns:
+        MockupBucketLogger: A MockupBucketLogger object
+        Bucket: The source bucket
+        Bucket: The target bucket
+    """
+
+    def factory(bucketclass_dict=None):
+        log.info("Starting log-based replication setup")
+        # The fixture only returns a factory; call it so the delay is actually reduced
+        reduce_replication_delay_setup()
+        if bucketclass_dict is None:
+            bucketclass_dict = {
+                "interface": "OC",
+                "namespace_policy_dict": {
+                    "type": "Single",
+                    "namespacestore_dict": {
+                        constants.AWS_PLATFORM: [(1, constants.DEFAULT_AWS_REGION)]
+                    },
+                },
+            }
+        target_bucket = bucket_factory(bucketclass=bucketclass_dict)[0]
+
+        mockup_logger = MockupBucketLogger(
+            awscli_pod=awscli_pod_session,
+            mcg_obj=mcg_obj_session,
+            bucket_factory=bucket_factory,
+            platform=constants.AWS_PLATFORM,
+            region=constants.DEFAULT_AWS_REGION,
+        )
+        replication_policy = LogBasedReplicationPolicy(
+            destination_bucket=target_bucket.name,
+            sync_deletions=True,
+            logs_bucket=mockup_logger.logs_bucket_uls_name,
+        )
+
+        source_bucket = bucket_factory(
+            1, bucketclass=bucketclass_dict, replication_policy=replication_policy
+        )[0]
+
+        log.info("log-based replication setup complete")
+
+        return mockup_logger, source_bucket, target_bucket
+
+    return factory
diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 2b83665869b2..1617e176a49c 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -298,31 +298,10 @@ def finalizer():
 
 
 @pytest.fixture()
-def noobaa_db_backup_and_recovery(request, snapshot_factory):
-    """
-    Verify noobaa backup and recovery
-
-    1. Take snapshot db-noobaa-db-0 PVC and retore it to PVC
-    2. Scale down the statefulset noobaa-db
-    3. Get the yaml of the current PVC, db-noobaa-db-0 and
-       change the parameter persistentVolumeReclaimPolicy to Retain for restored PVC
-    4. Delete both PVCs, the PV for the original claim db-noobaa-db-0 will be removed.
-       The PV for claim db-noobaa-db-0-snapshot-restore will move to ‘Released’
-    5. Edit again restore PV and remove the claimRef section.
-       The volume will transition to Available.
-    6. Edit the yaml db-noobaa-db-0.yaml and change the setting volumeName to restored PVC.
-    7. Scale up the stateful set again and the pod should be running
-
-    """
+def noobaa_db_backup(request, snapshot_factory):
     restore_pvc_objs = []
 
-    def factory(snapshot_factory=snapshot_factory):
-        # Get noobaa pods before execution
-        noobaa_pods = pod.get_noobaa_pods()
-
-        # Get noobaa PVC before execution
-        noobaa_pvc_obj = pvc.get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])
-        noobaa_pv_name = noobaa_pvc_obj[0].get("spec").get("spec").get("volumeName")
+    def factory(noobaa_pvc_obj):
 
         # Take snapshot db-noobaa-db-0 PVC
         logger.info(f"Creating snapshot of the {noobaa_pvc_obj[0].name} PVC")
@@ -360,6 +339,15 @@ def factory(snapshot_factory=snapshot_factory):
             f"Succeesfuly created PVC {restore_pvc_obj.name} "
             f"from snapshot {snap_obj.name}"
         )
+        return restore_pvc_objs, snap_obj
+
+    return factory
+
+
+@pytest.fixture()
+def noobaa_db_recovery_from_backup(request):
+    def factory(snap_obj, noobaa_pvc_obj, noobaa_pods):
+        noobaa_pv_name = noobaa_pvc_obj[0].get("spec").get("spec").get("volumeName")
 
         # Scale down the statefulset noobaa-db
         modify_statefulset_replica_count(
@@ -453,6 +441,41 @@ def factory(snapshot_factory=snapshot_factory):
             "Changed the parameter persistentVolumeReclaimPolicy to Delete again"
         )
 
+    return factory
+
+
+@pytest.fixture()
+def noobaa_db_backup_and_recovery(
+    request, snapshot_factory, noobaa_db_backup, noobaa_db_recovery_from_backup
+):
+    """
+    Verify noobaa backup and recovery
+
+    1. Take snapshot db-noobaa-db-0 PVC and restore it to PVC
+    2. Scale down the statefulset noobaa-db
+    3. Get the yaml of the current PVC, db-noobaa-db-0 and
+       change the parameter persistentVolumeReclaimPolicy to Retain for restored PVC
+    4. Delete both PVCs, the PV for the original claim db-noobaa-db-0 will be removed.
+       The PV for claim db-noobaa-db-0-snapshot-restore will move to ‘Released’
+    5. Edit again restore PV and remove the claimRef section.
+       The volume will transition to Available.
+    6. Edit the yaml db-noobaa-db-0.yaml and change the setting volumeName to restored PVC.
+    7. Scale up the stateful set again and the pod should be running
+
+    """
+    restore_pvc_objs = []
+
+    def factory(snapshot_factory=snapshot_factory):
+        nonlocal restore_pvc_objs
+        # Get noobaa pods before execution
+        noobaa_pods = pod.get_noobaa_pods()
+
+        # Get noobaa PVC before execution
+        noobaa_pvc_obj = pvc.get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])
+
+        restore_pvc_objs, snap_obj = noobaa_db_backup(noobaa_pvc_obj)
+        noobaa_db_recovery_from_backup(snap_obj, noobaa_pvc_obj, noobaa_pods)
+
     def finalizer():
         # Get the statefulset replica count
         sst_obj = OCP(
diff --git a/tests/e2e/system_test/test_mcg_replication_with_disruptions.py b/tests/e2e/system_test/test_mcg_replication_with_disruptions.py
index 9cb85adc5bab..8098f9665276 100644
--- a/tests/e2e/system_test/test_mcg_replication_with_disruptions.py
+++ b/tests/e2e/system_test/test_mcg_replication_with_disruptions.py
@@ -1,6 +1,9 @@
 import logging
 
 import pytest
+import random
+import time
 
+from concurrent.futures import ThreadPoolExecutor
 from ocs_ci.framework.testlib import (
     E2ETest,
@@ -20,13 +23,21 @@
     compare_bucket_object_list,
     patch_replication_policy_to_bucket,
     write_random_test_objects_to_bucket,
+    upload_test_objects_to_source_and_wait_for_replication,
+    update_replication_policy,
+    remove_replication_policy,
 )
 from ocs_ci.ocs import ocp
+from ocs_ci.ocs.resources.pvc import get_pvc_objs
 from ocs_ci.ocs.utils import get_pod_name_by_pattern
 from ocs_ci.ocs.resources.pod import (
     delete_pods,
     wait_for_pods_to_be_running,
     get_rgw_pods,
+    get_noobaa_db_pod,
+    get_noobaa_core_pod,
+    wait_for_storage_pods,
+    get_noobaa_pods,
 )
 from ocs_ci.utility.retry import retry
 from ocs_ci.ocs.exceptions import CommandFailed, ResourceWrongStatusException
@@ -227,3 +238,128 @@ def test_replication_with_disruptions(
             mcg_obj_session, source_bucket_name, target_bucket_name
         )
         logger.info("Objects sync works even when the cluster is rebooted")
+
+
+@system_test
+@skipif_vsphere_ipi
+class TestLogBasedReplicationWithDisruptions:
+    def test_log_based_replication_with_disruptions(
+        self,
+        mcg_obj_session,
+        log_based_replication_setup,
+        noobaa_db_backup,
+        noobaa_db_recovery_from_backup,
+    ):
+        """
+        This is a system test flow to test log based bucket replication
+        deletion sync is not impacted due to some noobaa specific disruptions
+        like noobaa pod restarts, noobaa db backup & recovery etc
+
+        1. Setup log based bucket replication between the buckets
+        2. Upload some objects and make sure replication works
+        3. Keep deleting some objects from the source bucket and make sure
+           deletion sync works as expected through out.
+        4. In another thread, restart the noobaa pods (db & core), make sure
+           deletion sync works for the step-3 deletion works as expected
+        5. Now take backup of Noobaa db using PV backup method
+        6. Remove the log based replication rules, perform some deletion in
+           source bucket. make sure deletion sync doesn't work
+        7. Recover noobaa db from the backup taken in step-5
+        8. Now check if deletion sync works by deleting some objects from
+           source bucket. Note: Expectation is still unclear
+        9. Now patch the bucket to remove complete replication policy and
+           make sure no replication - no deletion sync works
+
+        """
+
+        mockup_logger, source_bucket, target_bucket = log_based_replication_setup()
+
+        # upload test objects to the bucket and verify replication
+        upload_test_objects_to_source_and_wait_for_replication(
+            mcg_obj_session,
+            source_bucket,
+            target_bucket,
+            mockup_logger,
+            600,
+        )
+
+        # Delete objects in the first set in a batch and perform noobaa pod
+        # restarts at the same time and make sure deletion sync works
+
+        objs_in_bucket = mockup_logger.standard_test_obj_list
+        objs_to_delete = random.sample(objs_in_bucket, 3)
+
+        @retry(Exception, tries=10, delay=5)
+        def delete_objs_in_batch():
+            for obj in objs_to_delete:
+                mockup_logger.delete_objs_and_log(source_bucket.name, [obj])
+                time.sleep(5)
+            logger.info(f"Successfully deleted these objects: {objs_to_delete}")
+
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            future = executor.submit(delete_objs_in_batch)
+
+            # Restart noobaa pods
+            nb_core_pod = get_noobaa_core_pod()
+            nb_db_pod = get_noobaa_db_pod()
+            nb_core_pod.delete()
+            nb_db_pod.delete()
+            wait_for_pods_to_be_running(pod_names=[nb_core_pod.name, nb_db_pod.name])
+
+            # Wait for the object deletion worker in the BG to completion
+            future.result()
+            assert compare_bucket_object_list(
+                mcg_obj_session,
+                source_bucket.name,
+                target_bucket.name,
+                timeout=600,
+            ), f"Deletion sync failed to complete for the objects {objs_to_delete} deleted in the first bucket set"
+
+        # Take noobaa db backup and remove deletion replication policy for the second bucket set
+        # Get noobaa pods before execution
+        noobaa_pods = get_noobaa_pods()
+
+        # Get noobaa PVC before execution
+        noobaa_pvc_obj = get_pvc_objs(pvc_names=["db-noobaa-db-pg-0"])
+
+        _, snap_obj = noobaa_db_backup(noobaa_pvc_obj)
+
+        disable_deletion_sync = source_bucket.replication_policy
+        disable_deletion_sync["rules"][0]["sync_deletions"] = False
+        update_replication_policy(source_bucket.name, disable_deletion_sync)
+        logger.info("Deleting all the objects from the second bucket")
+        mockup_logger.delete_all_objects_and_log(source_bucket.name)
+        assert not compare_bucket_object_list(
+            mcg_obj_session,
+            source_bucket.name,
+            target_bucket.name,
+            timeout=600,
+        ), "Deletion sync was done but not expected"
+
+        # Do noobaa db recovery and see if the deletion sync works now
+        noobaa_db_recovery_from_backup(snap_obj, noobaa_pvc_obj, noobaa_pods)
+        wait_for_storage_pods()
+
+        assert compare_bucket_object_list(
+            mcg_obj_session,
+            source_bucket.name,
+            target_bucket.name,
+            timeout=600,
+        ), "Deletion sync was not done but expected"
+
+        # Remove replication policy and upload some objects to the bucket
+        # make sure the replication itself doesn't take place
+        remove_replication_policy(source_bucket.name)
+        logger.info("Uploading test objects and waiting for replication to complete")
+        mockup_logger.upload_test_objs_and_log(source_bucket.name)
+
+        logger.info(
+            "Resetting the noobaa-core pod to trigger the replication background worker"
+        )
+
+        assert not compare_bucket_object_list(
+            mcg_obj_session,
+            source_bucket.name,
+            target_bucket.name,
+            timeout=600,
+        ), "Standard replication completed even though replication policy is removed"