From 46079cf913463a899dac8ad490069ee1d1970578 Mon Sep 17 00:00:00 2001 From: Mahesh Shetty Date: Fri, 6 Dec 2024 18:21:56 +0530 Subject: [PATCH] Background check implementation and Implementing multiplier upload Signed-off-by: Mahesh Shetty --- ocs_ci/helpers/helpers.py | 18 +++ ocs_ci/helpers/mcg_stress_helper.py | 111 ++++++++++++++++-- ocs_ci/ocs/constants.py | 5 + tests/conftest.py | 4 + .../stress/test_noobaa_under_stress.py | 19 +++ 5 files changed, 147 insertions(+), 10 deletions(-) diff --git a/ocs_ci/helpers/helpers.py b/ocs_ci/helpers/helpers.py index e3fca3a71762..623d94ffcb40 100644 --- a/ocs_ci/helpers/helpers.py +++ b/ocs_ci/helpers/helpers.py @@ -4298,6 +4298,24 @@ def get_mon_db_size_in_kb(mon_pod_obj): return mon_db_size_kb +def get_noobaa_db_size(): + """ + Get noobaa db size + + Returns: + str: Noobaa db size + + """ + noobaa_db_pod_obj = pod.get_noobaa_pods( + noobaa_label=constants.NOOBAA_DB_LABEL_47_AND_ABOVE + ) + cmd_out = noobaa_db_pod_obj[0].exec_cmd_on_pod( + command="df -h /var/lib/pgsql/", out_yaml_format=False + ) + df_out = cmd_out.split() + return df_out[2] + + def get_noobaa_db_used_space(): """ Get noobaa db size diff --git a/ocs_ci/helpers/mcg_stress_helper.py b/ocs_ci/helpers/mcg_stress_helper.py index f78ca692ba7e..3eea7f57fcb2 100644 --- a/ocs_ci/helpers/mcg_stress_helper.py +++ b/ocs_ci/helpers/mcg_stress_helper.py @@ -1,6 +1,9 @@ import logging import concurrent.futures +import time +from ocs_ci.helpers.helpers import get_noobaa_db_size, get_noobaa_db_used_space +from ocs_ci.ocs import constants from ocs_ci.ocs.resources.mcg import MCG from ocs_ci.ocs.resources.objectbucket import OBC from ocs_ci.ocs.resources.bucket_policy import NoobaaAccount @@ -14,11 +17,20 @@ list_objects_in_batches, s3_delete_objects, ) +from ocs_ci.utility.retry import retry +from ocs_ci.ocs.cluster import CephCluster +from ocs_ci.ocs.exceptions import ( + NoobaaHealthException, + CephHealthException, + CommandFailed, +) logger = logging.getLogger(__name__) -def upload_objs_to_buckets(mcg_obj, pod_obj, buckets, iteration_no, event=None): +def upload_objs_to_buckets( + mcg_obj, pod_obj, buckets, iteration_no, event=None, multiplier=1 +): """ This will upload objects present in the stress-cli pod to the buckets provided concurrently @@ -46,15 +58,16 @@ def upload_objs_to_buckets(mcg_obj, pod_obj, buckets, iteration_no, event=None): logger.info( f"OBJECT UPLOAD: Uploading objects to the bucket {bucket.name}" ) - future = executor.submit( - sync_object_directory, - pod_obj, - src_path, - f"s3://{bucket.name}/{iteration_no}/", - s3_obj, - timeout=20000, - ) - futures.append(future) + for i in range(multiplier): + future = executor.submit( + sync_object_directory, + pod_obj, + src_path, + f"s3://{bucket.name}/{iteration_no}/{i+1}/", + s3_obj, + timeout=20000, + ) + futures.append(future) logger.info( "OBJECT UPLOAD: Waiting for the objects upload to complete for all the buckets" @@ -353,3 +366,81 @@ def delete_objects_in_batches(bucket, batch_size): logger.info( f"Total objects deleted {total_objs_deleted} in bucket {bucket_name}" ) + + +def run_background_cluster_checks(scale_noobaa_db_pv, event=None): + """ + Run background checks to verify noobaa health + and cluster health overall + + 1. Check Noobaa Health + 2. Check Ceph Health + 3. Check Noobaa db usage + 4. Check for any alerts + 5. 
Memory and CPU utilization
+
+    Args:
+        scale_noobaa_db_pv (function): Callable that scales the noobaa db pv,
+            e.g. the scale_noobaa_db_pod_pv_size fixture
+        event (threading.Event): Event used to signal the background checks to stop
+
+    """
+    ceph_cluster = CephCluster()
+
+    @retry(NoobaaHealthException, tries=10, delay=60)
+    def check_noobaa_health():
+        while True:
+            ceph_cluster.noobaa_health_check()
+            logger.info("BACKGROUND CHECK: Noobaa is healthy... rechecking in 1 minute")
+            time.sleep(60)
+
+            if event and event.is_set():
+                logger.info("BACKGROUND CHECK: Stopping the Noobaa health check")
+                break
+
+    @retry(CephHealthException, tries=10, delay=60)
+    def check_ceph_health():
+        while True:
+            if ceph_cluster.get_ceph_health() == constants.CEPH_HEALTH_ERROR:
+                raise CephHealthException("Ceph cluster health is in HEALTH_ERR state")
+            logger.info("BACKGROUND CHECK: Ceph is healthy... rechecking in 1 minute")
+            time.sleep(60)
+
+            if event and event.is_set():
+                logger.info("BACKGROUND CHECK: Stopping the Ceph health check")
+                break
+
+    @retry(CommandFailed, tries=10, delay=60)
+    def check_noobaa_db_size():
+        while True:
+            # get_noobaa_db_used_space/get_noobaa_db_size return df -h style
+            # strings (e.g. "50G"); strip the G suffix before doing arithmetic
+            nb_db_pv_used = float(get_noobaa_db_used_space().split("G")[0])
+            nb_db_pv_size = float(get_noobaa_db_size().split("G")[0])
+            used_percent = int((nb_db_pv_used * 100) / nb_db_pv_size)
+            if used_percent > 85:
+                logger.info(
+                    f"BACKGROUND CHECK: Noobaa db usage is at {used_percent}%. Increasing the noobaa db pv size by 50%"
+                )
+                new_size = int(nb_db_pv_size * 1.5)
+                scale_noobaa_db_pv(pvc_size=new_size)
+                logger.info(
+                    f"BACKGROUND CHECK: Scaled noobaa db to new size {new_size}"
+                )
+            logger.info(
+                f"BACKGROUND CHECK: Current noobaa db usage is at {used_percent}%... Rechecking in 5 minutes..."
+            )
+            time.sleep(300)
+
+            if event and event.is_set():
+                logger.info("BACKGROUND CHECK: Stopping the Noobaa db size check")
+                break
+
+    logger.info("Initiating background ops")
+    executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
+    futures_obj = list()
+    futures_obj.append(executor.submit(check_noobaa_health))
+    futures_obj.append(executor.submit(check_ceph_health))
+    futures_obj.append(executor.submit(check_noobaa_db_size))
+
+    for future in futures_obj:
+        future.result()
diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py
index 0666b7aa31aa..95c31de92276 100644
--- a/ocs_ci/ocs/constants.py
+++ b/ocs_ci/ocs/constants.py
@@ -140,6 +140,11 @@
 HEALTHY_OB_CLI_MODE = "Mode:OPTIMAL"
 HEALTHY_PV_BS = ["OPTIMAL", "LOW_CAPACITY"]
 
+# Ceph health states
+CEPH_HEALTH_WARN = "HEALTH_WARN"
+CEPH_HEALTH_ERROR = "HEALTH_ERR"
+CEPH_HEALTH_OK = "HEALTH_OK"
+
 # noobaa-core config.js parameters
 CONFIG_JS_PREFIX = "CONFIG_JS_"
 BUCKET_REPLICATOR_DELAY_PARAM = CONFIG_JS_PREFIX + "BUCKET_REPLICATOR_DELAY"
diff --git a/tests/conftest.py b/tests/conftest.py
index 2e77d31d14f3..c40905daa1d2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8390,6 +8390,10 @@ def finalizer():
 
 @pytest.fixture()
 def scale_noobaa_db_pod_pv_size(request):
+    return scale_noobaa_db_pv(request)
+
+
+def scale_noobaa_db_pv(request):
     """
     This fixture helps to scale the noobaa db pv size. It
     follows KCS: https://access.redhat.com/solutions/6976547
diff --git a/tests/cross_functional/stress/test_noobaa_under_stress.py b/tests/cross_functional/stress/test_noobaa_under_stress.py
index 23b9646d72cd..e7c5bca81adb 100644
--- a/tests/cross_functional/stress/test_noobaa_under_stress.py
+++ b/tests/cross_functional/stress/test_noobaa_under_stress.py
@@ -11,6 +11,7 @@
     list_objs_from_bucket,
     download_objs_from_bucket,
     delete_objects_in_batches,
+    run_background_cluster_checks,
 )
 
 logger = logging.getLogger(__name__)
@@ -29,6 +30,7 @@ def test_noobaa_under_stress(
     rgw_obj_session,
     stress_test_directory_setup,
     bucket_factory,
+    scale_noobaa_resources_session,
 ):
     """
     Stress Noobaa by performing bulk s3 operations. This consists mainly 3 stages
@@ -162,3 +164,25 @@
     logger.info("Waiting for all the delete object operations to complete")
     for future in futures:
         future.result()
+
+
+def test_sample(scale_noobaa_db_pod_pv_size):
+    """
+    Sample test that runs the background cluster checks for five minutes
+    and then signals them to stop
+
+    """
+    import time
+
+    bg_event = Event()
+    executor = ThreadPoolExecutor(max_workers=1)
+
+    bg_future = executor.submit(
+        run_background_cluster_checks, scale_noobaa_db_pod_pv_size, event=bg_event
+    )
+
+    # let the background checks run for a while before stopping them
+    time.sleep(300)
+
+    bg_event.set()
+    bg_future.result()
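
For reference, the multiplier added to upload_objs_to_buckets fans each bucket's upload out into several sync operations, one per copy, each writing under its own s3://<bucket>/<iteration>/<copy>/ prefix. The standalone sketch below reproduces that fan-out outside the framework; do_sync and upload_with_multiplier are illustrative stand-ins, not part of ocs_ci.

# Standalone sketch of the multiplier fan-out; do_sync stands in for
# sync_object_directory, and all names here are illustrative only.
from concurrent.futures import ThreadPoolExecutor


def do_sync(src_path, dst_uri):
    # placeholder for the real sync of a local directory to an S3 prefix
    print(f"syncing {src_path} -> {dst_uri}")


def upload_with_multiplier(buckets, src_path, iteration_no, multiplier=1):
    futures = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        for bucket in buckets:
            # each copy lands under its own prefix, e.g. s3://bucket-a/2/1/
            for i in range(multiplier):
                dst = f"s3://{bucket}/{iteration_no}/{i + 1}/"
                futures.append(executor.submit(do_sync, src_path, dst))
    for future in futures:
        future.result()


upload_with_multiplier(["bucket-a", "bucket-b"], "/stress/objects", iteration_no=2, multiplier=3)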
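
check_noobaa_db_size assumes get_noobaa_db_used_space and get_noobaa_db_size return df -h style strings with a G suffix; if another unit ever shows up, the percentage math breaks. Below is a minimal unit-aware sketch of that computation, assuming the same string format. parse_df_size and db_used_percent are hypothetical helpers introduced only for this illustration.

def parse_df_size(value):
    """Convert a df -h style size string (e.g. '50G', '512M') to GiB as a float."""
    units = {"K": 1 / (1024 * 1024), "M": 1 / 1024, "G": 1, "T": 1024}
    return float(value[:-1]) * units[value[-1].upper()]


def db_used_percent(used_str, size_str):
    """Return the used percentage of the noobaa db pv, rounded down."""
    return int(parse_df_size(used_str) * 100 / parse_df_size(size_str))


if __name__ == "__main__":
    # Example: 46G used out of a 50G pv -> 92%, which would trigger the resize
    print(db_used_percent("46G", "50G"))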
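
scale_noobaa_db_pod_pv_size (and the scale_noobaa_db_pv helper extracted from it) ultimately grow the noobaa db PVC, following the referenced KCS. As a rough standalone sketch of what that resize amounts to, the snippet below patches the PVC directly with oc. It assumes the PVC is named db-noobaa-db-pg-0 in the openshift-storage namespace and that the storage class allows volume expansion; it is not the fixture's actual implementation.

# Hedged sketch, not the fixture's implementation. Assumes the noobaa db PVC is
# "db-noobaa-db-pg-0" in "openshift-storage" and that `oc` is already logged in.
import json
import subprocess


def expand_noobaa_db_pvc(new_size_gi, pvc_name="db-noobaa-db-pg-0", namespace="openshift-storage"):
    """Patch the noobaa db PVC to request a larger size (expansion only)."""
    patch = {"spec": {"resources": {"requests": {"storage": f"{new_size_gi}Gi"}}}}
    subprocess.run(
        ["oc", "-n", namespace, "patch", "pvc", pvc_name, "--type", "merge", "-p", json.dumps(patch)],
        check=True,
    )


# Example: grow the pv to 75Gi once usage crosses the 85% threshold
# expand_noobaa_db_pvc(75)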