Commit
Fixed issue with Endpoint pod count is not expected (red-hat-storage#8857)

* Fixed issue: Endpoint pod count is not expected, Timed out after 900s
  running get("", True, "noobaa-s3=noobaa") red-hat-storage#8843

Signed-off-by: tiffanyn108 <[email protected]>
tiffanyn108 committed Mar 18, 2024
1 parent ffaf9fe commit a7936c0
Showing 1 changed file with 57 additions and 39 deletions.
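
The change replaces the fixed ten-job load (which asserted exactly two endpoint pods) with a polling loop: fio jobs are created one at a time, and the NooBaa endpoint pod count and HPA CPU utilization are re-checked every 30 seconds until the autoscaler reaches MAX_ENDPOINT_COUNT, after which all jobs are deleted. A minimal sketch of that pattern, assuming the ocs_ci helpers behave as they are used in the diff below (the function name scale_endpoints_under_load and the list-based job bookkeeping are illustrative only, not the committed test code):

    import logging
    import time

    from ocs_ci.framework import config
    from ocs_ci.ocs.scale_noobaa_lib import get_endpoint_pod_count, get_hpa_utilization

    log = logging.getLogger(__name__)


    def scale_endpoints_under_load(mcg_job_factory, options, max_endpoints=2, wait_time=30):
        """Create fio jobs until the NooBaa endpoint HPA scales to max_endpoints."""
        namespace = config.ENV_DATA["cluster_namespace"]
        jobs = []
        endpoint_cnt = get_endpoint_pod_count(namespace)
        while endpoint_cnt < max_endpoints:
            # Each factory call submits another fio job to add S3 load.
            jobs.append(mcg_job_factory(custom_options=options))
            time.sleep(wait_time)
            endpoint_cnt = get_endpoint_pod_count(namespace)
            log.info(
                "HPA CPU utilization by noobaa-endpoint is %s%%",
                get_hpa_utilization(namespace),
            )
        # Clean up and let the autoscaler fall back to the minimum endpoint count.
        for job in jobs:
            job.delete()
            job.ocp.wait_for_delete(resource_name=job.name, timeout=60)

Keeping the created job objects in a list is a simplification of the exec-based job naming used in the committed test; the scale-up and cleanup logic follows the same steps.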
96 changes: 57 additions & 39 deletions tests/manage/mcg/test_endpoint_autoscale.py
@@ -1,3 +1,5 @@
import time
import logging
from ocs_ci.framework import config
from ocs_ci.framework.testlib import MCGTest, tier1, skipif_ocs_version
from ocs_ci.ocs import constants, ocp
@@ -6,7 +8,9 @@
red_squad,
mcg,
)
from ocs_ci.ocs.scale_noobaa_lib import get_endpoint_pod_count, get_hpa_utilization

log = logging.getLogger(__name__)

# @pytest.mark.polarion_id("OCS-XXXX")
# Skipped above 4.6 because of https://github.com/red-hat-storage/ocs-ci/issues/4129
@@ -27,47 +31,61 @@ class TestEndpointAutoScale(MCGTest):
# with an autoscaling configuration of 1-2
MIN_ENDPOINT_COUNT = 1
MAX_ENDPOINT_COUNT = 2
options = {
"create": [
("name", "job1"),
("name", "job2"),
("name", "job3"),
("runtime", "1200"),
],
"job1": [
("iodepth", "4"),
("rw", "randrw"),
("bs", "32k"),
("size", "64m"),
("numjobs", "4"),
],
"job2": [
("iodepth", "16"),
("rw", "randrw"),
("bs", "64k"),
("size", "512m"),
("numjobs", "4"),
],
"job3": [
("iodepth", "32"),
("rw", "randrw"),
("bs", "128k"),
("size", "1024m"),
("numjobs", "4"),
],
}

def test_scaling_under_load(self, mcg_job_factory):
self._assert_endpoint_count(1)

options = {
"create": [
("name", "job1"),
("name", "job2"),
("name", "job3"),
("runtime", "1200"),
],
"job1": [
("iodepth", "4"),
("rw", "randrw"),
("bs", "32k"),
("size", "64m"),
("numjobs", "4"),
],
"job2": [
("iodepth", "16"),
("rw", "randrw"),
("bs", "64k"),
("size", "512m"),
("numjobs", "4"),
],
"job3": [
("iodepth", "32"),
("rw", "randrw"),
("bs", "128k"),
("size", "1024m"),
("numjobs", "4"),
],
}
for i in range(10):
exec(f"job{i} = mcg_job_factory(custom_options={options})")
self._assert_endpoint_count(2)

for i in range(10):
exec(f"job{i}.delete()")
exec(f"job{i}.ocp.wait_for_delete(resource_name=job{i}.name, timeout=60)")
self._assert_endpoint_count(1)
self._assert_endpoint_count(self.MIN_ENDPOINT_COUNT)
endpoint_cnt = get_endpoint_pod_count(config.ENV_DATA["cluster_namespace"])
get_hpa_utilization(config.ENV_DATA["cluster_namespace"])
job_cnt = 0
wait_time = 30
job_list = list()
while endpoint_cnt < self.MAX_ENDPOINT_COUNT:
exec(f"job{job_cnt} = mcg_job_factory(custom_options=self.options)")
job_list.append(f"job{job_cnt}")
time.sleep(wait_time)
endpoint_cnt = get_endpoint_pod_count(config.ENV_DATA["cluster_namespace"])
hpa_cpu_utilization = get_hpa_utilization(
config.ENV_DATA["cluster_namespace"]
)
log.info(
f"HPA CPU utilization by noobaa-endpoint is {hpa_cpu_utilization}%"
)
if endpoint_cnt == self.MAX_ENDPOINT_COUNT:
break
job_cnt += 1
for i in job_list:
exec(f"{i}.delete()")
exec(f"{i}.ocp.wait_for_delete(resource_name={i}.name, timeout=60)")
self._assert_endpoint_count(self.MIN_ENDPOINT_COUNT)

def _assert_endpoint_count(self, desired_count):
pod = ocp.OCP(
