Skip to content

Commit

Permalink
Background check implementation and Implementing multiplier upload
Browse files Browse the repository at this point in the history
Signed-off-by: Mahesh Shetty <[email protected]>
  • Loading branch information
Mahesh Shetty authored and Mahesh Shetty committed Dec 6, 2024
1 parent 6697ef7 commit 46079cf
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 10 deletions.
18 changes: 18 additions & 0 deletions ocs_ci/helpers/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4298,6 +4298,24 @@ def get_mon_db_size_in_kb(mon_pod_obj):
return mon_db_size_kb


def get_noobaa_db_size():
    """
    Get the total size of the filesystem mounted at the noobaa db data dir

    Runs ``df -h /var/lib/pgsql/`` on the first noobaa db pod and picks the
    "Size" column of the data row.

    Returns:
        str: Noobaa db size exactly as printed by ``df -h`` (e.g. "50G")

    """
    noobaa_db_pod_obj = pod.get_noobaa_pods(
        noobaa_label=constants.NOOBAA_DB_LABEL_47_AND_ABOVE
    )
    cmd_out = noobaa_db_pod_obj[0].exec_cmd_on_pod(
        command="df -h /var/lib/pgsql/", out_yaml_format=False
    )
    df_out = cmd_out.split()
    # The df output starts with a header row
    # ("Filesystem Size Used Avail Use% Mounted on"), so index 2 of the
    # whitespace-split output is the literal header word "Used".
    # The data row ends with: <Size> <Used> <Avail> <Use%> <Mounted on>,
    # hence the size is the fifth token counted from the end.
    return df_out[-5]


def get_noobaa_db_used_space():
"""
Get noobaa db size
Expand Down
111 changes: 101 additions & 10 deletions ocs_ci/helpers/mcg_stress_helper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import logging
import concurrent.futures
import time

from ocs_ci.helpers.helpers import get_noobaa_db_size, get_noobaa_db_used_space
from ocs_ci.ocs import constants
from ocs_ci.ocs.resources.mcg import MCG
from ocs_ci.ocs.resources.objectbucket import OBC
from ocs_ci.ocs.resources.bucket_policy import NoobaaAccount
Expand All @@ -14,11 +17,20 @@
list_objects_in_batches,
s3_delete_objects,
)
from ocs_ci.utility.retry import retry
from ocs_ci.ocs.cluster import CephCluster
from ocs_ci.ocs.exceptions import (
NoobaaHealthException,
CephHealthException,
CommandFailed,
)

logger = logging.getLogger(__name__)


def upload_objs_to_buckets(mcg_obj, pod_obj, buckets, iteration_no, event=None):
def upload_objs_to_buckets(
mcg_obj, pod_obj, buckets, iteration_no, event=None, multiplier=1
):
"""
This will upload objects present in the stress-cli pod
to the buckets provided concurrently
Expand Down Expand Up @@ -46,15 +58,16 @@ def upload_objs_to_buckets(mcg_obj, pod_obj, buckets, iteration_no, event=None):
logger.info(
f"OBJECT UPLOAD: Uploading objects to the bucket {bucket.name}"
)
future = executor.submit(
sync_object_directory,
pod_obj,
src_path,
f"s3://{bucket.name}/{iteration_no}/",
s3_obj,
timeout=20000,
)
futures.append(future)
for i in range(multiplier):
future = executor.submit(
sync_object_directory,
pod_obj,
src_path,
f"s3://{bucket.name}/{iteration_no}/{i+1}/",
s3_obj,
timeout=20000,
)
futures.append(future)

logger.info(
"OBJECT UPLOAD: Waiting for the objects upload to complete for all the buckets"
Expand Down Expand Up @@ -353,3 +366,81 @@ def delete_objects_in_batches(bucket, batch_size):
logger.info(
f"Total objects deleted {total_objs_deleted} in bucket {bucket_name}"
)


def _df_size_to_gib(size):
    """
    Convert a human readable size as printed by ``df -h`` into GiB

    Args:
        size (str): Value such as "49G", "2.3G", "512M" or "1.5T". A bare
            number is taken as bytes.

    Returns:
        float: The size expressed in GiB

    Raises:
        ValueError: If the value can not be parsed as a size

    """
    gib_per_unit = {
        "": 1 / 1024**3,
        "B": 1 / 1024**3,
        "K": 1 / 1024**2,
        "M": 1 / 1024,
        "G": 1,
        "T": 1024,
        "P": 1024**2,
    }
    text = str(size).strip().upper()
    # Strip the whole suffix ("G", "GI", "GIB", ...) and keep its first letter
    number = text.rstrip("BIKMGTP")
    unit = text[len(number) :][:1]
    try:
        return float(number) * gib_per_unit[unit]
    except (KeyError, ValueError) as err:
        raise ValueError(f"Unable to parse size {size!r} reported by df") from err


def run_background_cluster_checks(scale_noobaa_db_pv, event=None):
    """
    Run background checks to verify noobaa health
    and cluster health overall

    The following checks run concurrently, each in its own thread, until
    the stop event is set:
        1. Check Noobaa Health (every minute)
        2. Check Ceph Health (every minute)
        3. Check Noobaa db usage (every 5 minutes) and grow the noobaa db
           pv by 50% once the usage crosses 85%

    Alerts and memory/CPU utilization are not checked by this function.

    Args:
        scale_noobaa_db_pv (callable): Called as
            ``scale_noobaa_db_pv(pvc_size=<int>)`` to resize the noobaa db pv.
            NOTE(review): the size is passed as a whole number of GiB,
            confirm against the factory implementation.
        event (threading.Event): Set it to stop all the checks. If None,
            the checks keep running until one of them fails.

    Raises:
        NoobaaHealthException: If noobaa stays unhealthy after the retries
        CephHealthException: If ceph stays in HEALTH_ERR after the retries
        CommandFailed: If the db size commands keep failing after the retries
        ValueError: If the df output can not be parsed into sizes

    """
    import math
    import threading

    # A private, never-set event keeps the loops below uniform when the
    # caller did not provide a stop signal.
    if event is None:
        event = threading.Event()

    ceph_cluster = CephCluster()

    @retry(NoobaaHealthException, tries=10, delay=60)
    def check_noobaa_health():
        while True:
            ceph_cluster.noobaa_health_check()
            logger.info("BACKGROUND CHECK: Noobaa is healthy... rechecking in 1 minute")
            # wait() doubles as the sleep and returns early once the event is set
            if event.wait(timeout=60):
                logger.info("BACKGROUND CHECK: Stopping the Noobaa health check")
                break

    @retry(CephHealthException, tries=10, delay=60)
    def check_ceph_health():
        while True:
            if ceph_cluster.get_ceph_health() == constants.CEPH_HEALTH_ERROR:
                raise CephHealthException
            logger.info("BACKGROUND CHECK: Ceph is healthy... rechecking in 1 minute")
            if event.wait(timeout=60):
                logger.info("BACKGROUND CHECK: Stopping the Ceph health check")
                break

    @retry(CommandFailed, tries=10, delay=60)
    def check_noobaa_db_size():
        while True:
            # Both helpers return df strings such as "2.3G"; convert them to
            # numbers in a common unit before doing any arithmetic.
            nb_db_pv_used = _df_size_to_gib(get_noobaa_db_used_space())
            nb_db_pv_size = _df_size_to_gib(get_noobaa_db_size())
            if nb_db_pv_size <= 0:
                raise ValueError(
                    f"Invalid noobaa db size reported by df: {nb_db_pv_size}"
                )
            used_percent = int((nb_db_pv_used * 100) / nb_db_pv_size)
            if used_percent > 85:
                logger.info(
                    f"BACKGROUND CHECK: Noobaa db is {used_percent} percentage. Increasing the noobaa db by 50%"
                )
                # Round up so that the requested size is never below 150%
                new_size = math.ceil(nb_db_pv_size * 1.5)
                scale_noobaa_db_pv(pvc_size=new_size)
                logger.info(
                    f"BACKGROUND CHECK: Scaled noobaa db to new size {new_size}"
                )
            logger.info(
                f"BACKGROUND CHECK: Current noobaa db usage is at {used_percent}%... Rechecking in 5 minutes..."
            )
            if event.wait(timeout=300):
                logger.info("BACKGROUND CHECK: Stopping the Noobaa db size check")
                break

    logger.info("Initiating background ops")
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
    try:
        futures_obj = [
            executor.submit(check)
            for check in (check_noobaa_health, check_ceph_health, check_noobaa_db_size)
        ]
        # Consume the futures in completion order so that a failing check is
        # reported right away instead of waiting behind a healthy, still
        # looping one.
        for future in concurrent.futures.as_completed(futures_obj):
            future.result()
    finally:
        # Release the pool without blocking on checks that are still looping
        executor.shutdown(wait=False)
5 changes: 5 additions & 0 deletions ocs_ci/ocs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@
HEALTHY_OB_CLI_MODE = "Mode:OPTIMAL"
HEALTHY_PV_BS = ["OPTIMAL", "LOW_CAPACITY"]

# Ceph cluster health status strings
CEPH_HEALTH_WARN = "HEALTH_WARN"
CEPH_HEALTH_ERROR = "HEALTH_ERR"
CEPH_HEALTH_OK = "HEALTH_OK"

# noobaa-core config.js parameters
CONFIG_JS_PREFIX = "CONFIG_JS_"
BUCKET_REPLICATOR_DELAY_PARAM = CONFIG_JS_PREFIX + "BUCKET_REPLICATOR_DELAY"
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8390,6 +8390,10 @@ def finalizer():

@pytest.fixture()
def scale_noobaa_db_pod_pv_size(request):
    """
    Fixture wrapper around scale_noobaa_db_pv

    Args:
        request: The pytest request object, forwarded unchanged

    Returns:
        The object returned by scale_noobaa_db_pv(request)

    """
    noobaa_db_pv_scaler = scale_noobaa_db_pv(request)
    return noobaa_db_pv_scaler


def scale_noobaa_db_pv(request):
"""
This fixtue helps to scale the noobaa db pv size.
follows KCS: https://access.redhat.com/solutions/6976547
Expand Down
19 changes: 19 additions & 0 deletions tests/cross_functional/stress/test_noobaa_under_stress.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
list_objs_from_bucket,
download_objs_from_bucket,
delete_objects_in_batches,
run_background_cluster_checks,
)

logger = logging.getLogger(__name__)
Expand All @@ -29,6 +30,7 @@ def test_noobaa_under_stress(
rgw_obj_session,
stress_test_directory_setup,
bucket_factory,
scale_noobaa_resources_session,
):
"""
Stress Noobaa by performing bulk s3 operations. This consists mainly 3 stages
Expand Down Expand Up @@ -162,3 +164,20 @@ def test_noobaa_under_stress(
logger.info("Waiting for all the delete object operations to complete")
for future in futures:
future.result()


def test_sample(scale_noobaa_db_pod_pv_size):
    """
    Run the background cluster checks for 5 minutes and then stop them

    The checks run in a worker thread; the stop event is always set, even
    if the wait is interrupted, so the background loops are not left
    running. Any exception raised by the checks is re-raised here through
    the future.

    Args:
        scale_noobaa_db_pod_pv_size: Fixture value handed over to
            run_background_cluster_checks for resizing the noobaa db pv

    """
    import time

    bg_event = Event()
    # The context manager shuts the pool down once the background job is done
    with ThreadPoolExecutor(max_workers=1) as executor:
        bg_future = executor.submit(
            run_background_cluster_checks, scale_noobaa_db_pod_pv_size, event=bg_event
        )
        try:
            time.sleep(300)
        finally:
            bg_event.set()
        bg_future.result()

0 comments on commit 46079cf

Please sign in to comment.