diff --git a/conf/README.md b/conf/README.md index 9301c6c4d84..72e178d4a2e 100644 --- a/conf/README.md +++ b/conf/README.md @@ -97,6 +97,8 @@ anywhere else. * `create_ibm_cos_secret`: If this value is set to True (by default), the COS secret is created. If False, it will not be created. Relevant only for IBM Cloud deployment. +* `ceph_debug` - Deploy OCS with Ceph in debug log level. Available starting OCS 4.7 (Default: false) + #### REPORTING diff --git a/docs/usage.md b/docs/usage.md index 86ade7616ba..52828560e7b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -129,7 +129,7 @@ to the pytest. , while '::' is the delimiter * `--dev-mode` - Runs in development mode. Skip the checks like collecting cluster versions, collection ocs versions, health checks etc. - +* `--ceph-debug` - Deploy with Ceph in debug log level. This option is available starting OCS 4.7 ## Examples Deployment and teardown of the test cluster can be done automatically with diff --git a/ocs_ci/deployment/deployment.py b/ocs_ci/deployment/deployment.py index b7318215398..f7e69632bad 100644 --- a/ocs_ci/deployment/deployment.py +++ b/ocs_ci/deployment/deployment.py @@ -46,6 +46,7 @@ get_all_pods, validate_pods_are_respinned_and_running_state, ) +from ocs_ci.ocs.resources.storage_cluster import setup_ceph_debug from ocs_ci.ocs.uninstall import uninstall_ocs from ocs_ci.ocs.utils import setup_ceph_toolbox, collect_ocs_logs from ocs_ci.utility.flexy import load_cluster_info @@ -699,6 +700,12 @@ def deploy_ocs_via_operator(self, image=None): "enable": True, } + if config.DEPLOYMENT.get("ceph_debug"): + setup_ceph_debug() + cluster_data["spec"]["managedResources"] = { + "cephConfig": {"reconcileStrategy": "ignore"} + } + cluster_data_yaml = tempfile.NamedTemporaryFile( mode="w+", prefix="cluster_storage", delete=False ) diff --git a/ocs_ci/framework/conf/default_config.yaml b/ocs_ci/framework/conf/default_config.yaml index d957d133e89..32a66733873 100644 --- a/ocs_ci/framework/conf/default_config.yaml 
+++ b/ocs_ci/framework/conf/default_config.yaml @@ -84,6 +84,8 @@ DEPLOYMENT: kms_deployment: False # define if ceph is setup as arbiter/stretch cluster, default is false arbiter_deployment: False + # Ceph in debug log level + ceph_debug: False # Section for reporting configuration REPORTING: diff --git a/ocs_ci/framework/pytest_customization/ocscilib.py b/ocs_ci/framework/pytest_customization/ocscilib.py index b081d0f202c..93784e6e968 100644 --- a/ocs_ci/framework/pytest_customization/ocscilib.py +++ b/ocs_ci/framework/pytest_customization/ocscilib.py @@ -232,6 +232,15 @@ def pytest_addoption(parser): "versions, collecting logs, etc" ), ) + parser.addoption( + "--ceph-debug", + dest="ceph_debug", + action="store_true", + default=False, + help=( + "For OCS cluster deployment with Ceph configured in debug mode. Available for OCS 4.7 and above" + ), + ) def pytest_configure(config): @@ -483,6 +492,9 @@ def process_cluster_cli_params(config): if collect_logs_on_success_run: ocsci_config.REPORTING["collect_logs_on_success_run"] = True get_cli_param(config, "dev_mode") + ceph_debug = get_cli_param(config, "ceph_debug") + if ceph_debug: + ocsci_config.DEPLOYMENT["ceph_debug"] = True def pytest_collection_modifyitems(session, config, items): diff --git a/ocs_ci/ocs/constants.py b/ocs_ci/ocs/constants.py index 155da72a413..b02750466e9 100644 --- a/ocs_ci/ocs/constants.py +++ b/ocs_ci/ocs/constants.py @@ -547,6 +547,9 @@ EXTERNAL_VAULT_KMS_CONNECTION_DETAILS = os.path.join( EXTERNAL_VAULT_TEMPLATES, "ocs-kms-connection-details.yaml" ) +CEPH_CONFIG_DEBUG_LOG_LEVEL_CONFIGMAP = os.path.join( + TEMPLATE_DEPLOYMENT_DIR, "ceph-debug-log-level-configmap.yaml" +) # constants RBD_INTERFACE = "rbd" @@ -754,6 +757,7 @@ EC2_USER = "ec2-user" OCS_SUBSCRIPTION = "ocs-operator" ROOK_OPERATOR_CONFIGMAP = "rook-ceph-operator-config" +ROOK_CONFIG_OVERRIDE_CONFIGMAP = "rook-config-override" # UI Deployment constants HTPASSWD_SECRET_NAME = "htpass-secret" @@ -1244,3 +1248,20 @@ VOLUMESNAPSHOT 
= "volumesnapshot" PERF_IMAGE = "quay.io/ocsci/perf:latest" + +ROOK_CEPH_CONFIG_VALUES = """ +[global] +mon_osd_full_ratio = .85 +mon_osd_backfillfull_ratio = .8 +mon_osd_nearfull_ratio = .75 +mon_max_pg_per_osd = 600 +[osd] +osd_memory_target_cgroup_limit_ratio = 0.5 +""" +CEPH_DEBUG_CONFIG_VALUES = """ +[mon] +debug_mon = 20 +debug_ms = 1 +debug_paxos = 20 +debug_crush = 20 +""" diff --git a/ocs_ci/ocs/resources/storage_cluster.py b/ocs_ci/ocs/resources/storage_cluster.py index b90ec78d0c5..ab9f4159e3f 100644 --- a/ocs_ci/ocs/resources/storage_cluster.py +++ b/ocs_ci/ocs/resources/storage_cluster.py @@ -1,8 +1,9 @@ """ StorageCluster related functionalities """ -import logging import re +import logging +import tempfile from jsonschema import validate @@ -12,7 +13,7 @@ from ocs_ci.ocs.ocp import get_images, OCP from ocs_ci.ocs.resources.ocs import get_ocs_csv from ocs_ci.ocs.resources.pod import get_pods_having_label, get_osd_pods -from ocs_ci.utility import localstorage, utils +from ocs_ci.utility import localstorage, utils, templating from ocs_ci.ocs.node import get_osds_per_node from ocs_ci.ocs.exceptions import UnsupportedFeatureError from ocs_ci.utility.rgwutils import get_rgw_count @@ -591,3 +592,26 @@ def get_all_storageclass(): ) ] return storageclass + + +def setup_ceph_debug(): + """ + Set Ceph to run in debug log level using a ConfigMap. + This functionality is available starting OCS 4.7. 
+ + """ + ceph_debug_log_configmap_data = templating.load_yaml( + constants.CEPH_CONFIG_DEBUG_LOG_LEVEL_CONFIGMAP + ) + ceph_debug_log_configmap_data["data"]["config"] = ( + constants.ROOK_CEPH_CONFIG_VALUES + constants.CEPH_DEBUG_CONFIG_VALUES + ) + + ceph_configmap_yaml = tempfile.NamedTemporaryFile( + mode="w+", prefix="config_map", delete=False + ) + templating.dump_data_to_temp_yaml( + ceph_debug_log_configmap_data, ceph_configmap_yaml.name + ) + log.info("Setting Ceph to work in debug log level using a new ConfigMap resource") + run_cmd(f"oc create -f {ceph_configmap_yaml.name}") diff --git a/ocs_ci/templates/ocs-deployment/ceph-debug-log-level-configmap.yaml b/ocs_ci/templates/ocs-deployment/ceph-debug-log-level-configmap.yaml new file mode 100644 index 00000000000..91e24991bf5 --- /dev/null +++ b/ocs_ci/templates/ocs-deployment/ceph-debug-log-level-configmap.yaml @@ -0,0 +1,7 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: rook-config-override + namespace: openshift-storage +data: + config: null diff --git a/tests/manage/z_cluster/test_ceph_default_values_check.py b/tests/manage/z_cluster/test_ceph_default_values_check.py index c152202b28a..72ba6e75926 100644 --- a/tests/manage/z_cluster/test_ceph_default_values_check.py +++ b/tests/manage/z_cluster/test_ceph_default_values_check.py @@ -1,9 +1,14 @@ +import collections import logging import pytest from ocs_ci.framework.testlib import ManageTest, tier1, skipif_external_mode from ocs_ci.ocs.resources import pod from ocs_ci.ocs.cluster import get_pg_balancer_status +from ocs_ci.framework import config +from ocs_ci.ocs.ocp import OCP +from ocs_ci.ocs import constants, defaults + log = logging.getLogger(__name__) @@ -65,3 +70,33 @@ def test_ceph_default_values_check(self): # Check if PG balancer is active assert get_pg_balancer_status(), "PG balancer is not active" + + @tier1 + @pytest.mark.skipif( + config.DEPLOYMENT.get("ceph_debug"), + reason="Ceph was configured with customized values by ocs-ci so 
there is no point in validating its config values", + ) + def test_validate_ceph_config_values_in_rook_config_override(self): + """ + Test case for comparing the cluster's config values of + Ceph, set by rook-config-override configMap, with the static set of configuration saved in ocs-ci + + """ + cm_obj = OCP( + kind="configmap", + namespace=defaults.ROOK_CLUSTER_NAMESPACE, + resource_name=constants.ROOK_CONFIG_OVERRIDE_CONFIGMAP, + ) + config_data = cm_obj.get()["data"]["config"] + config_data = config_data.split("\n") + log.info( + "Validating that the Ceph values, configured by rook-config-override " + "configMap, match the ones stored in ocs-ci" + ) + stored_values = constants.ROOK_CEPH_CONFIG_VALUES.split("\n") + assert collections.Counter(config_data) == collections.Counter(stored_values), ( + f"The Ceph config, set by {constants.ROOK_CONFIG_OVERRIDE_CONFIGMAP} " + f"is different than the expected. Please inform OCS-QE about this discrepancy. " + f"The expected values are:\n{stored_values}\n" + f"The cluster's Ceph values are:{config_data}" + )