Skip to content

Commit

Permalink
Rosa hcp node labeling (red-hat-storage#11024)
Browse files Browse the repository at this point in the history
* label nodes for node stop and autoscaling

Signed-off-by: Daniel Osypenko <[email protected]>
  • Loading branch information
DanielOsypenko authored Dec 11, 2024
1 parent 2935738 commit 71fee55
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 4 deletions.
1 change: 1 addition & 0 deletions conf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ higher priority).
* `continue_upgrade_after_checks_even_if_not_healthy` - if set to true Rook will continue the OSD daemon upgrade process even if the PGs are not clean.
* `upgrade_osd_requires_healthy_pgs` - If set to true OSD upgrade process won't start until PGs are healthy.
* `workaround_mark_disks_as_ssd` - WORKAROUND: mark disks as SSD (not rotational - `0` in `/sys/block/*d*/queue/rotational`)
* `node_labels` - Comma-separated labels to be applied to the nodes in the cluster, e.g. 'cluster.ocs.openshift.io/openshift-storage="",node-role.kubernetes.io/infra=""', default - empty string

#### UPGRADE

Expand Down
1 change: 1 addition & 0 deletions conf/deployment/aws/rosa_hcp_1az_3w_m5.12x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ ENV_DATA:
ms_env_type: "staging"
addon_name: "ocs-converged"
persistent-monitoring: false
node_labels: cluster.ocs.openshift.io/openshift-storage=""
1 change: 1 addition & 0 deletions conf/deployment/aws/rosa_hcp_1az_6w_m5.2x.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ ENV_DATA:
ms_env_type: "staging"
addon_name: "ocs-converged"
persistent-monitoring: false
node_labels: cluster.ocs.openshift.io/openshift-storage=""
5 changes: 5 additions & 0 deletions ocs_ci/deployment/rosa.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ def deploy(self, log_level=""):
machinepool_details.wait_replicas_ready(
target_replicas=config.ENV_DATA["worker_replicas"], timeout=1200
)
if node_labels := config.ENV_DATA.get("node_labels"):
if machinepool_id := config.ENV_DATA.get("machine_pool"):
rosa.label_nodes(
self.cluster_name, machinepool_id, node_labels, rewrite=False
)

logger.info("generate kubeconfig and kubeadmin-password files")
if config.ENV_DATA["ms_env_type"] == "staging":
Expand Down
3 changes: 3 additions & 0 deletions ocs_ci/framework/conf/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,9 @@ ENV_DATA:
#RDR Green field
rdr_osd_deployment_mode: "greenfield"

# Labels to apply to the cluster nodes, used for example for ODF deployment on ROSA HCP
node_labels: ""

# Assisted Installer related settings

# This section is related to upgrade
Expand Down
13 changes: 9 additions & 4 deletions ocs_ci/ocs/machinepool.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class MachinePool:
exist: bool = field(
default=False
) # not a part of the data fetched from the cluster
labels: Dict[str, str] = field(default_factory=dict)

def __post_init__(self):
"""Automatically populate fields by fetching machine pool details."""
Expand Down Expand Up @@ -173,8 +174,16 @@ def from_dict(cls, data: dict, cluster_name=None):
"id"
), # this parameter is different in node_conf and data fetched from machinepool
cluster_name=cluster_name,
labels=data.get("labels", {}),
)

def refresh(self):
    """
    Re-fetch this machine pool and sync the local attributes with the result.

    The pool is looked up again by ``self.cluster_name`` and
    ``self.machinepool_id``. When the lookup returns a truthy object, all of its
    instance attributes are copied onto this instance and ``exist`` is set to
    True. When the lookup returns nothing, this instance is left untouched.
    """
    fetched = self.get_machinepool_details(self.cluster_name, self.machinepool_id)
    if not fetched:
        # Nothing came back from the lookup: keep the current state as is.
        return
    # Overwrite the local fields with the freshly fetched ones.
    vars(self).update(vars(fetched))
    self.exist = True

def get_machinepool_updated_replicas(self) -> Dict[str, int]:
"""
Retrieve the number of replicas and current replicas for this machine pool.
Expand Down Expand Up @@ -463,10 +472,6 @@ def build_machinepool_cmd_base(cluster_name, node_conf, action):
raise ValueError(
"When 'enable_autoscaling' is True, 'min_replicas' and 'max_replicas' are required."
)
elif node_conf.get("replicas") is None:
raise ValueError(
"Parameter 'replicas' is required when autoscaling is disabled."
)

cmd = f"rosa {action} machinepool --cluster {cluster_name} "

Expand Down
39 changes: 39 additions & 0 deletions ocs_ci/utility/rosa.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ResourceWrongStatusException,
TimeoutExpiredError,
)
from ocs_ci.ocs.machinepool import MachinePools, NodeConf
from ocs_ci.utility import openshift_dedicated as ocm
from ocs_ci.utility import utils

Expand Down Expand Up @@ -1112,3 +1113,41 @@ def get_associated_oidc_config_id(cluster_name):
logger.warning(f"Failed to get OIDC config id: {proc.stderr.decode().strip()}")
return ""
return proc.stdout.decode().strip()


def label_nodes(cluster_name, machinepool_id, labels, rewrite=False):
    """
    Label the nodes of a machine pool of the given cluster.

    ! Important
    By default (``rewrite=False``) the requested labels are appended to the labels
    already set on the machine pool instead of replacing them. This prevents
    accidental loss of the existing labels. An existing label whose key is also
    present in ``labels`` is replaced by the requested one, so the same key is
    never sent twice.

    Args:
        cluster_name (str): The cluster name
        machinepool_id (str): The machinepool id
        labels (str): Comma-separated labels to apply, e.g. 'key1=value1,key2=value2'
        rewrite (bool): If True, the labels of the machine pool are replaced with
            ``labels``. If False, ``labels`` are merged with the existing labels.

    Returns:
        dict: The labels of the machine pool as fetched after the edit

    """
    machine_pools = MachinePools(cluster_name)
    # Look up the pool that is actually being edited; reading the labels of a
    # hard-coded pool would merge labels of an unrelated machine pool.
    machine_pool = machine_pools.filter(
        machinepool_id=machinepool_id, pick_first=True
    )
    if not rewrite:
        existing_labels = machine_pool.labels
        logger.info(f"Existing labels: {existing_labels}")
        if existing_labels:
            # Keys requested by the caller take precedence over existing ones.
            requested_keys = {
                item.partition("=")[0].strip()
                for item in labels.split(",")
                if item.strip()
            }
            merged = [
                f"{key}={value}"
                for key, value in existing_labels.items()
                if key not in requested_keys
            ]
            if labels:
                merged.append(labels)
            # convert to comma separated string
            labels = ",".join(merged)
    machine_pools.edit_machine_pool(
        NodeConf(**{"machinepool_id": machinepool_id, "labels": labels}),
        wait_ready=False,
    )
    # Re-read the pool so that the returned labels reflect the edit.
    machine_pool.refresh()
    return machine_pool.labels

0 comments on commit 71fee55

Please sign in to comment.