From 739e80424b3842aa72c04f55d4d4f39d101c32f8 Mon Sep 17 00:00:00 2001
From: Malay Kumar Parida
Date: Mon, 2 Dec 2024 14:25:41 +0530
Subject: [PATCH] Add default TSCs if not present to ensure even distribution
 of OSDs

When any part of the placement spec, such as tolerations or node
affinity, is defined, the ocs-operator stops applying the default
placement specs, including TSCs. Without the TSCs, OSD distribution
across nodes can become uneven. Always adding the default TSCs ensures
consistent and balanced OSD placement across nodes.

Signed-off-by: Malay Kumar Parida
---
 controllers/storagecluster/cephcluster.go | 74 ++++++++++++-----------
 1 file changed, 40 insertions(+), 34 deletions(-)

diff --git a/controllers/storagecluster/cephcluster.go b/controllers/storagecluster/cephcluster.go
index 96304a05e0..1ff9dad0f3 100644
--- a/controllers/storagecluster/cephcluster.go
+++ b/controllers/storagecluster/cephcluster.go
@@ -783,21 +783,21 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
 		topologyKey := ds.TopologyKey
 		topologyKeyValues := []string{}
 
-		noPlacement := ds.Placement.NodeAffinity == nil && ds.Placement.PodAffinity == nil && ds.Placement.PodAntiAffinity == nil && ds.Placement.TopologySpreadConstraints == nil
-		noPreparePlacement := ds.PreparePlacement.NodeAffinity == nil && ds.PreparePlacement.PodAffinity == nil && ds.PreparePlacement.PodAntiAffinity == nil && ds.PreparePlacement.TopologySpreadConstraints == nil
+		noPlacementTsc := ds.Placement.TopologySpreadConstraints == nil
+		noPlacement := ds.Placement.NodeAffinity == nil && ds.Placement.PodAffinity == nil && ds.Placement.PodAntiAffinity == nil && noPlacementTsc
+		noPreparePlacementTsc := ds.PreparePlacement.TopologySpreadConstraints == nil
+		noPreparePlacement := ds.PreparePlacement.NodeAffinity == nil && ds.PreparePlacement.PodAffinity == nil && ds.PreparePlacement.PodAntiAffinity == nil && noPreparePlacementTsc
 
-		if noPlacement {
-			if topologyKey == "" {
-				topologyKey = getFailureDomain(sc)
-			}
+		if topologyKey == "" {
+			topologyKey = getFailureDomain(sc)
+		}
 
-			if topologyKey == "host" {
-				portable = false
-			}
+		if topologyKey == "host" {
+			portable = false
+		}
 
-			if topologyMap != nil {
-				topologyKey, topologyKeyValues = topologyMap.GetKeyValues(topologyKey)
-			}
+		if topologyMap != nil {
+			topologyKey, topologyKeyValues = topologyMap.GetKeyValues(topologyKey)
 		}
 
 		count, replica := countAndReplicaOf(&ds)
@@ -812,28 +812,7 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
 			if noPreparePlacement {
 				in := getPlacement(sc, "osd-prepare")
 				(&in).DeepCopyInto(&preparePlacement)
-			}
-
-			if len(topologyKeyValues) >= getMinDeviceSetReplica(sc) {
-				// Hard constraints are set in OSD placement for portable volumes with rack failure domain
-				// domain as there is no node affinity in PVs. This restricts the movement of OSDs
-				// between failure domain.
-				if portable && !strings.Contains(topologyKey, "zone") {
-					addStrictFailureDomainTSC(&placement, topologyKey)
-				}
-				// If topologyKey is not host, append additional topology spread constraint to the
-				// default preparePlacement. This serves even distribution at the host level
-				// within a failure domain (zone/rack).
-				if noPreparePlacement {
-					if topologyKey != corev1.LabelHostname {
-						addStrictFailureDomainTSC(&preparePlacement, topologyKey)
-					} else {
-						preparePlacement.TopologySpreadConstraints[0].TopologyKey = topologyKey
-					}
-				}
-			}
-
-			if !noPreparePlacement {
+			} else {
 				preparePlacement = ds.PreparePlacement
 			}
 		} else if !noPlacement && noPreparePlacement {
@@ -844,6 +823,33 @@ func newStorageClassDeviceSets(sc *ocsv1.StorageCluster) []rookCephv1.StorageCla
 			placement = ds.Placement
 		}
 
+		// Add default TSCs if not set to ensure even distribution of OSDs across nodes
+		if !noPlacement && noPlacementTsc {
+			placement.TopologySpreadConstraints = append(placement.TopologySpreadConstraints, defaults.DaemonPlacements["osd"].TopologySpreadConstraints...)
+		}
+		if !noPreparePlacement && noPreparePlacementTsc {
+			preparePlacement.TopologySpreadConstraints = append(preparePlacement.TopologySpreadConstraints, defaults.DaemonPlacements["osd-prepare"].TopologySpreadConstraints...)
+		}
+
+		if len(topologyKeyValues) >= getMinDeviceSetReplica(sc) {
+			// Hard constraints are set in OSD placement for portable volumes with rack failure domain
+			// domain as there is no node affinity in PVs. This restricts the movement of OSDs
+			// between failure domain.
+			if noPlacementTsc && portable && !strings.Contains(topologyKey, "zone") {
+				addStrictFailureDomainTSC(&placement, topologyKey)
+			}
+			// If topologyKey is not host, append additional topology spread constraint to the
+			// default preparePlacement. This serves even distribution at the host level
+			// within a failure domain (zone/rack).
+			if noPreparePlacementTsc {
+				if topologyKey != corev1.LabelHostname {
+					addStrictFailureDomainTSC(&preparePlacement, topologyKey)
+				} else {
+					preparePlacement.TopologySpreadConstraints[0].TopologyKey = topologyKey
+				}
+			}
+		}
+
 		// Annotation crushDeviceClass ensures osd with different CRUSH device class than the one detected by Ceph
 		crushDeviceClass := ds.DeviceType
 		if ds.DeviceClass != "" {
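
For illustration, a minimal standalone sketch of the defaulting behaviour this patch introduces: when a device set's placement carries only user-set fields such as tolerations, a default topology spread constraint is still appended so OSD pods spread evenly across hosts. The helper name appendDefaultOSDTSC, the toleration, and the constraint values (maxSkew, label selector) are assumptions made for the sketch, not the actual contents of defaults.DaemonPlacements["osd"].

package main

import (
	"fmt"

	rookCephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// appendDefaultOSDTSC mirrors the idea of the patch: if the user-supplied
// placement has no TopologySpreadConstraints, append a default one so OSD
// pods still spread across nodes. The values below are illustrative
// assumptions, not the exact defaults used by ocs-operator.
func appendDefaultOSDTSC(p *rookCephv1.Placement) {
	if p.TopologySpreadConstraints != nil {
		return // user already set TSCs; leave them untouched
	}
	p.TopologySpreadConstraints = append(p.TopologySpreadConstraints, corev1.TopologySpreadConstraint{
		MaxSkew:           1,
		TopologyKey:       corev1.LabelHostname,
		WhenUnsatisfiable: corev1.ScheduleAnyway,
		LabelSelector: &metav1.LabelSelector{
			MatchExpressions: []metav1.LabelSelectorRequirement{{
				Key:      "app",
				Operator: metav1.LabelSelectorOpIn,
				Values:   []string{"rook-ceph-osd"},
			}},
		},
	})
}

func main() {
	// A placement with only tolerations set: previously this disabled all
	// defaults, including TSCs; with the defaulting above the TSC is still added.
	placement := rookCephv1.Placement{
		Tolerations: []corev1.Toleration{{
			Key:      "node.ocs.openshift.io/storage",
			Operator: corev1.TolerationOpEqual,
			Value:    "true",
			Effect:   corev1.TaintEffectNoSchedule,
		}},
	}
	appendDefaultOSDTSC(&placement)
	fmt.Printf("TSCs after defaulting: %+v\n", placement.TopologySpreadConstraints)
}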