Skip to content

Commit

Permalink
Fix upgrade issue when storagecluster is not present
Browse files Browse the repository at this point in the history
The ocsinitialization controller just creates the ocs-operator-config cm
(from which rook-ceph-operator pod takes env values for configuration).
The task of keeping it updated is on the storagecluster controller. But
when a storagecluster is not there and an upgrade happens the configmap
is not updated but the new rook operator looks for the Key for
configuration. So the rook-cpeh-operator pod fails & so does the
upgrade.

The solution is to change the behavior of the handling of the configmap
so that when the storagecluster controller is not present
ocsinitialization controller should own the configmap & keep it updated.
But when the storagecluster is present it should do nothing and leave it
to the storagecluster controller.

This commit also includes a change where we no longer compare and update
the individual keys of the configmap. Instead, we just check the whole
data section of the configmap and update it if required.

Signed-off-by: Malay Kumar Parida <[email protected]>
  • Loading branch information
malayparida2000 committed Oct 30, 2023
1 parent cd1b6f0 commit 066bfd5
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 47 deletions.
41 changes: 25 additions & 16 deletions controllers/ocsinitialization/ocsinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ocsinitialization
import (
"context"
"fmt"
"reflect"

"github.com/go-logr/logr"
secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
Expand All @@ -17,6 +18,7 @@ import (
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
Expand Down Expand Up @@ -233,28 +235,35 @@ func (r *OCSInitializationReconciler) ensureRookCephOperatorConfigExists(initial
// The needed keys from the configmap are passed to rook-ceph operator pod as env variables.
// When any value in the configmap is updated, the rook-ceph-operator pod is restarted to pick up the new values.
func (r *OCSInitializationReconciler) ensureOcsOperatorConfigExists(initialData *ocsv1.OCSInitialization) error {
const (
clusterNameKey = "CSI_CLUSTER_NAME"
enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY"
cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
enableNFSKey = "ROOK_CSI_ENABLE_NFS"
)
// Default or placeholder data that is put during the creation of the configmap
// The values are updated by the StorageCluster controller later if required
ocsOperatorConfigData := map[string]string{
util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log),
util.EnableReadAffinityKey: "true",
util.CephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
util.EnableNFSKey: "false",
}
ocsOperatorConfig := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: util.OcsOperatorConfigName,
Namespace: initialData.Namespace,
},
// Default or placeholder values for the configmap
Data: map[string]string{
clusterNameKey: "",
enableReadAffinityKey: "true",
cephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
enableNFSKey: "false",
},
Data: ocsOperatorConfigData,
}
err := r.Client.Create(r.ctx, ocsOperatorConfig)
if err != nil && !errors.IsAlreadyExists(err) {
r.Log.Error(err, fmt.Sprintf("Failed to create %v configmap", util.OcsOperatorConfigName))
opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, ocsOperatorConfig, func() error {
// If the configmap is being controlled by a StorageCluster, nothing to do here
if metav1.GetControllerOf(ocsOperatorConfig) != nil && metav1.GetControllerOf(ocsOperatorConfig).Kind == "StorageCluster" {
return nil
}
// If the configmap is not controlled by a StorageCluster, keep it updated with the default values & set OCSInitialization as the controller
if !reflect.DeepEqual(ocsOperatorConfig.Data, ocsOperatorConfigData) {
r.Log.Info("Updating ocs-operator-config configmap")
ocsOperatorConfig.Data = ocsOperatorConfigData
}
return ctrl.SetControllerReference(initialData, ocsOperatorConfig, r.Scheme)
})
if err != nil {
r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
return err
}
// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
Expand Down
43 changes: 12 additions & 31 deletions controllers/storagecluster/ocs_operator_config.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package storagecluster

import (
"reflect"
"strconv"

ocsv1 "github.com/red-hat-storage/ocs-operator/api/v1"
Expand All @@ -12,30 +13,19 @@ import (
)

func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageCluster) error {
const (
clusterNameKey = "CSI_CLUSTER_NAME"
enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY"
cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
enableNFSKey = "ROOK_CSI_ENABLE_NFS"
)
var (
clusterNameVal = r.getClusterID()
enableReadAffinityVal = strconv.FormatBool(!sc.Spec.ExternalStorage.Enable)
cephFSKernelMountOptionVal = getCephFSKernelMountOptions(sc)
enableNFSVal = getEnableNFSVal(sc)
)
ocsOperatorConfigData := map[string]string{
util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log),
util.EnableReadAffinityKey: strconv.FormatBool(!sc.Spec.ExternalStorage.Enable),
util.CephFSKernelMountOptionsKey: getCephFSKernelMountOptions(sc),
util.EnableNFSKey: getEnableNFSVal(sc),
}

cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: util.OcsOperatorConfigName,
Namespace: sc.Namespace,
},
Data: map[string]string{
clusterNameKey: clusterNameVal,
enableReadAffinityKey: enableReadAffinityVal,
cephFSKernelMountOptionsKey: cephFSKernelMountOptionVal,
enableNFSKey: enableNFSVal,
},
Data: ocsOperatorConfigData,
}

opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, func() error {
Expand All @@ -46,23 +36,14 @@ func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageClus
existing.BlockOwnerDeletion = nil
existing.Controller = nil
}

if cm.Data[clusterNameKey] != clusterNameVal {
cm.Data[clusterNameKey] = clusterNameVal
}
if cm.Data[enableReadAffinityKey] != enableReadAffinityVal {
cm.Data[enableReadAffinityKey] = enableReadAffinityVal
}
if cm.Data[cephFSKernelMountOptionsKey] != cephFSKernelMountOptionVal {
cm.Data[cephFSKernelMountOptionsKey] = cephFSKernelMountOptionVal
}
if cm.Data[enableNFSKey] != enableNFSVal {
cm.Data[enableNFSKey] = enableNFSVal
if !reflect.DeepEqual(cm.Data, ocsOperatorConfigData) {
r.Log.Info("Updating ocs-operator-config configmap")
cm.Data = ocsOperatorConfigData
}
return ctrl.SetControllerReference(sc, cm, r.Scheme)
})
if err != nil {
r.Log.Error(err, fmt.Sprintf("failed to update %q configmap", util.OcsOperatorConfigName))
r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
return err
}
// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
Expand Down

0 comments on commit 066bfd5

Please sign in to comment.