From 1d5b7b2733b4c974ac413fd350fc75d93f59a2bb Mon Sep 17 00:00:00 2001 From: Malay Kumar Parida Date: Tue, 5 Sep 2023 13:49:49 +0000 Subject: [PATCH] Fix upgrade issue when storagecluster is not present The ocsinitialization controller just creates the ocs-operator-config cm (from which rook-ceph-operator pod takes env values for configuration). The task of keeping it updated is on the storagecluster controller. But when a storagecluster is not there and an upgrade happens the configmap is not updated but the new rook operator looks for the Key for configuration. So the rook-cpeh-operator pod fails & so does the upgrade. The solution is to change the behavior of the handling of the configmap so that when the storagecluster controller is not present ocsinitialization controller should own the configmap & keep it updated. But when the storagecluster is present it should do nothing and leave it to the storagecluster controller. This commit also includes a change where we no longer compare and update the individual keys of the configmap. Instead, we just check the whole data section of the configmap and update it if required. Signed-off-by: Malay Kumar Parida --- .../ocsinitialization_controller.go | 38 ++++++++++++------- .../storagecluster/ocs_operator_config.go | 35 ++++++----------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/controllers/ocsinitialization/ocsinitialization_controller.go b/controllers/ocsinitialization/ocsinitialization_controller.go index 2ea0284cd4..02d93f5b31 100644 --- a/controllers/ocsinitialization/ocsinitialization_controller.go +++ b/controllers/ocsinitialization/ocsinitialization_controller.go @@ -3,6 +3,7 @@ package ocsinitialization import ( "context" "fmt" + "reflect" "github.com/go-logr/logr" secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1" @@ -17,6 +18,7 @@ import ( "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" @@ -233,26 +235,34 @@ func (r *OCSInitializationReconciler) ensureRookCephOperatorConfigExists(initial // The needed keys from the configmap are passed to rook-ceph operator pod as env variables. // When any value in the configmap is updated, the rook-ceph-operator pod is restarted to pick up the new values. func (r *OCSInitializationReconciler) ensureOcsOperatorConfigExists(initialData *ocsv1.OCSInitialization) error { - const ( - clusterNameKey = "CSI_CLUSTER_NAME" - enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY" - cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS" - ) + // Default or placeholder data that is put during the creation of the configmap + // The values are updated by the StorageCluster controller later if required + ocsOperatorConfigData := map[string]string{ + util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log), + util.EnableReadAffinityKey: "true", + util.CephFSKernelMountOptionsKey: "ms_mode=prefer-crc", + } ocsOperatorConfig := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: util.OcsOperatorConfigName, Namespace: initialData.Namespace, }, - // Default or placeholder values for the configmap - Data: map[string]string{ - clusterNameKey: "", - enableReadAffinityKey: "true", - cephFSKernelMountOptionsKey: "ms_mode=prefer-crc", - }, + Data: ocsOperatorConfigData, } - err := r.Client.Create(r.ctx, ocsOperatorConfig) - if err != nil && !errors.IsAlreadyExists(err) { - r.Log.Error(err, fmt.Sprintf("Failed to create %v configmap", util.OcsOperatorConfigName)) + opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, ocsOperatorConfig, func() error { + // If the configmap is being controlled by a StorageCluster, nothing to do here + if metav1.GetControllerOf(ocsOperatorConfig) != nil && metav1.GetControllerOf(ocsOperatorConfig).Kind == "StorageCluster" { + return nil + } + // If the configmap is not controlled by a StorageCluster, keep it updated with the default values & set OCSInitialization as the controller + if !reflect.DeepEqual(ocsOperatorConfig.Data, ocsOperatorConfigData) { + r.Log.Info("Updating ocs-operator-config configmap") + ocsOperatorConfig.Data = ocsOperatorConfigData + } + return ctrl.SetControllerReference(initialData, ocsOperatorConfig, r.Scheme) + }) + if err != nil { + r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult) return err } // If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change diff --git a/controllers/storagecluster/ocs_operator_config.go b/controllers/storagecluster/ocs_operator_config.go index af377b99b0..2db47b2029 100644 --- a/controllers/storagecluster/ocs_operator_config.go +++ b/controllers/storagecluster/ocs_operator_config.go @@ -1,6 +1,7 @@ package storagecluster import ( + "reflect" "strconv" ocsv1 "github.com/red-hat-storage/ocs-operator/api/v1" @@ -12,26 +13,18 @@ import ( ) func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageCluster) error { - const ( - clusterNameKey = "CSI_CLUSTER_NAME" - enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY" - cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS" - ) - var ( - clusterNameVal = r.getClusterID() - enableReadAffinityVal = strconv.FormatBool(!sc.Spec.ExternalStorage.Enable) - cephFSKernelMountOptionVal = getCephFSKernelMountOptions(sc) - ) + ocsOperatorConfigData := map[string]string{ + util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log), + util.EnableReadAffinityKey: strconv.FormatBool(!sc.Spec.ExternalStorage.Enable), + util.CephFSKernelMountOptionsKey: getCephFSKernelMountOptions(sc), + } cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: util.OcsOperatorConfigName, Namespace: sc.Namespace, }, - Data: map[string]string{ - clusterNameKey: clusterNameVal, - enableReadAffinityKey: enableReadAffinityVal, - }, + Data: ocsOperatorConfigData, } opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, func() error { @@ -42,20 +35,14 @@ func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageClus existing.BlockOwnerDeletion = nil existing.Controller = nil } - - if cm.Data[clusterNameKey] != clusterNameVal { - cm.Data[clusterNameKey] = clusterNameVal - } - if cm.Data[enableReadAffinityKey] != enableReadAffinityVal { - cm.Data[enableReadAffinityKey] = enableReadAffinityVal - } - if cm.Data[cephFSKernelMountOptionsKey] != cephFSKernelMountOptionVal { - cm.Data[cephFSKernelMountOptionsKey] = cephFSKernelMountOptionVal + if !reflect.DeepEqual(cm.Data, ocsOperatorConfigData) { + r.Log.Info("Updating ocs-operator-config configmap") + cm.Data = ocsOperatorConfigData } return ctrl.SetControllerReference(sc, cm, r.Scheme) }) if err != nil { - r.Log.Error(err, fmt.Sprintf("failed to update %q configmap", util.OcsOperatorConfigName)) + r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult) return err } // If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change