From cd1b6f0da3342bc44255443e24eba3314eb990dd Mon Sep 17 00:00:00 2001
From: Malay Kumar Parida
Date: Tue, 5 Sep 2023 13:48:26 +0000
Subject: [PATCH 1/2] Refactor ocs-operator-config related code for easier maintainability

The keys for the configmap are currently duplicated in two places.
Keeping these keys in a single place makes them easier to maintain.
Some util functions that may be used by other controllers are also
moved to the util package.

Signed-off-by: Malay Kumar Parida
---
 .../ocsinitialization_controller.go           |  6 +++
 .../storagecluster/ocs_operator_config.go     | 39 +---------------
 controllers/util/k8sutil.go                   | 44 +++++++++++++++++++
 3 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/controllers/ocsinitialization/ocsinitialization_controller.go b/controllers/ocsinitialization/ocsinitialization_controller.go
index 34275c763e..69356abb36 100644
--- a/controllers/ocsinitialization/ocsinitialization_controller.go
+++ b/controllers/ocsinitialization/ocsinitialization_controller.go
@@ -257,5 +257,11 @@ func (r *OCSInitializationReconciler) ensureOcsOperatorConfigExists(initialData
 		r.Log.Error(err, fmt.Sprintf("Failed to create %v configmap", util.OcsOperatorConfigName))
 		return err
 	}
+	// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
+	if opResult == controllerutil.OperationResultCreated || opResult == controllerutil.OperationResultUpdated {
+		r.Log.Info("ocs-operator-config configmap created/updated. Restarting rook-ceph-operator pod to pick up the new values")
+		util.RestartPod(r.ctx, r.Client, &r.Log, "rook-ceph-operator", initialData.Namespace)
+	}
+
 	return nil
 }
diff --git a/controllers/storagecluster/ocs_operator_config.go b/controllers/storagecluster/ocs_operator_config.go
index 4046c5ed7a..fcf1f0b7ad 100644
--- a/controllers/storagecluster/ocs_operator_config.go
+++ b/controllers/storagecluster/ocs_operator_config.go
@@ -1,19 +1,13 @@
 package storagecluster
 
 import (
-	"context"
-	"fmt"
 	"strconv"
 
-	configv1 "github.com/openshift/api/config/v1"
 	ocsv1 "github.com/red-hat-storage/ocs-operator/api/v1"
 	"github.com/red-hat-storage/ocs-operator/controllers/util"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/klog/v2"
 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 )
 
@@ -73,42 +67,13 @@ func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageClus
 	}
 	// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
 	if opResult == controllerutil.OperationResultCreated || opResult == controllerutil.OperationResultUpdated {
-		r.restartRookCephOperatorPod(sc.Namespace)
-		r.Log.Info(fmt.Sprintf("%q configmap updated & rook-ceph-operator pod restarted to pick up new values", util.OcsOperatorConfigName),
-			"storageCluster", klog.KRef(sc.Namespace, sc.Name))
+		r.Log.Info("ocs-operator-config configmap created/updated. Restarting rook-ceph-operator pod to pick up the new values")
+		util.RestartPod(r.ctx, r.Client, &r.Log, "rook-ceph-operator", sc.Namespace)
 	}
 
 	return nil
 }
 
-// restartRookOperatorPod restarts the rook-operator pod in the OCP cluster
-func (r *StorageClusterReconciler) restartRookCephOperatorPod(namespace string) {
-	podList := &corev1.PodList{}
-	err := r.Client.List(context.TODO(), podList, client.InNamespace(namespace), client.MatchingLabels{"app": "rook-ceph-operator"})
-	if err != nil {
-		r.Log.Error(err, "Failed to list rook-ceph-operator pod")
-		return
-	}
-	for _, pod := range podList.Items {
-		err := r.Client.Delete(context.TODO(), &pod)
-		if err != nil {
-			r.Log.Error(err, "Failed to delete rook-ceph-operator pod")
-			return
-		}
-	}
-}
-
-// getClusterID returns the cluster ID of the OCP-Cluster
-func (r *StorageClusterReconciler) getClusterID() string {
-	clusterVersion := &configv1.ClusterVersion{}
-	err := r.Client.Get(context.TODO(), types.NamespacedName{Name: "version"}, clusterVersion)
-	if err != nil {
-		r.Log.Error(err, "Failed to get the clusterVersion version of the OCP cluster")
-		return ""
-	}
-	return fmt.Sprint(clusterVersion.Spec.ClusterID)
-}
-
 // getCephFSKernelMountOptions returns the kernel mount options for CephFS based on the spec on the StorageCluster
 func getCephFSKernelMountOptions(sc *ocsv1.StorageCluster) string {
 	// If Encryption is enabled, Always use secure mode
diff --git a/controllers/util/k8sutil.go b/controllers/util/k8sutil.go
index 26e659d8a9..5137ba0763 100644
--- a/controllers/util/k8sutil.go
+++ b/controllers/util/k8sutil.go
@@ -1,8 +1,16 @@
 package util
 
 import (
+	"context"
 	"fmt"
 	"os"
+	"strings"
+
+	"github.com/go-logr/logr"
+	configv1 "github.com/openshift/api/config/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
 const (
@@ -16,6 +24,12 @@ const (
 
 	// This configmap is watched by rook-ceph-operator & is reserved only for manual overrides.
 	RookCephOperatorConfigName = "rook-ceph-operator-config"
+
+	// These are the keys in the ocs-operator-config configmap
+	ClusterNameKey              = "CSI_CLUSTER_NAME"
+	EnableReadAffinityKey       = "CSI_ENABLE_READ_AFFINITY"
+	CephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
+	EnableNFSKey                = "ROOK_CSI_ENABLE_NFS"
 )
 
 // GetWatchNamespace returns the namespace the operator should be watching for changes
@@ -39,3 +53,33 @@ func GetOperatorNamespace() (string, error) {
 	}
 	return ns, nil
 }
+
+// GetClusterID returns the cluster ID of the OCP cluster
+func GetClusterID(ctx context.Context, kubeClient client.Client, logger *logr.Logger) string {
+	clusterVersion := &configv1.ClusterVersion{}
+	err := kubeClient.Get(ctx, types.NamespacedName{Name: "version"}, clusterVersion)
+	if err != nil {
+		logger.Error(err, "Failed to get the clusterVersion version of the OCP cluster")
+		return ""
+	}
+	return fmt.Sprint(clusterVersion.Spec.ClusterID)
+}
+
+// RestartPod restarts the pod with the given name in the given namespace by deleting it and letting another one be created
+func RestartPod(ctx context.Context, kubeClient client.Client, logger *logr.Logger, name string, namespace string) {
+	logger.Info("restarting pod", "name", name, "namespace", namespace)
+	podList := &corev1.PodList{}
+	err := kubeClient.List(ctx, podList, client.InNamespace(namespace))
+	if err != nil {
+		logger.Error(err, "failed to list pods", "namespace", namespace)
+		return
+	}
+	for _, pod := range podList.Items {
+		if strings.Contains(pod.Name, name) {
+			err = kubeClient.Delete(ctx, &pod)
+			if err != nil {
+				logger.Error(err, "failed to delete pod", "name", pod.Name, "namespace", namespace)
+			}
+		}
+	}
+}

From 066bfd5ebc55b8c99b70b4ea99783395b42ffd5d Mon Sep 17 00:00:00 2001
From: Malay Kumar Parida
Date: Tue, 5 Sep 2023 13:49:49 +0000
Subject: [PATCH 2/2] Fix upgrade issue when storagecluster is not present

The ocsinitialization controller only creates the ocs-operator-config
configmap (from which the rook-ceph-operator pod takes env values for
its configuration); keeping it updated is the job of the storagecluster
controller. But when no StorageCluster is present and an upgrade
happens, the configmap is never updated, while the new rook-ceph
operator looks for the new keys for its configuration. The
rook-ceph-operator pod therefore fails, and so does the upgrade.

The solution is to change how the configmap is handled: when no
StorageCluster is present, the ocsinitialization controller owns the
configmap and keeps it updated. When a StorageCluster is present, the
ocsinitialization controller does nothing and leaves the configmap to
the storagecluster controller.

This commit also changes the update logic so that we no longer compare
and update the individual keys of the configmap. Instead, we compare
the whole data section of the configmap and update it if required.

Signed-off-by: Malay Kumar Parida
---
 .../ocsinitialization_controller.go        | 41 +++++++++++-------
 .../storagecluster/ocs_operator_config.go  | 43 ++++++-------------
 2 files changed, 37 insertions(+), 47 deletions(-)

diff --git a/controllers/ocsinitialization/ocsinitialization_controller.go b/controllers/ocsinitialization/ocsinitialization_controller.go
index 69356abb36..ac7fdcce60 100644
--- a/controllers/ocsinitialization/ocsinitialization_controller.go
+++ b/controllers/ocsinitialization/ocsinitialization_controller.go
@@ -3,6 +3,7 @@ package ocsinitialization
 import (
 	"context"
 	"fmt"
+	"reflect"
 
 	"github.com/go-logr/logr"
 	secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
@@ -17,6 +18,7 @@ import (
 	"k8s.io/klog/v2"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	"sigs.k8s.io/controller-runtime/pkg/handler"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 	"sigs.k8s.io/controller-runtime/pkg/source"
@@ -233,28 +235,35 @@ func (r *OCSInitializationReconciler) ensureRookCephOperatorConfigExists(initial
 // The needed keys from the configmap are passed to rook-ceph operator pod as env variables.
 // When any value in the configmap is updated, the rook-ceph-operator pod is restarted to pick up the new values.
 func (r *OCSInitializationReconciler) ensureOcsOperatorConfigExists(initialData *ocsv1.OCSInitialization) error {
-	const (
-		clusterNameKey              = "CSI_CLUSTER_NAME"
-		enableReadAffinityKey       = "CSI_ENABLE_READ_AFFINITY"
-		cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
-		enableNFSKey                = "ROOK_CSI_ENABLE_NFS"
-	)
+	// Default or placeholder data that is put during the creation of the configmap
+	// The values are updated by the StorageCluster controller later if required
+	ocsOperatorConfigData := map[string]string{
+		util.ClusterNameKey:              util.GetClusterID(r.ctx, r.Client, &r.Log),
+		util.EnableReadAffinityKey:       "true",
+		util.CephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
+		util.EnableNFSKey:                "false",
+	}
 	ocsOperatorConfig := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      util.OcsOperatorConfigName,
 			Namespace: initialData.Namespace,
 		},
-		// Default or placeholder values for the configmap
-		Data: map[string]string{
-			clusterNameKey:              "",
-			enableReadAffinityKey:       "true",
-			cephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
-			enableNFSKey:                "false",
-		},
+		Data: ocsOperatorConfigData,
 	}
-	err := r.Client.Create(r.ctx, ocsOperatorConfig)
-	if err != nil && !errors.IsAlreadyExists(err) {
-		r.Log.Error(err, fmt.Sprintf("Failed to create %v configmap", util.OcsOperatorConfigName))
+	opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, ocsOperatorConfig, func() error {
+		// If the configmap is being controlled by a StorageCluster, nothing to do here
+		if metav1.GetControllerOf(ocsOperatorConfig) != nil && metav1.GetControllerOf(ocsOperatorConfig).Kind == "StorageCluster" {
+			return nil
+		}
+		// If the configmap is not controlled by a StorageCluster, keep it updated with the default values & set OCSInitialization as the controller
+		if !reflect.DeepEqual(ocsOperatorConfig.Data, ocsOperatorConfigData) {
+			r.Log.Info("Updating ocs-operator-config configmap")
+			ocsOperatorConfig.Data = ocsOperatorConfigData
+		}
+		return ctrl.SetControllerReference(initialData, ocsOperatorConfig, r.Scheme)
+	})
+	if err != nil {
+		r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
 		return err
 	}
 	// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
diff --git a/controllers/storagecluster/ocs_operator_config.go b/controllers/storagecluster/ocs_operator_config.go
index fcf1f0b7ad..f18fb8f4ba 100644
--- a/controllers/storagecluster/ocs_operator_config.go
+++ b/controllers/storagecluster/ocs_operator_config.go
@@ -1,6 +1,7 @@
 package storagecluster
 
 import (
+	"reflect"
 	"strconv"
 
 	ocsv1 "github.com/red-hat-storage/ocs-operator/api/v1"
@@ -12,30 +13,19 @@ import (
 )
 
 func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageCluster) error {
-	const (
-		clusterNameKey              = "CSI_CLUSTER_NAME"
-		enableReadAffinityKey       = "CSI_ENABLE_READ_AFFINITY"
-		cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
-		enableNFSKey                = "ROOK_CSI_ENABLE_NFS"
-	)
-	var (
-		clusterNameVal             = r.getClusterID()
-		enableReadAffinityVal      = strconv.FormatBool(!sc.Spec.ExternalStorage.Enable)
-		cephFSKernelMountOptionVal = getCephFSKernelMountOptions(sc)
-		enableNFSVal               = getEnableNFSVal(sc)
-	)
+	ocsOperatorConfigData := map[string]string{
+		util.ClusterNameKey:              util.GetClusterID(r.ctx, r.Client, &r.Log),
+		util.EnableReadAffinityKey:       strconv.FormatBool(!sc.Spec.ExternalStorage.Enable),
+		util.CephFSKernelMountOptionsKey: getCephFSKernelMountOptions(sc),
+		util.EnableNFSKey:                getEnableNFSVal(sc),
+	}
 
 	cm := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      util.OcsOperatorConfigName,
 			Namespace: sc.Namespace,
 		},
-		Data: map[string]string{
-			clusterNameKey:              clusterNameVal,
-			enableReadAffinityKey:       enableReadAffinityVal,
-			cephFSKernelMountOptionsKey: cephFSKernelMountOptionVal,
-			enableNFSKey:                enableNFSVal,
-		},
+		Data: ocsOperatorConfigData,
 	}
 
 	opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, func() error {
@@ -46,23 +36,14 @@ func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageClus
 			existing.BlockOwnerDeletion = nil
 			existing.Controller = nil
 		}
-
-		if cm.Data[clusterNameKey] != clusterNameVal {
-			cm.Data[clusterNameKey] = clusterNameVal
-		}
-		if cm.Data[enableReadAffinityKey] != enableReadAffinityVal {
-			cm.Data[enableReadAffinityKey] = enableReadAffinityVal
-		}
-		if cm.Data[cephFSKernelMountOptionsKey] != cephFSKernelMountOptionVal {
-			cm.Data[cephFSKernelMountOptionsKey] = cephFSKernelMountOptionVal
-		}
-		if cm.Data[enableNFSKey] != enableNFSVal {
-			cm.Data[enableNFSKey] = enableNFSVal
+		if !reflect.DeepEqual(cm.Data, ocsOperatorConfigData) {
+			r.Log.Info("Updating ocs-operator-config configmap")
+			cm.Data = ocsOperatorConfigData
 		}
 		return ctrl.SetControllerReference(sc, cm, r.Scheme)
 	})
 	if err != nil {
-		r.Log.Error(err, fmt.Sprintf("failed to update %q configmap", util.OcsOperatorConfigName))
+		r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
 		return err
 	}
 	// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
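
Reviewer note, not part of either patch: a minimal sketch of how another controller in this repository might consume the helpers that patch 1 moves into the util package. The exampleReconciler type, its fields, and the method name are hypothetical and exist only for illustration; only the util.GetClusterID and util.RestartPod signatures are taken from the diff above.

package example

import (
	"context"

	"github.com/go-logr/logr"
	"github.com/red-hat-storage/ocs-operator/controllers/util"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// exampleReconciler is a hypothetical reconciler used only to illustrate the shared helpers.
type exampleReconciler struct {
	Client client.Client
	Log    logr.Logger
}

func (r *exampleReconciler) refreshRookCephOperator(ctx context.Context, namespace string) {
	// Read the OCP cluster ID through the shared helper instead of keeping a per-controller copy.
	clusterID := util.GetClusterID(ctx, r.Client, &r.Log)
	r.Log.Info("fetched cluster ID", "clusterID", clusterID)

	// Bounce the rook-ceph-operator pod so it re-reads its environment from the configmap.
	util.RestartPod(ctx, r.Client, &r.Log, "rook-ceph-operator", namespace)
}

Note that RestartPod matches pods by a substring of their name within the namespace, so callers should pass a name specific enough not to delete unrelated pods.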
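
Also for review, the ownership handoff that patch 2 implements inside the CreateOrUpdate mutate function can be read as the following rule, pulled out here into a hypothetical standalone helper (this function does not exist in the patches; it only restates the check):

package example

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// storageClusterOwnsConfigMap reports whether a StorageCluster has already taken
// controller ownership of the ocs-operator-config configmap.
func storageClusterOwnsConfigMap(cm *corev1.ConfigMap) bool {
	owner := metav1.GetControllerOf(cm)
	return owner != nil && owner.Kind == "StorageCluster"
}

When this is true, ensureOcsOperatorConfigExists leaves the configmap's data alone; otherwise the ocsinitialization controller writes the default data and claims the configmap via ctrl.SetControllerReference, and the storagecluster controller takes over ownership once a StorageCluster exists.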