Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug 2235571:[release-4.13] Fix upgrade issue when storagecluster is not present #2181

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 31 additions & 16 deletions controllers/ocsinitialization/ocsinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ocsinitialization
import (
"context"
"fmt"
"reflect"

"github.com/go-logr/logr"
secv1client "github.com/openshift/client-go/security/clientset/versioned/typed/security/v1"
Expand All @@ -17,6 +18,7 @@ import (
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
Expand Down Expand Up @@ -233,29 +235,42 @@ func (r *OCSInitializationReconciler) ensureRookCephOperatorConfigExists(initial
// The needed keys from the configmap are passed to rook-ceph operator pod as env variables.
// When any value in the configmap is updated, the rook-ceph-operator pod is restarted to pick up the new values.
func (r *OCSInitializationReconciler) ensureOcsOperatorConfigExists(initialData *ocsv1.OCSInitialization) error {
const (
clusterNameKey = "CSI_CLUSTER_NAME"
enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY"
cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
enableNFSKey = "ROOK_CSI_ENABLE_NFS"
)
// Default or placeholder data that is put during the creation of the configmap
// The values are updated by the StorageCluster controller later if required
ocsOperatorConfigData := map[string]string{
util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log),
util.EnableReadAffinityKey: "true",
util.CephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
util.EnableNFSKey: "false",
}
ocsOperatorConfig := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: util.OcsOperatorConfigName,
Namespace: initialData.Namespace,
},
// Default or placeholder values for the configmap
Data: map[string]string{
clusterNameKey: "",
enableReadAffinityKey: "true",
cephFSKernelMountOptionsKey: "ms_mode=prefer-crc",
enableNFSKey: "false",
},
Data: ocsOperatorConfigData,
}
err := r.Client.Create(r.ctx, ocsOperatorConfig)
if err != nil && !errors.IsAlreadyExists(err) {
r.Log.Error(err, fmt.Sprintf("Failed to create %v configmap", util.OcsOperatorConfigName))
opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, ocsOperatorConfig, func() error {
// If the configmap is being controlled by a StorageCluster, nothing to do here
if metav1.GetControllerOf(ocsOperatorConfig) != nil && metav1.GetControllerOf(ocsOperatorConfig).Kind == "StorageCluster" {
return nil
}
// If the configmap is not controlled by a StorageCluster, keep it updated with the default values & set OCSInitialization as the controller
if !reflect.DeepEqual(ocsOperatorConfig.Data, ocsOperatorConfigData) {
r.Log.Info("Updating ocs-operator-config configmap")
ocsOperatorConfig.Data = ocsOperatorConfigData
}
return ctrl.SetControllerReference(initialData, ocsOperatorConfig, r.Scheme)
})
if err != nil {
r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
return err
}
// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
if opResult == controllerutil.OperationResultCreated || opResult == controllerutil.OperationResultUpdated {
r.Log.Info("ocs-operator-config configmap created/updated. Restarting rook-ceph-operator pod to pick up the new values")
util.RestartPod(r.ctx, r.Client, &r.Log, "rook-ceph-operator", initialData.Namespace)
}

return nil
}
82 changes: 14 additions & 68 deletions controllers/storagecluster/ocs_operator_config.go
Original file line number Diff line number Diff line change
@@ -1,47 +1,31 @@
package storagecluster

import (
"context"
"fmt"
"reflect"
"strconv"

configv1 "github.com/openshift/api/config/v1"
ocsv1 "github.com/red-hat-storage/ocs-operator/api/v1"
"github.com/red-hat-storage/ocs-operator/controllers/util"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageCluster) error {
const (
clusterNameKey = "CSI_CLUSTER_NAME"
enableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY"
cephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
enableNFSKey = "ROOK_CSI_ENABLE_NFS"
)
var (
clusterNameVal = r.getClusterID()
enableReadAffinityVal = strconv.FormatBool(!sc.Spec.ExternalStorage.Enable)
cephFSKernelMountOptionVal = getCephFSKernelMountOptions(sc)
enableNFSVal = getEnableNFSVal(sc)
)
ocsOperatorConfigData := map[string]string{
util.ClusterNameKey: util.GetClusterID(r.ctx, r.Client, &r.Log),
util.EnableReadAffinityKey: strconv.FormatBool(!sc.Spec.ExternalStorage.Enable),
util.CephFSKernelMountOptionsKey: getCephFSKernelMountOptions(sc),
util.EnableNFSKey: getEnableNFSVal(sc),
}

cm := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: util.OcsOperatorConfigName,
Namespace: sc.Namespace,
},
Data: map[string]string{
clusterNameKey: clusterNameVal,
enableReadAffinityKey: enableReadAffinityVal,
cephFSKernelMountOptionsKey: cephFSKernelMountOptionVal,
enableNFSKey: enableNFSVal,
},
Data: ocsOperatorConfigData,
}

opResult, err := ctrl.CreateOrUpdate(r.ctx, r.Client, cm, func() error {
Expand All @@ -52,63 +36,25 @@ func (r *StorageClusterReconciler) ensureOCSOperatorConfig(sc *ocsv1.StorageClus
existing.BlockOwnerDeletion = nil
existing.Controller = nil
}

if cm.Data[clusterNameKey] != clusterNameVal {
cm.Data[clusterNameKey] = clusterNameVal
}
if cm.Data[enableReadAffinityKey] != enableReadAffinityVal {
cm.Data[enableReadAffinityKey] = enableReadAffinityVal
}
if cm.Data[cephFSKernelMountOptionsKey] != cephFSKernelMountOptionVal {
cm.Data[cephFSKernelMountOptionsKey] = cephFSKernelMountOptionVal
}
if cm.Data[enableNFSKey] != enableNFSVal {
cm.Data[enableNFSKey] = enableNFSVal
if !reflect.DeepEqual(cm.Data, ocsOperatorConfigData) {
r.Log.Info("Updating ocs-operator-config configmap")
cm.Data = ocsOperatorConfigData
}
return ctrl.SetControllerReference(sc, cm, r.Scheme)
})
if err != nil {
r.Log.Error(err, fmt.Sprintf("failed to update %q configmap", util.OcsOperatorConfigName))
r.Log.Error(err, "Failed to create/update ocs-operator-config configmap", "OperationResult", opResult)
return err
}
// If configmap is created or updated, restart the rook-ceph-operator pod to pick up the new change
if opResult == controllerutil.OperationResultCreated || opResult == controllerutil.OperationResultUpdated {
r.restartRookCephOperatorPod(sc.Namespace)
r.Log.Info(fmt.Sprintf("%q configmap updated & rook-ceph-operator pod restarted to pick up new values", util.OcsOperatorConfigName),
"storageCluster", klog.KRef(sc.Namespace, sc.Name))
r.Log.Info("ocs-operator-config configmap created/updated. Restarting rook-ceph-operator pod to pick up the new values")
util.RestartPod(r.ctx, r.Client, &r.Log, "rook-ceph-operator", sc.Namespace)
}

return nil
}

// restartRookCephOperatorPod deletes every pod labelled app=rook-ceph-operator
// in the given namespace. Failures are logged instead of being returned: a
// failed list aborts immediately, and the first failed delete stops the loop,
// leaving any remaining pods untouched.
func (r *StorageClusterReconciler) restartRookCephOperatorPod(namespace string) {
	var operatorPods corev1.PodList
	listOpts := []client.ListOption{
		client.InNamespace(namespace),
		client.MatchingLabels{"app": "rook-ceph-operator"},
	}
	if listErr := r.Client.List(context.TODO(), &operatorPods, listOpts...); listErr != nil {
		r.Log.Error(listErr, "Failed to list rook-ceph-operator pod")
		return
	}

	// Index into the slice so each Delete receives a pointer to the listed
	// element rather than to a per-iteration copy.
	for i := range operatorPods.Items {
		if delErr := r.Client.Delete(context.TODO(), &operatorPods.Items[i]); delErr != nil {
			r.Log.Error(delErr, "Failed to delete rook-ceph-operator pod")
			return
		}
	}
}

// getClusterID fetches the ClusterVersion object named "version" and returns
// its Spec.ClusterID formatted as a string. When the lookup fails the error is
// logged and an empty string is returned.
func (r *StorageClusterReconciler) getClusterID() string {
	var cv configv1.ClusterVersion
	key := types.NamespacedName{Name: "version"}
	if getErr := r.Client.Get(context.TODO(), key, &cv); getErr != nil {
		r.Log.Error(getErr, "Failed to get the clusterVersion version of the OCP cluster")
		return ""
	}
	return fmt.Sprint(cv.Spec.ClusterID)
}

// getCephFSKernelMountOptions returns the kernel mount options for CephFS based on the spec on the StorageCluster
func getCephFSKernelMountOptions(sc *ocsv1.StorageCluster) string {
// If Encryption is enabled, Always use secure mode
Expand Down
44 changes: 44 additions & 0 deletions controllers/util/k8sutil.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
package util

import (
"context"
"fmt"
"os"
"strings"

"github.com/go-logr/logr"
configv1 "github.com/openshift/api/config/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
Expand All @@ -16,6 +24,12 @@ const (

// This configmap is watched by rook-ceph-operator & is reserved only for manual overrides.
RookCephOperatorConfigName = "rook-ceph-operator-config"

// These are the keys in the ocs-operator-config configmap
ClusterNameKey = "CSI_CLUSTER_NAME"
EnableReadAffinityKey = "CSI_ENABLE_READ_AFFINITY"
CephFSKernelMountOptionsKey = "CSI_CEPHFS_KERNEL_MOUNT_OPTIONS"
EnableNFSKey = "ROOK_CSI_ENABLE_NFS"
)

// GetWatchNamespace returns the namespace the operator should be watching for changes
Expand All @@ -39,3 +53,33 @@ func GetOperatorNamespace() (string, error) {
}
return ns, nil
}

// GetClusterID fetches the ClusterVersion object named "version" through
// kubeClient and returns its Spec.ClusterID formatted as a string. When the
// lookup fails the error is reported through logger and an empty string is
// returned, so callers cannot distinguish a failure from an empty ID.
func GetClusterID(ctx context.Context, kubeClient client.Client, logger *logr.Logger) string {
	var cv configv1.ClusterVersion
	key := types.NamespacedName{Name: "version"}
	if getErr := kubeClient.Get(ctx, key, &cv); getErr != nil {
		logger.Error(getErr, "Failed to get the clusterVersion version of the OCP cluster")
		return ""
	}
	return fmt.Sprint(cv.Spec.ClusterID)
}

// RestartPod deletes every pod in namespace whose name contains name as a
// substring. It does not recreate anything itself; a replacement pod only
// appears if something else (presumably the owning controller) creates one.
//
// Matching is by substring, not by exact name or label, so callers should pass
// a fragment specific enough to avoid hitting unrelated pods. An empty name
// would match every pod in the namespace, so it is rejected up front.
//
// Errors are logged through logger and never returned: a failed list aborts
// the call, while a failed delete is logged and the loop moves on to the
// remaining matching pods.
func RestartPod(ctx context.Context, kubeClient client.Client, logger *logr.Logger, name string, namespace string) {
	// strings.Contains(s, "") is always true; without this guard an empty
	// name would delete every pod in the namespace.
	if name == "" {
		logger.Error(nil, "refusing to restart pods: empty name would match every pod", "namespace", namespace)
		return
	}

	logger.Info("restarting pod", "name", name, "namespace", namespace)
	podList := &corev1.PodList{}
	if err := kubeClient.List(ctx, podList, client.InNamespace(namespace)); err != nil {
		logger.Error(err, "failed to list pods", "namespace", namespace)
		return
	}

	// Index into the slice instead of ranging by value: this avoids copying
	// each Pod and avoids passing the address of the loop variable.
	for i := range podList.Items {
		pod := &podList.Items[i]
		if !strings.Contains(pod.Name, name) {
			continue
		}
		if err := kubeClient.Delete(ctx, pod); err != nil {
			logger.Error(err, "failed to delete pod", "name", pod.Name, "namespace", namespace)
		}
	}
}