Skip to content

Commit

Permalink
controllers: available CRDs check feature
Browse files Browse the repository at this point in the history
Reasons for this enhancement:
- A controller cannot set up a watch for a CRD that is not installed on
 the cluster, trying to set up a watch will panic the operator
- There is no known way, that we are aware of, to add a watch later
 without client cache issue

How does the enhancement work around the issue:
- On start of the operator(main), detect which CRDs are avail
- At the start each reconcile of controller, we fetch the CRD
 of interest and compare it with CRDs fetched in previous step,
  If there is any change, we panic the op

Signed-off-by: Rewant Soni <[email protected]>
Signed-off-by: raaizik <[email protected]>
  • Loading branch information
rewantsoni committed Oct 8, 2024
1 parent 037e1ed commit 63a30c0
Show file tree
Hide file tree
Showing 33 changed files with 4,154 additions and 6 deletions.
41 changes: 36 additions & 5 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"context"
"flag"
"fmt"
"os"

apiv1alpha1 "github.com/red-hat-storage/ocs-client-operator/api/v1alpha1"
Expand All @@ -37,9 +39,11 @@ import (
secv1 "github.com/openshift/api/security/v1"
opv1a1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
ramenv1alpha1 "github.com/ramendr/ramen/api/v1alpha1"
admrv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand Down Expand Up @@ -75,6 +79,7 @@ func init() {
utilruntime.Must(quotav1.AddToScheme(scheme))
utilruntime.Must(csiopv1a1.AddToScheme(scheme))
utilruntime.Must(nbapis.AddToScheme(scheme))
utilruntime.Must(ramenv1alpha1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme
}

Expand Down Expand Up @@ -129,11 +134,6 @@ func main() {
os.Exit(1)
}

if err != nil {
setupLog.Error(err, "Unable to get Client")
os.Exit(1)
}

// set namespace
err = utils.ValidateOperatorNamespace()
if err != nil {
Expand All @@ -147,6 +147,22 @@ func main() {
os.Exit(1)
}

// apiclient.New() returns a client without cache. cache is not initialized before mgr.Start()
// we need this because we need to watch for CRDs the operator is dependent on
apiClient, err := client.New(mgr.GetConfig(), client.Options{
Scheme: mgr.GetScheme(),
})
if err != nil {
setupLog.Error(err, "Unable to get Client")
os.Exit(1)
}

availCrds, err := getAvailableCRDNames(context.Background(), apiClient)
if err != nil {
setupLog.Error(err, "Unable get a list of available CRD names")
os.Exit(1)
}

setupLog.Info("setting up webhook server")
hookServer := mgr.GetWebhookServer()

Expand All @@ -172,6 +188,7 @@ func main() {
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
OperatorNamespace: utils.GetOperatorNamespace(),
AvailableCrds: availCrds,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "StorageClaim")
os.Exit(1)
Expand Down Expand Up @@ -202,3 +219,17 @@ func main() {
os.Exit(1)
}
}

func getAvailableCRDNames(ctx context.Context, cl client.Client) (map[string]bool, error) {
crdExist := map[string]bool{}
crdList := &metav1.PartialObjectMetadataList{}
crdList.SetGroupVersionKind(extv1.SchemeGroupVersion.WithKind("CustomResourceDefinitionList"))
if err := cl.List(ctx, crdList); err != nil {
return nil, fmt.Errorf("error listing CRDs, %v", err)
}
// Iterate over the list and populate the map
for i := range crdList.Items {
crdExist[crdList.Items[i].Name] = true
}
return crdExist, nil
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ require (
github.com/operator-framework/api v0.27.0
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.76.0
github.com/ramendr/ramen/api v0.0.0-20241001141243-29d6f22ad237
github.com/red-hat-storage/ocs-client-operator/api v0.0.0-00010101000000-000000000000
github.com/red-hat-storage/ocs-operator/services/provider/api/v4 v4.0.0-20240917115204-741b9d6f263d
github.com/stretchr/testify v1.9.0
Expand All @@ -48,6 +49,7 @@ require (
github.com/openshift/custom-resource-status v1.1.3-0.20220503160415-f2fdb4999d87 // indirect
github.com/x448/float16 v0.8.4 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
k8s.io/component-base v0.31.0 // indirect
sigs.k8s.io/container-object-storage-interface-api v0.1.0 // indirect
)

Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ github.com/prometheus/common v0.57.0 h1:Ro/rKjwdq9mZn1K5QPctzh+MA4Lp0BuYk5ZZEVho
github.com/prometheus/common v0.57.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/ramendr/ramen/api v0.0.0-20241001141243-29d6f22ad237 h1:ig6ePD0yopC5Qi5BRmhsIsKaOkdsGXTSmG3HTYIpquo=
github.com/ramendr/ramen/api v0.0.0-20241001141243-29d6f22ad237/go.mod h1:nO6VM/+PEhcPGyFIQJdhY6ip822cA61PAy/s6IjenAA=
github.com/red-hat-storage/ocs-operator/services/provider/api/v4 v4.0.0-20240917115204-741b9d6f263d h1:RK/zCM6xRwyeJ06u6xSLEwl5Q1g/6EZRQmSgzbqleT0=
github.com/red-hat-storage/ocs-operator/services/provider/api/v4 v4.0.0-20240917115204-741b9d6f263d/go.mod h1:t9GJk69TGXABBF8fFPB+ImpbA9mJyRS86wW6+Qn8xHo=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
Expand Down Expand Up @@ -776,6 +778,8 @@ k8s.io/client-go v0.31.0 h1:QqEJzNjbN2Yv1H79SsS+SWnXkBgVu4Pj3CJQgbx0gI8=
k8s.io/client-go v0.31.0/go.mod h1:Y9wvC76g4fLjmU0BA+rV+h2cncoadjvjjkkIGoTLcGU=
k8s.io/code-generator v0.20.1/go.mod h1:UsqdF+VX4PU2g46NC2JRs4gc+IfrctnwHb76RNbWHJg=
k8s.io/code-generator v0.23.3/go.mod h1:S0Q1JVA+kSzTI1oUvbKAxZY/DYbA/ZUb4Uknog12ETk=
k8s.io/component-base v0.31.0 h1:/KIzGM5EvPNQcYgwq5NwoQBaOlVFrghoVGr8lG6vNRs=
k8s.io/component-base v0.31.0/go.mod h1:TYVuzI1QmN4L5ItVdMSXKvH7/DtvIuas5/mm8YT3rTo=
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
k8s.io/gengo v0.0.0-20201113003025-83324d819ded/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c/go.mod h1:FiNAH4ZV3gBg2Kwh89tzAEV2be7d5xI0vBa/VySYy3E=
Expand Down
30 changes: 29 additions & 1 deletion internal/controller/storageclaim_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"encoding/json"
"fmt"
"reflect"
"sigs.k8s.io/controller-runtime/pkg/handler"
"slices"
"strings"
"time"
Expand All @@ -32,9 +33,11 @@ import (
csiopv1a1 "github.com/ceph/ceph-csi-operator/api/v1alpha1"
"github.com/go-logr/logr"
snapapi "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1"
ramenv1alpha1 "github.com/ramendr/ramen/api/v1alpha1"
providerclient "github.com/red-hat-storage/ocs-operator/services/provider/api/v4/client"
corev1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
extv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand All @@ -56,6 +59,8 @@ const (

pvClusterIDIndexName = "index:persistentVolumeClusterID"
vscClusterIDIndexName = "index:volumeSnapshotContentCSIDriver"

drClusterConfigCRDName = "drclusterconfigs.ramendr.openshift.io"
)

// StorageClaimReconciler reconciles a StorageClaim object
Expand All @@ -64,6 +69,7 @@ type StorageClaimReconciler struct {
cache.Cache
Scheme *runtime.Scheme
OperatorNamespace string
AvailableCrds map[string]bool

log logr.Logger
ctx context.Context
Expand Down Expand Up @@ -110,12 +116,25 @@ func (r *StorageClaimReconciler) SetupWithManager(mgr ctrl.Manager) error {
bldr := ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.StorageClaim{}, builder.WithPredicates(generationChangePredicate)).
Owns(&storagev1.StorageClass{}).
Owns(&snapapi.VolumeSnapshotClass{})
Owns(&snapapi.VolumeSnapshotClass{}).
Watches(
&extv1.CustomResourceDefinition{},
&handler.EnqueueRequestForObject{},
builder.WithPredicates(
utils.NamePredicate(drClusterConfigCRDName),
utils.CrdCreateAndDeletePredicate(&r.log, drClusterConfigCRDName, r.AvailableCrds[drClusterConfigCRDName]),
),
builder.OnlyMetadata,
)

if utils.DelegateCSI {
bldr = bldr.Owns(&csiopv1a1.ClientProfile{}, builder.WithPredicates(generationChangePredicate))
}

if r.AvailableCrds[drClusterConfigCRDName] {
bldr = bldr.Owns(&ramenv1alpha1.DRClusterConfig{}, builder.WithPredicates(generationChangePredicate))
}

return bldr.Complete(r)
}

Expand Down Expand Up @@ -144,6 +163,15 @@ func (r *StorageClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request
r.ctx = ctrllog.IntoContext(ctx, r.log)
r.log.Info("Reconciling StorageClaim.")

crd := &metav1.PartialObjectMetadata{}
crd.SetGroupVersionKind(extv1.SchemeGroupVersion.WithKind("CustomResourceDefinition"))
crd.Name = drClusterConfigCRDName
if err := r.Client.Get(ctx, client.ObjectKeyFromObject(crd), crd); client.IgnoreNotFound(err) != nil {
r.log.Error(err, "Failed to get CRD", "CRD", drClusterConfigCRDName)
return reconcile.Result{}, err
}
utils.AssertEqual(r.AvailableCrds[drClusterConfigCRDName], crd.UID != "", utils.ExitCodeThatShouldRestartTheProcess)

// Fetch the StorageClaim instance
r.storageClaim = &v1alpha1.StorageClaim{}
r.storageClaim.Name = req.Name
Expand Down
2 changes: 2 additions & 0 deletions pkg/utils/k8sutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ const CronScheduleWeekly = "@weekly"

const CSIReconcileEnvVar = "CSI_RECONCILE"

const ExitCodeThatShouldRestartTheProcess = 42

// GetOperatorNamespace returns the namespace where the operator is deployed.
func GetOperatorNamespace() string {
return os.Getenv(OperatorNamespaceEnvVar)
Expand Down
39 changes: 39 additions & 0 deletions pkg/utils/predicates.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package utils

import (
"github.com/go-logr/logr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"
)

// Name Predicate return a predicate the filter events produced
// by resources that matches the given name
func NamePredicate(name string) predicate.Predicate {
return predicate.NewPredicateFuncs(func(obj client.Object) bool {
return obj.GetName() == name
})
}

func CrdCreateAndDeletePredicate(log *logr.Logger, crdName string, crdExists bool) predicate.Predicate {
return predicate.Funcs{
CreateFunc: func(_ event.CreateEvent) bool {
if !crdExists {
log.Info("CustomResourceDefinition %s was Created.", crdName)
}
return !crdExists
},
DeleteFunc: func(_ event.DeleteEvent) bool {
if crdExists {
log.Info("CustomResourceDefinition %s was Deleted.", crdName)
}
return crdExists
},
UpdateFunc: func(_ event.UpdateEvent) bool {
return false
},
GenericFunc: func(_ event.GenericEvent) bool {
return false
},
}
}
8 changes: 8 additions & 0 deletions pkg/utils/slices.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ limitations under the License.

package utils

import "os"

// Find returns the first entry matching the function "f" or else return nil
func Find[T any](list []T, f func(item *T) bool) *T {
for idx := range list {
Expand All @@ -26,3 +28,9 @@ func Find[T any](list []T, f func(item *T) bool) *T {
}
return nil
}

func AssertEqual[T comparable](actual T, expected T, exitCode int) {
if actual != expected {
os.Exit(exitCode)
}
}
Loading

0 comments on commit 63a30c0

Please sign in to comment.