diff --git a/cmd/training-operator.v2alpha1/main.go b/cmd/training-operator.v2alpha1/main.go index ea3573dfc4..1b0c9b9fb5 100644 --- a/cmd/training-operator.v2alpha1/main.go +++ b/cmd/training-operator.v2alpha1/main.go @@ -29,6 +29,7 @@ import ( utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" @@ -159,7 +160,7 @@ func setupControllers(mgr ctrl.Manager, runtimes map[string]runtime.Runtime, cer <-certsReady setupLog.Info("Certs ready") - if failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes); err != nil { + if failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes, controller.Options{}); err != nil { setupLog.Error(err, "Could not create controller", "controller", failedCtrlName) os.Exit(1) } diff --git a/pkg/controller.v2/setup.go b/pkg/controller.v2/setup.go index 3fb8b98d9d..e6e675e108 100644 --- a/pkg/controller.v2/setup.go +++ b/pkg/controller.v2/setup.go @@ -18,16 +18,17 @@ package controllerv2 import ( ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller" runtime "github.com/kubeflow/training-operator/pkg/runtime.v2" ) -func SetupControllers(mgr ctrl.Manager, runtimes map[string]runtime.Runtime) (string, error) { +func SetupControllers(mgr ctrl.Manager, runtimes map[string]runtime.Runtime, options controller.Options) (string, error) { if err := NewTrainJobReconciler( mgr.GetClient(), mgr.GetEventRecorderFor("training-operator-trainjob-controller"), runtimes, - ).SetupWithManager(mgr); err != nil { + ).SetupWithManager(mgr, options); err != nil { return "TrainJob", err } return "", nil diff --git a/pkg/controller.v2/trainjob_controller.go b/pkg/controller.v2/trainjob_controller.go index 5adaea6ac8..6fca663115 100644 --- a/pkg/controller.v2/trainjob_controller.go +++ b/pkg/controller.v2/trainjob_controller.go @@ -33,6 +33,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" + "sigs.k8s.io/controller-runtime/pkg/controller" kubeflowv2 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v2alpha1" jobruntimes "github.com/kubeflow/training-operator/pkg/runtime.v2" @@ -65,8 +66,8 @@ func NewTrainJobReconciler(client client.Client, recorder record.EventRecorder, } } -//+kubebuilder:rbac:groups=kubeflow.org,resources=trainjobs,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=kubeflow.org,resources=trainjobs/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=kubeflow.org,resources=trainjobs,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=kubeflow.org,resources=trainjobs/status,verbs=get;update;patch func (r *TrainJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { var trainJob kubeflowv2.TrainJob @@ -219,8 +220,9 @@ func runtimeRefToGroupKind(runtimeRef kubeflowv2.RuntimeRef) schema.GroupKind { } } -func (r *TrainJobReconciler) SetupWithManager(mgr ctrl.Manager) error { +func (r *TrainJobReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { b := ctrl.NewControllerManagedBy(mgr). + WithOptions(options). For(&kubeflowv2.TrainJob{}) for _, runtime := range r.runtimes { for _, registrar := range runtime.EventHandlerRegistrars() { diff --git a/test/integration/framework/framework.go b/test/integration/framework/framework.go index 9824992d8d..c7685a0d50 100644 --- a/test/integration/framework/framework.go +++ b/test/integration/framework/framework.go @@ -29,8 +29,10 @@ import ( "go.uber.org/zap/zapcore" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/utils/ptr" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -100,7 +102,15 @@ func (f *Framework) RunManager(cfg *rest.Config) (context.Context, client.Client gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred()) gomega.ExpectWithOffset(1, runtimes).NotTo(gomega.BeNil()) - failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes) + failedCtrlName, err := controllerv2.SetupControllers(mgr, runtimes, controller.Options{ + // controller-runtime v0.19+ validates controller names are unique, to make sure + // exported Prometheus metrics for each controller do not conflict. The current check + // relies on static state that's not compatible with testing execution model. + // See the following resources for more context: + // https://github.com/kubernetes-sigs/controller-runtime/pull/2902#issuecomment-2284194683 + // https://github.com/kubernetes-sigs/controller-runtime/issues/2994 + SkipNameValidation: ptr.To(true), + }) gomega.ExpectWithOffset(1, err).NotTo(gomega.HaveOccurred(), "controller", failedCtrlName) gomega.ExpectWithOffset(1, failedCtrlName).To(gomega.BeEmpty())