From 03fb25c22dd4bec62be29bc2e31d2800b09381e2 Mon Sep 17 00:00:00 2001 From: Stefan Prodan Date: Thu, 26 Sep 2024 13:49:15 +0300 Subject: [PATCH] Implement ServiceAccount impersonation for ResourceGroups Signed-off-by: Stefan Prodan --- api/v1/resourcegroup_types.go | 16 +-- cmd/main.go | 30 ++-- ...fluxcd.controlplane.io_resourcegroups.yaml | 5 + docs/api/v1/resourcegroup.md | 14 ++ .../controller/resourcegroup_controller.go | 88 +++++++++--- .../resourcegroup_controller_test.go | 133 ++++++++++++++++++ internal/controller/suite_test.go | 2 + test/olm/instance_test.go | 2 +- 8 files changed, 243 insertions(+), 47 deletions(-) diff --git a/api/v1/resourcegroup_types.go b/api/v1/resourcegroup_types.go index 2665e80..6dd6a8d 100644 --- a/api/v1/resourcegroup_types.go +++ b/api/v1/resourcegroup_types.go @@ -38,6 +38,11 @@ type ResourceGroupSpec struct { // +optional DependsOn []Dependency `json:"dependsOn,omitempty"` + // The name of the Kubernetes service account to impersonate + // when reconciling the generated resources. + // +optional + ServiceAccountName string `json:"serviceAccountName,omitempty"` + // Wait instructs the controller to check the health of all the reconciled // resources. Defaults to true. // +kubebuilder:default:=true @@ -45,17 +50,6 @@ type ResourceGroupSpec struct { Wait bool `json:"wait,omitempty"` } -// CommonMetadata defines the common labels and annotations. -type CommonMetadata struct { - // Annotations to be added to the object's metadata. - // +optional - Annotations map[string]string `json:"annotations,omitempty"` - - // Labels to be added to the object's metadata. - // +optional - Labels map[string]string `json:"labels,omitempty"` -} - // Dependency defines a ResourceGroup dependency on a Kubernetes resource. type Dependency struct { // APIVersion of the resource to depend on. diff --git a/cmd/main.go b/cmd/main.go index bdc7355..378c664 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -50,19 +50,22 @@ func init() { func main() { var ( - concurrent int - metricsAddr string - healthAddr string - enableLeaderElection bool - logOptions logger.Options - rateLimiterOptions runtimeCtrl.RateLimiterOptions - storagePath string + concurrent int + metricsAddr string + healthAddr string + enableLeaderElection bool + logOptions logger.Options + rateLimiterOptions runtimeCtrl.RateLimiterOptions + storagePath string + defaultServiceAccount string ) flag.IntVar(&concurrent, "concurrent", 10, "The number of concurrent resource reconciles.") flag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&healthAddr, "health-addr", ":8081", "The address the health endpoint binds to.") flag.StringVar(&storagePath, "storage-path", "/data", "The local storage path.") + flag.StringVar(&defaultServiceAccount, "default-service-account", "", + "Default service account used for impersonation.") flag.BoolVar(&enableLeaderElection, "enable-leader-election", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") @@ -177,12 +180,13 @@ func main() { } if err = (&controller.ResourceGroupReconciler{ - Client: mgr.GetClient(), - APIReader: mgr.GetAPIReader(), - Scheme: mgr.GetScheme(), - StatusPoller: polling.NewStatusPoller(mgr.GetClient(), mgr.GetRESTMapper(), polling.Options{}), - StatusManager: controllerName, - EventRecorder: mgr.GetEventRecorderFor(controllerName), + Client: mgr.GetClient(), + APIReader: mgr.GetAPIReader(), + Scheme: mgr.GetScheme(), + StatusPoller: polling.NewStatusPoller(mgr.GetClient(), mgr.GetRESTMapper(), polling.Options{}), + StatusManager: controllerName, + EventRecorder: mgr.GetEventRecorderFor(controllerName), + DefaultServiceAccount: defaultServiceAccount, }).SetupWithManager(mgr, controller.ResourceGroupReconcilerOptions{ RateLimiter: runtimeCtrl.GetRateLimiter(rateLimiterOptions), diff --git a/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml b/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml index 07c9a14..61eb39b 100644 --- a/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml +++ b/config/crd/bases/fluxcd.controlplane.io_resourcegroups.yaml @@ -113,6 +113,11 @@ spec: items: x-kubernetes-preserve-unknown-fields: true type: array + serviceAccountName: + description: |- + The name of the Kubernetes service account to impersonate + when reconciling the generated resources. + type: string wait: default: true description: |- diff --git a/docs/api/v1/resourcegroup.md b/docs/api/v1/resourcegroup.md index 9b682be..e7ef2a3 100644 --- a/docs/api/v1/resourcegroup.md +++ b/docs/api/v1/resourcegroup.md @@ -389,6 +389,20 @@ The health check is performed for the following resources types: By default, the wait timeout is `5m` and can be changed with the `fluxcd.controlplane.io/reconcileTimeout` annotation, set on the ResourceGroup object. +### Role-based access control + +The `.spec.serviceAccountName` field is optional and specifies the name of the +Kubernetes ServiceAccount used by the flux-operator to reconcile the ResourceGroup. +The ServiceAccount must exist in the same namespace as the ResourceGroup +and must have the necessary permissions to create, update and delete +the resources defined in the ResourceGroup. + +On multi-tenant clusters, it is recommended to use a dedicated ServiceAccount per tenant namespace +with the minimum required permissions. To enforce a ServiceAccount for all ResourceGroups, +the `--default-service-account=flux-operator`flag can be set in the flux-operator container arguments. +With this flag set, only the ResourceGroups created in the same namespace as the flux-operator +will run with cluster-admin permissions. + ## ResourceGroup Status ### Conditions diff --git a/internal/controller/resourcegroup_controller.go b/internal/controller/resourcegroup_controller.go index a8a56ee..b916c40 100644 --- a/internal/controller/resourcegroup_controller.go +++ b/internal/controller/resourcegroup_controller.go @@ -13,6 +13,7 @@ import ( "github.com/fluxcd/cli-utils/pkg/kstatus/polling" "github.com/fluxcd/cli-utils/pkg/kstatus/status" "github.com/fluxcd/pkg/apis/meta" + runtimeClient "github.com/fluxcd/pkg/runtime/client" "github.com/fluxcd/pkg/runtime/conditions" "github.com/fluxcd/pkg/runtime/patch" "github.com/fluxcd/pkg/ssa" @@ -39,10 +40,11 @@ type ResourceGroupReconciler struct { client.Client kuberecorder.EventRecorder - APIReader client.Reader - Scheme *runtime.Scheme - StatusPoller *polling.StatusPoller - StatusManager string + APIReader client.Reader + Scheme *runtime.Scheme + StatusPoller *polling.StatusPoller + StatusManager string + DefaultServiceAccount string } // +kubebuilder:rbac:groups=fluxcd.controlplane.io,resources=resourcegroups,verbs=get;list;watch;create;update;patch;delete @@ -227,8 +229,26 @@ func (r *ResourceGroupReconciler) apply(ctx context.Context, obj.Status.Inventory.DeepCopyInto(oldInventory) } + // Configure the Kubernetes client for impersonation. + impersonation := runtimeClient.NewImpersonator( + r.Client, + r.StatusPoller, + polling.Options{}, + nil, + runtimeClient.KubeConfigOptions{}, + r.DefaultServiceAccount, + obj.Spec.ServiceAccountName, + obj.GetNamespace(), + ) + + // Create the Kubernetes client that runs under impersonation. + kubeClient, statusPoller, err := impersonation.GetClient(ctx) + if err != nil { + return fmt.Errorf("failed to build kube client: %w", err) + } + // Create a resource manager to reconcile the resources. - resourceManager := ssa.NewResourceManager(r.Client, r.StatusPoller, ssa.Owner{ + resourceManager := ssa.NewResourceManager(kubeClient, statusPoller, ssa.Owner{ Field: r.StatusManager, Group: fmt.Sprintf("resourcegroup.%s", fluxcdv1.GroupVersion.Group), }) @@ -403,29 +423,53 @@ func (r *ResourceGroupReconciler) uninstall(ctx context.Context, return ctrl.Result{}, nil } - resourceManager := ssa.NewResourceManager(r.Client, nil, ssa.Owner{ - Field: r.StatusManager, - Group: fluxcdv1.GroupVersion.Group, - }) + // Configure the Kubernetes client for impersonation. + impersonation := runtimeClient.NewImpersonator( + r.Client, + r.StatusPoller, + polling.Options{}, + nil, + runtimeClient.KubeConfigOptions{}, + r.DefaultServiceAccount, + obj.Spec.ServiceAccountName, + obj.GetNamespace(), + ) + + // Prune the managed resources if the service account is found. + if impersonation.CanImpersonate(ctx) { + kubeClient, _, err := impersonation.GetClient(ctx) + if err != nil { + return ctrl.Result{}, err + } - opts := ssa.DeleteOptions{ - PropagationPolicy: metav1.DeletePropagationBackground, - Inclusions: resourceManager.GetOwnerLabels(obj.Name, obj.Namespace), - Exclusions: map[string]string{ - fluxcdv1.PruneAnnotation: fluxcdv1.DisabledValue, - }, - } + resourceManager := ssa.NewResourceManager(kubeClient, nil, ssa.Owner{ + Field: r.StatusManager, + Group: fluxcdv1.GroupVersion.Group, + }) + + opts := ssa.DeleteOptions{ + PropagationPolicy: metav1.DeletePropagationBackground, + Inclusions: resourceManager.GetOwnerLabels(obj.Name, obj.Namespace), + Exclusions: map[string]string{ + fluxcdv1.PruneAnnotation: fluxcdv1.DisabledValue, + }, + } - objects, _ := inventory.List(obj.Status.Inventory) + objects, _ := inventory.List(obj.Status.Inventory) - changeSet, err := resourceManager.DeleteAll(ctx, objects, opts) - if err != nil { - log.Error(err, "pruning for deleted resource failed") + changeSet, err := resourceManager.DeleteAll(ctx, objects, opts) + if err != nil { + log.Error(err, "pruning for deleted resource failed") + } + + msg := fmt.Sprintf("Uninstallation completed in %v", fmtDuration(reconcileStart)) + log.Info(msg, "output", changeSet.ToMap()) + } else { + log.Error(errors.New("service account not found"), "skip pruning for deleted resource") } + // Release the object to be garbage collected. controllerutil.RemoveFinalizer(obj, fluxcdv1.Finalizer) - msg := fmt.Sprintf("Uninstallation completed in %v", fmtDuration(reconcileStart)) - log.Info(msg, "output", changeSet.ToMap()) // Stop reconciliation as the object is being deleted. return ctrl.Result{}, nil diff --git a/internal/controller/resourcegroup_controller_test.go b/internal/controller/resourcegroup_controller_test.go index 333e464..45333ab 100644 --- a/internal/controller/resourcegroup_controller_test.go +++ b/internal/controller/resourcegroup_controller_test.go @@ -6,6 +6,7 @@ package controller import ( "context" "fmt" + "os" "testing" "time" @@ -14,9 +15,11 @@ import ( "github.com/fluxcd/pkg/runtime/conditions" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/yaml" @@ -295,6 +298,136 @@ spec: g.Expect(r.IsZero()).To(BeTrue()) } +func TestResourceGroupReconciler_Impersonation(t *testing.T) { + g := NewWithT(t) + reconciler := getResourceGroupReconciler() + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + // Generate a kubeconfig for the testenv-admin user. + user, err := testEnv.AddUser(envtest.User{ + Name: "testenv-admin", + Groups: []string{"system:masters"}, + }, nil) + if err != nil { + panic(fmt.Sprintf("failed to create testenv-admin user: %v", err)) + } + + kubeConfig, err := user.KubeConfig() + if err != nil { + panic(fmt.Sprintf("failed to create the testenv-admin user kubeconfig: %v", err)) + } + + tmpDir := t.TempDir() + err = os.WriteFile(fmt.Sprintf("%s/kubeconfig", tmpDir), kubeConfig, 0644) + g.Expect(err).ToNot(HaveOccurred()) + + // Set the kubeconfig environment variable for the impersonator. + t.Setenv("KUBECONFIG", fmt.Sprintf("%s/kubeconfig", tmpDir)) + + ns, err := testEnv.CreateNamespace(ctx, "test") + g.Expect(err).ToNot(HaveOccurred()) + + objDef := fmt.Sprintf(` +apiVersion: fluxcd.controlplane.io/v1 +kind: ResourceGroup +metadata: + name: test + namespace: "%[1]s" +spec: + serviceAccountName: flux-operator + resources: + - apiVersion: v1 + kind: ConfigMap + metadata: + name: test + namespace: "%[1]s" +`, ns.Name) + + obj := &fluxcdv1.ResourceGroup{} + err = yaml.Unmarshal([]byte(objDef), obj) + g.Expect(err).ToNot(HaveOccurred()) + + // Initialize the instance. + err = testEnv.Create(ctx, obj) + g.Expect(err).ToNot(HaveOccurred()) + + r, err := reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(r.Requeue).To(BeTrue()) + + // Reconcile with missing service account. + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).To(HaveOccurred()) + + // Check if the instance was installed. + result := &fluxcdv1.ResourceGroup{} + err = testClient.Get(ctx, client.ObjectKeyFromObject(obj), result) + g.Expect(err).ToNot(HaveOccurred()) + + logObjectStatus(t, result) + g.Expect(conditions.GetReason(result, meta.ReadyCondition)).To(BeIdenticalTo(meta.ReconciliationFailedReason)) + + // Create the service account and role binding. + sa := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flux-operator", + Namespace: ns.Name, + }, + } + + rb := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "flux-operator", + Namespace: ns.Name, + }, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: "flux-operator", + Namespace: ns.Name, + }, + }, + RoleRef: rbacv1.RoleRef{ + Kind: "ClusterRole", + Name: "cluster-admin", + }, + } + + err = testClient.Create(ctx, sa) + g.Expect(err).ToNot(HaveOccurred()) + err = testClient.Create(ctx, rb) + g.Expect(err).ToNot(HaveOccurred()) + + // Reconcile with existing service account. + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + + // Check if the instance was installed. + resultFinal := &fluxcdv1.ResourceGroup{} + err = testClient.Get(ctx, client.ObjectKeyFromObject(obj), resultFinal) + g.Expect(err).ToNot(HaveOccurred()) + + logObjectStatus(t, resultFinal) + g.Expect(conditions.GetReason(resultFinal, meta.ReadyCondition)).To(BeIdenticalTo(meta.ReconciliationSucceededReason)) + + // Delete the resource group. + err = testClient.Delete(ctx, obj) + g.Expect(err).ToNot(HaveOccurred()) + + r, err = reconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: client.ObjectKeyFromObject(obj), + }) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(r.IsZero()).To(BeTrue()) +} + func getResourceGroupReconciler() *ResourceGroupReconciler { return &ResourceGroupReconciler{ Client: testClient, diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index 831938a..413ba76 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -20,6 +20,7 @@ import ( . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" @@ -45,6 +46,7 @@ var ( func NewTestScheme() *runtime.Scheme { s := runtime.NewScheme() utilruntime.Must(corev1.AddToScheme(s)) + utilruntime.Must(rbacv1.AddToScheme(s)) utilruntime.Must(appsv1.AddToScheme(s)) utilruntime.Must(apiextensionsv1.AddToScheme(s)) utilruntime.Must(fluxcdv1.AddToScheme(s)) diff --git a/test/olm/instance_test.go b/test/olm/instance_test.go index ac2b70d..6f3c1e7 100644 --- a/test/olm/instance_test.go +++ b/test/olm/instance_test.go @@ -38,7 +38,7 @@ var _ = Describe("FluxInstance", Ordered, func() { Context("uninstallation", func() { It("should run successfully", func() { By("delete FluxInstance") - cmd := exec.Command("kubectl", "delete", "-k", "config/samples", + cmd := exec.Command("kubectl", "delete", "FluxInstance/flux", "--timeout=30s", "-n", namespace) _, err := utils.Run(cmd, "/test/olm") Expect(err).NotTo(HaveOccurred())