From fa11850563064d4beb68e29baf7526fe3de6bb5c Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Fri, 9 Feb 2024 11:37:37 -0800 Subject: [PATCH] Readd `k3s secrets-encrypt rotate-keys` with correct support for KMSv2 GA (#9340) * Reorder copy order for caching * Enable longer http timeout requests Signed-off-by: Derek Nola * Setup reencrypt controller to run on all apiserver nodes * Fix reencryption for disabling secrets encryption, reenable drone tests --- .drone.yml | 4 + Dockerfile.local | 2 +- go.mod | 2 +- pkg/cli/cmds/secrets_encrypt.go | 7 ++ pkg/cli/secretsencrypt/secrets_encrypt.go | 4 +- pkg/clientaccess/token.go | 29 +++-- pkg/daemons/control/deps/deps.go | 2 +- pkg/daemons/control/server.go | 14 +++ pkg/secretsencrypt/config.go | 116 +++++++++++++++++- pkg/secretsencrypt/controller.go | 23 ++-- pkg/server/secrets-encrypt.go | 19 ++- pkg/server/server.go | 10 -- tests/e2e/secretsencryption/Vagrantfile | 3 +- .../secretsencryption_test.go | 2 +- 14 files changed, 198 insertions(+), 39 deletions(-) diff --git a/.drone.yml b/.drone.yml index ad0e2307fd40..3d1fc05b9358 100644 --- a/.drone.yml +++ b/.drone.yml @@ -657,6 +657,10 @@ steps: - vagrant destroy -f - go test -v -timeout=45m ./validatecluster_test.go -ci -local - cp ./coverage.out /tmp/artifacts/validate-coverage.out + - cd ../secretsencryption + - vagrant destroy -f + - go test -v -timeout=30m ./secretsencryption_test.go -ci -local + - cp ./coverage.out /tmp/artifacts/se-coverage.out - cd ../startup - vagrant destroy -f - go test -v -timeout=30m ./startup_test.go -ci -local diff --git a/Dockerfile.local b/Dockerfile.local index 6251a41b3558..04ae2dc49e4e 100644 --- a/Dockerfile.local +++ b/Dockerfile.local @@ -46,9 +46,9 @@ RUN --mount=type=cache,id=gomod,target=/go/pkg/mod \ ./scripts/download COPY ./cmd ./cmd -COPY ./pkg ./pkg COPY ./tests ./tests COPY ./.git ./.git +COPY ./pkg ./pkg RUN --mount=type=cache,id=gomod,target=/go/pkg/mod \ --mount=type=cache,id=gobuild,target=/root/.cache/go-build \ ./scripts/build diff --git a/go.mod b/go.mod index 0b7454c03774..d93e0adadf52 100644 --- a/go.mod +++ b/go.mod @@ -136,6 +136,7 @@ require ( github.com/opencontainers/selinux v1.11.0 github.com/otiai10/copy v1.7.0 github.com/pkg/errors v0.9.1 + github.com/prometheus/common v0.45.0 github.com/rancher/dynamiclistener v0.3.6 github.com/rancher/lasso v0.0.0-20230830164424-d684fdeb6f29 github.com/rancher/remotedialer v0.3.0 @@ -421,7 +422,6 @@ require ( github.com/pquerna/cachecontrol v0.1.0 // indirect github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/client_model v0.5.0 // indirect - github.com/prometheus/common v0.45.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/quic-go/qpack v0.4.0 // indirect github.com/quic-go/qtls-go1-20 v0.3.3 // indirect diff --git a/pkg/cli/cmds/secrets_encrypt.go b/pkg/cli/cmds/secrets_encrypt.go index 8e1fb946d75b..3fcf0c839f2e 100644 --- a/pkg/cli/cmds/secrets_encrypt.go +++ b/pkg/cli/cmds/secrets_encrypt.go @@ -84,6 +84,13 @@ func NewSecretsEncryptCommands(status, enable, disable, prepare, rotate, reencry Destination: &ServerConfig.EncryptSkip, }), }, + { + Name: "rotate-keys", + Usage: "(experimental) Dynamically rotates secrets encryption keys and re-encrypt secrets", + SkipArgReorder: true, + Action: rotateKeys, + Flags: EncryptFlags, + }, }, } } diff --git a/pkg/cli/secretsencrypt/secrets_encrypt.go b/pkg/cli/secretsencrypt/secrets_encrypt.go index 65e58f18ad02..04579edae8a0 100644 --- a/pkg/cli/secretsencrypt/secrets_encrypt.go +++ b/pkg/cli/secretsencrypt/secrets_encrypt.go @@ -8,6 +8,7 @@ import ( "path/filepath" "strings" "text/tabwriter" + "time" "github.com/erikdubbelboer/gspt" "github.com/k3s-io/k3s/pkg/cli/cmds" @@ -226,7 +227,8 @@ func RotateKeys(app *cli.Context) error { if err != nil { return err } - if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { + timeout := 70 * time.Second + if err = info.Put("/v1-"+version.Program+"/encrypt/config", b, clientaccess.WithTimeout(timeout)); err != nil { return wrapServerError(err) } fmt.Println("keys rotated, reencryption started") diff --git a/pkg/clientaccess/token.go b/pkg/clientaccess/token.go index 37031379f120..73bc6e732262 100644 --- a/pkg/clientaccess/token.go +++ b/pkg/clientaccess/token.go @@ -41,6 +41,9 @@ var ( } ) +// ClientOption is a callback to mutate the http client prior to use +type ClientOption func(*http.Client) + // Info contains fields that track parsed parts of a cluster join token type Info struct { *kubeadm.BootstrapTokenString @@ -233,7 +236,7 @@ func parseToken(token string) (*Info, error) { // If the CA bundle is not empty but does not contain any valid certs, it validates using // an empty CA bundle (which will always fail). // If valid cert+key paths can be loaded from the provided paths, they are used for client cert auth. -func GetHTTPClient(cacerts []byte, certFile, keyFile string) *http.Client { +func GetHTTPClient(cacerts []byte, certFile, keyFile string, option ...ClientOption) *http.Client { if len(cacerts) == 0 { return defaultClient } @@ -250,18 +253,29 @@ func GetHTTPClient(cacerts []byte, certFile, keyFile string) *http.Client { if err == nil { tlsConfig.Certificates = []tls.Certificate{cert} } - - return &http.Client{ + client := &http.Client{ Timeout: defaultClientTimeout, Transport: &http.Transport{ DisableKeepAlives: true, TLSClientConfig: tlsConfig, }, } + + for _, o := range option { + o(client) + } + return client +} + +func WithTimeout(d time.Duration) ClientOption { + return func(c *http.Client) { + c.Timeout = d + c.Transport.(*http.Transport).ResponseHeaderTimeout = d + } } // Get makes a request to a subpath of info's BaseURL -func (i *Info) Get(path string) ([]byte, error) { +func (i *Info) Get(path string, option ...ClientOption) ([]byte, error) { u, err := url.Parse(i.BaseURL) if err != nil { return nil, err @@ -272,11 +286,12 @@ func (i *Info) Get(path string) ([]byte, error) { } p.Scheme = u.Scheme p.Host = u.Host - return get(p.String(), GetHTTPClient(i.CACerts, i.CertFile, i.KeyFile), i.Username, i.Password, i.Token()) + return get(p.String(), GetHTTPClient(i.CACerts, i.CertFile, i.KeyFile, option...), i.Username, i.Password, i.Token()) } // Put makes a request to a subpath of info's BaseURL -func (i *Info) Put(path string, body []byte) error { +func (i *Info) Put(path string, body []byte, option ...ClientOption) error { + u, err := url.Parse(i.BaseURL) if err != nil { return err @@ -287,7 +302,7 @@ func (i *Info) Put(path string, body []byte) error { } p.Scheme = u.Scheme p.Host = u.Host - return put(p.String(), body, GetHTTPClient(i.CACerts, i.CertFile, i.KeyFile), i.Username, i.Password, i.Token()) + return put(p.String(), body, GetHTTPClient(i.CACerts, i.CertFile, i.KeyFile, option...), i.Username, i.Password, i.Token()) } // setServer sets the BaseURL and CACerts fields of the Info by connecting to the server diff --git a/pkg/daemons/control/deps/deps.go b/pkg/daemons/control/deps/deps.go index 5af285c086a9..140fb2edd0cc 100644 --- a/pkg/daemons/control/deps/deps.go +++ b/pkg/daemons/control/deps/deps.go @@ -741,7 +741,7 @@ func genEncryptionConfigAndState(controlConfig *config.Control) error { return nil } - aescbcKey := make([]byte, aescbcKeySize, aescbcKeySize) + aescbcKey := make([]byte, aescbcKeySize) _, err := cryptorand.Read(aescbcKey) if err != nil { return err diff --git a/pkg/daemons/control/server.go b/pkg/daemons/control/server.go index 2448a572bb0c..5477e8ae05d4 100644 --- a/pkg/daemons/control/server.go +++ b/pkg/daemons/control/server.go @@ -14,6 +14,7 @@ import ( "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/daemons/control/deps" "github.com/k3s-io/k3s/pkg/daemons/executor" + "github.com/k3s-io/k3s/pkg/secretsencrypt" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" "github.com/pkg/errors" @@ -60,6 +61,19 @@ func Server(ctx context.Context, cfg *config.Control) error { if err := apiServer(ctx, cfg); err != nil { return err } + if cfg.EncryptSecrets { + controllerName := "reencrypt-secrets" + cfg.Runtime.ClusterControllerStarts[controllerName] = func(ctx context.Context) { + // cfg.Runtime.Core is populated before this callback is triggered + if err := secretsencrypt.Register(ctx, + controllerName, + cfg, + cfg.Runtime.Core.Core().V1().Node(), + cfg.Runtime.Core.Core().V1().Secret()); err != nil { + logrus.Errorf("Failed to register %s controller: %v", controllerName, err) + } + } + } } // Wait for an apiserver to become available before starting additional controllers, diff --git a/pkg/secretsencrypt/config.go b/pkg/secretsencrypt/config.go index dd770c7d8b7a..1c365b9f0588 100644 --- a/pkg/secretsencrypt/config.go +++ b/pkg/secretsencrypt/config.go @@ -1,20 +1,29 @@ package secretsencrypt import ( + "bytes" + "context" "crypto/sha256" "encoding/hex" "encoding/json" "fmt" "os" + "time" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/util" "github.com/k3s-io/k3s/pkg/version" + "github.com/prometheus/common/expfmt" corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/tools/clientcmd" + "github.com/k3s-io/k3s/pkg/generated/clientset/versioned/scheme" "github.com/sirupsen/logrus" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1" + + "k8s.io/client-go/rest" ) const ( @@ -42,7 +51,10 @@ func GetEncryptionProviders(runtime *config.ControlRuntime) ([]apiserverconfigv1 return curEncryption.Resources[0].Providers, nil } -func GetEncryptionKeys(runtime *config.ControlRuntime) ([]apiserverconfigv1.Key, error) { +// GetEncryptionKeys returns a list of encryption keys from the current encryption configuration. +// If includeIdentity is true, it will also include a fake key representing the identity provider, which +// is used to determine if encryption is enabled/disabled. +func GetEncryptionKeys(runtime *config.ControlRuntime, includeIdentity bool) ([]apiserverconfigv1.Key, error) { providers, err := GetEncryptionProviders(runtime) if err != nil { @@ -54,6 +66,14 @@ func GetEncryptionKeys(runtime *config.ControlRuntime) ([]apiserverconfigv1.Key, var curKeys []apiserverconfigv1.Key for _, p := range providers { + // Since identity doesn't have keys, we make up a fake key to represent it, so we can + // know that encryption is enabled/disabled in the request. + if p.Identity != nil && includeIdentity { + curKeys = append(curKeys, apiserverconfigv1.Key{ + Name: "identity", + Secret: "identity", + }) + } if p.AESCBC != nil { curKeys = append(curKeys, p.AESCBC.Keys...) } @@ -121,10 +141,10 @@ func GenEncryptionConfigHash(runtime *config.ControlRuntime) (string, error) { } // GenReencryptHash generates a sha256 hash from the existing secrets keys and -// a new key based on the input arguments. +// any identity providers plus a new key based on the input arguments. func GenReencryptHash(runtime *config.ControlRuntime, keyName string) (string, error) { - keys, err := GetEncryptionKeys(runtime) + keys, err := GetEncryptionKeys(runtime, true) if err != nil { return "", err } @@ -174,3 +194,93 @@ func WriteEncryptionHashAnnotation(runtime *config.ControlRuntime, node *corev1. logrus.Debugf("encryption hash annotation set successfully on node: %s\n", node.ObjectMeta.Name) return os.WriteFile(runtime.EncryptionHash, []byte(ann), 0600) } + +// WaitForEncryptionConfigReload watches the metrics API, polling the latest time the encryption config was reloaded. +func WaitForEncryptionConfigReload(runtime *config.ControlRuntime, reloadSuccesses, reloadTime int64) error { + var lastFailure string + err := wait.PollImmediate(5*time.Second, 60*time.Second, func() (bool, error) { + + newReloadTime, newReloadSuccess, err := GetEncryptionConfigMetrics(runtime, false) + if err != nil { + return true, err + } + + if newReloadSuccess <= reloadSuccesses || newReloadTime <= reloadTime { + lastFailure = fmt.Sprintf("apiserver has not reloaded encryption configuration (reload success: %d/%d, reload timestamp %d/%d)", newReloadSuccess, reloadSuccesses, newReloadTime, reloadTime) + return false, nil + } + logrus.Infof("encryption config reloaded successfully %d times", newReloadSuccess) + logrus.Debugf("encryption config reloaded at %s", time.Unix(newReloadTime, 0)) + return true, nil + }) + if err != nil { + err = fmt.Errorf("%w: %s", err, lastFailure) + } + return err +} + +// GetEncryptionConfigMetrics fetches the metrics API and returns the last time the encryption config was reloaded +// and the number of times it has been reloaded. +func GetEncryptionConfigMetrics(runtime *config.ControlRuntime, initialMetrics bool) (int64, int64, error) { + var unixUpdateTime int64 + var reloadSuccessCounter int64 + var lastFailure string + restConfig, err := clientcmd.BuildConfigFromFlags("", runtime.KubeConfigSupervisor) + if err != nil { + return 0, 0, err + } + restConfig.GroupVersion = &apiserverconfigv1.SchemeGroupVersion + restConfig.NegotiatedSerializer = scheme.Codecs.WithoutConversion() + restClient, err := rest.RESTClientFor(restConfig) + if err != nil { + return 0, 0, err + } + + // This is wrapped in a poller because on startup no metrics exist. Its only after the encryption config + // is modified and the first reload occurs that the metrics are available. + err = wait.PollImmediate(5*time.Second, 60*time.Second, func() (bool, error) { + data, err := restClient.Get().AbsPath("/metrics").DoRaw(context.TODO()) + if err != nil { + return true, err + } + + reader := bytes.NewReader(data) + var parser expfmt.TextParser + mf, err := parser.TextToMetricFamilies(reader) + if err != nil { + return true, err + } + tsMetric := mf["apiserver_encryption_config_controller_automatic_reload_last_timestamp_seconds"] + successMetric := mf["apiserver_encryption_config_controller_automatic_reload_success_total"] + + // First time, no metrics exist, so return zeros + if tsMetric == nil && successMetric == nil && initialMetrics { + return true, nil + } + + if tsMetric == nil { + lastFailure = "encryption config time metric not found" + return false, nil + } + + if successMetric == nil { + lastFailure = "encryption config success metric not found" + return false, nil + } + + unixUpdateTime = int64(tsMetric.GetMetric()[0].GetGauge().GetValue()) + if time.Now().Unix() < unixUpdateTime { + return true, fmt.Errorf("encryption reload time is incorrectly ahead of current time") + } + + reloadSuccessCounter = int64(successMetric.GetMetric()[0].GetCounter().GetValue()) + + return true, nil + }) + + if err != nil { + err = fmt.Errorf("%w: %s", err, lastFailure) + } + + return unixUpdateTime, reloadSuccessCounter, err +} diff --git a/pkg/secretsencrypt/controller.go b/pkg/secretsencrypt/controller.go index 7fb76f6ce6b7..e377fe548895 100644 --- a/pkg/secretsencrypt/controller.go +++ b/pkg/secretsencrypt/controller.go @@ -18,6 +18,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" "k8s.io/client-go/tools/pager" "k8s.io/client-go/tools/record" "k8s.io/client-go/util/retry" @@ -41,11 +42,20 @@ type handler struct { func Register( ctx context.Context, - k8s kubernetes.Interface, + controllerName string, controlConfig *config.Control, nodes coreclient.NodeController, secrets coreclient.SecretController, ) error { + restConfig, err := clientcmd.BuildConfigFromFlags("", controlConfig.Runtime.KubeConfigSupervisor) + if err != nil { + return err + } + k8s, err := kubernetes.NewForConfig(restConfig) + if err != nil { + return err + } + h := &handler{ ctx: ctx, controlConfig: controlConfig, @@ -54,7 +64,7 @@ func Register( recorder: util.BuildControllerEventRecorder(k8s, controllerAgentName, metav1.NamespaceDefault), } - nodes.OnChange(ctx, "reencrypt-controller", h.onChangeNode) + nodes.OnChange(ctx, controllerName, h.onChangeNode) return nil } @@ -126,22 +136,19 @@ func (h *handler) onChangeNode(nodeName string, node *corev1.Node) (*corev1.Node } // Remove last key - curKeys, err := GetEncryptionKeys(h.controlConfig.Runtime) + curKeys, err := GetEncryptionKeys(h.controlConfig.Runtime, false) if err != nil { h.recorder.Event(nodeRef, corev1.EventTypeWarning, secretsUpdateErrorEvent, err.Error()) return node, err } + logrus.Infoln("Removing key: ", curKeys[len(curKeys)-1]) curKeys = curKeys[:len(curKeys)-1] if err = WriteEncryptionConfig(h.controlConfig.Runtime, curKeys, true); err != nil { h.recorder.Event(nodeRef, corev1.EventTypeWarning, secretsUpdateErrorEvent, err.Error()) return node, err } - logrus.Infoln("Removed key: ", curKeys[len(curKeys)-1]) - if err != nil { - h.recorder.Event(nodeRef, corev1.EventTypeWarning, secretsUpdateErrorEvent, err.Error()) - return node, err - } + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { node, err = h.nodes.Get(nodeName, metav1.GetOptions{}) if err != nil { diff --git a/pkg/server/secrets-encrypt.go b/pkg/server/secrets-encrypt.go index d7947a997eb9..0acda0702fa4 100644 --- a/pkg/server/secrets-encrypt.go +++ b/pkg/server/secrets-encrypt.go @@ -128,7 +128,7 @@ func encryptionEnable(ctx context.Context, server *config.Control, enable bool) if len(providers) > 2 { return fmt.Errorf("more than 2 providers (%d) found in secrets encryption", len(providers)) } - curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime) + curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime, false) if err != nil { return err } @@ -209,7 +209,7 @@ func encryptionPrepare(ctx context.Context, server *config.Control, force bool) return err } - curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime) + curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime, false) if err != nil { return err } @@ -241,7 +241,7 @@ func encryptionRotate(ctx context.Context, server *config.Control, force bool) e return err } - curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime) + curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime, false) if err != nil { return err } @@ -293,7 +293,7 @@ func encryptionReencrypt(ctx context.Context, server *config.Control, force bool } func addAndRotateKeys(server *config.Control) error { - curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime) + curKeys, err := secretsencrypt.GetEncryptionKeys(server.Runtime, false) if err != nil { return err } @@ -309,7 +309,7 @@ func addAndRotateKeys(server *config.Control) error { // Right rotate elements rotatedKeys := append(curKeys[len(curKeys)-1:], curKeys[:len(curKeys)-1]...) - + logrus.Infoln("Rotating secrets-encryption keys") return secretsencrypt.WriteEncryptionConfig(server.Runtime, rotatedKeys, true) } @@ -325,10 +325,19 @@ func encryptionRotateKeys(ctx context.Context, server *config.Control) error { return err } + reloadTime, reloadSuccesses, err := secretsencrypt.GetEncryptionConfigMetrics(server.Runtime, true) + if err != nil { + return err + } + if err := addAndRotateKeys(server); err != nil { return err } + if err := secretsencrypt.WaitForEncryptionConfigReload(server.Runtime, reloadSuccesses, reloadTime); err != nil { + return err + } + return setReencryptAnnotation(server) } diff --git a/pkg/server/server.go b/pkg/server/server.go index b1a96afc852e..925830a9575a 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -245,16 +245,6 @@ func coreControllers(ctx context.Context, sc *Context, config *Config) error { core.V1().Secret()) } - if config.ControlConfig.EncryptSecrets { - if err := secretsencrypt.Register(ctx, - sc.K8s, - &config.ControlConfig, - sc.Core.Core().V1().Node(), - sc.Core.Core().V1().Secret()); err != nil { - return err - } - } - if config.ControlConfig.Rootless { return rootlessports.Register(ctx, sc.Core.Core().V1().Service(), diff --git a/tests/e2e/secretsencryption/Vagrantfile b/tests/e2e/secretsencryption/Vagrantfile index d93ddf3d13f3..94cd61fc683e 100644 --- a/tests/e2e/secretsencryption/Vagrantfile +++ b/tests/e2e/secretsencryption/Vagrantfile @@ -44,10 +44,11 @@ def provision(vm, role, role_num, node_num) if vm.box.to_s.include?("microos") vm.provision 'k3s-reload', type: 'reload', run: 'once' end + vm.provision 'k3s-autocomplete', type: 'shell', inline: "k3s completion -i bash" end Vagrant.configure("2") do |config| - config.vagrant.plugins = ["vagrant-k3s", "vagrant-reload"] + config.vagrant.plugins = ["vagrant-k3s", "vagrant-reload", "vagrant-scp"] # Default provider is libvirt, virtualbox is only provided as a backup config.vm.provider "libvirt" do |v| v.cpus = NODE_CPUS diff --git a/tests/e2e/secretsencryption/secretsencryption_test.go b/tests/e2e/secretsencryption/secretsencryption_test.go index 907833514c40..61b886467d5a 100644 --- a/tests/e2e/secretsencryption/secretsencryption_test.go +++ b/tests/e2e/secretsencryption/secretsencryption_test.go @@ -113,7 +113,7 @@ var _ = Describe("Verify Secrets Encryption Rotation", Ordered, func() { } else { g.Expect(res).Should(ContainSubstring("Current Rotation Stage: start")) } - }, "420s", "2s").Should(Succeed()) + }, "420s", "10s").Should(Succeed()) } })