From efebe5bd7e864b23e8520aab2574fa53b441a365 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 31 Oct 2024 09:31:06 +0000 Subject: [PATCH 01/12] Move the pprof endpoints to the main HTTP server rather than starting a second server (#600) Signed-off-by: Richard Wall --- pkg/agent/config.go | 2 +- pkg/agent/run.go | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 241f70f8..25e4d253 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -281,7 +281,7 @@ func InitAgentCmdFlags(c *cobra.Command, cfg *AgentCmdFlags) { "enable-pprof", "", false, - "Enables the pprof profiling server on the agent (port: 6060).", + "Enables the pprof profiling endpoints on the agent server (port: 8081).", ) c.PersistentFlags().BoolVarP( &cfg.Prometheus, diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 6db9035a..fc4a828f 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -34,7 +34,7 @@ import ( "github.com/jetstack/preflight/pkg/logs" "github.com/jetstack/preflight/pkg/version" - _ "net/http/pprof" + "net/http/pprof" ) var Flags AgentCmdFlags @@ -74,19 +74,18 @@ func Run(cmd *cobra.Command, args []string) { logs.Log.Fatalf("While evaluating configuration: %v", err) } - if Flags.Profiling { - logs.Log.Printf("pprof profiling was enabled.\nRunning profiling on port :6060") - go func() { - err := http.ListenAndServe(":6060", nil) - if err != nil && !errors.Is(err, http.ErrServerClosed) { - logs.Log.Fatalf("failed to run pprof profiler: %s", err) - } - }() - } - go func() { server := http.NewServeMux() + if Flags.Profiling { + logs.Log.Printf("pprof profiling was enabled.") + server.HandleFunc("/debug/pprof/", pprof.Index) + server.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) + server.HandleFunc("/debug/pprof/profile", pprof.Profile) + server.HandleFunc("/debug/pprof/symbol", pprof.Symbol) + server.HandleFunc("/debug/pprof/trace", pprof.Trace) + } + if Flags.Prometheus { logs.Log.Printf("Prometheus was enabled.\nRunning prometheus on port :8081") prometheus.MustRegister(metricPayloadSize) From 415d434c586cb98dbe9ef2df9b5dd971cde39fda Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 31 Oct 2024 09:36:47 +0000 Subject: [PATCH 02/12] [VC-35738] Log with klog to stdout and stderr in Kubernetes text format (#596) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Log with klog to stdout and stderr in Kubernetes text format Signed-off-by: Richard Wall Co-authored-by: MaĆ«l Valais --- cmd/root.go | 6 + go.mod | 7 +- hack/e2e/test.sh | 13 +- hack/e2e/values.venafi-kubernetes-agent.yaml | 3 + pkg/logs/logs.go | 155 ++++++- pkg/logs/logs_test.go | 448 +++++++++++++++++++ 6 files changed, 620 insertions(+), 12 deletions(-) create mode 100644 pkg/logs/logs_test.go diff --git a/cmd/root.go b/cmd/root.go index a83f604b..826b4295 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,6 +5,7 @@ import ( "os" "strings" + "github.com/jetstack/preflight/pkg/logs" "github.com/spf13/cobra" "github.com/spf13/pflag" ) @@ -17,6 +18,9 @@ var rootCmd = &cobra.Command{ configuration checks using Open Policy Agent (OPA). Preflight checks are bundled into Packages`, + PersistentPreRun: func(cmd *cobra.Command, args []string) { + logs.Initialize() + }, } func init() { @@ -28,6 +32,8 @@ func init() { // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute() { + logs.AddFlags(rootCmd.PersistentFlags()) + if err := rootCmd.Execute(); err != nil { fmt.Println(err) os.Exit(1) diff --git a/go.mod b/go.mod index 8c0d2cc4..307a7710 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/jetstack/preflight go 1.22.0 require ( + github.com/Venafi/vcert/v5 v5.7.1 github.com/cenkalti/backoff v2.2.1+incompatible github.com/d4l3k/messagediff v1.2.1 github.com/fatih/color v1.17.0 @@ -22,13 +23,13 @@ require ( k8s.io/api v0.31.1 k8s.io/apimachinery v0.31.1 k8s.io/client-go v0.31.1 + k8s.io/component-base v0.31.0 sigs.k8s.io/controller-runtime v0.19.0 sigs.k8s.io/yaml v1.4.0 ) require ( github.com/Khan/genqlient v0.7.0 // indirect - github.com/Venafi/vcert/v5 v5.7.1 // indirect github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/aymerick/douceur v0.2.0 // indirect @@ -37,6 +38,7 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-http-utils/headers v0.0.0-20181008091004-fed159eddc2a // indirect + github.com/go-logr/zapr v1.3.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/cel-go v0.20.1 // indirect github.com/google/gnostic-models v0.6.9-0.20230804172637-c7be7c783f49 // indirect @@ -62,7 +64,6 @@ require ( gopkg.in/ini.v1 v1.67.0 // indirect k8s.io/apiextensions-apiserver v0.31.0 // indirect k8s.io/apiserver v0.31.0 // indirect - k8s.io/component-base v0.31.0 // indirect ) require ( @@ -100,7 +101,7 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 - k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/klog/v2 v2.130.1 k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f // indirect k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/hack/e2e/test.sh b/hack/e2e/test.sh index a7b90cbf..c1f88be6 100755 --- a/hack/e2e/test.sh +++ b/hack/e2e/test.sh @@ -191,14 +191,11 @@ EOF envsubst (grep -v -e "reflector\.go" -e "datagatherer" -e "data gatherer" >/dev/stderr) \ - | grep -q "Data sent successfully" + --follow \ + --namespace venafi \ + | timeout 60 jq 'if .msg | test("Data sent successfully") then . | halt_error(0) end' diff --git a/hack/e2e/values.venafi-kubernetes-agent.yaml b/hack/e2e/values.venafi-kubernetes-agent.yaml index ff55025d..06b475f1 100644 --- a/hack/e2e/values.venafi-kubernetes-agent.yaml +++ b/hack/e2e/values.venafi-kubernetes-agent.yaml @@ -6,3 +6,6 @@ config: authentication: venafiConnection: enabled: true + +extraArgs: +- --logging-format=json diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index 042281ae..9dd988b8 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -1,8 +1,161 @@ package logs import ( + "bytes" + "fmt" "log" + "log/slog" "os" + "strings" + + "github.com/spf13/pflag" + "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/component-base/featuregate" + "k8s.io/component-base/logs" + logsapi "k8s.io/component-base/logs/api/v1" + _ "k8s.io/component-base/logs/json/register" +) + +// venafi-kubernetes-agent follows [Kubernetes Logging Conventions] and writes +// logs in [Kubernetes text logging format] by default. It does not support +// named levels (aka. severity), instead it uses arbitrary levels. Errors and +// warnings are logged to stderr and Info messages to stdout, because that is +// how some cloud logging systems (notably Google Cloud Logs Explorer) assign a +// severity (INFO or ERROR) in the UI. The agent's and vcert's logs are written +// logged as Info messages with level=0. +// +// Further reading: +// - [Kubernetes logging conventions](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/logging.md) +// - [Kubernetes text logging format](https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/logging.md#text-logging-format) +// - [Why not named levels, like Info/Warning/Error?](https://github.com/go-logr/logr?tab=readme-ov-file#why-not-named-levels-like-infowarningerror) +// - [GKE logs best practices](https://cloud.google.com/kubernetes-engine/docs/concepts/about-logs#best_practices) +// - [Structured Logging KEP](https://github.com/kubernetes/enhancements/blob/master/keps/sig-instrumentation/1602-structured-logging/README.md) +// - [Examples of using k8s.io/component-base/logs](https://github.com/kubernetes/kubernetes/tree/master/staging/src/k8s.io/component-base/logs/example), +// upon which this code was based. + +var ( + // This is the Agent's logger. For now, it is still a *log.Logger, but we + // mean to migrate everything to slog with the klog backend. We avoid using + // log.Default because log.Default is already used by the VCert library, and + // we need to keep the agent's logger from the VCert's logger to be able to + // remove the `vCert: ` prefix from the VCert logs. + Log *log.Logger + + // All but the essential logging flags will be hidden to avoid overwhelming + // the user. The hidden flags can still be used. For example if a user does + // not like the split-stream behavior and a Venafi field engineer can + // instruct them to patch --log-json-split-stream=false on to the Deployment + // arguments. + visibleFlagNames = sets.New[string]("v", "vmodule", "logging-format") + // This default logging configuration will be updated with values from the + // logging flags, even those that are hidden. + configuration = logsapi.NewLoggingConfiguration() + // Logging features will be added to this feature gate, but the + // feature-gates flag will be hidden from the user. + features = featuregate.NewFeatureGate() ) -var Log = log.New(os.Stderr, "", log.LstdFlags) +func init() { + runtime.Must(logsapi.AddFeatureGates(features)) + // Turn on ALPHA options to enable the split-stream logging options. + runtime.Must(features.OverrideDefault(logsapi.LoggingAlphaOptions, true)) +} + +// AddFlags adds log related flags to the supplied flag set. +// +// The split-stream options are enabled by default, so that errors are logged to +// stderr and info to stdout, allowing cloud logging systems to assign a +// severity INFO or ERROR to the messages. +func AddFlags(fs *pflag.FlagSet) { + var tfs pflag.FlagSet + logsapi.AddFlags(configuration, &tfs) + features.AddFlag(&tfs) + tfs.VisitAll(func(f *pflag.Flag) { + if !visibleFlagNames.Has(f.Name) { + tfs.MarkHidden(f.Name) + } + + // The original usage string includes details about how + // JSON logging is only available when BETA logging features are + // enabled, but that's not relevant here because the feature is enabled + // by default. + if f.Name == "logging-format" { + f.Usage = `Sets the log format. Permitted formats: "json", "text".` + } + if f.Name == "log-text-split-stream" { + f.DefValue = "true" + runtime.Must(f.Value.Set("true")) + } + if f.Name == "log-json-split-stream" { + f.DefValue = "true" + runtime.Must(f.Value.Set("true")) + } + + // Since `--v` (which is the long form of `-v`) isn't the standard in + // our projects (it only exists in cert-manager, webhook, and such), + // let's rename it to the more common `--log-level`, which appears in + // openshift-routes, csi-driver, trust-manager, and approver-policy. + // More details at: + // https://github.com/jetstack/jetstack-secure/pull/596#issuecomment-2421708181 + if f.Name == "v" { + f.Name = "log-level" + f.Shorthand = "v" + } + }) + fs.AddFlagSet(&tfs) +} + +// Initialize uses k8s.io/component-base/logs, to configure the following global +// loggers: log, slog, and klog. All are configured to write in the same format. +func Initialize() { + // This configures the global logger in klog *and* slog, if compiled with Go + // >= 1.21. + logs.InitLogs() + if err := logsapi.ValidateAndApply(configuration, features); err != nil { + fmt.Fprintf(os.Stderr, "Error in logging configuration: %v\n", err) + os.Exit(2) + } + + // Thanks to logs.InitLogs, slog.Default now uses klog as its backend. Thus, + // the client-go library, which relies on klog.Info, has the same logger as + // the agent, which still uses log.Printf. + slog := slog.Default() + + Log = &log.Logger{} + Log.SetOutput(LogToSlogWriter{Slog: slog, Source: "agent"}) + + // Let's make sure the VCert library, which is the only library we import to + // be using the global log.Default, also uses the common slog logger. + vcertLog := log.Default() + vcertLog.SetOutput(LogToSlogWriter{Slog: slog, Source: "vcert"}) + // This is a work around for a bug in vcert where it adds a `vCert: ` prefix + // to the global log logger. It can be removed when this is fixed upstream + // in vcert: https://github.com/Venafi/vcert/pull/512 + vcertLog.SetPrefix("") +} + +type LogToSlogWriter struct { + Slog *slog.Logger + Source string +} + +func (w LogToSlogWriter) Write(p []byte) (n int, err error) { + // log.Printf writes a newline at the end of the message, so we need to trim + // it. + p = bytes.TrimSuffix(p, []byte("\n")) + + message := string(p) + if strings.Contains(message, "error") || + strings.Contains(message, "failed") || + strings.Contains(message, "fatal") || + strings.Contains(message, "Failed") || + strings.Contains(message, "While evaluating configuration") || + strings.Contains(message, "data-path override present") || + strings.Contains(message, "Cannot marshal readings") { + w.Slog.With("source", w.Source).Error(message) + } else { + w.Slog.With("source", w.Source).Info(message) + } + return len(p), nil +} diff --git a/pkg/logs/logs_test.go b/pkg/logs/logs_test.go new file mode 100644 index 00000000..3ddd6da1 --- /dev/null +++ b/pkg/logs/logs_test.go @@ -0,0 +1,448 @@ +package logs_test + +import ( + "bytes" + "context" + "errors" + "log" + "log/slog" + "os" + "os/exec" + "regexp" + "strings" + "testing" + "time" + + _ "github.com/Venafi/vcert/v5" + "github.com/spf13/pflag" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/klog/v2" + + "github.com/jetstack/preflight/pkg/logs" +) + +// TestLogs demonstrates how the logging flags affect the logging output. +// +// The test executes itself with as a sub-process to avoid mutating the global +// logging configuration. +// +// Inspired by: +// - https://stackoverflow.com/a/67945462 +// - https://go.dev/src/flag/flag_test.go (TestExitCode) +func TestLogs(t *testing.T) { + if flags, found := os.LookupEnv("GO_CHILD_FLAG"); found { + if _, found := os.LookupEnv("GO_CHILD_SKIP_INITIALIZE"); !found { + fs := pflag.NewFlagSet("test-logs", pflag.ExitOnError) + logs.AddFlags(fs) + fs.Parse(strings.Split(flags, " ")) + logs.Initialize() + } + + log.Print("log Print") + slog.Info("slog Info") + slog.Warn("slog Warn") + slog.Error("slog Error") + klog.Info("klog Info") + klog.Warning("klog Warning") + klog.ErrorS(errors.New("fake-error"), "klog Error") + klog.InfoS("klog InfoS", "key", "value") + logger := klog.FromContext(context.Background()).WithName("foo") + logger.V(3).Info("Contextual Info Level 3", "key", "value") + logger.Error(errors.New("fake-error"), "Contextual error", "key", "value") + + klog.FlushAndExit(time.Second, 0) + } + + tests := []struct { + name string + flags string + skipIntialize bool + expectError bool + expectStdout string + expectStderr string + }{ + { + name: "help", + flags: "-h", + expectError: true, + expectStdout: ` +pflag: help requested +`, + expectStderr: ` +Usage of test-logs: + -v, --log-level Level number for the log level verbosity + --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") + --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) +`, + }, + { + name: "unrecognized-flag", + flags: "--foo", + expectError: true, + expectStdout: ` +unknown flag: --foo +`, + expectStderr: ` +unknown flag: --foo +Usage of test-logs: + -v, --log-level Level number for the log level verbosity + --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") + --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) +`, + }, + { + name: "v-long-form-not-available", + flags: "--v=3", + expectError: true, + expectStdout: ` +unknown flag: --v +`, + expectStderr: ` +unknown flag: --v +Usage of test-logs: + -v, --log-level Level number for the log level verbosity + --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") + --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) +`, + }, + { + name: "logging-format-unrecognized", + flags: "--logging-format=foo", + expectError: true, + expectStderr: ` +Error in logging configuration: format: Invalid value: "foo": Unsupported log format +`, + }, + { + name: "original-defaults", + flags: "", + skipIntialize: true, + expectStderr: ` +vCert: 0000/00/00 00:00:00 log Print +vCert: 0000/00/00 00:00:00 INFO slog Info +vCert: 0000/00/00 00:00:00 WARN slog Warn +vCert: 0000/00/00 00:00:00 ERROR slog Error +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "modified-defaults", + flags: "", + expectStdout: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +`, + expectStderr: ` +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "logging-format-json", + flags: "--logging-format=json", + expectStdout: ` +{"ts":0000000000000.000,"caller":"logs/logs.go:000","msg":"log Print","source":"vcert","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Info","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Warn","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Info","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Warning","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog InfoS","v":0,"key":"value"} +`, + expectStderr: ` +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Error"} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Error","err":"fake-error"} +{"ts":0000000000000.000,"logger":"foo","caller":"logs/logs_test.go:000","msg":"Contextual error","key":"value","err":"fake-error"} +`, + }, + { + name: "log-json-split-stream-false", + flags: "--logging-format=json --log-json-split-stream=false", + expectStderr: ` +{"ts":0000000000000.000,"caller":"logs/logs.go:000","msg":"log Print","source":"vcert","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Info","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Warn","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Error"} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Info","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Warning","v":0} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog Error","err":"fake-error"} +{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"klog InfoS","v":0,"key":"value"} +{"ts":0000000000000.000,"logger":"foo","caller":"logs/logs_test.go:000","msg":"Contextual error","key":"value","err":"fake-error"} +`, + }, + { + name: "logging-format-text", + flags: "--logging-format=text", + expectStdout: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +`, + expectStderr: ` +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "log-text-split-stream-false", + flags: "--logging-format=text --log-text-split-stream=false", + expectStderr: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "v-level-3", + flags: "-v=3", + expectStdout: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +I0000 00:00:00.000000 00000 logs_test.go:000] "Contextual Info Level 3" logger="foo" key="value" +`, + expectStderr: ` +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "log-level-3", + flags: "--log-level=3", + expectStdout: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +I0000 00:00:00.000000 00000 logs_test.go:000] "Contextual Info Level 3" logger="foo" key="value" +`, + expectStderr: ` +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + { + name: "vmodule-level-3", + flags: "--vmodule=logs_test=3", + expectStdout: ` +I0000 00:00:00.000000 00000 logs.go:000] "log Print" source="vcert" +I0000 00:00:00.000000 00000 logs_test.go:000] "slog Info" +I0000 00:00:00.000000 00000 logs_test.go:000] klog Info +I0000 00:00:00.000000 00000 logs_test.go:000] "klog InfoS" key="value" +I0000 00:00:00.000000 00000 logs_test.go:000] "Contextual Info Level 3" logger="foo" key="value" +`, + expectStderr: ` +W0000 00:00:00.000000 00000 logs_test.go:000] "slog Warn" +E0000 00:00:00.000000 00000 logs_test.go:000] "slog Error" +W0000 00:00:00.000000 00000 logs_test.go:000] klog Warning +E0000 00:00:00.000000 00000 logs_test.go:000] "klog Error" err="fake-error" +E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-error" logger="foo" key="value" +`, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + cmd := exec.Command(os.Args[0], "-test.run=^TestLogs$", "-test.v") + var ( + stdout bytes.Buffer + stderr bytes.Buffer + ) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Env = append( + os.Environ(), + "GO_CHILD_FLAG="+test.flags, + ) + if test.skipIntialize { + cmd.Env = append( + cmd.Env, + "GO_CHILD_SKIP_INITIALIZE=true", + ) + } + err := cmd.Run() + + t.Logf("FLAGS\n%s\n", test.flags) + // Remove the standard output generated by `-test.v` + stdoutStr := strings.TrimPrefix(stdout.String(), "=== RUN TestLogs\n") + stderrStr := stderr.String() + t.Logf("STDOUT\n%s\n", stdoutStr) + t.Logf("STDERR\n%s\n", stderrStr) + if test.expectError { + var target *exec.ExitError + require.ErrorAs(t, err, &target) + require.Equal(t, 2, target.ExitCode(), "Flag parsing failures should always result in exit code 2") + t.Logf("ERROR: %v", err) + } else { + require.NoError(t, err) + } + + // This trick helps with the readability of the table test: we can + // have the first "expected" log line at the same level as the other + // lines. + test.expectStdout = strings.TrimPrefix(test.expectStdout, "\n") + test.expectStderr = strings.TrimPrefix(test.expectStderr, "\n") + + require.Equal(t, test.expectStdout, replaceWithStaticTimestamps(stdoutStr), "stdout doesn't match") + require.Equal(t, test.expectStderr, replaceWithStaticTimestamps(stderrStr), "stderr doesn't match") + }) + } +} + +var ( + timestampRegexpStdLog = regexp.MustCompile(`\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}`) + timestampRegexpKlog = regexp.MustCompile(`\d{4} \d{2}:\d{2}:\d{2}\.\d{6} +\d+`) + timestampRegexpJSON = regexp.MustCompile(`"ts":\d+\.?\d*`) + fileAndLineRegexpJSON = regexp.MustCompile(`"caller":"([^"]+).go:\d+"`) + fileAndLineRegexpKlog = regexp.MustCompile(` ([^:]+).go:\d+`) +) + +// Replaces the klog and JSON timestamps with a static timestamp to make it +// easier to assert the logs. It also replaces the line number with 000 as it +// often changes. +// +// I1018 15:12:57.953433 22183 logs.go:000] log +// {"ts":1729258473588.828,"caller":"log/log.go:000","msg":"log Print","v":0} +// 2024/10/18 15:40:50 log Print +// +// to the fixed: +// +// I0000 00:00:00.000000 00000 logs.go:000] log +// {"ts":0000000000000.000,"caller":"log/log.go:000","msg":"log Print","v":0} +// 0000/00/00 00:00:00 log Print +func replaceWithStaticTimestamps(input string) string { + input = timestampRegexpKlog.ReplaceAllString(input, "0000 00:00:00.000000 00000") + input = timestampRegexpJSON.ReplaceAllString(input, `"ts":0000000000000.000`) + input = timestampRegexpStdLog.ReplaceAllString(input, "0000/00/00 00:00:00") + input = fileAndLineRegexpJSON.ReplaceAllString(input, `"caller":"$1.go:000"`) + input = fileAndLineRegexpKlog.ReplaceAllString(input, " $1.go:000") + return input +} + +func Test_replaceWithStaticTimestamps(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "klog", + input: `I1018 15:20:42.861239 2386 logs_test.go:13] "Contextual Info Level 3" logger="foo" key="value"`, + expected: `I0000 00:00:00.000000 00000 logs_test.go:000] "Contextual Info Level 3" logger="foo" key="value"`, + }, + { + name: "json-with-nanoseconds", + input: `{"ts":1729270111728.125,"caller":"logs/logs_test.go:000","msg":"slog Warn","v":0}`, + expected: `{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Warn","v":0}`, + }, + { + name: "json-might-not-have-nanoseconds", + input: `{"ts":1729270111728,"caller":"logs/logs_test.go:000","msg":"slog Info","v":0}`, + expected: `{"ts":0000000000000.000,"caller":"logs/logs_test.go:000","msg":"slog Info","v":0}`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, replaceWithStaticTimestamps(test.input)) + }) + } +} + +func TestLogToSlogWriter(t *testing.T) { + // This test makes sure that all the agent's Log.Fatalf calls are correctly + // translated to slog.Error calls. + // + // This list was generated using: + // grep -r "Log\.Fatalf" ./cmd ./pkg + given := strings.TrimPrefix(` +Failed to load config file for agent from +Failed to read config file +Failed to parse config file +While evaluating configuration +failed to run pprof profiler +failed to run the health check server +failed to start a controller-runtime component +failed to wait for controller-runtime component to stop +running data gatherer %s of type %s as Local, data-path override present +failed to instantiate %q data gatherer +failed to read local data file +failed to unmarshal local data file +failed to output to local file +Exiting due to fatal error uploading +halting datagathering in strict mode due to error +Cannot marshal readings +Failed to read config file +Failed to parse config file +Failed to validate data gatherers +this is a happy log that should show as INFO`, "\n") + expect := strings.TrimPrefix(` +level=ERROR msg="Failed to load config file for agent from" source=agent +level=ERROR msg="Failed to read config file" source=agent +level=ERROR msg="Failed to parse config file" source=agent +level=ERROR msg="While evaluating configuration" source=agent +level=ERROR msg="failed to run pprof profiler" source=agent +level=ERROR msg="failed to run the health check server" source=agent +level=ERROR msg="failed to start a controller-runtime component" source=agent +level=ERROR msg="failed to wait for controller-runtime component to stop" source=agent +level=ERROR msg="running data gatherer %!s(MISSING) of type %!s(MISSING) as Local, data-path override present" source=agent +level=ERROR msg="failed to instantiate %!q(MISSING) data gatherer" source=agent +level=ERROR msg="failed to read local data file" source=agent +level=ERROR msg="failed to unmarshal local data file" source=agent +level=ERROR msg="failed to output to local file" source=agent +level=ERROR msg="Exiting due to fatal error uploading" source=agent +level=ERROR msg="halting datagathering in strict mode due to error" source=agent +level=ERROR msg="Cannot marshal readings" source=agent +level=ERROR msg="Failed to read config file" source=agent +level=ERROR msg="Failed to parse config file" source=agent +level=ERROR msg="Failed to validate data gatherers" source=agent +level=INFO msg="this is a happy log that should show as INFO" source=agent +`, "\n") + + gotBuf := &bytes.Buffer{} + slogHandler := slog.NewTextHandler(gotBuf, &slog.HandlerOptions{ + // Remove the timestamp from the logs so that we can compare them. + ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr { + if a.Key == "time" { + return slog.Attr{} + } + return a + }, + }) + slogLogger := slog.New(slogHandler) + + logger := log.New(&bytes.Buffer{}, "", 0) + logger.SetOutput(logs.LogToSlogWriter{Slog: slogLogger, Source: "agent"}) + + for _, line := range strings.Split(given, "\n") { + // Simulate the current agent's logs. + logger.Printf(line) + } + + assert.Equal(t, expect, gotBuf.String()) +} From c87f7c39403b4b87530ba18934ab42bb6e26494a Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Fri, 1 Nov 2024 12:06:35 +0000 Subject: [PATCH 03/12] Use errgroup for all go routines (#601) Instead of using log.Fatal in some of them Signed-off-by: Richard Wall --- pkg/agent/run.go | 51 +++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/pkg/agent/run.go b/pkg/agent/run.go index fc4a828f..31b0ad8f 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -74,7 +74,16 @@ func Run(cmd *cobra.Command, args []string) { logs.Log.Fatalf("While evaluating configuration: %v", err) } - go func() { + group, gctx := errgroup.WithContext(ctx) + defer func() { + // TODO: replace Fatalf log calls with Errorf and return the error + cancel() + if err := group.Wait(); err != nil { + logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err) + } + }() + + group.Go(func() error { server := http.NewServeMux() if Flags.Profiling { @@ -105,21 +114,25 @@ func Run(cmd *cobra.Command, args []string) { err := http.ListenAndServe(":8081", server) if err != nil && !errors.Is(err, http.ErrServerClosed) { - logs.Log.Fatalf("failed to run the health check server: %s", err) + return fmt.Errorf("failed to run the health check server: %s", err) } - }() + // The agent must stop if the management server stops + cancel() + return nil + }) _, isVenConn := preflightClient.(*client.VenConnClient) if isVenConn { - go func() { - err := preflightClient.(manager.Runnable).Start(ctx) + group.Go(func() error { + err := preflightClient.(manager.Runnable).Start(gctx) if err != nil { - logs.Log.Fatalf("failed to start a controller-runtime component: %v", err) + return fmt.Errorf("failed to start a controller-runtime component: %v", err) } // The agent must stop if the controller-runtime component stops. cancel() - }() + return nil + }) } // To help users notice issues with the agent, we show the error messages in @@ -130,15 +143,6 @@ func Run(cmd *cobra.Command, args []string) { } dataGatherers := map[string]datagatherer.DataGatherer{} - group, gctx := errgroup.WithContext(ctx) - - defer func() { - // TODO: replace Fatalf log calls with Errorf and return the error - cancel() - if err := group.Wait(); err != nil { - logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err) - } - }() // load datagatherer config and boot each one for _, dgConfig := range config.DataGatherers { @@ -160,6 +164,8 @@ func Run(cmd *cobra.Command, args []string) { if err := newDg.Run(gctx.Done()); err != nil { return fmt.Errorf("failed to start %q data gatherer %q: %v", kind, dgConfig.Name, err) } + // The agent must stop if any of the data gatherers stops + cancel() return nil }) @@ -192,7 +198,12 @@ func Run(cmd *cobra.Command, args []string) { // begin the datagathering loop, periodically sending data to the // configured output using data in datagatherer caches or refreshing from - // APIs each cycle depending on datagatherer implementation + // APIs each cycle depending on datagatherer implementation. + // If any of the go routines exit (with nil or error) the main context will + // be cancelled, which will cause this blocking loop to exit + // instead of waiting for the time period. + // TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't + // have to wait for it to finish before exiting the process. for { gatherAndOutputData(eventf, config, preflightClient, dataGatherers) @@ -200,7 +211,11 @@ func Run(cmd *cobra.Command, args []string) { break } - time.Sleep(config.Period) + select { + case <-gctx.Done(): + return + case <-time.After(config.Period): + } } } From 41553d41d220bf47e8bff13fe556dd21321af08e Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 5 Nov 2024 13:23:22 +0000 Subject: [PATCH 04/12] [VC-35738] Stop the API server when the context is cancelled (#604) * Test that one-shot mode exits * Stop the server when the context is cancelled --- pkg/agent/run.go | 89 ++++++++++++++++--- pkg/agent/run_test.go | 72 +++++++++++++++ .../testdata/one-shot/success/config.yaml | 4 + .../testdata/one-shot/success/input.json | 1 + .../testdata/one-shot/success/kubeconfig.yaml | 15 ++++ 5 files changed, 169 insertions(+), 12 deletions(-) create mode 100644 pkg/agent/run_test.go create mode 100644 pkg/agent/testdata/one-shot/success/config.yaml create mode 100644 pkg/agent/testdata/one-shot/success/input.json create mode 100644 pkg/agent/testdata/one-shot/success/kubeconfig.yaml diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 31b0ad8f..bde0532d 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "os" "strings" @@ -25,6 +26,7 @@ import ( "k8s.io/client-go/kubernetes" clientgocorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/tools/record" + "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/manager" "github.com/jetstack/preflight/api" @@ -50,7 +52,12 @@ const schemaVersion string = "v2.0.0" // Run starts the agent process func Run(cmd *cobra.Command, args []string) { logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit) - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithCancel( + klog.NewContext( + context.Background(), + klog.Background(), + ), + ) defer cancel() file, err := os.Open(Flags.ConfigFilePath) @@ -83,11 +90,13 @@ func Run(cmd *cobra.Command, args []string) { } }() - group.Go(func() error { + { server := http.NewServeMux() + const serverAddress = ":8081" + log := klog.FromContext(ctx).WithName("APIServer").WithValues("addr", serverAddress) if Flags.Profiling { - logs.Log.Printf("pprof profiling was enabled.") + log.Info("Profiling endpoints enabled", "path", "/debug/pprof") server.HandleFunc("/debug/pprof/", pprof.Index) server.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) server.HandleFunc("/debug/pprof/profile", pprof.Profile) @@ -96,7 +105,7 @@ func Run(cmd *cobra.Command, args []string) { } if Flags.Prometheus { - logs.Log.Printf("Prometheus was enabled.\nRunning prometheus on port :8081") + log.Info("Metrics endpoints enabled", "path", "/metrics") prometheus.MustRegister(metricPayloadSize) server.Handle("/metrics", promhttp.Handler()) } @@ -105,21 +114,32 @@ func Run(cmd *cobra.Command, args []string) { // what "ready" means for the agent, we just return 200 OK inconditionally. // The goal is to satisfy some Kubernetes distributions, like OpenShift, // that require a liveness and health probe to be present for each pod. + log.Info("Healthz endpoints enabled", "path", "/healthz") server.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) }) + log.Info("Readyz endpoints enabled", "path", "/readyz") server.HandleFunc("/readyz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) }) - err := http.ListenAndServe(":8081", server) - if err != nil && !errors.Is(err, http.ErrServerClosed) { - return fmt.Errorf("failed to run the health check server: %s", err) - } - // The agent must stop if the management server stops - cancel() - return nil - }) + group.Go(func() error { + err := listenAndServe( + klog.NewContext(gctx, log), + &http.Server{ + Addr: serverAddress, + Handler: server, + BaseContext: func(_ net.Listener) context.Context { + return gctx + }, + }, + ) + if err != nil { + return fmt.Errorf("APIServer: %s", err) + } + return nil + }) + } _, isVenConn := preflightClient.(*client.VenConnClient) if isVenConn { @@ -412,3 +432,48 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* return nil } + +// listenAndServe starts the supplied HTTP server and stops it gracefully when +// the supplied context is cancelled. +// It returns when the graceful server shutdown is complete or when the server +// exits with an error. +// If the server fails to start, it returns the server error. +// If the server fails to shutdown gracefully, it returns the shutdown error. +// The server is given 3 seconds to shutdown gracefully before it is stopped +// forcefully. +func listenAndServe(ctx context.Context, server *http.Server) error { + log := klog.FromContext(ctx).WithName("ListenAndServe") + + log.V(1).Info("Starting") + + listenCTX, listenCancelCause := context.WithCancelCause(context.WithoutCancel(ctx)) + go func() { + err := server.ListenAndServe() + listenCancelCause(fmt.Errorf("ListenAndServe: %s", err)) + }() + + select { + case <-listenCTX.Done(): + log.V(1).Info("Shutdown skipped", "reason", "Server already stopped") + return context.Cause(listenCTX) + + case <-ctx.Done(): + log.V(1).Info("Shutting down") + } + + shutdownCTX, shutdownCancel := context.WithTimeout(context.WithoutCancel(ctx), time.Second*3) + shutdownErr := server.Shutdown(shutdownCTX) + shutdownCancel() + if shutdownErr != nil { + shutdownErr = fmt.Errorf("Shutdown: %s", shutdownErr) + } + + closeErr := server.Close() + if closeErr != nil { + closeErr = fmt.Errorf("Close: %s", closeErr) + } + + log.V(1).Info("Shutdown complete") + + return errors.Join(shutdownErr, closeErr) +} diff --git a/pkg/agent/run_test.go b/pkg/agent/run_test.go new file mode 100644 index 00000000..69025950 --- /dev/null +++ b/pkg/agent/run_test.go @@ -0,0 +1,72 @@ +package agent + +import ( + "bytes" + "context" + "os" + "os/exec" + "testing" + "time" + + "github.com/spf13/cobra" + "github.com/stretchr/testify/require" + "k8s.io/klog/v2" + + "github.com/jetstack/preflight/pkg/logs" +) + +// TestRunOneShot runs the agent in `--one-shot` mode and verifies that it exits +// after the first data gathering iteration. +func TestRunOneShot(t *testing.T) { + if _, found := os.LookupEnv("GO_CHILD"); found { + // Silence the warning about missing pod name for event generation + // TODO(wallrj): This should not be required when an `--input-file` has been supplied. + t.Setenv("POD_NAME", "venafi-kubernetes-e2e") + // Silence the error about missing kubeconfig. + // TODO(wallrj): This should not be required when an `--input-file` has been supplied. + t.Setenv("KUBECONFIG", "testdata/one-shot/success/kubeconfig.yaml") + + c := &cobra.Command{} + InitAgentCmdFlags(c, &Flags) + logs.AddFlags(c.Flags()) + + err := c.ParseFlags([]string{ + "--one-shot", + // TODO(wallrj): This should not be required when an `--input-file` has been supplied. + "--api-token=should-not-be-required", + // TODO(wallrj): This should not be required when an `--input-file` has been supplied. + "--install-namespace=default", + "--agent-config-file=testdata/one-shot/success/config.yaml", + "--input-path=testdata/one-shot/success/input.json", + "--output-path=/dev/null", + "-v=1", + }) + require.NoError(t, err) + + logs.Initialize() + Run(c, nil) + klog.Flush() + return + } + t.Log("Running child process") + ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) + defer cancel() + cmd := exec.CommandContext(ctx, os.Args[0], "-test.run=^TestRunOneShot$") + var ( + stdout bytes.Buffer + stderr bytes.Buffer + ) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Env = append( + os.Environ(), + "GO_CHILD=true", + ) + err := cmd.Run() + + stdoutStr := stdout.String() + stderrStr := stderr.String() + t.Logf("STDOUT\n%s\n", stdoutStr) + t.Logf("STDERR\n%s\n", stderrStr) + require.NoError(t, err, context.Cause(ctx)) +} diff --git a/pkg/agent/testdata/one-shot/success/config.yaml b/pkg/agent/testdata/one-shot/success/config.yaml new file mode 100644 index 00000000..51688b26 --- /dev/null +++ b/pkg/agent/testdata/one-shot/success/config.yaml @@ -0,0 +1,4 @@ +# Just enough venafi-kubernetes-agent config to allow it to run with an input +# file in one-shot mode. +cluster_id: "venafi-kubernetes-agent-e2e" +organization_id: "venafi-kubernetes-agent-e2e" diff --git a/pkg/agent/testdata/one-shot/success/input.json b/pkg/agent/testdata/one-shot/success/input.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/pkg/agent/testdata/one-shot/success/input.json @@ -0,0 +1 @@ +[] diff --git a/pkg/agent/testdata/one-shot/success/kubeconfig.yaml b/pkg/agent/testdata/one-shot/success/kubeconfig.yaml new file mode 100644 index 00000000..993dcec3 --- /dev/null +++ b/pkg/agent/testdata/one-shot/success/kubeconfig.yaml @@ -0,0 +1,15 @@ +# Just enough kubeconfig to satisfy client-go +apiVersion: v1 +kind: Config +current-context: cluster-1 +contexts: +- name: cluster-1 + context: + cluster: cluster-1 + user: user-1 +clusters: +- name: cluster-1 + cluster: + server: https://192.0.2.1:8443 +preferences: {} +users: [] From 61d64e4ee1fddefe768b3635ca8469b88c6994fa Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 5 Nov 2024 16:06:57 +0000 Subject: [PATCH 05/12] [VC-35738] Bubble up the errors from sub-commands and handle errors centrally instead of using log.Fatal at multiple sites (#599) * Bubble up the errors from sub-commands and handle errors centrally instead of using log.Fatal at multiple sites * Log the bubbled up error instead of printing it * Turn on SilenceErrors and SilenceUsage to prevent Cobra from messing up the stderr output * Remove obsolete translations for converting log.printf to error messages --- cmd/agent.go | 12 ++++----- cmd/echo.go | 2 +- cmd/root.go | 21 ++++++++++++--- pkg/agent/run.go | 60 +++++++++++++++++++++++++------------------ pkg/echo/echo.go | 8 ++---- pkg/logs/logs.go | 7 +---- pkg/logs/logs_test.go | 52 ++++++++----------------------------- 7 files changed, 73 insertions(+), 89 deletions(-) diff --git a/cmd/agent.go b/cmd/agent.go index c0d142cb..ce453d01 100644 --- a/cmd/agent.go +++ b/cmd/agent.go @@ -7,7 +7,6 @@ import ( "github.com/spf13/cobra" "github.com/jetstack/preflight/pkg/agent" - "github.com/jetstack/preflight/pkg/logs" "github.com/jetstack/preflight/pkg/permissions" ) @@ -16,7 +15,7 @@ var agentCmd = &cobra.Command{ Short: "start the preflight agent", Long: `The agent will periodically gather data for the configured data gatherers and send it to a remote backend for evaluation`, - Run: agent.Run, + RunE: agent.Run, } var agentInfoCmd = &cobra.Command{ @@ -34,24 +33,25 @@ var agentRBACCmd = &cobra.Command{ Use: "rbac", Short: "print the agent's minimal RBAC manifest", Long: `Print RBAC string by reading GVRs`, - Run: func(cmd *cobra.Command, args []string) { + RunE: func(cmd *cobra.Command, args []string) error { b, err := os.ReadFile(agent.Flags.ConfigFilePath) if err != nil { - logs.Log.Fatalf("Failed to read config file: %s", err) + return fmt.Errorf("Failed to read config file: %s", err) } cfg, err := agent.ParseConfig(b) if err != nil { - logs.Log.Fatalf("Failed to parse config file: %s", err) + return fmt.Errorf("Failed to parse config file: %s", err) } err = agent.ValidateDataGatherers(cfg.DataGatherers) if err != nil { - logs.Log.Fatalf("Failed to validate data gatherers: %s", err) + return fmt.Errorf("Failed to validate data gatherers: %s", err) } out := permissions.GenerateFullManifest(cfg.DataGatherers) fmt.Print(out) + return nil }, } diff --git a/cmd/echo.go b/cmd/echo.go index 34b302e4..04477cac 100644 --- a/cmd/echo.go +++ b/cmd/echo.go @@ -11,7 +11,7 @@ var echoCmd = &cobra.Command{ Short: "starts an echo server to test the agent", Long: `The agent sends data to a server. This echo server can be used to act as the server part and echo the data received by the agent.`, - Run: echo.Echo, + RunE: echo.Echo, } func init() { diff --git a/cmd/root.go b/cmd/root.go index 826b4295..fe9e5989 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -5,9 +5,11 @@ import ( "os" "strings" - "github.com/jetstack/preflight/pkg/logs" "github.com/spf13/cobra" "github.com/spf13/pflag" + "k8s.io/klog/v2" + + "github.com/jetstack/preflight/pkg/logs" ) // rootCmd represents the base command when called without any subcommands @@ -21,6 +23,14 @@ Preflight checks are bundled into Packages`, PersistentPreRun: func(cmd *cobra.Command, args []string) { logs.Initialize() }, + // SilenceErrors and SilenceUsage prevents this command or any sub-command + // from printing arbitrary text to stderr. + // Why? To ensure that each line of output can be parsed as a single message + // for consumption by logging agents such as fluentd. + // Usage information is still available on stdout with the `-h` and `--help` + // flags. + SilenceErrors: true, + SilenceUsage: true, } func init() { @@ -31,13 +41,16 @@ func init() { // Execute adds all child commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. +// If the root command or sub-command returns an error, the error message will +// will be logged and the process will exit with status 1. func Execute() { logs.AddFlags(rootCmd.PersistentFlags()) - + var exitCode int if err := rootCmd.Execute(); err != nil { - fmt.Println(err) - os.Exit(1) + exitCode = 1 + klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode) } + klog.FlushAndExit(klog.ExitFlushTimeout, exitCode) } func setFlagsFromEnv(prefix string, fs *pflag.FlagSet) { diff --git a/pkg/agent/run.go b/pkg/agent/run.go index bde0532d..5d3ad52d 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -50,7 +50,7 @@ var Flags AgentCmdFlags const schemaVersion string = "v2.0.0" // Run starts the agent process -func Run(cmd *cobra.Command, args []string) { +func Run(cmd *cobra.Command, args []string) error { logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit) ctx, cancel := context.WithCancel( klog.NewContext( @@ -62,31 +62,33 @@ func Run(cmd *cobra.Command, args []string) { file, err := os.Open(Flags.ConfigFilePath) if err != nil { - logs.Log.Fatalf("Failed to load config file for agent from: %s", Flags.ConfigFilePath) + return fmt.Errorf("Failed to load config file for agent from: %s", Flags.ConfigFilePath) } defer file.Close() b, err := io.ReadAll(file) if err != nil { - logs.Log.Fatalf("Failed to read config file: %s", err) + return fmt.Errorf("Failed to read config file: %s", err) } cfg, err := ParseConfig(b) if err != nil { - logs.Log.Fatalf("Failed to parse config file: %s", err) + return fmt.Errorf("Failed to parse config file: %s", err) } config, preflightClient, err := ValidateAndCombineConfig(logs.Log, cfg, Flags) if err != nil { - logs.Log.Fatalf("While evaluating configuration: %v", err) + return fmt.Errorf("While evaluating configuration: %v", err) } group, gctx := errgroup.WithContext(ctx) defer func() { - // TODO: replace Fatalf log calls with Errorf and return the error cancel() - if err := group.Wait(); err != nil { - logs.Log.Fatalf("failed to wait for controller-runtime component to stop: %v", err) + if groupErr := group.Wait(); groupErr != nil { + err = multierror.Append( + err, + fmt.Errorf("failed to wait for controller-runtime component to stop: %v", groupErr), + ) } }() @@ -159,7 +161,7 @@ func Run(cmd *cobra.Command, args []string) { // the agent pod's events. eventf, err := newEventf(config.InstallNS) if err != nil { - logs.Log.Fatalf("failed to create event recorder: %v", err) + return fmt.Errorf("failed to create event recorder: %v", err) } dataGatherers := map[string]datagatherer.DataGatherer{} @@ -169,12 +171,12 @@ func Run(cmd *cobra.Command, args []string) { kind := dgConfig.Kind if dgConfig.DataPath != "" { kind = "local" - logs.Log.Fatalf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath) + return fmt.Errorf("running data gatherer %s of type %s as Local, data-path override present: %s", dgConfig.Name, dgConfig.Kind, dgConfig.DataPath) } newDg, err := dgConfig.Config.NewDataGatherer(gctx) if err != nil { - logs.Log.Fatalf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err) + return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err) } logs.Log.Printf("starting %q datagatherer", dgConfig.Name) @@ -225,7 +227,9 @@ func Run(cmd *cobra.Command, args []string) { // TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't // have to wait for it to finish before exiting the process. for { - gatherAndOutputData(eventf, config, preflightClient, dataGatherers) + if err := gatherAndOutputData(eventf, config, preflightClient, dataGatherers); err != nil { + return err + } if config.OneShot { break @@ -233,10 +237,11 @@ func Run(cmd *cobra.Command, args []string) { select { case <-gctx.Done(): - return + return nil case <-time.After(config.Period): } } + return nil } // Creates an event recorder for the agent's Pod object. Expects the env var @@ -246,7 +251,7 @@ func Run(cmd *cobra.Command, args []string) { func newEventf(installNS string) (Eventf, error) { restcfg, err := kubeconfig.LoadRESTConfig("") if err != nil { - logs.Log.Fatalf("failed to load kubeconfig: %v", err) + return nil, fmt.Errorf("failed to load kubeconfig: %v", err) } scheme := runtime.NewScheme() _ = corev1.AddToScheme(scheme) @@ -276,31 +281,35 @@ func newEventf(installNS string) (Eventf, error) { // Like Printf but for sending events to the agent's Pod object. type Eventf func(eventType, reason, msg string, args ...interface{}) -func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) { +func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) error { var readings []*api.DataReading if config.InputPath != "" { logs.Log.Printf("Reading data from local file: %s", config.InputPath) data, err := os.ReadFile(config.InputPath) if err != nil { - logs.Log.Fatalf("failed to read local data file: %s", err) + return fmt.Errorf("failed to read local data file: %s", err) } err = json.Unmarshal(data, &readings) if err != nil { - logs.Log.Fatalf("failed to unmarshal local data file: %s", err) + return fmt.Errorf("failed to unmarshal local data file: %s", err) } } else { - readings = gatherData(config, dataGatherers) + var err error + readings, err = gatherData(config, dataGatherers) + if err != nil { + return err + } } if config.OutputPath != "" { data, err := json.MarshalIndent(readings, "", " ") if err != nil { - logs.Log.Fatal("failed to marshal JSON") + return fmt.Errorf("failed to marshal JSON: %s", err) } err = os.WriteFile(config.OutputPath, data, 0644) if err != nil { - logs.Log.Fatalf("failed to output to local file: %s", err) + return fmt.Errorf("failed to output to local file: %s", err) } logs.Log.Printf("Data saved to local file: %s", config.OutputPath) } else { @@ -316,12 +325,13 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c logs.Log.Printf("retrying in %v after error: %s", t, err) }) if err != nil { - logs.Log.Fatalf("Exiting due to fatal error uploading: %v", err) + return fmt.Errorf("Exiting due to fatal error uploading: %v", err) } } + return nil } -func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) []*api.DataReading { +func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) ([]*api.DataReading, error) { var readings []*api.DataReading var dgError *multierror.Error @@ -360,10 +370,10 @@ func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.Dat } if config.StrictMode && dgError.ErrorOrNil() != nil { - logs.Log.Fatalf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil()) + return nil, fmt.Errorf("halting datagathering in strict mode due to error: %s", dgError.ErrorOrNil()) } - return readings + return readings, nil } func postData(config CombinedConfig, preflightClient client.Client, readings []*api.DataReading) error { @@ -388,7 +398,7 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* if config.OrganizationID == "" { data, err := json.Marshal(readings) if err != nil { - logs.Log.Fatalf("Cannot marshal readings: %+v", err) + return fmt.Errorf("Cannot marshal readings: %+v", err) } // log and collect metrics about the upload size diff --git a/pkg/echo/echo.go b/pkg/echo/echo.go index 62fa5f7a..ac4bf7db 100644 --- a/pkg/echo/echo.go +++ b/pkg/echo/echo.go @@ -9,20 +9,16 @@ import ( "github.com/spf13/cobra" "github.com/jetstack/preflight/api" - "github.com/jetstack/preflight/pkg/logs" ) var EchoListen string var Compact bool -func Echo(cmd *cobra.Command, args []string) { +func Echo(cmd *cobra.Command, args []string) error { http.HandleFunc("/", echoHandler) fmt.Println("Listening to requests at ", EchoListen) - err := http.ListenAndServe(EchoListen, nil) - if err != nil { - logs.Log.Fatal(err) - } + return http.ListenAndServe(EchoListen, nil) } func echoHandler(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index 9dd988b8..b16c62e1 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -147,12 +147,7 @@ func (w LogToSlogWriter) Write(p []byte) (n int, err error) { message := string(p) if strings.Contains(message, "error") || - strings.Contains(message, "failed") || - strings.Contains(message, "fatal") || - strings.Contains(message, "Failed") || - strings.Contains(message, "While evaluating configuration") || - strings.Contains(message, "data-path override present") || - strings.Contains(message, "Cannot marshal readings") { + strings.Contains(message, "failed") { w.Slog.With("source", w.Source).Error(message) } else { w.Slog.With("source", w.Source).Info(message) diff --git a/pkg/logs/logs_test.go b/pkg/logs/logs_test.go index 3ddd6da1..58a74725 100644 --- a/pkg/logs/logs_test.go +++ b/pkg/logs/logs_test.go @@ -375,52 +375,22 @@ func Test_replaceWithStaticTimestamps(t *testing.T) { } func TestLogToSlogWriter(t *testing.T) { - // This test makes sure that all the agent's Log.Fatalf calls are correctly - // translated to slog.Error calls. + // This test makes sure that all the agent's remaining Log calls are correctly + // translated to slog.Error calls where appropriate. // // This list was generated using: - // grep -r "Log\.Fatalf" ./cmd ./pkg + // git grep -i "log\.\(print\|fatal\)" pkg/ cmd/ | fgrep -e error -e failed given := strings.TrimPrefix(` -Failed to load config file for agent from -Failed to read config file -Failed to parse config file -While evaluating configuration -failed to run pprof profiler -failed to run the health check server -failed to start a controller-runtime component -failed to wait for controller-runtime component to stop -running data gatherer %s of type %s as Local, data-path override present -failed to instantiate %q data gatherer -failed to read local data file -failed to unmarshal local data file -failed to output to local file -Exiting due to fatal error uploading -halting datagathering in strict mode due to error -Cannot marshal readings -Failed to read config file -Failed to parse config file -Failed to validate data gatherers +failed to complete initial sync of %q data gatherer %q: %v +error messages will not show in the pod's events because the POD_NAME environment variable is empty +retrying in %v after error: %s +datagatherer informer for %q has failed and is backing off due to error: %s this is a happy log that should show as INFO`, "\n") expect := strings.TrimPrefix(` -level=ERROR msg="Failed to load config file for agent from" source=agent -level=ERROR msg="Failed to read config file" source=agent -level=ERROR msg="Failed to parse config file" source=agent -level=ERROR msg="While evaluating configuration" source=agent -level=ERROR msg="failed to run pprof profiler" source=agent -level=ERROR msg="failed to run the health check server" source=agent -level=ERROR msg="failed to start a controller-runtime component" source=agent -level=ERROR msg="failed to wait for controller-runtime component to stop" source=agent -level=ERROR msg="running data gatherer %!s(MISSING) of type %!s(MISSING) as Local, data-path override present" source=agent -level=ERROR msg="failed to instantiate %!q(MISSING) data gatherer" source=agent -level=ERROR msg="failed to read local data file" source=agent -level=ERROR msg="failed to unmarshal local data file" source=agent -level=ERROR msg="failed to output to local file" source=agent -level=ERROR msg="Exiting due to fatal error uploading" source=agent -level=ERROR msg="halting datagathering in strict mode due to error" source=agent -level=ERROR msg="Cannot marshal readings" source=agent -level=ERROR msg="Failed to read config file" source=agent -level=ERROR msg="Failed to parse config file" source=agent -level=ERROR msg="Failed to validate data gatherers" source=agent +level=ERROR msg="failed to complete initial sync of %!q(MISSING) data gatherer %!q(MISSING): %!v(MISSING)" source=agent +level=ERROR msg="error messages will not show in the pod's events because the POD_NAME environment variable is empty" source=agent +level=ERROR msg="retrying in %!v(MISSING) after error: %!s(MISSING)" source=agent +level=ERROR msg="datagatherer informer for %!q(MISSING) has failed and is backing off due to error: %!s(MISSING)" source=agent level=INFO msg="this is a happy log that should show as INFO" source=agent `, "\n") From 4ce96c0478d3b5fbb3a11c91e435781236edc1a9 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 7 Nov 2024 10:20:12 +0000 Subject: [PATCH 06/12] [VC-35738] Append errgroup errors to the error returned by Agent.Run (#606) * Append errgroup errors to Run return error * Fail TestRunOneShot if Run returns errors --- pkg/agent/run.go | 6 +++--- pkg/agent/run_test.go | 7 +++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 5d3ad52d..fc66da68 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -50,7 +50,7 @@ var Flags AgentCmdFlags const schemaVersion string = "v2.0.0" // Run starts the agent process -func Run(cmd *cobra.Command, args []string) error { +func Run(cmd *cobra.Command, args []string) (returnErr error) { logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit) ctx, cancel := context.WithCancel( klog.NewContext( @@ -85,8 +85,8 @@ func Run(cmd *cobra.Command, args []string) error { defer func() { cancel() if groupErr := group.Wait(); groupErr != nil { - err = multierror.Append( - err, + returnErr = multierror.Append( + returnErr, fmt.Errorf("failed to wait for controller-runtime component to stop: %v", groupErr), ) } diff --git a/pkg/agent/run_test.go b/pkg/agent/run_test.go index 69025950..3bd54773 100644 --- a/pkg/agent/run_test.go +++ b/pkg/agent/run_test.go @@ -44,8 +44,11 @@ func TestRunOneShot(t *testing.T) { require.NoError(t, err) logs.Initialize() - Run(c, nil) - klog.Flush() + defer klog.Flush() + + runErr := Run(c, nil) + require.NoError(t, runErr, "Run returned an unexpected error") + return } t.Log("Running child process") From 1778ed5f1775c5ca2745b26c2e6d0bd4077f51f2 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 7 Nov 2024 10:27:34 +0000 Subject: [PATCH 07/12] Allow DataGatherer.Run to return without stopping the other errgroup Go Routines (#605) Signed-off-by: Richard Wall --- pkg/agent/run.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/agent/run.go b/pkg/agent/run.go index fc66da68..2458befe 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -183,11 +183,14 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { // start the data gatherers and wait for the cache sync group.Go(func() error { + // Most implementations of `DataGatherer.Run` return immediately. + // Only the Dynamic DataGatherer starts an informer which runs and + // blocks until the supplied channel is closed. + // For this reason, we must allow these errgroup Go routines to exit + // without cancelling the other Go routines in the group. if err := newDg.Run(gctx.Done()); err != nil { return fmt.Errorf("failed to start %q data gatherer %q: %v", kind, dgConfig.Name, err) } - // The agent must stop if any of the data gatherers stops - cancel() return nil }) From 45a5d778969db3685bf3ed756608ab65d3327546 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 12 Nov 2024 06:55:48 +0000 Subject: [PATCH 08/12] Move the agent --one-shot test to the cmd package (#610) And use the Execute function Signed-off-by: Richard Wall --- pkg/agent/run_test.go => cmd/agent_test.go | 37 ++++++------------- .../agent}/one-shot/success/config.yaml | 0 .../agent}/one-shot/success/input.json | 0 .../agent}/one-shot/success/kubeconfig.yaml | 0 4 files changed, 12 insertions(+), 25 deletions(-) rename pkg/agent/run_test.go => cmd/agent_test.go (63%) rename {pkg/agent/testdata => cmd/testdata/agent}/one-shot/success/config.yaml (100%) rename {pkg/agent/testdata => cmd/testdata/agent}/one-shot/success/input.json (100%) rename {pkg/agent/testdata => cmd/testdata/agent}/one-shot/success/kubeconfig.yaml (100%) diff --git a/pkg/agent/run_test.go b/cmd/agent_test.go similarity index 63% rename from pkg/agent/run_test.go rename to cmd/agent_test.go index 3bd54773..9c92144d 100644 --- a/pkg/agent/run_test.go +++ b/cmd/agent_test.go @@ -1,4 +1,4 @@ -package agent +package cmd import ( "bytes" @@ -8,53 +8,40 @@ import ( "testing" "time" - "github.com/spf13/cobra" "github.com/stretchr/testify/require" - "k8s.io/klog/v2" - - "github.com/jetstack/preflight/pkg/logs" ) -// TestRunOneShot runs the agent in `--one-shot` mode and verifies that it exits +// TestAgentRunOneShot runs the agent in `--one-shot` mode and verifies that it exits // after the first data gathering iteration. -func TestRunOneShot(t *testing.T) { +func TestAgentRunOneShot(t *testing.T) { if _, found := os.LookupEnv("GO_CHILD"); found { // Silence the warning about missing pod name for event generation // TODO(wallrj): This should not be required when an `--input-file` has been supplied. t.Setenv("POD_NAME", "venafi-kubernetes-e2e") // Silence the error about missing kubeconfig. // TODO(wallrj): This should not be required when an `--input-file` has been supplied. - t.Setenv("KUBECONFIG", "testdata/one-shot/success/kubeconfig.yaml") - - c := &cobra.Command{} - InitAgentCmdFlags(c, &Flags) - logs.AddFlags(c.Flags()) + t.Setenv("KUBECONFIG", "testdata/agent/one-shot/success/kubeconfig.yaml") - err := c.ParseFlags([]string{ + os.Args = []string{ + "preflight", + "agent", "--one-shot", // TODO(wallrj): This should not be required when an `--input-file` has been supplied. "--api-token=should-not-be-required", // TODO(wallrj): This should not be required when an `--input-file` has been supplied. "--install-namespace=default", - "--agent-config-file=testdata/one-shot/success/config.yaml", - "--input-path=testdata/one-shot/success/input.json", + "--agent-config-file=testdata/agent/one-shot/success/config.yaml", + "--input-path=testdata/agent/one-shot/success/input.json", "--output-path=/dev/null", "-v=1", - }) - require.NoError(t, err) - - logs.Initialize() - defer klog.Flush() - - runErr := Run(c, nil) - require.NoError(t, runErr, "Run returned an unexpected error") - + } + Execute() return } t.Log("Running child process") ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) defer cancel() - cmd := exec.CommandContext(ctx, os.Args[0], "-test.run=^TestRunOneShot$") + cmd := exec.CommandContext(ctx, os.Args[0], "-test.run=^TestAgentRunOneShot$") var ( stdout bytes.Buffer stderr bytes.Buffer diff --git a/pkg/agent/testdata/one-shot/success/config.yaml b/cmd/testdata/agent/one-shot/success/config.yaml similarity index 100% rename from pkg/agent/testdata/one-shot/success/config.yaml rename to cmd/testdata/agent/one-shot/success/config.yaml diff --git a/pkg/agent/testdata/one-shot/success/input.json b/cmd/testdata/agent/one-shot/success/input.json similarity index 100% rename from pkg/agent/testdata/one-shot/success/input.json rename to cmd/testdata/agent/one-shot/success/input.json diff --git a/pkg/agent/testdata/one-shot/success/kubeconfig.yaml b/cmd/testdata/agent/one-shot/success/kubeconfig.yaml similarity index 100% rename from pkg/agent/testdata/one-shot/success/kubeconfig.yaml rename to cmd/testdata/agent/one-shot/success/kubeconfig.yaml From 39e6c1f50a4c45ef6f04ddd34708af2dfd9c3d8b Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 12 Nov 2024 14:11:51 +0000 Subject: [PATCH 09/12] Remove os.Exit from the logs module and make the logs test output more realistic (#611) Signed-off-by: Richard Wall --- cmd/root.go | 4 ++-- pkg/logs/logs.go | 7 +++--- pkg/logs/logs_test.go | 54 +++++++++++++++++++++---------------------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index fe9e5989..e2d49ba3 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -20,8 +20,8 @@ var rootCmd = &cobra.Command{ configuration checks using Open Policy Agent (OPA). Preflight checks are bundled into Packages`, - PersistentPreRun: func(cmd *cobra.Command, args []string) { - logs.Initialize() + PersistentPreRunE: func(cmd *cobra.Command, args []string) error { + return logs.Initialize() }, // SilenceErrors and SilenceUsage prevents this command or any sub-command // from printing arbitrary text to stderr. diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index b16c62e1..b8ffd71f 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -5,7 +5,6 @@ import ( "fmt" "log" "log/slog" - "os" "strings" "github.com/spf13/pflag" @@ -108,13 +107,12 @@ func AddFlags(fs *pflag.FlagSet) { // Initialize uses k8s.io/component-base/logs, to configure the following global // loggers: log, slog, and klog. All are configured to write in the same format. -func Initialize() { +func Initialize() error { // This configures the global logger in klog *and* slog, if compiled with Go // >= 1.21. logs.InitLogs() if err := logsapi.ValidateAndApply(configuration, features); err != nil { - fmt.Fprintf(os.Stderr, "Error in logging configuration: %v\n", err) - os.Exit(2) + return fmt.Errorf("Error in logging configuration: %s", err) } // Thanks to logs.InitLogs, slog.Default now uses klog as its backend. Thus, @@ -133,6 +131,7 @@ func Initialize() { // to the global log logger. It can be removed when this is fixed upstream // in vcert: https://github.com/Venafi/vcert/pull/512 vcertLog.SetPrefix("") + return nil } type LogToSlogWriter struct { diff --git a/pkg/logs/logs_test.go b/pkg/logs/logs_test.go index 58a74725..e3869df6 100644 --- a/pkg/logs/logs_test.go +++ b/pkg/logs/logs_test.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "errors" + "fmt" + "io" "log" "log/slog" "os" @@ -33,10 +35,25 @@ import ( func TestLogs(t *testing.T) { if flags, found := os.LookupEnv("GO_CHILD_FLAG"); found { if _, found := os.LookupEnv("GO_CHILD_SKIP_INITIALIZE"); !found { - fs := pflag.NewFlagSet("test-logs", pflag.ExitOnError) + fs := pflag.NewFlagSet("test-logs", pflag.ContinueOnError) + fs.SetOutput(io.Discard) logs.AddFlags(fs) - fs.Parse(strings.Split(flags, " ")) - logs.Initialize() + if err := fs.Parse(strings.Split(flags, " ")); err != nil { + exitCode := 0 + if errors.Is(err, pflag.ErrHelp) { + fmt.Fprint(os.Stdout, fs.FlagUsages()) + os.Exit(exitCode) + } else { + exitCode := 1 + klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode) + klog.FlushAndExit(time.Second, exitCode) + } + } + if err := logs.Initialize(); err != nil { + exitCode := 1 + klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode) + klog.FlushAndExit(time.Second, exitCode) + } } log.Print("log Print") @@ -63,14 +80,9 @@ func TestLogs(t *testing.T) { expectStderr string }{ { - name: "help", - flags: "-h", - expectError: true, + name: "help", + flags: "-h", expectStdout: ` -pflag: help requested -`, - expectStderr: ` -Usage of test-logs: -v, --log-level Level number for the log level verbosity --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) @@ -80,30 +92,16 @@ Usage of test-logs: name: "unrecognized-flag", flags: "--foo", expectError: true, - expectStdout: ` -unknown flag: --foo -`, expectStderr: ` -unknown flag: --foo -Usage of test-logs: - -v, --log-level Level number for the log level verbosity - --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") - --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) +E0000 00:00:00.000000 00000 logs_test.go:000] "Exiting due to error" err="unknown flag: --foo" exit-code=1 `, }, { name: "v-long-form-not-available", flags: "--v=3", expectError: true, - expectStdout: ` -unknown flag: --v -`, expectStderr: ` -unknown flag: --v -Usage of test-logs: - -v, --log-level Level number for the log level verbosity - --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") - --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) +E0000 00:00:00.000000 00000 logs_test.go:000] "Exiting due to error" err="unknown flag: --v" exit-code=1 `, }, { @@ -111,7 +109,7 @@ Usage of test-logs: flags: "--logging-format=foo", expectError: true, expectStderr: ` -Error in logging configuration: format: Invalid value: "foo": Unsupported log format +E0000 00:00:00.000000 00000 logs_test.go:000] "Exiting due to error" err="Error in logging configuration: format: Invalid value: \"foo\": Unsupported log format" exit-code=1 `, }, { @@ -297,7 +295,7 @@ E0000 00:00:00.000000 00000 logs_test.go:000] "Contextual error" err="fake-err if test.expectError { var target *exec.ExitError require.ErrorAs(t, err, &target) - require.Equal(t, 2, target.ExitCode(), "Flag parsing failures should always result in exit code 2") + require.Equal(t, 1, target.ExitCode(), "Flag parsing failures should always result in exit code 1") t.Logf("ERROR: %v", err) } else { require.NoError(t, err) From fd8d0deeed5129814fed66f1676f5c68bbef5b45 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Tue, 12 Nov 2024 14:41:46 +0000 Subject: [PATCH 10/12] Use klog and logr logger instead of log (#609) Signed-off-by: Richard Wall --- cmd/root.go | 4 +- pkg/agent/config.go | 40 +++++++++++--------- pkg/agent/config_test.go | 33 ++++++++-------- pkg/agent/run.go | 82 ++++++++++++++++++++++------------------ 4 files changed, 89 insertions(+), 70 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index e2d49ba3..1c78a0c8 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -1,6 +1,7 @@ package cmd import ( + "context" "fmt" "os" "strings" @@ -45,8 +46,9 @@ func init() { // will be logged and the process will exit with status 1. func Execute() { logs.AddFlags(rootCmd.PersistentFlags()) + ctx := klog.NewContext(context.Background(), klog.Background()) var exitCode int - if err := rootCmd.Execute(); err != nil { + if err := rootCmd.ExecuteContext(ctx); err != nil { exitCode = 1 klog.ErrorS(err, "Exiting due to error", "exit-code", exitCode) } diff --git a/pkg/agent/config.go b/pkg/agent/config.go index 25e4d253..5bccf1d9 100644 --- a/pkg/agent/config.go +++ b/pkg/agent/config.go @@ -3,11 +3,11 @@ package agent import ( "fmt" "io" - "log" "net/url" "os" "time" + "github.com/go-logr/logr" "github.com/hashicorp/go-multierror" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -355,32 +355,35 @@ type CombinedConfig struct { // The error returned may be a multierror.Error. Use multierror.Prefix(err, // "context:") rather than fmt.Errorf("context: %w", err) when wrapping the // error. -func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) (CombinedConfig, client.Client, error) { +func ValidateAndCombineConfig(log logr.Logger, cfg Config, flags AgentCmdFlags) (CombinedConfig, client.Client, error) { res := CombinedConfig{} var errs error { - var mode AuthMode + var ( + mode AuthMode + reason string + ) switch { case flags.VenafiCloudMode && flags.CredentialsPath != "": mode = VenafiCloudKeypair - log.Printf("Using the %s auth mode since --venafi-cloud and --credentials-path were specified.", mode) + reason = fmt.Sprintf("Using the %s auth mode since --venafi-cloud and --credentials-path were specified.", mode) case flags.ClientID != "" && flags.PrivateKeyPath != "": mode = VenafiCloudKeypair - log.Printf("Using the %s auth mode since --client-id and --private-key-path were specified.", mode) + reason = fmt.Sprintf("Using the %s auth mode since --client-id and --private-key-path were specified.", mode) case flags.ClientID != "": return CombinedConfig{}, nil, fmt.Errorf("if --client-id is specified, --private-key-path must also be specified") case flags.PrivateKeyPath != "": return CombinedConfig{}, nil, fmt.Errorf("--private-key-path is specified, --client-id must also be specified") case flags.VenConnName != "": mode = VenafiCloudVenafiConnection - log.Printf("Using the %s auth mode since --venafi-connection was specified.", mode) + reason = fmt.Sprintf("Using the %s auth mode since --venafi-connection was specified.", mode) case flags.APIToken != "": mode = JetstackSecureAPIToken - log.Printf("Using the %s auth mode since --api-token was specified.", mode) + reason = fmt.Sprintf("Using the %s auth mode since --api-token was specified.", mode) case !flags.VenafiCloudMode && flags.CredentialsPath != "": mode = JetstackSecureOAuth - log.Printf("Using the %s auth mode since --credentials-file was specified without --venafi-cloud.", mode) + reason = fmt.Sprintf("Using the %s auth mode since --credentials-file was specified without --venafi-cloud.", mode) default: return CombinedConfig{}, nil, fmt.Errorf("no auth mode specified. You can use one of four auth modes:\n" + " - Use (--venafi-cloud with --credentials-file) or (--client-id with --private-key-path) to use the " + string(VenafiCloudKeypair) + " mode.\n" + @@ -389,6 +392,7 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) " - Use --api-token if you want to use the " + string(JetstackSecureAPIToken) + " mode.\n") } res.AuthMode = mode + log.Info(reason) } // Validation and defaulting of `server` and the deprecated `endpoint.path`. @@ -403,10 +407,10 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) case hasServerField && hasEndpointField: // The `server` field takes precedence over the deprecated // `endpoint` field. - log.Printf("The `server` and `endpoint` fields are both set in the config; using the `server` field.") + log.Info("The `server` and `endpoint` fields are both set in the config; using the `server` field.") server = cfg.Server case !hasServerField && hasEndpointField: - log.Printf("Using deprecated Endpoint configuration. User Server instead.") + log.Info("Using deprecated Endpoint configuration. User Server instead.") if cfg.Endpoint.Protocol == "" && cfg.Server == "" { cfg.Endpoint.Protocol = "http" } @@ -424,7 +428,7 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) errs = multierror.Append(errs, fmt.Errorf("server %q is not a valid URL", server)) } if res.AuthMode == VenafiCloudVenafiConnection && server != "" { - log.Printf("ignoring the server field specified in the config file. In %s mode, this field is not needed.", VenafiCloudVenafiConnection) + log.Info(fmt.Sprintf("ignoring the server field specified in the config file. In %s mode, this field is not needed.", VenafiCloudVenafiConnection)) server = "" } res.Server = server @@ -454,7 +458,7 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) // change this value with the new --venafi-connection flag, and this // field is simply ignored. if cfg.VenafiCloud != nil && cfg.VenafiCloud.UploadPath != "" { - log.Printf(`ignoring the venafi-cloud.upload_path field in the config file. In %s mode, this field is not needed.`, res.AuthMode) + log.Info(fmt.Sprintf(`ignoring the venafi-cloud.upload_path field in the config file. In %s mode, this field is not needed.`, res.AuthMode)) } uploadPath = "" } @@ -472,7 +476,7 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) // https://venafi.atlassian.net/browse/VC-35385 is done. { if cfg.VenafiCloud != nil && cfg.VenafiCloud.UploaderID != "" { - log.Printf(`ignoring the venafi-cloud.uploader_id field in the config file. This field is not needed in %s mode.`, res.AuthMode) + log.Info(fmt.Sprintf(`ignoring the venafi-cloud.uploader_id field in the config file. This field is not needed in %s mode.`, res.AuthMode)) } } @@ -524,13 +528,13 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) case flags.Period == 0 && cfg.Period == 0: errs = multierror.Append(errs, fmt.Errorf("period must be set using --period or -p, or using the 'period' field in the config file")) case flags.Period == 0 && cfg.Period > 0: - log.Printf("Using period from config %s", cfg.Period) + log.Info("Using period from config", "period", cfg.Period) period = cfg.Period case flags.Period > 0 && cfg.Period == 0: period = flags.Period case flags.Period > 0 && cfg.Period > 0: // The flag takes precedence. - log.Printf("Both the 'period' field and --period are set. Using the value provided with --period.") + log.Info("Both the 'period' field and --period are set. Using the value provided with --period.") period = flags.Period } res.Period = period @@ -599,7 +603,7 @@ func ValidateAndCombineConfig(log *log.Logger, cfg Config, flags AgentCmdFlags) // The error returned may be a multierror.Error. Use multierror.Prefix(err, // "context:") rather than fmt.Errorf("context: %w", err) when wrapping the // error. -func validateCredsAndCreateClient(log *log.Logger, flagCredentialsPath, flagClientID, flagPrivateKeyPath, flagAPIToken string, cfg CombinedConfig) (client.Client, error) { +func validateCredsAndCreateClient(log logr.Logger, flagCredentialsPath, flagClientID, flagPrivateKeyPath, flagAPIToken string, cfg CombinedConfig) (client.Client, error) { var errs error var preflightClient client.Client @@ -719,7 +723,7 @@ func ValidateDataGatherers(dataGatherers []DataGatherer) error { // The error returned may be a multierror.Error. Instead of adding context to // the error with fmt.Errorf("%w", err), use multierror.Prefix(err, "context"). -func createCredentialClient(log *log.Logger, credentials client.Credentials, cfg CombinedConfig, agentMetadata *api.AgentMetadata) (client.Client, error) { +func createCredentialClient(log logr.Logger, credentials client.Credentials, cfg CombinedConfig, agentMetadata *api.AgentMetadata) (client.Client, error) { switch creds := credentials.(type) { case *client.VenafiSvcAccountCredentials: // The uploader ID isn't actually used in the backend, let's use an @@ -730,7 +734,7 @@ func createCredentialClient(log *log.Logger, credentials client.Credentials, cfg if cfg.AuthMode == VenafiCloudKeypair { // We don't do this for the VenafiCloudVenafiConnection mode because // the upload_path field is ignored in that mode. - log.Println("Loading upload_path from \"venafi-cloud\" configuration.") + log.Info("Loading upload_path from \"venafi-cloud\" configuration.") uploadPath = cfg.UploadPath } return client.NewVenafiCloudClient(agentMetadata, creds, cfg.Server, uploaderID, uploadPath, cfg.DisableCompression) diff --git a/pkg/agent/config_test.go b/pkg/agent/config_test.go index 87751592..6a3bd07e 100644 --- a/pkg/agent/config_test.go +++ b/pkg/agent/config_test.go @@ -6,15 +6,16 @@ import ( "context" "fmt" "io" - "log" "net/http" "os" "testing" "time" + "github.com/go-logr/logr" "github.com/spf13/cobra" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "k8s.io/klog/v2/ktesting" "github.com/jetstack/preflight/pkg/client" "github.com/jetstack/preflight/pkg/testutil" @@ -86,7 +87,7 @@ func Test_ValidateAndCombineConfig(t *testing.T) { t.Run("--period flag takes precedence over period field in config, shows warning", func(t *testing.T) { t.Setenv("POD_NAMESPACE", "venafi") - log, gotLogs := recordLogs() + log, gotLogs := recordLogs(t) got, _, err := ValidateAndCombineConfig(log, withConfig(testutil.Undent(` server: https://api.venafi.eu @@ -97,8 +98,8 @@ func Test_ValidateAndCombineConfig(t *testing.T) { withCmdLineFlags("--period", "99m", "--credentials-file", fakeCredsPath)) require.NoError(t, err) assert.Equal(t, testutil.Undent(` - Using the Jetstack Secure OAuth auth mode since --credentials-file was specified without --venafi-cloud. - Both the 'period' field and --period are set. Using the value provided with --period. + INFO Using the Jetstack Secure OAuth auth mode since --credentials-file was specified without --venafi-cloud. + INFO Both the 'period' field and --period are set. Using the value provided with --period. `), gotLogs.String()) assert.Equal(t, 99*time.Minute, got.Period) }) @@ -573,7 +574,7 @@ func Test_ValidateAndCombineConfig(t *testing.T) { t.Run("venafi-cloud-workload-identity-auth: warning about server, venafi-cloud.uploader_id, and venafi-cloud.upload_path being skipped", func(t *testing.T) { t.Setenv("POD_NAMESPACE", "venafi") t.Setenv("KUBECONFIG", withFile(t, fakeKubeconfig)) - log, gotLogs := recordLogs() + log, gotLogs := recordLogs(t) got, gotCl, err := ValidateAndCombineConfig(log, withConfig(testutil.Undent(` server: https://api.venafi.eu @@ -587,11 +588,11 @@ func Test_ValidateAndCombineConfig(t *testing.T) { ) require.NoError(t, err) assert.Equal(t, testutil.Undent(` - Using the Venafi Cloud VenafiConnection auth mode since --venafi-connection was specified. - ignoring the server field specified in the config file. In Venafi Cloud VenafiConnection mode, this field is not needed. - ignoring the venafi-cloud.upload_path field in the config file. In Venafi Cloud VenafiConnection mode, this field is not needed. - ignoring the venafi-cloud.uploader_id field in the config file. This field is not needed in Venafi Cloud VenafiConnection mode. - Using period from config 1h0m0s + INFO Using the Venafi Cloud VenafiConnection auth mode since --venafi-connection was specified. + INFO ignoring the server field specified in the config file. In Venafi Cloud VenafiConnection mode, this field is not needed. + INFO ignoring the venafi-cloud.upload_path field in the config file. In Venafi Cloud VenafiConnection mode, this field is not needed. + INFO ignoring the venafi-cloud.uploader_id field in the config file. This field is not needed in Venafi Cloud VenafiConnection mode. + INFO Using period from config period="1h0m0s" `), gotLogs.String()) assert.Equal(t, VenafiCloudVenafiConnection, got.AuthMode) assert.IsType(t, &client.VenConnClient{}, gotCl) @@ -994,13 +995,15 @@ func withFile(t testing.TB, content string) string { return f.Name() } -func recordLogs() (*log.Logger, *bytes.Buffer) { - b := bytes.Buffer{} - return log.New(&b, "", 0), &b +func recordLogs(t *testing.T) (logr.Logger, ktesting.Buffer) { + log := ktesting.NewLogger(t, ktesting.NewConfig(ktesting.BufferLogs(true))) + testingLogger, ok := log.GetSink().(ktesting.Underlier) + require.True(t, ok) + return log, testingLogger.GetBuffer() } -func discardLogs() *log.Logger { - return log.New(io.Discard, "", 0) +func discardLogs() logr.Logger { + return logr.Discard() } // Shortcut for ParseConfig. diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 2458befe..7bb629ed 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -9,11 +9,13 @@ import ( "io" "net" "net/http" + "net/http/pprof" "os" "strings" "time" "github.com/cenkalti/backoff" + "github.com/go-logr/logr" "github.com/hashicorp/go-multierror" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -33,10 +35,7 @@ import ( "github.com/jetstack/preflight/pkg/client" "github.com/jetstack/preflight/pkg/datagatherer" "github.com/jetstack/preflight/pkg/kubeconfig" - "github.com/jetstack/preflight/pkg/logs" "github.com/jetstack/preflight/pkg/version" - - "net/http/pprof" ) var Flags AgentCmdFlags @@ -51,14 +50,11 @@ const schemaVersion string = "v2.0.0" // Run starts the agent process func Run(cmd *cobra.Command, args []string) (returnErr error) { - logs.Log.Printf("Preflight agent version: %s (%s)", version.PreflightVersion, version.Commit) - ctx, cancel := context.WithCancel( - klog.NewContext( - context.Background(), - klog.Background(), - ), - ) + ctx, cancel := context.WithCancel(cmd.Context()) defer cancel() + log := klog.FromContext(ctx).WithName("Run") + + log.Info("Starting", "version", version.PreflightVersion, "commit", version.Commit) file, err := os.Open(Flags.ConfigFilePath) if err != nil { @@ -76,7 +72,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { return fmt.Errorf("Failed to parse config file: %s", err) } - config, preflightClient, err := ValidateAndCombineConfig(logs.Log, cfg, Flags) + config, preflightClient, err := ValidateAndCombineConfig(log, cfg, Flags) if err != nil { return fmt.Errorf("While evaluating configuration: %v", err) } @@ -95,7 +91,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { { server := http.NewServeMux() const serverAddress = ":8081" - log := klog.FromContext(ctx).WithName("APIServer").WithValues("addr", serverAddress) + log := log.WithName("APIServer").WithValues("addr", serverAddress) if Flags.Profiling { log.Info("Profiling endpoints enabled", "path", "/debug/pprof") @@ -159,7 +155,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { // To help users notice issues with the agent, we show the error messages in // the agent pod's events. - eventf, err := newEventf(config.InstallNS) + eventf, err := newEventf(log, config.InstallNS) if err != nil { return fmt.Errorf("failed to create event recorder: %v", err) } @@ -179,7 +175,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err) } - logs.Log.Printf("starting %q datagatherer", dgConfig.Name) + log.Info("Starting DataGatherer", "name", dgConfig.Name) // start the data gatherers and wait for the cache sync group.Go(func() error { @@ -217,7 +213,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { // the run. if err := dg.WaitForCacheSync(bootCtx.Done()); err != nil { // log sync failure, this might recover in future - logs.Log.Printf("failed to complete initial sync of %q data gatherer %q: %v", dgConfig.Kind, dgConfig.Name, err) + log.Error(err, "Failed to complete initial sync of DataGatherer", "kind", dgConfig.Kind, "name", dgConfig.Name) } } @@ -230,7 +226,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { // TODO(wallrj): Pass a context to gatherAndOutputData, so that we don't // have to wait for it to finish before exiting the process. for { - if err := gatherAndOutputData(eventf, config, preflightClient, dataGatherers); err != nil { + if err := gatherAndOutputData(klog.NewContext(ctx, log), eventf, config, preflightClient, dataGatherers); err != nil { return err } @@ -251,7 +247,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { // POD_NAME to contain the pod name. Note that the RBAC rule allowing sending // events is attached to the pod's service account, not the impersonated service // account (venafi-connection). -func newEventf(installNS string) (Eventf, error) { +func newEventf(log logr.Logger, installNS string) (Eventf, error) { restcfg, err := kubeconfig.LoadRESTConfig("") if err != nil { return nil, fmt.Errorf("failed to load kubeconfig: %v", err) @@ -262,7 +258,7 @@ func newEventf(installNS string) (Eventf, error) { var eventf Eventf if os.Getenv("POD_NAME") == "" { eventf = func(eventType, reason, msg string, args ...interface{}) {} - logs.Log.Printf("error messages will not show in the pod's events because the POD_NAME environment variable is empty") + log.Error(nil, "Error messages will not show in the pod's events because the POD_NAME environment variable is empty") } else { podName := os.Getenv("POD_NAME") @@ -284,11 +280,12 @@ func newEventf(installNS string) (Eventf, error) { // Like Printf but for sending events to the agent's Pod object. type Eventf func(eventType, reason, msg string, args ...interface{}) -func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) error { +func gatherAndOutputData(ctx context.Context, eventf Eventf, config CombinedConfig, preflightClient client.Client, dataGatherers map[string]datagatherer.DataGatherer) error { + log := klog.FromContext(ctx).WithName("gatherAndOutputData") var readings []*api.DataReading if config.InputPath != "" { - logs.Log.Printf("Reading data from local file: %s", config.InputPath) + log.Info("Reading data from local file", "inputPath", config.InputPath) data, err := os.ReadFile(config.InputPath) if err != nil { return fmt.Errorf("failed to read local data file: %s", err) @@ -299,7 +296,7 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c } } else { var err error - readings, err = gatherData(config, dataGatherers) + readings, err = gatherData(klog.NewContext(ctx, log), config, dataGatherers) if err != nil { return err } @@ -314,18 +311,18 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c if err != nil { return fmt.Errorf("failed to output to local file: %s", err) } - logs.Log.Printf("Data saved to local file: %s", config.OutputPath) + log.Info("Data saved to local file", "outputPath", config.OutputPath) } else { backOff := backoff.NewExponentialBackOff() backOff.InitialInterval = 30 * time.Second backOff.MaxInterval = 3 * time.Minute backOff.MaxElapsedTime = config.BackoffMaxTime post := func() error { - return postData(config, preflightClient, readings) + return postData(klog.NewContext(ctx, log), config, preflightClient, readings) } err := backoff.RetryNotify(post, backOff, func(err error, t time.Duration) { eventf("Warning", "PushingErr", "retrying in %v after error: %s", t, err) - logs.Log.Printf("retrying in %v after error: %s", t, err) + log.Info("Warning: PushingErr: retrying", "in", t, "reason", err) }) if err != nil { return fmt.Errorf("Exiting due to fatal error uploading: %v", err) @@ -334,7 +331,9 @@ func gatherAndOutputData(eventf Eventf, config CombinedConfig, preflightClient c return nil } -func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) ([]*api.DataReading, error) { +func gatherData(ctx context.Context, config CombinedConfig, dataGatherers map[string]datagatherer.DataGatherer) ([]*api.DataReading, error) { + log := klog.FromContext(ctx).WithName("gatherData") + var readings []*api.DataReading var dgError *multierror.Error @@ -345,11 +344,14 @@ func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.Dat continue } - - if count >= 0 { - logs.Log.Printf("successfully gathered %d items from %q datagatherer", count, k) - } else { - logs.Log.Printf("successfully gathered data from %q datagatherer", k) + { + // Not all datagatherers return a count. + // If `count == -1` it means that the datagatherer does not support returning a count. + log := log + if count >= 0 { + log = log.WithValues("count", count) + } + log.Info("Successfully gathered", "name", k) } readings = append(readings, &api.DataReading{ ClusterID: config.ClusterID, @@ -379,13 +381,16 @@ func gatherData(config CombinedConfig, dataGatherers map[string]datagatherer.Dat return readings, nil } -func postData(config CombinedConfig, preflightClient client.Client, readings []*api.DataReading) error { +func postData(ctx context.Context, config CombinedConfig, preflightClient client.Client, readings []*api.DataReading) error { + log := klog.FromContext(ctx).WithName("postData") baseURL := config.Server - logs.Log.Println("Posting data to:", baseURL) + log.Info("Posting data", "baseURL", baseURL) if config.AuthMode == VenafiCloudKeypair || config.AuthMode == VenafiCloudVenafiConnection { // orgID and clusterID are not required for Venafi Cloud auth + // TODO(wallrj): Pass the context to PostDataReadingsWithOptions, so + // that its network operations can be cancelled. err := preflightClient.PostDataReadingsWithOptions(readings, client.Options{ ClusterName: config.ClusterID, ClusterDescription: config.ClusterDescription, @@ -393,7 +398,7 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* if err != nil { return fmt.Errorf("post to server failed: %+v", err) } - logs.Log.Println("Data sent successfully.") + log.Info("Data sent successfully") return nil } @@ -409,11 +414,13 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* prometheus.Labels{"organization": config.OrganizationID, "cluster": config.ClusterID}, ) metric.Set(float64(len(data))) - logs.Log.Printf("Data readings upload size: %d", len(data)) + log.Info("Data readings", "uploadSize", len(data)) path := config.EndpointPath if path == "" { path = "/api/v1/datareadings" } + // TODO(wallrj): Pass the context to Post, so that its network + // operations can be cancelled. res, err := preflightClient.Post(path, bytes.NewBuffer(data)) if err != nil { @@ -429,7 +436,8 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* return fmt.Errorf("received response with status code %d. Body: [%s]", code, errorContent) } - logs.Log.Println("Data sent successfully.") + log.Info("Data sent successfully") + return err } @@ -437,11 +445,13 @@ func postData(config CombinedConfig, preflightClient client.Client, readings []* return fmt.Errorf("post to server failed: missing clusterID from agent configuration") } + // TODO(wallrj): Pass the context to PostDataReadings, so + // that its network operations can be cancelled. err := preflightClient.PostDataReadings(config.OrganizationID, config.ClusterID, readings) if err != nil { return fmt.Errorf("post to server failed: %+v", err) } - logs.Log.Println("Data sent successfully.") + log.Info("Data sent successfully") return nil } From 279706714f86c14d26ac45af4c003f6e464c9b39 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 14 Nov 2024 11:38:11 +0000 Subject: [PATCH 11/12] Remove the logs.Log variable and replace it with logr.Logger in the remaining code (#612) Signed-off-by: Richard Wall --- pkg/datagatherer/k8s/cache.go | 25 ++++++++++++++++--------- pkg/datagatherer/k8s/cache_test.go | 22 ++++++++++++++++++---- pkg/datagatherer/k8s/dynamic.go | 14 ++++++++------ pkg/logs/logs.go | 9 --------- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pkg/datagatherer/k8s/cache.go b/pkg/datagatherer/k8s/cache.go index f40c3c09..e64f13c5 100644 --- a/pkg/datagatherer/k8s/cache.go +++ b/pkg/datagatherer/k8s/cache.go @@ -1,13 +1,14 @@ package k8s import ( + "fmt" "time" + "github.com/go-logr/logr" "github.com/pmylund/go-cache" "k8s.io/apimachinery/pkg/types" "github.com/jetstack/preflight/api" - "github.com/jetstack/preflight/pkg/logs" ) // time interface, this is used to fetch the current time @@ -30,9 +31,17 @@ type cacheResource interface { GetNamespace() string } +func logCacheUpdateFailure(log logr.Logger, obj interface{}, operation string) { + // We use WithCallStackHelper to ensure the correct caller line numbers in the log messages + helper, log := log.WithCallStackHelper() + helper() + err := fmt.Errorf("not a cacheResource type: %T missing metadata/uid field", obj) + log.Error(err, "Cache update failure", "operation", operation) +} + // onAdd handles the informer creation events, adding the created runtime.Object // to the data gatherer's cache. The cache key is the uid of the object -func onAdd(obj interface{}, dgCache *cache.Cache) { +func onAdd(log logr.Logger, obj interface{}, dgCache *cache.Cache) { item, ok := obj.(cacheResource) if ok { cacheObject := &api.GatheredResource{ @@ -41,28 +50,26 @@ func onAdd(obj interface{}, dgCache *cache.Cache) { dgCache.Set(string(item.GetUID()), cacheObject, cache.DefaultExpiration) return } - logs.Log.Printf("could not %q resource to the cache, missing metadata/uid field", "add") - + logCacheUpdateFailure(log, obj, "add") } // onUpdate handles the informer update events, replacing the old object with the new one // if it's present in the data gatherer's cache, (if the object isn't present, it gets added). // The cache key is the uid of the object -func onUpdate(old, new interface{}, dgCache *cache.Cache) { +func onUpdate(log logr.Logger, old, new interface{}, dgCache *cache.Cache) { item, ok := old.(cacheResource) if ok { cacheObject := updateCacheGatheredResource(string(item.GetUID()), new, dgCache) dgCache.Set(string(item.GetUID()), cacheObject, cache.DefaultExpiration) return } - - logs.Log.Printf("could not %q resource to the cache, missing metadata/uid field", "update") + logCacheUpdateFailure(log, old, "update") } // onDelete handles the informer deletion events, updating the object's properties with the deletion // time of the object (but not removing the object from the cache). // The cache key is the uid of the object -func onDelete(obj interface{}, dgCache *cache.Cache) { +func onDelete(log logr.Logger, obj interface{}, dgCache *cache.Cache) { item, ok := obj.(cacheResource) if ok { cacheObject := updateCacheGatheredResource(string(item.GetUID()), obj, dgCache) @@ -70,7 +77,7 @@ func onDelete(obj interface{}, dgCache *cache.Cache) { dgCache.Set(string(item.GetUID()), cacheObject, cache.DefaultExpiration) return } - logs.Log.Printf("could not %q resource to the cache, missing metadata/uid field", "delete") + logCacheUpdateFailure(log, obj, "delete") } // creates a new updated instance of a cache object, with the resource diff --git a/pkg/datagatherer/k8s/cache_test.go b/pkg/datagatherer/k8s/cache_test.go index 80ab6a9e..a8616b89 100644 --- a/pkg/datagatherer/k8s/cache_test.go +++ b/pkg/datagatherer/k8s/cache_test.go @@ -6,8 +6,10 @@ import ( "time" "github.com/d4l3k/messagediff" + "github.com/go-logr/logr" "github.com/pmylund/go-cache" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/klog/v2/ktesting" "github.com/jetstack/preflight/api" ) @@ -23,7 +25,7 @@ func TestOnAddCache(t *testing.T) { tcs := map[string]struct { inputObjects []runtime.Object eventObjects []runtime.Object - eventFunc func(old, obj interface{}, dgCache *cache.Cache) + eventFunc func(log logr.Logger, old, obj interface{}, dgCache *cache.Cache) expected []*api.GatheredResource }{ "add all objects": { @@ -50,7 +52,7 @@ func TestOnAddCache(t *testing.T) { getObject("v1", "Service", "testservice", "testns", false), getObject("foobar/v1", "NotFoo", "notfoo", "testns", false), }, - eventFunc: func(old, new interface{}, dgCache *cache.Cache) { onDelete(old, dgCache) }, + eventFunc: func(log logr.Logger, old, new interface{}, dgCache *cache.Cache) { onDelete(log, old, dgCache) }, expected: []*api.GatheredResource{ makeGatheredResource( getObject("foobar/v1", "Foo", "testfoo", "testns", false), @@ -98,16 +100,17 @@ func TestOnAddCache(t *testing.T) { for name, tc := range tcs { t.Run(name, func(t *testing.T) { + log := ktesting.NewLogger(t, ktesting.NewConfig(ktesting.Verbosity(10))) dgCache := cache.New(5*time.Minute, 30*time.Second) // adding initial objetcs to the cache for _, obj := range tc.inputObjects { - onAdd(obj, dgCache) + onAdd(log, obj, dgCache) } // Testing event founction on set of objects for _, obj := range tc.eventObjects { if tc.eventFunc != nil { - tc.eventFunc(obj, obj, dgCache) + tc.eventFunc(log, obj, obj, dgCache) } } @@ -136,3 +139,14 @@ func TestOnAddCache(t *testing.T) { }) } } + +// TestNoneCache demonstrates that the cache helpers do not crash if passed a +// non-cachable object, but log an error with a reference to the object type. +func TestNoneCache(t *testing.T) { + log := ktesting.NewLogger(t, ktesting.NewConfig(ktesting.Verbosity(10))) + + type notCachable struct{} + onAdd(log, ¬Cachable{}, nil) + onUpdate(log, ¬Cachable{}, nil, nil) + onDelete(log, ¬Cachable{}, nil) +} diff --git a/pkg/datagatherer/k8s/dynamic.go b/pkg/datagatherer/k8s/dynamic.go index d24b0629..e80dc75d 100644 --- a/pkg/datagatherer/k8s/dynamic.go +++ b/pkg/datagatherer/k8s/dynamic.go @@ -23,10 +23,10 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" k8scache "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" "github.com/jetstack/preflight/api" "github.com/jetstack/preflight/pkg/datagatherer" - "github.com/jetstack/preflight/pkg/logs" ) // ConfigDynamic contains the configuration for the data-gatherer. @@ -161,6 +161,7 @@ func (c *ConfigDynamic) NewDataGatherer(ctx context.Context) (datagatherer.DataG } func (c *ConfigDynamic) newDataGathererWithClient(ctx context.Context, cl dynamic.Interface, clientset kubernetes.Interface) (datagatherer.DataGatherer, error) { + log := klog.FromContext(ctx) if err := c.validate(); err != nil { return nil, err } @@ -216,13 +217,13 @@ func (c *ConfigDynamic) newDataGathererWithClient(ctx context.Context, cl dynami registration, err := newDataGatherer.informer.AddEventHandler(k8scache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - onAdd(obj, dgCache) + onAdd(log, obj, dgCache) }, UpdateFunc: func(old, new interface{}) { - onUpdate(old, new, dgCache) + onUpdate(log, old, new, dgCache) }, DeleteFunc: func(obj interface{}) { - onDelete(obj, dgCache) + onDelete(log, obj, dgCache) }, }) if err != nil { @@ -264,6 +265,7 @@ type DataGathererDynamic struct { // Returns error if the data gatherer informer wasn't initialized, Run blocks // until the stopCh is closed. func (g *DataGathererDynamic) Run(stopCh <-chan struct{}) error { + log := klog.FromContext(g.ctx) if g.informer == nil { return fmt.Errorf("informer was not initialized, impossible to start") } @@ -271,9 +273,9 @@ func (g *DataGathererDynamic) Run(stopCh <-chan struct{}) error { // attach WatchErrorHandler, it needs to be set before starting an informer err := g.informer.SetWatchErrorHandler(func(r *k8scache.Reflector, err error) { if strings.Contains(fmt.Sprintf("%s", err), "the server could not find the requested resource") { - logs.Log.Printf("server missing resource for datagatherer of %q ", g.groupVersionResource) + log.Info("server missing resource for datagatherer", "groupVersionResource", g.groupVersionResource) } else { - logs.Log.Printf("datagatherer informer for %q has failed and is backing off due to error: %s", g.groupVersionResource, err) + log.Info("datagatherer informer has failed and is backing off", "groupVersionResource", g.groupVersionResource, "reason", err) } }) if err != nil { diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index b8ffd71f..35366ab4 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -34,12 +34,6 @@ import ( // upon which this code was based. var ( - // This is the Agent's logger. For now, it is still a *log.Logger, but we - // mean to migrate everything to slog with the klog backend. We avoid using - // log.Default because log.Default is already used by the VCert library, and - // we need to keep the agent's logger from the VCert's logger to be able to - // remove the `vCert: ` prefix from the VCert logs. - Log *log.Logger // All but the essential logging flags will be hidden to avoid overwhelming // the user. The hidden flags can still be used. For example if a user does @@ -120,9 +114,6 @@ func Initialize() error { // the agent, which still uses log.Printf. slog := slog.Default() - Log = &log.Logger{} - Log.SetOutput(LogToSlogWriter{Slog: slog, Source: "agent"}) - // Let's make sure the VCert library, which is the only library we import to // be using the global log.Default, also uses the common slog logger. vcertLog := log.Default() From 1690dd9fc0a1110e1dde634500a0caccb4356ee2 Mon Sep 17 00:00:00 2001 From: Richard Wall Date: Thu, 14 Nov 2024 11:46:24 +0000 Subject: [PATCH 12/12] Define well known log verbosity values and log less by default (#617) Signed-off-by: Richard Wall --- pkg/agent/run.go | 17 +++++++++-------- pkg/datagatherer/k8s/dynamic.go | 3 ++- pkg/logs/logs.go | 9 +++++++++ pkg/logs/logs_test.go | 2 +- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/pkg/agent/run.go b/pkg/agent/run.go index 7bb629ed..0fa7730b 100644 --- a/pkg/agent/run.go +++ b/pkg/agent/run.go @@ -35,6 +35,7 @@ import ( "github.com/jetstack/preflight/pkg/client" "github.com/jetstack/preflight/pkg/datagatherer" "github.com/jetstack/preflight/pkg/kubeconfig" + "github.com/jetstack/preflight/pkg/logs" "github.com/jetstack/preflight/pkg/version" ) @@ -175,7 +176,7 @@ func Run(cmd *cobra.Command, args []string) (returnErr error) { return fmt.Errorf("failed to instantiate %q data gatherer %q: %v", kind, dgConfig.Name, err) } - log.Info("Starting DataGatherer", "name", dgConfig.Name) + log.V(logs.Debug).Info("Starting DataGatherer", "name", dgConfig.Name) // start the data gatherers and wait for the cache sync group.Go(func() error { @@ -285,7 +286,7 @@ func gatherAndOutputData(ctx context.Context, eventf Eventf, config CombinedConf var readings []*api.DataReading if config.InputPath != "" { - log.Info("Reading data from local file", "inputPath", config.InputPath) + log.V(logs.Debug).Info("Reading data from local file", "inputPath", config.InputPath) data, err := os.ReadFile(config.InputPath) if err != nil { return fmt.Errorf("failed to read local data file: %s", err) @@ -351,7 +352,7 @@ func gatherData(ctx context.Context, config CombinedConfig, dataGatherers map[st if count >= 0 { log = log.WithValues("count", count) } - log.Info("Successfully gathered", "name", k) + log.V(logs.Debug).Info("Successfully gathered", "name", k) } readings = append(readings, &api.DataReading{ ClusterID: config.ClusterID, @@ -385,7 +386,7 @@ func postData(ctx context.Context, config CombinedConfig, preflightClient client log := klog.FromContext(ctx).WithName("postData") baseURL := config.Server - log.Info("Posting data", "baseURL", baseURL) + log.V(logs.Debug).Info("Posting data", "baseURL", baseURL) if config.AuthMode == VenafiCloudKeypair || config.AuthMode == VenafiCloudVenafiConnection { // orgID and clusterID are not required for Venafi Cloud auth @@ -467,7 +468,7 @@ func postData(ctx context.Context, config CombinedConfig, preflightClient client func listenAndServe(ctx context.Context, server *http.Server) error { log := klog.FromContext(ctx).WithName("ListenAndServe") - log.V(1).Info("Starting") + log.V(logs.Debug).Info("Starting") listenCTX, listenCancelCause := context.WithCancelCause(context.WithoutCancel(ctx)) go func() { @@ -477,11 +478,11 @@ func listenAndServe(ctx context.Context, server *http.Server) error { select { case <-listenCTX.Done(): - log.V(1).Info("Shutdown skipped", "reason", "Server already stopped") + log.V(logs.Debug).Info("Shutdown skipped", "reason", "Server already stopped") return context.Cause(listenCTX) case <-ctx.Done(): - log.V(1).Info("Shutting down") + log.V(logs.Debug).Info("Shutting down") } shutdownCTX, shutdownCancel := context.WithTimeout(context.WithoutCancel(ctx), time.Second*3) @@ -496,7 +497,7 @@ func listenAndServe(ctx context.Context, server *http.Server) error { closeErr = fmt.Errorf("Close: %s", closeErr) } - log.V(1).Info("Shutdown complete") + log.V(logs.Debug).Info("Shutdown complete") return errors.Join(shutdownErr, closeErr) } diff --git a/pkg/datagatherer/k8s/dynamic.go b/pkg/datagatherer/k8s/dynamic.go index e80dc75d..68753648 100644 --- a/pkg/datagatherer/k8s/dynamic.go +++ b/pkg/datagatherer/k8s/dynamic.go @@ -27,6 +27,7 @@ import ( "github.com/jetstack/preflight/api" "github.com/jetstack/preflight/pkg/datagatherer" + "github.com/jetstack/preflight/pkg/logs" ) // ConfigDynamic contains the configuration for the data-gatherer. @@ -273,7 +274,7 @@ func (g *DataGathererDynamic) Run(stopCh <-chan struct{}) error { // attach WatchErrorHandler, it needs to be set before starting an informer err := g.informer.SetWatchErrorHandler(func(r *k8scache.Reflector, err error) { if strings.Contains(fmt.Sprintf("%s", err), "the server could not find the requested resource") { - log.Info("server missing resource for datagatherer", "groupVersionResource", g.groupVersionResource) + log.V(logs.Debug).Info("Server missing resource for datagatherer", "groupVersionResource", g.groupVersionResource) } else { log.Info("datagatherer informer has failed and is backing off", "groupVersionResource", g.groupVersionResource, "reason", err) } diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index 35366ab4..cbca5b13 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -49,6 +49,14 @@ var ( features = featuregate.NewFeatureGate() ) +const ( + // Standard log verbosity levels. + // Use these instead of integers in venafi-kubernetes-agent code. + Info = 0 + Debug = 1 + Trace = 2 +) + func init() { runtime.Must(logsapi.AddFeatureGates(features)) // Turn on ALPHA options to enable the split-stream logging options. @@ -94,6 +102,7 @@ func AddFlags(fs *pflag.FlagSet) { if f.Name == "v" { f.Name = "log-level" f.Shorthand = "v" + f.Usage = fmt.Sprintf("%s. 0=Info, 1=Debug, 2=Trace. Use 3-10 for even greater detail. (default: 0)", f.Usage) } }) fs.AddFlagSet(&tfs) diff --git a/pkg/logs/logs_test.go b/pkg/logs/logs_test.go index e3869df6..f48a1b4c 100644 --- a/pkg/logs/logs_test.go +++ b/pkg/logs/logs_test.go @@ -83,7 +83,7 @@ func TestLogs(t *testing.T) { name: "help", flags: "-h", expectStdout: ` - -v, --log-level Level number for the log level verbosity + -v, --log-level Level number for the log level verbosity. 0=Info, 1=Debug, 2=Trace. Use 3-10 for even greater detail. (default: 0) --logging-format string Sets the log format. Permitted formats: "json", "text". (default "text") --vmodule pattern=N,... comma-separated list of pattern=N settings for file-filtered logging (only works for text log format) `,