From 11b1878e260e45dee7bb0fe47add4b374b748661 Mon Sep 17 00:00:00 2001 From: Jacob Weinstock Date: Thu, 10 Oct 2024 22:00:26 -0600 Subject: [PATCH] WIP: add otel logging to help with debugging: And to help with understanding code paths taken. This is very helpful as we have to create an idempotent Reconcile function. Signed-off-by: Jacob Weinstock --- api/v1alpha1/workflow_types.go | 2 + cmd/tink-controller/main.go | 52 +++++++++++++++++++--- config/manager-rbac/role.yaml | 2 +- go.mod | 9 ++-- go.sum | 6 +++ internal/deprecated/controller/manager.go | 5 ++- internal/deprecated/workflow/job.go | 37 ++++++++++++++- internal/deprecated/workflow/pre.go | 9 ++++ internal/deprecated/workflow/reconciler.go | 42 ++++++++++++++--- 9 files changed, 142 insertions(+), 22 deletions(-) diff --git a/api/v1alpha1/workflow_types.go b/api/v1alpha1/workflow_types.go index bf216b1fb..5f1025958 100644 --- a/api/v1alpha1/workflow_types.go +++ b/api/v1alpha1/workflow_types.go @@ -146,6 +146,8 @@ type WorkflowStatus struct { // +patchStrategy=merge // +listType=atomic Conditions []WorkflowCondition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"` + + TraceParent string `json:"traceParent,omitempty"` } // JobStatus holds the state of a specific job.bmc.tinkerbell.org object created. diff --git a/cmd/tink-controller/main.go b/cmd/tink-controller/main.go index 354990bdf..4731b7e80 100644 --- a/cmd/tink-controller/main.go +++ b/cmd/tink-controller/main.go @@ -11,6 +11,10 @@ import ( "github.com/spf13/pflag" "github.com/spf13/viper" "github.com/tinkerbell/tink/internal/deprecated/controller" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp" + "go.opentelemetry.io/otel/sdk/log" + "go.opentelemetry.io/otel/trace" "go.uber.org/zap" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" @@ -56,19 +60,53 @@ func NewRootCommand() *cobra.Command { if err != nil { panic(err) } - logger := zapr.NewLogger(zlog).WithName("github.com/tinkerbell/tink") + logger2 := zapr.NewLogger(zlog).WithName("github.com/tinkerbell/tink") cmd := &cobra.Command{ Use: "tink-controller", PreRunE: func(cmd *cobra.Command, _ []string) error { - viper, err := createViper(logger) + viper, err := createViper(logger2) if err != nil { return fmt.Errorf("config init: %w", err) } return applyViper(viper, cmd) }, RunE: func(cmd *cobra.Command, _ []string) error { - logger.Info("Starting controller version " + version) + os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", "http://192.168.2.50:4318") + os.Setenv("OTEL_EXPORTER_OTLP_INSECURE", "true") + os.Setenv("OTEL_SERVICE_NAME", "tink-controller") + ctx := cmd.Context() + + oCfg := OConfig{ + Servicename: "tink-controller", + Endpoint: "192.168.2.50:4317", + Insecure: true, + } + ctx, _, _ = Init(ctx, oCfg) + // Create the OTLP log exporter that sends logs to configured destination + logExporter, err := otlploghttp.New(ctx) + if err != nil { + panic("failed to initialize exporter") + } + + // Create the logger provider + lp := log.NewLoggerProvider( + log.WithProcessor( + log.NewBatchProcessor(logExporter), + ), + ) + + // Ensure the logger is shutdown before exiting so all pending logs are exported + defer lp.Shutdown(ctx) + handler := NewHandler("github.com/tinkerbell/tink", WithLoggerProvider(lp)) + + logger := logr.FromSlogHandler(handler) + tracer := otel.Tracer("my-tracer") + var span trace.Span + ctx, span = tracer.Start(ctx, "start up") + defer span.End() + // new stuff above + logger.Info("Starting controller version "+version, "TraceID", trace.SpanContextFromContext(ctx).TraceID()) ccfg := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( &clientcmd.ClientConfigLoadingRules{ExplicitPath: config.Kubeconfig}, @@ -85,7 +123,7 @@ func NewRootCommand() *cobra.Command { } options := ctrl.Options{ - Logger: logger, + Logger: logger2, LeaderElection: config.EnableLeaderElection, LeaderElectionID: "tink.tinkerbell.org", LeaderElectionNamespace: namespace, @@ -95,14 +133,14 @@ func NewRootCommand() *cobra.Command { HealthProbeBindAddress: config.ProbeAddr, } - ctrl.SetLogger(logger) + ctrl.SetLogger(logger2) - mgr, err := controller.NewManager(cfg, options) + mgr, err := controller.NewManager(cfg, options, logger) if err != nil { return fmt.Errorf("controller manager: %w", err) } - return mgr.Start(cmd.Context()) + return mgr.Start(ctx) }, } config.AddFlags(cmd.Flags()) diff --git a/config/manager-rbac/role.yaml b/config/manager-rbac/role.yaml index 3f46bb22e..1ff908740 100644 --- a/config/manager-rbac/role.yaml +++ b/config/manager-rbac/role.yaml @@ -2,7 +2,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: manager-role + name: tink-controller-manager-role rules: - apiGroups: - bmc.tinkerbell.org diff --git a/go.mod b/go.mod index d1160300a..2e2f739b5 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ toolchain go1.22.2 require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/avast/retry-go v3.0.0+incompatible + github.com/cenkalti/backoff/v4 v4.3.0 github.com/distribution/reference v0.6.0 github.com/docker/docker v27.3.1+incompatible github.com/equinix-labs/otel-init-go v0.0.9 @@ -28,6 +29,10 @@ require ( github.com/stretchr/testify v1.9.0 github.com/tinkerbell/rufio v0.3.3 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.55.0 + go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.6.0 + go.opentelemetry.io/otel/log v0.6.0 + go.opentelemetry.io/otel/sdk/log v0.6.0 + go.opentelemetry.io/otel/trace v1.30.0 go.uber.org/multierr v1.11.0 go.uber.org/zap v1.27.0 google.golang.org/grpc v1.67.1 @@ -36,7 +41,6 @@ require ( k8s.io/api v0.31.1 k8s.io/apimachinery v0.31.1 k8s.io/client-go v0.31.1 - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 knative.dev/pkg v0.0.0-20240917091217-aaab500c26c4 sigs.k8s.io/controller-runtime v0.19.0 sigs.k8s.io/yaml v1.4.0 @@ -48,7 +52,6 @@ require ( github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.4.14 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/containerd/log v0.1.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -112,7 +115,6 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 // indirect go.opentelemetry.io/otel/metric v1.30.0 // indirect go.opentelemetry.io/otel/sdk v1.30.0 // indirect - go.opentelemetry.io/otel/trace v1.30.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/crypto v0.27.0 // indirect golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect @@ -134,6 +136,7 @@ require ( k8s.io/apiextensions-apiserver v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240808142205-8e686545bdb8 // indirect + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) diff --git a/go.sum b/go.sum index 448e3b0bb..9f5858183 100644 --- a/go.sum +++ b/go.sum @@ -219,16 +219,22 @@ go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIX go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts= go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.6.0 h1:QSKmLBzbFULSyHzOdO9JsN9lpE4zkrz1byYGmJecdVE= +go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp v0.6.0/go.mod h1:sTQ/NH8Yrirf0sJ5rWqVu+oT82i4zL9FaF6rWcqnptM= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0 h1:lsInsfvhVIfOI6qHVyysXMNDnjO9Npvl7tlDPJFBVd4= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.30.0/go.mod h1:KQsVNh4OjgjTG0G6EiNi1jVpnaeeKsKMRwbLN+f1+8M= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0 h1:umZgi92IyxfXd/l4kaDhnKgY8rnN/cZcF1LKc6I8OQ8= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.30.0/go.mod h1:4lVs6obhSVRb1EW5FhOuBTyiQhtRtAnnva9vD3yRfq8= +go.opentelemetry.io/otel/log v0.6.0 h1:nH66tr+dmEgW5y+F9LanGJUBYPrRgP4g2EkmPE3LeK8= +go.opentelemetry.io/otel/log v0.6.0/go.mod h1:KdySypjQHhP069JX0z/t26VHwa8vSwzgaKmXtIB3fJM= go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w= go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ= go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE= go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg= +go.opentelemetry.io/otel/sdk/log v0.6.0 h1:4J8BwXY4EeDE9Mowg+CyhWVBhTSLXVXodiXxS/+PGqI= +go.opentelemetry.io/otel/sdk/log v0.6.0/go.mod h1:L1DN8RMAduKkrwRAFDEX3E3TLOq46+XMGSbUfHU/+vE= go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc= go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= diff --git a/internal/deprecated/controller/manager.go b/internal/deprecated/controller/manager.go index 4c5178f58..b346c959f 100644 --- a/internal/deprecated/controller/manager.go +++ b/internal/deprecated/controller/manager.go @@ -3,6 +3,7 @@ package controller import ( "fmt" + "github.com/go-logr/logr" rufio "github.com/tinkerbell/rufio/api/v1alpha1" "github.com/tinkerbell/tink/api/v1alpha1" "github.com/tinkerbell/tink/internal/deprecated/workflow" @@ -28,7 +29,7 @@ func DefaultScheme() *runtime.Scheme { // NewManager creates a new controller manager with tink controller controllers pre-registered. // If opts.Scheme is nil, DefaultScheme() is used. -func NewManager(cfg *rest.Config, opts ctrl.Options) (ctrl.Manager, error) { +func NewManager(cfg *rest.Config, opts ctrl.Options, logger logr.Logger) (ctrl.Manager, error) { if opts.Scheme == nil { opts.Scheme = DefaultScheme() } @@ -46,7 +47,7 @@ func NewManager(cfg *rest.Config, opts ctrl.Options) (ctrl.Manager, error) { return nil, fmt.Errorf("set up ready check: %w", err) } - err = workflow.NewReconciler(mgr.GetClient()).SetupWithManager(mgr) + err = workflow.NewReconciler(mgr.GetClient(), logger).SetupWithManager(mgr) if err != nil { return nil, fmt.Errorf("setup workflow reconciler: %w", err) } diff --git a/internal/deprecated/workflow/job.go b/internal/deprecated/workflow/job.go index 705842529..82bdf70ab 100644 --- a/internal/deprecated/workflow/job.go +++ b/internal/deprecated/workflow/job.go @@ -3,10 +3,13 @@ package workflow import ( "context" "fmt" + "log/slog" "time" rufio "github.com/tinkerbell/rufio/api/v1alpha1" "github.com/tinkerbell/tink/api/v1alpha1" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -26,9 +29,14 @@ func (j jobName) String() string { // this function will update the Workflow status. func (s *state) handleJob(ctx context.Context, actions []rufio.Action, name jobName) (reconcile.Result, error) { + tracer := otel.Tracer("handleJob") + var span trace.Span + ctx, span = tracer.Start(ctx, "handleJob") + defer span.End() // there are 3 phases. 1. Clean up existing 2. Create new 3. Track status // 1. clean up existing job if it wasn't already deleted if j := s.workflow.Status.BootOptions.Jobs[name.String()]; !j.ExistingJobDeleted { + s.logger.InfoContext(ctx, "deleting existing job", "name", name) result, err := s.deleteExisting(ctx, name) if err != nil { return result, err @@ -39,6 +47,7 @@ func (s *state) handleJob(ctx context.Context, actions []rufio.Action, name jobN // 2. create a new job if uid := s.workflow.Status.BootOptions.Jobs[name.String()].UID; uid == "" { + s.logger.InfoContext(ctx, "no uid found for job", "name", name) result, err := s.createJob(ctx, actions, name) if err != nil { s.workflow.Status.SetCondition(v1alpha1.WorkflowCondition{ @@ -62,6 +71,7 @@ func (s *state) handleJob(ctx context.Context, actions []rufio.Action, name jobN // 3. track status if !s.workflow.Status.BootOptions.Jobs[name.String()].Complete { + s.logger.InfoContext(ctx, "tracking job", "name", name) // track status r, tState, err := s.trackRunningJob(ctx, name) if err != nil { @@ -111,6 +121,10 @@ func (s *state) deleteExisting(ctx context.Context, name jobName) (reconcile.Res // This function will update the Workflow status. func (s *state) createJob(ctx context.Context, actions []rufio.Action, name jobName) (reconcile.Result, error) { + tracer := otel.Tracer("createJob") + var span trace.Span + ctx, span = tracer.Start(ctx, "createJob") + defer span.End() // create a new job // The assumption is that the UID is not set. UID checking is not handled here. // 1. look up if there's an existing job with the same name, if so update the status with the UID and return @@ -118,6 +132,12 @@ func (s *state) createJob(ctx context.Context, actions []rufio.Action, name jobN rj := &rufio.Job{} if err := s.client.Get(ctx, client.ObjectKey{Name: name.String(), Namespace: s.workflow.Namespace}, rj); err == nil { + s.logger.InfoContext(ctx, "job already exists", "name", name) + if !rj.DeletionTimestamp.IsZero() { + s.logger.InfoContext(ctx, "job is being deleted", "name", name) + return reconcile.Result{Requeue: true}, nil + } + //TODO(jacobweinstock): job exists means that the job name and uid from the status are the same. // get the UID and update the status jStatus := s.workflow.Status.BootOptions.Jobs[name.String()] jStatus.UID = rj.GetUID() @@ -134,9 +154,10 @@ func (s *state) createJob(ctx context.Context, actions []rufio.Action, name jobN return reconcile.Result{}, fmt.Errorf("hardware %q does not have a BMC", s.hardware.Name) } - if err := create(ctx, s.client, name.String(), s.hardware, s.workflow.Namespace, actions); err != nil { + if err := create(ctx, s.logger, s.client, name.String(), s.hardware, s.workflow.Namespace, actions); err != nil { return reconcile.Result{}, fmt.Errorf("error creating job: %w", err) } + s.logger.InfoContext(ctx, "job created", "name", name) return reconcile.Result{Requeue: true}, nil } @@ -152,6 +173,10 @@ var ( // This function will update the Workflow status. func (s *state) trackRunningJob(ctx context.Context, name jobName) (reconcile.Result, trackedState, error) { + tracer := otel.Tracer("trackRunningJob") + var span trace.Span + ctx, span = tracer.Start(ctx, "trackRunningJob") + defer span.End() // track status // get the job rj := &rufio.Job{} @@ -159,10 +184,12 @@ func (s *state) trackRunningJob(ctx context.Context, name jobName) (reconcile.Re return reconcile.Result{}, trackedStateError, fmt.Errorf("error getting job: %w", err) } if rj.HasCondition(rufio.JobFailed, rufio.ConditionTrue) { + s.logger.InfoContext(ctx, "job failed", "name", name) // job failed return reconcile.Result{}, trackedStateFailed, fmt.Errorf("job failed") } if rj.HasCondition(rufio.JobCompleted, rufio.ConditionTrue) { + s.logger.InfoContext(ctx, "job completed", "name", name) // job completed jStatus := s.workflow.Status.BootOptions.Jobs[name.String()] jStatus.Complete = true @@ -171,11 +198,17 @@ func (s *state) trackRunningJob(ctx context.Context, name jobName) (reconcile.Re return reconcile.Result{}, trackedStateComplete, nil } // still running + s.logger.InfoContext(ctx, "job still running", "name", name) time.Sleep(s.backoff.NextBackOff()) return reconcile.Result{Requeue: true}, trackedStateRunning, nil } -func create(ctx context.Context, cc client.Client, name string, hw *v1alpha1.Hardware, ns string, tasks []rufio.Action) error { +func create(ctx context.Context, logger *slog.Logger, cc client.Client, name string, hw *v1alpha1.Hardware, ns string, tasks []rufio.Action) error { + tracer := otel.Tracer("create") + var span trace.Span + ctx, span = tracer.Start(ctx, "create") + defer span.End() + logger.InfoContext(ctx, "creating job", "name", name) if err := cc.Create(ctx, &rufio.Job{ ObjectMeta: metav1.ObjectMeta{ Name: name, diff --git a/internal/deprecated/workflow/pre.go b/internal/deprecated/workflow/pre.go index 1661ea51a..8ceb0869d 100644 --- a/internal/deprecated/workflow/pre.go +++ b/internal/deprecated/workflow/pre.go @@ -7,6 +7,8 @@ import ( "github.com/pkg/errors" rufio "github.com/tinkerbell/rufio/api/v1alpha1" "github.com/tinkerbell/tink/api/v1alpha1" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -14,9 +16,14 @@ import ( // The workflow (s.workflow) can be updated even if an error occurs. // Any patching of the workflow object in a cluster is left up to the caller. func (s *state) prepareWorkflow(ctx context.Context) (reconcile.Result, error) { + tracer := otel.Tracer("prepareWorkflow") + var span trace.Span + ctx, span = tracer.Start(ctx, "prepareWorkflow") + defer span.End() // handle bootoptions // 1. Handle toggling allowPXE in a hardware object if toggleAllowNetboot is true. if s.workflow.Spec.BootOptions.ToggleAllowNetboot { + s.logger.InfoContext(ctx, "toggling allowPXE true") if err := s.toggleHardware(ctx, true); err != nil { return reconcile.Result{}, err } @@ -25,6 +32,7 @@ func (s *state) prepareWorkflow(ctx context.Context) (reconcile.Result, error) { // 2. Handle booting scenarios. switch s.workflow.Spec.BootOptions.BootMode { case v1alpha1.BootModeNetboot: + s.logger.InfoContext(ctx, "boot mode netboot") if s.hardware == nil { return reconcile.Result{}, errors.New("hardware is nil") } @@ -60,6 +68,7 @@ func (s *state) prepareWorkflow(ctx context.Context) (reconcile.Result, error) { } return r, err case v1alpha1.BootModeISO: + s.logger.InfoContext(ctx, "boot mode iso") if s.hardware == nil { return reconcile.Result{}, errors.New("hardware is nil") } diff --git a/internal/deprecated/workflow/reconciler.go b/internal/deprecated/workflow/reconciler.go index 026a2a4a9..b6dbb0a02 100644 --- a/internal/deprecated/workflow/reconciler.go +++ b/internal/deprecated/workflow/reconciler.go @@ -4,11 +4,15 @@ import ( "context" serrors "errors" "fmt" + "log/slog" "time" "github.com/cenkalti/backoff/v4" "github.com/go-logr/logr" "github.com/tinkerbell/tink/api/v1alpha1" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/trace" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -24,17 +28,21 @@ type Reconciler struct { client ctrlclient.Client nowFunc func() time.Time backoff *backoff.ExponentialBackOff + Logger logr.Logger + props propagation.TextMapPropagator } // TODO(jacobweinstock): add functional arguments to the signature. // TODO(jacobweinstock): write functional argument for customizing the backoff. -func NewReconciler(client ctrlclient.Client) *Reconciler { +func NewReconciler(client ctrlclient.Client, logger logr.Logger) *Reconciler { return &Reconciler{ client: client, nowFunc: time.Now, backoff: backoff.NewExponentialBackOff([]backoff.ExponentialBackOffOpts{ backoff.WithMaxInterval(5 * time.Second), // this should keep all NextBackOff's under 10 seconds }...), + Logger: logger, + props: otel.GetTextMapPropagator(), } } @@ -50,6 +58,7 @@ type state struct { workflow *v1alpha1.Workflow hardware *v1alpha1.Hardware backoff *backoff.ExponentialBackOff + logger *slog.Logger } // +kubebuilder:rbac:groups=tinkerbell.org,resources=hardware;hardware/status,verbs=get;list;watch;update;patch @@ -59,8 +68,15 @@ type state struct { // Reconcile handles Workflow objects. This includes Template rendering, optional Hardware allowPXE toggling, and optional Hardware one-time netbooting. func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { - logger := ctrl.LoggerFrom(ctx) - logger.Info("Reconciling") + tracer := otel.Tracer("my-tracer") + var span trace.Span + ctx, span = tracer.Start(ctx, "reconcile", trace.WithNewRoot()) + defer span.End() + + logger := slog.New(logr.ToSlogHandler(r.Logger)) + //logger = logger.With("TraceID", trace.SpanContextFromContext(ctx).TraceID()) + //logger := ctrl.LoggerFrom(ctx).WithValues("TraceID", trace.SpanContextFromContext(ctx).TraceID()) + logger.InfoContext(ctx, "In reconcile func") stored := &v1alpha1.Workflow{} if err := r.client.Get(ctx, req.NamespacedName, stored); err != nil { @@ -78,12 +94,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco wflow := stored.DeepCopy() + + switch wflow.Status.State { case "": + logger.InfoContext(ctx, "new workflow") resp, err := r.processNewWorkflow(ctx, logger, wflow) return resp, serrors.Join(err, mergePatchStatus(ctx, r.client, stored, wflow)) case v1alpha1.WorkflowStatePreparing: + logger.InfoContext(ctx, "preparing workflow") hw, err := hardwareFrom(ctx, r.client, wflow) if err != nil { return reconcile.Result{}, err @@ -93,15 +113,18 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco workflow: wflow, hardware: hw, backoff: r.backoff, + logger: logger, } resp, err := s.prepareWorkflow(ctx) return resp, serrors.Join(err, mergePatchStatus(ctx, r.client, stored, s.workflow)) case v1alpha1.WorkflowStateRunning: + logger.InfoContext(ctx, "process running workflow") r.processRunningWorkflow(wflow) return reconcile.Result{}, mergePatchStatus(ctx, r.client, stored, wflow) case v1alpha1.WorkflowStatePost: + logger.InfoContext(ctx, "post actions") hw, err := hardwareFrom(ctx, r.client, wflow) if err != nil { return reconcile.Result{}, err @@ -111,6 +134,7 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco workflow: wflow, hardware: hw, backoff: r.backoff, + logger: logger, } rc, err := s.postActions(ctx) @@ -133,12 +157,16 @@ func mergePatchStatus(ctx context.Context, cc ctrlclient.Client, original, updat return nil } -func (r *Reconciler) processNewWorkflow(ctx context.Context, logger logr.Logger, stored *v1alpha1.Workflow) (reconcile.Result, error) { +func (r *Reconciler) processNewWorkflow(ctx context.Context, logger *slog.Logger, stored *v1alpha1.Workflow) (reconcile.Result, error) { + tracer := otel.Tracer("processNewWorkflow") + var span trace.Span + ctx, span = tracer.Start(ctx, "processNewWorkflow") + defer span.End() tpl := &v1alpha1.Template{} if err := r.client.Get(ctx, ctrlclient.ObjectKey{Name: stored.Spec.TemplateRef, Namespace: stored.Namespace}, tpl); err != nil { if errors.IsNotFound(err) { // Throw an error to raise awareness and take advantage of immediate requeue. - logger.Error(err, "error getting Template object in processNewWorkflow function") + logger.ErrorContext(ctx, "error getting Template object in processNewWorkflow function", "error", err) stored.Status.TemplateRendering = v1alpha1.TemplateRenderingFailed stored.Status.SetCondition(v1alpha1.WorkflowCondition{ Type: v1alpha1.TemplateRenderedSuccess, @@ -167,7 +195,7 @@ func (r *Reconciler) processNewWorkflow(ctx context.Context, logger logr.Logger, var hardware v1alpha1.Hardware err := r.client.Get(ctx, ctrlclient.ObjectKey{Name: stored.Spec.HardwareRef, Namespace: stored.Namespace}, &hardware) if ctrlclient.IgnoreNotFound(err) != nil { - logger.Error(err, "error getting Hardware object in processNewWorkflow function") + logger.ErrorContext(ctx, "error getting Hardware object in processNewWorkflow function", "error", err) stored.Status.TemplateRendering = v1alpha1.TemplateRenderingFailed stored.Status.SetCondition(v1alpha1.WorkflowCondition{ Type: v1alpha1.TemplateRenderedSuccess, @@ -180,7 +208,7 @@ func (r *Reconciler) processNewWorkflow(ctx context.Context, logger logr.Logger, } if stored.Spec.HardwareRef != "" && errors.IsNotFound(err) { - logger.Error(err, "hardware not found in processNewWorkflow function") + logger.ErrorContext(ctx, "hardware not found in processNewWorkflow function", "error", err) stored.Status.TemplateRendering = v1alpha1.TemplateRenderingFailed stored.Status.SetCondition(v1alpha1.WorkflowCondition{ Type: v1alpha1.TemplateRenderedSuccess,