diff --git a/internal/backend/runtime/omni/controllers/omni/cluster_machine_status.go b/internal/backend/runtime/omni/controllers/omni/cluster_machine_status.go index 5a958a47..faecf7cc 100644 --- a/internal/backend/runtime/omni/controllers/omni/cluster_machine_status.go +++ b/internal/backend/runtime/omni/controllers/omni/cluster_machine_status.go @@ -40,7 +40,7 @@ func NewClusterMachineStatusController() *ClusterMachineStatusController { }, TransformFunc: func(ctx context.Context, r controller.Reader, _ *zap.Logger, clusterMachine *omni.ClusterMachine, clusterMachineStatus *omni.ClusterMachineStatus) error { machine, err := safe.ReaderGet[*omni.Machine](ctx, r, resource.NewMetadata(resources.DefaultNamespace, omni.MachineType, clusterMachine.Metadata().ID(), resource.VersionUndefined)) - if err != nil { + if err != nil && !state.IsNotFoundError(err) { return err } @@ -51,7 +51,7 @@ func NewClusterMachineStatusController() *ClusterMachineStatusController { omni.LabelMachineSet, ) - if machine.TypedSpec().Value.Connected { + if machine != nil && machine.TypedSpec().Value.Connected { clusterMachineStatus.Metadata().Labels().Set(omni.MachineStatusLabelConnected, "") } else { clusterMachineStatus.Metadata().Labels().Delete(omni.MachineStatusLabelConnected) @@ -140,11 +140,12 @@ func NewClusterMachineStatusController() *ClusterMachineStatusController { _, isControlPlaneNode := clusterMachineStatus.Metadata().Labels().Get(omni.LabelControlPlaneRole) if (status.GetStage() == machineapi.MachineStatusEvent_BOOTING || status.GetStage() == machineapi.MachineStatusEvent_RUNNING) && isControlPlaneNode { - cmsVal.ApidAvailable = machine.TypedSpec().Value.Connected + cmsVal.ApidAvailable = machine != nil && machine.TypedSpec().Value.Connected } // should we also mark it as not ready when the clustermachine is tearing down (?) - cmsVal.Ready = status.GetStatus().GetReady() && machine.TypedSpec().Value.Connected + cmsVal.Ready = status.GetStatus().GetReady() && + machine != nil && machine.TypedSpec().Value.Connected if clusterMachine.Metadata().Phase() == resource.PhaseTearingDown { cmsVal.Stage = specs.ClusterMachineStatusSpec_DESTROYING diff --git a/internal/backend/runtime/omni/controllers/omni/internal/machineset/machineset.go b/internal/backend/runtime/omni/controllers/omni/internal/machineset/machineset.go index 58947417..d746fec2 100644 --- a/internal/backend/runtime/omni/controllers/omni/internal/machineset/machineset.go +++ b/internal/backend/runtime/omni/controllers/omni/internal/machineset/machineset.go @@ -12,6 +12,7 @@ import ( "github.com/cosi-project/runtime/pkg/controller" "github.com/cosi-project/runtime/pkg/resource" "github.com/cosi-project/runtime/pkg/safe" + "github.com/cosi-project/runtime/pkg/state" "go.uber.org/zap" "github.com/siderolabs/omni/client/pkg/omni/resources" @@ -92,6 +93,10 @@ func UpdateFinalizers(ctx context.Context, r controller.ReaderWriter, rc *Reconc for _, clusterMachine := range rc.GetClusterMachines() { if clusterMachine.Metadata().Phase() == resource.PhaseRunning && !clusterMachine.Metadata().Finalizers().Has(ControllerName) { if err := r.AddFinalizer(ctx, omni.NewMachine(resources.DefaultNamespace, clusterMachine.Metadata().ID()).Metadata(), ControllerName); err != nil { + if state.IsNotFoundError(err) { + continue + } + return err } } diff --git a/internal/backend/runtime/omni/controllers/omni/machine_cleanup.go b/internal/backend/runtime/omni/controllers/omni/machine_cleanup.go index cde6f51e..07b946f8 100644 --- a/internal/backend/runtime/omni/controllers/omni/machine_cleanup.go +++ b/internal/backend/runtime/omni/controllers/omni/machine_cleanup.go @@ -49,7 +49,7 @@ func (h *sameIDHandler[I, O]) FinalizerRemoval(ctx context.Context, r controller return nil } - ready, err := r.Teardown(ctx, md) + ready, err := r.Teardown(ctx, md, controller.WithOwner("")) if err != nil { return err } diff --git a/internal/backend/runtime/omni/controllers/omni/machine_cleanup_test.go b/internal/backend/runtime/omni/controllers/omni/machine_cleanup_test.go new file mode 100644 index 00000000..ef33864a --- /dev/null +++ b/internal/backend/runtime/omni/controllers/omni/machine_cleanup_test.go @@ -0,0 +1,93 @@ +// Copyright (c) 2024 Sidero Labs, Inc. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. + +package omni_test + +import ( + "context" + "testing" + "time" + + "github.com/cosi-project/runtime/pkg/resource/rtestutils" + "github.com/cosi-project/runtime/pkg/state" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" + + "github.com/siderolabs/omni/client/pkg/omni/resources" + "github.com/siderolabs/omni/client/pkg/omni/resources/omni" + omnictrl "github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/omni" +) + +type MachineCleanupSuite struct { + OmniSuite +} + +func (suite *MachineCleanupSuite) TestCleanup() { + require := suite.Require() + + suite.ctx, suite.ctxCancel = context.WithTimeout(suite.ctx, time.Second*10) + + suite.startRuntime() + + controller := omnictrl.NewMachineCleanupController() + + require.NoError(suite.runtime.RegisterController(controller)) + + machineID := "machine-cleanup-test-machine" + + machineSet := omni.NewMachineSet(resources.DefaultNamespace, "machine-cleanup-test-machine-set") + machineSetNode := omni.NewMachineSetNode(resources.DefaultNamespace, machineID, machineSet) + machine := omni.NewMachine(resources.DefaultNamespace, machineID) + + machine.Metadata().Finalizers().Add(controller.Name()) + + require.NoError(suite.state.Create(suite.ctx, machineSetNode)) + require.NoError(suite.state.Create(suite.ctx, machine)) + + _, err := suite.state.Teardown(suite.ctx, machine.Metadata()) + + require.NoError(err) + + assertNoResource[*omni.MachineSetNode](&suite.OmniSuite, machineSetNode) + + assertResource[*omni.Machine](&suite.OmniSuite, machine.Metadata(), func(r *omni.Machine, assertion *assert.Assertions) { + assertion.Empty(r.Metadata().Finalizers()) + }) + + require.NoError(suite.state.Destroy(suite.ctx, machine.Metadata())) +} + +func (suite *MachineCleanupSuite) TestSkipMachineSetNodeWithOwner() { + require := suite.Require() + + suite.ctx, suite.ctxCancel = context.WithTimeout(suite.ctx, time.Second*10) + + suite.startRuntime() + + controller := omnictrl.NewMachineCleanupController() + + require.NoError(suite.runtime.RegisterController(controller)) + + machineID := "machine-cleanup-skip-test-machine" + + machineSet := omni.NewMachineSet(resources.DefaultNamespace, "machine-cleanup-skip-test-machine-set") + machineSetNode := omni.NewMachineSetNode(resources.DefaultNamespace, machineID, machineSet) + machine := omni.NewMachine(resources.DefaultNamespace, machineID) + + machine.Metadata().Finalizers().Add(controller.Name()) + require.NoError(machineSetNode.Metadata().SetOwner("some-owner")) + + require.NoError(suite.state.Create(suite.ctx, machineSetNode, state.WithCreateOwner("some-owner"))) + require.NoError(suite.state.Create(suite.ctx, machine)) + + rtestutils.Destroy[*omni.Machine](suite.ctx, suite.T(), suite.state, []string{machine.Metadata().ID()}) + + // MachineSetNode should still be around, as it is owned by a controller - CleanupController should skip it + assertResource[*omni.MachineSetNode](&suite.OmniSuite, machine.Metadata(), func(*omni.MachineSetNode, *assert.Assertions) {}) +} + +func TestMachineCleanupSuite(t *testing.T) { + suite.Run(t, new(MachineCleanupSuite)) +}