Skip to content

Commit

Permalink
fix: implement the correct upgrade flow from 1.6.x to 1.7.x for SBC
Browse files Browse the repository at this point in the history
A couple of things were missing:
- The schematic ID wasn't generated when the extensions list was empty, although
  the overlay detected for machines running Talos < 1.7.x should also be taken into account.
- The schematic change could be received slightly after
  `TalosUpgradeController` starts the upgrade. That could lead to one
  unsuccessful upgrade attempt, because the wrong schematic ID was used.

Now the schematic ID is generated in either of two cases: the extensions list
is not empty; or the overlay is not empty, the node joined Omni while running
Talos < 1.7, and the current Talos version is >= 1.7.
Sync issues are handled by adding TalosVersion to the
SchematicConfiguration: this way it is possible to make
`TalosUpgradeController` wait until the schematic configuration is synced to
the Talos version it's going to upgrade to.

Signed-off-by: Artem Chernyshev <[email protected]>
  • Loading branch information
Unix4ever committed Apr 19, 2024
1 parent 586d2d7 commit 6fff261
Show file tree
Hide file tree
Showing 10 changed files with 1,022 additions and 846 deletions.
1,700 changes: 861 additions & 839 deletions client/api/omni/specs/omni.pb.go

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions client/api/omni/specs/omni.proto
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ message MachineStatusSpec {
Schematic schematic = 14;

MaintenanceConfig maintenance_config = 15;

// InitialTalosVersion is set only once when the machine first joined Omni.
string initial_talos_version = 16;
}

// TalosConfigSpec describes a Talos cluster config.
Expand Down Expand Up @@ -948,6 +951,7 @@ message TalosExtensionsSpec {
// SchematicConfigurationSpec is the desired Image Factory schematic for a machine, machine set or a cluster.
message SchematicConfigurationSpec {
// schematic_id is the ID of the desired Image Factory schematic.
string schematic_id = 1;
// talos_version is the Talos version this schematic configuration was computed for.
// The schematic configuration controller copies it from the cluster's Talos version, and the
// Talos upgrade logic skips reconciliation until it matches the version being upgraded to.
string talos_version = 2;
}

// ExtensionsConfigurationSpec is the desired list of extensions to be installed on the machine or the set of machines.
Expand Down
96 changes: 96 additions & 0 deletions client/api/omni/specs/omni_vtproto.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions frontend/src/api/omni/specs/omni.pb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ export type MachineStatusSpec = {
image_labels?: {[key: string]: string}
schematic?: MachineStatusSpecSchematic
maintenance_config?: MachineStatusSpecMaintenanceConfig
initial_talos_version?: string
}

export type TalosConfigSpec = {
Expand Down Expand Up @@ -630,6 +631,7 @@ export type TalosExtensionsSpec = {

// SchematicConfigurationSpec is the desired Image Factory schematic configuration.
// NOTE(review): presumably generated from the SchematicConfigurationSpec message in omni.proto — confirm before hand-editing.
export type SchematicConfigurationSpec = {
// ID of the desired Image Factory schematic.
schematic_id?: string
// Talos version the schematic configuration was computed for.
talos_version?: string
}

export type ExtensionsConfigurationSpec = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,10 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c

if event.TalosVersion != nil {
spec.TalosVersion = *event.TalosVersion

if spec.InitialTalosVersion == "" {
spec.InitialTalosVersion = spec.TalosVersion
}
}

if spec.Network == nil {
Expand Down Expand Up @@ -423,7 +427,9 @@ func (ctrl *MachineStatusController) handleNotification(ctx context.Context, r c
spec.Schematic.Extensions = event.Schematic.Extensions
spec.Schematic.Id = event.Schematic.Id
spec.Schematic.Invalid = event.Schematic.Invalid
spec.Schematic.Overlay = event.Schematic.Overlay
if event.Schematic.Overlay != nil && event.Schematic.Overlay.Name != "" {
spec.Schematic.Overlay = event.Schematic.Overlay
}

if spec.Schematic.Invalid {
spec.Schematic.InitialSchematic = ""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,6 @@ func (suite *MachineStatusSuite) TestMachineSchematic() {
"siderolabs/hello-world-service",
"siderolabs/mdadm",
},
Overlay: &specs.MachineStatusSpec_Schematic_Overlay{},
},
},
{
Expand All @@ -339,7 +338,6 @@ func (suite *MachineStatusSuite) TestMachineSchematic() {
expected: &specs.MachineStatusSpec_Schematic{
Id: defaultSchematic,
InitialSchematic: defaultSchematic,
Overlay: &specs.MachineStatusSpec_Schematic_Overlay{},
},
},
} {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ package omni

import (
"context"
"errors"

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/controller/generic/qtransform"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/image-factory/pkg/schematic"
"github.com/siderolabs/talos/pkg/machinery/imager/quirks"
"go.uber.org/zap"

"github.com/siderolabs/omni/client/pkg/omni/resources"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
"github.com/siderolabs/omni/internal/backend/imagefactory"
"github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/helpers"
"github.com/siderolabs/omni/internal/backend/runtime/omni/controllers/omni/internal/mappers"
)

const schematicConfigurationControllerName = "SchematicConfigurationController"
Expand Down Expand Up @@ -56,6 +59,9 @@ func NewSchematicConfigurationController(imageFactoryClient *imagefactory.Client
qtransform.WithExtraMappedInput(
qtransform.MapperSameID[*omni.MachineStatus, *omni.ClusterMachine](),
),
qtransform.WithExtraMappedInput(
mappers.MapClusterResourceToLabeledResources[*omni.Cluster, *omni.ClusterMachine](),
),
qtransform.WithExtraMappedInput(
qtransform.MapperNone[*omni.Schematic](),
),
Expand All @@ -82,10 +88,19 @@ func (helper *schematicConfigurationHelper) reconcile(
return err
}

var extensionsList []string

if extensions != nil {
if err = updateFinalizers(ctx, r, extensions); err != nil {
return err
}

extensionsList = extensions.TypedSpec().Value.Extensions
}

clusterName, ok := clusterMachine.Metadata().Labels().Get(omni.LabelCluster)
if !ok {
return errors.New("failed to determine cluster")
}

var (
Expand All @@ -110,7 +125,14 @@ func (helper *schematicConfigurationHelper) reconcile(
currentSchematic = ms.TypedSpec().Value.Schematic.Id
}

if extensions == nil || extensions.Metadata().Phase() == resource.PhaseTearingDown {
cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, r, clusterName)
if err != nil {
return err
}

schematicConfiguration.TypedSpec().Value.TalosVersion = cluster.TypedSpec().Value.TalosVersion

if !shouldGenerateSchematicID(cluster, extensions, ms, overlay) {
// if extensions config is not set, fall back to the initial schematic id and exit
id := initialSchematic

Expand All @@ -127,7 +149,7 @@ func (helper *schematicConfigurationHelper) reconcile(
config := schematic.Schematic{
Customization: schematic.Customization{
SystemExtensions: schematic.SystemExtensions{
OfficialExtensions: extensions.TypedSpec().Value.Extensions,
OfficialExtensions: extensionsList,
},
},
Overlay: overlay,
Expand All @@ -149,11 +171,27 @@ func (helper *schematicConfigurationHelper) reconcile(
}

schematicConfiguration.TypedSpec().Value.SchematicId = id

helpers.CopyLabels(clusterMachine, schematicConfiguration, omni.LabelCluster)

return nil
}

// shouldGenerateSchematicID reports whether a new schematic ID has to be generated for the machine
// instead of falling back to its initial schematic.
//
// It returns true in two cases:
//   - an overlay was detected, the Talos version the machine initially joined with does not support
//     overlays, and the cluster's Talos version does (SBC migration from Talos < 1.7.0, where the
//     overlay was detected but is not applied yet);
//   - the machine extensions resource exists and is not being torn down.
func shouldGenerateSchematicID(cluster *omni.Cluster, extensions *omni.MachineExtensions, machineStatus *omni.MachineStatus, overlay schematic.Overlay) bool {
	if overlay.Name != "" {
		joinedWithOverlaySupport := quirks.New(machineStatus.TypedSpec().Value.InitialTalosVersion).SupportsOverlay()

		// the cluster version is only inspected when the machine joined on a version without overlay support
		if !joinedWithOverlaySupport && quirks.New(cluster.TypedSpec().Value.TalosVersion).SupportsOverlay() {
			return true
		}
	}

	// without the migration case, a schematic is generated only for a live extensions configuration
	return extensions != nil && extensions.Metadata().Phase() != resource.PhaseTearingDown
}

func getOverlay(ms *omni.MachineStatus) schematic.Overlay {
if ms.TypedSpec().Value.Schematic.Overlay == nil {
return schematic.Overlay{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ func (suite *SchematicConfigurationSuite) TestReconcile() {
clusterName := "cluster"
machineSet := "machineset"

cluster := omni.NewCluster(resources.DefaultNamespace, clusterName)
cluster.TypedSpec().Value.TalosVersion = "1.7.0"

suite.Require().NoError(suite.state.Create(ctx, cluster))

machineStatus := omni.NewMachineStatus(resources.DefaultNamespace, machineName)
machineStatus.TypedSpec().Value.Schematic = &specs.MachineStatusSpec_Schematic{
InitialSchematic: initialSchematic,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func reconcileTalosUpdateStatus(ctx context.Context, r controller.ReaderWriter,

var schematicID string

schematicID, err = getDesiredSchematic(ctx, r, machine)
schematicID, err = getDesiredSchematic(ctx, r, machine, talosVersion)
if err != nil {
return err
}
Expand Down Expand Up @@ -477,13 +477,17 @@ func cleanupResources(ctx context.Context, r controller.ReaderWriter, clusterMac
})
}

func getDesiredSchematic(ctx context.Context, r controller.ReaderWriter, machine *omni.ClusterMachine) (string, error) {
func getDesiredSchematic(ctx context.Context, r controller.ReaderWriter, machine *omni.ClusterMachine, talosVersion string) (string, error) {
schematic, err := safe.ReaderGetByID[*omni.SchematicConfiguration](ctx, r, machine.Metadata().ID())
if err != nil && !state.IsNotFoundError(err) {
return "", err
}

if schematic != nil {
if schematic.TypedSpec().Value.TalosVersion != talosVersion {
return "", xerrors.NewTaggedf[qtransform.SkipReconcileTag]("the schematic is not in sync with Talos version yet")
}

return schematic.TypedSpec().Value.SchematicId, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ func (suite *TalosUpgradeStatusSuite) TestReconcile() {

schematicConfig := omni.NewSchematicConfiguration(resources.DefaultNamespace, machines[1].Metadata().ID())
schematicConfig.TypedSpec().Value.SchematicId = "abcd"
schematicConfig.TypedSpec().Value.TalosVersion = "1.3.6"

suite.Require().NoError(suite.state.Create(suite.ctx, schematicConfig))

Expand Down

0 comments on commit 6fff261

Please sign in to comment.