Skip to content

Commit

Permalink
feat: don't allow downgrades of the machines when adding to a cluster
Browse files Browse the repository at this point in the history
Make `MachineClass` selector skip them.
Disable them in the UI.
Do not allow adding them as the resources, validate on the backend
level.
Fixes: #46

Signed-off-by: Artem Chernyshev <[email protected]>
  • Loading branch information
Unix4ever committed Apr 12, 2024
1 parent 2e015a9 commit 592f916
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 48 deletions.
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@
"postcss": "^8.4.14",
"tailwindcss": "^3.1.4",
"ts-jest": "^27.1.5",
"ts-loader": "^9.3.1",
"typescript": "^4.7.4",
"ts-loader": "^9.3.1",
"vue-cli-plugin-tailwind": "^3.0.0"
}
}
5 changes: 1 addition & 4 deletions frontend/src/views/cluster/Config/Patches.vue
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,7 @@ type Props = {
currentCluster?: ResourceTyped<ClusterSpec>,
};

const props = defineProps<Props>();

console.log("PATCHES currentCluster", props.currentCluster);

defineProps<Props>();

const updateSelectors = () => {
patchListSelectors.value = [`!${LabelSystemPatch}`];
Expand Down
10 changes: 9 additions & 1 deletion frontend/src/views/omni/Clusters/Management/ClusterCreate.vue
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ included in the LICENSE file.
<template #default="{ items, searchQuery }">
<cluster-machine-item
v-for="item in items"
:talos-version-not-allowed="!canAddMachine(item)"
:key="itemID(item)"
:reset="reset"
:item="item"
Expand Down Expand Up @@ -140,7 +141,7 @@ import {
PatchBaseWeightMachineSet,
PatchBaseWeightCluster,
} from "@/api/resources";
import { TalosVersionSpec } from "@/api/omni/specs/omni.pb";
import { MachineStatusSpec, TalosVersionSpec } from "@/api/omni/specs/omni.pb";
import WatchResource, { itemID } from "@/api/watch";
import { showError, showSuccess } from "@/notification";
import { Resource, ResourceTyped } from "@/api/grpc";
Expand Down Expand Up @@ -261,6 +262,13 @@ const createCluster = async () => {
}
};

const canAddMachine = (machine: Resource<MachineStatusSpec>) => {
const clusterVersion = semver.parse(state.value.cluster.talosVersion);
const machineVersion = semver.parse(machine.spec.talos_version);

return machineVersion.major <= clusterVersion.major && machineVersion.minor <= clusterVersion.minor;
}

const createCluster_ = async (untaint: boolean) => {
if (typeof state.value.controlPlanesCount === 'number' && (state.value.controlPlanesCount - 1) % 2 !== 0) {
showError(
Expand Down
109 changes: 70 additions & 39 deletions frontend/src/views/omni/Clusters/Management/ClusterMachineItem.vue
Original file line number Diff line number Diff line change
Expand Up @@ -7,44 +7,44 @@ included in the LICENSE file.
<template>
<t-list-item>
<template #default>
<div class="flex items-center text-naturals-N13">
<div class="truncate flex-1 flex items-center gap-2">
<span class="font-bold pr-2">
<word-highlighter
:query="(searchQuery ?? '')"
:textToHighlight="item?.spec?.network?.hostname ?? item?.metadata?.id"
split-by-space
highlightClass="bg-naturals-N14"/>
</span>
<machine-item-labels :resource="item" :add-label-func="addMachineLabels" :remove-label-func="removeMachineLabels" @filter-label="e => $emit('filterLabel', e)"/>
</div>
<div class="flex justify-end flex-initial w-128 gap-4 items-center">
<template v-if="machineSetIndex !== undefined">
<div v-if="systemDiskPath" class="pr-8 pl-3 py-1.5 text-naturals-N11 rounded border border-naturals-N6 cursor-not-allowed">
Install Disk: {{ systemDiskPath }}
</div>
<div v-else>
<t-select-list
class="h-7"
title="Install Disk"
@checkedValue="setInstallDisk"
:values="disks"
:defaultValue="disks[0]"/>
</div>
</template>
<div>
<machine-set-picker :options="options" :machine-set-index="machineSetIndex" @update:machineSetIndex="value => machineSetIndex = value"/>
<div class="flex items-center text-naturals-N13">
<div class="truncate flex-1 flex items-center gap-2">
<span class="font-bold pr-2">
<word-highlighter
:query="(searchQuery ?? '')"
:textToHighlight="item?.spec?.network?.hostname ?? item?.metadata?.id"
split-by-space
highlightClass="bg-naturals-N14"/>
</span>
<machine-item-labels :resource="item" :add-label-func="addMachineLabels" :remove-label-func="removeMachineLabels" @filter-label="e => $emit('filterLabel', e)"/>
</div>
<div class="flex justify-end flex-initial w-128 gap-4 items-center">
<template v-if="machineSetIndex !== undefined">
<div v-if="systemDiskPath" class="pr-8 pl-3 py-1.5 text-naturals-N11 rounded border border-naturals-N6 cursor-not-allowed">
Install Disk: {{ systemDiskPath }}
</div>
<div class="flex items-center">
<icon-button
class="text-naturals-N14 my-auto"
@click="openPatchConfig"
:id="machineSetIndex !== undefined ? options?.[machineSetIndex]?.id : undefined"
:disabled="machineSetIndex === undefined || options?.[machineSetIndex]?.disabled"
:icon="machineSetNode.patches[machinePatchID] && machineSetIndex ? 'settings-toggle': 'settings'"/>
<div v-else>
<t-select-list
class="h-7"
title="Install Disk"
@checkedValue="setInstallDisk"
:values="disks"
:defaultValue="disks[0]"/>
</div>
</template>
<div>
<machine-set-picker :options="options" :machine-set-index="machineSetIndex" @update:machineSetIndex="value => machineSetIndex = value"/>
</div>
<div class="flex items-center">
<icon-button
class="text-naturals-N14 my-auto"
@click="openPatchConfig"
:id="machineSetIndex !== undefined ? options?.[machineSetIndex]?.id : undefined"
:disabled="machineSetIndex === undefined || options?.[machineSetIndex]?.disabled"
:icon="machineSetNode.patches[machinePatchID] && machineSetIndex ? 'settings-toggle': 'settings'"/>
</div>
</div>
</div>
</template>
<template #details>
<div class="pl-6 grid grid-cols-5">
Expand Down Expand Up @@ -121,10 +121,11 @@ defineEmits(['filterLabel']);
const props = defineProps<{
item: ResourceTyped<MachineStatusSpec & SiderolinkSpec & MachineConfigGenOptionsSpec>,
reset?: number,
searchQuery?: string
searchQuery?: string,
talosVersionNotAllowed: boolean
}>();

const { item, reset } = toRefs(props);
const { item, reset, talosVersionNotAllowed } = toRefs(props);

const machineSetNode = ref<MachineSetNode>({
patches: {},
Expand All @@ -133,7 +134,7 @@ const machineSetIndex = ref<number | undefined>();
const systemDiskPath: Ref<string | undefined> = ref();
const disks: Ref<string[]> = ref([]);

const computeDisks = () => {
const computeState = () => {
const bds: MachineStatusSpecHardwareStatusBlockDevice[] = item?.value?.spec?.hardware?.blockdevices || [];
const diskPaths: string[] = [];

Expand All @@ -147,11 +148,36 @@ const computeDisks = () => {
}

disks.value = diskPaths;

computeMachineAssignment()
}

computeDisks();
const computeMachineAssignment = () => {
for (var i = 0; i < state.value.machineSets.length; i++) {
const machineSet = state.value.machineSets[i];

watch(item, computeDisks);
for(const id in machineSet.machines) {
if (item.value.metadata.id === id) {
machineSetIndex.value = i;

return;
}
}
}

machineSetIndex.value = undefined;
}

computeState();

watch(item, computeState);
watch(state.value, computeMachineAssignment);

watch(talosVersionNotAllowed, (value: boolean) => {
if (value) {
machineSetIndex.value = undefined;
}
});

watch(machineSetIndex, (val?: number, old?: number) => {
if (val !== undefined) {
Expand Down Expand Up @@ -214,6 +240,11 @@ const options: Ref<PickerOption[]> = computed(() => {
tooltip = `The machine class ${ms.id} is using machine class so no manual allocation is possible`
}

if (talosVersionNotAllowed.value) {
disabled = true;
tooltip = `The machine has newer Talos version installed: downgrade is not allowed. Upgrade the machine or change Talos cluster version`
}

return {
id: ms.id,
disabled: disabled,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import (
"fmt"
"math"
"slices"
"strings"

"github.com/blang/semver/v4"
"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/safe"
Expand Down Expand Up @@ -42,6 +44,11 @@ func (ctrl *MachineSetNodeController) Inputs() []controller.Input {
Type: omni.MachineSetType,
Kind: controller.InputWeak,
},
{
Namespace: resources.DefaultNamespace,
Type: omni.ClusterType,
Kind: controller.InputWeak,
},
{
Namespace: resources.DefaultNamespace,
Type: omni.MachineType,
Expand Down Expand Up @@ -222,6 +229,21 @@ func (ctrl *MachineSetNodeController) createNodes(

created := 0

clusterName, ok := machineSet.Metadata().Labels().Get(omni.LabelCluster)
if !ok {
return fmt.Errorf("failed to get cluster name of the machine set %q", machineSet.Metadata().ID())
}

cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, r, clusterName)
if err != nil {
return err
}

clusterVersion, err := semver.Parse(cluster.TypedSpec().Value.TalosVersion)
if err != nil {
return fmt.Errorf("failed to parse talos version of the cluster %w", err)
}

for _, selector := range selectors {
selector.Terms = append(selector.Terms, resource.LabelTerm{
Key: omni.MachineStatusLabelAvailable,
Expand All @@ -231,7 +253,22 @@ func (ctrl *MachineSetNodeController) createNodes(
availableMachineClassMachines := allMachineStatuses.FilterLabelQuery(resource.RawLabelQuery(selector))

for i := 0; i < availableMachineClassMachines.Len(); i++ {
id := availableMachineClassMachines.Get(i).Metadata().ID()
machine := availableMachineClassMachines.Get(i)

var machineVersion semver.Version

machineVersion, err = semver.Parse(strings.TrimPrefix(machine.TypedSpec().Value.TalosVersion, "v"))
if err != nil {
continue
}

// do not try to allocate the machine if it's Talos major or minor version is greater than cluster Talos version
// this way we don't allow downgrading the machines while allocating them
if machineVersion.Major > clusterVersion.Major || machineVersion.Minor > clusterVersion.Minor {
continue
}

id := machine.Metadata().ID()

if err := r.Create(ctx, omni.NewMachineSetNode(resources.DefaultNamespace, id, machineSet)); err != nil {
if state.IsConflictError(err) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ func (suite *MachineSetNodeSuite) createMachines(labels ...map[string]string) []
}
})

machineStatus.TypedSpec().Value.TalosVersion = "v1.6.0"

res = append(res, machineStatus)

suite.Require().NoError(suite.state.Create(suite.ctx, machineStatus))
Expand Down Expand Up @@ -109,7 +111,12 @@ func (suite *MachineSetNodeSuite) TestReconcile() {
)
}

machineSet.Metadata().Labels().Set(omni.LabelCluster, "cluster1")
cluster := omni.NewCluster(resources.DefaultNamespace, "cluster1")
cluster.TypedSpec().Value.TalosVersion = "1.6.0"

suite.Require().NoError(suite.state.Create(suite.ctx, cluster))

machineSet.Metadata().Labels().Set(omni.LabelCluster, cluster.Metadata().ID())
machineSet.Metadata().Labels().Set(omni.LabelWorkerRole, "")

machineClass := newMachineClass(fmt.Sprintf("%s==amd64", omni.MachineStatusLabelArch))
Expand Down
53 changes: 52 additions & 1 deletion internal/backend/runtime/omni/state_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ func validateBootstrapSpec(ctx context.Context, st state.State, etcdBackupStoreF

// machineSetNodeValidationOptions returns the validation options for the machine set node resource.
//
//nolint:gocognit
//nolint:gocognit,gocyclo,cyclop
func machineSetNodeValidationOptions(st state.State) []validated.StateOption {
getMachineSet := func(ctx context.Context, res *omni.MachineSetNode) (*omni.MachineSet, error) {
machineSetName, ok := res.Metadata().Labels().Get(omni.LabelMachineSet)
Expand All @@ -421,6 +421,53 @@ func machineSetNodeValidationOptions(st state.State) []validated.StateOption {
return machineSet, nil
}

validateTalosVersion := func(ctx context.Context, res *omni.MachineSetNode) error {
clusterName, ok := res.Metadata().Labels().Get(omni.LabelCluster)
if !ok {
return nil
}

cluster, err := safe.ReaderGetByID[*omni.Cluster](ctx, st, clusterName)
if err != nil {
if state.IsNotFoundError(err) {
return nil
}

return err
}

machineStatus, err := safe.ReaderGetByID[*omni.MachineStatus](ctx, st, res.Metadata().ID())
if err != nil {
if state.IsNotFoundError(err) {
return nil
}

return err
}

machineTalosVersion, err := semver.Parse(strings.TrimLeft(machineStatus.TypedSpec().Value.TalosVersion, "v"))
if err != nil {
// ignore version check if it's not possible to parse machine Talos version
return nil //nolint:nilerr
}

clusterTalosVersion, err := semver.Parse(cluster.TypedSpec().Value.TalosVersion)
if err != nil {
return err
}

if machineTalosVersion.Major > clusterTalosVersion.Major || machineTalosVersion.Minor > clusterTalosVersion.Minor {
return fmt.Errorf(
"cannot add machine set node to the cluster %s as it will trigger Talos downgrade on the node (%s -> %s)",
clusterName,
machineTalosVersion.String(),
clusterTalosVersion.String(),
)
}

return nil
}

return []validated.StateOption{
validated.WithCreateValidations(validated.NewCreateValidationForType(func(ctx context.Context, res *omni.MachineSetNode, _ ...state.CreateOption) error {
machineSet, err := getMachineSet(ctx, res)
Expand All @@ -436,6 +483,10 @@ func machineSetNodeValidationOptions(st state.State) []validated.StateOption {
return fmt.Errorf("adding machine set node to the machine set %q is not allowed: the machine set is using machine classes", machineSet.Metadata().ID())
}

if err = validateTalosVersion(ctx, res); err != nil {
return err
}

return validateNotControlplane(machineSet, res)
})),
validated.WithUpdateValidations(validated.NewUpdateValidationForType(func(ctx context.Context, res *omni.MachineSetNode, newRes *omni.MachineSetNode, _ ...state.UpdateOption) error {
Expand Down

0 comments on commit 592f916

Please sign in to comment.