Skip to content

Commit

Permalink
requeue during machine preflight
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Mendoza committed Apr 8, 2024
1 parent 645b584 commit e450c57
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 71 deletions.
8 changes: 7 additions & 1 deletion cloud/scope/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import (
"github.com/linode/cluster-api-provider-linode/version"
)

func CreateLinodeClient(apiKey string) (*linodego.Client, error) {
// ClientOpt is a callback that receives the freshly constructed *linodego.Client
// so a caller can adjust it; CreateLinodeClient invokes each supplied option in
// order before it sets the user agent and returns the client.
type ClientOpt func(*linodego.Client)

func CreateLinodeClient(apiKey string, opts ...ClientOpt) (*linodego.Client, error) {
if apiKey == "" {
return nil, errors.New("missing Linode API key")
}
Expand All @@ -28,6 +30,10 @@ func CreateLinodeClient(apiKey string) (*linodego.Client, error) {
}
linodeClient := linodego.NewClient(oauth2Client)

for _, opt := range opts {
opt(&linodeClient)
}

linodeClient.SetUserAgent(fmt.Sprintf("CAPL/%s", version.GetVersion()))

return &linodeClient, nil
Expand Down
5 changes: 4 additions & 1 deletion cloud/scope/machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"

"github.com/linode/linodego"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand Down Expand Up @@ -82,7 +83,9 @@ func NewMachineScope(ctx context.Context, apiKey string, params MachineScopePara
}
apiKey = string(data)
}
linodeClient, err := CreateLinodeClient(apiKey)
linodeClient, err := CreateLinodeClient(apiKey, func(linodeClient *linodego.Client) {
linodeClient.SetRetryCount(0)
})
if err != nil {
return nil, fmt.Errorf("failed to create linode client: %w", err)
}
Expand Down
188 changes: 119 additions & 69 deletions controller/linodemachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,18 @@ import (
"github.com/linode/cluster-api-provider-linode/util/reconciler"
)

// default etcd Disk size in MB
const (
// default etcd disk size in MB
defaultEtcdDiskSize = 10240
// defaultResizeTimeoutSeconds is the timeout, in seconds, passed to
// WaitForInstanceDiskStatus while waiting for the resized root disk to be ready.
// NOTE(review): two values (30 and 5) are listed for this name; this looks like
// the before/after pair of the diff, with 5 being the new value — confirm.
defaultResizeTimeoutSeconds = 30
defaultResizeTimeoutSeconds = 5

// conditions for preflight instance creation
// Each condition is marked true on the LinodeMachine once its step succeeds, so
// a requeued reconcile can skip the steps that are already done.

// ConditionPreflightRootDiskResizing is marked true after the root disk resize
// request succeeds and is deleted again once the resize wait completes.
ConditionPreflightRootDiskResizing clusterv1.ConditionType = "PreflightRootDiskResizing"
// ConditionPreflightRootDiskResized is marked true after waiting for the root
// disk to reach the ready status succeeds.
ConditionPreflightRootDiskResized clusterv1.ConditionType = "PreflightRootDiskResized"
// ConditionPreflightEtcdDiskCreated is marked true after the "etcd-data" disk
// has been created.
ConditionPreflightEtcdDiskCreated clusterv1.ConditionType = "PreflightEtcdDiskCreated"
// ConditionPreflightConfigured is marked true after configureDisksControlPlane
// returns without error.
ConditionPreflightConfigured clusterv1.ConditionType = "PreflightConfigured"
// ConditionPreflightBootStarted is marked true after BootInstance returns
// without error.
ConditionPreflightBootStarted clusterv1.ConditionType = "PreflightBootStarted"
// ConditionPreflightReady is marked true after the instance has been added to
// the Node Balancer backend.
ConditionPreflightReady clusterv1.ConditionType = "PreflightReady"
)

var skippedMachinePhases = map[string]bool{
Expand All @@ -61,6 +69,7 @@ var skippedMachinePhases = map[string]bool{
string(clusterv1.MachinePhaseUnknown): true,
}

// statuses to keep requeueing on while an instance is booting
var requeueInstanceStatuses = map[linodego.InstanceStatus]bool{
linodego.InstanceOffline: true,
linodego.InstanceBooting: true,
Expand Down Expand Up @@ -204,16 +213,15 @@ func (r *LinodeMachineReconciler) reconcile(
return
}

var linodeInstance *linodego.Instance
defer func() {
machineScope.LinodeMachine.Status.InstanceState = util.Pointer(linodego.InstanceOffline)
if linodeInstance != nil {
machineScope.LinodeMachine.Status.InstanceState = &linodeInstance.Status
}
}()

// Update
if machineScope.LinodeMachine.Spec.InstanceID != nil {
if machineScope.LinodeMachine.Status.InstanceState != nil {
var linodeInstance *linodego.Instance
defer func() {
if linodeInstance != nil {
machineScope.LinodeMachine.Status.InstanceState = &linodeInstance.Status

Check warning on line 221 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L217-L221

Added lines #L217 - L221 were not covered by tests
}
}()

failureReason = cerrs.UpdateMachineError

logger = logger.WithValues("ID", *machineScope.LinodeMachine.Spec.InstanceID)
Expand All @@ -232,7 +240,7 @@ func (r *LinodeMachineReconciler) reconcile(

return
}
linodeInstance, err = r.reconcileCreate(ctx, logger, machineScope)
res, err = r.reconcileCreate(ctx, logger, machineScope)

Check warning on line 243 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L243

Added line #L243 was not covered by tests

return
}
Expand All @@ -241,7 +249,7 @@ func (r *LinodeMachineReconciler) reconcileCreate(
ctx context.Context,
logger logr.Logger,
machineScope *scope.MachineScope,
) (*linodego.Instance, error) {
) (ctrl.Result, error) {

Check warning on line 252 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L252

Added line #L252 was not covered by tests
logger.Info("creating machine")

tags := []string{machineScope.LinodeCluster.Name}
Expand All @@ -253,13 +261,14 @@ func (r *LinodeMachineReconciler) reconcileCreate(
}
filter, err := listFilter.String()
if err != nil {
return nil, err
return ctrl.Result{}, err

Check warning on line 264 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L264

Added line #L264 was not covered by tests
}
linodeInstances, err := machineScope.LinodeClient.ListInstances(ctx, linodego.NewListOptions(1, filter))
if err != nil {
logger.Error(err, "Failed to list Linode machine instances")

return nil, err
// TODO: What terminal errors should we not requeue for, and just return an error?
return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerWaitForRunningDelay}, nil

Check warning on line 271 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L271

Added line #L271 was not covered by tests
}

var linodeInstance *linodego.Instance
Expand All @@ -274,43 +283,66 @@ func (r *LinodeMachineReconciler) reconcileCreate(
if err != nil {
logger.Error(err, "Failed to create Linode machine InstanceCreateOptions")

return nil, err
return ctrl.Result{}, err

Check warning on line 286 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L286

Added line #L286 was not covered by tests
}

linodeInstance, err = machineScope.LinodeClient.CreateInstance(ctx, *createOpts)
if err != nil || linodeInstance == nil {
if err != nil {

Check warning on line 290 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L290

Added line #L290 was not covered by tests
logger.Error(err, "Failed to create Linode machine instance")

return nil, err
// TODO: What terminal errors should we not requeue for, and just return an error?
return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerWaitForRunningDelay}, nil

Check warning on line 294 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L294

Added line #L294 was not covered by tests
}

machineScope.LinodeMachine.Spec.InstanceID = &linodeInstance.ID

Check warning on line 297 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L297

Added line #L297 was not covered by tests

default:
err = errors.New("multiple instances")
logger.Error(err, "multiple instances found", "tags", tags)

Check warning on line 301 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L299-L301

Added lines #L299 - L301 were not covered by tests

return ctrl.Result{}, err

Check warning on line 303 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L303

Added line #L303 was not covered by tests
}

if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightConfigured) {

Check warning on line 306 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L306

Added line #L306 was not covered by tests
if err = r.configureDisksControlPlane(ctx, logger, machineScope, linodeInstance.ID); err != nil {
logger.Error(err, "Failed to configure instance disks")

return nil, err
// TODO: What terminal errors should we not requeue for, and just return an error?
return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerWaitForRunningDelay}, nil

Check warning on line 311 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L311

Added line #L311 was not covered by tests
}

conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightConfigured)

Check warning on line 314 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L314

Added line #L314 was not covered by tests
}

if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightBootStarted) {

Check warning on line 317 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L317

Added line #L317 was not covered by tests
if err = machineScope.LinodeClient.BootInstance(ctx, linodeInstance.ID, 0); err != nil {
logger.Error(err, "Failed to boot instance")

return nil, err
// TODO: What terminal errors should we not requeue for, and just return an error?
return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerWaitForRunningDelay}, nil

Check warning on line 322 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L322

Added line #L322 was not covered by tests
}
default:
err = errors.New("multiple instances")
logger.Error(err, "multiple instances found", "tags", tags)

return nil, err
conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightBootStarted)

Check warning on line 325 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L325

Added line #L325 was not covered by tests
}

machineScope.LinodeMachine.Status.Ready = true
machineScope.LinodeMachine.Spec.InstanceID = &linodeInstance.ID
machineScope.LinodeMachine.Spec.ProviderID = util.Pointer(fmt.Sprintf("linode://%d", linodeInstance.ID))
machineScope.LinodeMachine.Status.Addresses = buildInstanceAddrs(linodeInstance)
if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightReady) {
if err = services.AddNodeToNB(ctx, logger, machineScope); err != nil {
logger.Error(err, "Failed to add instance to Node Balancer backend")

Check warning on line 330 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L328-L330

Added lines #L328 - L330 were not covered by tests

if err = services.AddNodeToNB(ctx, logger, machineScope); err != nil {
logger.Error(err, "Failed to add instance to Node Balancer backend")
// TODO: What terminal errors should we not requeue for, and just return an error?
return ctrl.Result{RequeueAfter: reconciler.DefaultMachineControllerWaitForRunningDelay}, nil

Check warning on line 333 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L333

Added line #L333 was not covered by tests
}

return linodeInstance, err
conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightReady)

Check warning on line 336 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L336

Added line #L336 was not covered by tests
}

return linodeInstance, nil
machineScope.LinodeMachine.Spec.ProviderID = util.Pointer(fmt.Sprintf("linode://%d", linodeInstance.ID))
machineScope.LinodeMachine.Status.Addresses = buildInstanceAddrs(linodeInstance)

Check warning on line 340 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L339-L340

Added lines #L339 - L340 were not covered by tests

// Set the instance state to signal preflight process is done
machineScope.LinodeMachine.Status.InstanceState = util.Pointer(linodego.InstanceOffline)

Check warning on line 343 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L343

Added line #L343 was not covered by tests

return ctrl.Result{}, nil

Check warning on line 345 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L345

Added line #L345 was not covered by tests
}

func (r *LinodeMachineReconciler) configureDisksControlPlane(
Expand All @@ -322,51 +354,69 @@ func (r *LinodeMachineReconciler) configureDisksControlPlane(
if !kutil.IsControlPlaneMachine(machineScope.Machine) {
return nil
}
// get the default instance config
configs, err := machineScope.LinodeClient.ListInstanceConfigs(ctx, linodeInstanceID, &linodego.ListOptions{})
if err != nil || len(configs) == 0 {
logger.Error(err, "Failed to list instance configs")

return err
}
instanceConfig := &configs[0]
if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightRootDiskResized) {

Check failure on line 358 in controller/linodemachine_controller.go

View workflow job for this annotation

GitHub Actions / go-analyse

`if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightRootDiskResized)` has complex nested blocks (complexity: 8) (nestif)

Check warning on line 358 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L358

Added line #L358 was not covered by tests
// get the default instance config
configs, err := machineScope.LinodeClient.ListInstanceConfigs(ctx, linodeInstanceID, &linodego.ListOptions{})
if err != nil || len(configs) == 0 {
logger.Error(err, "Failed to list instance configs")

Check warning on line 362 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L360-L362

Added lines #L360 - L362 were not covered by tests

// carve out space for the etcd disk
rootDiskID := instanceConfig.Devices.SDA.DiskID
rootDisk, err := machineScope.LinodeClient.GetInstanceDisk(ctx, linodeInstanceID, rootDiskID)
if err != nil {
logger.Error(err, "Failed to get root disk for instance")
return err

Check warning on line 364 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L364

Added line #L364 was not covered by tests
}
instanceConfig := configs[0]

Check warning on line 366 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L366

Added line #L366 was not covered by tests

return err
}
diskSize := rootDisk.Size - defaultEtcdDiskSize
if err = machineScope.LinodeClient.ResizeInstanceDisk(ctx, linodeInstanceID, rootDiskID, diskSize); err != nil {
logger.Error(err, "Failed to resize root disk")
if instanceConfig.Devices.SDA == nil {
return errors.New("root disk not yet ready")

Check warning on line 369 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L368-L369

Added lines #L368 - L369 were not covered by tests
}

return err
}
// wait for the disk to resize
_, err = machineScope.LinodeClient.WaitForInstanceDiskStatus(ctx, linodeInstanceID, rootDiskID, linodego.DiskReady, defaultResizeTimeoutSeconds)
if err != nil {
logger.Error(err, fmt.Sprintf("Failed to resize root disk within resize timeout of %d seconds", defaultResizeTimeoutSeconds))
rootDiskID := instanceConfig.Devices.SDA.DiskID

Check warning on line 372 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L372

Added line #L372 was not covered by tests

return err
// carve out space for the etcd disk
if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightRootDiskResizing) {
rootDisk, err := machineScope.LinodeClient.GetInstanceDisk(ctx, linodeInstanceID, rootDiskID)
if err != nil {
logger.Error(err, "Failed to get root disk for instance")

Check warning on line 378 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L375-L378

Added lines #L375 - L378 were not covered by tests

return err

Check warning on line 380 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L380

Added line #L380 was not covered by tests
}
diskSize := rootDisk.Size - defaultEtcdDiskSize
if err = machineScope.LinodeClient.ResizeInstanceDisk(ctx, linodeInstanceID, rootDiskID, diskSize); err != nil {
logger.Error(err, "Failed to resize root disk")

Check warning on line 384 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L382-L384

Added lines #L382 - L384 were not covered by tests

return err

Check warning on line 386 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L386

Added line #L386 was not covered by tests
}

conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightRootDiskResizing)

Check warning on line 389 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L389

Added line #L389 was not covered by tests
}

// wait for the disk to resize
if _, err := machineScope.LinodeClient.WaitForInstanceDiskStatus(ctx, linodeInstanceID, rootDiskID, linodego.DiskReady, defaultResizeTimeoutSeconds); err != nil {
logger.Error(err, fmt.Sprintf("Failed to resize root disk within resize timeout of %d seconds", defaultResizeTimeoutSeconds))

Check warning on line 394 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L393-L394

Added lines #L393 - L394 were not covered by tests

return err

Check warning on line 396 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L396

Added line #L396 was not covered by tests
}

conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightRootDiskResized)
conditions.Delete(machineScope.LinodeMachine, ConditionPreflightRootDiskResizing)

Check warning on line 400 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L399-L400

Added lines #L399 - L400 were not covered by tests
}

// create the etcd disk
_, err = machineScope.LinodeClient.CreateInstanceDisk(
ctx,
linodeInstanceID,
linodego.InstanceDiskCreateOptions{
Label: "etcd-data",
Size: defaultEtcdDiskSize,
Filesystem: string(linodego.FilesystemExt4),
},
)
if err != nil {
logger.Error(err, "Failed to create etcd disk")
if !reconciler.ConditionTrue(machineScope.LinodeMachine, ConditionPreflightEtcdDiskCreated) {

Check warning on line 403 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L403

Added line #L403 was not covered by tests
// create the etcd disk
if _, err := machineScope.LinodeClient.CreateInstanceDisk(
ctx,
linodeInstanceID,
linodego.InstanceDiskCreateOptions{
Label: "etcd-data",
Size: defaultEtcdDiskSize,
Filesystem: string(linodego.FilesystemExt4),
},
); err != nil {
logger.Error(err, "Failed to create etcd disk")

Check warning on line 414 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L405-L414

Added lines #L405 - L414 were not covered by tests

return err
return err

Check warning on line 416 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L416

Added line #L416 was not covered by tests
}

conditions.MarkTrue(machineScope.LinodeMachine, ConditionPreflightEtcdDiskCreated)

Check warning on line 419 in controller/linodemachine_controller.go

View check run for this annotation

Codecov / codecov/patch

controller/linodemachine_controller.go#L419

Added line #L419 was not covered by tests
}

return nil
Expand Down
20 changes: 20 additions & 0 deletions util/reconciler/conditions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package reconciler

import (
corev1 "k8s.io/api/core/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
)

// ConditionTrue reports whether the condition typ on from has the status "True".
// It is a shorthand for HasConditionStatus(from, typ, "True"), so it also returns
// false when no such condition is found.
func ConditionTrue(from conditions.Getter, typ clusterv1.ConditionType) bool {
return HasConditionStatus(from, typ, "True")

Check warning on line 10 in util/reconciler/conditions.go

View check run for this annotation

Codecov / codecov/patch

util/reconciler/conditions.go#L9-L10

Added lines #L9 - L10 were not covered by tests
}

// HasConditionStatus reports whether the condition typ on from currently has
// exactly the given status. It looks the condition up with conditions.Get and
// returns false when the lookup yields nil.
func HasConditionStatus(from conditions.Getter, typ clusterv1.ConditionType, status corev1.ConditionStatus) bool {
cond := conditions.Get(from, typ)
// A nil condition never matches, whatever status was asked for.
if cond == nil {
return false

Check warning on line 16 in util/reconciler/conditions.go

View check run for this annotation

Codecov / codecov/patch

util/reconciler/conditions.go#L13-L16

Added lines #L13 - L16 were not covered by tests
}

return cond.Status == status

Check warning on line 19 in util/reconciler/conditions.go

View check run for this annotation

Codecov / codecov/patch

util/reconciler/conditions.go#L19

Added line #L19 was not covered by tests
}

0 comments on commit e450c57

Please sign in to comment.