From 6d1df753c96fb33cbc986b5ab923bf0b231538ad Mon Sep 17 00:00:00 2001 From: Angelos Kolaitis Date: Thu, 29 Feb 2024 16:53:42 +0200 Subject: [PATCH 1/2] ensure we use InternalIP or ExternalIP machine addresses --- controllers/cloudinit/controlplane_join.go | 10 ++-- .../cloudinit/controlplane_join_test.go | 5 +- .../cloudinit/scripts/20-microk8s-join.sh | 32 ++++++++----- controllers/cloudinit/worker_join.go | 10 ++-- controllers/cloudinit/worker_join_test.go | 3 +- controllers/microk8sconfig_controller.go | 47 ++++++++++++------- 6 files changed, 63 insertions(+), 44 deletions(-) diff --git a/controllers/cloudinit/controlplane_join.go b/controllers/cloudinit/controlplane_join.go index 59fb3f4..4530689 100644 --- a/controllers/cloudinit/controlplane_join.go +++ b/controllers/cloudinit/controlplane_join.go @@ -48,7 +48,7 @@ type ControlPlaneJoinInput struct { // IPinIP defines whether Calico will use IPinIP mode for cluster networking. IPinIP bool // JoinNodeIPs is the IP addresses of the nodes to join. - JoinNodeIPs [2]string + JoinNodeIPs []string // Confinement specifies a classic or strict deployment of microk8s snap. Confinement string // RiskLevel specifies the risk level (strict, candidate, beta, edge) for the snap channels. @@ -111,8 +111,10 @@ func NewJoinControlPlane(input *ControlPlaneJoinInput) (*CloudConfig, error) { }) } - joinStr := fmt.Sprintf("%s:%s/%s", input.JoinNodeIPs[0], input.ClusterAgentPort, input.Token) - joinStrAlt := fmt.Sprintf("%s:%s/%s", input.JoinNodeIPs[1], input.ClusterAgentPort, input.Token) + joinURLs := make([]string, 0, len(input.JoinNodeIPs)) + for _, nodeIP := range input.JoinNodeIPs { + joinURLs = append(joinURLs, fmt.Sprintf("%q", fmt.Sprintf("%s:%s/%s", nodeIP, input.ClusterAgentPort, input.Token))) + } cloudConfig.BootCommands = append(cloudConfig.BootCommands, input.BootCommands...) @@ -130,7 +132,7 @@ func NewJoinControlPlane(input *ControlPlaneJoinInput) (*CloudConfig, error) { fmt.Sprintf("%s %q", scriptPath(configureDqlitePortScript), input.DqlitePort), "microk8s status --wait-ready", fmt.Sprintf("%s %q %q", scriptPath(configureCertLB), endpointType, input.ControlPlaneEndpoint), - fmt.Sprintf("%s no %q %q", scriptPath(microk8sJoinScript), joinStr, joinStrAlt), + fmt.Sprintf("%s no %s", scriptPath(microk8sJoinScript), strings.Join(joinURLs, " ")), scriptPath(configureAPIServerScript), fmt.Sprintf("microk8s add-node --token-ttl %v --token %q", input.TokenTTL, input.Token), ) diff --git a/controllers/cloudinit/controlplane_join_test.go b/controllers/cloudinit/controlplane_join_test.go index 28b89f3..b56994a 100644 --- a/controllers/cloudinit/controlplane_join_test.go +++ b/controllers/cloudinit/controlplane_join_test.go @@ -28,7 +28,6 @@ func TestControlPlaneJoin(t *testing.T) { t.Run("Simple", func(t *testing.T) { g := NewWithT(t) - joins := [2]string{"10.0.3.39", "10.0.3.40"} cloudConfig, err := cloudinit.NewJoinControlPlane(&cloudinit.ControlPlaneJoinInput{ ControlPlaneEndpoint: "k8s.my-domain.com", KubernetesVersion: "v1.25.2", @@ -37,7 +36,7 @@ func TestControlPlaneJoin(t *testing.T) { IPinIP: true, Token: strings.Repeat("a", 32), TokenTTL: 10000, - JoinNodeIPs: joins, + JoinNodeIPs: []string{"10.0.3.39", "10.0.3.40", "10.0.3.41"}, }) g.Expect(err).NotTo(HaveOccurred()) @@ -55,7 +54,7 @@ func TestControlPlaneJoin(t *testing.T) { `/capi-scripts/10-configure-dqlite-port.sh "2379"`, `microk8s status --wait-ready`, `/capi-scripts/10-configure-cert-for-lb.sh "DNS" "k8s.my-domain.com"`, - `/capi-scripts/20-microk8s-join.sh no "10.0.3.39:30000/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "10.0.3.40:30000/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`, + `/capi-scripts/20-microk8s-join.sh no "10.0.3.39:30000/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "10.0.3.40:30000/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" "10.0.3.41:30000/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`, `/capi-scripts/10-configure-apiserver.sh`, `microk8s add-node --token-ttl 10000 --token "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"`, })) diff --git a/controllers/cloudinit/scripts/20-microk8s-join.sh b/controllers/cloudinit/scripts/20-microk8s-join.sh index 2f6d570..e432335 100644 --- a/controllers/cloudinit/scripts/20-microk8s-join.sh +++ b/controllers/cloudinit/scripts/20-microk8s-join.sh @@ -1,27 +1,33 @@ #!/bin/bash -xe # Usage: -# $0 $worker_yes_no $join_string $alternative_join_string +# $0 $worker_yes_no $join_string_1 $join_string_2 ... $join_string_N # # Assumptions: # - microk8s is installed # - microk8s node is ready to join the cluster -join="${2}" -join_alt="${3}" - +join_args="" if [ ${1} == "yes" ]; then - join+=" --worker" - join_alt+=" --worker" + join_args="--worker" fi -while ! microk8s join ${join}; do - echo "Failed to join MicroK8s cluster, retring alternative join string" - if ! microk8s join ${join_alt} ; then - break - fi - echo "Failed to join MicroK8s cluster, will retry" - sleep 5 +shift + +# Loop over the given join addresses until microk8s join command succeeds. +joined="false" +while [ "$joined" = "false" ]; do + + for url in "${@}"; do + if microk8s join "${url}" $join_args; then + joined="true" + break + fi + + echo "Failed to join MicroK8s cluster, will retry" + sleep 5 + done + done # What is this hack? Why do we call snap set here? diff --git a/controllers/cloudinit/worker_join.go b/controllers/cloudinit/worker_join.go index 498c38e..20dac3c 100644 --- a/controllers/cloudinit/worker_join.go +++ b/controllers/cloudinit/worker_join.go @@ -41,7 +41,7 @@ type WorkerInput struct { // ContainerdNoProxy is no_proxy configuration for containerd. ContainerdNoProxy string // JoinNodeIPs is the IP addresses of the nodes to join. - JoinNodeIPs [2]string + JoinNodeIPs []string // Confinement specifies a classic or strict deployment of microk8s snap. Confinement string // RiskLevel specifies the risk level (strict, candidate, beta, edge) for the snap channels. @@ -100,8 +100,10 @@ func NewJoinWorker(input *WorkerInput) (*CloudConfig, error) { }) } - joinStr := fmt.Sprintf("%s:%s/%s", input.JoinNodeIPs[0], input.ClusterAgentPort, input.Token) - joinStrAlt := fmt.Sprintf("%s:%s/%s", input.JoinNodeIPs[1], input.ClusterAgentPort, input.Token) + joinURLs := make([]string, 0, len(input.JoinNodeIPs)) + for _, nodeIP := range input.JoinNodeIPs { + joinURLs = append(joinURLs, fmt.Sprintf("%q", fmt.Sprintf("%s:%s/%s", nodeIP, input.ClusterAgentPort, input.Token))) + } cloudConfig.BootCommands = append(cloudConfig.BootCommands, input.BootCommands...) @@ -115,7 +117,7 @@ func NewJoinWorker(input *WorkerInput) (*CloudConfig, error) { scriptPath(configureKubeletScript), "microk8s status --wait-ready", fmt.Sprintf("%s %q", scriptPath(configureClusterAgentPortScript), input.ClusterAgentPort), - fmt.Sprintf("%s yes %q %q", scriptPath(microk8sJoinScript), joinStr, joinStrAlt), + fmt.Sprintf("%s yes %s", scriptPath(microk8sJoinScript), strings.Join(joinURLs, " ")), fmt.Sprintf("%s %s 6443 %s", scriptPath(configureTraefikScript), input.ControlPlaneEndpoint, stopApiServerProxyRefreshes), ) cloudConfig.RunCommands = append(cloudConfig.RunCommands, input.PostRunCommands...) diff --git a/controllers/cloudinit/worker_join_test.go b/controllers/cloudinit/worker_join_test.go index c4c9fbd..a207203 100644 --- a/controllers/cloudinit/worker_join_test.go +++ b/controllers/cloudinit/worker_join_test.go @@ -28,13 +28,12 @@ func TestWorkerJoin(t *testing.T) { t.Run("Simple", func(t *testing.T) { g := NewWithT(t) - joins := [2]string{"10.0.3.194", "10.0.3.195"} cloudConfig, err := cloudinit.NewJoinWorker(&cloudinit.WorkerInput{ ControlPlaneEndpoint: "capi-aws-apiserver-1647391446.us-east-1.elb.amazonaws.com", KubernetesVersion: "v1.24.3", ClusterAgentPort: "30000", Token: strings.Repeat("a", 32), - JoinNodeIPs: joins, + JoinNodeIPs: []string{"10.0.3.194", "10.0.3.195"}, }) g.Expect(err).NotTo(HaveOccurred()) diff --git a/controllers/microk8sconfig_controller.go b/controllers/microk8sconfig_controller.go index 5697de6..e092772 100644 --- a/controllers/microk8sconfig_controller.go +++ b/controllers/microk8sconfig_controller.go @@ -91,6 +91,14 @@ const ( remappedClusterAgentPort string = "30000" ) +var ( + // preferMachineAddressTypeList is the order of preference of machine addresses to use for joining microk8s nodes to a cluster. + preferMachineAddressTypeList = []clusterv1.MachineAddressType{ + clusterv1.MachineInternalIP, + clusterv1.MachineExternalIP, + } +) + //+kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=microk8sconfigs,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=microk8sconfigs/status,verbs=get;update;patch //+kubebuilder:rbac:groups=bootstrap.cluster.x-k8s.io,resources=microk8sconfigs/finalizers,verbs=update @@ -482,7 +490,7 @@ func (r *MicroK8sConfigReconciler) handleJoiningWorkerNode(ctx context.Context, } ipOfNodesToConnectTo, err := r.getControlPlaneNodesToJoin(ctx, scope) - if err != nil || ipOfNodesToConnectTo[0] == "" { + if err != nil || len(ipOfNodesToConnectTo) == 0 { scope.Info("Failed to discover a control plane IP, requeueing.") return ctrl.Result{RequeueAfter: 30 * time.Second}, nil } @@ -539,34 +547,37 @@ func (r *MicroK8sConfigReconciler) handleJoiningWorkerNode(ctx context.Context, return ctrl.Result{}, nil } -func (r *MicroK8sConfigReconciler) getControlPlaneNodesToJoin(ctx context.Context, scope *Scope) ([2]string, error) { - var ipOfNodesToConnectTo [2]string +func (r *MicroK8sConfigReconciler) getControlPlaneNodesToJoin(ctx context.Context, scope *Scope) ([]string, error) { nodes, err := r.getControlPlaneMachinesForCluster(ctx, util.ObjectKey(scope.Cluster)) if err != nil { scope.Error(err, "Lookup control plane nodes") - return ipOfNodesToConnectTo, err + return nil, err } - n := 0 + addressesByType := make(map[clusterv1.MachineAddressType][]string) for _, node := range nodes { - if n >= 2 { - break + if node.Spec.ProviderID == nil || node.Status.Phase != "Running" { + continue } - if node.Spec.ProviderID != nil && node.Status.Phase == "Running" { - for _, address := range node.Status.Addresses { - if address.Address != "" { - ipOfNodesToConnectTo[n] = address.Address - n += 1 - if n == 1 { - ipOfNodesToConnectTo[n] = address.Address - } - break - } + for _, address := range node.Status.Addresses { + if address.Address == "" { + continue } + addressesByType[address.Type] = append(addressesByType[address.Type], address.Address) } } - return ipOfNodesToConnectTo, nil + for _, addressType := range preferMachineAddressTypeList { + if addresses, ok := addressesByType[addressType]; ok && len(addresses) > 0 { + return addresses, nil + } + } + + // if we are here, no addresses were found + err = fmt.Errorf("machines do not have any addresses with types %v", preferMachineAddressTypeList) + scope.Error(err, "Lookup control plane node IPs", "addressesByType", addressesByType) + + return nil, err } func (r *MicroK8sConfigReconciler) storeBootstrapData(ctx context.Context, scope *Scope, data []byte) error { From e4bd4d5d649656dadc75a1480b783428d2e46c23 Mon Sep 17 00:00:00 2001 From: Angelos Kolaitis Date: Thu, 29 Feb 2024 17:00:17 +0200 Subject: [PATCH 2/2] do not accept empty list of addresses --- controllers/microk8sconfig_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/microk8sconfig_controller.go b/controllers/microk8sconfig_controller.go index e092772..63406d1 100644 --- a/controllers/microk8sconfig_controller.go +++ b/controllers/microk8sconfig_controller.go @@ -374,7 +374,7 @@ func (r *MicroK8sConfigReconciler) handleJoiningControlPlaneNode(ctx context.Con scope.Info("Creating BootstrapData for the join control plane") ipsOfNodesToConnectTo, err := r.getControlPlaneNodesToJoin(ctx, scope) - if err != nil || ipsOfNodesToConnectTo[0] == "" { + if err != nil || len(ipsOfNodesToConnectTo) == 0 { scope.Info("Failed to discover a control plane IP, requeueing.") return ctrl.Result{RequeueAfter: 30 * time.Second}, nil }