From a88cfaf49ed98681757f5dce1912f1346dbdda80 Mon Sep 17 00:00:00 2001 From: Oscar Frasier Date: Mon, 6 May 2024 14:55:17 -0400 Subject: [PATCH] Defer cEOS-lab pod check, update operator version (#534) * Arista node to implement Status The Arista node is waiting for its pod to come up in Create(). This is problematic in high-scale scenarios because we create the pods synchronously. Move the check to Status() so checkNodeStatus() handles things instead. * Update ceoslab operator from v2.0.2 to v2.1.2 See release notes: https://github.com/aristanetworks/arista-ceoslab-operator/releases/tag/v2.1.2 Increased the number of workers in the operator to the number of cores (from the default value of 1). This may improve performance in high-scale scenarios. Most of their time is spent generating RSA certificates. --- go.mod | 2 +- go.sum | 4 +- manifests/controllers/ceoslab/manifest.yaml | 2 +- topo/node/arista/arista.go | 43 ++++++----- topo/node/arista/arista_test.go | 82 +++++++++++++++++++++ 5 files changed, 112 insertions(+), 21 deletions(-) diff --git a/go.mod b/go.mod index 3c3b41e0..4229f93c 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.21 require ( cloud.google.com/go/pubsub v1.33.0 - github.com/aristanetworks/arista-ceoslab-operator/v2 v2.0.2 + github.com/aristanetworks/arista-ceoslab-operator/v2 v2.1.2 github.com/blang/semver v3.5.1+incompatible github.com/docker/docker v24.0.9+incompatible github.com/drivenets/cdnos-controller v1.7.4 diff --git a/go.sum b/go.sum index 23294f9a..af5037a0 100644 --- a/go.sum +++ b/go.sum @@ -823,8 +823,8 @@ github.com/apache/arrow/go/v10 v10.0.1/go.mod h1:YvhnlEePVnBS4+0z3fhPfUy7W1Ikj0I github.com/apache/arrow/go/v11 v11.0.0/go.mod h1:Eg5OsL5H+e299f7u5ssuXsuHQVEGC4xei5aX110hRiI= github.com/apache/arrow/go/v12 v12.0.0/go.mod h1:d+tV/eHZZ7Dz7RPrFKtPK02tpr+c9/PEd/zm8mDS9Vg= github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= -github.com/aristanetworks/arista-ceoslab-operator/v2 v2.0.2 
h1:KQL1evr4NM4ZQOLRs1bbmD0kYPmLRAMqvRrNSpYAph4= -github.com/aristanetworks/arista-ceoslab-operator/v2 v2.0.2/go.mod h1:/mvSt2fEmlVEU7dppip3UNz/MUt380f50dFsZRGn83o= +github.com/aristanetworks/arista-ceoslab-operator/v2 v2.1.2 h1:1aAxwwu4xyfiU1/FX2D5x/jsF/sxFVkjVhvF661isM4= +github.com/aristanetworks/arista-ceoslab-operator/v2 v2.1.2/go.mod h1:/mvSt2fEmlVEU7dppip3UNz/MUt380f50dFsZRGn83o= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/manifests/controllers/ceoslab/manifest.yaml b/manifests/controllers/ceoslab/manifest.yaml index a673106a..98cbc25b 100644 --- a/manifests/controllers/ceoslab/manifest.yaml +++ b/manifests/controllers/ceoslab/manifest.yaml @@ -487,7 +487,7 @@ spec: - --leader-elect command: - /manager - image: ghcr.io/aristanetworks/arista-ceoslab-operator:v2.0.2 + image: ghcr.io/aristanetworks/arista-ceoslab-operator:v2.1.2 livenessProbe: httpGet: path: /healthz diff --git a/topo/node/arista/arista.go b/topo/node/arista/arista.go index 3af6477a..b2ac04b2 100644 --- a/topo/node/arista/arista.go +++ b/topo/node/arista/arista.go @@ -100,6 +100,31 @@ func (n *Node) Create(ctx context.Context) error { return nil } +func (n *Node) Status(ctx context.Context) (node.Status, error) { + w, err := n.KubeClient.CoreV1().Pods(n.Namespace).Watch(ctx, metav1.ListOptions{ + FieldSelector: fields.SelectorFromSet(fields.Set{metav1.ObjectNameField: n.Name()}).String(), + }) + if err != nil { + return node.StatusFailed, err + } + status := node.StatusUnknown + for e := range w.ResultChan() { + p, ok := e.Object.(*corev1.Pod) + if !ok { + continue + } + if p.Status.Phase == corev1.PodPending { + status = node.StatusPending + break + } + if p.Status.Phase == corev1.PodRunning { + status = node.StatusRunning + 
break + } + } + return status, nil +} + func (n *Node) CreateConfig(ctx context.Context) (*corev1.Volume, error) { pb := n.Proto var data []byte @@ -212,24 +237,8 @@ func (n *Node) CreateCRD(ctx context.Context) error { if err != nil { return err } - // Wait for pods - w, err := n.KubeClient.CoreV1().Pods(n.Namespace).Watch(ctx, metav1.ListOptions{ - FieldSelector: fields.SelectorFromSet(fields.Set{metav1.ObjectNameField: n.Name()}).String(), - }) - if err != nil { - return err - } - for e := range w.ResultChan() { - p, ok := e.Object.(*corev1.Pod) - if !ok { - continue - } - if p.Status.Phase == corev1.PodPending || p.Status.Phase == corev1.PodRunning { - break - } - } log.Infof("Created CEosLabDevice CRD for node: %v", n.Name()) - return err + return nil } func (n *Node) Delete(ctx context.Context) error { diff --git a/topo/node/arista/arista_test.go b/topo/node/arista/arista_test.go index 3b206597..359b6a91 100644 --- a/topo/node/arista/arista_test.go +++ b/topo/node/arista/arista_test.go @@ -15,6 +15,7 @@ package arista import ( "context" + "errors" "fmt" "testing" "time" @@ -55,6 +56,7 @@ func (f *fakeWatch) ResultChan() <-chan watch.Event { f.e = f.e[1:] eCh <- e } + close(eCh) }() return eCh } @@ -542,3 +544,83 @@ func TestResetCfg(t *testing.T) { }) } } + +func TestStatus(t *testing.T) { + tests := []struct { + desc string + cantWatch bool + noPodYet bool + phase corev1.PodPhase + status node.Status + }{ + { + desc: "can't watch pod status", + cantWatch: true, + phase: corev1.PodUnknown, + status: node.StatusFailed, + }, + { + desc: "no pod in the watch channel", + noPodYet: true, + status: node.StatusUnknown, + }, + { + desc: "pod pending", + phase: corev1.PodPending, + status: node.StatusPending, + }, + { + desc: "pod running", + phase: corev1.PodRunning, + status: node.StatusRunning, + }, + } + + ctx := context.Background() + for _, tt := range tests { + t.Run(tt.desc, func(t *testing.T) { + name := "pod1" + ki := fake.NewSimpleClientset(&corev1.Pod{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + }) + + reaction := func(action ktest.Action) (handled bool, ret watch.Interface, err error) { + if tt.cantWatch { + err = errors.New("") + return true, nil, err + } + f := &fakeWatch{} + if !tt.noPodYet { + f.e = []watch.Event{{ + Object: &corev1.Pod{ + Status: corev1.PodStatus{ + Phase: tt.phase, + }, + }, + }} + } + return true, f, nil + } + ki.PrependWatchReactor("*", reaction) + + ns := "default" + node := &Node{ + Impl: &node.Impl{ + KubeClient: ki, + Namespace: ns, + Proto: &topopb.Node{}, + }, + } + node.Impl.Proto.Name = name + status, err := node.Status(ctx) + if s := errdiff.Check(err, tt.cantWatch); s != "" { + t.Errorf("Status() unexpected err: %s", s) + } + if s := cmp.Diff(tt.status, status); s != "" { + t.Errorf("New() CEosLabDevice CRDs unexpected diff (-want +got):\n%s", s) + } + }) + } +}