diff --git a/go.mod b/go.mod index d60500c..a0531a8 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,8 @@ module github.com/flanksource/is-healthy -go 1.20 +go 1.22.0 + +toolchain go1.22.9 require ( github.com/cert-manager/cert-manager v1.9.0 @@ -25,6 +27,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect golang.org/x/net v0.23.0 // indirect golang.org/x/text v0.16.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index 53f9c69..3befd97 100644 --- a/go.sum +++ b/go.sum @@ -565,6 +565,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= +golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= diff --git a/main.go b/main.go index 04a4457..978f9b2 100644 --- a/main.go +++ b/main.go @@ -31,9 +31,9 @@ func main() { os.Exit(1) } - fmt.Printf("%s: %s\n", _health.Status, _health.Message) + fmt.Printf("%s\n", *_health) - if health.IsWorse(health.HealthStatusHealthy, _health.Status) { + if _health.Health.IsWorseThan(health.HealthWarning) { os.Exit(1) } } diff --git a/pkg/health/health.go b/pkg/health/health.go index 758d003..dab6de6 100644 --- a/pkg/health/health.go +++ b/pkg/health/health.go @@ -6,6 +6,7 @@ import ( "time" "github.com/samber/lo" + "golang.org/x/exp/slices" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/duration" @@ -27,6 +28,47 @@ func IsValidHealth(s string) bool { s == string(HealthWarning) } +var healthOrder = []Health{ + HealthUnknown, + HealthHealthy, + HealthWarning, + HealthUnhealthy, +} + +func (h Health) Worst(others ...Health) Health { + all := append(others, h) + slices.SortFunc(all, CompareHealth) + return all[len(all)-1] +} + +func (h Health) IsWorseThan(other Health) bool { + return h.CompareTo(other) >= 0 +} + +func CompareHealth(a, b Health) int { + return a.CompareTo(b) +} + +func (h Health) CompareTo(other Health) int { + currentIndex := 0 + newIndex := 0 + for i, code := range healthOrder { + if h == code { + currentIndex = i + } + if other == code { + newIndex = i + } + } + if newIndex == currentIndex { + return 0 + } + if currentIndex > newIndex { + return 1 + } + return -1 +} + // Represents resource health status type HealthStatusCode string @@ -64,21 +106,20 @@ const ( HealthStatusScaling HealthStatusCode = "Scaling" HealthStatusRestart HealthStatusCode = "Restarting" HealthStatusStarting HealthStatusCode = "Starting" + HealthStatusFailed HealthStatusCode = "Failed" HealthStatusUnschedulable HealthStatusCode = "Unschedulable" 
HealthStatusUpgradeFailed HealthStatusCode = "UpgradeFailed" - - HealthStatusScalingUp HealthStatusCode = "Scaling Up" - HealthStatusScaledToZero HealthStatusCode = "Scaled to Zero" - HealthStatusScalingDown HealthStatusCode = "Scaling Down" - HealthStatusRunning HealthStatusCode = "Running" - - HealthStatusRollingOut HealthStatusCode = "Rolling Out" - - HealthStatusUnhealthy HealthStatusCode = "Unhealthy" - HealthStatusUpdating HealthStatusCode = "Updating" - HealthStatusWarning HealthStatusCode = "Warning" - HealthStatusStopped HealthStatusCode = "Stopped" - HealthStatusStopping HealthStatusCode = "Stopping" + HealthStatusOOMKilled HealthStatusCode = "OOMKilled" + HealthStatusScalingUp HealthStatusCode = "Scaling Up" + HealthStatusScaledToZero HealthStatusCode = "Scaled to Zero" + HealthStatusScalingDown HealthStatusCode = "Scaling Down" + HealthStatusRunning HealthStatusCode = "Running" + HealthStatusRollingOut HealthStatusCode = "Rolling Out" + HealthStatusUnhealthy HealthStatusCode = "Unhealthy" + HealthStatusUpdating HealthStatusCode = "Updating" + HealthStatusWarning HealthStatusCode = "Warning" + HealthStatusStopped HealthStatusCode = "Stopped" + HealthStatusStopping HealthStatusCode = "Stopping" ) // Implements custom health assessment that overrides built-in assessment @@ -86,31 +127,6 @@ type HealthOverride interface { GetResourceHealth(obj *unstructured.Unstructured) (*HealthStatus, error) } -// healthOrder is a list of health codes in order of most healthy to least healthy -var healthOrder = []HealthStatusCode{ - HealthStatusHealthy, - HealthStatusSuspended, - HealthStatusProgressing, - HealthStatusMissing, - HealthStatusDegraded, - HealthStatusUnknown, -} - -// IsWorse returns whether or not the new health status code is a worse condition than the current -func IsWorse(current, new HealthStatusCode) bool { - currentIndex := 0 - newIndex := 0 - for i, code := range healthOrder { - if current == code { - currentIndex = i - } - if new == code { - newIndex = i - } - } - return newIndex > currentIndex -} - func get(obj map[string]any, keys ...string) string { v, _, _ := unstructured.NestedString(obj, keys...) 
 	return strings.TrimSpace(v)
diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go
index a7a0c12..d127053 100644
--- a/pkg/health/health_pod.go
+++ b/pkg/health/health_pod.go
@@ -25,8 +25,65 @@ func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) {
 	}
 }
 
+func getPodStatus(containers ...corev1.ContainerStatus) (waiting *HealthStatus, terminated *HealthStatus) {
+	for _, container := range containers {
+		_waiting, _terminated := getContainerStatus(container)
+		if _waiting != nil {
+			if waiting == nil {
+				waiting = _waiting
+			} else if _waiting.Health.IsWorseThan(waiting.Health) {
+				waiting = _waiting
+			}
+		}
+		if _terminated != nil {
+			if terminated == nil {
+				terminated = _terminated
+			} else if _terminated.Health.IsWorseThan(terminated.Health) {
+				terminated = _terminated
+			}
+		}
+	}
+	return waiting, terminated
+}
+
+func getContainerStatus(containerStatus corev1.ContainerStatus) (waiting *HealthStatus, terminated *HealthStatus) {
+	if state := containerStatus.State.Waiting; state != nil &&
+		(strings.HasPrefix(state.Reason, "Err") ||
+			strings.HasSuffix(state.Reason, "Error") ||
+			strings.HasSuffix(state.Reason, "BackOff")) {
+		waiting = &HealthStatus{
+			Status:  HealthStatusCode(state.Reason),
+			Health:  HealthUnhealthy,
+			Message: state.Message,
+		}
+	}
+
+	if state := containerStatus.LastTerminationState.Terminated; state != nil {
+		age := time.Since(state.FinishedAt.Time)
+		terminated = &HealthStatus{
+			Status:  HealthStatusCode(state.Reason),
+			Health:  HealthUnhealthy,
+			Message: state.Message,
+		}
+		if age >= time.Hour*24 {
+			terminated.Health = HealthUnknown
+		} else if age >= time.Hour {
+			terminated.Health = HealthWarning
+		}
+	}
+	return waiting, terminated
+}
+
 func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 	isReady := IsPodReady(pod)
+	containers := append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...)
+	deadline := GetStartDeadline(append(pod.Spec.InitContainers, pod.Spec.Containers...)...)
+	age := time.Since(pod.CreationTimestamp.Time).Truncate(time.Minute).Abs()
+	isStarting := age < deadline
+	var hr = HealthStatus{
+		Health: HealthUnknown,
+	}
+
 	if pod.ObjectMeta.DeletionTimestamp != nil && !pod.ObjectMeta.DeletionTimestamp.IsZero() {
 		status := HealthUnknown
 		message := ""
@@ -54,98 +111,19 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
 		}, nil
 	}
 
-	getCommonContainerError := func(containerStatus *corev1.ContainerStatus) *HealthStatus {
-		waiting := containerStatus.State.Waiting
-		// Article listing common container errors: https://medium.com/kokster/debugging-crashloopbackoffs-with-init-containers-26f79e9fb5bf
-		if waiting != nil &&
-			(strings.HasPrefix(waiting.Reason, "Err") || strings.HasSuffix(waiting.Reason, "Error") || strings.HasSuffix(waiting.Reason, "BackOff")) {
+	for _, ctrStatus := range pod.Status.Conditions {
+		if ctrStatus.Reason == "Unschedulable" {
 			return &HealthStatus{
-				Status:  HealthStatusCode(waiting.Reason),
 				Health:  HealthUnhealthy,
-				Message: waiting.Message,
-			}
-		}
-
-		return nil
-	}
-
-	// This logic cannot be applied when the pod.Spec.RestartPolicy is: corev1.RestartPolicyOnFailure,
-	// corev1.RestartPolicyNever, otherwise it breaks the resource hook logic.
-	// The issue is, if we mark a pod with ImagePullBackOff as Degraded, and the pod is used as a resource hook,
-	// then we will prematurely fail the PreSync/PostSync hook. Meanwhile, when that error condition is resolved
-	// (e.g.
the image is available), the resource hook pod will unexpectedly be executed even though the sync has - // completed. - if pod.Spec.RestartPolicy == corev1.RestartPolicyAlways { - var status HealthStatusCode - var health Health - var messages []string - - for _, containerStatus := range pod.Status.ContainerStatuses { - if msg := getCommonContainerError(&containerStatus); msg != nil { - health = msg.Health - status = msg.Status - messages = append(messages, msg.Message) - } - } - - if status != "" { - return &HealthStatus{ - Health: health, - Status: status, - Message: strings.Join(messages, ", "), + Status: HealthStatusUnschedulable, + Message: ctrStatus.Message, }, nil } } - getFailMessage := func(ctr *corev1.ContainerStatus) string { - if ctr.State.Terminated != nil { - if ctr.State.Terminated.Message != "" { - return ctr.State.Terminated.Message - } - if ctr.State.Terminated.Reason == "OOMKilled" { - return ctr.State.Terminated.Reason - } - if ctr.State.Terminated.ExitCode != 0 { - return fmt.Sprintf("container %q failed with exit code %d", ctr.Name, ctr.State.Terminated.ExitCode) - } - } - return "" - } + waiting, terminated := getPodStatus(containers...) switch pod.Status.Phase { - case corev1.PodPending: - for _, ctrStatus := range pod.Status.InitContainerStatuses { - if ctrStatus.LastTerminationState.Terminated != nil && - ctrStatus.LastTerminationState.Terminated.Reason == "Error" { - // A pending pod whose container was previously terminated with error should be marked as unhealthy (instead of unknown) - return &HealthStatus{ - Health: HealthUnhealthy, - Status: HealthStatusCrashLoopBackoff, - Message: ctrStatus.LastTerminationState.Terminated.Reason, - }, nil - } - - if msg := getCommonContainerError(&ctrStatus); msg != nil { - return msg, nil - } - } - - for _, ctrStatus := range pod.Status.Conditions { - if ctrStatus.Reason == "Unschedulable" { - return &HealthStatus{ - Health: HealthUnhealthy, - Status: HealthStatusUnschedulable, - Message: ctrStatus.Message, - }, nil - } - } - - return &HealthStatus{ - Health: HealthUnknown, - Status: HealthStatusPending, - Message: pod.Status.Message, - }, nil - case corev1.PodSucceeded: return &HealthStatus{ Health: HealthHealthy, @@ -155,144 +133,29 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { }, nil case corev1.PodFailed: - if pod.Status.Message != "" { - // Pod has a nice error message. Use that. - return &HealthStatus{ - Health: HealthUnhealthy, - Status: HealthStatusError, - Ready: true, - Message: pod.Status.Message, - }, nil - } - for _, ctr := range append(pod.Status.InitContainerStatuses, pod.Status.ContainerStatuses...) 
{ - if msg := getFailMessage(&ctr); msg != "" { - return &HealthStatus{Health: HealthUnhealthy, Status: HealthStatusError, Ready: true, Message: msg}, nil - } + hr.Health = HealthUnhealthy + hr.Ready = true + hr.Status, _ = lo.Coalesce(hr.Status, HealthStatusFailed) + hr.Message = lo.CoalesceOrEmpty(pod.Status.Message, hr.Message) + + case corev1.PodRunning, corev1.PodPending: + hr = hr.Merge(terminated, waiting) + if terminated != nil && terminated.Health.IsWorseThan(HealthWarning) && hr.Status == HealthStatusCrashLoopBackoff { + hr.Status = terminated.Status + hr.Health = hr.Health.Worst(terminated.Health) } + hr.Status, _ = lo.Coalesce(hr.Status, HealthStatusRunning) + hr.Health = hr.Health.Worst(lo.Ternary(isReady, HealthHealthy, HealthUnhealthy)) + } - return &HealthStatus{Health: HealthUnhealthy, Status: HealthStatusError, Message: "", Ready: true}, nil - - case corev1.PodRunning: - switch pod.Spec.RestartPolicy { - case corev1.RestartPolicyAlways: - if isReady { - h := &HealthStatus{ - Health: HealthHealthy, - Ready: true, - Status: HealthStatusRunning, - Message: pod.Status.Message, - } - - // A ready pod can be in a warning state if it has been in a restart loop. - // i.e. the container completes successfully, but the pod keeps restarting. - for _, s := range pod.Status.ContainerStatuses { - possiblyInRestartLoop := s.RestartCount > 2 && - s.LastTerminationState.Terminated != nil && - time.Since(s.State.Running.StartedAt.Time) < time.Hour*4 - - if possiblyInRestartLoop { - lastTerminatedTime := s.LastTerminationState.Terminated.FinishedAt.Time - h.Message = fmt.Sprintf( - "%s has restarted %d time(s)", - s.Name, - pod.Status.ContainerStatuses[0].RestartCount, - ) - - if s.LastTerminationState.Terminated.Reason != "Completed" { - h.Status = HealthStatusCode(s.LastTerminationState.Terminated.Reason) - } - - if time.Since(lastTerminatedTime) < time.Minute*30 { - h.Health = HealthUnhealthy - h.Ready = false - } else if time.Since(lastTerminatedTime) < time.Hour*8 { - h.Health = HealthWarning - h.Ready = false - } - } - } - - return h, nil - } - - // if it's not ready, check to see if any container terminated, if so, it's degraded - var nonReadyContainers []ContainerRecord - for _, ctrStatus := range pod.Status.ContainerStatuses { - if !ctrStatus.Ready { - spec := lo.Filter(pod.Spec.Containers, func(i corev1.Container, _ int) bool { - return i.Name == ctrStatus.Name - }) - nonReadyContainers = append(nonReadyContainers, ContainerRecord{ - Status: ctrStatus, - Spec: spec[0], - }) - } - - if ctrStatus.LastTerminationState.Terminated != nil { - return &HealthStatus{ - Health: HealthUnhealthy, - Ready: true, - Status: HealthStatusCode(ctrStatus.LastTerminationState.Terminated.Reason), - Message: ctrStatus.LastTerminationState.Terminated.Message, - }, nil - } - } - - // Pod isn't ready but all containers are - if len(nonReadyContainers) == 0 { - return &HealthStatus{ - Health: HealthWarning, - Status: HealthStatusRunning, - Message: pod.Status.Message, - }, nil - } - - var containersWaitingForReadinessProbe []string - for _, c := range nonReadyContainers { - if c.Spec.ReadinessProbe == nil || c.Spec.ReadinessProbe.InitialDelaySeconds == 0 { - continue - } - - if c.Status.State.Running != nil && - time.Since( - c.Status.State.Running.StartedAt.Time, - ) <= time.Duration( - c.Spec.ReadinessProbe.InitialDelaySeconds, - )*time.Second { - containersWaitingForReadinessProbe = append(containersWaitingForReadinessProbe, c.Spec.Name) - } - } - - // otherwise we are progressing towards a ready state 
-		return &HealthStatus{
-			Health: HealthUnknown,
-			Status: HealthStatusStarting,
-			Message: fmt.Sprintf(
-				"Container %s is waiting for readiness probe",
-				strings.Join(containersWaitingForReadinessProbe, ","),
-			),
-		}, nil
-
-	case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever:
-		if isReady {
-			return &HealthStatus{
-				Health: HealthHealthy,
-				Status: HealthStatusRunning,
-			}, nil
-		} else {
-			return &HealthStatus{
-				Health: HealthUnhealthy,
-				Status: HealthStatusRunning,
-			}, nil
-		}
-	}
+	if isStarting && hr.Health.IsWorseThan(HealthWarning) &&
+		(terminated == nil || terminated.Status != HealthStatusOOMKilled) {
+		hr.Health = HealthUnknown
+		hr.Message = fmt.Sprintf("%s %s", string(hr.Status), hr.Message)
+		hr.Status = HealthStatusStarting
 	}
 
-	return &HealthStatus{
-		Health:  HealthUnknown,
-		Status:  HealthStatusUnknown,
-		Message: pod.Status.Message,
-	}, nil
+	return &hr, nil
 }
 
 type ContainerRecord struct {
diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go
index 7f85946..2e22855 100644
--- a/pkg/health/health_test.go
+++ b/pkg/health/health_test.go
@@ -32,13 +32,14 @@ var (
 	"@now-1m":  _now.Add(-time.Minute * 1).Format(RFC3339Micro),
 	"@now-10m": _now.Add(-time.Minute * 10).Format(RFC3339Micro),
 	"@now-15m": _now.Add(-time.Minute * 15).Format(RFC3339Micro),
 	"@now-5m":  _now.Add(-time.Minute * 5).Format(RFC3339Micro),
 	"@now-1h":  _now.Add(-time.Hour).Format(RFC3339Micro),
 	"@now-2h":  _now.Add(-time.Hour * 2).Format(RFC3339Micro),
 	"@now-4h":  _now.Add(-time.Hour * 4).Format(RFC3339Micro),
 	"@now-8h":  _now.Add(-time.Hour * 8).Format(RFC3339Micro),
 	"@now-1d":  _now.Add(-time.Hour * 24).Format(RFC3339Micro),
+	"@now-5d":  _now.Add(-time.Hour * 24 * 5).Format(RFC3339Micro),
 	"@now+10m": _now.Add(time.Minute * 10).Format(RFC3339Micro),
 	"@now+5m":  _now.Add(time.Minute * 5).Format(RFC3339Micro),
 	"@now+15m": _now.Add(time.Minute * 15).Format(RFC3339Micro),
@@ -77,6 +77,12 @@ func testFixture(t *testing.T, yamlPath string) {
 	})
 }
 
+func TestHealthCompare(t *testing.T) {
+	assert.True(t, health.HealthUnhealthy.IsWorseThan(health.HealthWarning))
+	assert.Equal(t, health.HealthHealthy, health.HealthHealthy.Worst(health.HealthUnknown))
+	assert.Equal(t, health.HealthUnhealthy, health.HealthHealthy.Worst(health.HealthUnhealthy))
+}
+
 func assertAppHealthMsg(
 	t *testing.T,
 	yamlPath string,
@@ -92,9 +99,6 @@
 	}
 
 	m := make(map[string]string)
-	for k, v := range defaultOverrides {
-		m[k] = v
-	}
 	for i := 0; i < len(overrides); i += 2 {
 		if v, ok := defaultOverrides[overrides[i+1]]; ok {
 			m[overrides[i]] = v
@@ -150,12 +154,19 @@ func assertAppHealthWithOverwrite(
 func getHealthStatus(
 	yamlPath string,
 	t *testing.T,
-	overwrites map[string]string,
+	overrides map[string]string,
 ) (*health.HealthStatus, unstructured.Unstructured) {
 	if !strings.HasPrefix(yamlPath, "./testdata/") &&
 		!strings.HasPrefix(yamlPath, "testdata/") &&
 		!strings.HasPrefix(yamlPath, "../resource_customizations") {
 		yamlPath = "./testdata/" + yamlPath
 	}
 
+	m := make(map[string]string)
+	for k, v := range defaultOverrides {
+		m[k] = v
+	}
+	for k, v := range overrides {
+		m[k] = v
+	}
 	var yamlBytes []byte
 	var err error
 
@@ -167,19 +178,19 @@
 	require.NoError(t, err)
 
 	yamlString := string(yamlBytes)
-	keys := lo.Keys(overwrites)
+	keys := lo.Keys(m)
 	sort.Slice(keys, func(i, j int) bool { return len(keys[i]) > len(keys[j]) })
 	for _, k := range keys {
-		v := overwrites[k]
+		v := m[k]
 		yamlString = strings.ReplaceAll(yamlString, k, v)
 	}
 
-	// 2nd iteration
+	// 2nd iteration, sometimes @now
is replaced with @now-5m for _, k := range keys { - v := overwrites[k] + v := m[k] yamlString = strings.ReplaceAll(yamlString, k, v) } @@ -637,8 +648,6 @@ func TestPod(t *testing.T) { health.HealthUnhealthy, false, ) - assertAppHealthMsg(t, "./testdata/pod-imagepullbackoff.yaml", "ImagePullBackOff", health.HealthUnhealthy, false) - assertAppHealthMsg(t, "./testdata/pod-error.yaml", health.HealthStatusError, health.HealthUnhealthy, true) assertAppHealthMsg( t, "./testdata/pod-running-restart-always.yaml", @@ -660,8 +669,6 @@ func TestPod(t *testing.T) { health.HealthUnhealthy, false, ) - assertAppHealthMsg(t, "./testdata/pod-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) - assertAppHealthMsg(t, "./testdata/pod-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) assertAppHealthMsg( t, "./testdata/pod-init-container-fail.yaml", diff --git a/pkg/health/testdata/Kubernetes/Pod/healthy.yaml b/pkg/health/testdata/Kubernetes/Pod/healthy.yaml new file mode 100644 index 0000000..40b97d3 --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Pod/healthy.yaml @@ -0,0 +1,168 @@ +apiVersion: v1 +kind: Pod +metadata: + uid: c36a1534-25dd-4502-84ef-614771d2eec3 + name: acc-history-preprod-f5c7fdbf-qs42k + labels: + app: acc-history-preprod + pod-template-hash: f5c7fdbf + namespace: sandbox + generateName: acc-history-preprod-f5c7fdbf- + ownerReferences: + - uid: 204a2f0a-013f-4f90-8daa-3dfc6cfb94c2 + kind: ReplicaSet + name: acc-history-preprod-f5c7fdbf + apiVersion: apps/v1 + controller: true + blockOwnerDeletion: true + creationTimestamp: 2024-11-13T07:48:25Z +spec: + volumes: + - name: files + secret: + optional: true + secretName: acc-history-preprod-files + defaultMode: 420 + - name: cm-files + configMap: + name: acc-history-preprod-cm-files + optional: true + defaultMode: 420 + - name: kube-api-access-m69vn + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + apiVersion: v1 + defaultMode: 420 + nodeName: gke-sandbox-private-pool-containerd-s-c8d2b7ad-fs6l + priority: 0 + dnsPolicy: ClusterFirst + containers: + - env: + - name: database.username + value: acc-history-preprod + - name: database.prefix + name: acc-history-preprod + image: europe-west2-docker.pkg.dev/acme-group-sdlc-infra/docker/acc-history-aggregator:f0fe7c26 + ports: + - name: http + protocol: TCP + containerPort: 8080 + - name: monitoring + protocol: TCP + containerPort: 9090 + envFrom: + - secretRef: + name: defaults + optional: true + - configMapRef: + name: defaults + optional: true + - secretRef: + name: acc-history-preprod + optional: true + - secretRef: + name: acc-history-preprod-extra + optional: true + - secretRef: + name: acc-history-preprod-sql + optional: true + - configMapRef: + name: acc-history-preprod + optional: true + resources: + limits: + cpu: 200m + memory: 512Mi + requests: + cpu: 10m + memory: 256Mi + volumeMounts: + - name: files + mountPath: /var/secret/files + - name: cm-files + mountPath: /var/secret/cm-files + - name: kube-api-access-m69vn + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + livenessProbe: + exec: + command: + - /bin/true + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + successThreshold: 1 + readinessProbe: + exec: + command: + - /bin/true + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + 
successThreshold: 1 + imagePullPolicy: IfNotPresent + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tolerations: + - key: node.kubernetes.io/not-ready + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + restartPolicy: Always + schedulerName: default-scheduler + serviceAccount: acc-history-preprod + securityContext: {} + preemptionPolicy: PreemptLowerPriority + enableServiceLinks: true + serviceAccountName: acc-history-preprod + terminationGracePeriodSeconds: 30 +status: + phase: Running + podIP: 10.72.2.72 + hostIP: 10.64.0.17 + podIPs: + - ip: 10.72.2.72 + hostIPs: + - ip: 10.64.0.17 + qosClass: Burstable + startTime: 2024-11-13T07:48:25Z + conditions: + - type: ContainersReady + status: "True" + - type: Initialized + status: "True" + - type: PodReadyToStartContainers + status: "True" + - type: PodScheduled + status: "True" + - type: Ready + status: "True" + containerStatuses: + - name: acc-history-preprod + image: europe-west2-docker.pkg.dev/acme-group-sdlc-infra/docker/acc-history-aggregator:f0fe7c26 + ready: true + state: + running: + startedAt: 2024-11-13T07:49:20Z + imageID: europe-west2-docker.pkg.dev/acme-group-sdlc-infra/docker/acc-history-aggregator@sha256:65f2d47fa6df644cfbd0336a49e5d3d76eaffb0aa9f3e5037bf6c2f3f4e3bc62 + started: true + lastState: {} + containerID: containerd://e59a6dfc0b03b062236f23ad622b1baf8e405f30cabf841cb8c15846ed52368a + restartCount: 0 diff --git a/pkg/health/testdata/Kubernetes/Pod/oomkilled-old.yaml b/pkg/health/testdata/Kubernetes/Pod/oomkilled-old.yaml new file mode 100644 index 0000000..fcde26b --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Pod/oomkilled-old.yaml @@ -0,0 +1,113 @@ +apiVersion: v1 +kind: Pod +metadata: + uid: def4df27-ade6-4ba1-b2dc-57d07e12bfe6 + name: oomkilled-pod + namespace: mission-control + annotations: + expected-status: Running + expected-health: healthy + creationTimestamp: 2024-11-20T06:57:31Z +spec: + volumes: + - name: kube-api-access-c5fxw + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + apiVersion: v1 + defaultMode: 420 + nodeName: gke-hub-cluster-private-pool-containe-b918c0a5-x9j3 + priority: 0 + dnsPolicy: ClusterFirst + containers: + - args: + - --vm + - "1" + - --vm-bytes + - 250M + - --vm-hang + - "1" + name: oomkilled + image: polinux/stress + command: + - stress + resources: + limits: + memory: 200Mi + requests: + memory: 100Mi + volumeMounts: + - name: kube-api-access-c5fxw + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + imagePullPolicy: Always + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tolerations: + - key: node.kubernetes.io/not-ready + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + restartPolicy: Always + schedulerName: default-scheduler + serviceAccount: default + securityContext: {} + preemptionPolicy: PreemptLowerPriority + enableServiceLinks: true + serviceAccountName: default + terminationGracePeriodSeconds: 30 +status: + phase: Running + podIP: 10.192.17.29 + hostIP: 10.192.1.58 + podIPs: + - ip: 10.192.17.29 + hostIPs: + - ip: 10.192.1.58 + qosClass: 
Burstable + startTime: 2024-11-20T06:57:31Z + conditions: + - type: Initialized + status: "True" + - type: PodReadyToStartContainers + status: "True" + - type: PodScheduled + status: "True" + - type: Ready + reason: ContainersNotReady + status: "True" + message: "containers with unready status: [oomkilled]" + containerStatuses: + - name: oomkilled + image: docker.io/polinux/stress:latest + ready: true + state: + running: + startedAt: "@now-5m" + imageID: docker.io/polinux/stress@sha256:b6144f84f9c15dac80deb48d3a646b55c7043ab1d83ea0a697c09097aaad21aa + started: false + lastState: + terminated: + reason: OOMKilled + exitCode: 1 + startedAt: 2024-11-20T07:18:53Z + finishedAt: "@now-5d" + containerID: containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + containerID: containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + restartCount: 9 diff --git a/pkg/health/testdata/pod-evicted.yaml b/pkg/health/testdata/Kubernetes/Pod/pod-evicted.yaml similarity index 92% rename from pkg/health/testdata/pod-evicted.yaml rename to pkg/health/testdata/Kubernetes/Pod/pod-evicted.yaml index a5bec56..1c10a4e 100644 --- a/pkg/health/testdata/pod-evicted.yaml +++ b/pkg/health/testdata/Kubernetes/Pod/pod-evicted.yaml @@ -5,6 +5,8 @@ metadata: namespace: default annotations: kubernetes.io/eviction: 'true' + expected-status: Evicted + expected-health: warning spec: containers: - name: busybox diff --git a/pkg/health/testdata/pod-failed.yaml b/pkg/health/testdata/Kubernetes/Pod/pod-failed.yaml similarity index 96% rename from pkg/health/testdata/pod-failed.yaml rename to pkg/health/testdata/Kubernetes/Pod/pod-failed.yaml index db9718a..3312edb 100644 --- a/pkg/health/testdata/pod-failed.yaml +++ b/pkg/health/testdata/Kubernetes/Pod/pod-failed.yaml @@ -4,6 +4,9 @@ metadata: creationTimestamp: 2018-12-02T09:17:56Z name: my-pod namespace: argocd + annotations: + expected-status: Failed + expected-health: unhealthy resourceVersion: "151243" selfLink: /api/v1/namespaces/argocd/pods/my-pod uid: 27c0fdf5-f613-11e8-a057-fe5f49266390 diff --git a/pkg/health/testdata/pod-imagepullbackoff.yaml b/pkg/health/testdata/Kubernetes/Pod/pod-imagepullbackoff.yaml similarity index 96% rename from pkg/health/testdata/pod-imagepullbackoff.yaml rename to pkg/health/testdata/Kubernetes/Pod/pod-imagepullbackoff.yaml index 51ef010..ae678de 100644 --- a/pkg/health/testdata/pod-imagepullbackoff.yaml +++ b/pkg/health/testdata/Kubernetes/Pod/pod-imagepullbackoff.yaml @@ -7,6 +7,9 @@ metadata: app: guestbook-ui pod-template-hash: "2279996225" name: guestbook-ui-errimagepullbackoff-66cfffb669-45w2j + annotations: + expected-status: ImagePullBackOff + expected-health: unhealthy namespace: default ownerReferences: - apiVersion: extensions/v1beta1 diff --git a/pkg/health/testdata/pod-succeeded.yaml b/pkg/health/testdata/Kubernetes/Pod/pod-succeeded.yaml similarity index 96% rename from pkg/health/testdata/pod-succeeded.yaml rename to pkg/health/testdata/Kubernetes/Pod/pod-succeeded.yaml index de3dc75..374b5f4 100644 --- a/pkg/health/testdata/pod-succeeded.yaml +++ b/pkg/health/testdata/Kubernetes/Pod/pod-succeeded.yaml @@ -6,6 +6,9 @@ metadata: namespace: argocd resourceVersion: "151066" selfLink: /api/v1/namespaces/argocd/pods/my-pod + annotations: + expected-status: Completed + expected-health: healthy uid: c86e909c-f612-11e8-a057-fe5f49266390 spec: containers: diff --git a/pkg/health/testdata/Kubernetes/Pod/pod-terminated.yaml b/pkg/health/testdata/Kubernetes/Pod/pod-terminated.yaml new file mode 
100644 index 0000000..4f6c30b --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Pod/pod-terminated.yaml @@ -0,0 +1,133 @@ +apiVersion: v1 +kind: Pod +metadata: + uid: b18e39d3-1301-4ac9-afb9-da3295261aa0 + name: config-test-q9kfv + labels: {} + namespace: flux-system + annotations: + expected-status: Terminating + expected-health: unknown + finalizers: + - batch.kubernetes.io/job-tracking + generateName: config-test- + ownerReferences: + - uid: c9f2c95e-3564-4631-959c-921ac410c030 + kind: Job + name: config-test + apiVersion: batch/v1 + controller: true + blockOwnerDeletion: true + creationTimestamp: 2024-11-14T12:40:17Z +spec: + volumes: + - name: kube-api-access-7hdzn + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + apiVersion: v1 + defaultMode: 420 + nodeName: gke-hub-cluster-private-pool-containe-bf9b9895-9gpx + priority: 0 + dnsPolicy: ClusterFirst + containers: + - name: kubeconfig-updater + image: flanksource/base-image:latest + command: + - /bin/bash + - -c + - > + while read -r NAME NAMESPACE; do + CLUSTER=$(kubectl get containercluster $NAME -n $NAMESPACE -o yaml) + LOCATION=$(echo "$CLUSTER" | yq '.spec.location') + PROJECT=$(echo "$CLUSTER" | yq '.metadata.annotations."cnrm.cloud.google.com/project-id"') + export KUBECONFIG="$NAME-$LOCATION-$PROJECT" + export TOKEN=$(gcloud auth print-access-token) + gcloud container clusters get-credentials $NAME --location $LOCATION --project $PROJECT + yq -i '.users[].user.token = strenv(TOKEN) | del(.users[].user.exec)' $KUBECONFIG + kubectl create secret generic $NAME-kubeconfig -n $NAMESPACE --from-file=kubeconfig=$KUBECONFIG --dry-run=client -o yaml | kubectl apply -f - + done < <(kubectl get containercluster -A -o custom-columns=NAME:.metadata.name,NAMESPACE:.metadata.namespace | grep -v NAME) + resources: {} + volumeMounts: + - name: kube-api-access-7hdzn + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + imagePullPolicy: Always + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tolerations: + - key: node.kubernetes.io/not-ready + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + restartPolicy: OnFailure + schedulerName: default-scheduler + serviceAccount: kustomize-controller + securityContext: {} + preemptionPolicy: PreemptLowerPriority + enableServiceLinks: true + serviceAccountName: kustomize-controller + terminationGracePeriodSeconds: 30 +status: + phase: Running + podIP: 10.1.115.11 + hostIP: 10.1.238.8 + podIPs: + - ip: 10.1.115.11 + hostIPs: + - ip: 10.1.238.8 + qosClass: BestEffort + startTime: 2024-11-14T12:40:17Z + conditions: + - type: ContainersReady + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [kubeconfig-updater]" + - type: Initialized + status: "True" + - type: PodReadyToStartContainers + status: "True" + - type: PodScheduled + status: "True" + - type: Ready + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [kubeconfig-updater]" + containerStatuses: + - name: kubeconfig-updater + image: docker.io/flanksource/base-image:latest + ready: false + state: + terminated: + reason: Error + exitCode: 1 + startedAt: 2024-11-14T12:40:22Z + finishedAt: 
2024-11-14T12:40:24Z + containerID: containerd://3743fce5828cad78b261d52d2b5c27bfb4436a2ce55f454962fc5669d1c0dff1 + imageID: docker.io/flanksource/base-image@sha256:8d3fe5816e10e0eb0e74ef30dbbc66d54402dcbdab80b72c7461811a05825dbc + started: false + lastState: + terminated: + reason: Error + exitCode: 1 + startedAt: 2024-11-14T12:40:18Z + finishedAt: 2024-11-14T12:40:21Z + containerID: containerd://b4c3b97a5495e10d80202c1879b5aee7d6720c13dde573163f832a3231d35886 + containerID: containerd://3743fce5828cad78b261d52d2b5c27bfb4436a2ce55f454962fc5669d1c0dff1 + restartCount: 1 diff --git a/pkg/health/testdata/Kubernetes/Pod/unhealthy.yaml b/pkg/health/testdata/Kubernetes/Pod/unhealthy.yaml new file mode 100644 index 0000000..1454295 --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Pod/unhealthy.yaml @@ -0,0 +1,118 @@ +apiVersion: v1 +kind: Pod +metadata: + uid: def4df27-ade6-4ba1-b2dc-57d07e12bfe6 + name: oomkilled-pod + namespace: mission-control + annotations: + expected-status: OOMKilled + creationTimestamp: 2024-11-20T06:57:31Z +spec: + volumes: + - name: kube-api-access-c5fxw + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + apiVersion: v1 + defaultMode: 420 + nodeName: gke-hub-cluster-private-pool-containe-b918c0a5-x9j3 + priority: 0 + dnsPolicy: ClusterFirst + containers: + - args: + - --vm + - "1" + - --vm-bytes + - 250M + - --vm-hang + - "1" + name: oomkilled + image: polinux/stress + command: + - stress + resources: + limits: + memory: 200Mi + requests: + memory: 100Mi + volumeMounts: + - name: kube-api-access-c5fxw + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + imagePullPolicy: Always + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tolerations: + - key: node.kubernetes.io/not-ready + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + restartPolicy: Always + schedulerName: default-scheduler + serviceAccount: default + securityContext: {} + preemptionPolicy: PreemptLowerPriority + enableServiceLinks: true + serviceAccountName: default + terminationGracePeriodSeconds: 30 +status: + phase: Running + podIP: 10.192.17.29 + hostIP: 10.192.1.58 + podIPs: + - ip: 10.192.17.29 + hostIPs: + - ip: 10.192.1.58 + qosClass: Burstable + startTime: 2024-11-20T06:57:31Z + conditions: + - type: ContainersReady + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [oomkilled]" + - type: Initialized + status: "True" + - type: PodReadyToStartContainers + status: "True" + - type: PodScheduled + status: "True" + - type: Ready + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [oomkilled]" + containerStatuses: + - name: oomkilled + image: docker.io/polinux/stress:latest + ready: false + state: + waiting: + reason: CrashLoopBackOff + message: back-off 5m0s restarting failed container=oomkilled + pod=oomkilled-pod_mission-control(def4df27-ade6-4ba1-b2dc-57d07e12bfe6) + imageID: docker.io/polinux/stress@sha256:b6144f84f9c15dac80deb48d3a646b55c7043ab1d83ea0a697c09097aaad21aa + started: false + lastState: + terminated: + reason: OOMKilled + exitCode: 1 + startedAt: 2024-11-20T07:18:53Z + finishedAt: "@now-5m" + containerID: 
containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + containerID: containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + restartCount: 9 diff --git a/pkg/health/testdata/Kubernetes/Pod/warning.yaml b/pkg/health/testdata/Kubernetes/Pod/warning.yaml new file mode 100644 index 0000000..1b45b7e --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Pod/warning.yaml @@ -0,0 +1,117 @@ +apiVersion: v1 +kind: Pod +metadata: + uid: def4df27-ade6-4ba1-b2dc-57d07e12bfe6 + name: oomkilled-pod + namespace: mission-control + annotations: + expected-status: OOMKilled + expected-health: warning + creationTimestamp: 2024-11-20T06:57:31Z +spec: + volumes: + - name: kube-api-access-c5fxw + projected: + sources: + - serviceAccountToken: + path: token + expirationSeconds: 3607 + - configMap: + name: kube-root-ca.crt + items: + - key: ca.crt + path: ca.crt + - downwardAPI: + items: + - path: namespace + fieldRef: + fieldPath: metadata.namespace + apiVersion: v1 + defaultMode: 420 + nodeName: gke-hub-cluster-private-pool-containe-b918c0a5-x9j3 + priority: 0 + dnsPolicy: ClusterFirst + containers: + - args: + - --vm + - "1" + - --vm-bytes + - 250M + - --vm-hang + - "1" + name: oomkilled + image: polinux/stress + command: + - stress + resources: + limits: + memory: 200Mi + requests: + memory: 100Mi + volumeMounts: + - name: kube-api-access-c5fxw + readOnly: true + mountPath: /var/run/secrets/kubernetes.io/serviceaccount + imagePullPolicy: Always + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + tolerations: + - key: node.kubernetes.io/not-ready + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + - key: node.kubernetes.io/unreachable + effect: NoExecute + operator: Exists + tolerationSeconds: 300 + restartPolicy: Always + schedulerName: default-scheduler + serviceAccount: default + securityContext: {} + preemptionPolicy: PreemptLowerPriority + enableServiceLinks: true + serviceAccountName: default + terminationGracePeriodSeconds: 30 +status: + phase: Running + podIP: 10.192.17.29 + hostIP: 10.192.1.58 + podIPs: + - ip: 10.192.17.29 + hostIPs: + - ip: 10.192.1.58 + qosClass: Burstable + startTime: 2024-11-20T06:57:31Z + conditions: + - type: ContainersReady + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [oomkilled]" + - type: Initialized + status: "True" + - type: PodReadyToStartContainers + status: "True" + - type: PodScheduled + status: "True" + - type: Ready + reason: ContainersNotReady + status: "False" + message: "containers with unready status: [oomkilled]" + containerStatuses: + - name: oomkilled + image: docker.io/polinux/stress:latest + ready: false + state: + running: + startedAt: "@now-5m" + imageID: docker.io/polinux/stress@sha256:b6144f84f9c15dac80deb48d3a646b55c7043ab1d83ea0a697c09097aaad21aa + started: false + lastState: + terminated: + reason: OOMKilled + exitCode: 1 + startedAt: 2024-11-20T07:18:53Z + finishedAt: "@now-2h" + containerID: containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + containerID: containerd://21377c37d28bbac13cb6d41d4bac8de3f3bb57ffdf62523594464198157bb4e9 + restartCount: 9 diff --git a/pkg/health/testdata/pod-terminating.yaml b/pkg/health/testdata/pod-terminating.yaml deleted file mode 100644 index 807138c..0000000 --- a/pkg/health/testdata/pod-terminating.yaml +++ /dev/null @@ -1,111 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - annotations: - kubectl.kubernetes.io/last-applied-configuration: | - 
{"apiVersion":"v1","kind":"Pod","metadata":{"annotations":{},"finalizers":["example.com/test-finalizer"],"name":"test-pod","namespace":"default"},"spec":{"containers":[{"command":["sh","-c","while true; do echo hello; sleep 10;done"],"image":"busybox","name":"test-container"}]}} - creationTimestamp: "2024-07-01T05:51:36Z" - deletionGracePeriodSeconds: 0 - deletionTimestamp: "2024-07-01T06:52:22Z" - finalizers: - - example.com/test-finalizer - name: test-pod - namespace: default - resourceVersion: "58029548" - uid: 4bb10d70-5481-41e9-bf05-43b740bf6ffa -spec: - containers: - - command: - - sh - - -c - - while true; do echo hello; sleep 10;done - image: busybox - imagePullPolicy: Always - name: test-container - resources: {} - terminationMessagePath: /dev/termination-log - terminationMessagePolicy: File - volumeMounts: - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-784np - readOnly: true - dnsPolicy: ClusterFirst - enableServiceLinks: true - nodeName: esr - preemptionPolicy: PreemptLowerPriority - priority: 0 - restartPolicy: Always - schedulerName: default-scheduler - securityContext: {} - serviceAccount: default - serviceAccountName: default - terminationGracePeriodSeconds: 30 - tolerations: - - effect: NoExecute - key: node.kubernetes.io/not-ready - operator: Exists - tolerationSeconds: 300 - - effect: NoExecute - key: node.kubernetes.io/unreachable - operator: Exists - tolerationSeconds: 300 - volumes: - - name: kube-api-access-784np - projected: - defaultMode: 420 - sources: - - serviceAccountToken: - expirationSeconds: 3607 - path: token - - configMap: - items: - - key: ca.crt - path: ca.crt - name: kube-root-ca.crt - - downwardAPI: - items: - - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - path: namespace -status: - conditions: - - lastTransitionTime: "2024-07-01T08:51:36Z" - status: "True" - type: Initialized - - lastTransitionTime: "2024-07-01T08:52:53Z" - message: 'containers with unready status: [test-container]' - reason: ContainersNotReady - status: "False" - type: Ready - - lastTransitionTime: "2024-07-01T08:52:53Z" - message: 'containers with unready status: [test-container]' - reason: ContainersNotReady - status: "False" - type: ContainersReady - - lastTransitionTime: "2024-07-01T08:51:36Z" - status: "True" - type: PodScheduled - containerStatuses: - - containerID: containerd://06962418f541510abda8a61803dd03cd27cc1b309402006420d8a5e8069569ce - image: docker.io/library/busybox:latest - imageID: docker.io/library/busybox@sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7 - lastState: {} - name: test-container - ready: false - restartCount: 0 - started: false - state: - terminated: - containerID: containerd://06962418f541510abda8a61803dd03cd27cc1b309402006420d8a5e8069569ce - exitCode: 137 - finishedAt: "2024-07-01T08:52:52Z" - reason: Error - startedAt: "2024-07-01T08:52:10Z" - hostIP: 10.99.99.9 - phase: Running - podIP: 10.42.1.123 - podIPs: - - ip: 10.42.1.123 - qosClass: BestEffort - startTime: "2024-07-01T08:51:36Z" diff --git a/pkg/health/utils.go b/pkg/health/utils.go index 71b2eb2..ec7dcd0 100644 --- a/pkg/health/utils.go +++ b/pkg/health/utils.go @@ -44,6 +44,25 @@ type HealthStatus struct { order int `json:"-" yaml:"-"` } +func (hs HealthStatus) String() string { + return fmt.Sprintf("%s (%s): %s", hs.Status, hs.Health, hs.Message) +} + +func (hs HealthStatus) Merge(others ...*HealthStatus) HealthStatus { + for _, other := range others { + if other == nil { + continue + } + hs = HealthStatus{ + 
Ready: hs.Ready && other.Ready, + Health: hs.Health.Worst(other.Health), + Status: HealthStatusCode(lo.CoalesceOrEmpty(string(hs.Status), string(other.Status))), + Message: strings.Join(lo.Compact([]string{hs.Message, other.Message}), ", "), + } + } + return hs +} + func (hs *HealthStatus) AppendMessage(msg string, args ...interface{}) { if msg == "" { return
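
For review purposes, a minimal, self-contained usage sketch of the severity helpers this diff introduces (Health.IsWorseThan, Health.Worst, HealthStatus.Merge and the HealthStatus Stringer). The import path comes from go.mod above and the identifiers come from the diff itself; the message text is an illustrative value, and the expected outputs in comments assume the lowercase string values implied by IsValidHealth, so treat them as indicative rather than exact:

```go
package main

import (
	"fmt"

	"github.com/flanksource/is-healthy/pkg/health"
)

func main() {
	// Severity follows healthOrder: unknown < healthy < warning < unhealthy.
	// IsWorseThan is "at least as severe as" (CompareTo >= 0).
	fmt.Println(health.HealthUnhealthy.IsWorseThan(health.HealthWarning)) // true

	// Worst returns the most severe of the receiver and its arguments.
	fmt.Println(health.HealthHealthy.Worst(health.HealthUnknown, health.HealthWarning)) // warning

	// Merge folds container-level statuses into a pod-level one: the worst
	// health wins, the first non-empty status code is kept, messages join.
	pod := health.HealthStatus{Health: health.HealthUnknown}
	oom := &health.HealthStatus{
		Status:  health.HealthStatusOOMKilled,
		Health:  health.HealthWarning,
		Message: "container restarted after exceeding its memory limit", // illustrative
	}
	fmt.Println(pod.Merge(oom)) // e.g. "OOMKilled (warning): container restarted after ..."
}
```

Placing HealthUnknown at the bottom of healthOrder means an unknown signal can never mask a concrete finding when statuses are folded with Worst or Merge, which is what the pod logic above relies on.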