diff --git a/pkg/health/health.go b/pkg/health/health.go index 7c4326e..e0cc658 100644 --- a/pkg/health/health.go +++ b/pkg/health/health.go @@ -106,6 +106,7 @@ const ( HealthStatusRestart HealthStatusCode = "Restarting" HealthStatusStarting HealthStatusCode = "Starting" HealthStatusFailed HealthStatusCode = "Failed" + HealthStatusFailedCreate HealthStatusCode = "Failed Create" HealthStatusUnschedulable HealthStatusCode = "Unschedulable" HealthStatusUpgradeFailed HealthStatusCode = "UpgradeFailed" HealthStatusOOMKilled HealthStatusCode = "OOMKilled" diff --git a/pkg/health/health_deployment.go b/pkg/health/health_deployment.go index 69cae8f..b007104 100644 --- a/pkg/health/health_deployment.go +++ b/pkg/health/health_deployment.go @@ -54,12 +54,28 @@ func getReplicaHealth(s ReplicaStatus) *HealthStatus { gs := GetGenericStatus(s.Object) + available := gs.FindCondition("Available") + isAvailable := s.Ready > 0 + if available.Status != "" { + isAvailable = available.Status == "True" + } + progressing := gs.FindCondition("Progressing") + + failure := gs.FindCondition("ReplicaFailure") + if failure.Status == "True" { + hs.Status = HealthStatusFailedCreate + hs.Health = HealthUnhealthy + hs.Message = failure.Message + hs.Ready = true + return hs + } + isStarting := age < startDeadline isProgressDeadlineExceeded := !isStarting && (progressing.Reason == "ProgressDeadlineExceeded") - hs.Ready = progressing.Status == "True" + hs.Ready = progressing.Status == "True" && progressing.Reason != "ReplicaSetUpdated" - hs.Health = lo.Ternary(s.Ready >= s.Desired, HealthHealthy, lo.Ternary(s.Ready > 0, HealthWarning, HealthUnhealthy)) + hs.Health = lo.Ternary(isAvailable, HealthHealthy, lo.Ternary(s.Ready > 0, HealthWarning, HealthUnhealthy)) if s.Desired == 0 && s.Replicas == 0 { hs.Ready = true @@ -75,26 +91,32 @@ func getReplicaHealth(s ReplicaStatus) *HealthStatus { hs.Status = "Pending" hs.Health = HealthUnknown } - } else if s.Ready == 0 && isStarting && 
!isProgressDeadlineExceeded { - hs.Health = HealthUnknown + } else if s.Ready == 0 && isStarting { hs.Status = HealthStatusStarting - } else if s.Ready == 0 && !isStarting { - hs.Health = HealthUnhealthy - hs.Status = HealthStatusCrashLoopBackoff + } else if s.Ready == 0 { + if isProgressDeadlineExceeded { + hs.Status = HealthStatusCrashLoopBackoff + } else if isAvailable { + hs.Status = HealthStatusUpdating + } + } + + if isProgressDeadlineExceeded { + hs.Status = HealthStatusRolloutFailed + hs.Health = hs.Health.Worst(HealthWarning) } else if s.Desired == 0 && s.Replicas > 0 { hs.Status = HealthStatusScalingDown - hs.Health = lo.Ternary(isProgressDeadlineExceeded, HealthWarning, HealthHealthy) } else if s.Ready == s.Desired && s.Desired == s.Updated && s.Replicas == s.Desired { hs.Status = HealthStatusRunning - } else if s.Desired != s.Updated { - hs.Status = HealthStatusUpdating + } else if !isStarting && s.Desired != s.Updated { + hs.Status = HealthStatusRollingOut } else if s.Replicas > s.Desired { hs.Status = HealthStatusScalingDown } else if s.Replicas < s.Desired { hs.Status = HealthStatusScalingUp } - if isStarting && hs.Health == HealthUnhealthy { + if isStarting && (hs.Health == HealthUnhealthy || hs.Health == HealthWarning) { hs.Health = HealthUnknown } diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index 795d564..2075c3c 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -329,19 +329,12 @@ func TestDeploymentHealth(t *testing.T) { assertAppHealthMsg( t, "./deployment-rollout-failed.yaml", - health.HealthStatusUpdating, - health.HealthWarning, - true, - "1/2 ready, 1 updating", - ) - assertAppHealthMsg( - t, - "./testdata/deployment-progressing.yaml", - health.HealthStatusUpdating, + health.HealthStatusRollingOut, health.HealthWarning, true, "1/2 ready, 1 updating", ) + assertAppHealthMsg( t, "./testdata/deployment-suspended.yaml", @@ -353,20 +346,12 @@ func TestDeploymentHealth(t *testing.T) { 
assertAppHealthMsg( t, "./testdata/deployment-degraded.yaml", - health.HealthStatusUpdating, + health.HealthStatusRollingOut, health.HealthWarning, true, "1/2 ready, 1 updating", ) - assertAppHealthMsg( - t, - "./testdata/deployment-starting.yaml", - health.HealthStatusStarting, - health.HealthUnknown, - true, - "0/2 ready, 1 updating", - ) assertAppHealthMsg( t, "./testdata/deployment-scaling-down.yaml", @@ -375,14 +360,7 @@ func TestDeploymentHealth(t *testing.T) { true, "1/1 ready, 1 updating, 1 terminating", ) - assertAppHealthMsg( - t, - "./testdata/deployment-failed.yaml", - "Failed Create", - health.HealthUnhealthy, - false, - "0/1 ready", - ) + } func TestStatefulSetHealth(t *testing.T) { diff --git a/pkg/health/testdata/deployment-failed.yaml b/pkg/health/testdata/Kubernetes/Deployment/deployment-failed.yaml similarity index 88% rename from pkg/health/testdata/deployment-failed.yaml rename to pkg/health/testdata/Kubernetes/Deployment/deployment-failed.yaml index 6136a69..45ef505 100644 --- a/pkg/health/testdata/deployment-failed.yaml +++ b/pkg/health/testdata/Kubernetes/Deployment/deployment-failed.yaml @@ -7,6 +7,11 @@ metadata: control-plane: karina-operator namespace: platform-system annotations: + expected-status: Failed Create + expected-ready: "true" + expected-health: "unhealthy" + expected-message: 'pods "karina-c7585bd87-" is forbidden: error looking up service + account platform-system/karina: serviceaccount "karina" not found' deployment.kubernetes.io/revision: "1" creationTimestamp: 2023-05-10T08:11:03Z spec: diff --git a/pkg/health/testdata/deployment-progressing.yaml b/pkg/health/testdata/Kubernetes/Deployment/deployment-progressing.yaml similarity index 75% rename from pkg/health/testdata/deployment-progressing.yaml rename to pkg/health/testdata/Kubernetes/Deployment/deployment-progressing.yaml index cf61d45..a6ba5cc 100644 --- a/pkg/health/testdata/deployment-progressing.yaml +++ 
b/pkg/health/testdata/Kubernetes/Deployment/deployment-progressing.yaml @@ -2,9 +2,11 @@ apiVersion: apps/v1 kind: Deployment metadata: annotations: - deployment.kubernetes.io/revision: "4" - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/instance":"guestbook-default"},"name":"guestbook-ui","namespace":"default"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"guestbook-ui"}},"template":{"metadata":{"labels":{"app":"guestbook-ui","app.kubernetes.io/instance":"guestbook-default"}},"spec":{"containers":[{"image":"gcr.io/heptio-images/ks-guestbook-demo:0.3","name":"guestbook-ui","ports":[{"containerPort":80}]}]}}}} + expected-status: Rolling Out + expected-health: healthy + expected-message: "1/2 ready, 1 updating" + expected-replicas: "2" + expected-ready: "false" creationTimestamp: 2018-07-18T04:40:44Z generation: 4 labels: diff --git a/pkg/health/testdata/deployment-rollout-failed-unhealthy.yaml b/pkg/health/testdata/Kubernetes/Deployment/deployment-rollout-failed-unhealthy.yaml similarity index 94% rename from pkg/health/testdata/deployment-rollout-failed-unhealthy.yaml rename to pkg/health/testdata/Kubernetes/Deployment/deployment-rollout-failed-unhealthy.yaml index 512ee39..cbdab15 100644 --- a/pkg/health/testdata/deployment-rollout-failed-unhealthy.yaml +++ b/pkg/health/testdata/Kubernetes/Deployment/deployment-rollout-failed-unhealthy.yaml @@ -2,6 +2,9 @@ apiVersion: apps/v1 kind: Deployment metadata: name: guestbook-ui + annotations: + expected-health: warning + expected-status: Rollout Failed spec: progressDeadlineSeconds: 600 replicas: 2 diff --git a/pkg/health/testdata/deployment-starting.yaml b/pkg/health/testdata/Kubernetes/Deployment/deployment-starting.yaml similarity index 95% rename from pkg/health/testdata/deployment-starting.yaml rename to pkg/health/testdata/Kubernetes/Deployment/deployment-starting.yaml index eec0a6d..a54a603 100644 
--- a/pkg/health/testdata/deployment-starting.yaml +++ b/pkg/health/testdata/Kubernetes/Deployment/deployment-starting.yaml @@ -5,6 +5,9 @@ metadata: labels: app.kubernetes.io/instance: guestbook-default name: guestbook-ui + annotations: + expected-status: Starting + expected-health: unknown namespace: default spec: progressDeadlineSeconds: 600 diff --git a/pkg/health/testdata/Kubernetes/Deployment/restart.yaml b/pkg/health/testdata/Kubernetes/Deployment/restart.yaml new file mode 100644 index 0000000..0fa5a1e --- /dev/null +++ b/pkg/health/testdata/Kubernetes/Deployment/restart.yaml @@ -0,0 +1,124 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + uid: 8d347012-945e-4e95-9b75-66aad2c923be + name: podinfo2 + labels: + helm.sh/chart: podinfo-6.7.1 + app.kubernetes.io/name: podinfo2 + app.kubernetes.io/version: 6.7.1 + helm.toolkit.fluxcd.io/name: podinfo2 + app.kubernetes.io/managed-by: Helm + helm.toolkit.fluxcd.io/namespace: flux-092532 + namespace: flux-092532 + annotations: + expected-status: Updating + expected-ready: "false" + expected-health: "healthy" + + meta.helm.sh/release-name: podinfo2 + meta.helm.sh/release-namespace: flux-092532 + deployment.kubernetes.io/revision: "2" + creationTimestamp: 2024-11-03T19:53:13Z +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: podinfo2 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 25% + maxUnavailable: 1 + template: + spec: + volumes: + - name: data + emptyDir: {} + dnsPolicy: ClusterFirst + containers: + - env: + - name: PODINFO_UI_COLOR + value: "#34577c" + name: podinfo + image: ghcr.io/stefanprodan/podinfo:6.7.1 + ports: + - name: http + protocol: TCP + containerPort: 9898 + - name: http-metrics + protocol: TCP + containerPort: 9797 + - name: grpc + protocol: TCP + containerPort: 9999 + command: + - ./podinfo + - --port=9898 + - --cert-path=/data/cert + - --port-metrics=9797 + - --grpc-port=9999 + - --grpc-service-name=podinfo + - --level=info + - --random-delay=false + 
- --random-error=false + resources: + requests: + cpu: 1m + memory: 16Mi + volumeMounts: + - name: data + mountPath: /data + livenessProbe: + exec: + command: + - podcli + - check + - http + - localhost:9898/healthz + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + initialDelaySeconds: 1 + readinessProbe: + exec: + command: + - podcli + - check + - http + - localhost:9898/readyz + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + initialDelaySeconds: 1 + imagePullPolicy: IfNotPresent + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + metadata: + labels: + app.kubernetes.io/name: podinfo2 + annotations: + prometheus.io/port: "9898" + prometheus.io/scrape: "true" + kubectl.kubernetes.io/restartedAt: 2024-12-01T20:55:04Z + revisionHistoryLimit: 10 + progressDeadlineSeconds: 600 +status: + replicas: 1 + conditions: + - type: Available + reason: MinimumReplicasAvailable + status: "True" + message: Deployment has minimum availability. + - type: Progressing + reason: ReplicaSetUpdated + status: "True" + message: ReplicaSet "podinfo2-8c499d45b" is progressing. 
+ updatedReplicas: 1 + unavailableReplicas: 1 diff --git a/pkg/health/testdata/deployment-degraded.yaml b/pkg/health/testdata/deployment-degraded.yaml index 7f49967..8eaf556 100644 --- a/pkg/health/testdata/deployment-degraded.yaml +++ b/pkg/health/testdata/deployment-degraded.yaml @@ -2,9 +2,6 @@ apiVersion: apps/v1 kind: Deployment metadata: annotations: - deployment.kubernetes.io/revision: "4" - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/instance":"guestbook-default"},"name":"guestbook-ui","namespace":"default"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"guestbook-ui"}},"template":{"metadata":{"labels":{"app":"guestbook-ui","app.kubernetes.io/instance":"guestbook-default"}},"spec":{"containers":[{"image":"gcr.io/heptio-images/ks-guestbook-demo:0.3","name":"guestbook-ui","ports":[{"containerPort":80}]}]}}}} creationTimestamp: 2018-07-18T04:40:44Z generation: 4 labels: @@ -55,7 +52,7 @@ status: lastUpdateTime: 2018-07-18T04:48:48Z message: Deployment has minimum availability. reason: MinimumReplicasAvailable - status: "True" + status: "False" type: Available - lastTransitionTime: 2018-07-18T06:29:23Z lastUpdateTime: 2018-07-18T06:29:23Z