Skip to content

Commit

Permalink
fix: deployment health fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
moshloop committed Dec 5, 2024
1 parent eaa9bff commit fa7cb08
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 44 deletions.
1 change: 1 addition & 0 deletions pkg/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ const (
HealthStatusRestart HealthStatusCode = "Restarting"
HealthStatusStarting HealthStatusCode = "Starting"
HealthStatusFailed HealthStatusCode = "Failed"
HealthStatusFailedCreate HealthStatusCode = "Failed Create"
HealthStatusUnschedulable HealthStatusCode = "Unschedulable"
HealthStatusUpgradeFailed HealthStatusCode = "UpgradeFailed"
HealthStatusOOMKilled HealthStatusCode = "OOMKilled"
Expand Down
44 changes: 33 additions & 11 deletions pkg/health/health_deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,28 @@ func getReplicaHealth(s ReplicaStatus) *HealthStatus {

gs := GetGenericStatus(s.Object)

available := gs.FindCondition("Available")
isAvailable := s.Ready > 0
if available.Status != "" {
isAvailable = available.Status == "True"
}

progressing := gs.FindCondition("Progressing")

failure := gs.FindCondition("ReplicaFailure")
if failure.Status == "True" {
hs.Status = HealthStatusFailedCreate
hs.Health = HealthUnhealthy
hs.Message = failure.Message
hs.Ready = true
return hs
}

isStarting := age < startDeadline
isProgressDeadlineExceeded := !isStarting && (progressing.Reason == "ProgressDeadlineExceeded")
hs.Ready = progressing.Status == "True"
hs.Ready = progressing.Status == "True" && progressing.Reason != "ReplicaSetUpdated"

hs.Health = lo.Ternary(s.Ready >= s.Desired, HealthHealthy, lo.Ternary(s.Ready > 0, HealthWarning, HealthUnhealthy))
hs.Health = lo.Ternary(isAvailable, HealthHealthy, lo.Ternary(s.Ready > 0, HealthWarning, HealthUnhealthy))

if s.Desired == 0 && s.Replicas == 0 {
hs.Ready = true
Expand All @@ -75,26 +91,32 @@ func getReplicaHealth(s ReplicaStatus) *HealthStatus {
hs.Status = "Pending"
hs.Health = HealthUnknown
}
} else if s.Ready == 0 && isStarting && !isProgressDeadlineExceeded {
hs.Health = HealthUnknown
} else if s.Ready == 0 && isStarting {
hs.Status = HealthStatusStarting
} else if s.Ready == 0 && !isStarting {
hs.Health = HealthUnhealthy
hs.Status = HealthStatusCrashLoopBackoff
} else if s.Ready == 0 {
if isProgressDeadlineExceeded {
hs.Status = HealthStatusCrashLoopBackoff
} else if isAvailable {
hs.Status = HealthStatusUpdating
}
}

if isProgressDeadlineExceeded {
hs.Status = HealthStatusRolloutFailed
hs.Health = hs.Health.Worst(HealthWarning)
} else if s.Desired == 0 && s.Replicas > 0 {
hs.Status = HealthStatusScalingDown
hs.Health = lo.Ternary(isProgressDeadlineExceeded, HealthWarning, HealthHealthy)
} else if s.Ready == s.Desired && s.Desired == s.Updated && s.Replicas == s.Desired {
hs.Status = HealthStatusRunning
} else if s.Desired != s.Updated {
hs.Status = HealthStatusUpdating
} else if !isStarting && s.Desired != s.Updated {
hs.Status = HealthStatusRollingOut
} else if s.Replicas > s.Desired {
hs.Status = HealthStatusScalingDown
} else if s.Replicas < s.Desired {
hs.Status = HealthStatusScalingUp
}

if isStarting && hs.Health == HealthUnhealthy {
if isStarting && (hs.Health == HealthUnhealthy || hs.Health == HealthWarning) {
hs.Health = HealthUnknown
}

Expand Down
30 changes: 4 additions & 26 deletions pkg/health/health_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,19 +329,12 @@ func TestDeploymentHealth(t *testing.T) {
assertAppHealthMsg(
t,
"./deployment-rollout-failed.yaml",
health.HealthStatusUpdating,
health.HealthWarning,
true,
"1/2 ready, 1 updating",
)
assertAppHealthMsg(
t,
"./testdata/deployment-progressing.yaml",
health.HealthStatusUpdating,
health.HealthStatusRollingOut,
health.HealthWarning,
true,
"1/2 ready, 1 updating",
)

assertAppHealthMsg(
t,
"./testdata/deployment-suspended.yaml",
Expand All @@ -353,20 +346,12 @@ func TestDeploymentHealth(t *testing.T) {
assertAppHealthMsg(
t,
"./testdata/deployment-degraded.yaml",
health.HealthStatusUpdating,
health.HealthStatusRollingOut,
health.HealthWarning,
true,
"1/2 ready, 1 updating",
)

assertAppHealthMsg(
t,
"./testdata/deployment-starting.yaml",
health.HealthStatusStarting,
health.HealthUnknown,
true,
"0/2 ready, 1 updating",
)
assertAppHealthMsg(
t,
"./testdata/deployment-scaling-down.yaml",
Expand All @@ -375,14 +360,7 @@ func TestDeploymentHealth(t *testing.T) {
true,
"1/1 ready, 1 updating, 1 terminating",
)
assertAppHealthMsg(
t,
"./testdata/deployment-failed.yaml",
"Failed Create",
health.HealthUnhealthy,
false,
"0/1 ready",
)

}

func TestStatefulSetHealth(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ metadata:
control-plane: karina-operator
namespace: platform-system
annotations:
expected-status: Failed Create
expected-ready: "true"
expected-health: "unhealthy"
expected-message: 'pods "karina-c7585bd87-" is forbidden: error looking up service
account platform-system/karina: serviceaccount "karina" not found'
deployment.kubernetes.io/revision: "1"
creationTimestamp: 2023-05-10T08:11:03Z
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
deployment.kubernetes.io/revision: "4"
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/instance":"guestbook-default"},"name":"guestbook-ui","namespace":"default"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"guestbook-ui"}},"template":{"metadata":{"labels":{"app":"guestbook-ui","app.kubernetes.io/instance":"guestbook-default"}},"spec":{"containers":[{"image":"gcr.io/heptio-images/ks-guestbook-demo:0.3","name":"guestbook-ui","ports":[{"containerPort":80}]}]}}}}
expected-status: Rolling Out
expected-health: healthy
expected-message: "1/2 ready, 1 updating"
expected-replicas: "2"
expected-ready: "false"
creationTimestamp: 2018-07-18T04:40:44Z
generation: 4
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ apiVersion: apps/v1
kind: Deployment
metadata:
name: guestbook-ui
annotations:
expected-health: warning
expected-status: Rollout Failed
spec:
progressDeadlineSeconds: 600
replicas: 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ metadata:
labels:
app.kubernetes.io/instance: guestbook-default
name: guestbook-ui
annotations:
expected-status: Starting
expected-health: unknown
namespace: default
spec:
progressDeadlineSeconds: 600
Expand Down
124 changes: 124 additions & 0 deletions pkg/health/testdata/Kubernetes/Deployment/restart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
apiVersion: apps/v1
kind: Deployment
metadata:
uid: 8d347012-945e-4e95-9b75-66aad2c923be
name: podinfo2
labels:
helm.sh/chart: podinfo-6.7.1
app.kubernetes.io/name: podinfo2
app.kubernetes.io/version: 6.7.1
helm.toolkit.fluxcd.io/name: podinfo2
app.kubernetes.io/managed-by: Helm
helm.toolkit.fluxcd.io/namespace: flux-092532
namespace: flux-092532
annotations:
expected-status: Updating
expected-ready: "false"
expected-health: "healthy"

meta.helm.sh/release-name: podinfo2
meta.helm.sh/release-namespace: flux-092532
deployment.kubernetes.io/revision: "2"
creationTimestamp: 2024-11-03T19:53:13Z
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: podinfo2
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 25%
maxUnavailable: 1
template:
spec:
volumes:
- name: data
emptyDir: {}
dnsPolicy: ClusterFirst
containers:
- env:
- name: PODINFO_UI_COLOR
value: "#34577c"
name: podinfo
image: ghcr.io/stefanprodan/podinfo:6.7.1
ports:
- name: http
protocol: TCP
containerPort: 9898
- name: http-metrics
protocol: TCP
containerPort: 9797
- name: grpc
protocol: TCP
containerPort: 9999
command:
- ./podinfo
- --port=9898
- --cert-path=/data/cert
- --port-metrics=9797
- --grpc-port=9999
- --grpc-service-name=podinfo
- --level=info
- --random-delay=false
- --random-error=false
resources:
requests:
cpu: 1m
memory: 16Mi
volumeMounts:
- name: data
mountPath: /data
livenessProbe:
exec:
command:
- podcli
- check
- http
- localhost:9898/healthz
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
successThreshold: 1
initialDelaySeconds: 1
readinessProbe:
exec:
command:
- podcli
- check
- http
- localhost:9898/readyz
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
successThreshold: 1
initialDelaySeconds: 1
imagePullPolicy: IfNotPresent
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
metadata:
labels:
app.kubernetes.io/name: podinfo2
annotations:
prometheus.io/port: "9898"
prometheus.io/scrape: "true"
kubectl.kubernetes.io/restartedAt: 2024-12-01T20:55:04Z
revisionHistoryLimit: 10
progressDeadlineSeconds: 600
status:
replicas: 1
conditions:
- type: Available
reason: MinimumReplicasAvailable
status: "True"
message: Deployment has minimum availability.
- type: Progressing
reason: ReplicaSetUpdated
status: "True"
message: ReplicaSet "podinfo2-8c499d45b" is progressing.
updatedReplicas: 1
unavailableReplicas: 1
5 changes: 1 addition & 4 deletions pkg/health/testdata/deployment-degraded.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
deployment.kubernetes.io/revision: "4"
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"app.kubernetes.io/instance":"guestbook-default"},"name":"guestbook-ui","namespace":"default"},"spec":{"replicas":1,"selector":{"matchLabels":{"app":"guestbook-ui"}},"template":{"metadata":{"labels":{"app":"guestbook-ui","app.kubernetes.io/instance":"guestbook-default"}},"spec":{"containers":[{"image":"gcr.io/heptio-images/ks-guestbook-demo:0.3","name":"guestbook-ui","ports":[{"containerPort":80}]}]}}}}
creationTimestamp: 2018-07-18T04:40:44Z
generation: 4
labels:
Expand Down Expand Up @@ -55,7 +52,7 @@ status:
lastUpdateTime: 2018-07-18T04:48:48Z
message: Deployment has minimum availability.
reason: MinimumReplicasAvailable
status: "True"
status: "False"
type: Available
- lastTransitionTime: 2018-07-18T06:29:23Z
lastUpdateTime: 2018-07-18T06:29:23Z
Expand Down

0 comments on commit fa7cb08

Please sign in to comment.