-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: pod stuck in "terminating" for > 15m (#61)
* feat: pod stuck in "terminating" for > 15m
* chore: on pod delete test set the deletion timestamp to 1m ago
* chore: address review comments
- Loading branch information
1 parent 0850096, commit 68bb6ec
Showing 4 changed files with 159 additions and 23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,29 +7,47 @@ package health_test | |
import ( | ||
"os" | ||
"testing" | ||
"time" | ||
|
||
"github.com/flanksource/is-healthy/pkg/health" | ||
"github.com/flanksource/is-healthy/pkg/lua" | ||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
v1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" | ||
"sigs.k8s.io/yaml" | ||
) | ||
|
||
func assertAppHealth(t *testing.T, yamlPath string, expectedStatus health.HealthStatusCode, expectedHealth health.Health, expectedReady bool) { | ||
health := getHealthStatus(yamlPath, t) | ||
health := getHealthStatus(yamlPath, t, nil) | ||
assert.NotNil(t, health) | ||
assert.Equal(t, expectedHealth, health.Health) | ||
assert.Equal(t, expectedReady, health.Ready) | ||
assert.Equal(t, expectedStatus, health.Status) | ||
} | ||
|
||
func getHealthStatus(yamlPath string, t *testing.T) *health.HealthStatus { | ||
func assertAppHealthWithOverwrite(t *testing.T, yamlPath string, overwrites map[string]any, expectedStatus health.HealthStatusCode, expectedHealth health.Health, expectedReady bool) { | ||
health := getHealthStatus(yamlPath, t, overwrites) | ||
assert.NotNil(t, health) | ||
assert.Equal(t, expectedHealth, health.Health) | ||
assert.Equal(t, expectedReady, health.Ready) | ||
assert.Equal(t, expectedStatus, health.Status) | ||
} | ||
|
||
func getHealthStatus(yamlPath string, t *testing.T, overwrites map[string]any) *health.HealthStatus { | ||
yamlBytes, err := os.ReadFile(yamlPath) | ||
require.NoError(t, err) | ||
var obj unstructured.Unstructured | ||
err = yaml.Unmarshal(yamlBytes, &obj) | ||
require.NoError(t, err) | ||
|
||
for k, v := range overwrites { | ||
switch k { | ||
case "deletionTimestamp": | ||
obj.SetDeletionTimestamp(v.(*v1.Time)) | ||
} | ||
} | ||
|
||
health, err := health.GetResourceHealth(&obj, lua.ResourceHealthOverrides{}) | ||
require.NoError(t, err) | ||
return health | ||
|
@@ -126,6 +144,10 @@ func TestHPA(t *testing.T) { | |
} | ||
|
||
func TestPod(t *testing.T) { | ||
assertAppHealth(t, "./testdata/pod-terminating.yaml", health.HealthStatusTerminating, health.HealthWarning, false) | ||
status := getHealthStatus("./testdata/pod-terminating.yaml", t, nil) | ||
assert.Contains(t, status.Message, "stuck in 'Terminating' for") | ||
|
||
assertAppHealth(t, "./testdata/pod-pending.yaml", health.HealthStatusPending, health.HealthUnknown, false) | ||
assertAppHealth(t, "./testdata/pod-running-not-ready.yaml", health.HealthStatusStarting, health.HealthUnknown, false) | ||
assertAppHealth(t, "./testdata/pod-crashloop.yaml", health.HealthStatusCrashLoopBackoff, health.HealthUnhealthy, false) | ||
|
@@ -136,8 +158,11 @@ func TestPod(t *testing.T) { | |
assertAppHealth(t, "./testdata/pod-running-restart-onfailure.yaml", health.HealthStatusRunning, health.HealthUnhealthy, false) | ||
assertAppHealth(t, "./testdata/pod-failed.yaml", health.HealthStatusError, health.HealthUnhealthy, true) | ||
assertAppHealth(t, "./testdata/pod-succeeded.yaml", health.HealthStatusCompleted, health.HealthHealthy, true) | ||
assertAppHealth(t, "./testdata/pod-deletion.yaml", health.HealthStatusTerminating, health.HealthUnhealthy, false) | ||
assertAppHealth(t, "./testdata/pod-init-container-fail.yaml", health.HealthStatusCrashLoopBackoff, health.HealthUnhealthy, false) | ||
|
||
assertAppHealthWithOverwrite(t, "./testdata/pod-deletion.yaml", map[string]any{ | ||
"deletionTimestamp": &v1.Time{Time: time.Now().Add(-time.Minute)}, | ||
}, health.HealthStatusTerminating, health.HealthUnknown, false) | ||
} | ||
|
||
// func TestAPIService(t *testing.T) { | ||
|
@@ -206,13 +231,13 @@ func TestFluxResources(t *testing.T) { | |
assertAppHealth(t, "./testdata/flux-kustomization-healthy.yaml", "Succeeded", health.HealthHealthy, true) | ||
assertAppHealth(t, "./testdata/flux-kustomization-unhealthy.yaml", "Progressing", health.HealthUnknown, false) | ||
assertAppHealth(t, "./testdata/flux-kustomization-failed.yaml", "BuildFailed", health.HealthUnhealthy, false) | ||
status := getHealthStatus("./testdata/flux-kustomization-failed.yaml", t) | ||
status := getHealthStatus("./testdata/flux-kustomization-failed.yaml", t, nil) | ||
assert.Contains(t, status.Message, "err='accumulating resources from 'kubernetes_resource_ingress_fail.yaml'") | ||
|
||
assertAppHealth(t, "./testdata/flux-helmrelease-healthy.yaml", "ReconciliationSucceeded", health.HealthHealthy, true) | ||
assertAppHealth(t, "./testdata/flux-helmrelease-unhealthy.yaml", "UpgradeFailed", health.HealthUnhealthy, true) | ||
assertAppHealth(t, "./testdata/flux-helmrelease-upgradefailed.yaml", "UpgradeFailed", health.HealthUnhealthy, true) | ||
helmreleaseStatus := getHealthStatus("./testdata/flux-helmrelease-upgradefailed.yaml", t) | ||
helmreleaseStatus := getHealthStatus("./testdata/flux-helmrelease-upgradefailed.yaml", t, nil) | ||
assert.Contains(t, helmreleaseStatus.Message, "Helm upgrade failed for release mission-control-agent/prod-kubernetes-bundle with chart [email protected]: YAML parse error on mission-control-kubernetes/templates/topology.yaml: error converting YAML to JSON: yaml: line 171: did not find expected '-' indicator") | ||
assert.Equal(t, helmreleaseStatus.Status, health.HealthStatusUpgradeFailed) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
apiVersion: v1 | ||
kind: Pod | ||
metadata: | ||
annotations: | ||
kubectl.kubernetes.io/last-applied-configuration: | | ||
{"apiVersion":"v1","kind":"Pod","metadata":{"annotations":{},"finalizers":["example.com/test-finalizer"],"name":"test-pod","namespace":"default"},"spec":{"containers":[{"command":["sh","-c","while true; do echo hello; sleep 10;done"],"image":"busybox","name":"test-container"}]}} | ||
creationTimestamp: "2024-07-01T05:51:36Z" | ||
deletionGracePeriodSeconds: 0 | ||
deletionTimestamp: "2024-07-01T06:52:22Z" | ||
finalizers: | ||
- example.com/test-finalizer | ||
name: test-pod | ||
namespace: default | ||
resourceVersion: "58029548" | ||
uid: 4bb10d70-5481-41e9-bf05-43b740bf6ffa | ||
spec: | ||
containers: | ||
- command: | ||
- sh | ||
- -c | ||
- while true; do echo hello; sleep 10;done | ||
image: busybox | ||
imagePullPolicy: Always | ||
name: test-container | ||
resources: {} | ||
terminationMessagePath: /dev/termination-log | ||
terminationMessagePolicy: File | ||
volumeMounts: | ||
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount | ||
name: kube-api-access-784np | ||
readOnly: true | ||
dnsPolicy: ClusterFirst | ||
enableServiceLinks: true | ||
nodeName: esr | ||
preemptionPolicy: PreemptLowerPriority | ||
priority: 0 | ||
restartPolicy: Always | ||
schedulerName: default-scheduler | ||
securityContext: {} | ||
serviceAccount: default | ||
serviceAccountName: default | ||
terminationGracePeriodSeconds: 30 | ||
tolerations: | ||
- effect: NoExecute | ||
key: node.kubernetes.io/not-ready | ||
operator: Exists | ||
tolerationSeconds: 300 | ||
- effect: NoExecute | ||
key: node.kubernetes.io/unreachable | ||
operator: Exists | ||
tolerationSeconds: 300 | ||
volumes: | ||
- name: kube-api-access-784np | ||
projected: | ||
defaultMode: 420 | ||
sources: | ||
- serviceAccountToken: | ||
expirationSeconds: 3607 | ||
path: token | ||
- configMap: | ||
items: | ||
- key: ca.crt | ||
path: ca.crt | ||
name: kube-root-ca.crt | ||
- downwardAPI: | ||
items: | ||
- fieldRef: | ||
apiVersion: v1 | ||
fieldPath: metadata.namespace | ||
path: namespace | ||
status: | ||
conditions: | ||
- lastTransitionTime: "2024-07-01T08:51:36Z" | ||
status: "True" | ||
type: Initialized | ||
- lastTransitionTime: "2024-07-01T08:52:53Z" | ||
message: 'containers with unready status: [test-container]' | ||
reason: ContainersNotReady | ||
status: "False" | ||
type: Ready | ||
- lastTransitionTime: "2024-07-01T08:52:53Z" | ||
message: 'containers with unready status: [test-container]' | ||
reason: ContainersNotReady | ||
status: "False" | ||
type: ContainersReady | ||
- lastTransitionTime: "2024-07-01T08:51:36Z" | ||
status: "True" | ||
type: PodScheduled | ||
containerStatuses: | ||
- containerID: containerd://06962418f541510abda8a61803dd03cd27cc1b309402006420d8a5e8069569ce | ||
image: docker.io/library/busybox:latest | ||
imageID: docker.io/library/busybox@sha256:9ae97d36d26566ff84e8893c64a6dc4fe8ca6d1144bf5b87b2b85a32def253c7 | ||
lastState: {} | ||
name: test-container | ||
ready: false | ||
restartCount: 0 | ||
started: false | ||
state: | ||
terminated: | ||
containerID: containerd://06962418f541510abda8a61803dd03cd27cc1b309402006420d8a5e8069569ce | ||
exitCode: 137 | ||
finishedAt: "2024-07-01T08:52:52Z" | ||
reason: Error | ||
startedAt: "2024-07-01T08:52:10Z" | ||
hostIP: 10.99.99.9 | ||
phase: Running | ||
podIP: 10.42.1.123 | ||
podIPs: | ||
- ip: 10.42.1.123 | ||
qosClass: BestEffort | ||
startTime: "2024-07-01T08:51:36Z" |