Skip to content

Commit

Permalink
fix: time limit of restarts
Browse files: browse the repository at this point in the history
  • Loading branch information
adityathebe committed Jul 17, 2024
1 parent: 3c90293; commit: b039fc1
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 12 deletions.
21 changes: 11 additions & 10 deletions pkg/health/health_pod.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -86,18 +86,19 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) {
messages = append(messages, msg.Message)
} else if containerStatus.RestartCount > 2 && containerStatus.LastTerminationState.Terminated != nil {
lastRestarted := containerStatus.LastTerminationState.Terminated.FinishedAt.Time
status = HealthStatusCode(containerStatus.LastTerminationState.Terminated.Reason)
if time.Since(lastRestarted) < time.Minute*30 {
health = HealthUnhealthy
} else {
health = HealthWarning
return &HealthStatus{
Health: HealthUnhealthy,
Status: HealthStatusCode(containerStatus.LastTerminationState.Terminated.Reason),
Message: strings.Join(messages, ", "),
}, nil
} else if time.Since(lastRestarted) < time.Hour*8 {
return &HealthStatus{
Health: HealthWarning,
Status: HealthStatusCode(containerStatus.LastTerminationState.Terminated.Reason),
Message: strings.Join(messages, ", "),
}, nil
}

return &HealthStatus{
Health: health,
Status: status,
Message: strings.Join(messages, ", "),
}, nil
}
}

Expand Down
11 changes: 9 additions & 2 deletions pkg/health/health_test.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -143,14 +143,21 @@ func TestHPA(t *testing.T) {
}

func TestPod(t *testing.T) {
// Less than 30 minutes
assertAppHealthWithOverwrite(t, "./testdata/pod-high-restart-count.yaml", map[string]string{
"2024-07-17T14:29:51Z": time.Now().Add(-time.Minute).Format("2006-01-02T15:04:05Z"),
"2024-07-17T14:29:51Z": time.Now().UTC().Add(-time.Minute).Format("2006-01-02T15:04:05Z"),
}, "OOMKilled", health.HealthUnhealthy, false)

// Less than 8 hours
assertAppHealthWithOverwrite(t, "./testdata/pod-high-restart-count.yaml", map[string]string{
"2024-07-17T14:29:51Z": "2024-06-17T14:29:51Z",
"2024-07-17T14:29:51Z": time.Now().UTC().Add(-time.Hour).Format("2006-01-02T15:04:05Z"),
}, "OOMKilled", health.HealthWarning, false)

// More than 8 hours
assertAppHealthWithOverwrite(t, "./testdata/pod-high-restart-count.yaml", map[string]string{
"2024-07-17T14:29:51Z": "2024-06-17T14:29:51Z",
}, health.HealthStatusRunning, health.HealthHealthy, true)

assertAppHealth(t, "./testdata/pod-old-restarts.yaml", health.HealthStatusRunning, health.HealthHealthy, true)

assertAppHealth(t, "./testdata/pod-terminating.yaml", health.HealthStatusTerminating, health.HealthWarning, false)
Expand Down

0 comments on commit b039fc1

Please sign in to comment.