Skip to content

Commit

Permalink
Merge pull request #23900 from Honny1/healthcheck-log
Browse files Browse the repository at this point in the history
HealthCheck log output options
  • Loading branch information
openshift-merge-bot[bot] authored Sep 26, 2024
2 parents bf30466 + de856da commit 4e38381
Show file tree
Hide file tree
Showing 33 changed files with 701 additions and 238 deletions.
24 changes: 24 additions & 0 deletions cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,30 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(healthIntervalFlagName, completion.AutocompleteNone)

healthLogDestinationFlagName := "health-log-destination"
createFlags.StringVar(
&cf.HealthLogDestination,
healthLogDestinationFlagName, define.DefaultHealthCheckLocalDestination,
"set the destination of the HealthCheck log. Directory path, local or events_logger (local use container state file)",
)
_ = cmd.RegisterFlagCompletionFunc(healthLogDestinationFlagName, completion.AutocompleteNone)

healthMaxLogCountFlagName := "health-max-log-count"
createFlags.UintVar(
&cf.HealthMaxLogCount,
healthMaxLogCountFlagName, define.DefaultHealthMaxLogCount,
"set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file)",
)
_ = cmd.RegisterFlagCompletionFunc(healthMaxLogCountFlagName, completion.AutocompleteNone)

healthMaxLogSizeFlagName := "health-max-log-size"
createFlags.UintVar(
&cf.HealthMaxLogSize,
healthMaxLogSizeFlagName, define.DefaultHealthMaxLogSize,
"set maximum length in characters of stored HealthCheck log. ('0' value means an infinite log length)",
)
_ = cmd.RegisterFlagCompletionFunc(healthMaxLogSizeFlagName, completion.AutocompleteNone)

healthRetriesFlagName := "health-retries"
createFlags.UintVar(
&cf.HealthRetries,
Expand Down
3 changes: 3 additions & 0 deletions cmd/podman/common/create_opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,7 @@ func DefineCreateDefaults(opts *entities.ContainerCreateOptions) {
opts.Ulimit = ulimits()
opts.SeccompPolicy = "default"
opts.Volume = volumes()
opts.HealthLogDestination = define.DefaultHealthCheckLocalDestination
opts.HealthMaxLogCount = define.DefaultHealthMaxLogCount
opts.HealthMaxLogSize = define.DefaultHealthMaxLogSize
}
11 changes: 11 additions & 0 deletions docs/source/markdown/options/health-log-destination.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
####> This option file is used in:
####> podman create, run
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--health-log-destination**=*directory_path*

Set the destination of the HealthCheck log: a directory path, `local`, or `events_logger` (`local` uses the container state file). (Default: local)

* `local`: (default) HealthCheck logs are stored in overlay containers. (For example: `$runroot/healthcheck.log`)
* `directory`: creates a log file named `<container-ID>-healthcheck.log` with HealthCheck logs in the specified directory.
* `events_logger`: The log will be written with logging mechanism set by events_logger. It also saves the log to a default directory, for performance on a system with a large number of logs.
7 changes: 7 additions & 0 deletions docs/source/markdown/options/health-max-log-count.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
####> This option file is used in:
####> podman create, run
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--health-max-log-count**=*number of stored logs*

Set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file) (Default: 5 attempts)
7 changes: 7 additions & 0 deletions docs/source/markdown/options/health-max-log-size.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
####> This option file is used in:
####> podman create, run
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--health-max-log-size**=*size of stored logs*

Set maximum length in characters of stored HealthCheck log. ("0" value means an infinite log length) (Default: 500 characters)
6 changes: 6 additions & 0 deletions docs/source/markdown/podman-create.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,12 @@ See [**Environment**](#environment) note below for precedence and examples.

@@option health-interval

@@option health-log-destination

@@option health-max-log-count

@@option health-max-log-size

@@option health-on-failure

@@option health-retries
Expand Down
6 changes: 6 additions & 0 deletions docs/source/markdown/podman-run.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ See [**Environment**](#environment) note below for precedence and examples.

@@option health-interval

@@option health-log-destination

@@option health-max-log-count

@@option health-max-log-size

@@option health-on-failure

@@option health-retries
Expand Down
25 changes: 25 additions & 0 deletions docs/source/markdown/podman-systemd.unit.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ Valid options for `[Container]` are listed below:
| GroupAdd=keep-groups | --group-add=keep-groups |
| HealthCmd=/usr/bin/command | --health-cmd=/usr/bin/command |
| HealthInterval=2m | --health-interval=2m |
| HealthLogDestination=/foo/log | --health-log-destination=/foo/log |
| HealthMaxLogCount=5 | --health-max-log-count=5 |
| HealthMaxLogSize=500 | --health-max-log-size=500 |
| HealthOnFailure=kill | --health-on-failure=kill |
| HealthRetries=5 | --health-retries=5 |
| HealthStartPeriod=1m | --health-start-period=1m |
Expand Down Expand Up @@ -514,6 +517,28 @@ Equivalent to the Podman `--health-cmd` option.
Set an interval for the healthchecks. An interval of disable results in no automatic timer setup.
Equivalent to the Podman `--health-interval` option.

### `HealthLogDestination=`

Set the destination of the HealthCheck log: a directory path, `local`, or `events_logger` (`local` uses the container state file).
(Default: local)
Equivalent to the Podman `--health-log-destination` option.

* `local`: (default) HealthCheck logs are stored in overlay containers. (For example: `$runroot/healthcheck.log`)
* `directory`: creates a log file named `<container-ID>-healthcheck.log` with HealthCheck logs in the specified directory.
* `events_logger`: The log will be written with logging mechanism set by events_logger. It also saves the log to a default directory, for performance on a system with a large number of logs.

### `HealthMaxLogCount=`

Set maximum number of attempts in the HealthCheck log file. ('0' value means an infinite number of attempts in the log file)
(Default: 5 attempts)
Equivalent to the Podman `--health-max-log-count` option.

### `HealthMaxLogSize=`

Set maximum length in characters of stored HealthCheck log. ("0" value means an infinite log length)
(Default: 500 characters)
Equivalent to the Podman `--health-max-log-size` option.

### `HealthOnFailure=`

Action to take once the container transitions to an unhealthy state.
Expand Down
8 changes: 8 additions & 0 deletions libpod/container_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,14 @@ type ContainerMiscConfig struct {
HealthCheckConfig *manifest.Schema2HealthConfig `json:"healthcheck"`
// HealthCheckOnFailureAction defines an action to take once the container turns unhealthy.
HealthCheckOnFailureAction define.HealthCheckOnFailureAction `json:"healthcheck_on_failure_action"`
// HealthLogDestination defines the destination where the log is stored
HealthLogDestination string `json:"healthLogDestination,omitempty"`
// HealthMaxLogCount is maximum number of attempts in the HealthCheck log file.
// ('0' value means an infinite number of attempts in the log file)
HealthMaxLogCount uint `json:"healthMaxLogCount,omitempty"`
// HealthMaxLogSize is the maximum length in characters of stored HealthCheck log
// ("0" value means an infinite log length)
HealthMaxLogSize uint `json:"healthMaxLogSize,omitempty"`
// StartupHealthCheckConfig is the configuration of the startup
// healthcheck for the container. This will run before the regular HC
// runs, and when it passes the regular HC will be activated.
Expand Down
8 changes: 7 additions & 1 deletion libpod/container_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
// inspect status should be set to nil.
if c.config.HealthCheckConfig != nil && !(len(c.config.HealthCheckConfig.Test) == 1 && c.config.HealthCheckConfig.Test[0] == "NONE") {
// This container has a healthcheck defined in it; we need to add its state
healthCheckState, err := c.getHealthCheckLog()
healthCheckState, err := c.readHealthCheckLog()
if err != nil {
// An error here is not considered fatal; no health state will be displayed
logrus.Error(err)
Expand Down Expand Up @@ -426,6 +426,12 @@ func (c *Container) generateInspectContainerConfig(spec *spec.Spec) *define.Insp

ctrConfig.HealthcheckOnFailureAction = c.config.HealthCheckOnFailureAction.String()

ctrConfig.HealthLogDestination = c.config.HealthLogDestination

ctrConfig.HealthMaxLogCount = c.config.HealthMaxLogCount

ctrConfig.HealthMaxLogSize = c.config.HealthMaxLogSize

ctrConfig.CreateCommand = c.config.CreateCommand

ctrConfig.Timezone = c.config.Timezone
Expand Down
7 changes: 3 additions & 4 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1123,10 +1123,9 @@ func (c *Container) init(ctx context.Context, retainRetries bool) error {
// bugzilla.redhat.com/show_bug.cgi?id=2144754:
// In case of a restart, make sure to remove the healthcheck log to
// have a clean state.
if path := c.healthCheckLogPath(); path != "" {
if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
logrus.Error(err)
}
err = c.writeHealthCheckLog(define.HealthCheckResults{Status: define.HealthCheckReset})
if err != nil {
return err
}

if err := c.save(); err != nil {
Expand Down
8 changes: 8 additions & 0 deletions libpod/define/container_inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ type InspectContainerConfig struct {
Healthcheck *manifest.Schema2HealthConfig `json:"Healthcheck,omitempty"`
// HealthcheckOnFailureAction defines an action to take once the container turns unhealthy.
HealthcheckOnFailureAction string `json:"HealthcheckOnFailureAction,omitempty"`
// HealthLogDestination defines the destination where the log is stored
HealthLogDestination string `json:"HealthLogDestination,omitempty"`
// HealthMaxLogCount is maximum number of attempts in the HealthCheck log file.
// ('0' value means an infinite number of attempts in the log file)
HealthMaxLogCount uint `json:"HealthcheckMaxLogCount,omitempty"`
// HealthMaxLogSize is the maximum length in characters of stored HealthCheck log
// ("0" value means an infinite log length)
HealthMaxLogSize uint `json:"HealthcheckMaxLogSize,omitempty"`
// CreateCommand is the full command plus arguments of the process the
// container has been created with.
CreateCommand []string `json:"CreateCommand,omitempty"`
Expand Down
10 changes: 10 additions & 0 deletions libpod/define/healthchecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ const (
// and the start-period (time allowed for the container to start and application
// to be running) expires.
HealthCheckStarting string = "starting"
// HealthCheckReset describes reset of HealthCheck logs
HealthCheckReset string = "reset"
)

// HealthCheckStatus represents the current state of a container
Expand Down Expand Up @@ -56,8 +58,16 @@ const (
DefaultHealthCheckStartPeriod = "0s"
// DefaultHealthCheckTimeout default value
DefaultHealthCheckTimeout = "30s"
// DefaultHealthMaxLogCount default value
DefaultHealthMaxLogCount uint = 5
// DefaultHealthMaxLogSize default value
DefaultHealthMaxLogSize uint = 500
// DefaultHealthCheckLocalDestination default value
DefaultHealthCheckLocalDestination string = "local"
)

// HealthCheckEventsLoggerDestination is the HealthLogDestination value that
// selects the events logger as the destination of the HealthCheck log.
const HealthCheckEventsLoggerDestination string = "events_logger"

// HealthConfig.Test options
const (
// HealthConfigTestNone disables healthcheck
Expand Down
21 changes: 16 additions & 5 deletions libpod/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"path/filepath"
"sync"

"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/libpod/events"
"github.com/sirupsen/logrus"
)
Expand All @@ -28,27 +29,37 @@ func (r *Runtime) newEventer() (events.Eventer, error) {

// newContainerEvent creates a new event based on a container
func (c *Container) newContainerEvent(status events.Status) {
if err := c.newContainerEventWithInspectData(status, "", false); err != nil {
if err := c.newContainerEventWithInspectData(status, define.HealthCheckResults{}, false); err != nil {
logrus.Errorf("Unable to write container event: %v", err)
}
}

// newContainerHealthCheckEvent creates a new healthcheck event with the given status
func (c *Container) newContainerHealthCheckEvent(healthStatus string) {
if err := c.newContainerEventWithInspectData(events.HealthStatus, healthStatus, false); err != nil {
func (c *Container) newContainerHealthCheckEvent(healthCheckResult define.HealthCheckResults) {
if err := c.newContainerEventWithInspectData(events.HealthStatus, healthCheckResult, false); err != nil {
logrus.Errorf("Unable to write container event: %v", err)
}
}

// newContainerEventWithInspectData creates a new event and sets the
// ContainerInspectData field if inspectData is set.
func (c *Container) newContainerEventWithInspectData(status events.Status, healthStatus string, inspectData bool) error {
func (c *Container) newContainerEventWithInspectData(status events.Status, healthCheckResult define.HealthCheckResults, inspectData bool) error {
e := events.NewEvent(status)
e.ID = c.ID()
e.Name = c.Name()
e.Image = c.config.RootfsImageName
e.Type = events.Container
e.HealthStatus = healthStatus
e.HealthStatus = healthCheckResult.Status
if c.config.HealthLogDestination == define.HealthCheckEventsLoggerDestination {
if len(healthCheckResult.Log) > 0 {
logData, err := json.Marshal(healthCheckResult.Log[len(healthCheckResult.Log)-1])
if err != nil {
return fmt.Errorf("unable to marshall healthcheck log for writing: %w", err)
}
e.HealthLog = string(logData)
}
}
e.HealthFailingStreak = healthCheckResult.FailingStreak

e.Details = events.Details{
PodID: c.PodID(),
Expand Down
4 changes: 4 additions & 0 deletions libpod/events/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ type Event struct {
Type Type
// Health status of the current container
HealthStatus string `json:"health_status,omitempty"`
// Healthcheck log of the current container
HealthLog string `json:"health_log,omitempty"`
// HealthFailingStreak is the healthcheck failing streak of the current container
HealthFailingStreak int `json:"health_failing_streak,omitempty"`
// Error code for certain events involving errors.
Error string `json:"error,omitempty"`

Expand Down
4 changes: 3 additions & 1 deletion libpod/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ func (e *Event) ToHumanReadable(truncate bool) string {
if e.PodID != "" {
humanFormat += fmt.Sprintf(", pod_id=%s", e.PodID)
}
if e.HealthStatus != "" {
if e.Status == HealthStatus {
humanFormat += fmt.Sprintf(", health_status=%s", e.HealthStatus)
humanFormat += fmt.Sprintf(", health_failing_streak=%d", e.HealthFailingStreak)
humanFormat += fmt.Sprintf(", health_log=%s", e.HealthLog)
}
// check if the container has labels and add it to the output
if len(e.Attributes) > 0 {
Expand Down
18 changes: 16 additions & 2 deletions libpod/events/journal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ func (e EventJournalD) Write(ee Event) error {
}
m["PODMAN_LABELS"] = string(b)
}
m["PODMAN_HEALTH_STATUS"] = ee.HealthStatus

if ee.Status == HealthStatus {
m["PODMAN_HEALTH_STATUS"] = ee.HealthStatus
if ee.HealthLog != "" {
m["PODMAN_HEALTH_LOG"] = ee.HealthLog
}
m["PODMAN_HEALTH_FAILING_STREAK"] = strconv.Itoa(ee.HealthFailingStreak)
}
if len(ee.Details.ContainerInspectData) > 0 {
m["PODMAN_CONTAINER_INSPECT_DATA"] = ee.Details.ContainerInspectData
}
Expand Down Expand Up @@ -225,6 +230,15 @@ func newEventFromJournalEntry(entry *sdjournal.JournalEntry) (*Event, error) {
}
}
newEvent.HealthStatus = entry.Fields["PODMAN_HEALTH_STATUS"]
if log, ok := entry.Fields["PODMAN_HEALTH_LOG"]; ok {
newEvent.HealthLog = log
}
if FailingStreak, ok := entry.Fields["PODMAN_HEALTH_FAILING_STREAK"]; ok {
FailingStreakInt, err := strconv.Atoi(FailingStreak)
if err == nil {
newEvent.HealthFailingStreak = FailingStreakInt
}
}
newEvent.Details.ContainerInspectData = entry.Fields["PODMAN_CONTAINER_INSPECT_DATA"]
case Network:
newEvent.ID = entry.Fields["PODMAN_ID"]
Expand Down
Loading

1 comment on commit 4e38381

@packit-as-a-service
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podman-next COPR build failed. @containers/packit-build please check.

Please sign in to comment.