diff --git a/config.go b/config.go index 3ae79e9..18e91c6 100644 --- a/config.go +++ b/config.go @@ -127,6 +127,8 @@ type Config struct { ProcessMonitoring processes.Config `toml:"process_monitoring" comment:"Cagent monitors all running processes and reports them for further processing to the Hub.\nOn heavy loaded systems or if you don't need process monitoring at all,\nyou can change the following settings."` Updates UpdatesConfig `toml:"self_update" comment:"Control how cagent installs self-updates. Windows-only"` + + DockerMonitoring DockerMonitoringConfig `toml:"docker_monitoring" comment:"Cagent monitors all running docker containers and reports them for further processing to the Hub.\nYou can change the following settings."` } type ConfigDeprecated struct { @@ -152,6 +154,10 @@ type UpdatesMonitoringConfig struct { CheckInterval uint32 `toml:"check_interval" comment:"Check for available updates every N seconds. Minimum is 300 seconds"` } +type DockerMonitoringConfig struct { + Enabled bool `toml:"enabled" comment:"Set 'false' to disable docker monitoring'"` +} + func (l *UpdatesMonitoringConfig) Validate() error { if l.FetchTimeout >= l.CheckInterval { return errors.New("fetch_timeout should be less than check_interval") @@ -281,6 +287,7 @@ func NewConfig() *Config { Enabled: false, CheckInterval: 21600, }, + DockerMonitoring: DockerMonitoringConfig{Enabled: true}, } cfg.MinValuableConfig = *(defaultMinValuableConfig()) diff --git a/example.config.toml b/example.config.toml index 1db6c1b..56164e9 100644 --- a/example.config.toml +++ b/example.config.toml @@ -128,3 +128,8 @@ software_raid_monitoring = true [self_update] enabled = true # Set to false to disable self-updates check_interval = 21600 # Cagent will check for new versions every N seconds + +# Cagent monitors all running docker containers and reports them for further processing to the Hub. +# You can change the following settings. 
+[docker_monitoring] + enabled = true \ No newline at end of file diff --git a/handler.go b/handler.go index 2e1de72..4090acb 100644 --- a/handler.go +++ b/handler.go @@ -165,11 +165,13 @@ func (ca *Cagent) collectMeasurements(fullMode bool) (common.MeasurementsMap, Cl } measurements = measurements.AddWithPrefix("services.", servicesList) - containersList, err := docker.ListContainers() - if err != docker.ErrorNotImplementedForOS && err != docker.ErrorDockerNotAvailable { - errCollector.Add(err) + if cfg.DockerMonitoring.Enabled { + containersList, err := docker.ListContainers() + if err != docker.ErrorNotImplementedForOS && err != docker.ErrorDockerNotAvailable { + errCollector.Add(err) + } + measurements = measurements.AddWithPrefix("docker.", containersList) } - measurements = measurements.AddWithPrefix("docker.", containersList) if cfg.TemperatureMonitoring { temperatures, err := sensors.ReadTemperatureSensors() diff --git a/pkg/common/common.go b/pkg/common/common.go index 672ac07..906278c 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -6,15 +6,19 @@ import ( "context" "crypto/sha256" "encoding/json" + "errors" "math" "os" "os/exec" "path/filepath" "strings" + "time" "github.com/sirupsen/logrus" ) +var ErrCommandExecutionTimeout = errors.New("command execution timeout exceeded") + // Invoker executes command in context and gathers stdout/stderr output into slice type Invoker interface { CommandWithContext(context.Context, string, ...string) ([]byte, error) @@ -54,6 +58,20 @@ func RunCommandInBackground(name string, arg ...string) ([]byte, error) { return RunCommandWithContext(context.Background(), name, arg...) } +// RunCommandWithTimeout runs command and returns its standard output. 
If the timeout is exceeded, the returned error is ErrCommandExecutionTimeout +func RunCommandWithTimeout(timeout time.Duration, name string, arg ...string) ([]byte, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, name, arg...) + + result, err := cmd.Output() + if ctx.Err() == context.DeadlineExceeded { + err = ErrCommandExecutionTimeout + } + return result, err +} + func MergeStringMaps(mapA, mapB map[string]interface{}) map[string]interface{} { for k, v := range mapB { mapA[k] = v diff --git a/pkg/monitoring/docker/docker_notwindows.go b/pkg/monitoring/docker/docker_notwindows.go index 6dd5890..c2c7f28 100644 --- a/pkg/monitoring/docker/docker_notwindows.go +++ b/pkg/monitoring/docker/docker_notwindows.go @@ -9,6 +9,8 @@ import ( "os/exec" "strings" "time" + + "github.com/cloudradar-monitoring/cagent/pkg/common" ) type dockerPsOutput struct { @@ -19,6 +21,7 @@ type dockerPsOutput struct { } const dockerAvailabilityCheckCacheExpiration = 1 * time.Minute +const cmdExecTimeout = 10 * time.Second var dockerIsAvailable bool var dockerAvailabilityLastRequestedAt *time.Time @@ -35,7 +38,7 @@ func isDockerAvailable() bool { dockerIsAvailable = err == nil if dockerIsAvailable { - err = exec.Command("/bin/sh", "-c", "sudo docker info").Run() + _, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", "sudo docker info") if err != nil { log.WithError(err).Debug("while executing 'docker info' to check if docker is available") } @@ -78,7 +81,7 @@ func ListContainers() (map[string]interface{}, error) { return nil, ErrorDockerNotAvailable } - out, err := exec.Command("/bin/sh", "-c", "sudo docker ps -a --format \"{{ json . }}\"").Output() + out, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", "sudo docker ps -a --format \"{{ json . 
}}\"") if err != nil { if ee, ok := err.(*exec.ExitError); ok { err = errors.New(ee.Error() + ": " + string(ee.Stderr)) @@ -122,7 +125,7 @@ func ContainerNameByID(id string) (string, error) { return "", ErrorDockerNotAvailable } - out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("sudo docker inspect --format \"{{ .Name }}\" %s", id)).Output() + out, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", fmt.Sprintf("sudo docker inspect --format \"{{ .Name }}\" %s", id)) if err != nil { if ee, ok := err.(*exec.ExitError); ok { err = errors.New(ee.Error() + ": " + string(ee.Stderr)) diff --git a/pkg/monitoring/updates/apt.go b/pkg/monitoring/updates/apt.go index 8e8f31b..482f65c 100644 --- a/pkg/monitoring/updates/apt.go +++ b/pkg/monitoring/updates/apt.go @@ -3,7 +3,6 @@ package updates import ( - "context" "fmt" "os/exec" "strconv" @@ -11,6 +10,8 @@ import ( "time" "github.com/pkg/errors" + + "github.com/cloudradar-monitoring/cagent/pkg/common" ) type pkgMgrApt struct { @@ -21,13 +22,8 @@ func (a *pkgMgrApt) GetBinaryPath() string { } func (a *pkgMgrApt) FetchUpdates(timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - cmd := exec.CommandContext(ctx, "sudo", a.GetBinaryPath(), "update", "-q", "-y") - - err := cmd.Run() - if ctx.Err() == context.DeadlineExceeded { + _, err := common.RunCommandWithTimeout(timeout, "sudo", a.GetBinaryPath(), "update", "-q", "-y") + if err == common.ErrCommandExecutionTimeout { return fmt.Errorf("timeout of %s exceeded while fetching new updates", timeout) } diff --git a/pkg/monitoring/updates/yum.go b/pkg/monitoring/updates/yum.go index 9b576da..3a5c77a 100644 --- a/pkg/monitoring/updates/yum.go +++ b/pkg/monitoring/updates/yum.go @@ -10,6 +10,8 @@ import ( "time" "github.com/pkg/errors" + + "github.com/cloudradar-monitoring/cagent/pkg/common" ) type pkgMgrYUM struct { @@ -35,13 +37,8 @@ func (a *pkgMgrYUM) FetchUpdates(timeout time.Duration) error { 
} func (a *pkgMgrYUM) fetchTotalUpdates(timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - cmd := exec.CommandContext(ctx, "sudo", a.GetBinaryPath(), "-q", "check-update") - - out, err := cmd.Output() - if ctx.Err() == context.DeadlineExceeded { + out, err := common.RunCommandWithTimeout(timeout, "sudo", a.GetBinaryPath(), "-q", "check-update") + if err == common.ErrCommandExecutionTimeout { return fmt.Errorf("timeout of %s exceeded while fetching new updates", timeout) }