Skip to content

Commit

Permalink
Add config fields to allow disabling docker monitoring and timeouts w…
Browse files Browse the repository at this point in the history
…hen executing docker commands

Some minor refactoring to reuse new code
  • Loading branch information
nikita-vanyasin committed May 28, 2020
1 parent 63aea12 commit 45fcba9
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 22 deletions.
7 changes: 7 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ type Config struct {
ProcessMonitoring processes.Config `toml:"process_monitoring" comment:"Cagent monitors all running processes and reports them for further processing to the Hub.\nOn heavy loaded systems or if you don't need process monitoring at all,\nyou can change the following settings."`

Updates UpdatesConfig `toml:"self_update" comment:"Control how cagent installs self-updates. Windows-only"`

DockerMonitoring DockerMonitoringConfig `toml:"docker_monitoring" comment:"Cagent monitors all running docker containers and reports them for further processing to the Hub.\nYou can change the following settings."`
}

type ConfigDeprecated struct {
Expand All @@ -152,6 +154,10 @@ type UpdatesMonitoringConfig struct {
CheckInterval uint32 `toml:"check_interval" comment:"Check for available updates every N seconds. Minimum is 300 seconds"`
}

// DockerMonitoringConfig holds the settings of the "docker_monitoring" config section.
type DockerMonitoringConfig struct {
	// Enabled switches the collection of docker container measurements on or off.
	Enabled bool `toml:"enabled" comment:"Set 'false' to disable docker monitoring"`
}

func (l *UpdatesMonitoringConfig) Validate() error {
if l.FetchTimeout >= l.CheckInterval {
return errors.New("fetch_timeout should be less than check_interval")
Expand Down Expand Up @@ -281,6 +287,7 @@ func NewConfig() *Config {
Enabled: false,
CheckInterval: 21600,
},
DockerMonitoring: DockerMonitoringConfig{Enabled: true},
}

cfg.MinValuableConfig = *(defaultMinValuableConfig())
Expand Down
5 changes: 5 additions & 0 deletions example.config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,8 @@ software_raid_monitoring = true
[self_update]
enabled = true # Set to false to disable self-updates
check_interval = 21600 # Cagent will check for new versions every N seconds

# Cagent monitors all running docker containers and reports them for further processing to the Hub.
# You can change the following settings.
[docker_monitoring]
enabled = true
10 changes: 6 additions & 4 deletions handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,13 @@ func (ca *Cagent) collectMeasurements(fullMode bool) (common.MeasurementsMap, Cl
}
measurements = measurements.AddWithPrefix("services.", servicesList)

containersList, err := docker.ListContainers()
if err != docker.ErrorNotImplementedForOS && err != docker.ErrorDockerNotAvailable {
errCollector.Add(err)
if cfg.DockerMonitoring.Enabled {
containersList, err := docker.ListContainers()
if err != docker.ErrorNotImplementedForOS && err != docker.ErrorDockerNotAvailable {
errCollector.Add(err)
}
measurements = measurements.AddWithPrefix("docker.", containersList)
}
measurements = measurements.AddWithPrefix("docker.", containersList)

if cfg.TemperatureMonitoring {
temperatures, err := sensors.ReadTemperatureSensors()
Expand Down
18 changes: 18 additions & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@ import (
"context"
"crypto/sha256"
"encoding/json"
"errors"
"math"
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/sirupsen/logrus"
)

// ErrCommandExecutionTimeout is the error RunCommandWithTimeout returns when the command's timeout is exceeded.
var ErrCommandExecutionTimeout = errors.New("command execution timeout exceeded")

// Invoker executes command in context and gathers stdout/stderr output into slice
type Invoker interface {
CommandWithContext(context.Context, string, ...string) ([]byte, error)
Expand Down Expand Up @@ -54,6 +58,20 @@ func RunCommandInBackground(name string, arg ...string) ([]byte, error) {
return RunCommandWithContext(context.Background(), name, arg...)
}

// RunCommandWithTimeout runs the named command with the given arguments and
// returns its standard output.
//
// The command runs under a context that expires after timeout. If the command
// fails and that deadline has passed, the returned error is
// ErrCommandExecutionTimeout; any other failure is returned exactly as
// reported by exec.Cmd.Output.
func RunCommandWithTimeout(timeout time.Duration, name string, arg ...string) ([]byte, error) {
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	result, err := exec.CommandContext(ctx, name, arg...).Output()
	// Only translate real failures: a command that finished successfully just
	// before the deadline must not be reported as timed out merely because the
	// context expired between its completion and this check.
	if err != nil && ctx.Err() == context.DeadlineExceeded {
		err = ErrCommandExecutionTimeout
	}
	return result, err
}

func MergeStringMaps(mapA, mapB map[string]interface{}) map[string]interface{} {
for k, v := range mapB {
mapA[k] = v
Expand Down
9 changes: 6 additions & 3 deletions pkg/monitoring/docker/docker_notwindows.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"os/exec"
"strings"
"time"

"github.com/cloudradar-monitoring/cagent/pkg/common"
)

type dockerPsOutput struct {
Expand All @@ -19,6 +21,7 @@ type dockerPsOutput struct {
}

// NOTE(review): presumably the period for which the result of the docker
// availability check is cached before it is re-evaluated — confirm against isDockerAvailable.
const dockerAvailabilityCheckCacheExpiration = 1 * time.Minute

// cmdExecTimeout is the timeout passed to common.RunCommandWithTimeout for the docker commands in this file.
const cmdExecTimeout = 10 * time.Second

// dockerIsAvailable holds the outcome of the availability check done in isDockerAvailable.
var dockerIsAvailable bool

// NOTE(review): presumably the time of the last availability check, nil until the first one — confirm.
var dockerAvailabilityLastRequestedAt *time.Time
Expand All @@ -35,7 +38,7 @@ func isDockerAvailable() bool {
dockerIsAvailable = err == nil

if dockerIsAvailable {
err = exec.Command("/bin/sh", "-c", "sudo docker info").Run()
_, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", "sudo docker info")
if err != nil {
log.WithError(err).Debug("while executing 'docker info' to check if docker is available")
}
Expand Down Expand Up @@ -78,7 +81,7 @@ func ListContainers() (map[string]interface{}, error) {
return nil, ErrorDockerNotAvailable
}

out, err := exec.Command("/bin/sh", "-c", "sudo docker ps -a --format \"{{ json . }}\"").Output()
out, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", "sudo docker ps -a --format \"{{ json . }}\"")
if err != nil {
if ee, ok := err.(*exec.ExitError); ok {
err = errors.New(ee.Error() + ": " + string(ee.Stderr))
Expand Down Expand Up @@ -122,7 +125,7 @@ func ContainerNameByID(id string) (string, error) {
return "", ErrorDockerNotAvailable
}

out, err := exec.Command("/bin/sh", "-c", fmt.Sprintf("sudo docker inspect --format \"{{ .Name }}\" %s", id)).Output()
out, err := common.RunCommandWithTimeout(cmdExecTimeout, "/bin/sh", "-c", fmt.Sprintf("sudo docker inspect --format \"{{ .Name }}\" %s", id))
if err != nil {
if ee, ok := err.(*exec.ExitError); ok {
err = errors.New(ee.Error() + ": " + string(ee.Stderr))
Expand Down
12 changes: 4 additions & 8 deletions pkg/monitoring/updates/apt.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
package updates

import (
"context"
"fmt"
"os/exec"
"strconv"
"strings"
"time"

"github.com/pkg/errors"

"github.com/cloudradar-monitoring/cagent/pkg/common"
)

type pkgMgrApt struct {
Expand All @@ -21,13 +22,8 @@ func (a *pkgMgrApt) GetBinaryPath() string {
}

func (a *pkgMgrApt) FetchUpdates(timeout time.Duration) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

cmd := exec.CommandContext(ctx, "sudo", a.GetBinaryPath(), "update", "-q", "-y")

err := cmd.Run()
if ctx.Err() == context.DeadlineExceeded {
_, err := common.RunCommandWithTimeout(timeout, "sudo", a.GetBinaryPath(), "update", "-q", "-y")
if err == common.ErrCommandExecutionTimeout {
return fmt.Errorf("timeout of %s exceeded while fetching new updates", timeout)
}

Expand Down
11 changes: 4 additions & 7 deletions pkg/monitoring/updates/yum.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"time"

"github.com/pkg/errors"

"github.com/cloudradar-monitoring/cagent/pkg/common"
)

type pkgMgrYUM struct {
Expand All @@ -35,13 +37,8 @@ func (a *pkgMgrYUM) FetchUpdates(timeout time.Duration) error {
}

func (a *pkgMgrYUM) fetchTotalUpdates(timeout time.Duration) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

cmd := exec.CommandContext(ctx, "sudo", a.GetBinaryPath(), "-q", "check-update")

out, err := cmd.Output()
if ctx.Err() == context.DeadlineExceeded {
out, err := common.RunCommandWithTimeout(timeout, "sudo", a.GetBinaryPath(), "-q", "check-update")
if err == common.ErrCommandExecutionTimeout {
return fmt.Errorf("timeout of %s exceeded while fetching new updates", timeout)
}

Expand Down

0 comments on commit 45fcba9

Please sign in to comment.