Improve docs on SLURM collector (#101)
* Disable slurm collector by default

* Once we add libvirt collector, operators should choose which one to enable

* Add section on SLURM config that is needed to get accounting info properly

* Improve log messages for SLURM manager

Signed-off-by: Mahendra Paipuri <[email protected]>
mahendrapaipuri authored Jun 26, 2024
1 parent bd6d37a commit 3b9c5a5
Showing 11 changed files with 135 additions and 21 deletions.
5 changes: 4 additions & 1 deletion pkg/api/resource/manager.go
@@ -62,7 +62,10 @@ func checkConfig(managers []string, config *Config[models.Cluster]) (map[string]
return nil, fmt.Errorf("unknown resource manager found in the config: %s", config.Clusters[i].Manager)
}
if base.InvalidIDRegex.MatchString(config.Clusters[i].ID) {
-return nil, fmt.Errorf("invalid ID %s found in clusters config. It must contain only [a-zA-Z0-9-_]", config.Clusters[i].ID)
+return nil, fmt.Errorf(
+	"invalid ID %s found in clusters config. It must contain only [a-zA-Z0-9-_]",
+	config.Clusters[i].ID,
+)
}
IDs = append(IDs, config.Clusters[i].ID)
configMap[config.Clusters[i].Manager] = append(configMap[config.Clusters[i].Manager], config.Clusters[i])
24 changes: 15 additions & 9 deletions pkg/api/resource/slurm/cli.go
@@ -23,13 +23,13 @@ func preflightsCLI(slurm *slurmScheduler) error {
// Assume execMode is always native
slurm.fetchMode = "cli"
slurm.cmdExecMode = "native"
level.Debug(slurm.logger).Log("msg", "SLURM jobs will be fetched using sacct command")
level.Debug(slurm.logger).Log("msg", "SLURM jobs will be fetched using CLI commands")

// If no sacct path is provided, assume it is available on PATH
if slurm.cluster.CLI.Path == "" {
path, err := exec.LookPath("sacct")
if err != nil {
level.Error(slurm.logger).Log("msg", "Failed to find sacct executable on PATH", "err", err)
level.Error(slurm.logger).Log("msg", "Failed to find SLURM utility executables on PATH", "err", err)
return err
}
slurm.cluster.CLI.Path = filepath.Dir(path)
@@ -46,46 +46,51 @@ func preflightsCLI(slurm *slurmScheduler) error {

// If current user is slurm or root pass checks
if currentUser, err := user.Current(); err == nil && (currentUser.Username == "slurm" || currentUser.Uid == "0") {
-level.Debug(slurm.logger).
-	Log("msg", "Current user have enough privileges to get job data for all users", "user", currentUser.Username)
+level.Info(slurm.logger).
+	Log("msg", "Current user has enough privileges to execute SLURM commands", "user", currentUser.Username)
return nil
}

// First try to run as slurm user in a subprocess. If current process have capabilities
// it will be a success
slurmUser, err := user.Lookup("slurm")
if err != nil {
level.Error(slurm.logger).Log("msg", "Failed to lookup SLURM user for executing sacct cmd", "err", err)
level.Debug(slurm.logger).
Log("msg", "User slurm not found. Next attempt to execute SLURM commands with sudo", "err", err)
goto sudomode
}

slurmUserUID, err = strconv.Atoi(slurmUser.Uid)
if err != nil {
level.Error(slurm.logger).Log("msg", "Failed to convert SLURM user uid to int", "uid", slurmUserUID, "err", err)
level.Debug(slurm.logger).
Log("msg", "Failed to convert SLURM user uid to int. Next attempt to execute SLURM commands with sudo", "uid", slurmUserUID, "err", err)
goto sudomode
}

slurmUserGID, err = strconv.Atoi(slurmUser.Gid)
if err != nil {
level.Error(slurm.logger).Log("msg", "Failed to convert SLURM user gid to int", "gid", slurmUserGID, "err", err)
level.Debug(slurm.logger).
Log("msg", "Failed to convert SLURM user gid to int. Next attempt to execute SLURM commands with sudo", "gid", slurmUserGID, "err", err)
goto sudomode
}

if _, err := internal_osexec.ExecuteAs(sacctPath, []string{"--help"}, slurmUserUID, slurmUserGID, nil, slurm.logger); err == nil {
slurm.cmdExecMode = "cap"
level.Debug(slurm.logger).Log("msg", "Linux capabilities will be used to execute sacct as SLURM user")
level.Info(slurm.logger).Log("msg", "Linux capabilities will be used to execute SLURM commands as slurm user")
return nil
}

sudomode:
// Last attempt to run sacct with sudo
if _, err := internal_osexec.ExecuteWithTimeout("sudo", []string{sacctPath, "--help"}, 5, nil, slurm.logger); err == nil {
slurm.cmdExecMode = "sudo"
level.Debug(slurm.logger).Log("msg", "sudo will be used to execute sacct command")
level.Info(slurm.logger).Log("msg", "sudo will be used to execute SLURM commands")
return nil
}

// If nothing works give up. In the worst case DB will be updated with only jobs from current user
+level.Warn(slurm.logger).
+	Log("msg", "SLURM commands will be executed as current user. Might not fetch jobs of all users")
return nil
}
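
The sudo fallback above only succeeds non-interactively when the exporter's user is allowed to run `sacct` via sudo without a password. A sudoers rule along the following lines would permit that; this is an illustrative sketch rather than part of the commit, and the `ceems` user name and `sacct` path are assumptions to adapt per site:

```
# /etc/sudoers.d/ceems: hypothetical rule; the "ceems" user and sacct path are placeholders
ceems ALL = NOPASSWD: /usr/bin/sacct
```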

@@ -210,6 +215,7 @@ func parseSacctCmdOutput(sacctOutput string, start time.Time, end time.Time) ([]
if jobStartTS > intStartTS {
startMark = jobStartTS
}

// If job has ended before end of interval, we should mark job's end time
// as elapsed end time.
if jobEndTS > 0 && jobEndTS < intEndTS {
5 changes: 4 additions & 1 deletion pkg/api/updater/updater.go
@@ -71,7 +71,10 @@ func checkConfig(updaters []string, config *Config[Instance]) (map[string][]Inst
return nil, fmt.Errorf("unknown updater found in the config: %s", config.Instances[i].Updater)
}
if base.InvalidIDRegex.MatchString(config.Instances[i].ID) {
-return nil, fmt.Errorf("invalid ID %s found in updaters config. It must contain only [a-zA-Z0-9-_]", config.Instances[i].ID)
+return nil, fmt.Errorf(
+	"invalid ID %s found in updaters config. It must contain only [a-zA-Z0-9-_]",
+	config.Instances[i].ID,
+)
}
IDs = append(IDs, config.Instances[i].ID)
configMap[config.Instances[i].Updater] = append(configMap[config.Instances[i].Updater], config.Instances[i])
3 changes: 1 addition & 2 deletions pkg/collector/rapl_test.go
@@ -6,7 +6,6 @@ package collector
import (
"testing"

"github.com/go-kit/log"
"github.com/prometheus/procfs/sysfs"
)

@@ -20,7 +19,7 @@ func TestRaplMetrics(t *testing.T) {
if err != nil {
t.Errorf("failed to open procfs: %v", err)
}
-c := raplCollector{fs: fs, logger: log.NewNopLogger()}
+c := raplCollector{fs: fs}
zones, err := sysfs.GetRaplZones(c.fs)
if err != nil {
t.Errorf("failed to get RAPL zones: %v", err)
2 changes: 1 addition & 1 deletion pkg/collector/slurm.go
@@ -156,7 +156,7 @@ type slurmCollector struct {
}

func init() {
-RegisterCollector(slurmCollectorSubsystem, defaultEnabled, NewSlurmCollector)
+RegisterCollector(slurmCollectorSubsystem, defaultDisabled, NewSlurmCollector)
}

// NewSlurmCollector returns a new Collector exposing a summary of cgroups.
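Since the collector is now disabled by default, deployments that need SLURM metrics must opt in explicitly, as the e2e test script and usage docs below do:

```bash
ceems_exporter --collector.slurm
```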
6 changes: 6 additions & 0 deletions scripts/e2e-test.sh
@@ -275,6 +275,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.create.unique.jobids \
--collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \
--collector.slurm.gpu.type="nvidia" \
@@ -292,6 +293,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \
--collector.slurm.gpu.type="nvidia" \
--collector.slurm.nvidia.smi.path="pkg/collector/testdata/nvidia-smi" \
@@ -307,6 +309,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.create.unique.jobids \
--collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \
--collector.slurm.gpu.type="amd" \
@@ -323,6 +326,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.create.unique.jobids \
--collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \
--collector.slurm.force.cgroups.version="v2" \
@@ -336,6 +340,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.create.unique.jobids \
--collector.slurm.gpu.type="nvidia" \
--collector.slurm.nvidia.smi.path="pkg/collector/testdata/nvidia-smi" \
@@ -351,6 +356,7 @@ then
--path.sysfs="pkg/collector/testdata/sys" \
--path.cgroupfs="pkg/collector/testdata/sys/fs/cgroup" \
--path.procfs="pkg/collector/testdata/proc" \
+--collector.slurm \
--collector.slurm.create.unique.jobids \
--collector.slurm.job.props.path="pkg/collector/testdata/slurmjobprops" \
--collector.slurm.gpu.type="amd" \
7 changes: 5 additions & 2 deletions website/docs/components/ceems-exporter.md
@@ -38,12 +38,15 @@ the mapping related to job ID to GPU ordinals.

:::warning[WARNING]

-For SLURM collector to work properly, SLURM needs to be configured well to enable all
+For the SLURM collector to work properly, SLURM needs to be configured to use all
the available cgroups controllers. At least `cpu` and `memory` controllers must be
-enabled if not cgroups will not contain any accounting information. Without `cpu`
+enabled; if not, cgroups will not contain any accounting information. Without `cpu`
and `memory` accounting information, it is not possible to estimate energy consumption
of the job.

+More details on how to configure SLURM to get accounting information from cgroups can
+be found in the [Configuration](../configuration/resource-managers.md) section.
:::

Currently, the list of job related metrics exported by SLURM exporter are as follows:
2 changes: 1 addition & 1 deletion website/docs/configuration/config-reference.md
@@ -1,5 +1,5 @@
---
-sidebar_position: 6
+sidebar_position: 7
---

# Configuration Reference
80 changes: 80 additions & 0 deletions website/docs/configuration/resource-managers.md
@@ -0,0 +1,80 @@
---
sidebar_position: 5
---

# Resource Managers

This section describes the configuration required by the resource managers that are
supported by CEEMS.

## SLURM

The [Slurm collector](../components/ceems-exporter.md#slurm-collector) in the CEEMS exporter
relies on the job accounting information (like CPU time and memory usage) in the
cgroups that SLURM creates for each job to estimate the energy consumption and emissions
of a given job. However, depending on the cgroups version and the SLURM configuration, this
accounting information might not be available. The following sections give guidelines
on how to configure SLURM to ensure that this accounting information is always available.

Starting from [SLURM 22.05](https://slurm.schedmd.com/archive/slurm-22.05.0/cgroups.html),
SLURM supports both cgroups v1 and v2. When cgroups v1 is used, the job cgroups might not
contain any accounting information.

### cgroups v1

The following configuration enables the necessary cgroups controllers and provides
job accounting information when cgroups v1 is used.

As stated in the [SLURM cgroups docs](https://slurm.schedmd.com/cgroup.conf.html), the
cgroups plugin is controlled by the configuration in the `cgroup.conf` file. An
[example config](https://slurm.schedmd.com/cgroup.conf.html#OPT_/etc/slurm/cgroup.conf)
is also provided, which is a good starting point.

Along with the `cgroup.conf` file, certain configuration parameters are required in the
`slurm.conf` file as well. These are also given in the
[SLURM docs](https://slurm.schedmd.com/cgroup.conf.html#OPT_/etc/slurm/slurm.conf).

:::important[IMPORTANT]

Although `JobAcctGatherType=jobacct_gather/cgroup` is presented as an _optional_
configuration parameter, it _must_ be set to get accounting information on CPU usage.
Without this configuration parameter, the CPU time of the job will not be available
in the job's cgroups.

:::

Besides the above configuration, [SelectTypeParameters](https://slurm.schedmd.com/slurm.conf.html#OPT_SelectTypeParameters)
must be configured to make cores (or CPUs) and memory consumable resources. This is
highlighted in the documentation of the [ConstrainRAMSpace](https://slurm.schedmd.com/cgroup.conf.html#OPT_ConstrainRAMSpace)
parameter in the [`cgroup.conf` docs](https://slurm.schedmd.com/cgroup.conf.html).

In summary, here are the configuration excerpts needed:

```ini
# cgroup.conf

ConstrainCores=yes
ConstrainDevices=yes
ConstrainRAMSpace=yes
ConstrainSwapSpace=yes
```

```ini
# slurm.conf

ProctrackType=proctrack/cgroup
TaskPlugin=task/cgroup,task/affinity
JobAcctGatherType=jobacct_gather/cgroup
SelectType=select/cons_tres
SelectTypeParameters=CR_CPU_Memory # or CR_Core_Memory
```
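
Once a job is running with this configuration in place, you can sanity-check that
accounting data is present in its cgroup. The check below is illustrative only: it
assumes the typical cgroups v1 hierarchy created by SLURM's cgroup plugin, and the
UID `1000` and job ID `1234` are placeholders:

```bash
# CPU time consumed by the hypothetical job 1234 (nanoseconds)
cat /sys/fs/cgroup/cpuacct/slurm/uid_1000/job_1234/cpuacct.usage
# Current memory usage of the same job (bytes)
cat /sys/fs/cgroup/memory/slurm/uid_1000/job_1234/memory.usage_in_bytes
```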

### cgroups v2

In the case of cgroups v2, SLURM should create a proper cgroup for every job without any
special configuration. However, the configuration presented for [cgroups v1](#cgroups-v1)
applies to cgroups v2 as well, and it is advised to use it for cgroups v2 too.
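
A similar spot check works under cgroups v2. The paths below assume the common
`system.slice/slurmstepd.scope` layout used by SLURM's cgroup plugin and a hypothetical
job ID `1234`; adjust them for your site:

```bash
# Controllers available to SLURM job cgroups (expect at least "cpu" and "memory")
cat /sys/fs/cgroup/system.slice/slurmstepd.scope/cgroup.controllers
# Accounting data for the hypothetical job 1234
cat /sys/fs/cgroup/system.slice/slurmstepd.scope/job_1234/cpu.stat
cat /sys/fs/cgroup/system.slice/slurmstepd.scope/job_1234/memory.current
```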
2 changes: 1 addition & 1 deletion website/docs/configuration/systemd.md
@@ -1,5 +1,5 @@
---
-sidebar_position: 5
+sidebar_position: 6
---

# Systemd
20 changes: 17 additions & 3 deletions website/docs/usage/ceems-exporter.md
@@ -8,7 +8,8 @@ sidebar_position: 1

:::important[IMPORTANT]

-Currently CEEMS exporter supports only exporting SLURM job metrics. Adding support for
+Currently CEEMS exporter supports only exporting SLURM job metrics. Consequently, CEEMS
+supports only the SLURM resource manager. Adding support for
Openstack and libvirt is in next milestone.

:::
@@ -19,6 +20,13 @@ To run exporter with default enabled collectors, use the following command:
ceems_exporter
```

+The collectors enabled by default are:
+
+- `cpu`: Node level CPU stats
+- `memory`: Node level memory stats
+- `rapl`: RAPL energy counters
+- `ipmi_dcmi`: Power usage from IPMI DCMI
+
By default CEEMS exporter exposes metrics on all interfaces, port `9010` and
at `/metrics` endpoint. This can be changed by setting `--web.listen-address` CLI flag

@@ -28,13 +36,19 @@ ceems_exporter --web.listen-address="localhost:8010"

Above command will run exporter only on `localhost` and on port `8010`.

+In order to enable the SLURM collector, we need to add the following CLI flag:
+
+```bash
+ceems_exporter --collector.slurm
+```
+
If there are GPUs on the compute nodes, it is necessary to tell the exporter the type
of GPU. Currently only NVIDIA and AMD GPUs are supported.

```bash
-ceems_exporter --collector.slurm.gpu.type=amd
+ceems_exporter --collector.slurm --collector.slurm.gpu.type=amd
# or
-ceems_exporter --collector.slurm.gpu.type=nvidia
+ceems_exporter --collector.slurm --collector.slurm.gpu.type=nvidia
```

In order to disable default collectors, we need to add `no` prefix to the collector flag.
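
For example, to run with the RAPL collector turned off (an illustrative flag name,
following the `no` prefix convention described above):

```bash
ceems_exporter --no-collector.rapl
```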