Skip to content

Commit

Permalink
Merge pull request galaxyproject#17169 from natefoo/cgroupsv2
Browse files Browse the repository at this point in the history
[23.2] Add support for Cgroupsv2
  • Loading branch information
natefoo authored Dec 14, 2023
2 parents 1fd9cf3 + c650007 commit faaff5e
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 20 deletions.
83 changes: 66 additions & 17 deletions lib/galaxy/job_metrics/instrumenters/cgroup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The module describes the ``cgroup`` job metrics plugin."""
import decimal
import logging
import numbers
from collections import namedtuple
Expand All @@ -17,7 +18,27 @@

log = logging.getLogger(__name__)

# Values accepted for the plugin's ``version`` option.
VALID_VERSIONS = ("auto", "1", "2")

# cgroupsv1 - this is probably more params than are useful to collect, but don't remove any for legacy reasons
_CGROUPSV1_DEFAULT_PARAMS = (
    "memory.memsw.max_usage_in_bytes",
    "memory.max_usage_in_bytes",
    "memory.limit_in_bytes",
    "memory.memsw.limit_in_bytes",
    "memory.soft_limit_in_bytes",
    "memory.failcnt",
    "memory.oom_control.oom_kill_disable",
    "memory.oom_control.under_oom",
    "cpuacct.usage",
)

# cgroupsv2
_CGROUPSV2_DEFAULT_PARAMS = (
    "memory.events.oom_kill",
    "memory.peak",
    "cpu.stat.system_usec",
    "cpu.stat.usage_usec",
    "cpu.stat.user_usec",
)

# Parameters collected when no explicit ``params`` option is configured:
# the v1 names first, followed by the v2 names.
DEFAULT_PARAMS = _CGROUPSV1_DEFAULT_PARAMS + _CGROUPSV2_DEFAULT_PARAMS
TITLES = {
# cgroupsv1
"memory.memsw.max_usage_in_bytes": "Max memory usage (MEM+SWP)",
"memory.max_usage_in_bytes": "Max memory usage (MEM)",
"memory.limit_in_bytes": "Memory limit on cgroup (MEM)",
Expand All @@ -27,14 +48,35 @@
"memory.oom_control.oom_kill_disable": "OOM Control enabled",
"memory.oom_control.under_oom": "Was OOM Killer active?",
"cpuacct.usage": "CPU Time",
# cgroupsv2
"memory.events.low": "Number of times the cgroup was reclaimed due to high memory pressure even though its usage is under the low "
"boundary",
"memory.events.high": "Number of times processes of the cgroup were throttled and routed to perform direct memory reclaim because "
"the high memory boundary was exceeded",
"memory.events.max": "Number of times the cgroup's memory usage was about to go over the max boundary",
"memory.events.oom": "Number of time the cgroup's memory usage reached the limit and allocation was about to fail",
"memory.events.oom_kill": "Number of processes belonging to this cgroup killed by any kind of OOM killer",
"memory.events.oom_group_kill": "Number of times a group OOM has occurred",
"memory.high": "Memory usage throttle limit",
"memory.low": "Best-effort memory protection",
"memory.max": "Memory usage hard limit",
"memory.min": "Hard memory protection",
"memory.peak": "Max memory usage recorded",
"cpu.stat.system_usec": "CPU system time",
"cpu.stat.usage_usec": "CPU usage time",
"cpu.stat.user_usec": "CPU user time",
}
def _oom_control_enabled(flag):
    """Map ``oom_kill_disable`` to "No" when it equals 1, otherwise "Yes"."""
    if flag == 1:
        return "No"
    return "Yes"


def _under_oom(flag):
    """Map ``under_oom`` to "Yes" when it equals 1, otherwise "No"."""
    if flag == 1:
        return "Yes"
    return "No"


def _peak_memory(num_bytes):
    """Format a byte count with ``nice_size``."""
    return nice_size(num_bytes)


def _nanoseconds_to_str(nsec):
    """Format a nanosecond count via ``formatting.seconds_to_str``."""
    return formatting.seconds_to_str(nsec / 10**9)  # convert nanoseconds


def _microseconds_to_str(usec):
    """Format a microsecond count via ``formatting.seconds_to_str``."""
    return formatting.seconds_to_str(usec / 10**6)  # convert microseconds


# Per-metric converters from the raw collected value to its display form.
CONVERSION = {
    "memory.oom_control.oom_kill_disable": _oom_control_enabled,
    "memory.oom_control.under_oom": _under_oom,
    "memory.peak": _peak_memory,
    "cpuacct.usage": _nanoseconds_to_str,
    "cpu.stat.system_usec": _microseconds_to_str,
    "cpu.stat.usage_usec": _microseconds_to_str,
    "cpu.stat.user_usec": _microseconds_to_str,
}
CPU_USAGE_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then
CGROUPSV1_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" -a ! -f "{cgroup_mount}/cgroup.controllers" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($2=="cpuacct,cpu") || ($2=="cpu,cpuacct") {{print $3}}');
if [ ! -e "{cgroup_mount}/cpu$cgroup_path/cpuacct.usage" ]; then
cgroup_path="";
Expand All @@ -44,12 +86,6 @@
echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null;
fi;
done;
fi
""".replace(
"\n", " "
).strip()
MEMORY_USAGE_TEMPLATE = """
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '$2=="memory"{{print $3}}');
if [ ! -e "{cgroup_mount}/memory$cgroup_path/memory.max_usage_in_bytes" ]; then
cgroup_path="";
Expand All @@ -61,6 +97,16 @@
""".replace(
"\n", " "
).strip()
# Shell snippet used to dump cgroups v2 cpu/memory files into the metrics file.
# It is a ``str.format`` template: ``{cgroup_mount}`` and ``{metrics}`` are
# substituted, while doubled braces become literal shell braces.
# The snippet only runs when ``/proc/$$/cgroup`` exists and the mount contains
# a ``cgroup.controllers`` file (the v1 template requires that file to be absent).
# awk picks the third ``:``-separated field of the ``/proc/$$/cgroup`` line whose
# first field is ``0`` and uses it as the cgroup path; every ``cpu.*`` and
# ``memory.*`` file found there is appended to the metrics file, preceded by a
# ``__<file name>__`` header line.
# Newlines are replaced by spaces so the result is a single-line command.
CGROUPSV2_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -f "{cgroup_mount}/cgroup.controllers" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($1=="0") {{print $3}}');
for f in {cgroup_mount}/${{cgroup_path}}/{{cpu,memory}}.*; do
echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null;
done;
fi
""".replace(
"\n", " "
).strip()


# Record type for one metric entry with ``key``, ``subkey`` and ``value`` fields.
Metric = namedtuple("Metric", ("key", "subkey", "value"))
Expand All @@ -76,7 +122,7 @@ def format(self, key, value):
return title, nice_size(value)
except ValueError:
pass
elif isinstance(value, (numbers.Integral, numbers.Real)) and value == int(value):
elif isinstance(value, (decimal.Decimal, numbers.Integral, numbers.Real)) and value == int(value):
value = int(value)
return title, value

Expand All @@ -90,33 +136,36 @@ class CgroupPlugin(InstrumentPlugin):
def __init__(self, **kwargs):
self.verbose = asbool(kwargs.get("verbose", False))
self.cgroup_mount = kwargs.get("cgroup_mount", "/sys/fs/cgroup")
self.version = str(kwargs.get("version", "auto"))
assert self.version in VALID_VERSIONS, f"cgroup metric version option must be one of {VALID_VERSIONS}"
params_str = kwargs.get("params", None)
if isinstance(params_str, list):
params = params_str
elif params_str:
params = [v.strip() for v in params_str.split(",")]
else:
params = list(TITLES.keys())
params = list(DEFAULT_PARAMS)
self.params = params

def post_execute_instrument(self, job_directory: str) -> List[str]:
commands: List[str] = []
commands.append(self.__record_cgroup_cpu_usage(job_directory))
commands.append(self.__record_cgroup_memory_usage(job_directory))
if self.version in ("auto", "1"):
commands.append(self.__record_cgroup_v1_usage(job_directory))
if self.version in ("auto", "2"):
commands.append(self.__record_cgroup_v2_usage(job_directory))
return commands

def job_properties(self, job_id, job_directory: str) -> Dict[str, Any]:
    """Return the metrics read from the cgroup metrics file of ``job_directory``.

    ``job_id`` is accepted but not used here.
    """
    return self.__read_metrics(self.__cgroup_metrics_file(job_directory))

def __record_cgroup_cpu_usage(self, job_directory: str) -> str:
# comounted cgroups (which cpu and cpuacct are on the supported Linux distros) can appear in any order (cpu,cpuacct or cpuacct,cpu)
return CPU_USAGE_TEMPLATE.format(
def __record_cgroup_v1_usage(self, job_directory: str) -> str:
return CGROUPSV1_TEMPLATE.format(
metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount
)

def __record_cgroup_memory_usage(self, job_directory: str) -> str:
return MEMORY_USAGE_TEMPLATE.format(
def __record_cgroup_v2_usage(self, job_directory: str) -> str:
return CGROUPSV2_TEMPLATE.format(
metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount
)

Expand Down
162 changes: 159 additions & 3 deletions test/unit/job_metrics/test_cgroups.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from galaxy.job_metrics.instrumenters.cgroup import CgroupPlugin

CGROUP_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__
CGROUPV1_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__
100000
__cpu.cfs_quota_us__
-1
Expand Down Expand Up @@ -105,18 +105,174 @@
1
"""

CGROUPV2_PRODUCTION_EXAMPLE_232 = """__cpu.idle__
0
__cpu.max__
max 100000
__cpu.max.burst__
0
__cpu.stat__
usage_usec 8992210
user_usec 6139150
system_usec 2853059
core_sched.force_idle_usec 0
nr_periods 0
nr_throttled 0
throttled_usec 0
nr_bursts 0
burst_usec 0
__cpu.weight__
100
__cpu.weight.nice__
0
__memory.current__
139350016
__memory.events__
low 0
high 0
max 0
oom 0
oom_kill 0
oom_group_kill 0
__memory.events.local__
low 0
high 0
max 0
oom 0
oom_kill 0
oom_group_kill 0
__memory.high__
max
__memory.low__
0
__memory.max__
max
__memory.min__
0
__memory.numa_stat__
anon N0=864256
file N0=129146880
kernel_stack N0=32768
pagetables N0=131072
sec_pagetables N0=0
shmem N0=0
file_mapped N0=0
file_dirty N0=0
file_writeback N0=0
swapcached N0=0
anon_thp N0=0
file_thp N0=0
shmem_thp N0=0
inactive_anon N0=819200
active_anon N0=20480
inactive_file N0=51507200
active_file N0=77639680
unevictable N0=0
slab_reclaimable N0=8638552
slab_unreclaimable N0=340136
workingset_refault_anon N0=0
workingset_refault_file N0=77
workingset_activate_anon N0=0
workingset_activate_file N0=0
workingset_restore_anon N0=0
workingset_restore_file N0=0
workingset_nodereclaim N0=0
__memory.oom.group__
0
__memory.peak__
339906560
__memory.reclaim__
__memory.stat__
anon 860160
file 129146880
kernel 9211904
kernel_stack 32768
pagetables 126976
sec_pagetables 0
percpu 0
sock 0
vmalloc 0
shmem 0
zswap 0
zswapped 0
file_mapped 0
file_dirty 0
file_writeback 0
swapcached 0
anon_thp 0
file_thp 0
shmem_thp 0
inactive_anon 815104
active_anon 20480
inactive_file 51507200
active_file 77639680
unevictable 0
slab_reclaimable 8642480
slab_unreclaimable 340904
slab 8983384
workingset_refault_anon 0
workingset_refault_file 77
workingset_activate_anon 0
workingset_activate_file 0
workingset_restore_anon 0
workingset_restore_file 0
workingset_nodereclaim 0
pgscan 0
pgsteal 0
pgscan_kswapd 0
pgscan_direct 0
pgsteal_kswapd 0
pgsteal_direct 0
pgfault 132306
pgmajfault 524
pgrefill 0
pgactivate 18958
pgdeactivate 0
pglazyfree 0
pglazyfreed 0
zswpin 0
zswpout 0
thp_fault_alloc 19
thp_collapse_alloc 0
__memory.swap.current__
0
__memory.swap.events__
high 0
max 0
fail 0
__memory.swap.high__
max
__memory.swap.max__
max
__memory.zswap.current__
0
__memory.zswap.max__
max
"""


def test_cgroup_collection(tmpdir):
def test_cgroupv1_collection(tmpdir):
plugin = CgroupPlugin()
job_dir = tmpdir.mkdir("job")
job_dir.join("__instrument_cgroup__metrics").write(CGROUP_PRODUCTION_EXAMPLE_2201)
job_dir.join("__instrument_cgroup__metrics").write(CGROUPV1_PRODUCTION_EXAMPLE_2201)
properties = plugin.job_properties(1, job_dir)
assert "cpuacct.usage" in properties
assert properties["cpuacct.usage"] == 7265342042
assert "memory.limit_in_bytes" in properties
assert properties["memory.limit_in_bytes"] == 9223372036854771712


def test_cgroupv2_collection(tmpdir):
    """Check the values the plugin extracts from the cgroups v2 sample fixture."""
    cgroup_plugin = CgroupPlugin()
    metrics_dir = tmpdir.mkdir("job")
    metrics_dir.join("__instrument_cgroup__metrics").write(CGROUPV2_PRODUCTION_EXAMPLE_232)
    collected = cgroup_plugin.job_properties(1, metrics_dir)
    # Each expected metric must be present and carry the value from the fixture.
    expected = {
        "cpu.stat.usage_usec": 8992210,
        "memory.peak": 339906560,
    }
    for metric_name, metric_value in expected.items():
        assert metric_name in collected
        assert collected[metric_name] == metric_value


def test_instrumentation(tmpdir):
# don't actually run the instrumentation but at least exercise the code and make
# sure templating includes cgroup_mount override.
Expand Down
14 changes: 14 additions & 0 deletions test/unit/job_metrics/test_job_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ def test_job_metrics_format_cgroup():
assert_title="Memory limit on cgroup (MEM)",
assert_value="8.0 EB",
)
_assert_format(
"cgroup",
"cpu.stat.usage_usec",
7982357892.000000,
assert_title="CPU usage time",
assert_value="2.0 hours and 13.0 minutes",
)
_assert_format(
"cgroup",
"memory.peak",
45097156608,
assert_title="Max memory usage recorded",
assert_value="42.0 GB",
)


def test_job_metrics_uname():
Expand Down

0 comments on commit faaff5e

Please sign in to comment.