diff --git a/README.md b/README.md index 02f556047..6171e26b1 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,11 @@ Results from multiple runs can be combined into CSV and interactive HTML tables, of which the latter provide scatter and quantile plots (have a look at our [demo table](https://sosy-lab.github.io/benchexec/example-table/svcomp-simple-cbmc-cpachecker.table.html)). -BenchExec works only on Linux and needs a one-time setup of cgroups by the machine's administrator. -The actual benchmarking can be done by any user and does not need root access. +On modern Linux systems (e.g., Debian 11, Ubuntu 22.04, etc.), +BenchExec works out of the box and without the need for root access, +not even for installation. +On older Linux systems, a one-time setup of cgroups by the machine's administrator may be needed. +The actual benchmarking can always be done by any user and does not need root access. BenchExec was originally developed for use with the software verification framework [CPAchecker](https://cpachecker.sosy-lab.org) @@ -103,6 +106,7 @@ Contributors: - [Montgomery Carter](https://github.com/MontyCarter) - [Andreas Donig](https://github.com/adonig) - [Karlheinz Friedberger](https://www.sosy-lab.org/people/friedberger) +- [Robin Gloster](https://github.com/globin) - Peter Häring - [Florian Heck](https://github.com/fheck) - [Hugo](https://github.com/hugovk) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index f3d59ccd4..d27e483d3 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -5,104 +5,18 @@ # # SPDX-License-Identifier: Apache-2.0 +from abc import ABC, abstractmethod import errno -import grp import logging import os -import shutil -import signal import stat -import sys -import tempfile -import time -from benchexec import BenchExecException -from benchexec import systeminfo from benchexec import util -__all__ = [ - "find_my_cgroups", - "BLKIO", - "CPUACCT", - "CPUSET", - "FREEZER", - "MEMORY", -] - -CGROUP_FALLBACK_PATH = 
"system.slice/benchexec-cgroup.service" -"""If we do not have write access to the current cgroup, -attempt to use this cgroup as fallback.""" - -CGROUP_NAME_PREFIX = "benchmark_" - -BLKIO = "blkio" -CPUACCT = "cpuacct" -CPUSET = "cpuset" -FREEZER = "freezer" -MEMORY = "memory" -ALL_KNOWN_SUBSYSTEMS = { - # cgroups for BenchExec - BLKIO, - CPUACCT, - CPUSET, - FREEZER, - MEMORY, - # other cgroups users might want - "cpu", - "devices", - "net_cls", - "net_prio", - "hugetlb", - "perf_event", - "pids", -} CGROUPS_V1 = 1 CGROUPS_V2 = 2 -_PERMISSION_HINT_GROUPS = """ -You need to add your account to the following groups: {0} -Remember to logout and login again afterwards to make group changes effective.""" - -_PERMISSION_HINT_DEBIAN = """ -The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu -Alternatively, you can install benchexec-cgroup.service manually: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_SYSTEMD = """ -The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_OTHER = """ -Please configure your system in way to allow your user to use cgroups: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" - -_ERROR_MSG_PERMISSIONS = """ -Required cgroups are not available because of missing permissions.{0} - -As a temporary workaround, you can also run -"sudo chmod o+wt {1}" -Note that this will grant permissions to more users than typically desired and it will only last until the next reboot.""" - -_ERROR_MSG_CGROUPS_V2 = """ -Required cgroups are not available because this system is 
using cgroupsv2 exclusively. - -This version of BenchExec does not yet support cgroupsv2. -Please check at https://github.com/sosy-lab/benchexec/issues/133 -whether a new version of BenchExec with support for cgroupsv2 is available -and update if applicable. - -Alternatively, you could try switching back to cgroupsv1 -with the kernel command-line parameter systemd.unified_cgroup_hierarchy=0 -or use BenchExec without the features that need cgroups -(i.e., disable cpu-time limit, memory limit, and core limit). -""" - -_ERROR_MSG_OTHER = """ -Required cgroups are not available. -If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" - def _get_cgroup_version(): version = None @@ -117,205 +31,99 @@ def _get_cgroup_version(): # we don't support crippled hybrid mode elif mount[2] == "cgroup2" and version != CGROUPS_V1: version = CGROUPS_V2 - - if version is None: - raise BenchExecException("Could not detect Cgroup Version") except OSError: logging.exception("Cannot read /proc/mounts") return version -def find_my_cgroups(cgroup_paths=None, fallback=True): +class Cgroups(ABC): """ - Return a Cgroup object with the cgroups of the current process. - Note that it is not guaranteed that all subsystems are available - in the returned object, as a subsystem may not be mounted. - Check with "subsystem in " before using. - A subsystem may also be present but we do not have the rights to create - child cgroups, this can be checked with require_subsystem(). - @param cgroup_paths: If given, use this instead of reading /proc/self/cgroup. - @param fallback: Whether to look for a default cgroup as fallback is our cgroup - is not accessible. + A representation of a cgroup that attempts to abstract away the differences + between cgroups v1 and v2. + The typical way to get a usable instance is to call initialize(). """ - logging.debug( - "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." 
- ) - if cgroup_paths is None: - my_cgroups = dict(_find_own_cgroups()) - else: - my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_paths)) - - cgroupsParents = {} - for subsystem, mount in _find_cgroup_mounts(): - # Ignore mount points where we do not have any access, - # e.g. because a parent directory has insufficient permissions - # (lxcfs mounts cgroups under /run/lxcfs in such a way). - if os.access(mount, os.F_OK): - cgroupPath = os.path.join(mount, my_cgroups[subsystem]) - fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) - if ( - fallback - and not os.access(cgroupPath, os.W_OK) - and os.path.isdir(fallbackPath) - ): - cgroupPath = fallbackPath - cgroupsParents[subsystem] = cgroupPath - - return Cgroup(cgroupsParents) - - -def _find_cgroup_mounts(): - """ - Return the information which subsystems are mounted where. - @return a generator of tuples (subsystem, mountpoint) - """ - try: - with open("/proc/mounts", "rt") as mountsFile: - for mount in mountsFile: - mount = mount.split(" ") - if mount[2] == "cgroup": - mountpoint = mount[1] - options = mount[3] - for option in options.split(","): - if option in ALL_KNOWN_SUBSYSTEMS: - yield (option, mountpoint) - except OSError: - logging.exception("Cannot read /proc/mounts") + @staticmethod + def initialize(allowed_versions=None): + """ + Try to find or create a usable cgroup and return a Cgroups instance + that represents it. + + Calling this method may have an effect on the cgroup of the current process, + e.g., it may be moved to a different cgroup. + This will likely cause problems if other non-BenchExec components + are also using cgroups in the same process. + Even though it may change the cgroup state of the process, + this method is safe to call more than once and it is expected that later calls + do not produce further changes. + + The returned cgroup may or may not have child cgroups + and the current process may or may not be contained in the returned cgroup + or one of its children. 
+ + This method cannot guarantee that a usable cgroup is found, + but it will always return a Cgroups instance. + Call require_subsystem() on it in order to find out which subsystems (if any) + are usable. + + Typically, callers should use the returned cgroup instance only for creating + child cgroups and not call any other modifying method such as add_task(). + + @param allowed_versions: None, or a sequence of allowed cgroup versions (1 or 2). + If the current system uses a different cgroup version, no attempt at + returning a usable Cgroups instance is made. + """ + version = _get_cgroup_version() + if allowed_versions is not None and version not in allowed_versions: + return Cgroups.dummy() -def _find_own_cgroups(): - """ - For all subsystems, return the information in which (sub-)cgroup this process is in. - (Each process is in exactly cgroup in each hierarchy.) - @return a generator of tuples (subsystem, cgroup) - """ - try: - with open("/proc/self/cgroup", "rt") as ownCgroupsFile: - for cgroup in _parse_proc_pid_cgroup(ownCgroupsFile): - yield cgroup - except OSError: - logging.exception("Cannot read /proc/self/cgroup") + if version == CGROUPS_V1: + from .cgroupsv1 import CgroupsV1 + return CgroupsV1.from_system() -def _parse_proc_pid_cgroup(content): - """ - Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). - @param content: An iterable over the lines of the file. - @return: a generator of tuples - """ - for ownCgroup in content: - # each line is "id:subsystem,subsystem:path" - ownCgroup = ownCgroup.strip().split(":") - try: - path = ownCgroup[2][1:] # remove leading / - except IndexError: - raise IndexError(f"index out of range for {ownCgroup}") - for subsystem in ownCgroup[1].split(","): - yield (subsystem, path) + elif version == CGROUPS_V2: + from .cgroupsv2 import initialize + return initialize() -def _force_open_read(filename): - """ - Open a file for reading even if we have no read permission, - as long as we can grant it to us. 
- """ - try: - return open(filename, "rt") - except OSError: - os.chmod(filename, stat.S_IRUSR) - return open(filename, "rt") - - -def kill_all_tasks_in_cgroup(cgroup): - tasksFile = os.path.join(cgroup, "tasks") - - i = 0 - while True: - i += 1 - # TODO We can probably remove this loop over signals and just send - # SIGKILL. We added this loop when killing sub-processes was not reliable - # and we did not know why, but now it is reliable. - for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: - with _force_open_read(tasksFile) as tasks: - task = None - for task in tasks: - task = task.strip() - if i > 1: - logging.warning( - "Run has left-over process with pid %s " - "in cgroup %s, sending signal %s (try %s).", - task, - cgroup, - sig, - i, - ) - util.kill_process(int(task), sig) - - if task is None: - return # No process was hanging, exit - # wait for the process to exit, this might take some time - time.sleep(i * 0.5) - - -def remove_cgroup(cgroup): - if not os.path.exists(cgroup): - logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) - return - assert util.read_file(cgroup, "tasks") == "", "Cgroup not yet empty" - try: - os.rmdir(cgroup) - except OSError: - # sometimes this fails because the cgroup is still busy, we try again once - try: - os.chmod(os.path.basename(cgroup), stat.S_IWUSR) - os.rmdir(cgroup) - except OSError as e: - if e.errno != errno.ENOENT: - logging.warning( - "Failed to remove cgroup %s: error %s (%s)", - cgroup, - e.errno, - e.strerror, - ) + return Cgroups.dummy() + @staticmethod + def from_system(cgroup_procinfo=None): + """ + Create a cgroups instance representing the current cgroup of the process. -def _register_process_with_cgrulesengd(pid): - """Tell cgrulesengd daemon to not move the given process into other cgroups, - if libcgroup is available. - """ - # Logging/printing from inside preexec_fn would end up in the output file, - # not in the correct logger, thus it is disabled here. 
- from ctypes import cdll + @param cgroup_procinfo: Optional, if given use this instead of /proc/self/cgroup + """ + version = _get_cgroup_version() + if version == CGROUPS_V1: + from .cgroupsv1 import CgroupsV1 - try: - libcgroup = cdll.LoadLibrary("libcgroup.so.1") - failure = libcgroup.cgroup_init() - if failure: - pass - else: - CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 - failure = libcgroup.cgroup_register_unchanged_process( - pid, CGROUP_DAEMON_UNCHANGE_CHILDREN - ) - if failure: - pass - # print(f'Could not register process to cgrulesndg, error {success}. ' - # 'Probably the daemon will mess up our cgroups.') - except OSError: - pass + return CgroupsV1.from_system(cgroup_procinfo, fallback=False) + elif version == CGROUPS_V2: + from .cgroupsv2 import CgroupsV2 + return CgroupsV2.from_system(cgroup_procinfo) + + return Cgroups.dummy() + + @staticmethod + def dummy(): + return _DummyCgroups({}) + + def __init__(self, subsystems): + self.subsystems = subsystems + + assert all(self.subsystems.values()) + + self.paths = set(self.subsystems.values()) # without duplicates + + logging.debug("Available Cgroups: %s", self.subsystems) -class Cgroup(object): - def __init__(self, cgroupsPerSubsystem): - assert set(cgroupsPerSubsystem.keys()) <= ALL_KNOWN_SUBSYSTEMS - assert all(cgroupsPerSubsystem.values()) - # Also update self.paths on every update to this! - self.subsystems = cgroupsPerSubsystem - self.paths = set(cgroupsPerSubsystem.values()) # without duplicates # for error messages: self.unusable_subsystems = set() - self.denied_subsystems = {} def __contains__(self, key): return key in self.subsystems @@ -340,31 +148,14 @@ def require_subsystem(self, subsystem, log_method=logging.warning): self.unusable_subsystems.add(subsystem) log_method( "Cgroup subsystem %s is not available. 
" - "Please make sure it is supported by your kernel and mounted.", + "Please make sure it is supported by your kernel and available.", subsystem, ) return False - try: - test_cgroup = self.create_fresh_child_cgroup(subsystem) - test_cgroup.remove() - except OSError as e: - log_method( - "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", - self.subsystems[subsystem], - subsystem, - e.strerror, - e.errno, - ) - self.unusable_subsystems.add(subsystem) - if e.errno == errno.EACCES: - self.denied_subsystems[subsystem] = self.subsystems[subsystem] - del self.subsystems[subsystem] - self.paths = set(self.subsystems.values()) - return False - return True + @abstractmethod def handle_errors(self, critical_cgroups): """ If there were errors in calls to require_subsystem() and critical_cgroups @@ -373,185 +164,19 @@ def handle_errors(self, critical_cgroups): @param critical_cgroups: set of unusable but required cgroups """ - if not critical_cgroups: - return - assert critical_cgroups.issubset(self.unusable_subsystems) - - if critical_cgroups.issubset(self.denied_subsystems): - # All errors were because of permissions for these directories - paths = sorted(set(self.denied_subsystems.values())) - - # Check if all cgroups have group permissions and user could just be added - # to some groups to get access. But group 0 (root) of course does not count. 
- groups = {} - try: - if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): - groups = {os.stat(path).st_gid for path in paths} - except OSError: - pass - if groups and 0 not in groups: - - def get_group_name(gid): - try: - name = grp.getgrgid(gid).gr_name - except KeyError: - name = None - return util.escape_string_shell(name or str(gid)) - - groups = " ".join(sorted(set(map(get_group_name, groups)))) - permission_hint = _PERMISSION_HINT_GROUPS.format(groups) - - elif systeminfo.has_systemd(): - if systeminfo.is_debian(): - permission_hint = _PERMISSION_HINT_DEBIAN - else: - permission_hint = _PERMISSION_HINT_SYSTEMD - - else: - permission_hint = _PERMISSION_HINT_OTHER - - paths = " ".join(map(util.escape_string_shell, paths)) - sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) - - elif _get_cgroup_version() == CGROUPS_V2: - sys.exit(_ERROR_MSG_CGROUPS_V2) - else: - sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted - - def create_fresh_child_cgroup(self, *subsystems): - """ - Create child cgroups of the current cgroup for at least the given subsystems. - @return: A Cgroup instance representing the new child cgroup(s). 
- """ - assert set(subsystems).issubset(self.subsystems.keys()) - createdCgroupsPerSubsystem = {} - createdCgroupsPerParent = {} - for subsystem in subsystems: - parentCgroup = self.subsystems[subsystem] - if parentCgroup in createdCgroupsPerParent: - # reuse already created cgroup - createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ - parentCgroup - ] - continue - - cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) - createdCgroupsPerSubsystem[subsystem] = cgroup - createdCgroupsPerParent[parentCgroup] = cgroup - - # add allowed cpus and memory to cgroup if necessary - # (otherwise we can't add any tasks) - def copy_parent_to_child(name): - shutil.copyfile( - os.path.join(parentCgroup, name), # noqa: B023 - os.path.join(cgroup, name), # noqa: B023 - ) - - try: - copy_parent_to_child("cpuset.cpus") - copy_parent_to_child("cpuset.mems") - except OSError: - # expected to fail if cpuset subsystem is not enabled in this hierarchy - pass + pass - return Cgroup(createdCgroupsPerSubsystem) + @abstractmethod + def create_fresh_child_cgroup(self, subsystems): + pass + @abstractmethod def add_task(self, pid): - """ - Add a process to the cgroups represented by this instance. - """ - _register_process_with_cgrulesengd(pid) - for cgroup in self.paths: - with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: - tasksFile.write(str(pid)) - - def get_all_tasks(self, subsystem): - """ - Return a generator of all PIDs currently in this cgroup for the given subsystem. - """ - with open(os.path.join(self.subsystems[subsystem], "tasks"), "r") as tasksFile: - for line in tasksFile: - yield int(line) + pass + @abstractmethod def kill_all_tasks(self): - """ - Kill all tasks in this cgroup and all its children cgroups forcefully. - Additionally, the children cgroups will be deleted. - """ - # In this method we should attempt to guard against child cgroups - # that have been created and manipulated by processes in the run. 
- # For example, they could have removed permissions from files and directories. - - def recursive_child_cgroups(cgroup): - def raise_error(e): - raise e - - try: - for dirpath, dirs, _files in os.walk( - cgroup, topdown=False, onerror=raise_error - ): - for subCgroup in dirs: - yield os.path.join(dirpath, subCgroup) - except OSError as e: - # some process might have made a child cgroup inaccessible - os.chmod(e.filename, stat.S_IRUSR | stat.S_IXUSR) - # restart, which might yield already yielded cgroups again, - # but this is ok for the callers of recursive_child_cgroups() - yield from recursive_child_cgroups(cgroup) - - def try_unfreeze(cgroup): - try: - util.write_file("THAWED", cgroup, "freezer.state", force=True) - except OSError: - # With force=True this fails only if we are not owner, but then there is - # nothing we can do. But the processes inside the run cannot change the - # owner, so this should not happen. - pass - - # First, we go through all cgroups recursively while they are frozen and kill - # all processes. This helps against fork bombs and prevents processes from - # creating new subgroups while we are trying to kill everything. - # But this is only possible if we have freezer, and all processes will stay - # until they are thawed (so we cannot check for cgroup emptiness and we cannot - # delete subgroups). - if FREEZER in self.subsystems: - cgroup = self.subsystems[FREEZER] - util.write_file("FROZEN", cgroup, "freezer.state", force=True) - - for child_cgroup in recursive_child_cgroups(cgroup): - with _force_open_read(os.path.join(child_cgroup, "tasks")) as tasks: - for task in tasks: - util.kill_process(int(task)) - - # This cgroup could be frozen, which would prevent processes from being - # killed and would lead to an endless loop below. cf. 
- # https://github.com/sosy-lab/benchexec/issues/840 - try_unfreeze(child_cgroup) - - util.write_file("THAWED", cgroup, "freezer.state", force=True) - - # Second, we go through all cgroups again, kill what is left, - # check for emptiness, and remove subgroups. - # Furthermore, we do this for all hierarchies, not only the one with freezer. - for cgroup in self.paths: - # Sometimes nested cgroups vanish while we iterate over them. - # Not sure why because the freezing above should prevent any process - # from still being alive, but maybe we are iterating here already - # while the kernel is still doing some cleanup. So in order to prevent - # crashes we handle this. - while True: - try: - for child_cgroup in recursive_child_cgroups(cgroup): - kill_all_tasks_in_cgroup(child_cgroup) - remove_cgroup(child_cgroup) - break - except FileNotFoundError as e: - logging.debug( - "Cgroup vanished while we were trying to clean it up: %s", e - ) - continue - - kill_all_tasks_in_cgroup(cgroup) + pass def has_value(self, subsystem, option): """ @@ -614,23 +239,175 @@ def remove(self): This instance is afterwards not usable anymore! """ for cgroup in self.paths: - remove_cgroup(cgroup) + self._remove_cgroup(cgroup) del self.paths del self.subsystems + def _remove_cgroup(self, path): + if not os.path.exists(path): + logging.warning("Cannot remove CGroup %s, because it does not exist.", path) + return + assert not self._has_tasks(path) + try: + os.rmdir(path) + except OSError: + # sometimes this fails because the cgroup is still busy, we try again once + try: + os.chmod(os.path.basename(path), stat.S_IWUSR) + os.rmdir(path) + except OSError as e: + if e.errno != errno.ENOENT: + logging.warning( + "Failed to remove cgroup %s: error %s (%s)", + path, + e.errno, + e.strerror, + ) + + @abstractmethod def read_cputime(self): """ - Read the cputime usage of this cgroup. CPUACCT cgroup needs to be available. + Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
+ @return cputime usage in seconds """ - # convert nano-seconds to seconds - return float(self.get_value(CPUACCT, "usage")) / 1_000_000_000 + pass + + @abstractmethod + def read_max_mem_usage(self): + pass + + @abstractmethod + def read_mem_pressure(self): + pass + + @abstractmethod + def read_cpu_pressure(self): + pass + @abstractmethod + def read_io_pressure(self): + pass + + @abstractmethod + def read_usage_per_cpu(self): + pass + + @abstractmethod def read_allowed_cpus(self): """Get the list of all CPU cores allowed by this cgroup.""" - return util.parse_int_list(self.get_value(CPUSET, "cpus")) + pass + @abstractmethod def read_allowed_memory_banks(self): """Get the list of all memory banks allowed by this cgroup.""" - return util.parse_int_list(self.get_value(CPUSET, "mems")) + pass + + @abstractmethod + def read_io_stat(self): + pass + + @abstractmethod + def _has_tasks(self, path): + pass + + @abstractmethod + def write_memory_limit(self, limit): + pass + + @abstractmethod + def read_memory_limit(self): + pass + + @abstractmethod + def read_hierarchical_memory_limit(self): + """Read the memory limit that applies to the current cgroup or any parent.""" + pass + + @abstractmethod + def read_oom_kill_count(self): + pass + + @abstractmethod + def can_limit_swap(self): + """Check whether cgroups can be used to limit swap usage.""" + pass + + @abstractmethod + def disable_swap(self): + pass + + +class _DummyCgroups(Cgroups): + version = 0 + IO = "io" + CPU = "cpu" + CPUSET = "cpuset" + FREEZE = "freezer" + MEMORY = "memory" + + def add_task(self, pid): + pass + + def kill_all_tasks(self): + pass + + def create_fresh_child_cgroup(self, subsystems): + return self + + def create_fresh_child_cgroup_for_delegation(self): + return self + + def handle_errors(self, critical_cgroups): + pass + + def read_cputime(self): + pass + + def read_max_mem_usage(self): + pass + + def read_mem_pressure(self): + pass + + def read_cpu_pressure(self): + pass + + def read_io_pressure(self): 
+ pass + + def read_usage_per_cpu(self): + pass + + def read_allowed_cpus(self): + pass + + def read_allowed_memory_banks(self): + pass + + def read_io_stat(self): + pass + + def _has_tasks(self, path): + pass + + def has_tasks(self): + pass + + def write_memory_limit(self, limit): + pass + + def read_memory_limit(self): + pass + + def read_hierarchical_memory_limit(self): + pass + + def read_oom_kill_count(self): + pass + + def can_limit_swap(self): + pass + + def disable_swap(self): + pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py new file mode 100644 index 000000000..f6273f8cd --- /dev/null +++ b/benchexec/cgroupsv1.py @@ -0,0 +1,608 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import errno +import grp +import logging +import os +import shutil +import signal +import stat +import sys +import tempfile +import time + +from benchexec import systeminfo +from benchexec import util +from benchexec.cgroups import Cgroups + +CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +"""If we do not have write access to the current cgroup, +attempt to use this cgroup as fallback.""" + +CGROUP_NAME_PREFIX = "benchmark_" + +_PERMISSION_HINT_GROUPS = """ +You need to add your account to the following groups: {0} +Remember to logout and login again afterwards to make group changes effective.""" + +_PERMISSION_HINT_DEBIAN = """ +The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu +Alternatively, you can install benchexec-cgroup.service manually: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_SYSTEMD = """ +The recommended way to fix this is to add your 
account to a group named "benchexec" and install benchexec-cgroup.service: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_OTHER = """ +Please configure your system in way to allow your user to use cgroups: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" + +_ERROR_MSG_PERMISSIONS = """ +Required cgroups are not available because of missing permissions.{0} + +As a temporary workaround, you can also run +"sudo chmod o+wt {1}" +Note that this will grant permissions to more users than typically desired and it will only last until the next reboot.""" + +_ERROR_MSG_OTHER = """ +Required cgroups are not available. +If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" + + +def find_my_cgroups(cgroup_paths=None, fallback=True): + """ + Return a dict with the cgroups of the current process. + Note that it is not guaranteed that all subsystems are available + in the returned object, as a subsystem may not be mounted. + Check with "subsystem in " before using. + A subsystem may also be present but we do not have the rights to create + child cgroups, this can be checked with require_subsystem(). + @param cgroup_paths: If given, use this instead of reading /proc/self/cgroup. + @param fallback: Whether to look for a default cgroup as fallback if our cgroup + is not accessible. + """ + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." + ) + if cgroup_paths is None: + my_cgroups = dict(_find_own_cgroups()) + else: + my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_paths)) + + cgroupsParents = {} + for subsystem, mount in _find_cgroup_mounts(): + # Ignore mount points where we do not have any access, + # e.g. because a parent directory has insufficient permissions + # (lxcfs mounts cgroups under /run/lxcfs in such a way). 
+ if os.access(mount, os.F_OK): + cgroupPath = os.path.join(mount, my_cgroups[subsystem]) + fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) + if ( + fallback + and not os.access(cgroupPath, os.W_OK) + and os.path.isdir(fallbackPath) + ): + cgroupPath = fallbackPath + cgroupsParents[subsystem] = cgroupPath + + return cgroupsParents + + +def _find_cgroup_mounts(): + """ + Return the information which subsystems are mounted where. + @return a generator of tuples (subsystem, mountpoint) + """ + try: + with open("/proc/mounts", "rt") as mountsFile: + for mount in mountsFile: + mount = mount.split(" ") + if mount[2] == "cgroup": + mountpoint = mount[1] + options = mount[3] + for option in options.split(","): + if option in CgroupsV1.known_subsystems: + yield (option, mountpoint) + except OSError: + logging.exception("Cannot read /proc/mounts") + + +def _find_own_cgroups(): + """ + For all subsystems, return the information in which (sub-)cgroup this process is in. + (Each process is in exactly one cgroup in each hierarchy.) + @return a generator of tuples (subsystem, cgroup) + """ + try: + with open("/proc/self/cgroup", "rt") as ownCgroupsFile: + for cgroup in _parse_proc_pid_cgroup(ownCgroupsFile): + yield cgroup + except OSError: + logging.exception("Cannot read /proc/self/cgroup") + + +def _parse_proc_pid_cgroup(content): + """ + Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). + @param content: An iterable over the lines of the file. + @return: a generator of tuples + """ + for ownCgroup in content: + # each line is "id:subsystem,subsystem:path" + ownCgroup = ownCgroup.strip().split(":") + try: + path = ownCgroup[2][1:] # remove leading / + except IndexError: + raise IndexError(f"index out of range for {ownCgroup}") + for subsystem in ownCgroup[1].split(","): + yield (subsystem, path) + + +def _force_open_read(filename): + """ + Open a file for reading even if we have no read permission, + as long as we can grant it to us. 
+ """ + try: + return open(filename, "rt") + except OSError: + os.chmod(filename, stat.S_IRUSR) + return open(filename, "rt") + + +def kill_all_tasks_in_cgroup(cgroup): + tasksFile = os.path.join(cgroup, "tasks") + + i = 0 + while True: + i += 1 + # TODO We can probably remove this loop over signals and just send + # SIGKILL. We added this loop when killing sub-processes was not reliable + # and we did not know why, but now it is reliable. + for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: + with _force_open_read(tasksFile) as tasks: + task = None + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + + if task is None: + return # No process was hanging, exit + # wait for the process to exit, this might take some time + time.sleep(i * 0.5) + + +def _register_process_with_cgrulesengd(pid): + """Tell cgrulesengd daemon to not move the given process into other cgroups, + if libcgroup is available. + """ + # Logging/printing from inside preexec_fn would end up in the output file, + # not in the correct logger, thus it is disabled here. + from ctypes import cdll + + try: + libcgroup = cdll.LoadLibrary("libcgroup.so.1") + failure = libcgroup.cgroup_init() + if failure: + pass + else: + CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 + failure = libcgroup.cgroup_register_unchanged_process( + pid, CGROUP_DAEMON_UNCHANGE_CHILDREN + ) + if failure: + pass + # print(f'Could not register process to cgrulesndg, error {success}. 
' + # 'Probably the daemon will mess up our cgroups.') + except OSError: + pass + + +class CgroupsV1(Cgroups): + version = 1 + + IO = "blkio" + CPU = "cpuacct" + CPUSET = "cpuset" + FREEZE = "freezer" + MEMORY = "memory" + + known_subsystems = { + # cgroups for BenchExec + IO, + CPU, + CPUSET, + FREEZE, + MEMORY, + # other cgroups users might want + "cpu", + "devices", + "net_cls", + "net_prio", + "hugetlb", + "perf_event", + "pids", + } + + def __init__(self, subsystems): + assert set(subsystems.keys()) <= self.known_subsystems + super(CgroupsV1, self).__init__(subsystems) + + # for error messages: + self.denied_subsystems = {} + + @classmethod + def from_system(cls, cgroup_procinfo=None, fallback=True): + """ + Return a Cgroup object with the cgroups of the current process. + Note that it is not guaranteed that all subsystems are available + in the returned object, as a subsystem may not be mounted. + Check with "subsystem in " before using. + A subsystem may also be present but we do not have the rights to create + child cgroups, this can be checked with require_subsystem(). + @param cgroup_procinfo: If given, use this instead of reading /proc/self/cgroup. + @param fallback: Whether to look for a default cgroup as fallback if our cgroup + is not accessible. + """ + return cls(find_my_cgroups(cgroup_procinfo, fallback)) + + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. + If the subsystem is enabled but not writable, it will be removed from + this instance such that further checks with "in" will return "False". + @return A boolean value. 
+ """ + if subsystem not in self: + return super().require_subsystem(subsystem, log_method) + + try: + test_cgroup = self.create_fresh_child_cgroup([subsystem]) + test_cgroup.remove() + except OSError as e: + log_method( + "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", + self.subsystems[subsystem], + subsystem, + e.strerror, + e.errno, + ) + self.unusable_subsystems.add(subsystem) + if e.errno == errno.EACCES: + self.denied_subsystems[subsystem] = self.subsystems[subsystem] + del self.subsystems[subsystem] + self.paths = set(self.subsystems.values()) + return False + + return True + + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. + + @param critical_cgroups: set of unusable but required cgroups + """ + if not critical_cgroups: + return + assert critical_cgroups.issubset(self.unusable_subsystems) + + if critical_cgroups.issubset(self.denied_subsystems): + # All errors were because of permissions for these directories + paths = sorted(set(self.denied_subsystems.values())) + + # Check if all cgroups have group permissions and user could just be added + # to some groups to get access. But group 0 (root) of course does not count. 
+ groups = {} + try: + if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): + groups = {os.stat(path).st_gid for path in paths} + except OSError: + pass + if groups and 0 not in groups: + + def get_group_name(gid): + try: + name = grp.getgrgid(gid).gr_name + except KeyError: + name = None + return util.escape_string_shell(name or str(gid)) + + groups = " ".join(sorted(set(map(get_group_name, groups)))) + permission_hint = _PERMISSION_HINT_GROUPS.format(groups) + + elif systeminfo.has_systemd(): + if systeminfo.is_debian(): + permission_hint = _PERMISSION_HINT_DEBIAN + else: + permission_hint = _PERMISSION_HINT_SYSTEMD + + else: + permission_hint = _PERMISSION_HINT_OTHER + + paths = " ".join([util.escape_string_shell(str(p)) for p in paths]) + sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) + + else: + sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + + def create_fresh_child_cgroup(self, subsystems): + """ + Create child cgroups of the current cgroup for at least the given subsystems. + @return: A Cgroup instance representing the new child cgroup(s). 
+ """ + assert set(subsystems).issubset(self.subsystems.keys()) + createdCgroupsPerSubsystem = {} + createdCgroupsPerParent = {} + for subsystem in subsystems: + parentCgroup = self.subsystems[subsystem] + if parentCgroup in createdCgroupsPerParent: + # reuse already created cgroup + createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ + parentCgroup + ] + continue + + cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + createdCgroupsPerSubsystem[subsystem] = cgroup + createdCgroupsPerParent[parentCgroup] = cgroup + + # add allowed cpus and memory to cgroup if necessary + # (otherwise we can't add any tasks) + def copy_parent_to_child(name): + shutil.copyfile( + os.path.join(parentCgroup, name), # noqa: B023 + os.path.join(cgroup, name), # noqa: B023 + ) + + try: + copy_parent_to_child("cpuset.cpus") + copy_parent_to_child("cpuset.mems") + except OSError: + # expected to fail if cpuset subsystem is not enabled in this hierarchy + pass + + return CgroupsV1(createdCgroupsPerSubsystem) + + def add_task(self, pid): + """ + Add a process to the cgroups represented by this instance. + """ + _register_process_with_cgrulesengd(pid) + for cgroup in self.paths: + with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: + tasksFile.write(str(pid)) + + def get_all_tasks(self, subsystem): + """ + Return a generator of all PIDs currently in this cgroup for the given subsystem. + """ + with open(os.path.join(self.subsystems[subsystem], "tasks"), "r") as tasksFile: + for line in tasksFile: + yield int(line) + + def kill_all_tasks(self): + """ + Kill all tasks in this cgroup and all its children cgroups forcefully. + Additionally, the children cgroups will be deleted. + """ + # In this method we should attempt to guard against child cgroups + # that have been created and manipulated by processes in the run. + # For example, they could have removed permissions from files and directories. 
+ + def recursive_child_cgroups(cgroup): + def raise_error(e): + raise e + + try: + for dirpath, dirs, _files in os.walk( + cgroup, topdown=False, onerror=raise_error + ): + for subCgroup in dirs: + yield os.path.join(dirpath, subCgroup) + except OSError as e: + # some process might have made a child cgroup inaccessible + os.chmod(e.filename, stat.S_IRUSR | stat.S_IXUSR) + # restart, which might yield already yielded cgroups again, + # but this is ok for the callers of recursive_child_cgroups() + yield from recursive_child_cgroups(cgroup) + + def try_unfreeze(cgroup): + try: + util.write_file("THAWED", cgroup, "freezer.state", force=True) + except OSError: + # With force=True this fails only if we are not owner, but then there is + # nothing we can do. But the processes inside the run cannot change the + # owner, so this should not happen. + pass + + # First, we go through all cgroups recursively while they are frozen and kill + # all processes. This helps against fork bombs and prevents processes from + # creating new subgroups while we are trying to kill everything. + # But this is only possible if we have freezer, and all processes will stay + # until they are thawed (so we cannot check for cgroup emptiness and we cannot + # delete subgroups). + if self.FREEZE in self.subsystems: + cgroup = self.subsystems[self.FREEZE] + util.write_file("FROZEN", cgroup, "freezer.state", force=True) + + for child_cgroup in recursive_child_cgroups(cgroup): + with _force_open_read(os.path.join(child_cgroup, "tasks")) as tasks: + for task in tasks: + util.kill_process(int(task)) + + # This cgroup could be frozen, which would prevent processes from being + # killed and would lead to an endless loop below. cf. + # https://github.com/sosy-lab/benchexec/issues/840 + try_unfreeze(child_cgroup) + + util.write_file("THAWED", cgroup, "freezer.state", force=True) + + # Second, we go through all cgroups again, kill what is left, + # check for emptiness, and remove subgroups. 
+ # Furthermore, we do this for all hierarchies, not only the one with freezer. + for cgroup in self.paths: + # Sometimes nested cgroups vanish while we iterate over them. + # Not sure why because the freezing above should prevent any process + # from still being alive, but maybe we are iterating here already + # while the kernel is still doing some cleanup. So in order to prevent + # crashes we handle this. + while True: + try: + for child_cgroup in recursive_child_cgroups(cgroup): + kill_all_tasks_in_cgroup(child_cgroup) + self._remove_cgroup(child_cgroup) + break + except FileNotFoundError as e: + logging.debug( + "Cgroup vanished while we were trying to clean it up: %s", e + ) + continue + + kill_all_tasks_in_cgroup(cgroup) + + def read_cputime(self): + # convert nano-seconds to seconds + return float(self.get_value(self.CPU, "usage")) / 1_000_000_000 + + def read_max_mem_usage(self): + # This measurement reads the maximum number of bytes of RAM+Swap the process used. + # For more details, c.f. the kernel documentation: + # https://www.kernel.org/doc/Documentation/cgroups/memory.txt + memUsageFile = "memsw.max_usage_in_bytes" + if not self.has_value(self.MEMORY, memUsageFile): + memUsageFile = "max_usage_in_bytes" + if self.has_value(self.MEMORY, memUsageFile): + try: + return int(self.get_value(self.MEMORY, memUsageFile)) + except OSError as e: + if e.errno == errno.ENOTSUP: + # kernel responds with operation unsupported if this is disabled + logging.critical( + "Kernel does not track swap memory usage, cannot measure memory usage." + " Please set swapaccount=1 on your kernel command line." 
+ ) + else: + raise e + + return None + + def read_mem_pressure(self): + return None + + def read_cpu_pressure(self): + return None + + def read_io_pressure(self): + return None + + def read_usage_per_cpu(self): + usage = {} + for core, coretime in enumerate( + self.get_value(self.CPU, "usage_percpu").split(" ") + ): + try: + coretime = int(coretime) + if coretime != 0: + # convert nanoseconds to seconds + usage[core] = coretime / 1_000_000_000 + except (OSError, ValueError) as e: + logging.debug( + "Could not read CPU time for core %s from kernel: %s", core, e + ) + + return usage + + def read_allowed_cpus(self): + return util.parse_int_list(self.get_value(self.CPUSET, "cpus")) + + def read_allowed_memory_banks(self): + return util.parse_int_list(self.get_value(self.CPUSET, "mems")) + + def read_io_stat(self): + blkio_bytes_file = "throttle.io_service_bytes" + bytes_read = 0 + bytes_written = 0 + for blkio_line in self.get_file_lines(self.IO, blkio_bytes_file): + try: + dev_no, io_type, bytes_amount = blkio_line.split(" ") + if io_type == "Read": + bytes_read += int(bytes_amount) + elif io_type == "Write": + bytes_written += int(bytes_amount) + except ValueError: + pass # There are irrelevant lines in this file with a different structure + return bytes_read, bytes_written + + def _has_tasks(self, path): + return util.read_file(path, "tasks") != "" + + def write_memory_limit(self, limit): + limit_file = "limit_in_bytes" + self.set_value(self.MEMORY, limit_file, limit) + + swap_limit_file = "memsw.limit_in_bytes" + # We need swap limit because otherwise the kernel just starts swapping + # out our process if the limit is reached. + # Some kernels might not have this feature, + # which is ok if there is actually no swap. + if not self.has_value(self.MEMORY, swap_limit_file): + if systeminfo.has_swap(): + sys.exit( + 'Kernel misses feature for accounting swap memory, but machine has swap. 
Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' + ) + else: + try: + self.set_value(self.MEMORY, swap_limit_file, limit) + except OSError as e: + if e.errno == errno.ENOTSUP: + # kernel responds with operation unsupported if this is disabled + sys.exit( + 'Memory limit specified, but kernel does not allow limiting swap memory. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' + ) + raise e + + def read_memory_limit(self): + return int(self.get_value(self.MEMORY, "limit_in_bytes")) + + def read_hierarchical_memory_limit(self): + limit = self.read_memory_limit() + # We also use the entries hierarchical_*_limit in memory.stat + # because it may be lower if memory.use_hierarchy is enabled. + for key, value in self.get_key_value_pairs(self.MEMORY, "stat"): + if key == "hierarchical_memory_limit" or key == "hierarchical_memsw_limit": + limit = min(limit, int(value)) + return limit + + def can_limit_swap(self): + return self.has_value(self.MEMORY, "memsw.max_usage_in_bytes") + + def disable_swap(self): + # Note that this disables swapping completely according to + # https://www.kernel.org/doc/Documentation/cgroups/memory.txt + # (unlike setting the global swappiness to 0). + # Our process might get killed because of this. 
+ self.set_value(self.MEMORY, "swappiness", "0") + + def read_oom_kill_count(self): + # not supported in v1, see oomhandler and memory_used > memlimit impl + return None diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py new file mode 100644 index 000000000..55ee170dd --- /dev/null +++ b/benchexec/cgroupsv2.py @@ -0,0 +1,625 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import logging +import os +import pathlib +import secrets +import signal +import stat +import sys +import tempfile +import threading +import time +import typing +from decimal import Decimal + +from benchexec import systeminfo, util +from benchexec.cgroups import Cgroups + +_ERROR_MSG_UNKNOWN_SUBSYSTEMS = """ +The following cgroup subsystems were required but are not supported by this kernel: {}. +Please avoid their usage or enable them in the kernel.""" + +_ERROR_MSG_MISSING_SUBSYSTEMS = """ +The following cgroup subsystems were required but are not usable: {}. +Please enable them, e.g., by setting up delegation. +The cgroup that we attempted to use was: {}""" + +_ERROR_MSG_MISSING_CPUSET = """ +The kernel has a bug where delegation of cpuset does not work if there are processes of other users in this user's cgroup. +This happens commonly if xdg-document-portal is running while such delegation is attempted for the first time. +For more information cf. https://github.com/systemd/systemd/issues/18293. +Linux 6.6 is expected to contain a fix for this bug. + +As a quick workaround, execute this command, which forces the missing delegation as root user: + echo +cpuset | sudo tee {}""" + +_ERROR_PODMAN = """ +BenchExec seems to be running in a Podman container without enabled cgroups. 
+Please pass "--security-opt unmask=/sys/fs/cgroup" to your "podman run" command.""" + +_ERROR_RO_CGROUPFS = """ +System is using cgroups v2 but the cgroupfs is mounted read-only. +This likely means that you are using BenchExec within a container. +Please ensure that cgroups are properly delegated into the container.""" + +_ERROR_NO_SYSTEMD = """ +System is using cgroups v2 but not systemd. +If you are using BenchExec within a container, please ensure that cgroups are properly delegated into the container. +Otherwise please configure your system such that BenchExec can use cgroups.""" + +_ERROR_NO_PSYSTEMD = """ +BenchExec was not able to use cgroups. +Please either start it within a fresh systemd scope by prefixing your command line with + systemd-run --user --scope --slice=benchexec -p Delegate=yes +or install the Python library pystemd such that BenchExec can do this automatically.""" + +_ERROR_MSG_OTHER = """ +BenchExec was not able to use cgroups and did not manage to create a systemd scope. +Please ensure that we can connect to systemd via DBus or try starting BenchExec within a fresh systemd scope by prefixing your command line with + systemd-run --user --scope --slice=benchexec -p Delegate=yes""" + +uid = os.getuid() +CGROUP_NAME_PREFIX = "benchmark_" + +# Global state that stores the cgroup we have prepared for use. +# Global state is not nice, but here we have to use it because during cgroup +# initialization we have to move the current process into a cgroup, +# and this is inherently global state (because it affects the whole process). +# So we need to know whether we have done this already or not. +_usable_cgroup = None +_usable_cgroup_lock = threading.Lock() + + +def initialize(): + """ + Attempt to get a usable cgroup. + This may involve moving the current process into a different cgroup, + but this method is idempotent. 
+ """ + global _usable_cgroup + if _usable_cgroup: + return _usable_cgroup + + with _usable_cgroup_lock: + if _usable_cgroup: + return _usable_cgroup + + cgroup = CgroupsV2.from_system() + + if list(cgroup.get_all_tasks()) == [os.getpid()]: + # If we are the only process, somebody prepared a cgroup for us. Use it. + # We might be able to relax this check and for example allow child processes, + # but then we would also have to move them to another cgroup, + # which might not be a good idea. + logging.debug("BenchExec was started in its own cgroup: %s", cgroup) + + elif _create_systemd_scope_for_us(): + # If we can create a systemd scope for us and move ourselves in it, + # we have a usable cgroup afterwards. + cgroup = CgroupsV2.from_system() + + else: + # No usable cgroup. We might still be able to continue if we actually + # do not require cgroups for benchmarking. So we do not fail here + # but return an instance that will on produce an error later. + return CgroupsV2({}) + + # Now we are the only process in this cgroup. In order to make it usable for + # benchmarking, we need to move ourselves into a child cgroup. + try: + child_cgroup = cgroup.create_fresh_child_cgroup( + cgroup.subsystems.keys(), prefix="benchexec_process_" + ) + except OSError as e: + # No usable cgroup, e.g., because of read-only cgroup fs. + # Continue as described above. + logging.debug("Cgroup found, but cannot create child cgroups: %s", e) + return CgroupsV2({}) + + for pid in cgroup.get_all_tasks(): + child_cgroup.add_task(pid) + assert child_cgroup.has_tasks() + assert not cgroup.has_tasks() + + # Now that the cgroup is empty, we can enable controller delegation. + cgroup._delegate_controllers() + + _usable_cgroup = cgroup + + return _usable_cgroup + + +def _create_systemd_scope_for_us(): + """ + Attempt to create a systemd scope for us (with pystemd). + If it works this process is moved into the fresh scope. + + TODO: We should probably also move our child processes to the scope. 
+ + @return: a boolean indicating whether this succeeded + """ + try: + from pystemd.dbuslib import DBus + from pystemd.dbusexc import DBusFileNotFoundError + from pystemd.systemd1 import Manager, Unit + + with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: + unit_params = { + # workaround for not declared parameters, remove in the future + b"_custom": (b"PIDs", b"au", [os.getpid()]), + # Put us in our own slice to be separate from other applications + b"Slice": b"benchexec.slice", + b"Delegate": True, + } + + random_suffix = secrets.token_urlsafe(8) + name = f"benchexec_{random_suffix}.scope".encode() + manager.Manager.StartTransientUnit(name, b"fail", unit_params) + # StartTransientUnit is async, so we need to ensure it has finished + # and moved our process before we continue. + # We might need a loop here (so far it always seems to work without, + # maybe systemd serializes this request with the unit creation). + with Unit(name, bus=bus) as unit: + assert unit.LoadState == b"loaded" + assert unit.ActiveState == b"active" + assert unit.SubState == b"running" + # Cgroup path would be accessible as unit.ControlGroup if we need it. + + logging.debug("Process moved to a fresh systemd scope: %s", name.decode()) + return True + + except ImportError: + logging.debug("pystemd could not be imported.") + except DBusFileNotFoundError as e: # pytype: disable=name-error + logging.debug("No user DBus found, not using pystemd: %s", e) + + return False + + +def _find_cgroup_mount(): + """ + Return the mountpoint of the cgroupv2 unified hierarchy. + @return Path mountpoint + """ + try: + with open("/proc/mounts", "rt") as mountsFile: + for mount in mountsFile: + mount = mount.split(" ") + if mount[2] == "cgroup2": + return pathlib.Path(mount[1]) + except OSError: + logging.exception("Cannot read /proc/mounts") + + +def _find_own_cgroups(): + """ + For all subsystems, return the information in which (sub-)cgroup this process is in. 
+ (Each process is in exactly cgroup in each hierarchy.) + @return a generator of tuples (subsystem, cgroup) + """ + try: + with open("/proc/self/cgroup", "rt") as ownCgroupsFile: + return _parse_proc_pid_cgroup(ownCgroupsFile) + except OSError: + logging.exception("Cannot read /proc/self/cgroup") + + +def _parse_proc_pid_cgroup(cgroup_file): + """ + Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). + @param content: An iterable over the lines of the file. + @return: a generator of tuples + """ + mountpoint = _find_cgroup_mount() + for line in cgroup_file: + own_cgroup = line.strip().split(":")[2][1:] + path = mountpoint / own_cgroup + + return path + + +def _force_open_read(filename): + """ + Open a file for reading even if we have no read permission, + as long as we can grant it to us. + """ + try: + return open(filename, "rt") + except OSError: + os.chmod(filename, stat.S_IRUSR) + return open(filename, "rt") + + +def kill_all_tasks_in_cgroup(cgroup): + tasksFile = cgroup / "cgroup.procs" + + i = 0 + while True: + i += 1 + # TODO We can probably remove this loop over signals and just send + # SIGKILL. We added this loop when killing sub-processes was not reliable + # and we did not know why, but now it is reliable. 
+ for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: + with _force_open_read(tasksFile) as tasks: + task = None + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + + if task is None: + return # No process was hanging, exit + # wait for the process to exit, this might take some time + time.sleep(i * 0.5) + + +class CgroupsV2(Cgroups): + version = 2 + + IO = "io" + CPU = "cpu" + CPUSET = "cpuset" + MEMORY = "memory" + PID = "pids" + FREEZE = "freeze" + KILL = "kill" + + def __init__(self, subsystems): + super(CgroupsV2, self).__init__(subsystems) + + self.path = ( + next(iter(self.subsystems.values())) if len(self.subsystems) else None + ) + + # Store reference to child cgroup if we delegated controllers to it. + self._delegated_to: typing.Optional[CgroupsV2] = None + + @classmethod + def from_system(cls, cgroup_procinfo=None): + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." + ) + if cgroup_procinfo is None: + cgroup_path = _find_own_cgroups() + else: + cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) + + try: + with open(cgroup_path / "cgroup.controllers") as subsystems_file: + subsystems = set(subsystems_file.readline().strip().split()) + except OSError: + # happens if we parse cgroup_procinfo of a deleted cgroup for check_cgroups + subsystems = set() + + # introduced in 5.14 + if (cgroup_path / "cgroup.kill").exists(): + subsystems.add(cls.KILL) + + # always supported in v2 + subsystems.add(cls.FREEZE) + + # basic support always available in v2, this supports everything we use + subsystems.add(cls.CPU) + + return cls({k: cgroup_path for k in subsystems}) + + def create_fresh_child_cgroup(self, subsystems, prefix=CGROUP_NAME_PREFIX): + """ + Create child cgroups of the current cgroup for at least the given subsystems. 
+ @return: A Cgroup instance representing the new child cgroup(s). + """ + subsystems = set(subsystems) + assert subsystems.issubset(self.subsystems.keys()) + + if not subsystems: + return Cgroups.dummy() + + child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) + + child_subsystems = set( + util.read_file(child_path / "cgroup.controllers").split() + ) + + # basic cpu controller support without being enabled + child_subsystems |= {self.CPU, self.FREEZE} + if self.KILL in self.subsystems: + child_subsystems.add(self.KILL) + + return CgroupsV2({c: child_path for c in child_subsystems}) + + def create_fresh_child_cgroup_for_delegation(self, prefix="delegate_"): + """ + Create a special child cgroup and delegate all controllers to it. + The current cgroup must not have processes and may never have processes. + This method can be called only once because we remember what child cgroup + we create here and use it for some special purposes later on. + """ + assert not self._delegated_to + self._delegate_controllers() + child_cgroup = self.create_fresh_child_cgroup(self.subsystems.keys(), prefix) + assert isinstance(child_cgroup, CgroupsV2) + assert ( + self.subsystems.keys() == child_cgroup.subsystems.keys() + ), "delegation failed for at least one controller" + self._delegated_to = child_cgroup + + if self.MEMORY in child_cgroup: + # Copy memory limit to child. This has no actual effect (limits apply + # recursively), but informs the users of the child cgroup about the limit + # (otherwise they would not see it). + child_cgroup.write_memory_limit(self.read_memory_limit() or "max") + + return child_cgroup + + def _delegate_controllers(self): + """ + Enable delegation of all controllers of this cgroup to child cgroups. + This is relevant if processes in child cgroups also want to use cgroup features. + The current cgroup needs to have no processes in order to do so! 
+ """ + # We enable all controllers, even those that we do not need ourselves, + # in order to allow nesting of other cgroup-using software. + controllers = util.read_file(self.path / "cgroup.controllers").split() + util.write_file( + " ".join(f"+{c}" for c in controllers), + self.path / "cgroup.subtree_control", + ) + + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. + @return A boolean value. + """ + # TODO + # We can assume that creation of child cgroups works, + # because we only use cgroups if we were able to move the current process + # into a child cgroup in initialize(). + return super().require_subsystem(subsystem, log_method) + + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. + + @param critical_cgroups: set of unusable but required cgroups + """ + if not critical_cgroups: + return + + if self.subsystems: + # Some subsystems are available, but not the required ones. + # Check if it is a delegation problem or if some subsystems do not exist. 
+ unknown_subsystems = set(critical_cgroups) + with open("/proc/cgroups", mode="r") as cgroups: + for line in cgroups: + if not line.startswith("#"): + unknown_subsystems.discard(line.split("\t", maxsplit=1)[0]) + if unknown_subsystems: + sys.exit( + _ERROR_MSG_UNKNOWN_SUBSYSTEMS.format(", ".join(unknown_subsystems)) + ) + elif critical_cgroups == {self.CPUSET}: + problem_cgroup = self.path + while self.CPUSET not in util.read_file( + problem_cgroup, "cgroup.controllers" + ): + problem_cgroup = problem_cgroup.parent + sys.exit( + _ERROR_MSG_MISSING_CPUSET.format( + problem_cgroup / "cgroup.subtree_control" + ) + ) + else: + sys.exit( + _ERROR_MSG_MISSING_SUBSYSTEMS.format( + ", ".join(critical_cgroups), self.path + ) + ) + + else: + # no cgroup available at all, likely a container + + # Podman detection from https://github.com/containers/podman/issues/3586 + if os.getenv("container") == "podman" or os.path.exists( + "/run/.containerenv" + ): + sys.exit(_ERROR_PODMAN) + + elif os.statvfs("/sys/fs/cgroup").f_flag & os.ST_RDONLY: + sys.exit(_ERROR_RO_CGROUPFS) + + elif not systeminfo.has_systemd(): + sys.exit(_ERROR_NO_SYSTEMD) + + try: + import pystemd # noqa: F401 + except ImportError: + sys.exit(_ERROR_NO_PSYSTEMD) + else: + sys.exit(_ERROR_MSG_OTHER) + + def add_task(self, pid): + """ + Add a process to the cgroups represented by this instance. + """ + assert not self._delegated_to, "Delegated cgroups cannot have processes" + with open(self.path / "cgroup.procs", "w") as tasksFile: + tasksFile.write(str(pid)) + + def get_all_tasks(self, subsystem=None): + """ + Return a generator of all PIDs currently in this cgroup for the given subsystem. + """ + with open(self.path / "cgroup.procs") as tasksFile: + for line in tasksFile: + yield int(line) + + def kill_all_tasks(self): + """ + Kill all tasks in this cgroup and all its children cgroups forcefully. + Additionally, the children cgroups will be deleted. 
+ """ + # In this method we should attempt to guard against child cgroups + # that have been created and manipulated by processes in the run. + # For example, they could have removed permissions from files and directories. + + def recursive_child_cgroups(cgroup): + def raise_error(e): + raise e + + try: + for dirpath, dirs, _files in os.walk( + cgroup, topdown=False, onerror=raise_error + ): + for subCgroup in dirs: + yield pathlib.Path(os.path.join(dirpath, subCgroup)) + except OSError as e: + # some process might have made a child cgroup inaccessible + os.chmod(e.filename, stat.S_IRUSR | stat.S_IXUSR) + # restart, which might yield already yielded cgroups again, + # but this is ok for the callers of recursive_child_cgroups() + yield from recursive_child_cgroups(cgroup) + + if self.KILL in self.subsystems: + # This will immediately terminate all processes recursively, even if frozen + util.write_file("1", self.path, "cgroup.kill", force=True) + # We still need to clean up any child cgroups. + + # First, we go through all cgroups recursively while they are frozen and kill + # all processes. This helps against fork bombs and prevents processes from + # creating new subgroups while we are trying to kill everything. + # On cgroupsv2, frozen processes can still be killed, so this is all we need to + # do. + util.write_file("1", self.path, "cgroup.freeze", force=True) + keep_child = self._delegated_to.path if self._delegated_to else None + for child_cgroup in recursive_child_cgroups(self.path): + kill_all_tasks_in_cgroup(child_cgroup) + + # Remove child_cgroup, but not if it is our immediate child because of + # delegation. We need that cgroup to read the OOM kill count. 
+ if child_cgroup != keep_child: + self._remove_cgroup(child_cgroup) + + kill_all_tasks_in_cgroup(self.path) + + def read_cputime(self): + for k, v in self.get_key_value_pairs(self.CPU, "stat"): + if k == "usage_usec": + # TODO switch to Decimal together with all other float values + return int(v) / 1_000_000 + return None + + def read_max_mem_usage(self): + # Was only added in Linux 5.19 + if self.has_value(self.MEMORY, "peak"): + return int(self.get_value(self.MEMORY, "peak")) + return None + + def _read_pressure_stall_information(self, subsystem): + for line in open(self.path / (subsystem + ".pressure")): + if line.startswith("some "): + for item in line.split(" ")[1:]: + k, v = item.split("=") + if k == "total": + return Decimal(v) / 1_000_000 + return None + + def read_mem_pressure(self): + return self._read_pressure_stall_information("memory") + + def read_cpu_pressure(self): + return self._read_pressure_stall_information("cpu") + + def read_io_pressure(self): + return self._read_pressure_stall_information("io") + + def read_usage_per_cpu(self): + return {} + + def read_allowed_cpus(self): + return util.parse_int_list(self.get_value(self.CPUSET, "cpus.effective")) + + def read_allowed_memory_banks(self): + return util.parse_int_list(self.get_value(self.CPUSET, "mems.effective")) + + def read_io_stat(self): + bytes_read = 0 + bytes_written = 0 + for io_line in self.get_file_lines(self.IO, "stat"): + dev_no, *stats = io_line.split(" ") + stats_map = {s[0]: s[1] for s in (s.split("=") for s in stats if s)} + if "rbytes" in stats_map: + bytes_read += int(stats_map["rbytes"]) + if "wbytes" in stats_map: + bytes_written += int(stats_map["wbytes"]) + return bytes_read, bytes_written + + def has_tasks(self): + return self._has_tasks(self.path) + + def _has_tasks(self, path): + return bool((path / "cgroup.procs").read_bytes().strip()) + + def write_memory_limit(self, limit): + self.set_value(self.MEMORY, "max", limit) + # On OOM we want to terminate the whole run, but 
we would not notice if the + # kernel kills only some random subprocess. So we tell it to kill all processes + # in the cgroup. This is available since Linux 4.19. + self.set_value(self.MEMORY, "oom.group", 1) + + def read_memory_limit(self): + limit = self.get_value(self.MEMORY, "max") + return None if limit == "max" else int(limit) + + def read_hierarchical_memory_limit(self): + # We do not know a way how to read the effective memory limit without looking at + # all parents. + limit = self.read_memory_limit() + for parent_cgroup in self.path.parents: + try: + parent_limit = util.read_file(parent_cgroup, "memory.max") + if parent_limit != "max": + limit = min(limit, int(parent_limit)) + except OSError: + # reached parent directory of cgroupfs + return limit + + assert False # will never be reached + + def can_limit_swap(self): + return self.has_value(self.MEMORY, "swap.max") + + def disable_swap(self): + self.set_value(self.MEMORY, "swap.max", "0") + + def read_oom_kill_count(self): + # We read only the counter from memory.events.local to avoid reporting OOM + # if the process used cgroups internally and there was an OOM in some + # arbitrary nested child cgroup, but not for the main process itself. + # But if we have delegated, then our own cgroup has no processes and OOM count + # would remain zero, so we have to read it from the child cgroup. 
+ if self._delegated_to: + return self._delegated_to.read_oom_kill_count() + + for k, v in self.get_key_value_pairs(self.MEMORY, "events.local"): + if k == "oom_kill": + return int(v) + + return None diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 5fe269ca8..99dd032a5 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -7,12 +7,11 @@ import argparse import logging -import os import sys import tempfile import threading -from benchexec.cgroups import CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroups import Cgroups from benchexec.runexecutor import RunExecutor sys.dont_write_bytecode = True # prevent creation of .pyc files @@ -33,21 +32,28 @@ def check_cgroup_availability(wait=1): my_cgroups = runexecutor.cgroups if not ( - CPUACCT in my_cgroups - and CPUSET in my_cgroups + my_cgroups.CPU in my_cgroups # and FREEZER in my_cgroups # For now, we do not require freezer - and MEMORY in my_cgroups + and my_cgroups.MEMORY in my_cgroups ): sys.exit(1) + if my_cgroups.CPUSET in my_cgroups: + cores = my_cgroups.read_allowed_cpus() + mems = my_cgroups.read_allowed_memory_banks() + else: + # Use dummy value (does not matter which) to let execute_run() fail. 
+ cores = [0] + mems = [0] + with tempfile.NamedTemporaryFile(mode="rt") as tmp: runexecutor.execute_run( ["sh", "-c", f"sleep {wait}; cat /proc/self/cgroup"], tmp.name, memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET - cores=my_cgroups.read_allowed_cpus(), - memory_nodes=my_cgroups.read_allowed_memory_banks(), + cores=cores, + memory_nodes=mems, ) lines = [] for line in tmp: @@ -58,14 +64,15 @@ def check_cgroup_availability(wait=1): and not all(c == "-" for c in line) ): lines.append(line) - task_cgroups = find_my_cgroups(lines, fallback=False) + task_cgroups = Cgroups.from_system(cgroup_procinfo=lines) fail = False - for subsystem in CPUACCT, CPUSET, MEMORY, FREEZER: + expected_subsystems = [my_cgroups.FREEZE] + if my_cgroups.version == 1: + expected_subsystems += [my_cgroups.CPU, my_cgroups.CPUSET, my_cgroups.MEMORY] + for subsystem in expected_subsystems: if subsystem in my_cgroups: - if not task_cgroups[subsystem].startswith( - os.path.join(my_cgroups[subsystem], "benchmark_") - ): + if not str(task_cgroups[subsystem]).startswith(str(my_cgroups[subsystem])): logging.warning( "Task was in cgroup %s for subsystem %s, " "which is not the expected sub-cgroup of %s. " diff --git a/benchexec/container.py b/benchexec/container.py index 1fc7d0bd0..838217524 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -39,6 +39,7 @@ "drop_capabilities", "wait_for_child_and_forward_signals", "setup_container_system_config", + "setup_cgroup_namespace", "CONTAINER_UID", "CONTAINER_GID", "CONTAINER_HOME", @@ -958,3 +959,23 @@ def is_container_system_config_file(file): return file in ( os.path.join("/etc", f.decode()) for f in CONTAINER_ETC_FILE_OVERRIDE ) + + +def setup_cgroup_namespace(): + """Move the current process into a new cgroup namespace and setup /sys/fs/cgroup + appropriately. This method assumes that cgroupv2 is used. 
+ It needs to be called from within the target process.""" + # Move us to new namespace. + libc.unshare(libc.CLONE_NEWCGROUP) + + # Mount /sys/fs/cgroup with view of new namespace. + # For some reason, mounting directly on top of /sys/fs/cgroup gives EBUSY, + # but mounting somewhere else and moving into the correct place works. + libc.mount( + b"cgroup2", + b"/proc", + b"cgroup2", + libc.MS_NOSUID | libc.MS_NODEV | libc.MS_NOEXEC, + None, + ) + libc.mount(b"/proc", b"/sys/fs/cgroup", b"none", libc.MS_MOVE, None) diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 27d8c7573..b96c5032a 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -24,7 +24,7 @@ from benchexec import __version__ from benchexec import baseexecutor from benchexec import BenchExecException -from benchexec.cgroups import Cgroup +from benchexec.cgroups import Cgroups from benchexec import container from benchexec import libc from benchexec import util @@ -253,6 +253,14 @@ def main(argv=None): default=None, help="use given GID within container (default: current UID)", ) + parser.add_argument( + "--cgroup-access", + action="store_true", + help="Allow processes in the container to use cgroups. 
" + "This only works on cgroupsv2 systems and if containerexec is either started in" + " its own cgroup or can talk to systemd to create a cgroup (same requirements" + " as for runexec).", + ) add_basic_container_args(parser) add_container_output_args(parser) baseexecutor.add_basic_executor_options(parser) @@ -261,6 +269,7 @@ def main(argv=None): baseexecutor.handle_basic_executor_options(options, parser) logging.debug("This is containerexec %s.", __version__) container_options = handle_basic_container_args(options, parser) + container_options["cgroup_access"] = options.cgroup_access container_output_options = handle_container_output_args(options, parser) if options.root: @@ -308,6 +317,7 @@ def __init__( dir_modes={"/": DIR_OVERLAY, "/run": DIR_HIDDEN, "/tmp": DIR_HIDDEN}, container_system_config=True, container_tmpfs=True, + cgroup_access=False, *args, **kwargs, ): @@ -323,6 +333,9 @@ def __init__( @param container_system_config: Whether to use a special system configuration in the container that disables all remote host and user lookups, sets a custom hostname, etc. + @param cgroup_access: + Whether to allow processes in the contain to access cgroups. + Only supported on systems with cgroupsv2. """ super(ContainerExecutor, self).__init__(*args, **kwargs) self._use_namespaces = use_namespaces @@ -389,6 +402,17 @@ def is_accessible(path): "threads please read https://github.com/sosy-lab/benchexec/issues/435" ) + self._cgroups = Cgroups.dummy() + if cgroup_access: + self._cgroups = Cgroups.initialize(allowed_versions=[2]) + if self._cgroups.version != 2: + sys.exit( + "Cgroup access unsupported on this system, " + "BenchExec only supports this for cgroupsv2." 
+ ) + if self._cgroups.CPU not in self._cgroups: + self._cgroups.handle_errors([self._cgroups.CPU]) + def _get_result_files_base(self, temp_dir): """Given the temp directory that is created for each run, return the path to the directory where files created by the tool are stored.""" @@ -434,6 +458,9 @@ def execute_run( if environ is None: environ = os.environ.copy() + cgroups = self._cgroups.create_fresh_child_cgroup( + self._cgroups.subsystems.keys() + ) pid = None returnvalue = 0 @@ -449,7 +476,7 @@ def execute_run( root_dir=rootDir, cwd=workingDir, temp_dir=temp_dir, - cgroups=Cgroup({}), + cgroups=cgroups, output_dir=output_dir, result_files_patterns=result_files_patterns, child_setup_fn=util.dummy_fn, @@ -602,6 +629,8 @@ def _start_execution_in_container( root_dir = os.path.abspath(root_dir) cwd = os.path.abspath(cwd) + use_cgroup_ns = cgroups.version == 2 + def grandchild(): """Setup everything inside the process that finally exec()s the tool.""" try: @@ -616,7 +645,6 @@ def grandchild(): my_outer_pid = container.get_my_pid_from_procfs() container.mount_proc(self._container_system_config) - container.drop_capabilities() container.reset_signal_handling() child_setup_fn() # Do some other setup the caller wants. @@ -625,6 +653,14 @@ def grandchild(): os.write(to_parent, str(my_outer_pid).encode()) received = os.read(from_parent, 1) assert received == MARKER_PARENT_COMPLETED, received + + # Finalize setup + # We want to do as little as possible here because measurements are + # already running, but we can only setup the cgroup namespace + # once we are in the desired cgroup. + if use_cgroup_ns: + container.setup_cgroup_namespace() + container.drop_capabilities() except BaseException as e: # When using runexec, this logging will end up in the output.log file, # where usually the tool output is. This is suboptimal, but probably @@ -935,6 +971,13 @@ def check_child_exit_code(): child_pid, ) + # cgroups is the cgroups where we configure limits. 
+ # So for isolation, we need to create a child cgroup that becomes the root + # of the cgroup ns, such that the limit settings are not accessible in the + # container and cannot be changed. + if use_cgroup_ns: + cgroups = cgroups.create_fresh_child_cgroup_for_delegation() + # start measurements cgroups.add_task(grandchild_pid) parent_setup = parent_setup_fn() diff --git a/benchexec/libc.py b/benchexec/libc.py index ef347e10b..2a808ec7e 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -58,6 +58,7 @@ def _check_errno(result, func, arguments): # /usr/include/linux/sched.h CLONE_NEWNS = 0x00020000 +CLONE_NEWCGROUP = 0x02000000 CLONE_NEWUTS = 0x04000000 CLONE_NEWIPC = 0x08000000 CLONE_NEWUSER = 0x10000000 diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index 54ad88396..a60cdf1c8 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -14,7 +14,7 @@ import time from benchexec import BenchExecException -from benchexec import cgroups +from benchexec.cgroups import Cgroups from benchexec import containerexecutor from benchexec import resources from benchexec.runexecutor import RunExecutor @@ -68,7 +68,7 @@ def execute_benchmark(benchmark, output_handler): "only resource limits are used." ) - my_cgroups = cgroups.find_my_cgroups() + my_cgroups = Cgroups.initialize() required_cgroups = set() coreAssignment = None # cores per run @@ -78,8 +78,8 @@ def execute_benchmark(benchmark, output_handler): pqos.reset_monitoring() if benchmark.rlimits.cpu_cores: - if not my_cgroups.require_subsystem(cgroups.CPUSET): - required_cgroups.add(cgroups.CPUSET) + if not my_cgroups.require_subsystem(my_cgroups.CPUSET): + required_cgroups.add(my_cgroups.CPUSET) logging.error( "Cgroup subsystem cpuset is required " "for limiting the number of CPU cores/memory nodes." 
@@ -107,8 +107,8 @@ def execute_benchmark(benchmark, output_handler): ) if benchmark.rlimits.memory: - if not my_cgroups.require_subsystem(cgroups.MEMORY): - required_cgroups.add(cgroups.MEMORY) + if not my_cgroups.require_subsystem(my_cgroups.MEMORY): + required_cgroups.add(my_cgroups.MEMORY) logging.error("Cgroup subsystem memory is required for memory limit.") else: # check whether we have enough memory in the used memory banks for all runs @@ -120,8 +120,8 @@ def execute_benchmark(benchmark, output_handler): ) if benchmark.rlimits.cputime: - if not my_cgroups.require_subsystem(cgroups.CPUACCT): - required_cgroups.add(cgroups.CPUACCT) + if not my_cgroups.require_subsystem(my_cgroups.CPU): + required_cgroups.add(my_cgroups.CPU) logging.error("Cgroup subsystem cpuacct is required for cputime limit.") my_cgroups.handle_errors(required_cgroups) diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py index edaf4a89e..6bac8559a 100644 --- a/benchexec/oomhandler.py +++ b/benchexec/oomhandler.py @@ -9,7 +9,6 @@ import os import threading -from benchexec.cgroups import MEMORY from benchexec import util from ctypes import cdll @@ -53,7 +52,7 @@ def __init__(self, cgroups, pid_to_kill, callbackFn=lambda reason: None): self._cgroups = cgroups self._callback = callbackFn - cgroup = cgroups[MEMORY] # for raw access + cgroup = cgroups[cgroups.MEMORY] # for raw access ofd = os.open(os.path.join(cgroup, "memory.oom_control"), os.O_WRONLY) try: # Important to use CLOEXEC, otherwise the benchmarked tool inherits @@ -103,7 +102,9 @@ def run(self): ) util.kill_process(self._pid_to_kill) # Also kill all children of subprocesses directly. 
- with open(os.path.join(self._cgroups[MEMORY], "tasks"), "rt") as tasks: + with open( + os.path.join(self._cgroups[self._cgroups.MEMORY], "tasks"), "rt" + ) as tasks: for task in tasks: util.kill_process(int(task)) @@ -116,11 +117,11 @@ def run(self): close(self._efd) def _reset_memory_limit(self, limitFile): - if self._cgroups.has_value(MEMORY, limitFile): + if self._cgroups.has_value(self._cgroups.MEMORY, limitFile): try: # Write a high value (1 PB) as the limit self._cgroups.set_value( - MEMORY, + self._cgroups.MEMORY, limitFile, str( 1 diff --git a/benchexec/outputhandler.py b/benchexec/outputhandler.py index d1a52a3bc..f696fe73a 100644 --- a/benchexec/outputhandler.py +++ b/benchexec/outputhandler.py @@ -723,6 +723,8 @@ def add_column_to_xml(self, xml, title, value, prefix="", value_suffix=""): value_suffix = "B" elif title.startswith("mbm"): value_suffix = "B/s" + elif title.startswith("pressure-") and title.endswith("-some"): + value_suffix = "s" value = f"{value}{value_suffix}" diff --git a/benchexec/resources.py b/benchexec/resources.py index 5394cb835..8116ac9ca 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -16,7 +16,6 @@ import os import sys -from benchexec import cgroups from benchexec import util __all__ = [ @@ -361,8 +360,8 @@ def _get_memory_banks_listed_in_dir(path): def check_memory_size(memLimit, num_of_threads, memoryAssignment, my_cgroups): """Check whether the desired amount of parallel benchmarks fits in the memory. - Implemented are checks for memory limits via cgroup controller "memory" and - memory bank restrictions via cgroup controller "cpuset", + Implemented are checks for memory limits via cgroup subsystem "memory" and + memory bank restrictions via cgroup subsystem "cpuset", as well as whether the system actually has enough memory installed. 
@param memLimit: the memory limit in bytes per run @param num_of_threads: the number of parallel benchmark executions @@ -389,19 +388,14 @@ def check_limit(actualLimit): ) return - if cgroups.MEMORY in my_cgroups: - # We use the entries hierarchical_*_limit in memory.stat and not memory.*limit_in_bytes - # because the former may be lower if memory.use_hierarchy is enabled. - for key, value in my_cgroups.get_key_value_pairs(cgroups.MEMORY, "stat"): - if ( - key == "hierarchical_memory_limit" - or key == "hierarchical_memsw_limit" - ): - check_limit(int(value)) + if my_cgroups.MEMORY in my_cgroups: + actual_limit = my_cgroups.read_hierarchical_memory_limit() + if actual_limit is not None: + check_limit(actual_limit) # Get list of all memory banks, either from memory assignment or from system. if not memoryAssignment: - if cgroups.CPUSET in my_cgroups: + if my_cgroups.CPUSET in my_cgroups: allMems = my_cgroups.read_allowed_memory_banks() else: allMems = _get_memory_banks_listed_in_dir("/sys/devices/system/node/") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index a8f0d990d..35350f39e 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -8,7 +8,7 @@ import argparse import collections import datetime -import errno +import decimal import logging import multiprocessing import os @@ -24,10 +24,11 @@ from benchexec import baseexecutor from benchexec import BenchExecException from benchexec import containerexecutor -from benchexec.cgroups import BLKIO, CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroups import Cgroups from benchexec.filehierarchylimit import FileHierarchyLimitThread from benchexec import intel_cpu_energy from benchexec import oomhandler +from benchexec.util import print_decimal from benchexec import resources from benchexec import systeminfo from benchexec import util @@ -277,12 +278,19 @@ def signal_handler_kill(signum, frame): # exit_code is a util.ProcessExitCode instance exit_code = 
cast(Optional[util.ProcessExitCode], result.pop("exitcode", None)) - def print_optional_result(key, unit="", format_fn=str): + def print_optional_result(key, unit=""): if key in result: - print(f"{key}={format_fn(result[key])}{unit}") + value = result[key] + if isinstance(value, decimal.Decimal): + format_fn = print_decimal + elif isinstance(value, datetime.datetime): + format_fn = datetime.datetime.isoformat + else: + format_fn = str + print(f"{key}={format_fn(value)}{unit}") # output results - print_optional_result("starttime", unit="", format_fn=datetime.datetime.isoformat) + print_optional_result("starttime", unit="") print_optional_result("terminationreason") if exit_code is not None and exit_code.value is not None: print(f"returnvalue={exit_code.value}") @@ -296,6 +304,9 @@ def print_optional_result(key, unit="", format_fn=str): print_optional_result("memory", "B") print_optional_result("blkio-read", "B") print_optional_result("blkio-write", "B") + print_optional_result("pressure-cpu-some", "s") + print_optional_result("pressure-io-some", "s") + print_optional_result("pressure-memory-some", "s") energy = intel_cpu_energy.format_energy_results(result.get("cpuenergy")) for energy_key, energy_value in energy.items(): print(f"{energy_key}={energy_value}J") @@ -327,7 +338,7 @@ def _init_cgroups(self): """ This function initializes the cgroups for the limitations and measurements. 
""" - self.cgroups = find_my_cgroups() + self.cgroups = Cgroups.initialize() critical_cgroups = set() for subsystem in self._cgroup_subsystems: @@ -340,29 +351,27 @@ def _init_cgroups(self): ) # Feature is still experimental, do not warn loudly - self.cgroups.require_subsystem(BLKIO, log_method=logging.debug) - if BLKIO not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.IO, log_method=logging.debug) + if self.cgroups.IO not in self.cgroups: logging.debug("Cannot measure I/O without blkio cgroup.") - self.cgroups.require_subsystem(CPUACCT) - if CPUACCT not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.CPU) + if self.cgroups.CPU not in self.cgroups: logging.warning("Cannot measure CPU time without cpuacct cgroup.") - self.cgroups.require_subsystem(FREEZER) - if FREEZER not in self.cgroups and not self._use_namespaces: - critical_cgroups.add(FREEZER) + self.cgroups.require_subsystem(self.cgroups.FREEZE) + if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: + critical_cgroups.add(self.cgroups.FREEZE) logging.error( "Cannot reliably kill sub-processes without freezer cgroup " "or container mode. Please enable at least one of them." ) - self.cgroups.require_subsystem(MEMORY) - if MEMORY not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.MEMORY) + if self.cgroups.MEMORY not in self.cgroups: logging.warning("Cannot measure memory consumption without memory cgroup.") else: - if systeminfo.has_swap() and ( - not self.cgroups.has_value(MEMORY, "memsw.max_usage_in_bytes") - ): + if systeminfo.has_swap() and not self.cgroups.can_limit_swap(): logging.warning( "Kernel misses feature for accounting swap memory, but machine has swap. " "Memory usage may be measured inaccurately. " @@ -370,10 +379,12 @@ def _init_cgroups(self): '"sudo swapoff -a".' 
) - self.cgroups.require_subsystem(CPUSET) + # Do not warn about missing CPUSET here, it is only useful for core limits + # and if one is set we terminate with a better error message later. + self.cgroups.require_subsystem(self.cgroups.CPUSET, log_method=logging.debug) self.cpus = None # to indicate that we cannot limit cores self.memory_nodes = None # to indicate that we cannot limit cores - if CPUSET in self.cgroups: + if self.cgroups.CPUSET in self.cgroups: # Read available cpus/memory nodes: try: self.cpus = self.cgroups.read_allowed_cpus() @@ -382,9 +393,7 @@ def _init_cgroups(self): logging.debug("List of available CPU cores is %s.", self.cpus) try: - self.memory_nodes = util.parse_int_list( - self.cgroups.get_value(CPUSET, "mems") - ) + self.memory_nodes = self.cgroups.read_allowed_memory_banks() except ValueError as e: logging.warning( "Could not read available memory nodes from kernel: %s", str(e) @@ -414,12 +423,17 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): logging.debug("Setting up cgroups for run.") # Setup cgroups, need a single call to create_cgroup() for all subsystems - subsystems = [BLKIO, CPUACCT, FREEZER, MEMORY] + self._cgroup_subsystems + subsystems = [ + self.cgroups.IO, + self.cgroups.CPU, + self.cgroups.FREEZE, + self.cgroups.MEMORY, + ] + self._cgroup_subsystems if my_cpus is not None or memory_nodes is not None: - subsystems.append(CPUSET) + subsystems.append(self.cgroups.CPUSET) subsystems = [s for s in subsystems if s in self.cgroups] - cgroups = self.cgroups.create_fresh_child_cgroup(*subsystems) + cgroups = self.cgroups.create_fresh_child_cgroup(subsystems) logging.debug("Created cgroups %s.", cgroups) @@ -444,51 +458,27 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): # Setup cpuset cgroup if necessary to limit the CPU cores/memory nodes to be used. 
if my_cpus is not None: my_cpus_str = ",".join(map(str, my_cpus)) - cgroups.set_value(CPUSET, "cpus", my_cpus_str) - my_cpus_str = cgroups.get_value(CPUSET, "cpus") + cgroups.set_value(self.cgroups.CPUSET, "cpus", my_cpus_str) + my_cpus_str = cgroups.get_value(self.cgroups.CPUSET, "cpus") logging.debug("Using cpu cores [%s].", my_cpus_str) if memory_nodes is not None: - cgroups.set_value(CPUSET, "mems", ",".join(map(str, memory_nodes))) - memory_nodesStr = cgroups.get_value(CPUSET, "mems") + cgroups.set_value( + self.cgroups.CPUSET, "mems", ",".join(map(str, memory_nodes)) + ) + memory_nodesStr = cgroups.get_value(self.cgroups.CPUSET, "mems") logging.debug("Using memory nodes [%s].", memory_nodesStr) # Setup memory limit if memlimit is not None: - limit = "limit_in_bytes" - cgroups.set_value(MEMORY, limit, memlimit) - - swap_limit = "memsw.limit_in_bytes" - # We need swap limit because otherwise the kernel just starts swapping - # out our process if the limit is reached. - # Some kernels might not have this feature, - # which is ok if there is actually no swap. - if not cgroups.has_value(MEMORY, swap_limit): - if systeminfo.has_swap(): - sys.exit( - 'Kernel misses feature for accounting swap memory, but machine has swap. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' - ) - else: - try: - cgroups.set_value(MEMORY, swap_limit, memlimit) - except OSError as e: - if e.errno == errno.ENOTSUP: - # kernel responds with operation unsupported if this is disabled - sys.exit( - 'Memory limit specified, but kernel does not allow limiting swap memory. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' 
- ) - raise e + cgroups.write_memory_limit(memlimit) - memlimit = cgroups.get_value(MEMORY, limit) + memlimit = cgroups.read_memory_limit() logging.debug("Effective memory limit is %s bytes.", memlimit) - if MEMORY in cgroups: + if cgroups.MEMORY in cgroups: try: - # Note that this disables swapping completely according to - # https://www.kernel.org/doc/Documentation/cgroups/memory.txt - # (unlike setting the global swappiness to 0). - # Our process might get killed because of this. - cgroups.set_value(MEMORY, "swappiness", "0") + cgroups.disable_swap() except OSError as e: logging.warning( "Could not disable swapping for benchmarked process: %s", e @@ -570,11 +560,13 @@ def _setup_cgroup_time_limit( return timelimitThread return None - def _setup_cgroup_memory_limit(self, memlimit, cgroups, pid_to_kill): + def _setup_cgroup_memory_limit_thread(self, memlimit, cgroups, pid_to_kill): """Start memory-limit handler. @return None or the memory-limit handler for calling cancel() """ - if memlimit is not None: + # On CgroupsV2, the kernel kills the whole cgroup for us on OOM + # and we can detect OOMs reliably after the fact. So no need to do anything. 
+ if memlimit is not None and cgroups.version == 1: try: oomThread = oomhandler.KillProcessOnOomThread( cgroups=cgroups, @@ -671,19 +663,19 @@ def execute_run( if hardtimelimit is not None: if hardtimelimit <= 0: sys.exit(f"Invalid time limit {hardtimelimit}.") - if CPUACCT not in self.cgroups: + if self.cgroups.CPU not in self.cgroups: logging.error("Time limit cannot be specified without cpuacct cgroup.") - critical_cgroups.add(CPUACCT) + critical_cgroups.add(self.cgroups.CPU) if softtimelimit is not None: if softtimelimit <= 0: sys.exit(f"Invalid soft time limit {softtimelimit}.") if hardtimelimit and (softtimelimit > hardtimelimit): sys.exit("Soft time limit cannot be larger than the hard time limit.") - if CPUACCT not in self.cgroups: + if self.cgroups.CPU not in self.cgroups: logging.error( "Soft time limit cannot be specified without cpuacct cgroup." ) - critical_cgroups.add(CPUACCT) + critical_cgroups.add(self.cgroups.CPU) if walltimelimit is None: if hardtimelimit is not None: @@ -697,7 +689,7 @@ def execute_run( if cores is not None: if self.cpus is None: logging.error("Cannot limit CPU cores without cpuset cgroup.") - critical_cgroups.add(CPUSET) + critical_cgroups.add(self.cgroups.CPUSET) elif not cores: sys.exit("Cannot execute run without any CPU core.") elif not set(cores).issubset(self.cpus): @@ -707,16 +699,16 @@ def execute_run( if memlimit is not None: if memlimit <= 0: sys.exit(f"Invalid memory limit {memlimit}.") - if MEMORY not in self.cgroups: + if self.cgroups.MEMORY not in self.cgroups: logging.error( "Memory limit specified, but cannot be implemented without cgroup support." 
) - critical_cgroups.add(MEMORY) + critical_cgroups.add(self.cgroups.MEMORY) if memory_nodes is not None: if self.memory_nodes is None: logging.error("Cannot restrict memory nodes without cpuset cgroup.") - critical_cgroups.add(CPUSET) + critical_cgroups.add(self.cgroups.CPUSET) elif len(memory_nodes) == 0: sys.exit("Cannot execute run without any memory node.") elif not set(memory_nodes).issubset(self.memory_nodes): @@ -861,7 +853,7 @@ def postParent(preParent_result, exit_code, base_path): # process existed, and killing via cgroups prevents this. # But if we do not have freezer, it is safer to just let all processes run # until the container is killed. - if FREEZER in cgroups: + if cgroups.FREEZE in cgroups: cgroups.kill_all_tasks() # For a similar reason, we cancel all limits. Otherwise a run could have @@ -932,7 +924,7 @@ def preSubprocess(): timelimitThread = self._setup_cgroup_time_limit( hardtimelimit, softtimelimit, walltimelimit, cgroups, cores, pid ) - oomThread = self._setup_cgroup_memory_limit(memlimit, cgroups, pid) + oomThread = self._setup_cgroup_memory_limit_thread(memlimit, cgroups, pid) file_hierarchy_limit_thread = self._setup_file_hierarchy_limit( files_count_limit, files_size_limit, temp_dir, cgroups, pid ) @@ -1011,11 +1003,19 @@ def preSubprocess(): } if self._termination_reason: result["terminationreason"] = self._termination_reason - elif memlimit and "memory" in result and result["memory"] >= memlimit: + elif self.cgroups.version == 2 and result.get("oom_kill_count"): + # At least one process was killed by the kernel due to OOM. + result["terminationreason"] = "memory" + elif self.cgroups.version == 1 and ( + memlimit and result.get("memory", 0) >= memlimit + ): # The kernel does not always issue OOM notifications and thus the OOMHandler # does not always run even in case of OOM. We detect this there and report OOM. 
result["terminationreason"] = "memory" + # Cleanup + result.pop("oom_kill_count", None) + return result def _get_cgroup_measurements(self, cgroups, ru_child, result): @@ -1027,7 +1027,12 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): cputime_wait = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0 cputime_cgroups = None - if CPUACCT in cgroups: + + def store_result(key, value): + if value is not None: + result[key] = value + + if cgroups.CPU in cgroups: # We want to read the value from the cgroup. # The documentation warns about outdated values. # So we read twice with 0.1s time difference, @@ -1063,57 +1068,21 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): else: result["cputime"] = cputime_cgroups - for core, coretime in enumerate( - cgroups.get_value(CPUACCT, "usage_percpu").split(" ") - ): - try: - coretime = int(coretime) - if coretime != 0: - # convert nanoseconds to seconds - result[f"cputime-cpu{core}"] = coretime / 1_000_000_000 - except (OSError, ValueError) as e: - logging.debug( - "Could not read CPU time for core %s from kernel: %s", core, e - ) + for core, coretime in cgroups.read_usage_per_cpu().items(): + result[f"cputime-cpu{core}"] = coretime - if MEMORY in cgroups: - # This measurement reads the maximum number of bytes of RAM+Swap the process used. - # For more details, c.f. the kernel documentation: - # https://www.kernel.org/doc/Documentation/cgroups/memory.txt - memUsageFile = "memsw.max_usage_in_bytes" - if not cgroups.has_value(MEMORY, memUsageFile): - memUsageFile = "max_usage_in_bytes" - if not cgroups.has_value(MEMORY, memUsageFile): - logging.warning("Memory-usage is not available due to missing files.") - else: - try: - result["memory"] = int(cgroups.get_value(MEMORY, memUsageFile)) - except OSError as e: - if e.errno == errno.ENOTSUP: - # kernel responds with operation unsupported if this is disabled - logging.critical( - "Kernel does not track swap memory usage, cannot measure memory usage." 
- " Please set swapaccount=1 on your kernel command line." - ) - else: - raise e - - if BLKIO in cgroups: - blkio_bytes_file = "throttle.io_service_bytes" - if cgroups.has_value(BLKIO, blkio_bytes_file): - bytes_read = 0 - bytes_written = 0 - for blkio_line in cgroups.get_file_lines(BLKIO, blkio_bytes_file): - try: - dev_no, io_type, bytes_amount = blkio_line.split(" ") - if io_type == "Read": - bytes_read += int(bytes_amount) - elif io_type == "Write": - bytes_written += int(bytes_amount) - except ValueError: - pass # There are irrelevant lines in this file with a different structure - result["blkio-read"] = bytes_read - result["blkio-write"] = bytes_written + if cgroups.MEMORY in cgroups: + store_result("memory", cgroups.read_max_mem_usage()) + store_result("oom_kill_count", cgroups.read_oom_kill_count()) + + if cgroups.IO in cgroups: + result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() + + # Pressure information does not depend on enabled controllers: + # https://docs.kernel.org/accounting/psi.html + store_result("pressure-cpu-some", cgroups.read_cpu_pressure()) + store_result("pressure-memory-some", cgroups.read_mem_pressure()) + store_result("pressure-io-some", cgroups.read_io_pressure()) logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", @@ -1233,7 +1202,7 @@ def __init__( self.finished = threading.Event() if hardtimelimit or softtimelimit: - assert CPUACCT in cgroups + assert cgroups.CPU in cgroups assert walltimelimit is not None if cores: @@ -1262,7 +1231,7 @@ def read_cputime(self): def run(self): while not self.finished.is_set(): - usedCpuTime = self.read_cputime() if CPUACCT in self.cgroups else 0 + usedCpuTime = self.read_cputime() if self.cgroups.CPU in self.cgroups else 0 remainingCpuTime = self.timelimit - usedCpuTime remainingSoftCpuTime = self.softtimelimit - usedCpuTime remainingWallTime = self.latestKillTime - time.monotonic() diff --git a/benchexec/tablegenerator/columns.py 
b/benchexec/tablegenerator/columns.py index 4acd1ec7d..3197b4975 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -13,6 +13,7 @@ import logging from typing import Tuple, Union +from benchexec.util import print_decimal from benchexec.tablegenerator import util __all__ = ["Column", "ColumnType", "ColumnMeasureType"] @@ -206,7 +207,7 @@ def format_value(self, value, format_target): number = Decimal(number_str) elif isinstance(value, Decimal): number = value - number_str = util.print_decimal(number) + number_str = print_decimal(number) else: raise TypeError(f"Unexpected number type {type(value)}") @@ -229,7 +230,7 @@ def format_value(self, value, format_target): ): # Column of type count (integral values) without specified sig. digits. # However, we need to round values like stdev, so we just round somehow. - return util.print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) + return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) number_of_significant_digits = self.get_number_of_significant_digits( format_target @@ -250,7 +251,7 @@ def format_value(self, value, format_target): format_target, ) else: - return util.print_decimal(number) + return print_decimal(number) def set_column_type_from(self, column_values): """ @@ -378,7 +379,7 @@ def _format_number( rounded_value = round(number, rounding_point) assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point)) - formatted_value = util.print_decimal(rounded_value) + formatted_value = print_decimal(rounded_value) # Get the number of resulting significant digits. 
current_sig_digits = _get_significant_digits(formatted_value) diff --git a/benchexec/tablegenerator/test_util.py b/benchexec/tablegenerator/test_util.py index fa691604d..fff5d9409 100644 --- a/benchexec/tablegenerator/test_util.py +++ b/benchexec/tablegenerator/test_util.py @@ -5,7 +5,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from decimal import Decimal import sys import unittest @@ -53,52 +52,6 @@ def test_split_string_at_suffix(self): self.assertEqualTextAndNumber("abc1abc", "abc1abc", "") self.assertEqualTextAndNumber("abc1abc1", "abc1abc", "1") - def test_print_decimal_roundtrip(self): - # These values should be printed exactly as in the input (with "+" removed) - test_values = [ - "NaN", - "Inf", - "-Inf", - "+Inf", - "0", - "-0", - "+0", - "0.0", - "-0.0", - "0.00000000000000000000", - "0.00000000000000000001", - "0.00000000123450000000", - "0.1", - "0.10000000000000000000", - "0.99999999999999999999", - "1", - "-1", - "+1", - "1000000000000000000000", - "10000000000.0000000000", - ] - for value in test_values: - expected = value.lstrip("+") - self.assertEqual(expected, util.print_decimal(Decimal(value))) - - def test_print_decimal_int(self): - # These values should be printed like Decimal prints them after quantizing - # to remove the exponent. - test_values = ["0e0", "-0e0", "0e20", "1e0", "1e20", "0e10"] - for value in test_values: - value = Decimal(value) - expected = str(value.quantize(1)) - assert "e" not in expected - self.assertEqual(expected, util.print_decimal(value)) - - def test_print_decimal_float(self): - # These values should be printed like str prints floats. 
- test_values = ["1e-4", "123e-4", "1234e-4", "1234e-5", "1234e-6"] - for value in test_values: - expected = str(float(value)) - assert "e" not in expected, expected - self.assertEqual(expected, util.print_decimal(Decimal(value))) - def test_roman_number_conversion(self): test_data = { 1: "I", diff --git a/benchexec/tablegenerator/util.py b/benchexec/tablegenerator/util.py index 7a9c55a82..0172798f4 100644 --- a/benchexec/tablegenerator/util.py +++ b/benchexec/tablegenerator/util.py @@ -164,49 +164,6 @@ def to_decimal(s): return None -def print_decimal(d): - """ - Print a Decimal instance in non-scientific (i.e., decimal) notation with full - precision, i.e., all digits are printed exactly as stored in the Decimal instance. - Note that str(d) always falls back to scientific notation for very small values. - """ - - if d.is_nan(): - return "NaN" - elif d.is_infinite(): - return "Inf" if d > 0 else "-Inf" - assert d.is_finite() - - sign, digits, exp = d.as_tuple() - # sign is 1 if negative - # digits is exactly the sequence of significant digits in the decimal representation - # exp tells us whether we need to shift digits (pos: left shift; neg: right shift). - # left shift can only add zeros, right shift adds decimal separator - - sign = "-" if sign == 1 else "" - digits = list(map(str, digits)) - - if exp >= 0: - if digits == ["0"]: - # special case: return "0" instead of "0000" for "0e4" - return sign + "0" - return sign + "".join(digits) + ("0" * exp) - - # Split digits into parts before and after decimal separator. - # If -exp > len(digits) the result needs to start with "0.", so we force a 0. - integral_part = digits[:exp] or ["0"] - decimal_part = digits[exp:] - assert decimal_part - - return ( - sign - + "".join(integral_part) - + "." 
- + ("0" * (-exp - len(decimal_part))) # additional zeros if necessary - + "".join(decimal_part) - ) - - def collapse_equal_values(values, counts): """ Take a tuple (values, counts), remove consecutive values and increment their count instead. diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 4b9a06c21..340164887 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -21,6 +21,7 @@ from benchexec import containerexecutor from benchexec import filehierarchylimit from benchexec.runexecutor import RunExecutor +from benchexec.cgroups import Cgroups from benchexec import runexecutor from benchexec import util @@ -43,6 +44,8 @@ def setUpClass(cls): if not hasattr(cls, "assertRegex"): cls.assertRegex = cls.assertRegexpMatches + cls.cgroups = Cgroups.initialize() + cls.echo = shutil.which("echo") or "/bin/echo" cls.sleep = shutil.which("sleep") or "/bin/sleep" cls.cat = shutil.which("cat") or "/bin/cat" @@ -154,6 +157,9 @@ def check_result_keys(self, result, *additional_keys): "blkio-read", "blkio-write", "starttime", + "pressure-cpu-some", + "pressure-io-some", + "pressure-memory-some", } expected_keys.update(additional_keys) for key in result.keys(): @@ -732,6 +738,8 @@ def test_require_cgroup_cpu(self): self.skipTest(e) if not os.path.exists(self.cat): self.skipTest("missing cat") + if self.cgroups.version != 1: + self.skipTest("not relevant in unified hierarchy") (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") self.check_exitcode(result, 0, "exit code of cat is not zero") for line in output: @@ -743,10 +751,17 @@ def test_set_cgroup_cpu_shares(self): if not os.path.exists(self.echo): self.skipTest("missing echo") try: - self.setUp(additional_cgroup_subsystems=["cpu"]) + if self.cgroups.version == 1: + self.setUp(additional_cgroup_subsystems=["cpu"]) + else: + self.setUp(additional_cgroup_subsystems=["memory"]) except SystemExit as e: self.skipTest(e) - (result, _) = self.execute_run(self.echo, 
cgroupValues={("cpu", "shares"): 42}) + if self.cgroups.version == 1: + cgValues = {("cpu", "shares"): 42} + else: + cgValues = {("memory", "high"): 420000000} + (result, _) = self.execute_run(self.echo, cgroupValues=cgValues) self.check_exitcode(result, 0, "exit code of echo is not zero") # Just assert that execution was successful, # testing that the value was actually set is much more difficult. @@ -798,7 +813,7 @@ def test_frozen_process(self): # https://github.com/sosy-lab/benchexec/issues/840 if not os.path.exists(self.sleep): self.skipTest("missing sleep") - if not os.path.exists("/sys/fs/cgroup/freezer"): + if self.cgroups.version == 1 and not os.path.exists("/sys/fs/cgroup/freezer"): self.skipTest("missing freezer cgroup") self.setUp( dir_modes={ @@ -808,10 +823,7 @@ def test_frozen_process(self): "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, } ) - (result, output) = self.execute_run( - "/bin/sh", - "-c", - """#!/bin/sh + script_v1 = """#!/bin/sh # create process, move it to sub-cgroup, and freeze it set -eu @@ -832,7 +844,33 @@ def test_frozen_process(self): chmod 000 "$cgroup/tasks" echo FROZEN wait $child_pid -""", +""" + script_v2 = """#!/bin/sh +# create process, move it to sub-cgroup, and freeze it +set -eu + +cgroup="/sys/fs/cgroup/$(cut -f 3 -d : /proc/self/cgroup)" +mkdir "$cgroup/tmp" +mkdir "$cgroup/tmp/tmp" + +sleep 10 & +child_pid=$! 
+ +echo $child_pid > "$cgroup/tmp/cgroup.procs" +echo 1 > "$cgroup/tmp/cgroup.freeze" +# remove permissions in order to test our handling of this case +chmod 000 "$cgroup/tmp/cgroup.freeze" +chmod 000 "$cgroup/tmp/cgroup.procs" +chmod 000 "$cgroup/tmp" +chmod 000 "$cgroup/cgroup.freeze" +chmod 000 "$cgroup/cgroup.kill" +echo FROZEN +wait $child_pid +""" + (result, output) = self.execute_run( + "/bin/sh", + "-c", + script_v1 if self.cgroups.version == 1 else script_v2, walltimelimit=1, expect_terminationreason="walltime", ) diff --git a/benchexec/test_util.py b/benchexec/test_util.py index cf2d79bbb..523d7161a 100644 --- a/benchexec/test_util.py +++ b/benchexec/test_util.py @@ -5,6 +5,7 @@ # # SPDX-License-Identifier: Apache-2.0 +from decimal import Decimal import sys import unittest from benchexec.util import ProcessExitCode @@ -54,6 +55,52 @@ def test_parse_timespan_value(self): self.assertEqual(util.parse_timespan_value("1h"), 60 * 60) self.assertEqual(util.parse_timespan_value("1d"), 24 * 60 * 60) + def test_print_decimal_roundtrip(self): + # These values should be printed exactly as in the input (with "+" removed) + test_values = [ + "NaN", + "Inf", + "-Inf", + "+Inf", + "0", + "-0", + "+0", + "0.0", + "-0.0", + "0.00000000000000000000", + "0.00000000000000000001", + "0.00000000123450000000", + "0.1", + "0.10000000000000000000", + "0.99999999999999999999", + "1", + "-1", + "+1", + "1000000000000000000000", + "10000000000.0000000000", + ] + for value in test_values: + expected = value.lstrip("+") + self.assertEqual(expected, util.print_decimal(Decimal(value))) + + def test_print_decimal_int(self): + # These values should be printed like Decimal prints them after quantizing + # to remove the exponent. 
+ test_values = ["0e0", "-0e0", "0e20", "1e0", "1e20", "0e10"] + for value in test_values: + value = Decimal(value) + expected = str(value.quantize(1)) + assert "e" not in expected + self.assertEqual(expected, util.print_decimal(value)) + + def test_print_decimal_float(self): + # These values should be printed like str prints floats. + test_values = ["1e-4", "123e-4", "1234e-4", "1234e-5", "1234e-6"] + for value in test_values: + expected = str(float(value)) + assert "e" not in expected, expected + self.assertEqual(expected, util.print_decimal(Decimal(value))) + class TestProcessExitCode(unittest.TestCase): @classmethod diff --git a/benchexec/util.py b/benchexec/util.py index fa208fb86..f0a3df8b3 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -250,6 +250,49 @@ def non_empty_str(s): return s +def print_decimal(d): + """ + Print a Decimal instance in non-scientific (i.e., decimal) notation with full + precision, i.e., all digits are printed exactly as stored in the Decimal instance. + Note that str(d) always falls back to scientific notation for very small values. + """ + + if d.is_nan(): + return "NaN" + elif d.is_infinite(): + return "Inf" if d > 0 else "-Inf" + assert d.is_finite() + + sign, digits, exp = d.as_tuple() + # sign is 1 if negative + # digits is exactly the sequence of significant digits in the decimal representation + # exp tells us whether we need to shift digits (pos: left shift; neg: right shift). + # left shift can only add zeros, right shift adds decimal separator + + sign = "-" if sign == 1 else "" + digits = list(map(str, digits)) + + if exp >= 0: + if digits == ["0"]: + # special case: return "0" instead of "0000" for "0e4" + return sign + "0" + return sign + "".join(digits) + ("0" * exp) + + # Split digits into parts before and after decimal separator. + # If -exp > len(digits) the result needs to start with "0.", so we force a 0. 
+ integral_part = digits[:exp] or ["0"] + decimal_part = digits[exp:] + assert decimal_part + + return ( + sign + + "".join(integral_part) + + "." + + ("0" * (-exp - len(decimal_part))) # additional zeros if necessary + + "".join(decimal_part) + ) + + def expand_filename_pattern(pattern, base_dir): """ Expand a file name pattern containing wildcards, environment variables etc. diff --git a/debian/README.Debian b/debian/README.Debian index 8eb82911f..c29638d1d 100644 --- a/debian/README.Debian +++ b/debian/README.Debian @@ -1,11 +1,15 @@ benchexec for Debian -------------------- -This package uses a systemd service (benchexec-cgroup) +For systems with cgroupsv1, +this package uses a systemd service (benchexec-cgroup) to configure the Linux kernel cgroups as necessary for BenchExec. Users that should be able to use BenchExec should be added to the group "benchexec". Alternatively, the permissions assigned to the cgroups can be changed by changing the BENCHEXEC_CGROUP_GROUP and BENCHEXEC_CGROUP_PERM environment variables of that service. +For systems with cgroupsv2 and systemd, +no special configuration is necessary. + -- Philipp Wendler Fri, 11 Sep 2015 10:29:29 +0200 diff --git a/debian/benchexec.postinst b/debian/benchexec.postinst index 2ddfec023..7fab8891e 100644 --- a/debian/benchexec.postinst +++ b/debian/benchexec.postinst @@ -13,9 +13,11 @@ GROUP=benchexec add_group() { addgroup --system "${GROUP}" - echo - echo "Please add those user accounts that should be able to use BenchExec to the group ${GROUP}." - echo + if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then + echo + echo "Please add those user accounts that should be able to use BenchExec to the group ${GROUP}." 
+ echo + fi } case "$1" in diff --git a/debian/control b/debian/control index 381832d94..7078ead5c 100644 --- a/debian/control +++ b/debian/control @@ -18,7 +18,7 @@ Vcs-Browser: https://github.com/sosy-lab/benchexec Package: benchexec Architecture: all Depends: ${python3:Depends}, python3-pkg-resources, ${misc:Depends} -Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs +Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd Description: Framework for Reliable Benchmarking and Resource Measurement BenchExec allows benchmarking non-interactive tools on Linux systems. It measures CPU time, wall time, and memory usage of a tool, diff --git a/doc/INSTALL.md b/doc/INSTALL.md index ca0ed97ec..fe401bbd2 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -15,7 +15,7 @@ SPDX-License-Identifier: Apache-2.0 - Python 3.7 or newer - Linux (cf. [Kernel Requirements](#kernel-requirements) below for details) -- Cgroups v1 (cf. [Setting up Cgroups](#setting-up-cgroups) below for details) +- Access to cgroups (cf. [Setting up Cgroups](#setting-up-cgroups) below for details) - x86 or ARM machine (please [contact us](https://github.com/sosy-lab/benchexec/issues/new) for other architectures) The following packages are optional but recommended dependencies: @@ -26,6 +26,7 @@ The following packages are optional but recommended dependencies: - [pqos_wrapper] and [pqos library][pqos] provide isolation of L3 cache and measurement of cache usage and memory bandwidth (only in `benchexec`). +- [pystemd] allows BenchExec to automatically configure cgroups on systems with systemd and cgroups v2. Note that the `table-generator` utility requires only Python and works on all platforms. @@ -41,9 +42,11 @@ and install manually (note that the leading `./` is important, otherwise `apt` w apt install --install-recommends ./benchexec_*.deb -Our package automatically configures the necessary cgroup permissions -if the system uses cgroups v1. 
-Just add the users that should be able to use BenchExec to the group `benchexec` +On Ubuntu 21.10 and newer with the default cgroup config, this is all. + +On older Ubuntu versions or those configured for cgroups v1, +our package automatically configures the necessary cgroup permissions. +Then add the users that should be able to use BenchExec to the group `benchexec` (group membership will be effective after the next login of the respective user): adduser benchexec @@ -68,11 +71,11 @@ To automatically download and install the latest stable version and its dependen from the [Python Packaging Index](https://pypi.python.org/pypi/BenchExec) with pip, run this command: - sudo pip3 install benchexec coloredlogs + sudo pip3 install benchexec[systemd] coloredlogs You can also install BenchExec only for your user with - pip3 install --user benchexec coloredlogs + pip3 install --user benchexec[systemd] coloredlogs In the latter case you probably need to add the directory where pip installs the commands to the PATH environment by adding the following line to your `~/.profile` file: @@ -81,6 +84,8 @@ to the PATH environment by adding the following line to your `~/.profile` file: Of course you can also install BenchExec in a virtualenv if you are familiar with Python tools. +On systems without systemd you can omit the `[systemd]` part. + Please make sure to configure cgroups as [described below](#setting-up-cgroups) and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. @@ -148,17 +153,18 @@ If container mode does not work, please check the [common problems](container.md ## Setting up Cgroups -If you have installed the Debian package and you are running systemd -(default since Debian 8 and Ubuntu 15.04), -the package should have configured everything automatically -as long as the system is using cgroups v1. -Just add your user to the group `benchexec` and reboot: +This depends on whether your system is using cgroups v1 or v2. 
+To find out, please check whether `/sys/fs/cgroup/cgroup.controllers` exists. +If yes, you are using v2, otherwise v1 +(for the purpose of BenchExec, a hybrid usage of v1 and v2 counts as v1). - adduser <USER> benchexec +Then please follow the instructions from the appropriate subsection +and the instructions for [testing and troubleshooting](#testing-cgroups-setup-and-known-problems). -Support for cgroups v2 is still under development for BenchExec. -On recent distributions (e.g., Ubuntu 22.04), -please switch back to cgroups v1 for now by putting +Note that support for cgroups v2 is available only since BenchExec 3.18 +and has received less testing than using cgroups v1 so far. +If you are on a distribution with cgroups v2 and want to switch to cgroups v1, +you can switch back to cgroups v1 for now by putting `systemd.unified_cgroup_hierarchy=0` on the kernel command line. On Debian/Ubuntu, this could be done with the following steps and rebooting afterwards: ``` @@ -166,7 +172,47 @@ echo 'GRUB_CMDLINE_LINUX_DEFAULT="${GRUB_CMDLINE_LINUX_DEFAULT} systemd.unified_ sudo update-grub ``` -### Setting up Cgroups on Machines with systemd +### Setting up Cgroups v2 on Machines with systemd + +This applies for example for Ubuntu 21.10 and newer, +Debian 11 and newer, and most other current Linux distributions. + +BenchExec can use systemd to automatically take care of any necessary cgroup configuration, +so no manual configuration is necessary. +However, the Python package `pystemd` needs to be installed, +which happens automatically if you installed our Ubuntu package +including its recommended dependencies or `benchexec[systemd]` via pip. +If missing, install the package with `sudo apt install python3-pystemd` +or `pip install pystemd` according to how you installed BenchExec. + +BenchExec also works without `pystemd` if you start BenchExec inside its own cgroup.
+The easiest way to do so is using `systemd-run`: + + systemd-run --user --scope --slice=benchexec -p Delegate=yes benchexec ... + +If you want to use systemd to pre-configure the cgroups that BenchExec uses +(e.g., influence the allowed CPU cores etc.), +you can do so by configuring `benchexec.slice` in the user-specific systemd instance(s) +(all cgroups that BenchExec creates will be inside this systemd slice). + +### Setting up Cgroups v2 on Machines without systemd + +This is possible if you ensure manually that +BenchExec is started in its own cgroup +(i.e., a cgroup with no other processes inside). +We recommend using systemd, though. + +### Setting up Cgroups v1 on Machines with systemd and BenchExec as Debian package + +This applies to Ubuntu 21.04 and older and Debian 10 and older +(if the Debian package for BenchExec was used). + +Our Debian package should have configured everything automatically. +Just add your user to the group `benchexec` and reboot: + + adduser <USER> benchexec + +### Setting up Cgroups v1 on Machines with systemd Most distributions today use systemd, and systemd makes extensive usage of cgroups and [claims that it should be the only process that accesses cgroups directly](https://wiki.freedesktop.org/www/Software/systemd/ControlGroupInterface/). @@ -189,9 +235,6 @@ The following steps are necessary: By default, this gives permissions to users of the group `benchexec`, this can be adjusted in the `Environment` line as necessary. - * If the system is using cgroups v2, you need to tell systemd to use cgroups v1 instead - as [described above](#setting-up-cgroups). - By default, BenchExec will automatically attempt to use the cgroup `system.slice/benchexec-cgroup.service` that is created by this service file.
If you use a different cgroup structure, @@ -207,7 +250,7 @@ echo $$ > /sys/fs/cgroup/freezer/<CGROUP>/tasks In any case, please check whether everything works or whether additional settings are necessary as [described below](#testing-cgroups-setup-and-known-problems). -### Setting up Cgroups on Machines without systemd +### Setting up Cgroups v1 on Machines without systemd The cgroup virtual file system is typically mounted at or below `/sys/fs/cgroup`. If it is not, you can mount it with @@ -222,7 +265,7 @@ you can use Of course permissions can also be assigned in a more fine-grained way if necessary. Alternatively, software such as `cgrulesengd` from -the [cgroup-bin](http://libcg.sourceforge.net/) package +the [libcgroup](https://github.com/libcgroup/libcgroup) project can be used to setup the cgroups hierarchy. Note that `cgrulesengd` might interfere with the cgroups of processes, @@ -246,8 +289,14 @@ or whether additional settings are necessary as [described below](#testing-cgrou ### Setting up Cgroups in a Docker/Podman Container -If you want to run benchmarks within a Docker/Podman container, -and the cgroups file system is not available within the container, +If you want to run BenchExec inside a container, +we recommend Podman and systems with cgroups v2. +Then pass `--security-opt unmask=/sys/fs/cgroup` to `podman run`. +This will work if BenchExec is the main process inside the container, +otherwise you need to create an appropriate cgroup hierarchy inside the container, +i.e., one where BenchExec has its own separate cgroup.
+ +For other cases, if the cgroups file system is not available within the container, please use the following command line argument to mount the cgroup hierarchy within the container when starting it (same for Podman): @@ -259,7 +308,7 @@ which are explained in the [container documentation](container.md#using-benchexe ### Testing Cgroups Setup and Known Problems -After installing BenchExec and setting up the cgroups file system, please run +After installing BenchExec and configuring cgroups if appropriate, please run python3 -m benchexec.check_cgroups @@ -267,6 +316,7 @@ This will report warnings and exit with code 1 if something is missing. If you find that something does not work, please check the following list of solutions. +For cgroups v1: If your machine has swap, cgroups should be configured to also track swap memory. This is turned off by several distributions. If the file `memory.memsw.usage_in_bytes` does not exist in the directory @@ -296,3 +346,4 @@ Please refer to the [development instructions](DEVELOPMENT.md). [LXCFS]: https://github.com/lxc/lxcfs [pqos]: https://github.com/intel/intel-cmt-cat/tree/master/pqos [pqos_wrapper]: https://gitlab.com/sosy-lab/software/pqos-wrapper +[pystemd]: https://github.com/systemd/pystemd diff --git a/doc/container.md b/doc/container.md index 72772799e..a01b40b27 100644 --- a/doc/container.md +++ b/doc/container.md @@ -185,7 +185,7 @@ because it provides "rootless" containers To use BenchExec within Podman, start it as a regular user (not root) and use the following arguments: ``` -podman run --security-opt unmask=/proc/* --security-opt seccomp=unconfined ... +podman run --security-opt unmask=/proc/* --security-opt unmask=/sys/fs/cgroup --security-opt seccomp=unconfined ... ``` You may additionally need the arguments documented for [cgroup usage](INSTALL.md#setting-up-cgroups-in-a-dockerpodman-container). 
diff --git a/doc/run-results.md b/doc/run-results.md index 22f30cce4..8563e3ec0 100644 --- a/doc/run-results.md +++ b/doc/run-results.md @@ -57,6 +57,7 @@ The meanings of the current possible result values are as follows: The value might not accurately represent disk I/O due to caches or if virtual block devices such as LVM, RAID, RAM disks etc. are used. - **cpuenergy-pkg`<n>`**: Energy consumption of the CPU ([more information](resources.md#energy)). This is still experimental. +- **pressure-`*`-some**: Number of seconds (as decimal with suffix "s") that at least some process had to wait for the respective resource, e.g., the CPU becoming available ([more information](https://docs.kernel.org/accounting/psi.html)). - **returnvalue**: The return value of the process (between 0 and 255). Not present if process was killed. - **exitsignal**: The signal with which the process was killed (if any). diff --git a/setup.cfg b/setup.cfg index 75a842150..76c30633a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,6 +62,8 @@ zip_safe = True dev = nose >= 1.0 lxml +systemd = + pystemd >= 0.7.0 [options.entry_points] console_scripts =