From 434679d51f8ffb9f09a57de392b399cfa90bb08a Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Fri, 2 Jul 2021 07:59:11 -0500 Subject: [PATCH 001/133] [WIP] cgroups: implement cgroupsv2 support still prototyping --- benchexec/cgroupsv2.py | 498 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 498 insertions(+) create mode 100644 benchexec/cgroupsv2.py diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py new file mode 100644 index 000000000..0777ea060 --- /dev/null +++ b/benchexec/cgroupsv2.py @@ -0,0 +1,498 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import errno +import grp +import logging +import os +import pathlib +import shutil +import signal +import stat +import sys +import tempfile +import time + +from benchexec import systeminfo +from benchexec import util + +__all__ = [ + "find_my_cgroups", + "BLKIO", + "CPUACCT", + "CPUSET", + "FREEZER", + "MEMORY", +] + +CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +"""If we do not have write access to the current cgroup, +attempt to use this cgroup as fallback.""" + +CGROUP_NAME_PREFIX = "benchmark_" + +IO = "io" +CPU = "cpu" +MEMORY = "memory" +ALL_KNOWN_SUBSYSTEMS = { + # cgroups for BenchExec + BLKIO, + CPU, + MEMORY, + "pids", +} + +_PERMISSION_HINT_GROUPS = """ +You need to add your account to the following groups: {0} +Remember to logout and login again afterwards to make group changes effective.""" + +_PERMISSION_HINT_DEBIAN = """ +The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu +Alternatively, you can install benchexec-cgroup.service manually: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + 
+_PERMISSION_HINT_SYSTEMD = """ +The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_OTHER = """ +Please configure your system in way to allow your user to use cgroups: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" + +_ERROR_MSG_PERMISSIONS = """ +Required cgroups are not available because of missing permissions.{0} + +As a temporary workaround, you can also run +"sudo chmod o+wt {1}" +Note that this will grant permissions to more users than typically desired and it will only last until the next reboot.""" + +_ERROR_MSG_OTHER = """ +Required cgroups are not available. +If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" + + +def find_my_cgroups(cgroup_procinfo=None, fallback=True): + """ + Return a Cgroup object with the cgroups of the current process. + Note that it is not guaranteed that all subsystems are available + in the returned object, as a subsystem may not be mounted. + Check with "subsystem in " before using. + A subsystem may also be present but we do not have the rights to create + child cgroups, this can be checked with require_subsystem(). + @param cgroup_procinfo: If given, use this instead of reading /proc/self/cgroup. + @param fallback: Whether to look for a default cgroup as fallback is our cgroup + is not accessible. + """ + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." 
+ ) + if cgroup_paths is None: + my_cgroups = _find_own_cgroups() + else: + my_cgroups = _parse_proc_pid_cgroup(cgroup_path) + + if fallback: + raise "not implemented" + + cgroup_path, controllers = my_cgroups + + return Cgroup(cgroup_path, controllers) + + +def _find_cgroup_mount(): + """ + Return the mountpoint of the cgroupv2 unified hierarchy. + @return Path mountpoint + """ + try: + with open("/proc/mounts", "rt") as mountsFile: + for mount in mountsFile: + mount = mount.split(" ") + if mount[2] == "cgroup2": + return pathlib.Path(mount[1]) + except OSError: + logging.exception("Cannot read /proc/mounts") + + +def _find_own_cgroups(): + """ + For all subsystems, return the information in which (sub-)cgroup this process is in. + (Each process is in exactly cgroup in each hierarchy.) + @return a generator of tuples (subsystem, cgroup) + """ + try: + with open("/proc/self/cgroup", "rt") as ownCgroupsFile: + return _parse_proc_pid_cgroup(ownCgroupsFile): + except OSError: + logging.exception("Cannot read /proc/self/cgroup") + + +def _parse_proc_pid_cgroup(cgroup_file): + """ + Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). + @param content: An iterable over the lines of the file. + @return: a generator of tuples + """ + mountpoint = _find_cgroup_mount() + own_cgroup = cgroup_file.readline().strip().split(":") + path = mountpoint / own_cgroup[2] + with open(path / "cgroup.controllers") as controllers_file: + controllers = controllers_file.readline().strip().split() + + return (path, controllers) + + +def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): + tasksFile = cgroup / "cgroup.procs" + + i = 0 + while True: + i += 1 + # TODO We can probably remove this loop over signals and just send + # SIGKILL. We added this loop when killing sub-processes was not reliable + # and we did not know why, but now it is reliable. 
+ for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: + with open(tasksFile, "rt") as tasks: + task = None + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + + if task is None or not ensure_empty: + return # No process was hanging, exit + # wait for the process to exit, this might take some time + time.sleep(i * 0.5) + + +def remove_cgroup(cgroup): + if not os.path.exists(cgroup): + logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) + return + assert os.path.getsize(os.path.join(cgroup, "tasks")) == 0 + try: + os.rmdir(cgroup) + except OSError: + # sometimes this fails because the cgroup is still busy, we try again once + try: + os.rmdir(cgroup) + except OSError as e: + logging.warning( + "Failed to remove cgroup %s: error %s (%s)", cgroup, e.errno, e.strerror + ) + + +def _register_process_with_cgrulesengd(pid): + """Tell cgrulesengd daemon to not move the given process into other cgroups, + if libcgroup is available. + """ + # Logging/printing from inside preexec_fn would end up in the output file, + # not in the correct logger, thus it is disabled here. + from ctypes import cdll + + raise 'not implemented' + + try: + libcgroup = cdll.LoadLibrary("libcgroup.so.1") + failure = libcgroup.cgroup_init() + if failure: + pass + else: + CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 + failure = libcgroup.cgroup_register_unchanged_process( + pid, CGROUP_DAEMON_UNCHANGE_CHILDREN + ) + if failure: + pass + # print(f'Could not register process to cgrulesndg, error {success}. ' + # 'Probably the daemon will mess up our cgroups.') + except OSError: + pass + + +class CgroupV2(object): + def __init__(self, cgroup_path, controllers): + assert set(controllers) <= ALL_KNOWN_SUBSYSTEMS + # Also update self.paths on every update to this! 
+ self.controllers = controllers + self.path = cgroup_path + # for error messages: + self.unusable_subsystems = set() + self.denied_subsystems = {} + + def __contains__(self, key): + return key in self.controllers + + def __getitem__(self, key): + raise 'not implemented' + return self.per_subsystem[key] + + def __str__(self): + return str(self.path) + + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. + If the subsystem is enabled but not writable, it will be removed from + this instance such that further checks with "in" will return "False". + @return A boolean value. + """ + if subsystem not in self: + if subsystem not in self.unusable_subsystems: + self.unusable_subsystems.add(subsystem) + log_method( + "Cgroup subsystem %s is not available. " + "Please make sure it is supported by your kernel and mounted.", + subsystem, + ) + return False + + # try: + # test_cgroup = self.create_fresh_child_cgroup(subsystem) + # test_cgroup.remove() + # except OSError as e: + # log_method( + # "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", + # self.per_subsystem[subsystem], + # subsystem, + # e.strerror, + # e.errno, + # ) + # self.unusable_subsystems.add(subsystem) + # if e.errno == errno.EACCES: + # self.denied_subsystems[subsystem] = self.per_subsystem[subsystem] + # del self.per_subsystem[subsystem] + # self.paths = set(self.per_subsystem.values()) + # return False + + return True + + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. 
+ + @param critical_cgroups: set of unusable but required cgroups + """ + if not critical_cgroups: + return + assert critical_cgroups.issubset(self.unusable_subsystems) + + if critical_cgroups.issubset(self.denied_subsystems): + # All errors were because of permissions for these directories + paths = sorted(set(self.denied_subsystems.values())) + + # Check if all cgroups have group permissions and user could just be added + # to some groups to get access. But group 0 (root) of course does not count. + groups = {} + try: + if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): + groups = {os.stat(path).st_gid for path in paths} + except OSError: + pass + if groups and 0 not in groups: + + def get_group_name(gid): + try: + name = grp.getgrgid(gid).gr_name + except KeyError: + name = None + return util.escape_string_shell(name or str(gid)) + + groups = " ".join(sorted(set(map(get_group_name, groups)))) + permission_hint = _PERMISSION_HINT_GROUPS.format(groups) + + elif systeminfo.has_systemd(): + if systeminfo.is_debian(): + permission_hint = _PERMISSION_HINT_DEBIAN + else: + permission_hint = _PERMISSION_HINT_SYSTEMD + + else: + permission_hint = _PERMISSION_HINT_OTHER + + paths = " ".join(map(util.escape_string_shell, paths)) + sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) + + else: + sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + + def create_fresh_child_cgroup(self, *subsystems): + """ + Create child cgroups of the current cgroup for at least the given subsystems. + @return: A Cgroup instance representing the new child cgroup(s). 
+ """ + assert set(subsystems).issubset(self.per_subsystem.keys()) + createdCgroupsPerSubsystem = {} + createdCgroupsPerParent = {} + for subsystem in subsystems: + parentCgroup = self.per_subsystem[subsystem] + if parentCgroup in createdCgroupsPerParent: + # reuse already created cgroup + createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ + parentCgroup + ] + continue + + cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + createdCgroupsPerSubsystem[subsystem] = cgroup + createdCgroupsPerParent[parentCgroup] = cgroup + + # add allowed cpus and memory to cgroup if necessary + # (otherwise we can't add any tasks) + def copy_parent_to_child(name): + shutil.copyfile( + os.path.join(parentCgroup, name), os.path.join(cgroup, name) + ) + + try: + copy_parent_to_child("cpuset.cpus") + copy_parent_to_child("cpuset.mems") + except OSError: + # expected to fail if cpuset subsystem is not enabled in this hierarchy + pass + + return Cgroup(createdCgroupsPerSubsystem) + + def add_task(self, pid): + """ + Add a process to the cgroups represented by this instance. + """ + _register_process_with_cgrulesengd(pid) + with open(self.path / "cgroup.procs"), "w") as tasksFile: + tasksFile.write(str(pid)) + + def get_all_tasks(self, subsystem): + """ + Return a generator of all PIDs currently in this cgroup for the given subsystem. + """ + with open(self.path / "cgroup.procs") as tasksFile: + for line in tasksFile: + yield int(line) + + def kill_all_tasks(self): + """ + Kill all tasks in this cgroup and all its children cgroups forcefully. + Additionally, the children cgroups will be deleted. 
+ """ + + def kill_all_tasks_in_cgroup_recursively(cgroup, delete): + for dirpath, dirs, _files in os.walk(cgroup, topdown=False): + for subCgroup in dirs: + subCgroup = os.path.join(dirpath, subCgroup) + kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) + + if delete: + remove_cgroup(subCgroup) + + kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + + # First, we go through all cgroups recursively while they are frozen and kill + # all processes. This helps against fork bombs and prevents processes from + # creating new subgroups while we are trying to kill everything. + # All processes will stay until they are thawed (so we cannot check for cgroup + # emptiness and we cannot delete subgroups). + freezer_file = self.path / "cgroup.freeze" + + util.write_file("1", freezer_file) + kill_all_tasks_in_cgroup_recursively(self.path, delete=False) + util.write_file("0", freezer_file) + + # Second, we go through all cgroups again, kill what is left, + # check for emptiness, and remove subgroups. + kill_all_tasks_in_cgroup_recursively(self.path, delete=True) + + def has_value(self, subsystem, option): + """ + Check whether the given value exists in the given subsystem. + Does not make a difference whether the value is readable, writable, or both. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + return os.path.isfile( + os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") + ) + + def get_value(self, subsystem, option): + """ + Read the given value from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. 
+ """ + assert subsystem in self, f"Subsystem {subsystem} is missing" + return util.read_file(self.per_subsystem[subsystem], f"{subsystem}.{option}") + + def get_file_lines(self, subsystem, option): + """ + Read the lines of the given file from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + with open( + os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") + ) as f: + for line in f: + yield line + + def get_key_value_pairs(self, subsystem, filename): + """ + Read the lines of the given file from the given subsystem + and split the lines into key-value pairs. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + return util.read_key_value_pairs_from_file( + self.per_subsystem[subsystem], f"{subsystem}.{filename}" + ) + + def set_value(self, subsystem, option, value): + """ + Write the given value for the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + util.write_file( + str(value), self.per_subsystem[subsystem], f"{subsystem}.{option}" + ) + + def remove(self): + """ + Remove all cgroups this instance represents from the system. + This instance is afterwards not usable anymore! + """ + remove_cgroup(self.path) + + del self.path + del self.controllers + + def read_cputime(self): + """ + Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
+ @return cputime usage in seconds + """ + # convert micro-seconds to seconds + return float(self.get_value(CPU, "stat")) / 1_000_000 + + def read_allowed_memory_banks(self): + """Get the list of all memory banks allowed by this cgroup.""" + return util.parse_int_list(self.get_value(CPUSET, "mems")) From 6251cbe3c3cb86b5a1232703b3f3cd6de7dc57e4 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 12 Aug 2021 14:10:03 +0200 Subject: [PATCH 002/133] [WIP] cgroups: default to cgroups v2 code temporarily still prototyping --- benchexec/baseexecutor.py | 2 + benchexec/cgroupsv2.py | 98 +++++++++++++++++-------------------- benchexec/check_cgroups.py | 2 +- benchexec/localexecution.py | 2 +- benchexec/oomhandler.py | 2 +- benchexec/resources.py | 2 +- benchexec/runexecutor.py | 11 +++-- 7 files changed, 58 insertions(+), 61 deletions(-) diff --git a/benchexec/baseexecutor.py b/benchexec/baseexecutor.py index 7b3260f1c..96c0be1bc 100644 --- a/benchexec/baseexecutor.py +++ b/benchexec/baseexecutor.py @@ -110,6 +110,8 @@ def pre_subprocess(): parent_setup = parent_setup_fn() + logging.debug("Executing run with args %s.", args) + logging.debug("Executing run with env %s.", env) p = subprocess.Popen( args, stdin=stdin, diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 0777ea060..f11c02b8c 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -23,27 +23,36 @@ __all__ = [ "find_my_cgroups", "BLKIO", + "CPU" "CPUACCT", "CPUSET", "FREEZER", "MEMORY", ] -CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service/benchexec_root" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" CGROUP_NAME_PREFIX = "benchmark_" +BLKIO = "io" # FIXME legacy +CPUACCT = "cpu" # FIXME legacy +FREEZER = "freeze" # FIXME legacy + IO = "io" CPU = "cpu" +CPUSET = "cpuset" MEMORY = "memory" ALL_KNOWN_SUBSYSTEMS = { # cgroups for BenchExec - BLKIO, + IO, CPU, 
+ CPUSET, MEMORY, "pids", + + FREEZER, # FIXME v1 bc } _PERMISSION_HINT_GROUPS = """ @@ -91,15 +100,17 @@ def find_my_cgroups(cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." ) - if cgroup_paths is None: - my_cgroups = _find_own_cgroups() + if cgroup_procinfo is None: + cgroup_path = _find_own_cgroups() else: - my_cgroups = _parse_proc_pid_cgroup(cgroup_path) + cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) if fallback: - raise "not implemented" + mount = _find_cgroup_mount() + fallback_path = mount / CGROUP_FALLBACK_PATH + cgroup_path = fallback_path - cgroup_path, controllers = my_cgroups + controllers = _supported_cgroup_controllers(cgroup_path) return Cgroup(cgroup_path, controllers) @@ -127,11 +138,20 @@ def _find_own_cgroups(): """ try: with open("/proc/self/cgroup", "rt") as ownCgroupsFile: - return _parse_proc_pid_cgroup(ownCgroupsFile): + return _parse_proc_pid_cgroup(ownCgroupsFile) except OSError: logging.exception("Cannot read /proc/self/cgroup") +def _supported_cgroup_controllers(path: pathlib.Path): + with open(path / "cgroup.controllers") as controllers_file: + controllers = controllers_file.readline().strip().split() + + controllers.append(FREEZER) # FIXME bc, always supported in v2 + + return controllers + + def _parse_proc_pid_cgroup(cgroup_file): """ Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). 
@@ -141,10 +161,8 @@ def _parse_proc_pid_cgroup(cgroup_file): mountpoint = _find_cgroup_mount() own_cgroup = cgroup_file.readline().strip().split(":") path = mountpoint / own_cgroup[2] - with open(path / "cgroup.controllers") as controllers_file: - controllers = controllers_file.readline().strip().split() - return (path, controllers) + return path def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): @@ -199,12 +217,12 @@ def _register_process_with_cgrulesengd(pid): """Tell cgrulesengd daemon to not move the given process into other cgroups, if libcgroup is available. """ + # FIXME is there a libcgroup for v2? + # Logging/printing from inside preexec_fn would end up in the output file, # not in the correct logger, thus it is disabled here. from ctypes import cdll - raise 'not implemented' - try: libcgroup = cdll.LoadLibrary("libcgroup.so.1") failure = libcgroup.cgroup_init() @@ -223,7 +241,7 @@ def _register_process_with_cgrulesengd(pid): pass -class CgroupV2(object): +class Cgroup(object): def __init__(self, cgroup_path, controllers): assert set(controllers) <= ALL_KNOWN_SUBSYSTEMS # Also update self.paths on every update to this! @@ -237,7 +255,7 @@ def __contains__(self, key): return key in self.controllers def __getitem__(self, key): - raise 'not implemented' + raise Exception('not implemented') return self.per_subsystem[key] def __str__(self): @@ -338,44 +356,19 @@ def create_fresh_child_cgroup(self, *subsystems): Create child cgroups of the current cgroup for at least the given subsystems. @return: A Cgroup instance representing the new child cgroup(s). 
""" - assert set(subsystems).issubset(self.per_subsystem.keys()) - createdCgroupsPerSubsystem = {} - createdCgroupsPerParent = {} - for subsystem in subsystems: - parentCgroup = self.per_subsystem[subsystem] - if parentCgroup in createdCgroupsPerParent: - # reuse already created cgroup - createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ - parentCgroup - ] - continue - - cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) - createdCgroupsPerSubsystem[subsystem] = cgroup - createdCgroupsPerParent[parentCgroup] = cgroup - - # add allowed cpus and memory to cgroup if necessary - # (otherwise we can't add any tasks) - def copy_parent_to_child(name): - shutil.copyfile( - os.path.join(parentCgroup, name), os.path.join(cgroup, name) - ) + # assert set(subsystems).issubset(self.per_subsystem.keys()) + cgroup_path = pathlib.Path(tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=self.path)) - try: - copy_parent_to_child("cpuset.cpus") - copy_parent_to_child("cpuset.mems") - except OSError: - # expected to fail if cpuset subsystem is not enabled in this hierarchy - pass - - return Cgroup(createdCgroupsPerSubsystem) + # FIXME do something with subsystems, also subtree_control? + return Cgroup(cgroup_path, self.controllers) def add_task(self, pid): """ Add a process to the cgroups represented by this instance. """ _register_process_with_cgrulesengd(pid) - with open(self.path / "cgroup.procs"), "w") as tasksFile: + with open(self.path / "cgroup.procs", "w") as tasksFile: + print(tasksFile) tasksFile.write(str(pid)) def get_all_tasks(self, subsystem): @@ -437,7 +430,7 @@ def get_value(self, subsystem, option): Only call this method if the given subsystem is available. 
""" assert subsystem in self, f"Subsystem {subsystem} is missing" - return util.read_file(self.per_subsystem[subsystem], f"{subsystem}.{option}") + return util.read_file(self.path / f"{subsystem}.{option}") def get_file_lines(self, subsystem, option): """ @@ -460,9 +453,7 @@ def get_key_value_pairs(self, subsystem, filename): Only call this method if the given subsystem is available. """ assert subsystem in self - return util.read_key_value_pairs_from_file( - self.per_subsystem[subsystem], f"{subsystem}.{filename}" - ) + return util.read_key_value_pairs_from_file(self.path / f"{subsystem}.{filename}") def set_value(self, subsystem, option, value): """ @@ -472,7 +463,7 @@ def set_value(self, subsystem, option, value): """ assert subsystem in self util.write_file( - str(value), self.per_subsystem[subsystem], f"{subsystem}.{option}" + str(value), self.path / f"{subsystem}.{option}" ) def remove(self): @@ -490,8 +481,9 @@ def read_cputime(self): Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
@return cputime usage in seconds """ - # convert micro-seconds to seconds - return float(self.get_value(CPU, "stat")) / 1_000_000 + cpu_stats = dict(self.get_key_value_pairs(CPU, "stat")) + + return float(cpu_stats['usage_usec']) / 1_000_000 def read_allowed_memory_banks(self): """Get the list of all memory banks allowed by this cgroup.""" diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index c1d7f2556..d08f6f919 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -12,7 +12,7 @@ import tempfile import threading -from benchexec.cgroups import CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroupsv2 import CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups from benchexec.runexecutor import RunExecutor from benchexec import util diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index f227a84db..f43161b0d 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -14,7 +14,7 @@ import time from benchexec import BenchExecException -from benchexec import cgroups +from benchexec import cgroupsv2 as cgroups from benchexec import containerexecutor from benchexec import resources from benchexec.runexecutor import RunExecutor diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py index edaf4a89e..86c8c733f 100644 --- a/benchexec/oomhandler.py +++ b/benchexec/oomhandler.py @@ -9,7 +9,7 @@ import os import threading -from benchexec.cgroups import MEMORY +from benchexec.cgroupsv2 import MEMORY from benchexec import util from ctypes import cdll diff --git a/benchexec/resources.py b/benchexec/resources.py index a15aabb8f..dd43fabd1 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -16,7 +16,7 @@ import os import sys -from benchexec import cgroups +from benchexec import cgroupsv2 as cgroups from benchexec import util __all__ = [ diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index bfedfc2e9..1f834b206 100644 --- 
a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -24,7 +24,7 @@ from benchexec import baseexecutor from benchexec import BenchExecException from benchexec import containerexecutor -from benchexec.cgroups import BLKIO, CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroupsv2 import BLKIO, CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups from benchexec.filehierarchylimit import FileHierarchyLimitThread from benchexec import intel_cpu_energy from benchexec import oomhandler @@ -377,14 +377,16 @@ def _init_cgroups(self): if CPUSET in self.cgroups: # Read available cpus/memory nodes: try: - self.cpus = util.parse_int_list(self.cgroups.get_value(CPUSET, "cpus")) + # FIXME self.cpus = util.parse_int_list(self.cgroups.get_value(CPUSET, "cpus")) + self.cpus = util.parse_int_list(self.cgroups.get_value(CPUSET, "cpus.effective")) except ValueError as e: logging.warning("Could not read available CPU cores from kernel: %s", e) logging.debug("List of available CPU cores is %s.", self.cpus) try: self.memory_nodes = util.parse_int_list( - self.cgroups.get_value(CPUSET, "mems") + # FIXME self.cgroups.get_value(CPUSET, "mems") + self.cgroups.get_value(CPUSET, "mems.effective") ) except ValueError as e: logging.warning( @@ -489,7 +491,8 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): # https://www.kernel.org/doc/Documentation/cgroups/memory.txt # (unlike setting the global swappiness to 0). # Our process might get killed because of this. 
- cgroups.set_value(MEMORY, "swappiness", "0") + # FIXME v1 cgroups.set_value(MEMORY, "swappiness", "0") + cgroups.set_value(MEMORY, "swap.max", "0") except OSError as e: logging.warning( "Could not disable swapping for benchmarked process: %s", e From c6155a1cc1e5b8b6d90908bfef1e6b0615216abf Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 26 Aug 2021 14:04:08 +0200 Subject: [PATCH 003/133] cgroups: remove debug logging --- benchexec/baseexecutor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchexec/baseexecutor.py b/benchexec/baseexecutor.py index 96c0be1bc..7b3260f1c 100644 --- a/benchexec/baseexecutor.py +++ b/benchexec/baseexecutor.py @@ -110,8 +110,6 @@ def pre_subprocess(): parent_setup = parent_setup_fn() - logging.debug("Executing run with args %s.", args) - logging.debug("Executing run with env %s.", env) p = subprocess.Popen( args, stdin=stdin, From 5b23179986e4e666f7a7c41f951027654e670bea Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 26 Aug 2021 14:04:20 +0200 Subject: [PATCH 004/133] cgroups: try with user service --- benchexec/cgroupsv2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index f11c02b8c..2a63664c8 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -30,7 +30,8 @@ "MEMORY", ] -CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service/benchexec_root" +# FIXME uid +CGROUP_FALLBACK_PATH = "user.slice/user-1000.slice/user@1000.service/app.slice/benchexec-cgroup.service/benchexec_root" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" From 66c22cdd1b456707d97b70979ecb292a9dee3d84 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 26 Aug 2021 14:05:01 +0200 Subject: [PATCH 005/133] cgroups: fix paths --- benchexec/cgroupsv2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 2a63664c8..5c86c035f 100644 
--- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -201,7 +201,7 @@ def remove_cgroup(cgroup): if not os.path.exists(cgroup): logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) return - assert os.path.getsize(os.path.join(cgroup, "tasks")) == 0 + assert os.path.getsize(cgroup / "cgroup.procs") == 0 try: os.rmdir(cgroup) except OSError: @@ -421,7 +421,7 @@ def has_value(self, subsystem, option): """ assert subsystem in self return os.path.isfile( - os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") + self.path / f"{subsystem}.{option}" ) def get_value(self, subsystem, option): From 9c6e3ffc5d97a17f85c5f84ea29c44cf1a15433e Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 26 Aug 2021 14:05:16 +0200 Subject: [PATCH 006/133] cgroups: remove cgrulesd code in v2 --- benchexec/cgroupsv2.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 5c86c035f..36462798b 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -214,34 +214,6 @@ def remove_cgroup(cgroup): ) -def _register_process_with_cgrulesengd(pid): - """Tell cgrulesengd daemon to not move the given process into other cgroups, - if libcgroup is available. - """ - # FIXME is there a libcgroup for v2? - - # Logging/printing from inside preexec_fn would end up in the output file, - # not in the correct logger, thus it is disabled here. - from ctypes import cdll - - try: - libcgroup = cdll.LoadLibrary("libcgroup.so.1") - failure = libcgroup.cgroup_init() - if failure: - pass - else: - CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 - failure = libcgroup.cgroup_register_unchanged_process( - pid, CGROUP_DAEMON_UNCHANGE_CHILDREN - ) - if failure: - pass - # print(f'Could not register process to cgrulesndg, error {success}. 
' - # 'Probably the daemon will mess up our cgroups.') - except OSError: - pass - - class Cgroup(object): def __init__(self, cgroup_path, controllers): assert set(controllers) <= ALL_KNOWN_SUBSYSTEMS @@ -367,9 +339,7 @@ def add_task(self, pid): """ Add a process to the cgroups represented by this instance. """ - _register_process_with_cgrulesengd(pid) with open(self.path / "cgroup.procs", "w") as tasksFile: - print(tasksFile) tasksFile.write(str(pid)) def get_all_tasks(self, subsystem): From 133d5bf6f3ded53ec480fda8ab2d3ad701381591 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 26 Sep 2021 21:49:34 +0200 Subject: [PATCH 007/133] cgroups: WIP abstraction for v1 & v2 --- benchexec/cgroups.py | 365 +++++--------------------- benchexec/cgroupsv1.py | 454 +++++++++++++++++++++++++++++++++ benchexec/cgroupsv2.py | 267 ++++++------------- benchexec/check_cgroups.py | 19 +- benchexec/container.py | 1 + benchexec/containerexecutor.py | 4 +- benchexec/localexecution.py | 4 +- benchexec/oomhandler.py | 6 +- benchexec/resources.py | 6 +- benchexec/runexecutor.py | 179 ++++++------- 10 files changed, 700 insertions(+), 605 deletions(-) create mode 100644 benchexec/cgroupsv1.py diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index eef8664d1..6fe4b2dad 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -5,6 +5,7 @@ # # SPDX-License-Identifier: Apache-2.0 +from abc import ABC, abstractmethod import errno import grp import logging @@ -16,63 +17,18 @@ import tempfile import time +from benchexec import BenchExecException from benchexec import systeminfo from benchexec import util -__all__ = [ - "find_my_cgroups", - "BLKIO", - "CPUACCT", - "CPUSET", - "FREEZER", - "MEMORY", -] - CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" CGROUP_NAME_PREFIX = "benchmark_" -BLKIO = "blkio" -CPUACCT = "cpuacct" -CPUSET = "cpuset" -FREEZER = "freezer" 
-MEMORY = "memory" -ALL_KNOWN_SUBSYSTEMS = { - # cgroups for BenchExec - BLKIO, - CPUACCT, - CPUSET, - FREEZER, - MEMORY, - # other cgroups users might want - "cpu", - "devices", - "net_cls", - "net_prio", - "hugetlb", - "perf_event", - "pids", -} - -_PERMISSION_HINT_GROUPS = """ -You need to add your account to the following groups: {0} -Remember to logout and login again afterwards to make group changes effective.""" - -_PERMISSION_HINT_DEBIAN = """ -The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu -Alternatively, you can install benchexec-cgroup.service manually: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_SYSTEMD = """ -The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_OTHER = """ -Please configure your system in way to allow your user to use cgroups: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" +CGROUPS_V1 = 1 +CGROUPS_V2 = 2 _ERROR_MSG_PERMISSIONS = """ Required cgroups are not available because of missing permissions.{0} @@ -86,189 +42,68 @@ If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" -def find_my_cgroups(cgroup_paths=None, fallback=True): - """ - Return a Cgroup object with the cgroups of the current process. - Note that it is not guaranteed that all subsystems are available - in the returned object, as a subsystem may not be mounted. - Check with "subsystem in " before using. 
- A subsystem may also be present but we do not have the rights to create - child cgroups, this can be checked with require_subsystem(). - @param cgroup_paths: If given, use this instead of reading /proc/self/cgroup. - @param fallback: Whether to look for a default cgroup as fallback is our cgroup - is not accessible. - """ - logging.debug( - "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." - ) - if cgroup_paths is None: - my_cgroups = dict(_find_own_cgroups()) - else: - my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_paths)) - - cgroupsParents = {} - for subsystem, mount in _find_cgroup_mounts(): - # Ignore mount points where we do not have any access, - # e.g. because a parent directory has insufficient permissions - # (lxcfs mounts cgroups under /run/lxcfs in such a way). - if os.access(mount, os.F_OK): - cgroupPath = os.path.join(mount, my_cgroups[subsystem]) - fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) - if ( - fallback - and not os.access(cgroupPath, os.W_OK) - and os.path.isdir(fallbackPath) - ): - cgroupPath = fallbackPath - cgroupsParents[subsystem] = cgroupPath - - return Cgroup(cgroupsParents) - - -def _find_cgroup_mounts(): - """ - Return the information which subsystems are mounted where. 
- @return a generator of tuples (subsystem, mountpoint) - """ +def _get_cgroup_version(): + version = None try: - with open("/proc/mounts", "rt") as mountsFile: + with open("/proc/mounts") as mountsFile: for mount in mountsFile: mount = mount.split(" ") if mount[2] == "cgroup": - mountpoint = mount[1] - options = mount[3] - for option in options.split(","): - if option in ALL_KNOWN_SUBSYSTEMS: - yield (option, mountpoint) + version = CGROUPS_V1 + + # only set v2 if it's the only active mount + # we don't support crippled hybrid mode + elif mount[2] == "cgroup2" and version != CGROUPS_V1: + version = CGROUPS_V2 + + if version is None: + raise BenchExecException("Could not detect Cgroup Version") except OSError: logging.exception("Cannot read /proc/mounts") + return version -def _find_own_cgroups(): - """ - For all subsystems, return the information in which (sub-)cgroup this process is in. - (Each process is in exactly cgroup in each hierarchy.) - @return a generator of tuples (subsystem, cgroup) - """ - try: - with open("/proc/self/cgroup", "rt") as ownCgroupsFile: - for cgroup in _parse_proc_pid_cgroup(ownCgroupsFile): - yield cgroup - except OSError: - logging.exception("Cannot read /proc/self/cgroup") - - -def _parse_proc_pid_cgroup(content): - """ - Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). - @param content: An iterable over the lines of the file. - @return: a generator of tuples - """ - for ownCgroup in content: - # each line is "id:subsystem,subsystem:path" - ownCgroup = ownCgroup.strip().split(":") - try: - path = ownCgroup[2][1:] # remove leading / - except IndexError: - raise IndexError(f"index out of range for {ownCgroup}") - for subsystem in ownCgroup[1].split(","): - yield (subsystem, path) - - -def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): - tasksFile = os.path.join(cgroup, "tasks") - - i = 0 - while True: - i += 1 - # TODO We can probably remove this loop over signals and just send - # SIGKILL. 
We added this loop when killing sub-processes was not reliable - # and we did not know why, but now it is reliable. - for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: - with open(tasksFile, "rt") as tasks: - task = None - for task in tasks: - task = task.strip() - if i > 1: - logging.warning( - "Run has left-over process with pid %s " - "in cgroup %s, sending signal %s (try %s).", - task, - cgroup, - sig, - i, - ) - util.kill_process(int(task), sig) - - if task is None or not ensure_empty: - return # No process was hanging, exit - # wait for the process to exit, this might take some time - time.sleep(i * 0.5) - - -def remove_cgroup(cgroup): - if not os.path.exists(cgroup): - logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) - return - assert os.path.getsize(os.path.join(cgroup, "tasks")) == 0 - try: - os.rmdir(cgroup) - except OSError: - # sometimes this fails because the cgroup is still busy, we try again once - try: - os.rmdir(cgroup) - except OSError as e: - logging.warning( - "Failed to remove cgroup %s: error %s (%s)", cgroup, e.errno, e.strerror - ) - - -def _register_process_with_cgrulesengd(pid): - """Tell cgrulesengd daemon to not move the given process into other cgroups, - if libcgroup is available. - """ - # Logging/printing from inside preexec_fn would end up in the output file, - # not in the correct logger, thus it is disabled here. 
- from ctypes import cdll - try: - libcgroup = cdll.LoadLibrary("libcgroup.so.1") - failure = libcgroup.cgroup_init() - if failure: - pass +class Cgroups(ABC): + @staticmethod + def from_system(cgroup_procinfo=None, fallback=True): + version = _get_cgroup_version() + if version == CGROUPS_V1: + from .cgroupsv1 import CgroupsV1 + + return CgroupsV1(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + elif version == CGROUPS_V2: + from .cgroupsv2 import CgroupsV2 + + return CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): + if subsystems is None: + self.subsystems = self._supported_cgroup_subsystems() else: - CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 - failure = libcgroup.cgroup_register_unchanged_process( - pid, CGROUP_DAEMON_UNCHANGE_CHILDREN - ) - if failure: - pass - # print(f'Could not register process to cgrulesndg, error {success}. ' - # 'Probably the daemon will mess up our cgroups.') - except OSError: - pass + self.subsystems = subsystems + + assert set(self.subsystems.keys()) <= self.KNOWN_SUBSYSTEMS + assert all(self.subsystems.values()) + self.paths = set(self.subsystems.values()) # without duplicates -class Cgroup(object): - def __init__(self, cgroupsPerSubsystem): - assert set(cgroupsPerSubsystem.keys()) <= ALL_KNOWN_SUBSYSTEMS - assert all(cgroupsPerSubsystem.values()) - # Also update self.paths on every update to this! 
- self.per_subsystem = cgroupsPerSubsystem - self.paths = set(cgroupsPerSubsystem.values()) # without duplicates # for error messages: self.unusable_subsystems = set() self.denied_subsystems = {} + logging.debug("Available Cgroups: %s", self.subsystems) + def __contains__(self, key): - return key in self.per_subsystem + return key in self.subsystems def __getitem__(self, key): - return self.per_subsystem[key] + return self.subsystems[key] def __str__(self): return str(self.paths) + # FIXME improve message for v2 def require_subsystem(self, subsystem, log_method=logging.warning): """ Check whether the given subsystem is enabled and is writable @@ -288,26 +123,6 @@ def require_subsystem(self, subsystem, log_method=logging.warning): ) return False - try: - test_cgroup = self.create_fresh_child_cgroup(subsystem) - test_cgroup.remove() - except OSError as e: - log_method( - "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", - self.per_subsystem[subsystem], - subsystem, - e.strerror, - e.errno, - ) - self.unusable_subsystems.add(subsystem) - if e.errno == errno.EACCES: - self.denied_subsystems[subsystem] = self.per_subsystem[subsystem] - del self.per_subsystem[subsystem] - self.paths = set(self.per_subsystem.values()) - return False - - return True - def handle_errors(self, critical_cgroups): """ If there were errors in calls to require_subsystem() and critical_cgroups @@ -405,16 +220,6 @@ def add_task(self, pid): with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: tasksFile.write(str(pid)) - def get_all_tasks(self, subsystem): - """ - Return a generator of all PIDs currently in this cgroup for the given subsystem. - """ - with open( - os.path.join(self.per_subsystem[subsystem], "tasks"), "r" - ) as tasksFile: - for line in tasksFile: - yield int(line) - def kill_all_tasks(self): """ Kill all tasks in this cgroup and all its children cgroups forcefully. 
@@ -438,13 +243,10 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): # But this is only possible if we have freezer, and all processes will stay # until they are thawed (so we cannot check for cgroup emptiness and we cannot # delete subgroups). - if FREEZER in self.per_subsystem: - cgroup = self.per_subsystem[FREEZER] - freezer_file = os.path.join(cgroup, "freezer.state") - - util.write_file("FROZEN", freezer_file) + if self.version == 2 or FREEZER in self.impl.per_subsystem: + self.impl.freeze() kill_all_tasks_in_cgroup_recursively(cgroup, delete=False) - util.write_file("THAWED", freezer_file) + self.impl.unfreeze() # Second, we go through all cgroups again, kill what is left, # check for emptiness, and remove subgroups. @@ -452,63 +254,6 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): for cgroup in self.paths: kill_all_tasks_in_cgroup_recursively(cgroup, delete=True) - def has_value(self, subsystem, option): - """ - Check whether the given value exists in the given subsystem. - Does not make a difference whether the value is readable, writable, or both. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - return os.path.isfile( - os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") - ) - - def get_value(self, subsystem, option): - """ - Read the given value from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self, f"Subsystem {subsystem} is missing" - return util.read_file(self.per_subsystem[subsystem], f"{subsystem}.{option}") - - def get_file_lines(self, subsystem, option): - """ - Read the lines of the given file from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. 
- """ - assert subsystem in self - with open( - os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") - ) as f: - for line in f: - yield line - - def get_key_value_pairs(self, subsystem, filename): - """ - Read the lines of the given file from the given subsystem - and split the lines into key-value pairs. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - return util.read_key_value_pairs_from_file( - self.per_subsystem[subsystem], f"{subsystem}.{filename}" - ) - - def set_value(self, subsystem, option, value): - """ - Write the given value for the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - util.write_file( - str(value), self.per_subsystem[subsystem], f"{subsystem}.{option}" - ) - def remove(self): """ Remove all cgroups this instance represents from the system. 
@@ -526,8 +271,24 @@ def read_cputime(self): @return cputime usage in seconds """ # convert nano-seconds to seconds - return float(self.get_value(CPUACCT, "usage")) / 1_000_000_000 + return self.impl.read_cputime() def read_allowed_memory_banks(self): """Get the list of all memory banks allowed by this cgroup.""" return util.parse_int_list(self.get_value(CPUSET, "mems")) + + @abstractmethod + def read_max_mem_usage(self): + pass + + @abstractmethod + def read_usage_per_cpu(self): + pass + + @abstractmethod + def read_available_cpus(self): + pass + + @abstractmethod + def read_available_mems(self): + pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py new file mode 100644 index 000000000..1602b2159 --- /dev/null +++ b/benchexec/cgroupsv1.py @@ -0,0 +1,454 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +import errno +import grp +import logging +import os +import shutil +import signal +import stat +import sys +import tempfile +import time + +from benchexec import systeminfo +from benchexec import util +from benchexec.cgroups import Cgroups + +# FIXME __all__ ? 
+ +CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +"""If we do not have write access to the current cgroup, +attempt to use this cgroup as fallback.""" + +CGROUP_NAME_PREFIX = "benchmark_" + + +_PERMISSION_HINT_GROUPS = """ +You need to add your account to the following groups: {0} +Remember to logout and login again afterwards to make group changes effective.""" + +_PERMISSION_HINT_DEBIAN = """ +The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu +Alternatively, you can install benchexec-cgroup.service manually: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_SYSTEMD = """ +The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_OTHER = """ +Please configure your system in way to allow your user to use cgroups: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" + + +def _find_own_cgroups(): + """ + For all subsystems, return the information in which (sub-)cgroup this process is in. + (Each process is in exactly cgroup in each hierarchy.) + @return a generator of tuples (subsystem, cgroup) + """ + try: + with open("/proc/self/cgroup", "rt") as ownCgroupsFile: + for cgroup in _parse_proc_pid_cgroup(ownCgroupsFile): + yield cgroup + except OSError: + logging.exception("Cannot read /proc/self/cgroup") + + +def _parse_proc_pid_cgroup(content): + """ + Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). + @param content: An iterable over the lines of the file. 
+ @return: a generator of tuples + """ + for ownCgroup in content: + # each line is "id:subsystem,subsystem:path" + ownCgroup = ownCgroup.strip().split(":") + try: + path = ownCgroup[2][1:] # remove leading / + except IndexError: + raise IndexError(f"index out of range for {ownCgroup}") + for subsystem in ownCgroup[1].split(","): + yield (subsystem, path) + + +def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): + tasksFile = os.path.join(cgroup, "tasks") + + i = 0 + while True: + i += 1 + # TODO We can probably remove this loop over signals and just send + # SIGKILL. We added this loop when killing sub-processes was not reliable + # and we did not know why, but now it is reliable. + for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: + with open(tasksFile, "rt") as tasks: + task = None + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + + if task is None or not ensure_empty: + return # No process was hanging, exit + # wait for the process to exit, this might take some time + time.sleep(i * 0.5) + + +def remove_cgroup(cgroup): + if not os.path.exists(cgroup): + logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) + return + assert os.path.getsize(os.path.join(cgroup, "tasks")) == 0 + try: + os.rmdir(cgroup) + except OSError: + # sometimes this fails because the cgroup is still busy, we try again once + try: + os.rmdir(cgroup) + except OSError as e: + logging.warning( + "Failed to remove cgroup %s: error %s (%s)", cgroup, e.errno, e.strerror + ) + + +def _register_process_with_cgrulesengd(pid): + """Tell cgrulesengd daemon to not move the given process into other cgroups, + if libcgroup is available. + """ + # Logging/printing from inside preexec_fn would end up in the output file, + # not in the correct logger, thus it is disabled here. 
+ from ctypes import cdll + + try: + libcgroup = cdll.LoadLibrary("libcgroup.so.1") + failure = libcgroup.cgroup_init() + if failure: + pass + else: + CGROUP_DAEMON_UNCHANGE_CHILDREN = 0x1 + failure = libcgroup.cgroup_register_unchanged_process( + pid, CGROUP_DAEMON_UNCHANGE_CHILDREN + ) + if failure: + pass + # print(f'Could not register process to cgrulesndg, error {success}. ' + # 'Probably the daemon will mess up our cgroups.') + except OSError: + pass + + +class CgroupsV1(Cgroups): + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): + self.version = 1 + + self.IO = "blkio" + self.CPU = "cpuacct" + self.CPUSET = "cpuset" + self.FREEZE = "freezer" + self.MEMORY = "memory" + + self.KNOWN_SUBSYSTEMS = { + # cgroups for BenchExec + self.IO, + self.CPU, + self.CPUSET, + self.FREEZE, + self.MEMORY, + # other cgroups users might want + "cpu", + "devices", + "net_cls", + "net_prio", + "hugetlb", + "perf_event", + "pids", + } + + super(CgroupsV1, self).__init__(subsystems, cgroup_procinfo, fallback) + + def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): + """ + Return a Cgroup object with the cgroups of the current process. + Note that it is not guaranteed that all subsystems are available + in the returned object, as a subsystem may not be mounted. + Check with "subsystem in " before using. + A subsystem may also be present but we do not have the rights to create + child cgroups, this can be checked with require_subsystem(). + @param cgroup_procinfo: If given, use this instead of reading /proc/self/cgroup. + @param fallback: Whether to look for a default cgroup as fallback if our cgroup + is not accessible. + """ + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." 
+ ) + if cgroup_procinfo is None: + my_cgroups = dict(_find_own_cgroups()) + else: + my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_procingo)) + + cgroupsParents = {} + for subsystem, mount in self._find_cgroup_mounts(): + # Ignore mount points where we do not have any access, + # e.g. because a parent directory has insufficient permissions + # (lxcfs mounts cgroups under /run/lxcfs in such a way). + if os.access(mount, os.F_OK): + cgroupPath = os.path.join(mount, my_cgroups[subsystem]) + fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) + if ( + fallback + and not os.access(cgroupPath, os.W_OK) + and os.path.isdir(fallbackPath) + ): + cgroupPath = fallbackPath + cgroupsParents[subsystem] = cgroupPath + + return cgroupsParents + + def _find_cgroup_mounts(self): + """ + Return the information which subsystems are mounted where. + @return a generator of tuples (subsystem, mountpoint) + """ + try: + with open("/proc/mounts", "rt") as mountsFile: + for mount in mountsFile: + mount = mount.split(" ") + if mount[2] == "cgroup": + mountpoint = mount[1] + options = mount[3] + for option in options.split(","): + if option in self.KNOWN_SUBSYSTEMS: + yield (option, mountpoint) + except OSError: + logging.exception("Cannot read /proc/mounts") + + def create_fresh_child_cgroup(self, *subsystems): + """ + Create child cgroups of the current cgroup for at least the given subsystems. + @return: A Cgroup instance representing the new child cgroup(s). 
+ """ + assert set(subsystems).issubset(self.subsystems.keys()) + createdCgroupsPerSubsystem = {} + createdCgroupsPerParent = {} + for subsystem in subsystems: + parentCgroup = self.subsystems[subsystem] + if parentCgroup in createdCgroupsPerParent: + # reuse already created cgroup + createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ + parentCgroup + ] + continue + + cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + createdCgroupsPerSubsystem[subsystem] = cgroup + createdCgroupsPerParent[parentCgroup] = cgroup + + # add allowed cpus and memory to cgroup if necessary + # (otherwise we can't add any tasks) + def copy_parent_to_child(name): + shutil.copyfile( + os.path.join(parentCgroup, name), os.path.join(cgroup, name) + ) + + try: + copy_parent_to_child("cpuset.cpus") + copy_parent_to_child("cpuset.mems") + except OSError: + # expected to fail if cpuset subsystem is not enabled in this hierarchy + pass + + return CgroupsV1(createdCgroupsPerSubsystem) + + def add_task(self, pid): + """ + Add a process to the cgroups represented by this instance. + """ + _register_process_with_cgrulesengd(pid) + for cgroup in self.paths: + with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: + tasksFile.write(str(pid)) + + def get_all_tasks(self, subsystem): + """ + Return a generator of all PIDs currently in this cgroup for the given subsystem. + """ + with open(os.path.join(self.subsystems[subsystem], "tasks"), "r") as tasksFile: + for line in tasksFile: + yield int(line) + + def kill_all_tasks(self): + """ + Kill all tasks in this cgroup and all its children cgroups forcefully. + Additionally, the children cgroups will be deleted. 
+ """ + + def kill_all_tasks_in_cgroup_recursively(cgroup, delete): + for dirpath, dirs, _files in os.walk(cgroup, topdown=False): + for subCgroup in dirs: + subCgroup = os.path.join(dirpath, subCgroup) + kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) + + if delete: + remove_cgroup(subCgroup) + + kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + + # First, we go through all cgroups recursively while they are frozen and kill + # all processes. This helps against fork bombs and prevents processes from + # creating new subgroups while we are trying to kill everything. + # But this is only possible if we have freezer, and all processes will stay + # until they are thawed (so we cannot check for cgroup emptiness and we cannot + # delete subgroups). + if self.FREEZE in self.subsystems: + cgroup = self.subsystems[self.FREEZE] + freezer_file = os.path.join(cgroup, "freezer.state") + + util.write_file("FROZEN", freezer_file) + kill_all_tasks_in_cgroup_recursively(cgroup, delete=False) + util.write_file("THAWED", freezer_file) + + # Second, we go through all cgroups again, kill what is left, + # check for emptiness, and remove subgroups. + # Furthermore, we do this for all hierarchies, not only the one with freezer. + for cgroup in self.paths: + kill_all_tasks_in_cgroup_recursively(cgroup, delete=True) + + def has_value(self, subsystem, option): + """ + Check whether the given value exists in the given subsystem. + Does not make a difference whether the value is readable, writable, or both. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + return os.path.isfile( + os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") + ) + + def get_value(self, subsystem, option): + """ + Read the given value from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. 
+ """ + assert subsystem in self, f"Subsystem {subsystem} is missing" + return util.read_file(self.subsystems[subsystem], f"{subsystem}.{option}") + + def get_file_lines(self, subsystem, option): + """ + Read the lines of the given file from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + with open( + os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") + ) as f: + for line in f: + yield line + + def get_key_value_pairs(self, subsystem, filename): + """ + Read the lines of the given file from the given subsystem + and split the lines into key-value pairs. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + return util.read_key_value_pairs_from_file( + self.subsystems[subsystem], f"{subsystem}.{filename}" + ) + + def set_value(self, subsystem, option, value): + """ + Write the given value for the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + util.write_file(str(value), self.subsystems[subsystem], f"{subsystem}.{option}") + + def remove(self): + """ + Remove all cgroups this instance represents from the system. + This instance is afterwards not usable anymore! + """ + for cgroup in self.paths: + remove_cgroup(cgroup) + + # ? + del self.paths + del self.subsystems + + def read_cputime(self): + """ + Read the cputime usage of this cgroup. CPUACCT cgroup needs to be available. 
+ @return cputime usage in seconds + """ + # convert nano-seconds to seconds + return float(self.get_value(self.CPU, "usage")) / 1_000_000_000 + + def read_allowed_memory_banks(self): + """Get the list of all memory banks allowed by this cgroup.""" + return util.parse_int_list(self.get_value(self.CPUSET, "mems")) + + def read_max_mem_usage(self): + # This measurement reads the maximum number of bytes of RAM+Swap the process used. + # For more details, c.f. the kernel documentation: + # https://www.kernel.org/doc/Documentation/cgroups/memory.txt + memUsageFile = "memsw.max_usage_in_bytes" + if not self.has_value(self.MEMORY, memUsageFile): + memUsageFile = "max_usage_in_bytes" + if self.has_value(self.MEMORY, memUsageFile): + try: + return int(self.get_value(self.MEMORY, memUsageFile)) + except OSError as e: + if e.errno == errno.ENOTSUP: + # kernel responds with operation unsupported if this is disabled + logging.critical( + "Kernel does not track swap memory usage, cannot measure memory usage." + " Please set swapaccount=1 on your kernel command line." 
+ ) + else: + raise e + + return None + + def read_usage_per_cpu(self): + usage = {} + for (core, coretime) in enumerate( + self.get_value(self.CPU, "usage_percpu").split(" ") + ): + try: + coretime = int(coretime) + if coretime != 0: + # convert nanoseconds to seconds + usage[core] = coretime / 1_000_000_000 + except (OSError, ValueError) as e: + logging.debug( + "Could not read CPU time for core %s from kernel: %s", core, e + ) + + return usage + + def read_available_cpus(self): + return util.parse_int_list(self.get_value(self.CPUSET, "cpus")) + + def read_available_mems(self): + return util.parse_int_list(self.get_value(self.CPUSET, "mems")) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 36462798b..7c892cfd9 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -19,16 +19,8 @@ from benchexec import systeminfo from benchexec import util +from benchexec.cgroups import Cgroups -__all__ = [ - "find_my_cgroups", - "BLKIO", - "CPU" - "CPUACCT", - "CPUSET", - "FREEZER", - "MEMORY", -] # FIXME uid CGROUP_FALLBACK_PATH = "user.slice/user-1000.slice/user@1000.service/app.slice/benchexec-cgroup.service/benchexec_root" @@ -37,24 +29,6 @@ CGROUP_NAME_PREFIX = "benchmark_" -BLKIO = "io" # FIXME legacy -CPUACCT = "cpu" # FIXME legacy -FREEZER = "freeze" # FIXME legacy - -IO = "io" -CPU = "cpu" -CPUSET = "cpuset" -MEMORY = "memory" -ALL_KNOWN_SUBSYSTEMS = { - # cgroups for BenchExec - IO, - CPU, - CPUSET, - MEMORY, - "pids", - - FREEZER, # FIXME v1 bc -} _PERMISSION_HINT_GROUPS = """ You need to add your account to the following groups: {0} @@ -74,47 +48,6 @@ Please configure your system in way to allow your user to use cgroups: https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" -_ERROR_MSG_PERMISSIONS = """ -Required cgroups are not available because of missing permissions.{0} - -As a temporary workaround, you can also run -"sudo chmod o+wt {1}" -Note that this will grant 
permissions to more users than typically desired and it will only last until the next reboot.""" - -_ERROR_MSG_OTHER = """ -Required cgroups are not available. -If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" - - -def find_my_cgroups(cgroup_procinfo=None, fallback=True): - """ - Return a Cgroup object with the cgroups of the current process. - Note that it is not guaranteed that all subsystems are available - in the returned object, as a subsystem may not be mounted. - Check with "subsystem in " before using. - A subsystem may also be present but we do not have the rights to create - child cgroups, this can be checked with require_subsystem(). - @param cgroup_procinfo: If given, use this instead of reading /proc/self/cgroup. - @param fallback: Whether to look for a default cgroup as fallback is our cgroup - is not accessible. - """ - logging.debug( - "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." - ) - if cgroup_procinfo is None: - cgroup_path = _find_own_cgroups() - else: - cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) - - if fallback: - mount = _find_cgroup_mount() - fallback_path = mount / CGROUP_FALLBACK_PATH - cgroup_path = fallback_path - - controllers = _supported_cgroup_controllers(cgroup_path) - - return Cgroup(cgroup_path, controllers) - def _find_cgroup_mount(): """ @@ -144,15 +77,6 @@ def _find_own_cgroups(): logging.exception("Cannot read /proc/self/cgroup") -def _supported_cgroup_controllers(path: pathlib.Path): - with open(path / "cgroup.controllers") as controllers_file: - controllers = controllers_file.readline().strip().split() - - controllers.append(FREEZER) # FIXME bc, always supported in v2 - - return controllers - - def _parse_proc_pid_cgroup(cgroup_file): """ Parse a /proc/*/cgroup file into tuples of (subsystem,cgroup). 
@@ -214,126 +138,66 @@ def remove_cgroup(cgroup): ) -class Cgroup(object): - def __init__(self, cgroup_path, controllers): - assert set(controllers) <= ALL_KNOWN_SUBSYSTEMS - # Also update self.paths on every update to this! - self.controllers = controllers - self.path = cgroup_path - # for error messages: - self.unusable_subsystems = set() - self.denied_subsystems = {} - - def __contains__(self, key): - return key in self.controllers +class CgroupsV2(Cgroups): + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): + self.version = 2 - def __getitem__(self, key): - raise Exception('not implemented') - return self.per_subsystem[key] + self.IO = "io" + self.CPU = "cpu" + self.CPUSET = "cpuset" + self.MEMORY = "memory" + self.PID = "pids" + self.FREEZE = "freeze" - def __str__(self): - return str(self.path) + self.KNOWN_SUBSYSTEMS = { + # cgroups for BenchExec + self.IO, + self.CPU, + self.CPUSET, + self.MEMORY, + self.PID, + # not really a subsystem anymore, but implicitly supported + self.FREEZE, + } - def require_subsystem(self, subsystem, log_method=logging.warning): - """ - Check whether the given subsystem is enabled and is writable - (i.e., new cgroups can be created for it). - Produces a log message for the user if one of the conditions is not fulfilled. - If the subsystem is enabled but not writable, it will be removed from - this instance such that further checks with "in" will return "False". - @return A boolean value. - """ - if subsystem not in self: - if subsystem not in self.unusable_subsystems: - self.unusable_subsystems.add(subsystem) - log_method( - "Cgroup subsystem %s is not available. 
" - "Please make sure it is supported by your kernel and mounted.", - subsystem, - ) - return False - - # try: - # test_cgroup = self.create_fresh_child_cgroup(subsystem) - # test_cgroup.remove() - # except OSError as e: - # log_method( - # "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", - # self.per_subsystem[subsystem], - # subsystem, - # e.strerror, - # e.errno, - # ) - # self.unusable_subsystems.add(subsystem) - # if e.errno == errno.EACCES: - # self.denied_subsystems[subsystem] = self.per_subsystem[subsystem] - # del self.per_subsystem[subsystem] - # self.paths = set(self.per_subsystem.values()) - # return False - - return True - - def handle_errors(self, critical_cgroups): - """ - If there were errors in calls to require_subsystem() and critical_cgroups - is not empty, terminate the program with an error message that explains how to - fix the problem. + super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) - @param critical_cgroups: set of unusable but required cgroups - """ - if not critical_cgroups: - return - assert critical_cgroups.issubset(self.unusable_subsystems) - - if critical_cgroups.issubset(self.denied_subsystems): - # All errors were because of permissions for these directories - paths = sorted(set(self.denied_subsystems.values())) - - # Check if all cgroups have group permissions and user could just be added - # to some groups to get access. But group 0 (root) of course does not count. 
- groups = {} - try: - if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): - groups = {os.stat(path).st_gid for path in paths} - except OSError: - pass - if groups and 0 not in groups: - - def get_group_name(gid): - try: - name = grp.getgrgid(gid).gr_name - except KeyError: - name = None - return util.escape_string_shell(name or str(gid)) - - groups = " ".join(sorted(set(map(get_group_name, groups)))) - permission_hint = _PERMISSION_HINT_GROUPS.format(groups) - - elif systeminfo.has_systemd(): - if systeminfo.is_debian(): - permission_hint = _PERMISSION_HINT_DEBIAN - else: - permission_hint = _PERMISSION_HINT_SYSTEMD - - else: - permission_hint = _PERMISSION_HINT_OTHER - - paths = " ".join(map(util.escape_string_shell, paths)) - sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) + self.path = next(iter(self.subsystems.values())) + def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." + ) + if cgroup_procinfo is None: + cgroup_path = _find_own_cgroups() else: - sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) + + if fallback: + mount = _find_cgroup_mount() + fallback_path = mount / CGROUP_FALLBACK_PATH + cgroup_path = fallback_path + + with open(cgroup_path / "cgroup.subsystems") as subsystems_file: + subsystems = subsystems_file.readline().strip().split() + + # always supported in v2 + subsystems.append(self.FREEZE) + + return {k: cgroup_path for k in subsystems} def create_fresh_child_cgroup(self, *subsystems): """ Create child cgroups of the current cgroup for at least the given subsystems. @return: A Cgroup instance representing the new child cgroup(s). 
""" - # assert set(subsystems).issubset(self.per_subsystem.keys()) - cgroup_path = pathlib.Path(tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=self.path)) + assert set(subsystems).issubset(self.subsystems.keys()) + cgroup_path = pathlib.Path( + tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=self.path) + ) # FIXME do something with subsystems, also subtree_control? - return Cgroup(cgroup_path, self.controllers) + return CgroupsV2({c: cgroup_path for c in self.subsystems}) def add_task(self, pid): """ @@ -390,9 +254,7 @@ def has_value(self, subsystem, option): Only call this method if the given subsystem is available. """ assert subsystem in self - return os.path.isfile( - self.path / f"{subsystem}.{option}" - ) + return os.path.isfile(self.path / f"{subsystem}.{option}") def get_value(self, subsystem, option): """ @@ -423,8 +285,11 @@ def get_key_value_pairs(self, subsystem, filename): Do not include the subsystem name in the option name. Only call this method if the given subsystem is available. """ - assert subsystem in self - return util.read_key_value_pairs_from_file(self.path / f"{subsystem}.{filename}") + # FIXME v2 has basic cpu support even if not enabled + # assert subsystem in self + return util.read_key_value_pairs_from_file( + self.path / f"{subsystem}.{filename}" + ) def set_value(self, subsystem, option, value): """ @@ -434,7 +299,7 @@ def set_value(self, subsystem, option, value): """ assert subsystem in self util.write_file( - str(value), self.path / f"{subsystem}.{option}" + str(value), self.subsystems[subsystem] / f"{subsystem}.{option}" ) def remove(self): @@ -444,18 +309,34 @@ def remove(self): """ remove_cgroup(self.path) - del self.path - del self.controllers + # FIXME why, we're not C? + del self.subsystems def read_cputime(self): """ Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
@return cputime usage in seconds """ - cpu_stats = dict(self.get_key_value_pairs(CPU, "stat")) + cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) - return float(cpu_stats['usage_usec']) / 1_000_000 + return float(cpu_stats["usage_usec"]) / 1_000_000 def read_allowed_memory_banks(self): """Get the list of all memory banks allowed by this cgroup.""" return util.parse_int_list(self.get_value(CPUSET, "mems")) + + def read_max_mem_usage(self): + logging.debug("Memory-usage not supported in cgroups v2") + + return None + + def read_usage_per_cpu(self): + logging.debug("Usage per CPU not supported in cgroups v2") + + return {} + + def read_available_cpus(self): + return util.parse_int_list(self.get_value(self.CPUSET, "cpus.effective")) + + def read_available_mems(self): + return util.parse_int_list(self.get_value(self.CPUSET, "mems.effective")) diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index d08f6f919..1dd4dca95 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -12,7 +12,7 @@ import tempfile import threading -from benchexec.cgroupsv2 import CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroups import Cgroups from benchexec.runexecutor import RunExecutor from benchexec import util @@ -34,10 +34,10 @@ def check_cgroup_availability(wait=1): my_cgroups = runexecutor.cgroups if not ( - CPUACCT in my_cgroups - and CPUSET in my_cgroups + my_cgroups.CPU in my_cgroups + and my_cgroups.CPUSET in my_cgroups # and FREEZER in my_cgroups # For now, we do not require freezer - and MEMORY in my_cgroups + and my_cgroups.MEMORY in my_cgroups ): sys.exit(1) @@ -47,7 +47,7 @@ def check_cgroup_availability(wait=1): tmp.name, memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET - cores=util.parse_int_list(my_cgroups.get_value(CPUSET, "cpus")), + cores=util.parse_int_list(my_cgroups.get_value(my_cgroups.CPUSET, "cpus")), 
memory_nodes=my_cgroups.read_allowed_memory_banks(), ) lines = [] @@ -59,10 +59,15 @@ def check_cgroup_availability(wait=1): and not all(c == "-" for c in line) ): lines.append(line) - task_cgroups = find_my_cgroups(lines, fallback=False) + task_cgroups = Cgroups.from_system(cgroup_procinfo=lines, fallback=False) fail = False - for subsystem in CPUACCT, CPUSET, MEMORY, FREEZER: + for subsystem in ( + my_cgroups.CPU, + my_cgroups.CPUSET, + my_cgroups.MEMORY, + my_cgroups.FREEZE, + ): if subsystem in my_cgroups: if not task_cgroups[subsystem].startswith( os.path.join(my_cgroups[subsystem], "benchmark_") diff --git a/benchexec/container.py b/benchexec/container.py index 90993bb7e..f3a573f99 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -591,6 +591,7 @@ def determine_directory_mode(dir_modes, path, fstype=None): or fstype == b"autofs" or fstype == b"vfat" or fstype == b"ntfs" + or fstype == b"zfs" ) ): # Overlayfs does not support these as underlying file systems. diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 21b60d934..26fe98136 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -22,7 +22,7 @@ from benchexec import __version__ from benchexec import baseexecutor from benchexec import BenchExecException -from benchexec.cgroups import Cgroup +from benchexec.cgroups import Cgroups from benchexec import container from benchexec import libc from benchexec import util @@ -439,7 +439,7 @@ def execute_run( root_dir=rootDir, cwd=workingDir, temp_dir=temp_dir, - cgroups=Cgroup({}), + cgroups=Cgroups(), output_dir=output_dir, result_files_patterns=result_files_patterns, child_setup_fn=util.dummy_fn, diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index f43161b0d..5bf0cb174 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -14,7 +14,7 @@ import time from benchexec import BenchExecException -from benchexec import cgroupsv2 as cgroups +from 
benchexec.cgroups import Cgroups from benchexec import containerexecutor from benchexec import resources from benchexec.runexecutor import RunExecutor @@ -69,7 +69,7 @@ def execute_benchmark(benchmark, output_handler): "only resource limits are used." ) - my_cgroups = cgroups.find_my_cgroups() + my_cgroups = Cgroups.from_system() coreAssignment = None # cores per run memoryAssignment = None # memory banks per run diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py index 86c8c733f..211da026e 100644 --- a/benchexec/oomhandler.py +++ b/benchexec/oomhandler.py @@ -9,7 +9,7 @@ import os import threading -from benchexec.cgroupsv2 import MEMORY +from benchexec.cgroups import Cgroups from benchexec import util from ctypes import cdll @@ -53,7 +53,8 @@ def __init__(self, cgroups, pid_to_kill, callbackFn=lambda reason: None): self._cgroups = cgroups self._callback = callbackFn - cgroup = cgroups[MEMORY] # for raw access + # FIXME + cgroup = cgroups[cgroups.MEMORY] # for raw access ofd = os.open(os.path.join(cgroup, "memory.oom_control"), os.O_WRONLY) try: # Important to use CLOEXEC, otherwise the benchmarked tool inherits @@ -103,6 +104,7 @@ def run(self): ) util.kill_process(self._pid_to_kill) # Also kill all children of subprocesses directly. + # FIXME with open(os.path.join(self._cgroups[MEMORY], "tasks"), "rt") as tasks: for task in tasks: util.kill_process(int(task)) diff --git a/benchexec/resources.py b/benchexec/resources.py index dd43fabd1..ccce47aad 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -16,7 +16,7 @@ import os import sys -from benchexec import cgroupsv2 as cgroups +from benchexec import cgroups from benchexec import util __all__ = [ @@ -360,8 +360,8 @@ def _get_memory_banks_listed_in_dir(path): def check_memory_size(memLimit, num_of_threads, memoryAssignment, my_cgroups): """Check whether the desired amount of parallel benchmarks fits in the memory. 
- Implemented are checks for memory limits via cgroup controller "memory" and - memory bank restrictions via cgroup controller "cpuset", + Implemented are checks for memory limits via cgroup subsystem "memory" and + memory bank restrictions via cgroup subsystem "cpuset", as well as whether the system actually has enough memory installed. @param memLimit: the memory limit in bytes per run @param num_of_threads: the number of parallel benchmark executions diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 1f834b206..6f4c495f1 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -24,7 +24,7 @@ from benchexec import baseexecutor from benchexec import BenchExecException from benchexec import containerexecutor -from benchexec.cgroupsv2 import BLKIO, CPUACCT, CPUSET, FREEZER, MEMORY, find_my_cgroups +from benchexec.cgroups import Cgroups from benchexec.filehierarchylimit import FileHierarchyLimitThread from benchexec import intel_cpu_energy from benchexec import oomhandler @@ -328,7 +328,7 @@ def _init_cgroups(self): """ This function initializes the cgroups for the limitations and measurements. 
""" - self.cgroups = find_my_cgroups() + self.cgroups = Cgroups.from_system() critical_cgroups = set() for subsystem in self._cgroup_subsystems: @@ -341,28 +341,30 @@ def _init_cgroups(self): ) # Feature is still experimental, do not warn loudly - self.cgroups.require_subsystem(BLKIO, log_method=logging.debug) - if BLKIO not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.IO, log_method=logging.debug) + if self.cgroups.IO not in self.cgroups: logging.debug("Cannot measure I/O without blkio cgroup.") - self.cgroups.require_subsystem(CPUACCT) - if CPUACCT not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.CPU) + if self.cgroups.CPU not in self.cgroups and self.cgroups.version == 1: logging.warning("Cannot measure CPU time without cpuacct cgroup.") - self.cgroups.require_subsystem(FREEZER) - if FREEZER not in self.cgroups and not self._use_namespaces: - critical_cgroups.add(FREEZER) + self.cgroups.require_subsystem(self.cgroups.FREEZE) + if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: + critical_cgroups.add(self.cgroups.FREEZE) logging.error( "Cannot reliably kill sub-processes without freezer cgroup " "or container mode. Please enable at least one of them." ) - self.cgroups.require_subsystem(MEMORY) - if MEMORY not in self.cgroups: + self.cgroups.require_subsystem(self.cgroups.MEMORY) + if self.cgroups.MEMORY not in self.cgroups: logging.warning("Cannot measure memory consumption without memory cgroup.") else: if systeminfo.has_swap() and ( - not self.cgroups.has_value(MEMORY, "memsw.max_usage_in_bytes") + not self.cgroups.has_value( + self.cgroups.MEMORY, "memsw.max_usage_in_bytes" + ) ): logging.warning( "Kernel misses feature for accounting swap memory, but machine has swap. " @@ -371,23 +373,19 @@ def _init_cgroups(self): '"sudo swapoff -a".' 
) - self.cgroups.require_subsystem(CPUSET) + self.cgroups.require_subsystem(self.cgroups.CPUSET) self.cpus = None # to indicate that we cannot limit cores self.memory_nodes = None # to indicate that we cannot limit cores - if CPUSET in self.cgroups: + if self.cgroups.CPUSET in self.cgroups: # Read available cpus/memory nodes: try: - # FIXME self.cpus = util.parse_int_list(self.cgroups.get_value(CPUSET, "cpus")) - self.cpus = util.parse_int_list(self.cgroups.get_value(CPUSET, "cpus.effective")) + self.cpus = self.cgroups.read_available_cpus() except ValueError as e: logging.warning("Could not read available CPU cores from kernel: %s", e) logging.debug("List of available CPU cores is %s.", self.cpus) try: - self.memory_nodes = util.parse_int_list( - # FIXME self.cgroups.get_value(CPUSET, "mems") - self.cgroups.get_value(CPUSET, "mems.effective") - ) + self.memory_nodes = self.cgroups.read_available_cpus() except ValueError as e: logging.warning( "Could not read available memory nodes from kernel: %s", str(e) @@ -417,9 +415,14 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): logging.debug("Setting up cgroups for run.") # Setup cgroups, need a single call to create_cgroup() for all subsystems - subsystems = [BLKIO, CPUACCT, FREEZER, MEMORY] + self._cgroup_subsystems + subsystems = [ + self.cgroups.IO, + self.cgroups.CPU, + self.cgroups.FREEZE, + self.cgroups.MEMORY, + ] + self._cgroup_subsystems if my_cpus is not None or memory_nodes is not None: - subsystems.append(CPUSET) + subsystems.append(self.cgroups.CPUSET) subsystems = [s for s in subsystems if s in self.cgroups] cgroups = self.cgroups.create_fresh_child_cgroup(*subsystems) @@ -447,33 +450,35 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): # Setup cpuset cgroup if necessary to limit the CPU cores/memory nodes to be used. 
if my_cpus is not None: my_cpus_str = ",".join(map(str, my_cpus)) - cgroups.set_value(CPUSET, "cpus", my_cpus_str) - my_cpus_str = cgroups.get_value(CPUSET, "cpus") + cgroups.set_value(self.cgroups.CPUSET, "cpus", my_cpus_str) + my_cpus_str = cgroups.get_value(self.cgroups.CPUSET, "cpus") logging.debug("Using cpu cores [%s].", my_cpus_str) if memory_nodes is not None: - cgroups.set_value(CPUSET, "mems", ",".join(map(str, memory_nodes))) - memory_nodesStr = cgroups.get_value(CPUSET, "mems") + cgroups.set_value( + self.cgroups.CPUSET, "mems", ",".join(map(str, memory_nodes)) + ) + memory_nodesStr = cgroups.get_value(self.cgroups.CPUSET, "mems") logging.debug("Using memory nodes [%s].", memory_nodesStr) # Setup memory limit if memlimit is not None: limit = "limit_in_bytes" - cgroups.set_value(MEMORY, limit, memlimit) + cgroups.set_value(cgroups.MEMORY, limit, memlimit) swap_limit = "memsw.limit_in_bytes" # We need swap limit because otherwise the kernel just starts swapping # out our process if the limit is reached. # Some kernels might not have this feature, # which is ok if there is actually no swap. - if not cgroups.has_value(MEMORY, swap_limit): + if not cgroups.has_value(cgroups.MEMORY, swap_limit): if systeminfo.has_swap(): sys.exit( 'Kernel misses feature for accounting swap memory, but machine has swap. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' 
) else: try: - cgroups.set_value(MEMORY, swap_limit, memlimit) + cgroups.set_value(cgroups.MEMORY, swap_limit, memlimit) except OSError as e: if e.errno == errno.ENOTSUP: # kernel responds with operation unsupported if this is disabled @@ -482,17 +487,17 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): ) raise e - memlimit = cgroups.get_value(MEMORY, limit) + memlimit = cgroups.get_value(cgroups.MEMORY, limit) logging.debug("Effective memory limit is %s bytes.", memlimit) - if MEMORY in cgroups: + if cgroups.MEMORY in cgroups: try: # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt # (unlike setting the global swappiness to 0). # Our process might get killed because of this. # FIXME v1 cgroups.set_value(MEMORY, "swappiness", "0") - cgroups.set_value(MEMORY, "swap.max", "0") + cgroups.set_value(cgroups.MEMORY, "swap.max", "0") except OSError as e: logging.warning( "Could not disable swapping for benchmarked process: %s", e @@ -578,7 +583,9 @@ def _setup_cgroup_memory_limit(self, memlimit, cgroups, pid_to_kill): """Start memory-limit handler. 
@return None or the memory-limit handler for calling cancel() """ - if memlimit is not None: + # FIXME + if False: + # if memlimit is not None: try: oomThread = oomhandler.KillProcessOnOomThread( cgroups=cgroups, @@ -675,19 +682,19 @@ def execute_run( if hardtimelimit is not None: if hardtimelimit <= 0: sys.exit(f"Invalid time limit {hardtimelimit}.") - if CPUACCT not in self.cgroups: + if self.cgroups.CPU not in self.cgroups: logging.error("Time limit cannot be specified without cpuacct cgroup.") - critical_cgroups.add(CPUACCT) + critical_cgroups.add(self.cgroups.CPU) if softtimelimit is not None: if softtimelimit <= 0: sys.exit(f"Invalid soft time limit {softtimelimit}.") if hardtimelimit and (softtimelimit > hardtimelimit): sys.exit("Soft time limit cannot be larger than the hard time limit.") - if CPUACCT not in self.cgroups: + if self.cgroups.CPU not in self.cgroups: logging.error( "Soft time limit cannot be specified without cpuacct cgroup." ) - critical_cgroups.add(CPUACCT) + critical_cgroups.add(self.cgroups.CPU) if walltimelimit is None: if hardtimelimit is not None: @@ -701,7 +708,7 @@ def execute_run( if cores is not None: if self.cpus is None: logging.error("Cannot limit CPU cores without cpuset cgroup.") - critical_cgroups.add(CPUSET) + critical_cgroups.add(self.cgroups.CPUSET) elif not cores: sys.exit("Cannot execute run without any CPU core.") elif not set(cores).issubset(self.cpus): @@ -711,16 +718,16 @@ def execute_run( if memlimit is not None: if memlimit <= 0: sys.exit(f"Invalid memory limit {memlimit}.") - if MEMORY not in self.cgroups: + if self.cgroups.MEMORY not in self.cgroups: logging.error( "Memory limit specified, but cannot be implemented without cgroup support." 
) - critical_cgroups.add(MEMORY) + critical_cgroups.add(self.cgroups.MEMORY) if memory_nodes is not None: if self.memory_nodes is None: logging.error("Cannot restrict memory nodes without cpuset cgroup.") - critical_cgroups.add(CPUSET) + critical_cgroups.add(self.cgroups.CPUSET) elif len(memory_nodes) == 0: sys.exit("Cannot execute run without any memory node.") elif not set(memory_nodes).issubset(self.memory_nodes): @@ -865,7 +872,7 @@ def postParent(preParent_result, exit_code, base_path): # process existed, and killing via cgroups prevents this. # But if we do not have freezer, it is safer to just let all processes run # until the container is killed. - if FREEZER in cgroups: + if cgroups.FREEZE in cgroups: cgroups.kill_all_tasks() # For a similar reason, we cancel all limits. Otherwise a run could have @@ -1031,7 +1038,9 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): cputime_wait = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0 cputime_cgroups = None - if CPUACCT in cgroups: + + # FIXME v2 + if cgroups.CPU in cgroups: # We want to read the value from the cgroup. # The documentation warns about outdated values. # So we read twice with 0.1s time difference, @@ -1067,57 +1076,38 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): else: result["cputime"] = cputime_cgroups - for (core, coretime) in enumerate( - cgroups.get_value(CPUACCT, "usage_percpu").split(" ") - ): - try: - coretime = int(coretime) - if coretime != 0: - # convert nanoseconds to seconds - result[f"cputime-cpu{core}"] = coretime / 1_000_000_000 - except (OSError, ValueError) as e: - logging.debug( - "Could not read CPU time for core %s from kernel: %s", core, e - ) + for core, coretime in enumerate(cgroups.read_usage_per_cpu()): + result[f"cputime-cpu{core}"] = coretime - if MEMORY in cgroups: - # This measurement reads the maximum number of bytes of RAM+Swap the process used. - # For more details, c.f. 
the kernel documentation: - # https://www.kernel.org/doc/Documentation/cgroups/memory.txt - memUsageFile = "memsw.max_usage_in_bytes" - if not cgroups.has_value(MEMORY, memUsageFile): - memUsageFile = "max_usage_in_bytes" - if not cgroups.has_value(MEMORY, memUsageFile): - logging.warning("Memory-usage is not available due to missing files.") + if cgroups.MEMORY in cgroups: + max_mem_usage = cgroups.read_max_mem_usage() + if max_mem_usage is None: + logging.warning( + "Memory-usage is not available for cgroups v2 or due to missing files." + ) else: - try: - result["memory"] = int(cgroups.get_value(MEMORY, memUsageFile)) - except OSError as e: - if e.errno == errno.ENOTSUP: - # kernel responds with operation unsupported if this is disabled - logging.critical( - "Kernel does not track swap memory usage, cannot measure memory usage." - " Please set swapaccount=1 on your kernel command line." - ) - else: - raise e - - if BLKIO in cgroups: - blkio_bytes_file = "throttle.io_service_bytes" - if cgroups.has_value(BLKIO, blkio_bytes_file): - bytes_read = 0 - bytes_written = 0 - for blkio_line in cgroups.get_file_lines(BLKIO, blkio_bytes_file): - try: - dev_no, io_type, bytes_amount = blkio_line.split(" ") - if io_type == "Read": - bytes_read += int(bytes_amount) - elif io_type == "Write": - bytes_written += int(bytes_amount) - except ValueError: - pass # There are irrelevant lines in this file with a different structure - result["blkio-read"] = bytes_read - result["blkio-write"] = bytes_written + result["memory"] = max_mem_usage + + # FIXME empty in v2 because of ...? 
+ # if cgroups.IO in cgroups: + # # blkio_bytes_file = "throttle.io_service_bytes" + # # if cgroups.has_value(BLKIO, blkio_bytes_file): + # if cgroups.has_value(cgroups.IO, 'stat'): + # bytes_read = 0 + # bytes_written = 0 + # print(cgroups.get_value(cgroups.IO, 'stat')) + # print(dict(cgroups.get_key_value_pairs(cgroups.IO, 'stat'))) + # #for blkio_line in cgroups.get_file_lines(BLKIO, blkio_bytes_file): + # # try: + # # dev_no, io_type, bytes_amount = blkio_line.split(" ") + # # if io_type == "Read": + # # bytes_read += int(bytes_amount) + # # elif io_type == "Write": + # # bytes_written += int(bytes_amount) + # # except ValueError: + # # pass # There are irrelevant lines in this file with a different structure + # result["blkio-read"] = bytes_read + # result["blkio-write"] = bytes_written logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", @@ -1237,7 +1227,7 @@ def __init__( self.finished = threading.Event() if hardtimelimit or softtimelimit: - assert CPUACCT in cgroups + assert cgroups.CPU in cgroups assert walltimelimit is not None if cores: @@ -1266,7 +1256,8 @@ def read_cputime(self): def run(self): while not self.finished.is_set(): - usedCpuTime = self.read_cputime() if CPUACCT in self.cgroups else 0 + # FIXME v2 + usedCpuTime = self.read_cputime() if self.cgroups.CPU in self.cgroups else 0 remainingCpuTime = self.timelimit - usedCpuTime remainingSoftCpuTime = self.softtimelimit - usedCpuTime remainingWallTime = self.latestKillTime - time.monotonic() From 3182bca6abe9b759c8adeb9407be42f511fc6ca5 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 27 Sep 2021 01:06:04 +0200 Subject: [PATCH 008/133] cgroups: fix permission hints --- benchexec/cgroups.py | 18 ++++++++++++++++++ benchexec/cgroupsv1.py | 19 ------------------- benchexec/cgroupsv2.py | 19 ------------------- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 6fe4b2dad..9d7e87772 100644 
--- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -30,6 +30,24 @@ CGROUPS_V1 = 1 CGROUPS_V2 = 2 +_PERMISSION_HINT_GROUPS = """ +You need to add your account to the following groups: {0} +Remember to logout and login again afterwards to make group changes effective.""" + +_PERMISSION_HINT_DEBIAN = """ +The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu +Alternatively, you can install benchexec-cgroup.service manually: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_SYSTEMD = """ +The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_OTHER = """ +Please configure your system in way to allow your user to use cgroups: +https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" + _ERROR_MSG_PERMISSIONS = """ Required cgroups are not available because of missing permissions.{0} diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 1602b2159..3f2ab98ff 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -29,25 +29,6 @@ CGROUP_NAME_PREFIX = "benchmark_" -_PERMISSION_HINT_GROUPS = """ -You need to add your account to the following groups: {0} -Remember to logout and login again afterwards to make group changes effective.""" - -_PERMISSION_HINT_DEBIAN = """ -The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu -Alternatively, you can install benchexec-cgroup.service manually: 
-https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_SYSTEMD = """ -The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_OTHER = """ -Please configure your system in way to allow your user to use cgroups: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" - - def _find_own_cgroups(): """ For all subsystems, return the information in which (sub-)cgroup this process is in. diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 7c892cfd9..c560b38e8 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -30,25 +30,6 @@ CGROUP_NAME_PREFIX = "benchmark_" -_PERMISSION_HINT_GROUPS = """ -You need to add your account to the following groups: {0} -Remember to logout and login again afterwards to make group changes effective.""" - -_PERMISSION_HINT_DEBIAN = """ -The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu -Alternatively, you can install benchexec-cgroup.service manually: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_SYSTEMD = """ -The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_OTHER = """ -Please configure your system in way to allow your user to use cgroups: 
-https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" - - def _find_cgroup_mount(): """ Return the mountpoint of the cgroupv2 unified hierarchy. From b4fe3dce8eb962732c6c0623c7ffb56633302a3f Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 27 Sep 2021 13:03:29 +0200 Subject: [PATCH 009/133] cgroups: WIP cleanup & fixes --- benchexec/cgroups.py | 175 ++++++++++++++++-------------------- benchexec/cgroupsv1.py | 96 +------------------- benchexec/cgroupsv2.py | 95 +------------------- benchexec/check_cgroups.py | 2 +- benchexec/localexecution.py | 4 +- benchexec/resources.py | 4 +- 6 files changed, 84 insertions(+), 292 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 9d7e87772..59474ab99 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -6,16 +6,11 @@ # SPDX-License-Identifier: Apache-2.0 from abc import ABC, abstractmethod -import errno import grp import logging import os -import shutil -import signal import stat import sys -import tempfile -import time from benchexec import BenchExecException from benchexec import systeminfo @@ -121,6 +116,80 @@ def __getitem__(self, key): def __str__(self): return str(self.paths) + def _remove_cgroup(self, path): + if not os.path.exists(path): + logging.warning("Cannot remove CGroup %s, because it does not exist.", path) + return + assert os.path.getsize(os.path.join(path, "tasks")) == 0 + try: + os.rmdir(path) + except OSError: + # sometimes this fails because the cgroup is still busy, we try again once + try: + os.rmdir(path) + except OSError as e: + logging.warning( + "Failed to remove cgroup %s: error %s (%s)", + path, + e.errno, + e.strerror, + ) + + def has_value(self, subsystem, option): + """ + Check whether the given value exists in the given subsystem. + Does not make a difference whether the value is readable, writable, or both. + Do not include the subsystem name in the option name. 
+ Only call this method if the given subsystem is available. + """ + assert subsystem in self + return os.path.isfile(self.subsystems[subsystem] / f"{subsystem}.{option}") + + def get_value(self, subsystem, option): + """ + Read the given value from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self, f"Subsystem {subsystem} is missing" + return util.read_file(self.subsystems[subsystem] / f"{subsystem}.{option}") + + def get_file_lines(self, subsystem, option): + """ + Read the lines of the given file from the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + assert subsystem in self + with open( + os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") + ) as f: + for line in f: + yield line + + def get_key_value_pairs(self, subsystem, filename): + """ + Read the lines of the given file from the given subsystem + and split the lines into key-value pairs. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. + """ + # assert subsystem in self + return util.read_key_value_pairs_from_file( + self.subsystems[subsystem] / f"{subsystem}.{filename}" + ) + + def set_value(self, subsystem, option, value): + """ + Write the given value for the given subsystem. + Do not include the subsystem name in the option name. + Only call this method if the given subsystem is available. 
+ """ + assert subsystem in self + util.write_file( + str(value), self.subsystems[subsystem] / f"{subsystem}.{option}" + ) + # FIXME improve message for v2 def require_subsystem(self, subsystem, log_method=logging.warning): """ @@ -192,108 +261,16 @@ def get_group_name(gid): else: sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted - def create_fresh_child_cgroup(self, *subsystems): - """ - Create child cgroups of the current cgroup for at least the given subsystems. - @return: A Cgroup instance representing the new child cgroup(s). - """ - assert set(subsystems).issubset(self.per_subsystem.keys()) - createdCgroupsPerSubsystem = {} - createdCgroupsPerParent = {} - for subsystem in subsystems: - parentCgroup = self.per_subsystem[subsystem] - if parentCgroup in createdCgroupsPerParent: - # reuse already created cgroup - createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ - parentCgroup - ] - continue - - cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) - createdCgroupsPerSubsystem[subsystem] = cgroup - createdCgroupsPerParent[parentCgroup] = cgroup - - # add allowed cpus and memory to cgroup if necessary - # (otherwise we can't add any tasks) - def copy_parent_to_child(name): - shutil.copyfile( - os.path.join(parentCgroup, name), os.path.join(cgroup, name) - ) - - try: - copy_parent_to_child("cpuset.cpus") - copy_parent_to_child("cpuset.mems") - except OSError: - # expected to fail if cpuset subsystem is not enabled in this hierarchy - pass - - return Cgroup(createdCgroupsPerSubsystem) - - def add_task(self, pid): - """ - Add a process to the cgroups represented by this instance. - """ - _register_process_with_cgrulesengd(pid) - for cgroup in self.paths: - with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: - tasksFile.write(str(pid)) - - def kill_all_tasks(self): - """ - Kill all tasks in this cgroup and all its children cgroups forcefully. - Additionally, the children cgroups will be deleted. 
- """ - - def kill_all_tasks_in_cgroup_recursively(cgroup, delete): - for dirpath, dirs, _files in os.walk(cgroup, topdown=False): - for subCgroup in dirs: - subCgroup = os.path.join(dirpath, subCgroup) - kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) - - if delete: - remove_cgroup(subCgroup) - - kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) - - # First, we go through all cgroups recursively while they are frozen and kill - # all processes. This helps against fork bombs and prevents processes from - # creating new subgroups while we are trying to kill everything. - # But this is only possible if we have freezer, and all processes will stay - # until they are thawed (so we cannot check for cgroup emptiness and we cannot - # delete subgroups). - if self.version == 2 or FREEZER in self.impl.per_subsystem: - self.impl.freeze() - kill_all_tasks_in_cgroup_recursively(cgroup, delete=False) - self.impl.unfreeze() - - # Second, we go through all cgroups again, kill what is left, - # check for emptiness, and remove subgroups. - # Furthermore, we do this for all hierarchies, not only the one with freezer. - for cgroup in self.paths: - kill_all_tasks_in_cgroup_recursively(cgroup, delete=True) - def remove(self): """ Remove all cgroups this instance represents from the system. This instance is afterwards not usable anymore! """ for cgroup in self.paths: - remove_cgroup(cgroup) + self._remove_cgroup(cgroup) del self.paths - del self.per_subsystem - - def read_cputime(self): - """ - Read the cputime usage of this cgroup. CPUACCT cgroup needs to be available. 
- @return cputime usage in seconds - """ - # convert nano-seconds to seconds - return self.impl.read_cputime() - - def read_allowed_memory_banks(self): - """Get the list of all memory banks allowed by this cgroup.""" - return util.parse_int_list(self.get_value(CPUSET, "mems")) + del self.subsystems @abstractmethod def read_max_mem_usage(self): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 3f2ab98ff..e99230498 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -6,17 +6,13 @@ # SPDX-License-Identifier: Apache-2.0 import errno -import grp import logging import os import shutil import signal -import stat -import sys import tempfile import time -from benchexec import systeminfo from benchexec import util from benchexec.cgroups import Cgroups @@ -91,23 +87,6 @@ def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): time.sleep(i * 0.5) -def remove_cgroup(cgroup): - if not os.path.exists(cgroup): - logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) - return - assert os.path.getsize(os.path.join(cgroup, "tasks")) == 0 - try: - os.rmdir(cgroup) - except OSError: - # sometimes this fails because the cgroup is still busy, we try again once - try: - os.rmdir(cgroup) - except OSError as e: - logging.warning( - "Failed to remove cgroup %s: error %s (%s)", cgroup, e.errno, e.strerror - ) - - def _register_process_with_cgrulesengd(pid): """Tell cgrulesengd daemon to not move the given process into other cgroups, if libcgroup is available. 
@@ -181,7 +160,7 @@ def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): if cgroup_procinfo is None: my_cgroups = dict(_find_own_cgroups()) else: - my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_procingo)) + my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_procinfo)) cgroupsParents = {} for subsystem, mount in self._find_cgroup_mounts(): @@ -286,7 +265,7 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) if delete: - remove_cgroup(subCgroup) + self._remove_cgroup(subCgroup) kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) @@ -310,73 +289,6 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): for cgroup in self.paths: kill_all_tasks_in_cgroup_recursively(cgroup, delete=True) - def has_value(self, subsystem, option): - """ - Check whether the given value exists in the given subsystem. - Does not make a difference whether the value is readable, writable, or both. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - return os.path.isfile( - os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") - ) - - def get_value(self, subsystem, option): - """ - Read the given value from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self, f"Subsystem {subsystem} is missing" - return util.read_file(self.subsystems[subsystem], f"{subsystem}.{option}") - - def get_file_lines(self, subsystem, option): - """ - Read the lines of the given file from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. 
- """ - assert subsystem in self - with open( - os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") - ) as f: - for line in f: - yield line - - def get_key_value_pairs(self, subsystem, filename): - """ - Read the lines of the given file from the given subsystem - and split the lines into key-value pairs. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - return util.read_key_value_pairs_from_file( - self.subsystems[subsystem], f"{subsystem}.{filename}" - ) - - def set_value(self, subsystem, option, value): - """ - Write the given value for the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - util.write_file(str(value), self.subsystems[subsystem], f"{subsystem}.{option}") - - def remove(self): - """ - Remove all cgroups this instance represents from the system. - This instance is afterwards not usable anymore! - """ - for cgroup in self.paths: - remove_cgroup(cgroup) - - # ? - del self.paths - del self.subsystems - def read_cputime(self): """ Read the cputime usage of this cgroup. CPUACCT cgroup needs to be available. @@ -385,10 +297,6 @@ def read_cputime(self): # convert nano-seconds to seconds return float(self.get_value(self.CPU, "usage")) / 1_000_000_000 - def read_allowed_memory_banks(self): - """Get the list of all memory banks allowed by this cgroup.""" - return util.parse_int_list(self.get_value(self.CPUSET, "mems")) - def read_max_mem_usage(self): # This measurement reads the maximum number of bytes of RAM+Swap the process used. # For more details, c.f. 
the kernel documentation: diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index c560b38e8..2cb5efa82 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -5,19 +5,13 @@ # # SPDX-License-Identifier: Apache-2.0 -import errno -import grp import logging import os import pathlib -import shutil import signal -import stat -import sys import tempfile import time -from benchexec import systeminfo from benchexec import util from benchexec.cgroups import Cgroups @@ -102,23 +96,6 @@ def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): time.sleep(i * 0.5) -def remove_cgroup(cgroup): - if not os.path.exists(cgroup): - logging.warning("Cannot remove CGroup %s, because it does not exist.", cgroup) - return - assert os.path.getsize(cgroup / "cgroup.procs") == 0 - try: - os.rmdir(cgroup) - except OSError: - # sometimes this fails because the cgroup is still busy, we try again once - try: - os.rmdir(cgroup) - except OSError as e: - logging.warning( - "Failed to remove cgroup %s: error %s (%s)", cgroup, e.errno, e.strerror - ) - - class CgroupsV2(Cgroups): def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.version = 2 @@ -208,7 +185,7 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) if delete: - remove_cgroup(subCgroup) + self._remove_cgroup(subCgroup) kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) @@ -227,72 +204,6 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): # check for emptiness, and remove subgroups. kill_all_tasks_in_cgroup_recursively(self.path, delete=True) - def has_value(self, subsystem, option): - """ - Check whether the given value exists in the given subsystem. - Does not make a difference whether the value is readable, writable, or both. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. 
- """ - assert subsystem in self - return os.path.isfile(self.path / f"{subsystem}.{option}") - - def get_value(self, subsystem, option): - """ - Read the given value from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self, f"Subsystem {subsystem} is missing" - return util.read_file(self.path / f"{subsystem}.{option}") - - def get_file_lines(self, subsystem, option): - """ - Read the lines of the given file from the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - with open( - os.path.join(self.per_subsystem[subsystem], f"{subsystem}.{option}") - ) as f: - for line in f: - yield line - - def get_key_value_pairs(self, subsystem, filename): - """ - Read the lines of the given file from the given subsystem - and split the lines into key-value pairs. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - # FIXME v2 has basic cpu support even if not enabled - # assert subsystem in self - return util.read_key_value_pairs_from_file( - self.path / f"{subsystem}.{filename}" - ) - - def set_value(self, subsystem, option, value): - """ - Write the given value for the given subsystem. - Do not include the subsystem name in the option name. - Only call this method if the given subsystem is available. - """ - assert subsystem in self - util.write_file( - str(value), self.subsystems[subsystem] / f"{subsystem}.{option}" - ) - - def remove(self): - """ - Remove all cgroups this instance represents from the system. - This instance is afterwards not usable anymore! - """ - remove_cgroup(self.path) - - # FIXME why, we're not C? - del self.subsystems - def read_cputime(self): """ Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
@@ -302,10 +213,6 @@ def read_cputime(self): return float(cpu_stats["usage_usec"]) / 1_000_000 - def read_allowed_memory_banks(self): - """Get the list of all memory banks allowed by this cgroup.""" - return util.parse_int_list(self.get_value(CPUSET, "mems")) - def read_max_mem_usage(self): logging.debug("Memory-usage not supported in cgroups v2") diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 1dd4dca95..908ae122f 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -48,7 +48,7 @@ def check_cgroup_availability(wait=1): memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET cores=util.parse_int_list(my_cgroups.get_value(my_cgroups.CPUSET, "cpus")), - memory_nodes=my_cgroups.read_allowed_memory_banks(), + memory_nodes=my_cgroups.read_available_mems(), ) lines = [] for line in tmp: diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index 5bf0cb174..2048881c7 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -78,12 +78,12 @@ def execute_benchmark(benchmark, output_handler): pqos.reset_monitoring() if benchmark.rlimits.cpu_cores: - if not my_cgroups.require_subsystem(cgroups.CPUSET): + if not my_cgroups.require_subsystem(my_cgroups.CPUSET): logging.error( "Cgroup subsystem cpuset is required " "for limiting the number of CPU cores/memory nodes." 
) - my_cgroups.handle_errors({cgroups.CPUSET}) + my_cgroups.handle_errors({my_cgroups.CPUSET}) coreAssignment = resources.get_cpu_cores_per_run( benchmark.rlimits.cpu_cores, benchmark.num_of_threads, diff --git a/benchexec/resources.py b/benchexec/resources.py index ccce47aad..5de537e01 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -320,7 +320,7 @@ def get_memory_banks_per_run(coreAssignment, cgroups): to one of its CPU cores.""" try: # read list of available memory banks - allMems = set(cgroups.read_allowed_memory_banks()) + allMems = set(cgroups.read_available_mems()) result = [] for cores in coreAssignment: @@ -401,7 +401,7 @@ def check_limit(actualLimit): # Get list of all memory banks, either from memory assignment or from system. if not memoryAssignment: if cgroups.CPUSET in my_cgroups: - allMems = my_cgroups.read_allowed_memory_banks() + allMems = my_cgroups.read_available_mems() else: allMems = _get_memory_banks_listed_in_dir("/sys/devices/system/node/") memoryAssignment = [ From c33516c2d007349f47530140e3c7492bc128fd0b Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 27 Sep 2021 14:03:39 +0200 Subject: [PATCH 010/133] cgroups: cleanup some path handling --- benchexec/cgroups.py | 9 ++++----- benchexec/cgroupsv1.py | 23 ++++++++++++----------- benchexec/check_cgroups.py | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 59474ab99..794695309 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -9,6 +9,7 @@ import grp import logging import os +import pathlib import stat import sys @@ -116,11 +117,11 @@ def __getitem__(self, key): def __str__(self): return str(self.paths) - def _remove_cgroup(self, path): + def _remove_cgroup(self, path: pathlib.Path): if not os.path.exists(path): logging.warning("Cannot remove CGroup %s, because it does not exist.", path) return - assert os.path.getsize(os.path.join(path, "tasks")) == 0 + assert os.path.getsize(path 
/ "tasks") == 0 try: os.rmdir(path) except OSError: @@ -161,9 +162,7 @@ def get_file_lines(self, subsystem, option): Only call this method if the given subsystem is available. """ assert subsystem in self - with open( - os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") - ) as f: + with open(self.subsystems[subsystem] / f"{subsystem}.{option}") as f: for line in f: yield line diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index e99230498..636fee023 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -8,6 +8,7 @@ import errno import logging import os +import pathlib import shutil import signal import tempfile @@ -57,7 +58,7 @@ def _parse_proc_pid_cgroup(content): def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): - tasksFile = os.path.join(cgroup, "tasks") + tasksFile = cgroup / "tasks" i = 0 while True: @@ -168,8 +169,8 @@ def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): # e.g. because a parent directory has insufficient permissions # (lxcfs mounts cgroups under /run/lxcfs in such a way). 
if os.access(mount, os.F_OK): - cgroupPath = os.path.join(mount, my_cgroups[subsystem]) - fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) + cgroupPath = mount / my_cgroups[subsystem] + fallbackPath = mount / CGROUP_FALLBACK_PATH if ( fallback and not os.access(cgroupPath, os.W_OK) @@ -190,7 +191,7 @@ def _find_cgroup_mounts(self): for mount in mountsFile: mount = mount.split(" ") if mount[2] == "cgroup": - mountpoint = mount[1] + mountpoint = pathlib.Path(mount[1]) options = mount[3] for option in options.split(","): if option in self.KNOWN_SUBSYSTEMS: @@ -215,16 +216,16 @@ def create_fresh_child_cgroup(self, *subsystems): ] continue - cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + cgroup = pathlib.Path( + tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + ) createdCgroupsPerSubsystem[subsystem] = cgroup createdCgroupsPerParent[parentCgroup] = cgroup # add allowed cpus and memory to cgroup if necessary # (otherwise we can't add any tasks) def copy_parent_to_child(name): - shutil.copyfile( - os.path.join(parentCgroup, name), os.path.join(cgroup, name) - ) + shutil.copyfile(parentCgroup / name, cgroup / name) try: copy_parent_to_child("cpuset.cpus") @@ -241,14 +242,14 @@ def add_task(self, pid): """ _register_process_with_cgrulesengd(pid) for cgroup in self.paths: - with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: + with open(cgroup / "tasks", "w") as tasksFile: tasksFile.write(str(pid)) def get_all_tasks(self, subsystem): """ Return a generator of all PIDs currently in this cgroup for the given subsystem. """ - with open(os.path.join(self.subsystems[subsystem], "tasks"), "r") as tasksFile: + with open(self.subsystems[subsystem] / "tasks", "r") as tasksFile: for line in tasksFile: yield int(line) @@ -277,7 +278,7 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): # delete subgroups). 
if self.FREEZE in self.subsystems: cgroup = self.subsystems[self.FREEZE] - freezer_file = os.path.join(cgroup, "freezer.state") + freezer_file = cgroup / "freezer.state" util.write_file("FROZEN", freezer_file) kill_all_tasks_in_cgroup_recursively(cgroup, delete=False) diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 908ae122f..61a2518f0 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -69,8 +69,8 @@ def check_cgroup_availability(wait=1): my_cgroups.FREEZE, ): if subsystem in my_cgroups: - if not task_cgroups[subsystem].startswith( - os.path.join(my_cgroups[subsystem], "benchmark_") + if not str(task_cgroups[subsystem]).startswith( + str(my_cgroups[subsystem] / "benchmark_") ): logging.warning( "Task was in cgroup %s for subsystem %s, " From 5f108eff72715b6f328bea2572aee25c971df0dd Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 27 Sep 2021 14:53:43 +0200 Subject: [PATCH 011/133] cgroups: fix issues from refactoring --- benchexec/cgroups.py | 29 +++++++++++++++++++++++++++-- benchexec/cgroupsv2.py | 2 +- benchexec/check_cgroups.py | 1 - 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 794695309..0088e6012 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -6,6 +6,7 @@ # SPDX-License-Identifier: Apache-2.0 from abc import ABC, abstractmethod +import errno import grp import logging import os @@ -209,6 +210,26 @@ def require_subsystem(self, subsystem, log_method=logging.warning): ) return False + try: + test_cgroup = self.create_fresh_child_cgroup(subsystem) + test_cgroup.remove() + except OSError as e: + log_method( + "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", + self.subsystems[subsystem], + subsystem, + e.strerror, + e.errno, + ) + self.unusable_subsystems.add(subsystem) + if e.errno == errno.EACCES: + self.denied_subsystems[subsystem] = self.subsystems[subsystem] + del self.subsystems[subsystem] + self.paths = 
set(self.subsystems.values()) + return False + + return True + def handle_errors(self, critical_cgroups): """ If there were errors in calls to require_subsystem() and critical_cgroups @@ -229,8 +250,8 @@ def handle_errors(self, critical_cgroups): # to some groups to get access. But group 0 (root) of course does not count. groups = {} try: - if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): - groups = {os.stat(path).st_gid for path in paths} + if all(stat.S_IWGRP & path.stat().st_mode for path in paths): + groups = {path.stat().st_gid for path in paths} except OSError: pass if groups and 0 not in groups: @@ -271,6 +292,10 @@ def remove(self): del self.paths del self.subsystems + @abstractmethod + def create_fresh_child_cgroup(self, subsystem): + pass + @abstractmethod def read_max_mem_usage(self): pass diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 2cb5efa82..a37e17b1d 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -136,7 +136,7 @@ def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): fallback_path = mount / CGROUP_FALLBACK_PATH cgroup_path = fallback_path - with open(cgroup_path / "cgroup.subsystems") as subsystems_file: + with open(cgroup_path / "cgroup.controllers") as subsystems_file: subsystems = subsystems_file.readline().strip().split() # always supported in v2 diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 61a2518f0..fabb7bca1 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -7,7 +7,6 @@ import argparse import logging -import os import sys import tempfile import threading From c3132980ea27454c62aefdd81275996324e4d63c Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Fri, 8 Oct 2021 15:43:34 +0200 Subject: [PATCH 012/133] cgroups: add cgroupsv2 bootstrap service --- debian/benchexec-cgroup2.service | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 debian/benchexec-cgroup2.service diff 
--git a/debian/benchexec-cgroup2.service b/debian/benchexec-cgroup2.service new file mode 100644 index 000000000..b9d07a824 --- /dev/null +++ b/debian/benchexec-cgroup2.service @@ -0,0 +1,49 @@ +# This file is part of BenchExec, a framework for reliable benchmarking: +# https://github.com/sosy-lab/benchexec +# +# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer +# +# SPDX-License-Identifier: Apache-2.0 + +[Unit] +Description=Cgroup setup for BenchExec +Documentation=https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md +Documentation=https://github.com/sosy-lab/benchexec/blob/master/doc/INDEX.md + +[Service] +# Adjust the following line to configure permissions for cgroup usage. +# The default gives permissions to users in group "benchexec". +# You can change the group name, or give permissions to everybody by +# setting BENCHEXEC_CGROUP_PERM to "a+w". +Environment=BENCHEXEC_CGROUP_GROUP=benchexec BENCHEXEC_CGROUP_PERM=g+w + +Restart=always +Delegate=cpu cpuset memory io pids +CPUAccounting=true +IOAccounting=true +MemoryAccounting=true +ExecStartPre=/bin/bash -c '\ +set -e;\ +mkdir /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root;\ +mkdir /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/dummy' + +ExecStart=/bin/bash -c '\ +set -e;\ +exec sleep $(( 10 * 365 * 24 * 3600 ))' + +ExecStartPost=/bin/bash -c '\ +set -e;\ +for p in $(cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/cgroup.procs); do\ + echo $p > /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/dummy/cgroup.procs;\ +done;\ +for cg in $(cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/cgroup.controllers); do\ + echo +$cg >/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id 
-u).service/app.slice/benchexec-cgroup2.service/cgroup.subtree_control;\ + echo +$cg >/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/cgroup.subtree_control;\ +done' + +Restart=always +TimeoutStartSec=360000 + + +[Install] +WantedBy=default.target From a67214bae791a6cea2284bc9172837e7f1ae204b Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Fri, 8 Oct 2021 15:43:55 +0200 Subject: [PATCH 013/133] cgroups: parameterise cgv2 fallback path --- benchexec/cgroupsv2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index a37e17b1d..bd3b5c180 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -16,8 +16,8 @@ from benchexec.cgroups import Cgroups -# FIXME uid -CGROUP_FALLBACK_PATH = "user.slice/user-1000.slice/user@1000.service/app.slice/benchexec-cgroup.service/benchexec_root" +uid = os.getuid() +CGROUP_FALLBACK_PATH = f"user.slice/user-{uid}.slice/user@{uid}.service/app.slice/benchexec-cgroup2.service/benchexec_root" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" From 1d0e1c058632a7ebd03ba29c9fc2a0f90dbedd88 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 00:29:52 +0200 Subject: [PATCH 014/133] cgroups: OOM handling --- benchexec/cgroups.py | 12 +++++++ benchexec/cgroupsv1.py | 69 ++++++++++++++++++++++++++++++++++++++++ benchexec/cgroupsv2.py | 9 ++++++ benchexec/oomhandler.py | 69 +++------------------------------------- benchexec/runexecutor.py | 11 ++----- 5 files changed, 96 insertions(+), 74 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 0088e6012..1a9c1f5a9 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -311,3 +311,15 @@ def read_available_cpus(self): @abstractmethod def read_available_mems(self): pass + + @abstractmethod + def disable_swap(self): + pass + + @abstractmethod + def 
set_oom_handler(self): + pass + + @abstractmethod + def reset_memory_limit(self): + pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 636fee023..a84cfc9c1 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -17,6 +17,13 @@ from benchexec import util from benchexec.cgroups import Cgroups +from ctypes import cdll + +_libc = cdll.LoadLibrary("libc.so.6") +_EFD_CLOEXEC = 0x80000 # from : mark eventfd as close-on-exec + +_BYTE_FACTOR = 1000 # byte in kilobyte + # FIXME __all__ ? CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" @@ -342,3 +349,65 @@ def read_available_cpus(self): def read_available_mems(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems")) + + def disable_swap(self): + # Note that this disables swapping completely according to + # https://www.kernel.org/doc/Documentation/cgroups/memory.txt + # (unlike setting the global swappiness to 0). + # Our process might get killed because of this. + return self.set_value(self.MEMORY, "swappiness", "0") + + def set_oom_handler(self): + mem_cgroup = self[self.MEMORY] + ofd = os.open(os.path.join(mem_cgroup, "memory.oom_control"), os.O_WRONLY) + try: + # Important to use CLOEXEC, otherwise the benchmarked tool inherits + # the file descriptor. + efd = _libc.eventfd(0, _EFD_CLOEXEC) + + try: + util.write_file(f"{efd} {ofd}", mem_cgroup, "cgroup.event_control") + + # If everything worked, disable Kernel-side process killing. + # This is not allowed if memory.use_hierarchy is enabled, + # but we don't care. 
+ try: + os.write(ofd, b"1") + except OSError as e: + logging.debug( + "Failed to disable kernel-side OOM killer: error %s (%s)", + e.errno, + e.strerror, + ) + except OSError as e: + os.close(efd) + raise e + finally: + os.close(ofd) + + return efd + + def reset_memory_limit(self): + for limitFile in ("memory.memsw.limit_in_bytes", "memory.limit_in_bytes"): + if self._cgroups.has_value(self.MEMORY, limitFile): + try: + # Write a high value (1 PB) as the limit + self.set_value( + self.MEMORY, + limitFile, + str( + 1 + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + ), + ) + except OSError as e: + logging.warning( + "Failed to increase %s after OOM: error %s (%s).", + limitFile, + e.errno, + e.strerror, + ) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index bd3b5c180..080d1f615 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -228,3 +228,12 @@ def read_available_cpus(self): def read_available_mems(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems.effective")) + + def disable_swap(self): + return self.set_value(self.MEMORY, "swap.max", "0") + + def set_oom_handler(self): + logging.warn("OOM handling for cgroups 2 not implemented yet") + + def reset_memory_limit(self): + logging.warn("OOM handling for cgroups 2 not implemented yet") diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py index 211da026e..d0b1791e3 100644 --- a/benchexec/oomhandler.py +++ b/benchexec/oomhandler.py @@ -9,16 +9,8 @@ import os import threading -from benchexec.cgroups import Cgroups from benchexec import util -from ctypes import cdll - -_libc = cdll.LoadLibrary("libc.so.6") -_EFD_CLOEXEC = 0x80000 # from : mark eventfd as close-on-exec - -_BYTE_FACTOR = 1000 # byte in kilobyte - class KillProcessOnOomThread(threading.Thread): """ @@ -53,33 +45,7 @@ def __init__(self, cgroups, pid_to_kill, callbackFn=lambda reason: None): self._cgroups = cgroups self._callback = callbackFn - # FIXME - cgroup = 
cgroups[cgroups.MEMORY] # for raw access - ofd = os.open(os.path.join(cgroup, "memory.oom_control"), os.O_WRONLY) - try: - # Important to use CLOEXEC, otherwise the benchmarked tool inherits - # the file descriptor. - self._efd = _libc.eventfd(0, _EFD_CLOEXEC) - - try: - util.write_file(f"{self._efd} {ofd}", cgroup, "cgroup.event_control") - - # If everything worked, disable Kernel-side process killing. - # This is not allowed if memory.use_hierarchy is enabled, - # but we don't care. - try: - os.write(ofd, b"1") - except OSError as e: - logging.debug( - "Failed to disable kernel-side OOM killer: error %s (%s)", - e.errno, - e.strerror, - ) - except OSError as e: - os.close(self._efd) - raise e - finally: - os.close(ofd) + self._efd = self._cgroups.set_oom_handler() def run(self): # os.close gets called in finally, @@ -103,43 +69,16 @@ def run(self): self._pid_to_kill, ) util.kill_process(self._pid_to_kill) + # Also kill all children of subprocesses directly. - # FIXME - with open(os.path.join(self._cgroups[MEMORY], "tasks"), "rt") as tasks: - for task in tasks: - util.kill_process(int(task)) + self._cgroups.kill_all_tasks() # We now need to increase the memory limit of this cgroup # to give the process a chance to terminate - self._reset_memory_limit("memory.memsw.limit_in_bytes") - self._reset_memory_limit("memory.limit_in_bytes") + self._cgroups.reset_memory_limit() finally: close(self._efd) - def _reset_memory_limit(self, limitFile): - if self._cgroups.has_value(MEMORY, limitFile): - try: - # Write a high value (1 PB) as the limit - self._cgroups.set_value( - MEMORY, - limitFile, - str( - 1 - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - ), - ) - except OSError as e: - logging.warning( - "Failed to increase %s after OOM: error %s (%s).", - limitFile, - e.errno, - e.strerror, - ) - def cancel(self): self._finished.set() diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 6f4c495f1..00f335d7a 100644 --- 
a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -492,12 +492,7 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): if cgroups.MEMORY in cgroups: try: - # Note that this disables swapping completely according to - # https://www.kernel.org/doc/Documentation/cgroups/memory.txt - # (unlike setting the global swappiness to 0). - # Our process might get killed because of this. - # FIXME v1 cgroups.set_value(MEMORY, "swappiness", "0") - cgroups.set_value(cgroups.MEMORY, "swap.max", "0") + cgroups.disable_swap() except OSError as e: logging.warning( "Could not disable swapping for benchmarked process: %s", e @@ -583,9 +578,7 @@ def _setup_cgroup_memory_limit(self, memlimit, cgroups, pid_to_kill): """Start memory-limit handler. @return None or the memory-limit handler for calling cancel() """ - # FIXME - if False: - # if memlimit is not None: + if memlimit is not None: try: oomThread = oomhandler.KillProcessOnOomThread( cgroups=cgroups, From e41913e5b42c714300eaadc00994fee6aa74cdcf Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 00:30:06 +0200 Subject: [PATCH 015/133] cgroups: remove unreferenced code --- benchexec/cgroups.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 1a9c1f5a9..2e1bb1fd9 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -18,11 +18,6 @@ from benchexec import systeminfo from benchexec import util -CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" -"""If we do not have write access to the current cgroup, -attempt to use this cgroup as fallback.""" - -CGROUP_NAME_PREFIX = "benchmark_" CGROUPS_V1 = 1 CGROUPS_V2 = 2 From 2307834e47468df28e85e7a7bf46ea5173b8c517 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 00:30:26 +0200 Subject: [PATCH 016/133] cgroups: cgroup.kill handling for >=5.14 --- benchexec/cgroupsv2.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/benchexec/cgroupsv2.py 
b/benchexec/cgroupsv2.py index 080d1f615..db507ff25 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -106,6 +106,7 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.MEMORY = "memory" self.PID = "pids" self.FREEZE = "freeze" + self.KILL = "kill" self.KNOWN_SUBSYSTEMS = { # cgroups for BenchExec @@ -116,6 +117,7 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.PID, # not really a subsystem anymore, but implicitly supported self.FREEZE, + self.KILL, } super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) @@ -139,6 +141,10 @@ def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): with open(cgroup_path / "cgroup.controllers") as subsystems_file: subsystems = subsystems_file.readline().strip().split() + # introduced in 5.14 + if (cgroup_path / "cgroup.kill").exists(): + subsystems.append(self.KILL) + # always supported in v2 subsystems.append(self.FREEZE) @@ -189,6 +195,10 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + if self.KILL in self.subsystems: + util.write_file("1", self.path / "cgroup.kill") + return + # First, we go through all cgroups recursively while they are frozen and kill # all processes. This helps against fork bombs and prevents processes from # creating new subgroups while we are trying to kill everything. 
From 4a354a99eee07be449db16a8ede4ea8ebd60d03e Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 00:30:58 +0200 Subject: [PATCH 017/133] cgroups: WIP IO and v2 CPU --- benchexec/runexecutor.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 00f335d7a..ca4ca72b0 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1032,8 +1032,7 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): cputime_wait = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0 cputime_cgroups = None - # FIXME v2 - if cgroups.CPU in cgroups: + if cgroups.CPU in cgroups or cgroups.version == 2: # always possible in v2 # We want to read the value from the cgroup. # The documentation warns about outdated values. # So we read twice with 0.1s time difference, @@ -1081,26 +1080,23 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): else: result["memory"] = max_mem_usage - # FIXME empty in v2 because of ...? - # if cgroups.IO in cgroups: - # # blkio_bytes_file = "throttle.io_service_bytes" - # # if cgroups.has_value(BLKIO, blkio_bytes_file): - # if cgroups.has_value(cgroups.IO, 'stat'): - # bytes_read = 0 - # bytes_written = 0 - # print(cgroups.get_value(cgroups.IO, 'stat')) - # print(dict(cgroups.get_key_value_pairs(cgroups.IO, 'stat'))) - # #for blkio_line in cgroups.get_file_lines(BLKIO, blkio_bytes_file): - # # try: - # # dev_no, io_type, bytes_amount = blkio_line.split(" ") - # # if io_type == "Read": - # # bytes_read += int(bytes_amount) - # # elif io_type == "Write": - # # bytes_written += int(bytes_amount) - # # except ValueError: - # # pass # There are irrelevant lines in this file with a different structure - # result["blkio-read"] = bytes_read - # result["blkio-write"] = bytes_written + if cgroups.IO in cgroups: + blkio_bytes_file = "throttle.io_service_bytes" + # FIXME v2? 
if cgroups.has_value(cgroups.IO, 'stat'): + if cgroups.has_value(cgroups.IO, blkio_bytes_file): + bytes_read = 0 + bytes_written = 0 + for blkio_line in cgroups.get_file_lines(cgroups.IO, blkio_bytes_file): + try: + dev_no, io_type, bytes_amount = blkio_line.split(" ") + if io_type == "Read": + bytes_read += int(bytes_amount) + elif io_type == "Write": + bytes_written += int(bytes_amount) + except ValueError: + pass # There are irrelevant lines in this file with a different structure + result["blkio-read"] = bytes_read + result["blkio-write"] = bytes_written logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", From 1215096045d0bfa1f66f5f06b3bdcf65daa9a6a9 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 00:51:40 +0200 Subject: [PATCH 018/133] cgroups: type-checking clean-up --- benchexec/cgroups.py | 13 +++++++++++-- benchexec/cgroupsv1.py | 13 ++++++++----- benchexec/cgroupsv2.py | 14 +++++++++----- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 2e1bb1fd9..7393368b1 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -89,11 +89,11 @@ def from_system(cgroup_procinfo=None, fallback=True): def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): if subsystems is None: - self.subsystems = self._supported_cgroup_subsystems() + self.subsystems = self._supported_subsystems() else: self.subsystems = subsystems - assert set(self.subsystems.keys()) <= self.KNOWN_SUBSYSTEMS + assert set(self.subsystems.keys()) <= self.known_subsystems assert all(self.subsystems.values()) self.paths = set(self.subsystems.values()) # without duplicates @@ -287,6 +287,15 @@ def remove(self): del self.paths del self.subsystems + @property + @abstractmethod + def known_subsystems(self): + pass + + @abstractmethod + def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + pass + @abstractmethod def 
create_fresh_child_cgroup(self, subsystem): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index a84cfc9c1..ec9a94bcf 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -131,7 +131,12 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.FREEZE = "freezer" self.MEMORY = "memory" - self.KNOWN_SUBSYSTEMS = { + super(CgroupsV1, self).__init__(subsystems, cgroup_procinfo, fallback) + + + @property + def known_subsystems(self): + return { # cgroups for BenchExec self.IO, self.CPU, @@ -148,9 +153,7 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): "pids", } - super(CgroupsV1, self).__init__(subsystems, cgroup_procinfo, fallback) - - def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): + def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): """ Return a Cgroup object with the cgroups of the current process. Note that it is not guaranteed that all subsystems are available @@ -201,7 +204,7 @@ def _find_cgroup_mounts(self): mountpoint = pathlib.Path(mount[1]) options = mount[3] for option in options.split(","): - if option in self.KNOWN_SUBSYSTEMS: + if option in self.known_subsystems: yield (option, mountpoint) except OSError: logging.exception("Cannot read /proc/mounts") diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index db507ff25..7eb829295 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -108,7 +108,14 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.FREEZE = "freeze" self.KILL = "kill" - self.KNOWN_SUBSYSTEMS = { + super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) + + self.path = next(iter(self.subsystems.values())) + + + @property + def known_subsystems(self): + return { # cgroups for BenchExec self.IO, self.CPU, @@ -120,11 +127,8 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.KILL, } - super(CgroupsV2, 
self).__init__(subsystems, cgroup_procinfo, fallback) - - self.path = next(iter(self.subsystems.values())) - def _supported_cgroup_subsystems(self, cgroup_procinfo=None, fallback=True): + def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." ) From 89c5d64ab6e387b3b1c833794a36c03fc04bc8a5 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 13 Oct 2021 01:11:04 +0200 Subject: [PATCH 019/133] cgroups: CI flake fixes? --- benchexec/cgroups.py | 2 ++ benchexec/cgroupsv1.py | 3 +-- benchexec/cgroupsv2.py | 4 +--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 7393368b1..6f5b5f417 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -87,6 +87,8 @@ def from_system(cgroup_procinfo=None, fallback=True): return CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + raise BenchExecException("Could not detect Cgroup Version") + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): if subsystems is None: self.subsystems = self._supported_subsystems() diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index ec9a94bcf..a5754f5e5 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -133,7 +133,6 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): super(CgroupsV1, self).__init__(subsystems, cgroup_procinfo, fallback) - @property def known_subsystems(self): return { @@ -392,7 +391,7 @@ def set_oom_handler(self): def reset_memory_limit(self): for limitFile in ("memory.memsw.limit_in_bytes", "memory.limit_in_bytes"): - if self._cgroups.has_value(self.MEMORY, limitFile): + if self.has_value(self.MEMORY, limitFile): try: # Write a high value (1 PB) as the limit self.set_value( diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 7eb829295..229350579 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ 
-112,7 +112,6 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): self.path = next(iter(self.subsystems.values())) - @property def known_subsystems(self): return { @@ -127,7 +126,6 @@ def known_subsystems(self): self.KILL, } - def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." @@ -230,7 +228,7 @@ def read_cputime(self): def read_max_mem_usage(self): logging.debug("Memory-usage not supported in cgroups v2") - return None + return def read_usage_per_cpu(self): logging.debug("Usage per CPU not supported in cgroups v2") From f1210811b02bdb6b7114487c0e4a9d40e73b3047 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 13:16:20 +0200 Subject: [PATCH 020/133] cgroups: v2 CPU subsystem By default, even when not explicitly enabled the cpu subsystem in v2 provides all the metrics we use. If cpu.{max,uclamp,weight} were to be used in the future this needs to be extended and made more specific. 
--- benchexec/cgroupsv2.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 229350579..593d134b0 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -141,14 +141,17 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): cgroup_path = fallback_path with open(cgroup_path / "cgroup.controllers") as subsystems_file: - subsystems = subsystems_file.readline().strip().split() + subsystems = set(subsystems_file.readline().strip().split()) # introduced in 5.14 if (cgroup_path / "cgroup.kill").exists(): - subsystems.append(self.KILL) + subsystems.add(self.KILL) # always supported in v2 - subsystems.append(self.FREEZE) + subsystems.add(self.FREEZE) + + # basic support always available in v2, this supports everything we use + subsystems.add(self.CPU) return {k: cgroup_path for k in subsystems} From 6b1f75a48099cc6dd455d4492cbcd248805e23aa Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 13:24:09 +0200 Subject: [PATCH 021/133] cgroups: cleanup and available mem node fix --- benchexec/cgroups.py | 2 +- benchexec/cgroupsv1.py | 2 -- benchexec/runexecutor.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 6f5b5f417..8228bd1b4 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -171,7 +171,7 @@ def get_key_value_pairs(self, subsystem, filename): Do not include the subsystem name in the option name. Only call this method if the given subsystem is available. """ - # assert subsystem in self + assert subsystem in self return util.read_key_value_pairs_from_file( self.subsystems[subsystem] / f"{subsystem}.{filename}" ) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index a5754f5e5..6a2130eaf 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -24,8 +24,6 @@ _BYTE_FACTOR = 1000 # byte in kilobyte -# FIXME __all__ ? 
- CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index ca4ca72b0..f144cc0bc 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -385,7 +385,7 @@ def _init_cgroups(self): logging.debug("List of available CPU cores is %s.", self.cpus) try: - self.memory_nodes = self.cgroups.read_available_cpus() + self.memory_nodes = self.cgroups.read_available_mems() except ValueError as e: logging.warning( "Could not read available memory nodes from kernel: %s", str(e) @@ -1245,7 +1245,6 @@ def read_cputime(self): def run(self): while not self.finished.is_set(): - # FIXME v2 usedCpuTime = self.read_cputime() if self.cgroups.CPU in self.cgroups else 0 remainingCpuTime = self.timelimit - usedCpuTime remainingSoftCpuTime = self.softtimelimit - usedCpuTime From 34d2c8f8d0cdc44919ba9a5ab927f782dd036fec Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 13:24:46 +0200 Subject: [PATCH 022/133] cgroups: initial v2 IO implementation still not working correctly, as apparently only the parent cgroup outputs to the io.stat file? 
--- benchexec/cgroups.py | 4 ++++ benchexec/cgroupsv1.py | 16 ++++++++++++++++ benchexec/cgroupsv2.py | 11 +++++++++++ benchexec/runexecutor.py | 17 +---------------- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 8228bd1b4..24f31c792 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -318,6 +318,10 @@ def read_available_cpus(self): def read_available_mems(self): pass + @abstractmethod + def read_io_stat(self): + pass + @abstractmethod def disable_swap(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 6a2130eaf..4c6383ad0 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -350,6 +350,22 @@ def read_available_cpus(self): def read_available_mems(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems")) + def read_io_stat(self): + blkio_bytes_file = "throttle.io_service_bytes" + if self.has_value(self.IO, blkio_bytes_file): + bytes_read = 0 + bytes_written = 0 + for blkio_line in self.get_file_lines(self.IO, blkio_bytes_file): + try: + dev_no, io_type, bytes_amount = blkio_line.split(" ") + if io_type == "Read": + bytes_read += int(bytes_amount) + elif io_type == "Write": + bytes_written += int(bytes_amount) + except ValueError: + pass # There are irrelevant lines in this file with a different structure + return bytes_read, bytes_written + def disable_swap(self): # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 593d134b0..44f2194ea 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -244,6 +244,17 @@ def read_available_cpus(self): def read_available_mems(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems.effective")) + def read_io_stat(self): + bytes_read = 0 + bytes_written = 0 + logging.debug(f"{list(self.get_file_lines(self.IO, 'stat'))}") + for io_line in 
self.get_file_lines(self.IO, "stat"): + dev_no, *stats = io_line.split(" ") + stats_map = {s[0]: s[1] for s in (s.split("=") for s in stats)} + bytes_read += int(stats_map["rbytes"]) + bytes_written += int(stats_map["wbytes"]) + return bytes_read, bytes_written + def disable_swap(self): return self.set_value(self.MEMORY, "swap.max", "0") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index f144cc0bc..61fdeaac3 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1081,22 +1081,7 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): result["memory"] = max_mem_usage if cgroups.IO in cgroups: - blkio_bytes_file = "throttle.io_service_bytes" - # FIXME v2? if cgroups.has_value(cgroups.IO, 'stat'): - if cgroups.has_value(cgroups.IO, blkio_bytes_file): - bytes_read = 0 - bytes_written = 0 - for blkio_line in cgroups.get_file_lines(cgroups.IO, blkio_bytes_file): - try: - dev_no, io_type, bytes_amount = blkio_line.split(" ") - if io_type == "Read": - bytes_read += int(bytes_amount) - elif io_type == "Write": - bytes_written += int(bytes_amount) - except ValueError: - pass # There are irrelevant lines in this file with a different structure - result["blkio-read"] = bytes_read - result["blkio-write"] = bytes_written + result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", From 32060959495b51b67d9638730be6bdfb1a1b88d2 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 14:19:16 +0200 Subject: [PATCH 023/133] cgroups: fix v2 cgroup removal assertion --- benchexec/cgroups.py | 7 ++++++- benchexec/cgroupsv1.py | 3 +++ benchexec/cgroupsv2.py | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 24f31c792..91e2ebadf 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -119,7 +119,7 @@ def _remove_cgroup(self, path: pathlib.Path): if 
not os.path.exists(path): logging.warning("Cannot remove CGroup %s, because it does not exist.", path) return - assert os.path.getsize(path / "tasks") == 0 + assert not self.has_tasks(path) try: os.rmdir(path) except OSError: @@ -322,6 +322,11 @@ def read_available_mems(self): def read_io_stat(self): pass + # TODO improve interface + @abstractmethod + def has_tasks(self, path): + pass + @abstractmethod def disable_swap(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 4c6383ad0..556032204 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -366,6 +366,9 @@ def read_io_stat(self): pass # There are irrelevant lines in this file with a different structure return bytes_read, bytes_written + def has_tasks(self, path): + return os.path.getsize(path / "tasks") > 0 + def disable_swap(self): # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 44f2194ea..676cbe365 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -255,6 +255,9 @@ def read_io_stat(self): bytes_written += int(stats_map["wbytes"]) return bytes_read, bytes_written + def has_tasks(self, path): + return os.path.getsize(path / "cgroup.procs") > 0 + def disable_swap(self): return self.set_value(self.MEMORY, "swap.max", "0") From f92fea539d15fcca3e92ff1db09e9703b36100c9 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 14:22:42 +0200 Subject: [PATCH 024/133] cgroups: update default branch name in links --- benchexec/cgroups.py | 8 ++++---- debian/benchexec-cgroup2.service | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 91e2ebadf..1c749c852 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -28,17 +28,17 @@ _PERMISSION_HINT_DEBIAN = """ The recommended way to fix this is to install the Debian package for BenchExec and 
add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#debianubuntu +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu Alternatively, you can install benchexec-cgroup.service manually: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" _PERMISSION_HINT_SYSTEMD = """ The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" _PERMISSION_HINT_OTHER = """ Please configure your system in way to allow your user to use cgroups: -https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" _ERROR_MSG_PERMISSIONS = """ Required cgroups are not available because of missing permissions.{0} diff --git a/debian/benchexec-cgroup2.service b/debian/benchexec-cgroup2.service index b9d07a824..2a6aa3050 100644 --- a/debian/benchexec-cgroup2.service +++ b/debian/benchexec-cgroup2.service @@ -7,8 +7,8 @@ [Unit] Description=Cgroup setup for BenchExec -Documentation=https://github.com/sosy-lab/benchexec/blob/master/doc/INSTALL.md -Documentation=https://github.com/sosy-lab/benchexec/blob/master/doc/INDEX.md +Documentation=https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md +Documentation=https://github.com/sosy-lab/benchexec/blob/main/doc/INDEX.md [Service] # Adjust the following line to configure permissions for cgroup usage. 
From 26ab65aafbb6e675a6875756bb8226260ab39045 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 14:28:32 +0200 Subject: [PATCH 025/133] cgroups: fix percpu usage output --- benchexec/runexecutor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 61fdeaac3..90850e5ed 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1068,7 +1068,7 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): else: result["cputime"] = cputime_cgroups - for core, coretime in enumerate(cgroups.read_usage_per_cpu()): + for core, coretime in cgroups.read_usage_per_cpu().items(): result[f"cputime-cpu{core}"] = coretime if cgroups.MEMORY in cgroups: From 4a4eacd1ba353449d567c187fafadc80785b1d4b Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 14 Oct 2021 19:12:55 +0200 Subject: [PATCH 026/133] cgroups: fix resource handling --- benchexec/resources.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/benchexec/resources.py b/benchexec/resources.py index 5de537e01..d56f40c02 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -16,7 +16,6 @@ import os import sys -from benchexec import cgroups from benchexec import util __all__ = [ @@ -62,7 +61,7 @@ def get_cpu_cores_per_run( """ try: # read list of available CPU cores - allCpus = util.parse_int_list(my_cgroups.get_value(cgroups.CPUSET, "cpus")) + allCpus = my_cgroups.read_available_cpus() # Filter CPU cores according to the list of identifiers provided by a user if coreSet: @@ -388,10 +387,11 @@ def check_limit(actualLimit): ) return - if cgroups.MEMORY in my_cgroups: + if my_cgroups.MEMORY in my_cgroups: # We use the entries hierarchical_*_limit in memory.stat and not memory.*limit_in_bytes # because the former may be lower if memory.use_hierarchy is enabled. 
- for key, value in my_cgroups.get_key_value_pairs(cgroups.MEMORY, "stat"): + # FIXME v2 + for key, value in my_cgroups.get_key_value_pairs(my_cgroups.MEMORY, "stat"): if ( key == "hierarchical_memory_limit" or key == "hierarchical_memsw_limit" @@ -400,7 +400,7 @@ def check_limit(actualLimit): # Get list of all memory banks, either from memory assignment or from system. if not memoryAssignment: - if cgroups.CPUSET in my_cgroups: + if my_cgroups.CPUSET in my_cgroups: allMems = my_cgroups.read_available_mems() else: allMems = _get_memory_banks_listed_in_dir("/sys/devices/system/node/") From aa0b905f54949e35894c0d274bf8ccdeb8e15441 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 20 Oct 2021 23:14:59 +0200 Subject: [PATCH 027/133] cgroups: implement oom/memlimit handling --- benchexec/cgroups.py | 10 ++- benchexec/cgroupsv1.py | 166 +++++++++++++++++-------------------- benchexec/cgroupsv2.py | 19 +++-- benchexec/check_cgroups.py | 2 +- benchexec/oomhandler.py | 68 ++++++++++++++- benchexec/runexecutor.py | 39 +++------ 6 files changed, 170 insertions(+), 134 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 1c749c852..933042d3e 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -328,13 +328,17 @@ def has_tasks(self, path): pass @abstractmethod - def disable_swap(self): + def write_memory_limit(self, limit): pass @abstractmethod - def set_oom_handler(self): + def read_memory_limit(self): pass @abstractmethod - def reset_memory_limit(self): + def read_oom_count(self): + pass + + @abstractmethod + def disable_swap(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 556032204..87b330d81 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -11,11 +11,13 @@ import pathlib import shutil import signal +import sys import tempfile import time from benchexec import util from benchexec.cgroups import Cgroups +from benchexec import systeminfo from ctypes import cdll @@ -24,11 +26,11 @@ 
_BYTE_FACTOR = 1000 # byte in kilobyte -CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +_CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" -CGROUP_NAME_PREFIX = "benchmark_" +_CGROUP_NAME_PREFIX = "benchmark_" def _find_own_cgroups(): @@ -62,7 +64,7 @@ def _parse_proc_pid_cgroup(content): yield (subsystem, path) -def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): +def _kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): tasksFile = cgroup / "tasks" i = 0 @@ -72,23 +74,29 @@ def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): # SIGKILL. We added this loop when killing sub-processes was not reliable # and we did not know why, but now it is reliable. for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: - with open(tasksFile, "rt") as tasks: - task = None - for task in tasks: - task = task.strip() - if i > 1: - logging.warning( - "Run has left-over process with pid %s " - "in cgroup %s, sending signal %s (try %s).", - task, - cgroup, - sig, - i, - ) - util.kill_process(int(task), sig) - - if task is None or not ensure_empty: - return # No process was hanging, exit + task = None + try: + with open(tasksFile, "rt") as tasks: + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + except FileNotFoundError: + logging.warning( + "cgroup tasks file %s " "could no longer be found while killing", + tasksFile, + ) + + if task is None or not ensure_empty: + return # No process was hanging, exit # wait for the process to exit, this might take some time time.sleep(i * 0.5) @@ -177,7 +185,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): # (lxcfs mounts cgroups under /run/lxcfs in such a way). 
if os.access(mount, os.F_OK): cgroupPath = mount / my_cgroups[subsystem] - fallbackPath = mount / CGROUP_FALLBACK_PATH + fallbackPath = mount / _CGROUP_FALLBACK_PATH if ( fallback and not os.access(cgroupPath, os.W_OK) @@ -224,7 +232,7 @@ def create_fresh_child_cgroup(self, *subsystems): continue cgroup = pathlib.Path( - tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + tempfile.mkdtemp(prefix=_CGROUP_NAME_PREFIX, dir=parentCgroup) ) createdCgroupsPerSubsystem[subsystem] = cgroup createdCgroupsPerParent[parentCgroup] = cgroup @@ -270,12 +278,12 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): for dirpath, dirs, _files in os.walk(cgroup, topdown=False): for subCgroup in dirs: subCgroup = os.path.join(dirpath, subCgroup) - kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) + _kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) if delete: self._remove_cgroup(subCgroup) - kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + _kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) # First, we go through all cgroups recursively while they are frozen and kill # all processes. 
This helps against fork bombs and prevents processes from @@ -352,23 +360,50 @@ def read_available_mems(self): def read_io_stat(self): blkio_bytes_file = "throttle.io_service_bytes" - if self.has_value(self.IO, blkio_bytes_file): - bytes_read = 0 - bytes_written = 0 - for blkio_line in self.get_file_lines(self.IO, blkio_bytes_file): - try: - dev_no, io_type, bytes_amount = blkio_line.split(" ") - if io_type == "Read": - bytes_read += int(bytes_amount) - elif io_type == "Write": - bytes_written += int(bytes_amount) - except ValueError: - pass # There are irrelevant lines in this file with a different structure - return bytes_read, bytes_written + bytes_read = 0 + bytes_written = 0 + for blkio_line in self.get_file_lines(self.IO, blkio_bytes_file): + try: + dev_no, io_type, bytes_amount = blkio_line.split(" ") + if io_type == "Read": + bytes_read += int(bytes_amount) + elif io_type == "Write": + bytes_written += int(bytes_amount) + except ValueError: + pass # There are irrelevant lines in this file with a different structure + return bytes_read, bytes_written def has_tasks(self, path): return os.path.getsize(path / "tasks") > 0 + def write_memory_limit(self, limit): + limit_file = "limit_in_bytes" + self.set_value(self.MEMORY, limit_file, limit) + + swap_limit_file = "memsw.limit_in_bytes" + # We need swap limit because otherwise the kernel just starts swapping + # out our process if the limit is reached. + # Some kernels might not have this feature, + # which is ok if there is actually no swap. + if not self.has_value(self.MEMORY, swap_limit_file): + if systeminfo.has_swap(): + sys.exit( + 'Kernel misses feature for accounting swap memory, but machine has swap. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' 
+ ) + else: + try: + self.set_value(self.MEMORY, swap_limit_file, limit) + except OSError as e: + if e.errno == errno.ENOTSUP: + # kernel responds with operation unsupported if this is disabled + sys.exit( + 'Memory limit specified, but kernel does not allow limiting swap memory. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' + ) + raise e + + def read_memory_limit(self): + return int(self.get_value(self.MEMORY, "limit_in_bytes")) + def disable_swap(self): # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt @@ -376,57 +411,6 @@ def disable_swap(self): # Our process might get killed because of this. return self.set_value(self.MEMORY, "swappiness", "0") - def set_oom_handler(self): - mem_cgroup = self[self.MEMORY] - ofd = os.open(os.path.join(mem_cgroup, "memory.oom_control"), os.O_WRONLY) - try: - # Important to use CLOEXEC, otherwise the benchmarked tool inherits - # the file descriptor. - efd = _libc.eventfd(0, _EFD_CLOEXEC) - - try: - util.write_file(f"{efd} {ofd}", mem_cgroup, "cgroup.event_control") - - # If everything worked, disable Kernel-side process killing. - # This is not allowed if memory.use_hierarchy is enabled, - # but we don't care. 
- try: - os.write(ofd, b"1") - except OSError as e: - logging.debug( - "Failed to disable kernel-side OOM killer: error %s (%s)", - e.errno, - e.strerror, - ) - except OSError as e: - os.close(efd) - raise e - finally: - os.close(ofd) - - return efd - - def reset_memory_limit(self): - for limitFile in ("memory.memsw.limit_in_bytes", "memory.limit_in_bytes"): - if self.has_value(self.MEMORY, limitFile): - try: - # Write a high value (1 PB) as the limit - self.set_value( - self.MEMORY, - limitFile, - str( - 1 - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - * _BYTE_FACTOR - ), - ) - except OSError as e: - logging.warning( - "Failed to increase %s after OOM: error %s (%s).", - limitFile, - e.errno, - e.strerror, - ) + def read_oom_count(self): + # not supported in v1, see oomhandler and memory_used > memlimit impl + return None diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 676cbe365..eb04cfb39 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -247,7 +247,6 @@ def read_available_mems(self): def read_io_stat(self): bytes_read = 0 bytes_written = 0 - logging.debug(f"{list(self.get_file_lines(self.IO, 'stat'))}") for io_line in self.get_file_lines(self.IO, "stat"): dev_no, *stats = io_line.split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in stats)} @@ -258,11 +257,19 @@ def read_io_stat(self): def has_tasks(self, path): return os.path.getsize(path / "cgroup.procs") > 0 + def write_memory_limit(self, limit): + self.set_value(self.MEMORY, "max", limit) + + def read_memory_limit(self): + return int(self.get_value(self.MEMORY, "max")) + def disable_swap(self): - return self.set_value(self.MEMORY, "swap.max", "0") + self.set_value(self.MEMORY, "swap.max", "0") - def set_oom_handler(self): - logging.warn("OOM handling for cgroups 2 not implemented yet") + def read_oom_count(self): + for line in self.get_file_lines(self.MEMORY, "events"): + k, v = line.split(" ") + if k == "oom_kill": + return int(v) - 
def reset_memory_limit(self): - logging.warn("OOM handling for cgroups 2 not implemented yet") + return 0 diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index fabb7bca1..e1b675db8 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -46,7 +46,7 @@ def check_cgroup_availability(wait=1): tmp.name, memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET - cores=util.parse_int_list(my_cgroups.get_value(my_cgroups.CPUSET, "cpus")), + cores=my_cgroups.read_available_cpus(), memory_nodes=my_cgroups.read_available_mems(), ) lines = [] diff --git a/benchexec/oomhandler.py b/benchexec/oomhandler.py index d0b1791e3..6bac8559a 100644 --- a/benchexec/oomhandler.py +++ b/benchexec/oomhandler.py @@ -11,6 +11,13 @@ from benchexec import util +from ctypes import cdll + +_libc = cdll.LoadLibrary("libc.so.6") +_EFD_CLOEXEC = 0x80000 # from <sys/eventfd.h>: mark eventfd as close-on-exec + +_BYTE_FACTOR = 1000 # byte in kilobyte + class KillProcessOnOomThread(threading.Thread): """ @@ -45,7 +52,32 @@ def __init__(self, cgroups, pid_to_kill, callbackFn=lambda reason: None): self._cgroups = cgroups self._callback = callbackFn - self._efd = self._cgroups.set_oom_handler() + cgroup = cgroups[cgroups.MEMORY] # for raw access + ofd = os.open(os.path.join(cgroup, "memory.oom_control"), os.O_WRONLY) + try: + # Important to use CLOEXEC, otherwise the benchmarked tool inherits + # the file descriptor. + self._efd = _libc.eventfd(0, _EFD_CLOEXEC) + + try: + util.write_file(f"{self._efd} {ofd}", cgroup, "cgroup.event_control") + + # If everything worked, disable Kernel-side process killing. + # This is not allowed if memory.use_hierarchy is enabled, + # but we don't care. 
+ try: + os.write(ofd, b"1") + except OSError as e: + logging.debug( + "Failed to disable kernel-side OOM killer: error %s (%s)", + e.errno, + e.strerror, + ) + except OSError as e: + os.close(self._efd) + raise e + finally: + os.close(ofd) def run(self): # os.close gets called in finally, @@ -69,16 +101,44 @@ def run(self): self._pid_to_kill, ) util.kill_process(self._pid_to_kill) - # Also kill all children of subprocesses directly. - self._cgroups.kill_all_tasks() + with open( + os.path.join(self._cgroups[self._cgroups.MEMORY], "tasks"), "rt" + ) as tasks: + for task in tasks: + util.kill_process(int(task)) # We now need to increase the memory limit of this cgroup # to give the process a chance to terminate - self._cgroups.reset_memory_limit() + self._reset_memory_limit("memory.memsw.limit_in_bytes") + self._reset_memory_limit("memory.limit_in_bytes") finally: close(self._efd) + def _reset_memory_limit(self, limitFile): + if self._cgroups.has_value(self._cgroups.MEMORY, limitFile): + try: + # Write a high value (1 PB) as the limit + self._cgroups.set_value( + self._cgroups.MEMORY, + limitFile, + str( + 1 + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + * _BYTE_FACTOR + ), + ) + except OSError as e: + logging.warning( + "Failed to increase %s after OOM: error %s (%s).", + limitFile, + e.errno, + e.strerror, + ) + def cancel(self): self._finished.set() diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 90850e5ed..a89933f37 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -8,7 +8,6 @@ import argparse import collections import datetime -import errno import logging import multiprocessing import os @@ -463,31 +462,9 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): # Setup memory limit if memlimit is not None: - limit = "limit_in_bytes" - cgroups.set_value(cgroups.MEMORY, limit, memlimit) - - swap_limit = "memsw.limit_in_bytes" - # We need swap limit because otherwise the kernel 
just starts swapping - # out our process if the limit is reached. - # Some kernels might not have this feature, - # which is ok if there is actually no swap. - if not cgroups.has_value(cgroups.MEMORY, swap_limit): - if systeminfo.has_swap(): - sys.exit( - 'Kernel misses feature for accounting swap memory, but machine has swap. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' - ) - else: - try: - cgroups.set_value(cgroups.MEMORY, swap_limit, memlimit) - except OSError as e: - if e.errno == errno.ENOTSUP: - # kernel responds with operation unsupported if this is disabled - sys.exit( - 'Memory limit specified, but kernel does not allow limiting swap memory. Please set swapaccount=1 on your kernel command line or disable swap with "sudo swapoff -a".' - ) - raise e + cgroups.write_memory_limit(memlimit) - memlimit = cgroups.get_value(cgroups.MEMORY, limit) + memlimit = cgroups.read_memory_limit() logging.debug("Effective memory limit is %s bytes.", memlimit) if cgroups.MEMORY in cgroups: @@ -574,11 +551,11 @@ def _setup_cgroup_time_limit( return timelimitThread return None - def _setup_cgroup_memory_limit(self, memlimit, cgroups, pid_to_kill): + def _setup_cgroup_memory_limit_thread(self, memlimit, cgroups, pid_to_kill): """Start memory-limit handler. 
@return None or the memory-limit handler for calling cancel() """ - if memlimit is not None: + if memlimit is not None and cgroups.version == 1: try: oomThread = oomhandler.KillProcessOnOomThread( cgroups=cgroups, @@ -936,7 +913,7 @@ def preSubprocess(): timelimitThread = self._setup_cgroup_time_limit( hardtimelimit, softtimelimit, walltimelimit, cgroups, cores, pid ) - oomThread = self._setup_cgroup_memory_limit(memlimit, cgroups, pid) + oomThread = self._setup_cgroup_memory_limit_thread(memlimit, cgroups, pid) file_hierarchy_limit_thread = self._setup_file_hierarchy_limit( files_count_limit, files_size_limit, temp_dir, cgroups, pid ) @@ -1015,7 +992,7 @@ def preSubprocess(): } if self._termination_reason: result["terminationreason"] = self._termination_reason - elif memlimit and "memory" in result and result["memory"] >= memlimit: + elif result.get("oom") or (memlimit and result.get("memory", 0) >= memlimit): # The kernel does not always issue OOM notifications and thus the OOMHandler # does not always run even in case of OOM. We detect this there and report OOM. 
result["terminationreason"] = "memory" @@ -1080,6 +1057,10 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): else: result["memory"] = max_mem_usage + oom_count = cgroups.read_oom_count() + if oom_count: + result["oom"] = oom_count + if cgroups.IO in cgroups: result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() From 1245dc0daa753d1a26f73337bf59939f9397cdaf Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 21 Oct 2021 01:23:16 +0200 Subject: [PATCH 028/133] cgroups: CI fixes --- benchexec/cgroups.py | 4 ++++ benchexec/check_cgroups.py | 1 - benchexec/runexecutor.py | 1 + benchexec/test_integration/__init__.py | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 933042d3e..fc6bf3a50 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -298,6 +298,10 @@ def known_subsystems(self): def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): pass + @abstractmethod + def add_task(self, pid): + pass + @abstractmethod def create_fresh_child_cgroup(self, subsystem): pass diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index e1b675db8..0d9a9f098 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -13,7 +13,6 @@ from benchexec.cgroups import Cgroups from benchexec.runexecutor import RunExecutor -from benchexec import util sys.dont_write_bytecode = True # prevent creation of .pyc files diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index a89933f37..5ec42497b 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -360,6 +360,7 @@ def _init_cgroups(self): if self.cgroups.MEMORY not in self.cgroups: logging.warning("Cannot measure memory consumption without memory cgroup.") else: + # FIXME if systeminfo.has_swap() and ( not self.cgroups.has_value( self.cgroups.MEMORY, "memsw.max_usage_in_bytes" diff --git a/benchexec/test_integration/__init__.py 
b/benchexec/test_integration/__init__.py index e9a4875ac..75f8993e0 100644 --- a/benchexec/test_integration/__init__.py +++ b/benchexec/test_integration/__init__.py @@ -240,7 +240,7 @@ def test_simple_set_name(self): ) def test_simple_parallel(self): - self.run_benchexec_and_compare_expected_files("--numOfThreads", "12") + self.run_benchexec_and_compare_expected_files("--numOfThreads", "4") def test_wildcard_tasks_1(self): self.run_benchexec_and_compare_expected_files( From c815670e711874a0c873035ea09979acafa5c1fe Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 28 Oct 2021 02:10:30 +0200 Subject: [PATCH 029/133] cgroups: implement non-fallback code path for v2 This allows the following to be run without any need for administrative privileges. ``` $ systemd-run --user --scope -p Delegate=true runexec --read-only-dir / date ``` This also moves the main runexec process into a child cgroup due to it not being possible to delegate controllers if a process exists in the parent. This will make it possible to report benchexec system usage. 
--- benchexec/cgroups.py | 15 +++++++--- benchexec/cgroupsv1.py | 5 +++- benchexec/cgroupsv2.py | 54 +++++++++++++++++++++++++++------- benchexec/containerexecutor.py | 4 ++- benchexec/runexecutor.py | 18 +++++++----- 5 files changed, 71 insertions(+), 25 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index fc6bf3a50..59d44847e 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -85,13 +85,16 @@ def from_system(cgroup_procinfo=None, fallback=True): elif version == CGROUPS_V2: from .cgroupsv2 import CgroupsV2 - return CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + cgroups = CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + cgroups._move_to_child() + + return cgroups raise BenchExecException("Could not detect Cgroup Version") def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): if subsystems is None: - self.subsystems = self._supported_subsystems() + self.subsystems = self._supported_subsystems(cgroup_procinfo, fallback) else: self.subsystems = subsystems @@ -208,7 +211,7 @@ def require_subsystem(self, subsystem, log_method=logging.warning): return False try: - test_cgroup = self.create_fresh_child_cgroup(subsystem) + test_cgroup = self.create_fresh_child_cgroup([subsystem]) test_cgroup.remove() except OSError as e: log_method( @@ -298,12 +301,16 @@ def known_subsystems(self): def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): pass + @abstractmethod + def _move_to_child(self): + pass + @abstractmethod def add_task(self, pid): pass @abstractmethod - def create_fresh_child_cgroup(self, subsystem): + def create_fresh_child_cgroup(self, subsystems, move_to_child=False): pass @abstractmethod diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 87b330d81..c3879acad 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -214,7 +214,7 @@ def _find_cgroup_mounts(self): except OSError: logging.exception("Cannot read /proc/mounts") - def 
create_fresh_child_cgroup(self, *subsystems): + def create_fresh_child_cgroup(self, subsystems): """ Create child cgroups of the current cgroup for at least the given subsystems. @return: A Cgroup instance representing the new child cgroup(s). @@ -251,6 +251,9 @@ def copy_parent_to_child(name): return CgroupsV1(createdCgroupsPerSubsystem) + def _move_to_child(self): + logging.debug("moving to child currently not supported for cgroups v1") + def add_task(self, pid): """ Add a process to the cgroups represented by this instance. diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index eb04cfb39..3b931fd94 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -12,7 +12,7 @@ import tempfile import time -from benchexec import util +from benchexec import util, BenchExecException from benchexec.cgroups import Cgroups @@ -59,8 +59,8 @@ def _parse_proc_pid_cgroup(cgroup_file): @return: a generator of tuples """ mountpoint = _find_cgroup_mount() - own_cgroup = cgroup_file.readline().strip().split(":") - path = mountpoint / own_cgroup[2] + own_cgroup = cgroup_file.readline().strip().split(":")[2][1:] + path = mountpoint / own_cgroup return path @@ -135,7 +135,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): else: cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) - if fallback: + if not os.access(cgroup_path / "cgroup.subtree_control", os.W_OK) and fallback: mount = _find_cgroup_mount() fallback_path = mount / CGROUP_FALLBACK_PATH cgroup_path = fallback_path @@ -155,18 +155,50 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): return {k: cgroup_path for k in subsystems} - def create_fresh_child_cgroup(self, *subsystems): + def create_fresh_child_cgroup(self, subsystems, move_to_child=False): """ Create child cgroups of the current cgroup for at least the given subsystems. @return: A Cgroup instance representing the new child cgroup(s). 
""" - assert set(subsystems).issubset(self.subsystems.keys()) - cgroup_path = pathlib.Path( - tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=self.path) - ) + subsystems = set(subsystems) + assert subsystems.issubset(self.subsystems.keys()) + + tasks = set(util.read_file(self.path / "cgroup.procs").split()) + if tasks and not move_to_child: + raise BenchExecException( + "Cannot create cgroups v2 child on non-empty parent without moving tasks" + ) + + prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX + child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) + + if move_to_child and tasks: + prev_delegated_controllers = set( + util.read_file(self.path / "cgroup.subtree_control").split() + ) + for c in prev_delegated_controllers: + util.write_file(f"-{c}", self.path / "cgroup.subtree_control") + + for t in tasks: + try: + util.write_file(t, child_path / "cgroup.procs") + except OSError as e: + logging.warn(f"Could not move pid {t} to {child_path}: {e}") + + for c in prev_delegated_controllers: + util.write_file(f"+{c}", self.path / "cgroup.subtree_control") + + controllers = set(util.read_file(self.path / "cgroup.controllers").split()) + controllers_to_delegate = controllers & subsystems + + for c in controllers_to_delegate: + util.write_file(f"+{c}", self.path / "cgroup.subtree_control") + + return CgroupsV2({c: child_path for c in controllers_to_delegate}) - # FIXME do something with subsystems, also subtree_control? 
- return CgroupsV2({c: cgroup_path for c in self.subsystems}) + def _move_to_child(self): + logging.debug("Moving runexec main process to child") + self.create_fresh_child_cgroup(self.subsystems.keys(), move_to_child=True) def add_task(self, pid): """ diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 26fe98136..a23d2c133 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -429,6 +429,8 @@ def execute_run( logging.debug("Starting process.") + cgroups = Cgroups.from_system() + try: pid, result_fn = self._start_execution( args=args, @@ -439,7 +441,7 @@ def execute_run( root_dir=rootDir, cwd=workingDir, temp_dir=temp_dir, - cgroups=Cgroups(), + cgroups=cgroups, output_dir=output_dir, result_files_patterns=result_files_patterns, child_setup_fn=util.dummy_fn, diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 5ec42497b..ab8277b18 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -348,13 +348,15 @@ def _init_cgroups(self): if self.cgroups.CPU not in self.cgroups and self.cgroups.version == 1: logging.warning("Cannot measure CPU time without cpuacct cgroup.") - self.cgroups.require_subsystem(self.cgroups.FREEZE) - if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: - critical_cgroups.add(self.cgroups.FREEZE) - logging.error( - "Cannot reliably kill sub-processes without freezer cgroup " - "or container mode. Please enable at least one of them." - ) + # only a real subsystem in v1 + if self.cgroups.version == 1: + self.cgroups.require_subsystem(self.cgroups.FREEZE) + if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: + critical_cgroups.add(self.cgroups.FREEZE) + logging.error( + "Cannot reliably kill sub-processes without freezer cgroup " + "or container mode. Please enable at least one of them." 
+ ) self.cgroups.require_subsystem(self.cgroups.MEMORY) if self.cgroups.MEMORY not in self.cgroups: @@ -425,7 +427,7 @@ def _setup_cgroups(self, my_cpus, memlimit, memory_nodes, cgroup_values): subsystems.append(self.cgroups.CPUSET) subsystems = [s for s in subsystems if s in self.cgroups] - cgroups = self.cgroups.create_fresh_child_cgroup(*subsystems) + cgroups = self.cgroups.create_fresh_child_cgroup(subsystems) logging.debug("Created cgroups %s.", cgroups) From db9e6c09713769cfa56d820cc0ef5afe0b77a228 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Tue, 2 Nov 2021 16:28:44 +0100 Subject: [PATCH 030/133] cgroups: fix file/path handling edge cases --- benchexec/cgroupsv2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 3b931fd94..9976c6dc6 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -59,8 +59,9 @@ def _parse_proc_pid_cgroup(cgroup_file): @return: a generator of tuples """ mountpoint = _find_cgroup_mount() - own_cgroup = cgroup_file.readline().strip().split(":")[2][1:] - path = mountpoint / own_cgroup + for line in cgroup_file: + own_cgroup = line.strip().split(":")[2][1:] + path = mountpoint / own_cgroup return path @@ -224,7 +225,7 @@ def kill_all_tasks(self): def kill_all_tasks_in_cgroup_recursively(cgroup, delete): for dirpath, dirs, _files in os.walk(cgroup, topdown=False): for subCgroup in dirs: - subCgroup = os.path.join(dirpath, subCgroup) + subCgroup = pathlib.Path(dirpath) / subCgroup kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) if delete: From ffd05e5d49474b9841c30ee812f40b859c35db32 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 25 Nov 2021 19:20:26 +0100 Subject: [PATCH 031/133] cgroups: use pystemd with cgroupsv2 This allows basic use of runexec/benchexec without user intervention for cgroups permissions or having to call via systemd-run --- benchexec/cgroups.py | 4 ++-- benchexec/cgroupsv1.py | 4 ++-- benchexec/cgroupsv2.py 
| 38 ++++++++++++++++++++++++++++++++++---- pyproject.toml | 1 + 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 59d44847e..97cf18914 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -86,7 +86,7 @@ def from_system(cgroup_procinfo=None, fallback=True): from .cgroupsv2 import CgroupsV2 cgroups = CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) - cgroups._move_to_child() + cgroups._move_to_scope() return cgroups @@ -302,7 +302,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): pass @abstractmethod - def _move_to_child(self): + def _move_to_scope(self): pass @abstractmethod diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index c3879acad..5570d1ab8 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -251,8 +251,8 @@ def copy_parent_to_child(name): return CgroupsV1(createdCgroupsPerSubsystem) - def _move_to_child(self): - logging.debug("moving to child currently not supported for cgroups v1") + def _move_to_scope(self): + logging.debug("moving to scope currently not supported for cgroups v1") def add_task(self, pid): """ diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 9976c6dc6..d16b399ea 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -8,10 +8,13 @@ import logging import os import pathlib +import random import signal +import string import tempfile import time + from benchexec import util, BenchExecException from benchexec.cgroups import Cgroups @@ -111,7 +114,7 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) - self.path = next(iter(self.subsystems.values())) + self.path = next(iter(self.subsystems.values())) if len(self.subsystems) else None @property def known_subsystems(self): @@ -195,10 +198,37 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): for c in 
controllers_to_delegate: util.write_file(f"+{c}", self.path / "cgroup.subtree_control") - return CgroupsV2({c: child_path for c in controllers_to_delegate}) + # basic cpu controller support without being enabled + child_subsystems = controllers_to_delegate | {"cpu"} + return CgroupsV2({c: child_path for c in child_subsystems}) + + def _move_to_scope(self): + logging.debug("Moving runexec main process to scope") + + try: + from pystemd.dbuslib import DBus + from pystemd.systemd1 import Manager, Unit + + with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: + unit_params = { + # workaround for not declared parameters, remove in the future + b"_custom": (b"PIDs", b"au", [os.getpid()]), + b"Delegate": True, + b"CPUAccounting": True + } + + random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + name = f"benchexec_{random_suffix}.scope".encode() + manager.Manager.StartTransientUnit(name, b"fail", unit_params) + + with Unit(name, bus=bus) as unit: + self.subsystems = self._supported_subsystems() + self.paths = set(self.subsystems.values()) + self.path = next(iter(self.subsystems.values())) + logging.debug(f"moved to scope {name}, subsystems: {self.subsystems}") + except ImportError: + logging.warn("pystemd could not be imported") - def _move_to_child(self): - logging.debug("Moving runexec main process to child") self.create_fresh_child_cgroup(self.subsystems.keys(), move_to_child=True) def add_task(self, pid): diff --git a/pyproject.toml b/pyproject.toml index 2e7fd35a9..64f010f1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ requires = [ # Require versions that support our license files 'setuptools >= 42.0.0', 'wheel >= 0.32.0', + 'pystemd >= 0.7.0', ] build-backend = 'setuptools.build_meta' From bab92a56bee92558d8b701f021fd23a706f6e569 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Thu, 2 Dec 2021 18:06:13 +0100 Subject: [PATCH 032/133] cgroups: only allow execution in proper cgroup Check for other tasks in 
cgroup when moving, indicating that this is not run in a cgroup created for benchexec (either with pystemd, systemd-run or manually exec'ed in a new cgroup). Also catches a pystemd error if no user dbus is available. --- benchexec/cgroupsv2.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index d16b399ea..cd52186bd 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -114,7 +114,9 @@ def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) - self.path = next(iter(self.subsystems.values())) if len(self.subsystems) else None + self.path = ( + next(iter(self.subsystems.values())) if len(self.subsystems) else None + ) @property def known_subsystems(self): @@ -173,6 +175,14 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): "Cannot create cgroups v2 child on non-empty parent without moving tasks" ) + if len(tasks) > 1 and move_to_child: + raise BenchExecException( + "runexec must be the only running process in its cgroup. Either install pystemd " + "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " + "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " + "writable by the executing user." 
+ ) + prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) @@ -207,6 +217,7 @@ def _move_to_scope(self): try: from pystemd.dbuslib import DBus + from pystemd.dbusexc import DBusFileNotFoundError from pystemd.systemd1 import Manager, Unit with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: @@ -214,20 +225,26 @@ def _move_to_scope(self): # workaround for not declared parameters, remove in the future b"_custom": (b"PIDs", b"au", [os.getpid()]), b"Delegate": True, - b"CPUAccounting": True + b"CPUAccounting": True, } - random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + random_suffix = "".join( + random.choices(string.ascii_letters + string.digits, k=8) + ) name = f"benchexec_{random_suffix}.scope".encode() manager.Manager.StartTransientUnit(name, b"fail", unit_params) - with Unit(name, bus=bus) as unit: + with Unit(name, bus=bus): self.subsystems = self._supported_subsystems() self.paths = set(self.subsystems.values()) self.path = next(iter(self.subsystems.values())) - logging.debug(f"moved to scope {name}, subsystems: {self.subsystems}") + logging.debug( + f"moved to scope {name}, subsystems: {self.subsystems}" + ) except ImportError: logging.warn("pystemd could not be imported") + except DBusFileNotFoundError: + logging.warn("no user DBus found, not using pystemd") self.create_fresh_child_cgroup(self.subsystems.keys(), move_to_child=True) From 8ea677725aa45432981418e252292cf537a87f66 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sat, 4 Dec 2021 16:07:55 +0100 Subject: [PATCH 033/133] cgroups: add some->total pressure metrics for cgroups v2 These metrics can help identify bottlenecks of CPU/IO/memory --- benchexec/cgroups.py | 12 ++++++++++++ benchexec/cgroupsv1.py | 15 +++++++++++++++ benchexec/cgroupsv2.py | 21 +++++++++++++++++++++ benchexec/runexecutor.py | 15 +++++++++++++++ 4 files changed, 63 insertions(+) diff --git 
a/benchexec/cgroups.py b/benchexec/cgroups.py index 97cf18914..805332b71 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -317,6 +317,18 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): def read_max_mem_usage(self): pass + @abstractmethod + def read_mem_pressure(self): + pass + + @abstractmethod + def read_cpu_pressure(self): + pass + + @abstractmethod + def read_io_pressure(self): + pass + @abstractmethod def read_usage_per_cpu(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 5570d1ab8..05c824b7c 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -338,6 +338,21 @@ def read_max_mem_usage(self): return None + def read_mem_pressure(self): + logging.debug("Pressure metrics not supported in cgroups v1") + + return None + + def read_cpu_pressure(self): + logging.debug("Pressure metrics not supported in cgroups v1") + + return None + + def read_io_pressure(self): + logging.debug("Pressure metrics not supported in cgroups v1") + + return None + def read_usage_per_cpu(self): usage = {} for (core, coretime) in enumerate( diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index cd52186bd..ffbf24c49 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -313,6 +313,27 @@ def read_max_mem_usage(self): return + def read_mem_pressure(self): + mem_stats = dict(self.get_key_value_pairs(self.MEMORY, "pressure")) + mem_some_stats = mem_stats["some"].split(' ') + stats_map = {s[0]: s[1] for s in (s.split("=") for s in mem_some_stats)} + + return float(stats_map["total"]) / 1_000_000 + + def read_cpu_pressure(self): + cpu_stats = dict(self.get_key_value_pairs(self.CPU, "pressure")) + cpu_some_stats = cpu_stats["some"].split(' ') + stats_map = {s[0]: s[1] for s in (s.split("=") for s in cpu_some_stats)} + + return float(stats_map["total"]) / 1_000_000 + + def read_io_pressure(self): + io_stats = dict(self.get_key_value_pairs(self.IO, "pressure")) + io_some_stats = 
io_stats["some"].split(' ') + stats_map = {s[0]: s[1] for s in (s.split("=") for s in io_some_stats)} + + return float(stats_map["total"]) / 1_000_000 + def read_usage_per_cpu(self): logging.debug("Usage per CPU not supported in cgroups v2") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index ab8277b18..f9e1232a1 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -295,6 +295,9 @@ def print_optional_result(key, unit="", format_fn=str): print_optional_result("memory", "B") print_optional_result("blkio-read", "B") print_optional_result("blkio-write", "B") + print_optional_result("total-cpu-pressure-some", "s") + print_optional_result("total-io-pressure-some", "s") + print_optional_result("total-memory-pressure-some", "s") energy = intel_cpu_energy.format_energy_results(result.get("cpuenergy")) for energy_key, energy_value in energy.items(): print(f"{energy_key}={energy_value}J") @@ -1051,6 +1054,10 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): for core, coretime in cgroups.read_usage_per_cpu().items(): result[f"cputime-cpu{core}"] = coretime + cpu_pressure = cgroups.read_cpu_pressure() + if cpu_pressure is not None: + result["total-cpu-pressure-some"] = cpu_pressure + if cgroups.MEMORY in cgroups: max_mem_usage = cgroups.read_max_mem_usage() if max_mem_usage is None: @@ -1064,9 +1071,17 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): if oom_count: result["oom"] = oom_count + mem_pressure = cgroups.read_mem_pressure() + if mem_pressure is not None: + result["total-memory-pressure-some"] = mem_pressure + if cgroups.IO in cgroups: result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() + io_pressure = cgroups.read_io_pressure() + if io_pressure is not None: + result["total-io-pressure-some"] = io_pressure + logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", result.get("walltime"), From 3469607cf5016621bb90b23eff3590f23ccdfc47 Mon Sep 17 
00:00:00 2001 From: Robin Gloster Date: Sat, 4 Dec 2021 16:10:05 +0100 Subject: [PATCH 034/133] cgroups: partially revert cgroup-availability checking This only worked if runexec was started directly --- benchexec/cgroupsv2.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index ffbf24c49..778a715fd 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -175,13 +175,14 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): "Cannot create cgroups v2 child on non-empty parent without moving tasks" ) - if len(tasks) > 1 and move_to_child: - raise BenchExecException( - "runexec must be the only running process in its cgroup. Either install pystemd " - "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " - "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " - "writable by the executing user." - ) + #FIXME + #if len(tasks) > 1 and move_to_child: + # raise BenchExecException( + # "runexec must be the only running process in its cgroup. Either install pystemd " + # "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " + # "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " + # "writable by the executing user." 
+ # ) prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) From 5db0ec864eb136e814c38bdd5bfaea56487ecdff Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sat, 4 Dec 2021 16:12:58 +0100 Subject: [PATCH 035/133] cgroups: cleanup --- benchexec/cgroupsv1.py | 6 ------ benchexec/cgroupsv2.py | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 05c824b7c..dfd763cf2 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -341,18 +341,12 @@ def read_max_mem_usage(self): def read_mem_pressure(self): logging.debug("Pressure metrics not supported in cgroups v1") - return None - def read_cpu_pressure(self): logging.debug("Pressure metrics not supported in cgroups v1") - return None - def read_io_pressure(self): logging.debug("Pressure metrics not supported in cgroups v1") - return None - def read_usage_per_cpu(self): usage = {} for (core, coretime) in enumerate( diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 778a715fd..8fe8be1c5 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -175,14 +175,14 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): "Cannot create cgroups v2 child on non-empty parent without moving tasks" ) - #FIXME - #if len(tasks) > 1 and move_to_child: - # raise BenchExecException( - # "runexec must be the only running process in its cgroup. Either install pystemd " - # "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " - # "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " - # "writable by the executing user." - # ) + # FIXME + # if len(tasks) > 1 and move_to_child: + # raise BenchExecException( + # "runexec must be the only running process in its cgroup. 
Either install pystemd " + # "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " + # "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " + # "writable by the executing user." + # ) prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) @@ -316,21 +316,21 @@ def read_max_mem_usage(self): def read_mem_pressure(self): mem_stats = dict(self.get_key_value_pairs(self.MEMORY, "pressure")) - mem_some_stats = mem_stats["some"].split(' ') + mem_some_stats = mem_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in mem_some_stats)} return float(stats_map["total"]) / 1_000_000 def read_cpu_pressure(self): cpu_stats = dict(self.get_key_value_pairs(self.CPU, "pressure")) - cpu_some_stats = cpu_stats["some"].split(' ') + cpu_some_stats = cpu_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in cpu_some_stats)} return float(stats_map["total"]) / 1_000_000 def read_io_pressure(self): io_stats = dict(self.get_key_value_pairs(self.IO, "pressure")) - io_some_stats = io_stats["some"].split(' ') + io_some_stats = io_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in io_some_stats)} return float(stats_map["total"]) / 1_000_000 From 5b4f30911f87604089c9356b68608d81b4e284be Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Tue, 11 Jan 2022 15:50:11 +0100 Subject: [PATCH 036/133] move benchexec's process group to scope not runexec --- benchexec/cgroups.py | 9 ++++++--- benchexec/cgroupsv1.py | 2 +- benchexec/cgroupsv2.py | 22 +++++++++++----------- benchexec/localexecution.py | 3 ++- benchexec/util.py | 13 +++++++++++++ 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 805332b71..f292697f1 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -76,7 +76,7 @@ def 
_get_cgroup_version(): class Cgroups(ABC): @staticmethod - def from_system(cgroup_procinfo=None, fallback=True): + def from_system(cgroup_procinfo=None, fallback=True, initial_cgroup=False): version = _get_cgroup_version() if version == CGROUPS_V1: from .cgroupsv1 import CgroupsV1 @@ -86,7 +86,10 @@ def from_system(cgroup_procinfo=None, fallback=True): from .cgroupsv2 import CgroupsV2 cgroups = CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) - cgroups._move_to_scope() + if not initial_cgroup: + cgroups.create_fresh_child_cgroup( + cgroups.subsystems.keys(), move_to_child=True + ) return cgroups @@ -302,7 +305,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): pass @abstractmethod - def _move_to_scope(self): + def move_to_scope(self): pass @abstractmethod diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index dfd763cf2..57c33f8b3 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -251,7 +251,7 @@ def copy_parent_to_child(name): return CgroupsV1(createdCgroupsPerSubsystem) - def _move_to_scope(self): + def move_to_scope(self): logging.debug("moving to scope currently not supported for cgroups v1") def add_task(self, pid): diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 8fe8be1c5..0caaf6569 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -175,14 +175,14 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): "Cannot create cgroups v2 child on non-empty parent without moving tasks" ) - # FIXME - # if len(tasks) > 1 and move_to_child: - # raise BenchExecException( - # "runexec must be the only running process in its cgroup. Either install pystemd " - # "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " - # "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " - # "writable by the executing user." 
- # ) + allowed_pids = {str(p) for p in util.get_pgrp_pids(os.getpgid(0))} + if len(tasks) > 1 and not tasks <= allowed_pids and move_to_child: + raise BenchExecException( + "runexec must be the only running process in its cgroup. Either install pystemd " + "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " + "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " + "writable by the executing user." + ) prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) @@ -213,9 +213,10 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): child_subsystems = controllers_to_delegate | {"cpu"} return CgroupsV2({c: child_path for c in child_subsystems}) - def _move_to_scope(self): + def move_to_scope(self): logging.debug("Moving runexec main process to scope") + pids = util.get_pgrp_pids(os.getpgid(0)) try: from pystemd.dbuslib import DBus from pystemd.dbusexc import DBusFileNotFoundError @@ -224,9 +225,8 @@ def _move_to_scope(self): with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: unit_params = { # workaround for not declared parameters, remove in the future - b"_custom": (b"PIDs", b"au", [os.getpid()]), + b"_custom": (b"PIDs", b"au", [int(p) for p in pids]), b"Delegate": True, - b"CPUAccounting": True, } random_suffix = "".join( diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index 1ed6d61d6..75ba3b337 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -69,7 +69,8 @@ def execute_benchmark(benchmark, output_handler): "only resource limits are used." 
) - my_cgroups = Cgroups.from_system() + my_cgroups = Cgroups.from_system(initial_cgroup=True) + my_cgroups.move_to_scope() required_cgroups = set() coreAssignment = None # cores per run diff --git a/benchexec/util.py b/benchexec/util.py index 52c51dbee..a4c28cc2e 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -17,6 +17,7 @@ import glob import logging import os +import pathlib import shutil import signal as _signal import stat @@ -739,3 +740,15 @@ def check_msr(): if all(os.access(f"/dev/cpu/{cpu}/msr", os.W_OK) for cpu in cpu_dirs): res["write"] = True return res + + +def get_pgrp_pids(pgid): + pids = [] + for proc_stat_path in pathlib.Path("/proc").glob("[0-9]*/stat"): + with open(proc_stat_path) as proc_stat_fh: + proc_stat = proc_stat_fh.readline().split(" ") + pid, stat_pgid = proc_stat[0], proc_stat[4] + if pgid == int(stat_pgid): + pids.append(pid) + + return pids From 21fdde20b2aa0dea9cc2b412e789c631215719d1 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Tue, 11 Jan 2022 15:57:20 +0100 Subject: [PATCH 037/133] add pressure metrics to test whitelist --- benchexec/test_runexecutor.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 276f93981..37876f11b 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -150,6 +150,9 @@ def check_result_keys(self, result, *additional_keys): "blkio-read", "blkio-write", "starttime", + "total-cpu-pressure-some", + "total-io-pressure-some", + "total-memory-pressure-some", } expected_keys.update(additional_keys) for key in result.keys(): From c3d7c1c6cab287a89e7201828d76c51df53c1c43 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Tue, 11 Jan 2022 20:10:08 +0100 Subject: [PATCH 038/133] cgroups v2: remove fallback logic --- benchexec/cgroupsv2.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 0caaf6569..f2a58d7ae 100644 --- a/benchexec/cgroupsv2.py 
+++ b/benchexec/cgroupsv2.py @@ -20,10 +20,6 @@ uid = os.getuid() -CGROUP_FALLBACK_PATH = f"user.slice/user-{uid}.slice/user@{uid}.service/app.slice/benchexec-cgroup2.service/benchexec_root" -"""If we do not have write access to the current cgroup, -attempt to use this cgroup as fallback.""" - CGROUP_NAME_PREFIX = "benchmark_" @@ -141,11 +137,6 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): else: cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) - if not os.access(cgroup_path / "cgroup.subtree_control", os.W_OK) and fallback: - mount = _find_cgroup_mount() - fallback_path = mount / CGROUP_FALLBACK_PATH - cgroup_path = fallback_path - with open(cgroup_path / "cgroup.controllers") as subsystems_file: subsystems = set(subsystems_file.readline().strip().split()) From 4d32b329111216d66a9d986083f0f00683b1ff88 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 12 Jan 2022 14:50:42 +0100 Subject: [PATCH 039/133] util.get_pgrp_pids: fix race condition processes may disappear while iterating through /proc --- benchexec/util.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/benchexec/util.py b/benchexec/util.py index a4c28cc2e..c98571825 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -745,10 +745,19 @@ def check_msr(): def get_pgrp_pids(pgid): pids = [] for proc_stat_path in pathlib.Path("/proc").glob("[0-9]*/stat"): - with open(proc_stat_path) as proc_stat_fh: - proc_stat = proc_stat_fh.readline().split(" ") - pid, stat_pgid = proc_stat[0], proc_stat[4] - if pgid == int(stat_pgid): - pids.append(pid) + try: + with open(proc_stat_path) as proc_stat_fh: + proc_stat = proc_stat_fh.readline().split(" ") + pid, stat_pgid = proc_stat[0], proc_stat[4] + if pgid == int(stat_pgid): + pids.append(pid) + except OSError: + # ignore race conditions with processes disappearing + # they aren't interesting to us anyway as processes + # related to us will continue running. 
Apart from that, this is + # used to move processes to a scope or check if moving to a cgroup + # makes sense so processes having terminated aren't useful in these + # cases anyway + pass return pids From 9728457be3a00494c2b36ccd4ae43128e1c39272 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Wed, 12 Jan 2022 17:02:51 +0100 Subject: [PATCH 040/133] benchmark-cgroup2.service: remove not necessary due to setting up cgroups with pystemd or systemd-run --- debian/benchexec-cgroup2.service | 49 -------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 debian/benchexec-cgroup2.service diff --git a/debian/benchexec-cgroup2.service b/debian/benchexec-cgroup2.service deleted file mode 100644 index 2a6aa3050..000000000 --- a/debian/benchexec-cgroup2.service +++ /dev/null @@ -1,49 +0,0 @@ -# This file is part of BenchExec, a framework for reliable benchmarking: -# https://github.com/sosy-lab/benchexec -# -# SPDX-FileCopyrightText: 2007-2020 Dirk Beyer -# -# SPDX-License-Identifier: Apache-2.0 - -[Unit] -Description=Cgroup setup for BenchExec -Documentation=https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md -Documentation=https://github.com/sosy-lab/benchexec/blob/main/doc/INDEX.md - -[Service] -# Adjust the following line to configure permissions for cgroup usage. -# The default gives permissions to users in group "benchexec". -# You can change the group name, or give permissions to everybody by -# setting BENCHEXEC_CGROUP_PERM to "a+w". 
-Environment=BENCHEXEC_CGROUP_GROUP=benchexec BENCHEXEC_CGROUP_PERM=g+w - -Restart=always -Delegate=cpu cpuset memory io pids -CPUAccounting=true -IOAccounting=true -MemoryAccounting=true -ExecStartPre=/bin/bash -c '\ -set -e;\ -mkdir /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root;\ -mkdir /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/dummy' - -ExecStart=/bin/bash -c '\ -set -e;\ -exec sleep $(( 10 * 365 * 24 * 3600 ))' - -ExecStartPost=/bin/bash -c '\ -set -e;\ -for p in $(cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/cgroup.procs); do\ - echo $p > /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/dummy/cgroup.procs;\ -done;\ -for cg in $(cat /sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/cgroup.controllers); do\ - echo +$cg >/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/cgroup.subtree_control;\ - echo +$cg >/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/app.slice/benchexec-cgroup2.service/benchexec_root/cgroup.subtree_control;\ -done' - -Restart=always -TimeoutStartSec=360000 - - -[Install] -WantedBy=default.target From b9d2c63ab543e2647fa9f12d3d3d53b6d496ccd1 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 27 Feb 2022 01:27:34 +0100 Subject: [PATCH 041/133] tests: do not skip if executable is on PATH If /bin/echo does not exist but echo is on the PATH the test should still be able to run successfully. Only /bin/sh is guaranteed to exist on POSIX systems. 
--- benchexec/test_runexecutor.py | 136 +++++++++++++++++----------------- 1 file changed, 70 insertions(+), 66 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 37876f11b..826477d3c 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -49,6 +49,10 @@ def setUp(self, *args, **kwargs): ): self.runexecutor = RunExecutor(use_namespaces=False, *args, **kwargs) + self.echo = shutil.which("echo") or "/bin/echo" + self.sleep = shutil.which("sleep") or "/bin/sleep" + self.cat = shutil.which("cat") or "/bin/cat" + @contextlib.contextmanager def skip_if_logs(self, error_msg): """A context manager that automatically marks the test as skipped if SystemExit @@ -186,17 +190,17 @@ def check_exitcode_extern(self, result, exitcode, msg=None): self.assertEqual(int(result["exitsignal"]), exitcode.signal, msg) def test_command_output(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") - (_, output) = self.execute_run("/bin/echo", "TEST_TOKEN") - self.check_command_in_output(output, "/bin/echo TEST_TOKEN") + if not os.path.exists(self.echo): + self.skipTest("missing echo") + (_, output) = self.execute_run(self.echo, "TEST_TOKEN") + self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") for line in output[1:-1]: self.assertRegex(line, "^-*$", "unexpected text in run output") def test_command_error_output(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") + if not os.path.exists(self.echo): + self.skipTest("missing echo") if not os.path.exists("/bin/sh"): self.skipTest("missing /bin/sh") @@ -213,7 +217,7 @@ def execute_Run_intern(*args, **kwargs): os.remove(error_filename) (output_lines, error_lines) = execute_Run_intern( - "/bin/sh", "-c", "/bin/echo ERROR_TOKEN >&2" + "/bin/sh", "-c", f"{self.echo} ERROR_TOKEN >&2" ) self.assertEqual( error_lines[-1], "ERROR_TOKEN", 
"run error output misses command output" @@ -223,9 +227,9 @@ def execute_Run_intern(*args, **kwargs): for line in error_lines[1:-1]: self.assertRegex(line, "^-*$", "unexpected text in run error output") - (output_lines, error_lines) = execute_Run_intern("/bin/echo", "OUT_TOKEN") - self.check_command_in_output(output_lines, "/bin/echo OUT_TOKEN") - self.check_command_in_output(error_lines, "/bin/echo OUT_TOKEN") + (output_lines, error_lines) = execute_Run_intern(self.echo, "OUT_TOKEN") + self.check_command_in_output(output_lines, f"{self.echo} OUT_TOKEN") + self.check_command_in_output(error_lines, f"{self.echo} OUT_TOKEN") self.assertEqual( output_lines[-1], "OUT_TOKEN", "run output misses command output" ) @@ -235,22 +239,22 @@ def execute_Run_intern(*args, **kwargs): self.assertRegex(line, "^-*$", "unexpected text in run error output") def test_command_result(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") - (result, _) = self.execute_run("/bin/echo", "TEST_TOKEN") - self.check_exitcode(result, 0, "exit code of /bin/echo is not zero") + if not os.path.exists(self.echo): + self.skipTest("missing echo") + (result, _) = self.execute_run(self.echo, "TEST_TOKEN") + self.check_exitcode(result, 0, "exit code of echo is not zero") self.assertAlmostEqual( result["walltime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg="walltime of /bin/echo not as expected", + msg="walltime of echo not as expected", ) if "cputime" in result: # not present without cpuacct cgroup self.assertAlmostEqual( result["cputime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg="cputime of /bin/echo not as expected", + msg="cputime of echo not as expected", ) self.check_result_keys(result) @@ -323,10 +327,10 @@ def test_cputime_softlimit(self): self.assertRegex(line, "^-*$", "unexpected text in run output") def test_walltime_limit(self): - if not os.path.exists("/bin/sleep"): - self.skipTest("missing /bin/sleep") + if not 
os.path.exists(self.sleep): + self.skipTest("missing sleep") (result, output) = self.execute_run( - "/bin/sleep", "10", walltimelimit=1, expect_terminationreason="walltime" + self.sleep, "10", walltimelimit=1, expect_terminationreason="walltime" ) self.check_exitcode(result, 9, "exit code of killed process is not 9") @@ -341,10 +345,10 @@ def test_walltime_limit(self): result["cputime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg="cputime of /bin/sleep is not approximately zero", + msg="cputime of sleep is not approximately zero", ) - self.check_command_in_output(output, "/bin/sleep 10") + self.check_command_in_output(output, f"{self.sleep} 10") for line in output[1:]: self.assertRegex(line, "^-*$", "unexpected text in run output") @@ -410,63 +414,63 @@ def test_all_timelimits(self): self.assertRegex(line, "^-*$", "unexpected text in run output") def test_input_is_redirected_from_devnull(self): - if not os.path.exists("/bin/cat"): - self.skipTest("missing /bin/cat") - (result, output) = self.execute_run("/bin/cat", walltimelimit=1) + if not os.path.exists(self.cat): + self.skipTest("missing cat") + (result, output) = self.execute_run(self.cat, walltimelimit=1) self.check_exitcode(result, 0, "exit code of process is not 0") self.assertAlmostEqual( result["walltime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg='walltime of "/bin/cat < /dev/null" is not approximately zero', + msg='walltime of "cat < /dev/null" is not approximately zero', ) if "cputime" in result: # not present without cpuacct cgroup self.assertAlmostEqual( result["cputime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg='cputime of "/bin/cat < /dev/null" is not approximately zero', + msg='cputime of "cat < /dev/null" is not approximately zero', ) self.check_result_keys(result) - self.check_command_in_output(output, "/bin/cat") + self.check_command_in_output(output, self.cat) for line in output[1:]: self.assertRegex(line, "^-*$", "unexpected text in run 
output") def test_input_is_redirected_from_file(self): - if not os.path.exists("/bin/cat"): - self.skipTest("missing /bin/cat") + if not os.path.exists(self.cat): + self.skipTest("missing cat") with tempfile.TemporaryFile() as tmp: tmp.write(b"TEST_TOKEN") tmp.flush() tmp.seek(0) - (result, output) = self.execute_run("/bin/cat", stdin=tmp, walltimelimit=1) + (result, output) = self.execute_run(self.cat, stdin=tmp, walltimelimit=1) self.check_exitcode(result, 0, "exit code of process is not 0") self.assertAlmostEqual( result["walltime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg='walltime of "/bin/cat < /dev/null" is not approximately zero', + msg='walltime of "cat < /dev/null" is not approximately zero', ) if "cputime" in result: # not present without cpuacct cgroup self.assertAlmostEqual( result["cputime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg='cputime of "/bin/cat < /dev/null" is not approximately zero', + msg='cputime of "cat < /dev/null" is not approximately zero', ) self.check_result_keys(result) - self.check_command_in_output(output, "/bin/cat") + self.check_command_in_output(output, self.cat) self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") for line in output[1:-1]: self.assertRegex(line, "^-*$", "unexpected text in run output") def test_input_is_redirected_from_stdin(self): - if not os.path.exists("/bin/cat"): - self.skipTest("missing /bin/cat") + if not os.path.exists(self.cat): + self.skipTest("missing cat") (output_fd, output_filename) = tempfile.mkstemp(".log", "output_", text=True) cmd = self.get_runexec_cmdline( @@ -474,7 +478,7 @@ def test_input_is_redirected_from_stdin(self): "-", "--walltime", "1", - "/bin/cat", + self.cat, output_filename=output_filename, ) try: @@ -513,18 +517,18 @@ def test_input_is_redirected_from_stdin(self): float(result["walltime"].rstrip("s")), trivial_run_grace_time, delta=trivial_run_grace_time, - msg='walltime of "/bin/cat < /dev/null" is not 
approximately zero', + msg='walltime of "cat < /dev/null" is not approximately zero', ) if "cputime" in result: # not present without cpuacct cgroup self.assertAlmostEqual( float(result["cputime"].rstrip("s")), trivial_run_grace_time, delta=trivial_run_grace_time, - msg='cputime of "/bin/cat < /dev/null" is not approximately zero', + msg='cputime of "cat < /dev/null" is not approximately zero', ) self.check_result_keys(result, "returnvalue") - self.check_command_in_output(output, "/bin/cat") + self.check_command_in_output(output, self.cat) self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") for line in output[1:-1]: self.assertRegex(line, "^-*$", "unexpected text in run output") @@ -551,12 +555,12 @@ def test_new_environment_variable(self): self.assertEqual(output[-1], "/usr/bin") def test_stop_run(self): - if not os.path.exists("/bin/sleep"): - self.skipTest("missing /bin/sleep") + if not os.path.exists(self.sleep): + self.skipTest("missing sleep") thread = _StopRunThread(1, self.runexecutor) thread.start() (result, output) = self.execute_run( - "/bin/sleep", "10", expect_terminationreason="killed" + self.sleep, "10", expect_terminationreason="killed" ) thread.join() @@ -572,10 +576,10 @@ def test_stop_run(self): result["cputime"], trivial_run_grace_time, delta=trivial_run_grace_time, - msg="cputime of /bin/sleep is not approximately zero", + msg="cputime of sleep is not approximately zero", ) - self.check_command_in_output(output, "/bin/sleep 10") + self.check_command_in_output(output, f"{self.sleep} 10") for line in output[1:]: self.assertRegex(line, "^-*$", "unexpected text in run output") @@ -647,13 +651,13 @@ def test_append_crash_dump_info(self): ) def test_integration(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") - (result, output) = self.execute_run_extern("/bin/echo", "TEST_TOKEN") - self.check_exitcode_extern(result, 0, "exit code of /bin/echo is not zero") + if not os.path.exists(self.echo): 
+ self.skipTest("missing echo") + (result, output) = self.execute_run_extern(self.echo, "TEST_TOKEN") + self.check_exitcode_extern(result, 0, "exit code of echo is not zero") self.check_result_keys(result, "returnvalue") - self.check_command_in_output(output, "/bin/echo TEST_TOKEN") + self.check_command_in_output(output, f"{self.echo} TEST_TOKEN") self.assertEqual(output[-1], "TEST_TOKEN", "run output misses command output") for line in output[1:-1]: self.assertRegex(line, "^-*$", "unexpected text in run output") @@ -729,32 +733,32 @@ def test_require_cgroup_cpu(self): self.setUp(additional_cgroup_subsystems=["cpu"]) except SystemExit as e: self.skipTest(e) - if not os.path.exists("/bin/cat"): - self.skipTest("missing /bin/cat") - (result, output) = self.execute_run("/bin/cat", "/proc/self/cgroup") - self.check_exitcode(result, 0, "exit code of /bin/cat is not zero") + if not os.path.exists(self.cat): + self.skipTest("missing cat") + (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") + self.check_exitcode(result, 0, "exit code of cat is not zero") for line in output: if re.match(r"^[0-9]*:([^:]*,)?cpu(,[^:]*)?:/(.*/)?benchmark_.*$", line): return # Success self.fail("Not in expected cgroup for subsystem cpu:\n" + "\n".join(output)) def test_set_cgroup_cpu_shares(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") + if not os.path.exists(self.echo): + self.skipTest("missing echo") try: self.setUp(additional_cgroup_subsystems=["cpu"]) except SystemExit as e: self.skipTest(e) (result, _) = self.execute_run( - "/bin/echo", cgroupValues={("cpu", "shares"): 42} + self.echo, cgroupValues={("cpu", "shares"): 42} ) - self.check_exitcode(result, 0, "exit code of /bin/echo is not zero") + self.check_exitcode(result, 0, "exit code of echo is not zero") # Just assert that execution was successful, # testing that the value was actually set is much more difficult. 
def test_nested_runexec(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") + if not os.path.exists(self.echo): + self.skipTest("missing echo") self.setUp( dir_modes={ # Do not mark /home hidden, would fail with python from virtualenv @@ -763,7 +767,7 @@ def test_nested_runexec(self): "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, } ) - inner_args = ["--", "/bin/echo", "TEST_TOKEN"] + inner_args = ["--", self.echo, "TEST_TOKEN"] with tempfile.NamedTemporaryFile( mode="r", prefix="inner_output_", suffix=".log" @@ -778,16 +782,16 @@ def test_nested_runexec(self): logging.info("Inner output:\n%s", "\n".join(inner_output)) self.check_result_keys(outer_result, "returnvalue") self.check_exitcode(outer_result, 0, "exit code of inner runexec is not zero") - self.check_command_in_output(inner_output, "/bin/echo TEST_TOKEN") + self.check_command_in_output(inner_output, f"{self.echo} TEST_TOKEN") self.assertEqual( inner_output[-1], "TEST_TOKEN", "run output misses command output" ) def test_starttime(self): - if not os.path.exists("/bin/echo"): - self.skipTest("missing /bin/echo") + if not os.path.exists(self.echo): + self.skipTest("missing echo") before = util.read_local_time() - (result, _) = self.execute_run("/bin/echo") + (result, _) = self.execute_run(self.echo) after = util.read_local_time() self.check_result_keys(result) run_starttime = result["starttime"] @@ -1068,7 +1072,7 @@ def test_path_with_space(self): def test_uptime_with_lxcfs(self): if not os.path.exists("/var/lib/lxcfs/proc"): self.skipTest("missing lxcfs") - result, output = self.execute_run("cat", "/proc/uptime") + result, output = self.execute_run(self.cat, "/proc/uptime") self.check_result_keys(result) self.check_exitcode(result, 0, "exit code for reading uptime is not zero") uptime = float(output[-1].split(" ")[0]) @@ -1081,7 +1085,7 @@ def test_uptime_without_lxcfs(self): self.skipTest("missing lxcfs") # create RunExecutor with desired parameter 
self.setUp(container_system_config=False) - result, output = self.execute_run("cat", "/proc/uptime") + result, output = self.execute_run(self.cat, "/proc/uptime") self.check_result_keys(result) self.check_exitcode(result, 0, "exit code for reading uptime is not zero") uptime = float(output[-1].split(" ")[0]) From 567480bc74b3c50079e624c1576c46ccfda7de6a Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 27 Feb 2022 23:13:40 +0100 Subject: [PATCH 042/133] tests: fix cgroup v2 tests --- benchexec/test_runexecutor.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 826477d3c..abb696f68 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -21,6 +21,7 @@ from benchexec import containerexecutor from benchexec import filehierarchylimit from benchexec.runexecutor import RunExecutor +from benchexec.cgroups import Cgroups from benchexec import runexecutor from benchexec import util @@ -43,6 +44,9 @@ def setUpClass(cls): if not hasattr(cls, "assertRegex"): cls.assertRegex = cls.assertRegexpMatches + cls.cgroups = Cgroups.from_system(initial_cgroup=True) + cls.cgroups.move_to_scope() + def setUp(self, *args, **kwargs): with self.skip_if_logs( "Cannot reliably kill sub-processes without freezer cgroup" @@ -735,6 +739,8 @@ def test_require_cgroup_cpu(self): self.skipTest(e) if not os.path.exists(self.cat): self.skipTest("missing cat") + if self.cgroup.version != 1: + self.skipTest("not relevant in unified hierarchy") (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") self.check_exitcode(result, 0, "exit code of cat is not zero") for line in output: @@ -749,8 +755,12 @@ def test_set_cgroup_cpu_shares(self): self.setUp(additional_cgroup_subsystems=["cpu"]) except SystemExit as e: self.skipTest(e) + if self.cgroups.version == 1: + cgValues = {("cpu", "shares"): 42} + else: + cgValues = {("cpu", "weight"): 42} (result, _) = 
self.execute_run( - self.echo, cgroupValues={("cpu", "shares"): 42} + self.echo, cgroupValues=cgValues ) self.check_exitcode(result, 0, "exit code of echo is not zero") # Just assert that execution was successful, @@ -802,6 +812,7 @@ def test_starttime(self): class TestRunExecutorWithContainer(TestRunExecutor): def setUp(self, *args, **kwargs): + super().setUp(*args, **kwargs) try: container.execute_in_namespace(lambda: 0) except OSError as e: From 2212ff6151541edbd1e2855be3a55a91a484c5a1 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 27 Feb 2022 23:18:41 +0100 Subject: [PATCH 043/133] tests: reformat --- benchexec/test_runexecutor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index abb696f68..c483dd9e3 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -759,9 +759,7 @@ def test_set_cgroup_cpu_shares(self): cgValues = {("cpu", "shares"): 42} else: cgValues = {("cpu", "weight"): 42} - (result, _) = self.execute_run( - self.echo, cgroupValues=cgValues - ) + (result, _) = self.execute_run(self.echo, cgroupValues=cgValues) self.check_exitcode(result, 0, "exit code of echo is not zero") # Just assert that execution was successful, # testing that the value was actually set is much more difficult. From 4eec9e0c156cf42b10537e837aa1c8527417cf81 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 27 Feb 2022 23:20:56 +0100 Subject: [PATCH 044/133] gitlab-ci: install pystemd --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4937c977c..67afcffec 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -43,6 +43,7 @@ stages: # Give $PRIMARY_USER permission to create cgroups - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER + - sudo apt install python3-pystemd # Install BenchExec - sudo -u $PRIMARY_USER pip install --user . 
# Start lxcfs From 6341f7147a39b0ae0497f2074c98558428005c60 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Sun, 27 Feb 2022 23:26:08 +0100 Subject: [PATCH 045/133] gitlab-ci: actually install pystemd --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 67afcffec..00470c718 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -43,7 +43,7 @@ stages: # Give $PRIMARY_USER permission to create cgroups - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER - - sudo apt install python3-pystemd + - sudo apt install -y python3-pystemd # Install BenchExec - sudo -u $PRIMARY_USER pip install --user . # Start lxcfs From ab65a52e9e8c86492aaea087415d8a739ee5ae0b Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 28 Feb 2022 01:27:07 +0100 Subject: [PATCH 046/133] gitlab-ci: install libsystemd and python headers instead --- .gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 00470c718..6dcbee93a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -43,7 +43,7 @@ stages: # Give $PRIMARY_USER permission to create cgroups - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER - - sudo apt install -y python3-pystemd + - sudo apt install -y libsystemd-dev python-dev # Install BenchExec - sudo -u $PRIMARY_USER pip install --user . 
# Start lxcfs From 3576599563ad15aba3926e19b64704f60114ce7f Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 28 Feb 2022 01:42:01 +0100 Subject: [PATCH 047/133] tests: fix typo --- benchexec/test_runexecutor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index c483dd9e3..5b199f15e 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -739,7 +739,7 @@ def test_require_cgroup_cpu(self): self.skipTest(e) if not os.path.exists(self.cat): self.skipTest("missing cat") - if self.cgroup.version != 1: + if self.cgroups.version != 1: self.skipTest("not relevant in unified hierarchy") (result, output) = self.execute_run(self.cat, "/proc/self/cgroup") self.check_exitcode(result, 0, "exit code of cat is not zero") From 758ceec9ddeed2833cd71a095e16c70d6d502e5c Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 28 Feb 2022 01:44:53 +0100 Subject: [PATCH 048/133] gitlab-ci: fix pytype --- .gitlab-ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6dcbee93a..508b56a3c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -115,8 +115,9 @@ pytype: stage: test image: python:3.7 before_script: + - sudo apt install -y libsystemd-dev python-dev # version due to https://github.com/google/pytype/issues/1130 - - pip install coloredlogs pytype==2022.2.8 + - pip install pystemd coloredlogs pytype==2022.2.8 script: - pytype -k cache: From d1691bd5ee57b94ccca508805910aefd64b25996 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 28 Feb 2022 01:52:04 +0100 Subject: [PATCH 049/133] gitlab-ci: really fix pytype? 
--- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 508b56a3c..af61b6087 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -43,7 +43,7 @@ stages: # Give $PRIMARY_USER permission to create cgroups - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER - - sudo apt install -y libsystemd-dev python-dev + - apt install -y libsystemd-dev python-dev # Install BenchExec - sudo -u $PRIMARY_USER pip install --user . # Start lxcfs @@ -115,7 +115,7 @@ pytype: stage: test image: python:3.7 before_script: - - sudo apt install -y libsystemd-dev python-dev + - apt install -y libsystemd-dev python-dev # version due to https://github.com/google/pytype/issues/1130 - pip install pystemd coloredlogs pytype==2022.2.8 script: From 2a34e769503fa7e92ebad1d23963ef41ab18605f Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 28 Feb 2022 01:55:34 +0100 Subject: [PATCH 050/133] gitlab-ci: really fix pytype?! 
--- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index af61b6087..56a53dc2d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -115,6 +115,7 @@ pytype: stage: test image: python:3.7 before_script: + - apt update - apt install -y libsystemd-dev python-dev # version due to https://github.com/google/pytype/issues/1130 - pip install pystemd coloredlogs pytype==2022.2.8 From f4c3f0edcc0ec52892dfb87c4523db2cdd335183 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 14:19:40 +0100 Subject: [PATCH 051/133] cgroups: make sure paths are strings before escaping --- benchexec/cgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index f292697f1..438121f59 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -278,7 +278,7 @@ def get_group_name(gid): else: permission_hint = _PERMISSION_HINT_OTHER - paths = " ".join(map(util.escape_string_shell, paths)) + paths = " ".join([util.escape_string_shell(str(p)) for p in paths]) sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) else: From daad5e4512c1ce85675ee31127b06f918ee5ff03 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 14:20:17 +0100 Subject: [PATCH 052/133] check_cgroups: fix for cgv2 --- benchexec/cgroupsv2.py | 7 ++----- benchexec/check_cgroups.py | 7 ++++--- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index f2a58d7ae..9a0af77bc 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -132,10 +132,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." 
) - if cgroup_procinfo is None: - cgroup_path = _find_own_cgroups() - else: - cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) + cgroup_path = _find_own_cgroups() with open(cgroup_path / "cgroup.controllers") as subsystems_file: subsystems = set(subsystems_file.readline().strip().split()) @@ -170,7 +167,7 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): if len(tasks) > 1 and not tasks <= allowed_pids and move_to_child: raise BenchExecException( "runexec must be the only running process in its cgroup. Either install pystemd " - "for runexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " + "for benchexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " "writable by the executing user." ) diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 0d9a9f098..e210eb924 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -67,9 +67,7 @@ def check_cgroup_availability(wait=1): my_cgroups.FREEZE, ): if subsystem in my_cgroups: - if not str(task_cgroups[subsystem]).startswith( - str(my_cgroups[subsystem] / "benchmark_") - ): + if not str(task_cgroups[subsystem]).startswith(str(my_cgroups[subsystem])): logging.warning( "Task was in cgroup %s for subsystem %s, " "which is not the expected sub-cgroup of %s. 
" @@ -141,6 +139,9 @@ def main(argv=None): options = parser.parse_args(argv[1:]) + cgroups = Cgroups.from_system(initial_cgroup=True) + cgroups.move_to_scope() + if options.no_thread: check_cgroup_availability(options.wait) else: From 3e27f1da960241e0d4cf5bd32115232fb87dc561 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 14:27:44 +0100 Subject: [PATCH 053/133] test_runexecutor: use existing metric for cgv2 --- benchexec/test_runexecutor.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 5b199f15e..2dbbef399 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -752,13 +752,16 @@ def test_set_cgroup_cpu_shares(self): if not os.path.exists(self.echo): self.skipTest("missing echo") try: - self.setUp(additional_cgroup_subsystems=["cpu"]) + if self.cgroups_version == 1: + self.setUp(additional_cgroup_subsystems=["cpu"]) + else: + self.setUp(additional_cgroup_subsystems=["memory"]) except SystemExit as e: self.skipTest(e) if self.cgroups.version == 1: cgValues = {("cpu", "shares"): 42} else: - cgValues = {("cpu", "weight"): 42} + cgValues = {("memory", "high"): 420000000} (result, _) = self.execute_run(self.echo, cgroupValues=cgValues) self.check_exitcode(result, 0, "exit code of echo is not zero") # Just assert that execution was successful, From d70a66ba5feeca2e2dcf71397fd4dbf2cb05378f Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 14:28:53 +0100 Subject: [PATCH 054/133] util.get_prgp_pids: fix when binaries with spaces are running --- benchexec/util.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/benchexec/util.py b/benchexec/util.py index 42bc80f0e..13934903c 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -759,13 +759,17 @@ def check_msr(): def get_pgrp_pids(pgid): pids = [] - for proc_stat_path in pathlib.Path("/proc").glob("[0-9]*/stat"): + for 
proc_stat_path in pathlib.Path("/proc").glob("[0-9]*/status"): try: - with open(proc_stat_path) as proc_stat_fh: - proc_stat = proc_stat_fh.readline().split(" ") - pid, stat_pgid = proc_stat[0], proc_stat[4] - if pgid == int(stat_pgid): - pids.append(pid) + with open(proc_status_path) as proc_status: + for line in proc_status: + key, value, *_ = line.split("\t") + if key == 'Pid:': + pid = value + elif key == 'NSpgid:': + status_pgid = value + if pgid == int(status_pgid): + pids.append(pid.strip()) except OSError: # ignore race conditions with processes disappearing # they aren't interesting to us anyway as processes From a6bf3d24da185e8c5419cc54f030478a4f928f34 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 15:11:19 +0100 Subject: [PATCH 055/133] util.get_prgp_pids: fix typo --- benchexec/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/util.py b/benchexec/util.py index 13934903c..be6e8d9ee 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -759,7 +759,7 @@ def check_msr(): def get_pgrp_pids(pgid): pids = [] - for proc_stat_path in pathlib.Path("/proc").glob("[0-9]*/status"): + for proc_status_path in pathlib.Path("/proc").glob("[0-9]*/status"): try: with open(proc_status_path) as proc_status: for line in proc_status: From c42b142030a7735dd052dfcc0d7356a7a7ac6291 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 15:14:31 +0100 Subject: [PATCH 056/133] util.get_prgp_pids: rerun black --- benchexec/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/util.py b/benchexec/util.py index be6e8d9ee..6e73a422d 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -764,9 +764,9 @@ def get_pgrp_pids(pgid): with open(proc_status_path) as proc_status: for line in proc_status: key, value, *_ = line.split("\t") - if key == 'Pid:': + if key == "Pid:": pid = value - elif key == 'NSpgid:': + elif key == "NSpgid:": status_pgid = value if pgid == int(status_pgid): 
pids.append(pid.strip()) From 3b776d5b792c709e5850f0ad34d072f2fc009a54 Mon Sep 17 00:00:00 2001 From: Robin Gloster Date: Mon, 7 Mar 2022 15:18:37 +0100 Subject: [PATCH 057/133] test_runexecutor: fix typo --- benchexec/test_runexecutor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 2dbbef399..f30c0b787 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -752,7 +752,7 @@ def test_set_cgroup_cpu_shares(self): if not os.path.exists(self.echo): self.skipTest("missing echo") try: - if self.cgroups_version == 1: + if self.cgroups.version == 1: self.setUp(additional_cgroup_subsystems=["cpu"]) else: self.setUp(additional_cgroup_subsystems=["memory"]) From 4f4aff86538c94f96a1e1c7129be4eef1493fbab Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 22 Jul 2022 18:21:23 +0200 Subject: [PATCH 058/133] Port a921d6be to this branch Code was moved from cgroups.py to cgroupsv1.py --- benchexec/cgroupsv1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 57c33f8b3..c3d8fee1b 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -240,7 +240,7 @@ def create_fresh_child_cgroup(self, subsystems): # add allowed cpus and memory to cgroup if necessary # (otherwise we can't add any tasks) def copy_parent_to_child(name): - shutil.copyfile(parentCgroup / name, cgroup / name) + shutil.copyfile(parentCgroup / name, cgroup / name) # noqa: B023 try: copy_parent_to_child("cpuset.cpus") From 568c61418562887996ee20f61d3ff33480daba11 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 14:54:59 +0200 Subject: [PATCH 059/133] Revert change to test parameter that does not belong to cgroupsv2 branch --- benchexec/test_integration/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/test_integration/__init__.py 
b/benchexec/test_integration/__init__.py index e978ae9ec..276544037 100644 --- a/benchexec/test_integration/__init__.py +++ b/benchexec/test_integration/__init__.py @@ -245,7 +245,7 @@ def test_simple_set_name(self): ) def test_simple_parallel(self): - self.run_benchexec_and_compare_expected_files("--numOfThreads", "4") + self.run_benchexec_and_compare_expected_files("--numOfThreads", "12") def test_wildcard_tasks_1(self): self.run_benchexec_and_compare_expected_files( From 3f3bcd0d53a3eb83c7eecf56fc2c2f6f86a8dcd6 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 22 Jul 2022 18:21:53 +0200 Subject: [PATCH 060/133] Simplify creation of random token and silence flake8 warning It is not really important that the random identifier is cryptographically strong, but it also does not hurt and it is actually less effort for us this way. --- benchexec/cgroupsv2.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 9a0af77bc..0aa266d47 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -8,9 +8,8 @@ import logging import os import pathlib -import random +import secrets import signal -import string import tempfile import time @@ -217,9 +216,7 @@ def move_to_scope(self): b"Delegate": True, } - random_suffix = "".join( - random.choices(string.ascii_letters + string.digits, k=8) - ) + random_suffix = secrets.token_urlsafe(8) name = f"benchexec_{random_suffix}.scope".encode() manager.Manager.StartTransientUnit(name, b"fail", unit_params) From 28a2aa305d98192e8b42f66ce43715500030cd00 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 22 Jul 2022 18:33:05 +0200 Subject: [PATCH 061/133] Fix containerexec: it should not use cgroups --- benchexec/cgroups.py | 68 ++++++++++++++++++++++++++++++++++ benchexec/containerexecutor.py | 4 +- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 438121f59..1593be6e4 100644 --- 
a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -95,6 +95,10 @@ def from_system(cgroup_procinfo=None, fallback=True, initial_cgroup=False): raise BenchExecException("Could not detect Cgroup Version") + @staticmethod + def dummy(): + return _DummyCgroups({}) + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): if subsystems is None: self.subsystems = self._supported_subsystems(cgroup_procinfo, fallback) @@ -368,3 +372,67 @@ def read_oom_count(self): @abstractmethod def disable_swap(self): pass + + +class _DummyCgroups(Cgroups): + version = 0 + IO = "io" + CPU = "cpu" + CPUSET = "cpuset" + FREEZE = "freezer" + MEMORY = "memory" + + @property + def known_subsystems(self): + return set() + + def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + return set() + + def move_to_scope(self): + pass + + def add_task(self, pid): + pass + + def create_fresh_child_cgroup(self, subsystems, move_to_child=False): + pass + + def read_max_mem_usage(self): + pass + + def read_mem_pressure(self): + pass + + def read_cpu_pressure(self): + pass + + def read_io_pressure(self): + pass + + def read_usage_per_cpu(self): + pass + + def read_available_cpus(self): + pass + + def read_available_mems(self): + pass + + def read_io_stat(self): + pass + + def has_tasks(self, path): + pass + + def write_memory_limit(self, limit): + pass + + def read_memory_limit(self): + pass + + def read_oom_count(self): + pass + + def disable_swap(self): + pass diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index b01f09cb9..d5e1dee16 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -437,8 +437,6 @@ def execute_run( logging.debug("Starting process.") - cgroups = Cgroups.from_system() - try: pid, result_fn = self._start_execution( args=args, @@ -449,7 +447,7 @@ def execute_run( root_dir=rootDir, cwd=workingDir, temp_dir=temp_dir, - cgroups=cgroups, + cgroups=Cgroups.dummy(), output_dir=output_dir, 
result_files_patterns=result_files_patterns, child_setup_fn=util.dummy_fn, From 38292267d2930331410fcb362e58905b59ceefe9 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 26 Jul 2022 14:00:16 +0200 Subject: [PATCH 062/133] Fix bug in cgroups handling: has_tasks() always returned False The virtual cgroupfs does not produce meaningful file sizes. --- benchexec/cgroupsv1.py | 2 +- benchexec/cgroupsv2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index c3d8fee1b..e831df85b 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -386,7 +386,7 @@ def read_io_stat(self): return bytes_read, bytes_written def has_tasks(self, path): - return os.path.getsize(path / "tasks") > 0 + return bool((path / "cgroup.procs").read_bytes().strip()) def write_memory_limit(self, limit): limit_file = "limit_in_bytes" diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 0aa266d47..b90aebf98 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -342,7 +342,7 @@ def read_io_stat(self): return bytes_read, bytes_written def has_tasks(self, path): - return os.path.getsize(path / "cgroup.procs") > 0 + return bool((path / "cgroup.procs").read_bytes().strip()) def write_memory_limit(self, limit): self.set_value(self.MEMORY, "max", limit) From ffe39131d78d48434b3383b06441f24634ee3e10 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 26 Jul 2022 18:29:55 +0200 Subject: [PATCH 063/133] Fix handling of child cgroups FREEZE is always available in v2 cgroups, and KILL always if available in the parent cgroup. If we do not add them here, later code would mistakenly assume that these features are not available. 
--- benchexec/cgroupsv2.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index b90aebf98..3cbc0df1f 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -197,7 +197,9 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): util.write_file(f"+{c}", self.path / "cgroup.subtree_control") # basic cpu controller support without being enabled - child_subsystems = controllers_to_delegate | {"cpu"} + child_subsystems = controllers_to_delegate | {self.CPU, self.FREEZE} + if self.KILL in self.subsystems: + child_subsystems.add(self.KILL) return CgroupsV2({c: child_path for c in child_subsystems}) def move_to_scope(self): From 9cc8deb751b322bb94104154ec23505f80f12b7e Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 07:59:25 +0200 Subject: [PATCH 064/133] Add method from Cgroups classes also to abstract base class --- benchexec/cgroups.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 1593be6e4..95abae76f 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -316,6 +316,10 @@ def move_to_scope(self): def add_task(self, pid): pass + @abstractmethod + def kill_all_tasks(self): + pass + @abstractmethod def create_fresh_child_cgroup(self, subsystems, move_to_child=False): pass @@ -395,6 +399,9 @@ def move_to_scope(self): def add_task(self, pid): pass + def kill_all_tasks(self): + pass + def create_fresh_child_cgroup(self, subsystems, move_to_child=False): pass From 885e12ca1eba1cab0590f13e2ce58c7183f7f6ed Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 08:04:16 +0200 Subject: [PATCH 065/133] Remove special casing for cgroup versions from RunExecutor The cgroups classes should abstract away from the differences, including faking a controller like "freezer" if it is no longer a separate controller on v2. 
Furthermore, we cannot just assume that these features are always usable on v2 systems because it can still happen that we do not have a usable cgroup at all. --- benchexec/runexecutor.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index f9e1232a1..424d7d5b4 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -348,18 +348,16 @@ def _init_cgroups(self): logging.debug("Cannot measure I/O without blkio cgroup.") self.cgroups.require_subsystem(self.cgroups.CPU) - if self.cgroups.CPU not in self.cgroups and self.cgroups.version == 1: + if self.cgroups.CPU not in self.cgroups: logging.warning("Cannot measure CPU time without cpuacct cgroup.") - # only a real subsystem in v1 - if self.cgroups.version == 1: - self.cgroups.require_subsystem(self.cgroups.FREEZE) - if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: - critical_cgroups.add(self.cgroups.FREEZE) - logging.error( - "Cannot reliably kill sub-processes without freezer cgroup " - "or container mode. Please enable at least one of them." - ) + self.cgroups.require_subsystem(self.cgroups.FREEZE) + if self.cgroups.FREEZE not in self.cgroups and not self._use_namespaces: + critical_cgroups.add(self.cgroups.FREEZE) + logging.error( + "Cannot reliably kill sub-processes without freezer cgroup " + "or container mode. Please enable at least one of them." + ) self.cgroups.require_subsystem(self.cgroups.MEMORY) if self.cgroups.MEMORY not in self.cgroups: @@ -1015,7 +1013,7 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): cputime_wait = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0 cputime_cgroups = None - if cgroups.CPU in cgroups or cgroups.version == 2: # always possible in v2 + if cgroups.CPU in cgroups: # We want to read the value from the cgroup. # The documentation warns about outdated values. 
# So we read twice with 0.1s time difference, From 86ff57bcbba3dfd76c7eff7446515822c82f8b55 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 08:28:09 +0200 Subject: [PATCH 066/133] Make some methods on CgroupsV2 easier to use For compatibility with v1 they need to take parameters, but on v2 systems the parameter is actually useless. --- benchexec/cgroupsv2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 3cbc0df1f..1aa31e245 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -243,7 +243,7 @@ def add_task(self, pid): with open(self.path / "cgroup.procs", "w") as tasksFile: tasksFile.write(str(pid)) - def get_all_tasks(self, subsystem): + def get_all_tasks(self, subsystem=None): """ Return a generator of all PIDs currently in this cgroup for the given subsystem. """ @@ -343,7 +343,8 @@ def read_io_stat(self): bytes_written += int(stats_map["wbytes"]) return bytes_read, bytes_written - def has_tasks(self, path): + def has_tasks(self, path=None): + path = path or self.path return bool((path / "cgroup.procs").read_bytes().strip()) def write_memory_limit(self, limit): From 9729ef56b1d88fb4f09d14420c52dd9823abcfa8 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 08:27:13 +0200 Subject: [PATCH 067/133] Rewrite main logic for initializing cgroups There were at least two problems: - Deeply nested cgroups created in tests due to move_to_scope() there. - runexec not working without manual cgroup creation like benchexec. We need to have some global initialization logic that works for all circumstances and which is safe to be called more than once without creating lots of deeply nested cgroups in order to solve this. It even needs to be thread safe for concurrent callers of RunExecutor. 
What is missing in this version is proper error handling: For example, if we cannot move the process to a new systemd scope, we do not want to produce an error immediately because cgroups might actually be optional. So we need to produce the error only later if we find out that we actually require cgroups, but then the information about why we cannot have cgroups is lost. So for now we just get a generic error message and not one showing the reason why cgroups are unusable. This will have to be implemented. --- benchexec/cgroups.py | 52 +++++++++++-- benchexec/cgroupsv1.py | 3 - benchexec/cgroupsv2.py | 136 +++++++++++++++++++++++++--------- benchexec/check_cgroups.py | 3 +- benchexec/localexecution.py | 3 +- benchexec/runexecutor.py | 2 +- benchexec/test_runexecutor.py | 3 +- benchexec/util.py | 2 +- 8 files changed, 152 insertions(+), 52 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 95abae76f..3ce894214 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -75,6 +75,51 @@ def _get_cgroup_version(): class Cgroups(ABC): + """ + A representation of a cgroup that attempts to abstract away the differences + between cgroups v1 and v2. + The typical way to get a usable instance is to call initialize(). + """ + + @staticmethod + def initialize(): + """ + Try to find or create a usable cgroup and return a Cgroups instance + that represents it. + + Calling this method may have an effect on the cgroup of the current process, + e.g., it may be moved to a different cgroup. + This will likely cause problems if other non-BenchExec components + are also using cgroups in the same process. + Even though it may change the cgroup state of the process, + this method is safe to call more than once and it is expected that later calls + do not produce further changes. + + The returned cgroup may or may not have child cgroups + and the current process may or may not be contained in the returned cgroup + or one of its children. 
+ + This method cannot guarantee that a usable cgroup is found, + but it will always return a Cgroups instance. + Call require_subsystem() on it in order to find out which subsystems (if any) + are usable. + + Typically, callers should use the returned cgroup instance only for creating + child cgroups and not call any other modifying method such as add_task(). + """ + version = _get_cgroup_version() + if version == CGROUPS_V1: + from .cgroupsv1 import CgroupsV1 + + return CgroupsV1() + + elif version == CGROUPS_V2: + from .cgroupsv2 import initialize + + return initialize() + + return Cgroups.dummy() + @staticmethod def from_system(cgroup_procinfo=None, fallback=True, initial_cgroup=False): version = _get_cgroup_version() @@ -308,10 +353,6 @@ def known_subsystems(self): def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): pass - @abstractmethod - def move_to_scope(self): - pass - @abstractmethod def add_task(self, pid): pass @@ -393,9 +434,6 @@ def known_subsystems(self): def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): return set() - def move_to_scope(self): - pass - def add_task(self, pid): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index e831df85b..ae717b0d9 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -251,9 +251,6 @@ def copy_parent_to_child(name): return CgroupsV1(createdCgroupsPerSubsystem) - def move_to_scope(self): - logging.debug("moving to scope currently not supported for cgroups v1") - def add_task(self, pid): """ Add a process to the cgroups represented by this instance. diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 1aa31e245..ba9aa76b8 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -11,6 +11,7 @@ import secrets import signal import tempfile +import threading import time @@ -21,6 +22,104 @@ uid = os.getuid() CGROUP_NAME_PREFIX = "benchmark_" +# Global state that stores the cgroup we have prepared for use. 
+# Global state is not nice, but here we have to use it because during cgroup +# initialization we have to move the current process into a cgroup, +# and this is inherently global state (because it affects the whole process). +# So we need to know whether we have done this already or not. +_usable_cgroup = None +_usable_cgroup_lock = threading.Lock() + + +def initialize(): + """ + Attempt to get a usable cgroup. + This may involve moving the current process into a different cgroup, + but this method is idempotent. + """ + global _usable_cgroup + if _usable_cgroup: + return _usable_cgroup + + with _usable_cgroup_lock: + if _usable_cgroup: + return _usable_cgroup + + cgroup = CgroupsV2() + + allowed_pids = set(util.get_pgrp_pids(os.getpgid(0))) + if set(cgroup.get_all_tasks()) <= allowed_pids: + # If we are the only process, somebody prepared a cgroup for us. Use it. + logging.debug("BenchExec was started in its own cgroup: %s", cgroup) + + elif _create_systemd_scope_for_us(): + # If we can create a systemd scope for us and move ourselves in it, + # we have a usable cgroup afterwards. + cgroup = CgroupsV2() + + else: + # No usable cgroup. We might still be able to continue if we actually + # do not require cgroups for benchmarking. So we do not fail here + # but return an instance that will on produce an error later. + return CgroupsV2({}) + + # Now we are the only process in this cgroup. In order to make it usable for + # benchmarking, we need to move ourselves into a child cgroup. + child_cgroup = cgroup.create_fresh_child_cgroup( + cgroup.subsystems.keys(), move_to_child=True + ) + assert child_cgroup.has_tasks() + assert not cgroup.has_tasks() + + _usable_cgroup = cgroup + + return _usable_cgroup + + +def _create_systemd_scope_for_us(): + """ + Attempt to create a systemd scope for us (with pystemd). + If it works this process is moved into the fresh scope. + + TODO: We should probably also move our child processes to the scope. 
+ + @return: a boolean indicating whether this succeeded + """ + try: + from pystemd.dbuslib import DBus + from pystemd.dbusexc import DBusFileNotFoundError + from pystemd.systemd1 import Manager, Unit + + with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: + unit_params = { + # workaround for not declared parameters, remove in the future + b"_custom": (b"PIDs", b"au", [os.getpid()]), + b"Delegate": True, + } + + random_suffix = secrets.token_urlsafe(8) + name = f"benchexec_{random_suffix}.scope".encode() + manager.Manager.StartTransientUnit(name, b"fail", unit_params) + # StartTransientUnit is async, so we need to ensure it has finished + # and moved our process before we continue. + # We might need a loop here (so far it always seems to work without, + # maybe systemd serializes this request with the unit creation). + with Unit(name, bus=bus) as unit: + assert unit.LoadState == b"loaded" + assert unit.ActiveState == b"active" + assert unit.SubState == b"running" + # Cgroup path would be accessible as unit.ControlGroup if we need it. 
+ + logging.debug("Process moved to a fresh systemd scope: %s", name.decode()) + return True + + except ImportError: + logging.debug("pystemd could not be imported.") + except DBusFileNotFoundError as e: + logging.debug("No user DBus found, not using pystemd: %s", e) + + return False + def _find_cgroup_mount(): """ @@ -156,6 +255,9 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): subsystems = set(subsystems) assert subsystems.issubset(self.subsystems.keys()) + if not subsystems: + return Cgroups.dummy() + tasks = set(util.read_file(self.path / "cgroup.procs").split()) if tasks and not move_to_child: raise BenchExecException( @@ -202,40 +304,6 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): child_subsystems.add(self.KILL) return CgroupsV2({c: child_path for c in child_subsystems}) - def move_to_scope(self): - logging.debug("Moving runexec main process to scope") - - pids = util.get_pgrp_pids(os.getpgid(0)) - try: - from pystemd.dbuslib import DBus - from pystemd.dbusexc import DBusFileNotFoundError - from pystemd.systemd1 import Manager, Unit - - with DBus(user_mode=True) as bus, Manager(bus=bus) as manager: - unit_params = { - # workaround for not declared parameters, remove in the future - b"_custom": (b"PIDs", b"au", [int(p) for p in pids]), - b"Delegate": True, - } - - random_suffix = secrets.token_urlsafe(8) - name = f"benchexec_{random_suffix}.scope".encode() - manager.Manager.StartTransientUnit(name, b"fail", unit_params) - - with Unit(name, bus=bus): - self.subsystems = self._supported_subsystems() - self.paths = set(self.subsystems.values()) - self.path = next(iter(self.subsystems.values())) - logging.debug( - f"moved to scope {name}, subsystems: {self.subsystems}" - ) - except ImportError: - logging.warn("pystemd could not be imported") - except DBusFileNotFoundError: - logging.warn("no user DBus found, not using pystemd") - - self.create_fresh_child_cgroup(self.subsystems.keys(), move_to_child=True) - def 
add_task(self, pid): """ Add a process to the cgroups represented by this instance. diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index e210eb924..05fe01306 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -139,8 +139,7 @@ def main(argv=None): options = parser.parse_args(argv[1:]) - cgroups = Cgroups.from_system(initial_cgroup=True) - cgroups.move_to_scope() + Cgroups.initialize() if options.no_thread: check_cgroup_availability(options.wait) diff --git a/benchexec/localexecution.py b/benchexec/localexecution.py index 0174ffc40..0fd789595 100644 --- a/benchexec/localexecution.py +++ b/benchexec/localexecution.py @@ -69,8 +69,7 @@ def execute_benchmark(benchmark, output_handler): "only resource limits are used." ) - my_cgroups = Cgroups.from_system(initial_cgroup=True) - my_cgroups.move_to_scope() + my_cgroups = Cgroups.initialize() required_cgroups = set() coreAssignment = None # cores per run diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 424d7d5b4..fb34b925b 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -330,7 +330,7 @@ def _init_cgroups(self): """ This function initializes the cgroups for the limitations and measurements. 
""" - self.cgroups = Cgroups.from_system() + self.cgroups = Cgroups.initialize() critical_cgroups = set() for subsystem in self._cgroup_subsystems: diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index f30c0b787..058d4f693 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -44,8 +44,7 @@ def setUpClass(cls): if not hasattr(cls, "assertRegex"): cls.assertRegex = cls.assertRegexpMatches - cls.cgroups = Cgroups.from_system(initial_cgroup=True) - cls.cgroups.move_to_scope() + cls.cgroups = Cgroups.initialize() def setUp(self, *args, **kwargs): with self.skip_if_logs( diff --git a/benchexec/util.py b/benchexec/util.py index 27751800a..a1761408f 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -778,7 +778,7 @@ def get_pgrp_pids(pgid): elif key == "NSpgid:": status_pgid = value if pgid == int(status_pgid): - pids.append(pid.strip()) + pids.append(int(pid)) except OSError: # ignore race conditions with processes disappearing # they aren't interesting to us anyway as processes From a2eabf164c42a37a9aff07242206e3fd3a0c9d2c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 11:31:57 +0200 Subject: [PATCH 068/133] Make check_cgroups work properly for cgroups v2 We need to be able to parse a /proc/self/cgroup content that is given to us from a different source. --- benchexec/cgroups.py | 17 ++++++++--------- benchexec/cgroupsv2.py | 15 +++++++++++---- benchexec/check_cgroups.py | 12 +++++------- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 3ce894214..ad2a1d64f 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -121,22 +121,21 @@ def initialize(): return Cgroups.dummy() @staticmethod - def from_system(cgroup_procinfo=None, fallback=True, initial_cgroup=False): + def from_system(cgroup_procinfo=None): + """ + Create a cgroups instance representing the current cgroup of the process. 
+ + @param cgroup_procinfo: Optional, if given use this instead of /proc/self/cgroup + """ version = _get_cgroup_version() if version == CGROUPS_V1: from .cgroupsv1 import CgroupsV1 - return CgroupsV1(cgroup_procinfo=cgroup_procinfo, fallback=fallback) + return CgroupsV1(cgroup_procinfo=cgroup_procinfo, fallback=False) elif version == CGROUPS_V2: from .cgroupsv2 import CgroupsV2 - cgroups = CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=fallback) - if not initial_cgroup: - cgroups.create_fresh_child_cgroup( - cgroups.subsystems.keys(), move_to_child=True - ) - - return cgroups + return CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=False) raise BenchExecException("Could not detect Cgroup Version") diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index ba9aa76b8..f396e769b 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -230,10 +230,17 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." 
) - cgroup_path = _find_own_cgroups() - - with open(cgroup_path / "cgroup.controllers") as subsystems_file: - subsystems = set(subsystems_file.readline().strip().split()) + if cgroup_procinfo is None: + cgroup_path = _find_own_cgroups() + else: + cgroup_path = _parse_proc_pid_cgroup(cgroup_procinfo) + + try: + with open(cgroup_path / "cgroup.controllers") as subsystems_file: + subsystems = set(subsystems_file.readline().strip().split()) + except OSError: + # happens if we parse cgroup_procinfo of a deleted cgroup for check_cgroups + subsystems = set() # introduced in 5.14 if (cgroup_path / "cgroup.kill").exists(): diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 05fe01306..143e535db 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -57,15 +57,13 @@ def check_cgroup_availability(wait=1): and not all(c == "-" for c in line) ): lines.append(line) - task_cgroups = Cgroups.from_system(cgroup_procinfo=lines, fallback=False) + task_cgroups = Cgroups.from_system(cgroup_procinfo=lines) fail = False - for subsystem in ( - my_cgroups.CPU, - my_cgroups.CPUSET, - my_cgroups.MEMORY, - my_cgroups.FREEZE, - ): + expected_subsystems = [my_cgroups.FREEZE] + if my_cgroups.version == 1: + expected_subsystems += [my_cgroups.CPU, my_cgroups.CPUSET, my_cgroups.MEMORY] + for subsystem in expected_subsystems: if subsystem in my_cgroups: if not str(task_cgroups[subsystem]).startswith(str(my_cgroups[subsystem])): logging.warning( From da642c92796627e103f23cdcaa1504e2b1134b6a Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 11:50:12 +0200 Subject: [PATCH 069/133] Refactoring: Move fields and methods from instance to class for Cgroups These are all things that do not actually relate to or use a specific instance. 
--- benchexec/cgroups.py | 15 ++++------ benchexec/cgroupsv1.py | 62 +++++++++++++++++++++--------------------- benchexec/cgroupsv2.py | 55 ++++++++++++++++++------------------- 3 files changed, 63 insertions(+), 69 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index ad2a1d64f..28cedeb12 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -343,13 +343,9 @@ def remove(self): del self.paths del self.subsystems - @property + @classmethod @abstractmethod - def known_subsystems(self): - pass - - @abstractmethod - def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): pass @abstractmethod @@ -426,11 +422,10 @@ class _DummyCgroups(Cgroups): FREEZE = "freezer" MEMORY = "memory" - @property - def known_subsystems(self): - return set() + known_subsystems = set() - def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + @classmethod + def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): return set() def add_task(self, pid): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index ae717b0d9..465c0c5b2 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -128,37 +128,36 @@ def _register_process_with_cgrulesengd(pid): class CgroupsV1(Cgroups): - def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): - self.version = 1 - - self.IO = "blkio" - self.CPU = "cpuacct" - self.CPUSET = "cpuset" - self.FREEZE = "freezer" - self.MEMORY = "memory" + version = 1 + + IO = "blkio" + CPU = "cpuacct" + CPUSET = "cpuset" + FREEZE = "freezer" + MEMORY = "memory" + + known_subsystems = { + # cgroups for BenchExec + IO, + CPU, + CPUSET, + FREEZE, + MEMORY, + # other cgroups users might want + "cpu", + "devices", + "net_cls", + "net_prio", + "hugetlb", + "perf_event", + "pids", + } + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): super(CgroupsV1, self).__init__(subsystems, 
cgroup_procinfo, fallback) - @property - def known_subsystems(self): - return { - # cgroups for BenchExec - self.IO, - self.CPU, - self.CPUSET, - self.FREEZE, - self.MEMORY, - # other cgroups users might want - "cpu", - "devices", - "net_cls", - "net_prio", - "hugetlb", - "perf_event", - "pids", - } - - def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + @classmethod + def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): """ Return a Cgroup object with the cgroups of the current process. Note that it is not guaranteed that all subsystems are available @@ -179,7 +178,7 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_procinfo)) cgroupsParents = {} - for subsystem, mount in self._find_cgroup_mounts(): + for subsystem, mount in cls._find_cgroup_mounts(): # Ignore mount points where we do not have any access, # e.g. because a parent directory has insufficient permissions # (lxcfs mounts cgroups under /run/lxcfs in such a way). @@ -196,7 +195,8 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): return cgroupsParents - def _find_cgroup_mounts(self): + @classmethod + def _find_cgroup_mounts(cls): """ Return the information which subsystems are mounted where. 
@return a generator of tuples (subsystem, mountpoint) @@ -209,7 +209,7 @@ def _find_cgroup_mounts(self): mountpoint = pathlib.Path(mount[1]) options = mount[3] for option in options.split(","): - if option in self.known_subsystems: + if option in cls.known_subsystems: yield (option, mountpoint) except OSError: logging.exception("Cannot read /proc/mounts") diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index f396e769b..5d4ee708a 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -195,38 +195,37 @@ def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): class CgroupsV2(Cgroups): - def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): - self.version = 2 - - self.IO = "io" - self.CPU = "cpu" - self.CPUSET = "cpuset" - self.MEMORY = "memory" - self.PID = "pids" - self.FREEZE = "freeze" - self.KILL = "kill" + version = 2 + + IO = "io" + CPU = "cpu" + CPUSET = "cpuset" + MEMORY = "memory" + PID = "pids" + FREEZE = "freeze" + KILL = "kill" + + known_subsystems = { + # cgroups for BenchExec + IO, + CPU, + CPUSET, + MEMORY, + PID, + # not really a subsystem anymore, but implicitly supported + FREEZE, + KILL, + } + def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) self.path = ( next(iter(self.subsystems.values())) if len(self.subsystems) else None ) - @property - def known_subsystems(self): - return { - # cgroups for BenchExec - self.IO, - self.CPU, - self.CPUSET, - self.MEMORY, - self.PID, - # not really a subsystem anymore, but implicitly supported - self.FREEZE, - self.KILL, - } - - def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): + @classmethod + def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." 
) @@ -244,13 +243,13 @@ def _supported_subsystems(self, cgroup_procinfo=None, fallback=True): # introduced in 5.14 if (cgroup_path / "cgroup.kill").exists(): - subsystems.add(self.KILL) + subsystems.add(cls.KILL) # always supported in v2 - subsystems.add(self.FREEZE) + subsystems.add(cls.FREEZE) # basic support always available in v2, this supports everything we use - subsystems.add(self.CPU) + subsystems.add(cls.CPU) return {k: cgroup_path for k in subsystems} From 3e47e4bbbb436724d84418c0d32cd2c68c8169dc Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 12:03:16 +0200 Subject: [PATCH 070/133] Refactoring: simplify creation of Cgroups instances The constructor of the abstract base class had to call a method of the overriding class in order to find out the usable subsystems, so the call graph was difficult to follow (subclass constructor calling the super class constructor which in turn calls a subclass method). With a factory method this can be simplified a lot. --- benchexec/cgroups.py | 22 +++++----------------- benchexec/cgroupsv1.py | 7 ++----- benchexec/cgroupsv2.py | 12 ++++++------ 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 28cedeb12..479d5bad3 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -111,7 +111,7 @@ def initialize(): if version == CGROUPS_V1: from .cgroupsv1 import CgroupsV1 - return CgroupsV1() + return CgroupsV1.from_system() elif version == CGROUPS_V2: from .cgroupsv2 import initialize @@ -131,11 +131,11 @@ def from_system(cgroup_procinfo=None): if version == CGROUPS_V1: from .cgroupsv1 import CgroupsV1 - return CgroupsV1(cgroup_procinfo=cgroup_procinfo, fallback=False) + return CgroupsV1.from_system(cgroup_procinfo, fallback=False) elif version == CGROUPS_V2: from .cgroupsv2 import CgroupsV2 - return CgroupsV2(cgroup_procinfo=cgroup_procinfo, fallback=False) + return CgroupsV2.from_system(cgroup_procinfo) raise BenchExecException("Could 
not detect Cgroup Version") @@ -143,11 +143,8 @@ def from_system(cgroup_procinfo=None): def dummy(): return _DummyCgroups({}) - def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): - if subsystems is None: - self.subsystems = self._supported_subsystems(cgroup_procinfo, fallback) - else: - self.subsystems = subsystems + def __init__(self, subsystems): + self.subsystems = subsystems assert set(self.subsystems.keys()) <= self.known_subsystems assert all(self.subsystems.values()) @@ -343,11 +340,6 @@ def remove(self): del self.paths del self.subsystems - @classmethod - @abstractmethod - def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): - pass - @abstractmethod def add_task(self, pid): pass @@ -424,10 +416,6 @@ class _DummyCgroups(Cgroups): known_subsystems = set() - @classmethod - def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): - return set() - def add_task(self, pid): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 465c0c5b2..be89f8e63 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -153,11 +153,8 @@ class CgroupsV1(Cgroups): "pids", } - def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): - super(CgroupsV1, self).__init__(subsystems, cgroup_procinfo, fallback) - @classmethod - def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): + def from_system(cls, cgroup_procinfo=None, fallback=True): """ Return a Cgroup object with the cgroups of the current process. 
Note that it is not guaranteed that all subsystems are available @@ -193,7 +190,7 @@ def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): cgroupPath = fallbackPath cgroupsParents[subsystem] = cgroupPath - return cgroupsParents + return cls(cgroupsParents) @classmethod def _find_cgroup_mounts(cls): diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 5d4ee708a..e9cfdac81 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -45,7 +45,7 @@ def initialize(): if _usable_cgroup: return _usable_cgroup - cgroup = CgroupsV2() + cgroup = CgroupsV2.from_system() allowed_pids = set(util.get_pgrp_pids(os.getpgid(0))) if set(cgroup.get_all_tasks()) <= allowed_pids: @@ -55,7 +55,7 @@ def initialize(): elif _create_systemd_scope_for_us(): # If we can create a systemd scope for us and move ourselves in it, # we have a usable cgroup afterwards. - cgroup = CgroupsV2() + cgroup = CgroupsV2.from_system() else: # No usable cgroup. We might still be able to continue if we actually @@ -217,15 +217,15 @@ class CgroupsV2(Cgroups): KILL, } - def __init__(self, subsystems=None, cgroup_procinfo=None, fallback=True): - super(CgroupsV2, self).__init__(subsystems, cgroup_procinfo, fallback) + def __init__(self, subsystems): + super(CgroupsV2, self).__init__(subsystems) self.path = ( next(iter(self.subsystems.values())) if len(self.subsystems) else None ) @classmethod - def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): + def from_system(cls, cgroup_procinfo=None): logging.debug( "Analyzing /proc/mounts and /proc/self/cgroup to determine cgroups." 
) @@ -251,7 +251,7 @@ def _supported_subsystems(cls, cgroup_procinfo=None, fallback=True): # basic support always available in v2, this supports everything we use subsystems.add(cls.CPU) - return {k: cgroup_path for k in subsystems} + return cls({k: cgroup_path for k in subsystems}) def create_fresh_child_cgroup(self, subsystems, move_to_child=False): """ From 873759c2a56535e0cf00d0c2c9b04d174c36d375 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 16:11:40 +0200 Subject: [PATCH 071/133] Remove restriction to well-known controllers for cgroups v2 This would crash if somebody calls us in a cgroup with controllers enabled that we do not know. For cgroups v1 we have this check because unknown controllers can do weird things and it is not in general safe or even possible to ignore them. However, controllers for cgroup v2 are expected to be more well behaved. --- benchexec/cgroups.py | 3 --- benchexec/cgroupsv1.py | 4 ++++ benchexec/cgroupsv2.py | 12 ------------ 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 479d5bad3..a6eff5ef4 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -146,7 +146,6 @@ def dummy(): def __init__(self, subsystems): self.subsystems = subsystems - assert set(self.subsystems.keys()) <= self.known_subsystems assert all(self.subsystems.values()) self.paths = set(self.subsystems.values()) # without duplicates @@ -414,8 +413,6 @@ class _DummyCgroups(Cgroups): FREEZE = "freezer" MEMORY = "memory" - known_subsystems = set() - def add_task(self, pid): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index be89f8e63..4acafbee9 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -153,6 +153,10 @@ class CgroupsV1(Cgroups): "pids", } + def __init__(self, subsystems): + assert set(subsystems.keys()) <= self.known_subsystems + super(CgroupsV1, self).__init__(subsystems) + @classmethod def from_system(cls, cgroup_procinfo=None, 
fallback=True): """ diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index e9cfdac81..19f024756 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -205,18 +205,6 @@ class CgroupsV2(Cgroups): FREEZE = "freeze" KILL = "kill" - known_subsystems = { - # cgroups for BenchExec - IO, - CPU, - CPUSET, - MEMORY, - PID, - # not really a subsystem anymore, but implicitly supported - FREEZE, - KILL, - } - def __init__(self, subsystems): super(CgroupsV2, self).__init__(subsystems) From 84535f872c79723601fcbe1c6869085171d264e7 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 16:18:26 +0200 Subject: [PATCH 072/133] Refactor CgroupsV2.create_fresh_child_cgroup() The handling of move_to_child=True is needed only in one specific case, so we can move it there and simplify the method. Furthermore, we can simplify the handling of delegating controllers, because if a cgroup has processes, we know it has no controllers. So we do not need to remove controllers before creating the child. 
--- benchexec/cgroups.py | 4 +-- benchexec/cgroupsv2.py | 58 +++++++++++++----------------------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index a6eff5ef4..15b4bbd44 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -348,7 +348,7 @@ def kill_all_tasks(self): pass @abstractmethod - def create_fresh_child_cgroup(self, subsystems, move_to_child=False): + def create_fresh_child_cgroup(self, subsystems): pass @abstractmethod @@ -419,7 +419,7 @@ def add_task(self, pid): def kill_all_tasks(self): pass - def create_fresh_child_cgroup(self, subsystems, move_to_child=False): + def create_fresh_child_cgroup(self, subsystems): pass def read_max_mem_usage(self): diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 19f024756..a5f4b197f 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -66,11 +66,22 @@ def initialize(): # Now we are the only process in this cgroup. In order to make it usable for # benchmarking, we need to move ourselves into a child cgroup. child_cgroup = cgroup.create_fresh_child_cgroup( - cgroup.subsystems.keys(), move_to_child=True + cgroup.subsystems.keys(), prefix="benchexec_process_" ) + for pid in cgroup.get_all_tasks(): + child_cgroup.add_task(pid) assert child_cgroup.has_tasks() assert not cgroup.has_tasks() + # Now that the cgroup is empty, we can enable controller delegation. + # We enable all controllers, even those that we do not need ourselves, + # in order to allow nesting of other cgroup-using software. 
+ controllers = util.read_file(cgroup.path / "cgroup.controllers").split() + util.write_file( + " ".join(f"+{c}" for c in controllers), + cgroup.path / "cgroup.subtree_control", + ) + _usable_cgroup = cgroup return _usable_cgroup @@ -241,7 +252,7 @@ def from_system(cls, cgroup_procinfo=None): return cls({k: cgroup_path for k in subsystems}) - def create_fresh_child_cgroup(self, subsystems, move_to_child=False): + def create_fresh_child_cgroup(self, subsystems, prefix=CGROUP_NAME_PREFIX): """ Create child cgroups of the current cgroup for at least the given subsystems. @return: A Cgroup instance representing the new child cgroup(s). @@ -252,50 +263,17 @@ def create_fresh_child_cgroup(self, subsystems, move_to_child=False): if not subsystems: return Cgroups.dummy() - tasks = set(util.read_file(self.path / "cgroup.procs").split()) - if tasks and not move_to_child: - raise BenchExecException( - "Cannot create cgroups v2 child on non-empty parent without moving tasks" - ) - - allowed_pids = {str(p) for p in util.get_pgrp_pids(os.getpgid(0))} - if len(tasks) > 1 and not tasks <= allowed_pids and move_to_child: - raise BenchExecException( - "runexec must be the only running process in its cgroup. Either install pystemd " - "for benchexec to handle this itself, prefix the command with `systemd-run --user --scope -p Delegate=yes` " - "or otherwise prepare the cgroup hierarchy to make sure of this and the subtree being " - "writable by the executing user." 
- ) - - prefix = "runexec_main_" if move_to_child else CGROUP_NAME_PREFIX child_path = pathlib.Path(tempfile.mkdtemp(prefix=prefix, dir=self.path)) - if move_to_child and tasks: - prev_delegated_controllers = set( - util.read_file(self.path / "cgroup.subtree_control").split() - ) - for c in prev_delegated_controllers: - util.write_file(f"-{c}", self.path / "cgroup.subtree_control") - - for t in tasks: - try: - util.write_file(t, child_path / "cgroup.procs") - except OSError as e: - logging.warn(f"Could not move pid {t} to {child_path}: {e}") - - for c in prev_delegated_controllers: - util.write_file(f"+{c}", self.path / "cgroup.subtree_control") - - controllers = set(util.read_file(self.path / "cgroup.controllers").split()) - controllers_to_delegate = controllers & subsystems - - for c in controllers_to_delegate: - util.write_file(f"+{c}", self.path / "cgroup.subtree_control") + child_subsystems = set( + util.read_file(child_path / "cgroup.controllers").split() + ) # basic cpu controller support without being enabled - child_subsystems = controllers_to_delegate | {self.CPU, self.FREEZE} + child_subsystems |= {self.CPU, self.FREEZE} if self.KILL in self.subsystems: child_subsystems.add(self.KILL) + return CgroupsV2({c: child_path for c in child_subsystems}) def add_task(self, pid): From ddf5ad0c6fecde2b52f821ab3c465929a69161fe Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 16:21:48 +0200 Subject: [PATCH 073/133] Do not throw BenchExecException from cgroups module BenchExecException is a high-level exception and not all users of the low-level cgroups module expect it. Furthermore, we do not want to stop immediately if cgroups are not available, maybe they are not strictly needed. Error handling will come later. 
--- benchexec/cgroups.py | 6 +----- benchexec/cgroupsv2.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 15b4bbd44..374e02991 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -14,7 +14,6 @@ import stat import sys -from benchexec import BenchExecException from benchexec import systeminfo from benchexec import util @@ -65,9 +64,6 @@ def _get_cgroup_version(): # we don't support crippled hybrid mode elif mount[2] == "cgroup2" and version != CGROUPS_V1: version = CGROUPS_V2 - - if version is None: - raise BenchExecException("Could not detect Cgroup Version") except OSError: logging.exception("Cannot read /proc/mounts") @@ -137,7 +133,7 @@ def from_system(cgroup_procinfo=None): return CgroupsV2.from_system(cgroup_procinfo) - raise BenchExecException("Could not detect Cgroup Version") + return Cgroups.dummy() @staticmethod def dummy(): diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index a5f4b197f..fa38be5c1 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -15,7 +15,7 @@ import time -from benchexec import util, BenchExecException +from benchexec import util from benchexec.cgroups import Cgroups From dc8adf0359e972bf65cad1c473748022c21f03c7 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 16:32:04 +0200 Subject: [PATCH 074/133] More robust parsing of io.stat on cgroups v2 Sometimes the file contains lines with only the device numbers and no actual statistics. 
--- benchexec/cgroupsv2.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index fa38be5c1..3a69770ee 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -378,9 +378,11 @@ def read_io_stat(self): bytes_written = 0 for io_line in self.get_file_lines(self.IO, "stat"): dev_no, *stats = io_line.split(" ") - stats_map = {s[0]: s[1] for s in (s.split("=") for s in stats)} - bytes_read += int(stats_map["rbytes"]) - bytes_written += int(stats_map["wbytes"]) + stats_map = {s[0]: s[1] for s in (s.split("=") for s in stats if s)} + if "rbytes" in stats_map: + bytes_read += int(stats_map["rbytes"]) + if "wbytes" in stats_map: + bytes_written += int(stats_map["wbytes"]) return bytes_read, bytes_written def has_tasks(self, path=None): From f350ed62e59ce05773d1a4f43dcb3fd8e5405bc1 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 27 Jul 2022 16:47:38 +0200 Subject: [PATCH 075/133] Fix output of pressure stall time with cgroups v2 Because of the use of type float, for small values (which can occur here) the output would be in scientific notation, but we promise decimal notation. Furthermore, float can of course be imprecise. So we switch to the type Decimal and make sure it is printed properly. It would be good to also switch to Decimal for other time values, but we should make that change separately. 
--- benchexec/cgroupsv2.py | 9 +++++---- benchexec/runexecutor.py | 15 ++++++++++++--- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 3a69770ee..db2f3a136 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -13,7 +13,7 @@ import tempfile import threading import time - +from decimal import Decimal from benchexec import util from benchexec.cgroups import Cgroups @@ -334,6 +334,7 @@ def read_cputime(self): """ cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) + # TODO switch to Decimal together with all other float values return float(cpu_stats["usage_usec"]) / 1_000_000 def read_max_mem_usage(self): @@ -346,21 +347,21 @@ def read_mem_pressure(self): mem_some_stats = mem_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in mem_some_stats)} - return float(stats_map["total"]) / 1_000_000 + return Decimal(stats_map["total"]) / 1_000_000 def read_cpu_pressure(self): cpu_stats = dict(self.get_key_value_pairs(self.CPU, "pressure")) cpu_some_stats = cpu_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in cpu_some_stats)} - return float(stats_map["total"]) / 1_000_000 + return Decimal(stats_map["total"]) / 1_000_000 def read_io_pressure(self): io_stats = dict(self.get_key_value_pairs(self.IO, "pressure")) io_some_stats = io_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in io_some_stats)} - return float(stats_map["total"]) / 1_000_000 + return Decimal(stats_map["total"]) / 1_000_000 def read_usage_per_cpu(self): logging.debug("Usage per CPU not supported in cgroups v2") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index fb34b925b..6dd657500 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -8,6 +8,7 @@ import argparse import collections import datetime +import decimal import logging import multiprocessing import os @@ -27,6 +28,7 @@ from 
benchexec.filehierarchylimit import FileHierarchyLimitThread from benchexec import intel_cpu_energy from benchexec import oomhandler +from benchexec.tablegenerator.util import print_decimal from benchexec import resources from benchexec import systeminfo from benchexec import util @@ -276,12 +278,19 @@ def signal_handler_kill(signum, frame): # exit_code is a util.ProcessExitCode instance exit_code = cast(Optional[util.ProcessExitCode], result.pop("exitcode", None)) - def print_optional_result(key, unit="", format_fn=str): + def print_optional_result(key, unit=""): if key in result: - print(f"{key}={format_fn(result[key])}{unit}") + value = result[key] + if isinstance(value, decimal.Decimal): + format_fn = print_decimal + elif isinstance(value, datetime.datetime): + format_fn = datetime.datetime.isoformat + else: + format_fn = str + print(f"{key}={format_fn(value)}{unit}") # output results - print_optional_result("starttime", unit="", format_fn=datetime.datetime.isoformat) + print_optional_result("starttime", unit="") print_optional_result("terminationreason") if exit_code is not None and exit_code.value is not None: print(f"returnvalue={exit_code.value}") From 9fcf85ccc70fc5c834f3db93f866e78716b6bc69 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 30 Aug 2022 18:04:41 +0200 Subject: [PATCH 076/133] First step at better error handling for cgroups v2 As discussed in 4ef31426, we currently do not get error messages that explain why cgroups are not working (e.g., missing pystemd or something like this). The current error messages for all the different situations are only for cgroupsv1. This commit moves the existing error messages to cgroupsv1.py and adds new error messages for situations that can exist with cgroupsv2. It is likely that we need to add more cases or better explanations. 
--- benchexec/cgroups.py | 120 ++++----------------------------------- benchexec/cgroupsv1.py | 125 +++++++++++++++++++++++++++++++++++++++++ benchexec/cgroupsv2.py | 76 ++++++++++++++++++++++++- 3 files changed, 210 insertions(+), 111 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 374e02991..cf93ca3d5 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -6,50 +6,16 @@ # SPDX-License-Identifier: Apache-2.0 from abc import ABC, abstractmethod -import errno -import grp import logging import os import pathlib -import stat -import sys -from benchexec import systeminfo from benchexec import util CGROUPS_V1 = 1 CGROUPS_V2 = 2 -_PERMISSION_HINT_GROUPS = """ -You need to add your account to the following groups: {0} -Remember to logout and login again afterwards to make group changes effective.""" - -_PERMISSION_HINT_DEBIAN = """ -The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu -Alternatively, you can install benchexec-cgroup.service manually: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_SYSTEMD = """ -The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" - -_PERMISSION_HINT_OTHER = """ -Please configure your system in way to allow your user to use cgroups: -https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" - -_ERROR_MSG_PERMISSIONS = """ -Required cgroups are not available because of missing permissions.{0} - -As a temporary workaround, you can also run -"sudo chmod o+wt {1}" -Note that this will grant permissions to more users than typically desired and it 
will only last until the next reboot.""" - -_ERROR_MSG_OTHER = """ -Required cgroups are not available. -If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" - def _get_cgroup_version(): version = None @@ -146,10 +112,6 @@ def __init__(self, subsystems): self.paths = set(self.subsystems.values()) # without duplicates - # for error messages: - self.unusable_subsystems = set() - self.denied_subsystems = {} - logging.debug("Available Cgroups: %s", self.subsystems) def __contains__(self, key): @@ -233,7 +195,7 @@ def set_value(self, subsystem, option, value): str(value), self.subsystems[subsystem] / f"{subsystem}.{option}" ) - # FIXME improve message for v2 + @abstractmethod def require_subsystem(self, subsystem, log_method=logging.warning): """ Check whether the given subsystem is enabled and is writable @@ -243,36 +205,9 @@ def require_subsystem(self, subsystem, log_method=logging.warning): this instance such that further checks with "in" will return "False". @return A boolean value. """ - if subsystem not in self: - if subsystem not in self.unusable_subsystems: - self.unusable_subsystems.add(subsystem) - log_method( - "Cgroup subsystem %s is not available. 
" - "Please make sure it is supported by your kernel and mounted.", - subsystem, - ) - return False - - try: - test_cgroup = self.create_fresh_child_cgroup([subsystem]) - test_cgroup.remove() - except OSError as e: - log_method( - "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", - self.subsystems[subsystem], - subsystem, - e.strerror, - e.errno, - ) - self.unusable_subsystems.add(subsystem) - if e.errno == errno.EACCES: - self.denied_subsystems[subsystem] = self.subsystems[subsystem] - del self.subsystems[subsystem] - self.paths = set(self.subsystems.values()) - return False - - return True + pass + @abstractmethod def handle_errors(self, critical_cgroups): """ If there were errors in calls to require_subsystem() and critical_cgroups @@ -281,48 +216,7 @@ def handle_errors(self, critical_cgroups): @param critical_cgroups: set of unusable but required cgroups """ - if not critical_cgroups: - return - assert critical_cgroups.issubset(self.unusable_subsystems) - - if critical_cgroups.issubset(self.denied_subsystems): - # All errors were because of permissions for these directories - paths = sorted(set(self.denied_subsystems.values())) - - # Check if all cgroups have group permissions and user could just be added - # to some groups to get access. But group 0 (root) of course does not count. 
- groups = {} - try: - if all(stat.S_IWGRP & path.stat().st_mode for path in paths): - groups = {path.stat().st_gid for path in paths} - except OSError: - pass - if groups and 0 not in groups: - - def get_group_name(gid): - try: - name = grp.getgrgid(gid).gr_name - except KeyError: - name = None - return util.escape_string_shell(name or str(gid)) - - groups = " ".join(sorted(set(map(get_group_name, groups)))) - permission_hint = _PERMISSION_HINT_GROUPS.format(groups) - - elif systeminfo.has_systemd(): - if systeminfo.is_debian(): - permission_hint = _PERMISSION_HINT_DEBIAN - else: - permission_hint = _PERMISSION_HINT_SYSTEMD - - else: - permission_hint = _PERMISSION_HINT_OTHER - - paths = " ".join([util.escape_string_shell(str(p)) for p in paths]) - sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) - - else: - sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + pass def remove(self): """ @@ -418,6 +312,12 @@ def kill_all_tasks(self): def create_fresh_child_cgroup(self, subsystems): pass + def require_subsystem(self, subsystem, log_method=logging.warning): + pass + + def handle_errors(self, critical_cgroups): + pass + def read_max_mem_usage(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 4acafbee9..3ba44fa07 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -6,11 +6,13 @@ # SPDX-License-Identifier: Apache-2.0 import errno +import grp import logging import os import pathlib import shutil import signal +import stat import sys import tempfile import time @@ -32,6 +34,35 @@ _CGROUP_NAME_PREFIX = "benchmark_" +_PERMISSION_HINT_GROUPS = """ +You need to add your account to the following groups: {0} +Remember to logout and login again afterwards to make group changes effective.""" + +_PERMISSION_HINT_DEBIAN = """ +The recommended way to fix this is to install the Debian package for BenchExec and add your account to the group "benchexec": 
+https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu +Alternatively, you can install benchexec-cgroup.service manually: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_SYSTEMD = """ +The recommended way to fix this is to add your account to a group named "benchexec" and install benchexec-cgroup.service: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-with-systemd""" + +_PERMISSION_HINT_OTHER = """ +Please configure your system in way to allow your user to use cgroups: +https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#setting-up-cgroups-on-machines-without-systemd""" + +_ERROR_MSG_PERMISSIONS = """ +Required cgroups are not available because of missing permissions.{0} + +As a temporary workaround, you can also run +"sudo chmod o+wt {1}" +Note that this will grant permissions to more users than typically desired and it will only last until the next reboot.""" + +_ERROR_MSG_OTHER = """ +Required cgroups are not available. +If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" + def _find_own_cgroups(): """ @@ -157,6 +188,10 @@ def __init__(self, subsystems): assert set(subsystems.keys()) <= self.known_subsystems super(CgroupsV1, self).__init__(subsystems) + # for error messages: + self.unusable_subsystems = set() + self.denied_subsystems = {} + @classmethod def from_system(cls, cgroup_procinfo=None, fallback=True): """ @@ -252,6 +287,96 @@ def copy_parent_to_child(name): return CgroupsV1(createdCgroupsPerSubsystem) + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. 
+ If the subsystem is enabled but not writable, it will be removed from + this instance such that further checks with "in" will return "False". + @return A boolean value. + """ + if subsystem not in self: + if subsystem not in self.unusable_subsystems: + self.unusable_subsystems.add(subsystem) + log_method( + "Cgroup subsystem %s is not available. " + "Please make sure it is supported by your kernel and mounted.", + subsystem, + ) + return False + + try: + test_cgroup = self.create_fresh_child_cgroup([subsystem]) + test_cgroup.remove() + except OSError as e: + log_method( + "Cannot use cgroup %s for subsystem %s, reason: %s (%s).", + self.subsystems[subsystem], + subsystem, + e.strerror, + e.errno, + ) + self.unusable_subsystems.add(subsystem) + if e.errno == errno.EACCES: + self.denied_subsystems[subsystem] = self.subsystems[subsystem] + del self.subsystems[subsystem] + self.paths = set(self.subsystems.values()) + return False + + return True + + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. + + @param critical_cgroups: set of unusable but required cgroups + """ + if not critical_cgroups: + return + assert critical_cgroups.issubset(self.unusable_subsystems) + + if critical_cgroups.issubset(self.denied_subsystems): + # All errors were because of permissions for these directories + paths = sorted(set(self.denied_subsystems.values())) + + # Check if all cgroups have group permissions and user could just be added + # to some groups to get access. But group 0 (root) of course does not count. 
+ groups = {} + try: + if all(stat.S_IWGRP & path.stat().st_mode for path in paths): + groups = {path.stat().st_gid for path in paths} + except OSError: + pass + if groups and 0 not in groups: + + def get_group_name(gid): + try: + name = grp.getgrgid(gid).gr_name + except KeyError: + name = None + return util.escape_string_shell(name or str(gid)) + + groups = " ".join(sorted(set(map(get_group_name, groups)))) + permission_hint = _PERMISSION_HINT_GROUPS.format(groups) + + elif systeminfo.has_systemd(): + if systeminfo.is_debian(): + permission_hint = _PERMISSION_HINT_DEBIAN + else: + permission_hint = _PERMISSION_HINT_SYSTEMD + + else: + permission_hint = _PERMISSION_HINT_OTHER + + paths = " ".join([util.escape_string_shell(str(p)) for p in paths]) + sys.exit(_ERROR_MSG_PERMISSIONS.format(permission_hint, paths)) + + else: + sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + def add_task(self, pid): """ Add a process to the cgroups represented by this instance. diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index db2f3a136..e14c31ccf 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -10,14 +10,39 @@ import pathlib import secrets import signal +import sys import tempfile import threading import time from decimal import Decimal -from benchexec import util +from benchexec import systeminfo, util from benchexec.cgroups import Cgroups +_ERROR_MSG_UNKNOWN_SUBSYSTEMS = """ +The following cgroup subsystems were required but are not supported by this kernel: {}. +Please avoid their usage or enable them in the kernel.""" + +_ERROR_MSG_MISSING_SUBSYSTEMS = """ +The following cgroup subsystems were required but are not usable: {}. +Please enable them, e.g., by setting up delegation. +The cgroup that we attempted to use was: {}""" + +_ERROR_NO_SYSTEMD = """ +System is using cgroups v2 but not systemd. +If you are using BenchExec within a container, please ensure that cgroups are properly delegated into the container. 
+Otherwise please configure your system such that BenchExec can use cgroups.""" + +_ERROR_NO_PSYSTEMD = """ +BenchExec was not able to use cgroups. +Please either start it within a fresh systemd scope by prefixing your command line with + systemd-run --user --scope -p Delegate=yes +or install the Python library pystemd such that BenchExec can do this automatically.""" + +_ERROR_MSG_OTHER = """ +BenchExec was not able to use cgroups and did not manage to create a systemd scope. +Please ensure that we can connect to systemd via DBus or try starting BenchExec within a fresh systemd scope by prefixing your command line with + systemd-run --user --scope -p Delegate=yes""" uid = os.getuid() CGROUP_NAME_PREFIX = "benchmark_" @@ -276,6 +301,55 @@ def create_fresh_child_cgroup(self, subsystems, prefix=CGROUP_NAME_PREFIX): return CgroupsV2({c: child_path for c in child_subsystems}) + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. + @return A boolean value. + """ + # TODO + # We can assume that creation of child cgroups works, + # because we only use cgroups if we were able to move the current process + # into a child cgroup in initialize(). + return subsystem in self + + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. + + @param critical_cgroups: set of unusable but required cgroups + """ + if not critical_cgroups: + return + + if self.subsystems: + # Some subsystems are available, but not the required ones. + # Check if it is a delegation problem or if some subsystems do not exist. 
+ unknown_subsystems = set(critical_cgroups) + with open("/proc/cgroups", mode="r") as cgroups: + for line in cgroups: + if not line.startswith("#"): + unknown_subsystems.discard(line.split("\t", maxsplit=1)[0]) + if unknown_subsystems: + sys.exit(_ERROR_MSG_UNKNOWN_SUBSYSTEMS.format(', '.join(unknown_subsystems))) + else: + sys.exit(_ERROR_MSG_MISSING_SUBSYSTEMS.format(', '.join(critical_cgroups), self.path)) + + else: + # no cgroup available at all + if not systeminfo.has_systemd(): + sys.exit(_ERROR_NO_SYSTEMD) + + try: + import pystemd # noqa: F401 + except ImportError: + sys.exit(_ERROR_NO_PSYSTEMD) + else: + sys.exit(_ERROR_MSG_OTHER) + def add_task(self, pid): """ Add a process to the cgroups represented by this instance. From 5d442996f814e554f3a16682c8743f0626b3a4ec Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 15 Sep 2022 17:27:42 +0200 Subject: [PATCH 077/133] format --- benchexec/cgroupsv2.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index e14c31ccf..e0944ab6f 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -334,9 +334,15 @@ def handle_errors(self, critical_cgroups): if not line.startswith("#"): unknown_subsystems.discard(line.split("\t", maxsplit=1)[0]) if unknown_subsystems: - sys.exit(_ERROR_MSG_UNKNOWN_SUBSYSTEMS.format(', '.join(unknown_subsystems))) + sys.exit( + _ERROR_MSG_UNKNOWN_SUBSYSTEMS.format(", ".join(unknown_subsystems)) + ) else: - sys.exit(_ERROR_MSG_MISSING_SUBSYSTEMS.format(', '.join(critical_cgroups), self.path)) + sys.exit( + _ERROR_MSG_MISSING_SUBSYSTEMS.format( + ", ".join(critical_cgroups), self.path + ) + ) else: # no cgroup available at all From c4e447ce0ca4f6d2068f4e7f78cfb73f966f40bf Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 27 Feb 2023 16:41:53 +0100 Subject: [PATCH 078/133] Copy over protection for #840 to cgroupsv2 branch This only changes handling of cgroups on cgroupsv1 systems. 
The same improvements still need to be done for cgroupsv2. Effectively, this cherry-picks 7ac5aa5af33169339a367844a0d3806729f902cd 87e2bcb679e885025337f07ce515980f299c2bd6 a1515702ff0510be35da1abfdab884d59a6a253c b3462dc69049a863c1309b39fb65883eba335c33 and applies them to cgroupsv1.py --- benchexec/cgroupsv1.py | 77 ++++++++++++++++++++++++++++------- benchexec/test_runexecutor.py | 52 +++++++++++++++++++++++ 2 files changed, 115 insertions(+), 14 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 3ba44fa07..7b345e212 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -95,7 +95,19 @@ def _parse_proc_pid_cgroup(content): yield (subsystem, path) -def _kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): +def _force_open_read(filename): + """ + Open a file for reading even if we have no read permission, + as long as we can grant it to us. + """ + try: + return open(filename, "rt") + except OSError: + os.chmod(filename, stat.S_IRUSR) + return open(filename, "rt") + + +def kill_all_tasks_in_cgroup(cgroup): tasksFile = cgroup / "tasks" i = 0 @@ -107,7 +119,7 @@ def _kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: task = None try: - with open(tasksFile, "rt") as tasks: + with _force_open_read(tasksFile) as tasks: for task in tasks: task = task.strip() if i > 1: @@ -126,7 +138,7 @@ def _kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): tasksFile, ) - if task is None or not ensure_empty: + if task is None: return # No process was hanging, exit # wait for the process to exit, this might take some time time.sleep(i * 0.5) @@ -399,17 +411,40 @@ def kill_all_tasks(self): Kill all tasks in this cgroup and all its children cgroups forcefully. Additionally, the children cgroups will be deleted. """ + # In this method we should attempt to guard against child cgroups + # that have been created and manipulated by processes in the run. 
+ # For example, they could have removed permissions from files and directories. - def kill_all_tasks_in_cgroup_recursively(cgroup, delete): - for dirpath, dirs, _files in os.walk(cgroup, topdown=False): - for subCgroup in dirs: - subCgroup = os.path.join(dirpath, subCgroup) - _kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) - - if delete: - self._remove_cgroup(subCgroup) + def recursive_child_cgroups(cgroup): + def raise_error(e): + raise e - _kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + try: + for dirpath, dirs, _files in os.walk( + cgroup, topdown=False, onerror=raise_error + ): + for subCgroup in dirs: + yield os.path.join(dirpath, subCgroup) + except OSError as e: + # some process might have made a child cgroup inaccessible + os.chmod(e.filename, stat.S_IRUSR | stat.S_IXUSR) + # restart, which might yield already yielded cgroups again, + # but this is ok for the callers of recursive_child_cgroups() + yield from recursive_child_cgroups(cgroup) + + def try_unfreeze(cgroup): + freezer_file = os.path.join(cgroup, "freezer.state") + try: + util.write_file("THAWED", freezer_file) + except OSError: + # Somebody could have fiddle with permissions, try to set them. + # If we are not owner, this also fails, but then there is nothing we + # can do. But the processes inside the run cannot change the owner. + try: + os.chmod(freezer_file, stat.S_IRUSR | stat.S_IWUSR) + util.write_file("THAWED", freezer_file) + except OSError: + pass # First, we go through all cgroups recursively while they are frozen and kill # all processes. 
This helps against fork bombs and prevents processes from @@ -422,14 +457,28 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): freezer_file = cgroup / "freezer.state" util.write_file("FROZEN", freezer_file) - kill_all_tasks_in_cgroup_recursively(cgroup, delete=False) + + for child_cgroup in recursive_child_cgroups(cgroup): + with _force_open_read(os.path.join(child_cgroup, "tasks")) as tasks: + for task in tasks: + util.kill_process(int(task)) + + # This cgroup could be frozen, which would prevent processes from being + # killed and would lead to an endless loop below. cf. + # https://github.com/sosy-lab/benchexec/issues/840 + try_unfreeze(child_cgroup) + util.write_file("THAWED", freezer_file) # Second, we go through all cgroups again, kill what is left, # check for emptiness, and remove subgroups. # Furthermore, we do this for all hierarchies, not only the one with freezer. for cgroup in self.paths: - kill_all_tasks_in_cgroup_recursively(cgroup, delete=True) + for child_cgroup in recursive_child_cgroups(cgroup): + kill_all_tasks_in_cgroup(child_cgroup) + remove_cgroup(child_cgroup) + + kill_all_tasks_in_cgroup(cgroup) def read_cputime(self): """ diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 058d4f693..cb51f97fb 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -809,6 +809,58 @@ def test_starttime(self): self.assertLessEqual(before, run_starttime) self.assertLessEqual(run_starttime, after) + def test_frozen_process(self): + # https://github.com/sosy-lab/benchexec/issues/840 + if not os.path.exists(self.sleep): + self.skipTest("missing sleep") + if not os.path.exists("/sys/fs/cgroup/freezer"): + self.skipTest("missing freezer cgroup") + self.setUp( + dir_modes={ + "/": containerexecutor.DIR_READ_ONLY, + "/home": containerexecutor.DIR_HIDDEN, + "/tmp": containerexecutor.DIR_HIDDEN, + "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, + } + ) + (result, output) = self.execute_run( + 
"/bin/sh", + "-c", + """#!/bin/sh +# create process, move it to sub-cgroup, and freeze it +set -eu + +cgroup="/sys/fs/cgroup/freezer/$(grep freezer /proc/self/cgroup | cut -f 3 -d :)" +mkdir "$cgroup/tmp" + +sleep 10 & +child_pid=$! + +echo $child_pid > "$cgroup/tmp/tasks" +echo FROZEN > "$cgroup/tmp/freezer.state" +# remove permissions in order to test our handling of this case +chmod 000 "$cgroup/tmp/freezer.state" +chmod 000 "$cgroup/tmp/tasks" +chmod 000 "$cgroup/tmp" +echo FROZEN +wait $child_pid +""", + walltimelimit=1, + expect_terminationreason="walltime", + ) + self.check_exitcode(result, 9, "exit code of killed process is not 9") + self.assertAlmostEqual( + result["walltime"], + 2, + delta=0.5, + msg="walltime is not approximately the time after which the process should have been killed", + ) + self.assertEqual( + output[-1], + "FROZEN", + "run output misses command output and was not executed properly", + ) + class TestRunExecutorWithContainer(TestRunExecutor): def setUp(self, *args, **kwargs): From b5e8d62a52c177f9ef1f38650d9d3a372de33c5d Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 09:56:41 +0100 Subject: [PATCH 079/133] Remove ZFS from list of filesystems that do not support overlayfs It seems that ZFS has gained for overlayfs in https://github.com/openzfs/zfs/pull/9414 Furthermore, such a change should not be mixed in with the cgroups changes. --- benchexec/container.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index aa465de5a..1fc7d0bd0 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -615,7 +615,6 @@ def determine_directory_mode(dir_modes, path, fstype=None): or fstype == b"autofs" or fstype == b"vfat" or fstype == b"ntfs" - or fstype == b"zfs" ) ): # Overlayfs does not support these as underlying file systems. 
From a302f70314a9f9a1d8d537f30a6dbf1e535067c6 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 10:23:19 +0100 Subject: [PATCH 080/133] Fix method call from c4e447ce --- benchexec/cgroupsv1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 145a64c59..4c3203e15 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -476,7 +476,7 @@ def try_unfreeze(cgroup): for cgroup in self.paths: for child_cgroup in recursive_child_cgroups(cgroup): kill_all_tasks_in_cgroup(child_cgroup) - remove_cgroup(child_cgroup) + self._remove_cgroup(child_cgroup) kill_all_tasks_in_cgroup(cgroup) From 921929bd161037496a1128678870774aca238e7c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 10:25:56 +0100 Subject: [PATCH 081/133] Remove inconsistent pathlib usage from cgroupsv1 code This makes the code more similar to the code on the main branch and makes reviewing easier. Refactorings like starting to use pathlib should be done separately from new features. --- benchexec/cgroups.py | 19 ++++++++++--------- benchexec/cgroupsv1.py | 26 +++++++++++++------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index cf93ca3d5..0cf81c476 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -8,7 +8,6 @@ from abc import ABC, abstractmethod import logging import os -import pathlib from benchexec import util @@ -123,7 +122,7 @@ def __getitem__(self, key): def __str__(self): return str(self.paths) - def _remove_cgroup(self, path: pathlib.Path): + def _remove_cgroup(self, path): if not os.path.exists(path): logging.warning("Cannot remove CGroup %s, because it does not exist.", path) return @@ -150,7 +149,9 @@ def has_value(self, subsystem, option): Only call this method if the given subsystem is available. 
""" assert subsystem in self - return os.path.isfile(self.subsystems[subsystem] / f"{subsystem}.{option}") + return os.path.isfile( + os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") + ) def get_value(self, subsystem, option): """ @@ -159,7 +160,7 @@ def get_value(self, subsystem, option): Only call this method if the given subsystem is available. """ assert subsystem in self, f"Subsystem {subsystem} is missing" - return util.read_file(self.subsystems[subsystem] / f"{subsystem}.{option}") + return util.read_file(self.subsystems[subsystem], f"{subsystem}.{option}") def get_file_lines(self, subsystem, option): """ @@ -168,7 +169,9 @@ def get_file_lines(self, subsystem, option): Only call this method if the given subsystem is available. """ assert subsystem in self - with open(self.subsystems[subsystem] / f"{subsystem}.{option}") as f: + with open( + os.path.join(self.subsystems[subsystem], f"{subsystem}.{option}") + ) as f: for line in f: yield line @@ -181,7 +184,7 @@ def get_key_value_pairs(self, subsystem, filename): """ assert subsystem in self return util.read_key_value_pairs_from_file( - self.subsystems[subsystem] / f"{subsystem}.{filename}" + self.subsystems[subsystem], f"{subsystem}.{filename}" ) def set_value(self, subsystem, option, value): @@ -191,9 +194,7 @@ def set_value(self, subsystem, option, value): Only call this method if the given subsystem is available. 
""" assert subsystem in self - util.write_file( - str(value), self.subsystems[subsystem] / f"{subsystem}.{option}" - ) + util.write_file(str(value), self.subsystems[subsystem], f"{subsystem}.{option}") @abstractmethod def require_subsystem(self, subsystem, log_method=logging.warning): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 4c3203e15..a3945c5c0 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -9,7 +9,6 @@ import grp import logging import os -import pathlib import shutil import signal import stat @@ -108,7 +107,7 @@ def _force_open_read(filename): def kill_all_tasks_in_cgroup(cgroup): - tasksFile = cgroup / "tasks" + tasksFile = os.path.join(cgroup, "tasks") i = 0 while True: @@ -231,8 +230,8 @@ def from_system(cls, cgroup_procinfo=None, fallback=True): # e.g. because a parent directory has insufficient permissions # (lxcfs mounts cgroups under /run/lxcfs in such a way). if os.access(mount, os.F_OK): - cgroupPath = mount / my_cgroups[subsystem] - fallbackPath = mount / _CGROUP_FALLBACK_PATH + cgroupPath = os.path.join(mount, my_cgroups[subsystem]) + fallbackPath = os.path.join(mount, _CGROUP_FALLBACK_PATH) if ( fallback and not os.access(cgroupPath, os.W_OK) @@ -254,7 +253,7 @@ def _find_cgroup_mounts(cls): for mount in mountsFile: mount = mount.split(" ") if mount[2] == "cgroup": - mountpoint = pathlib.Path(mount[1]) + mountpoint = mount[1] options = mount[3] for option in options.split(","): if option in cls.known_subsystems: @@ -279,16 +278,17 @@ def create_fresh_child_cgroup(self, subsystems): ] continue - cgroup = pathlib.Path( - tempfile.mkdtemp(prefix=_CGROUP_NAME_PREFIX, dir=parentCgroup) - ) + cgroup = tempfile.mkdtemp(prefix=_CGROUP_NAME_PREFIX, dir=parentCgroup) createdCgroupsPerSubsystem[subsystem] = cgroup createdCgroupsPerParent[parentCgroup] = cgroup # add allowed cpus and memory to cgroup if necessary # (otherwise we can't add any tasks) def copy_parent_to_child(name): - shutil.copyfile(parentCgroup / 
name, cgroup / name) # noqa: B023 + shutil.copyfile( + os.path.join(parentCgroup, name), # noqa: B023 + os.path.join(cgroup, name), # noqa: B023 + ) try: copy_parent_to_child("cpuset.cpus") @@ -395,14 +395,14 @@ def add_task(self, pid): """ _register_process_with_cgrulesengd(pid) for cgroup in self.paths: - with open(cgroup / "tasks", "w") as tasksFile: + with open(os.path.join(cgroup, "tasks"), "w") as tasksFile: tasksFile.write(str(pid)) def get_all_tasks(self, subsystem): """ Return a generator of all PIDs currently in this cgroup for the given subsystem. """ - with open(self.subsystems[subsystem] / "tasks", "r") as tasksFile: + with open(os.path.join(self.subsystems[subsystem], "tasks"), "r") as tasksFile: for line in tasksFile: yield int(line) @@ -454,7 +454,7 @@ def try_unfreeze(cgroup): # delete subgroups). if self.FREEZE in self.subsystems: cgroup = self.subsystems[self.FREEZE] - freezer_file = cgroup / "freezer.state" + freezer_file = os.path.join(cgroup, "freezer.state") util.write_file("FROZEN", freezer_file) @@ -558,7 +558,7 @@ def read_io_stat(self): return bytes_read, bytes_written def has_tasks(self, path): - return bool((path / "cgroup.procs").read_bytes().strip()) + return util.read_file(path, "tasks") != "" def write_memory_limit(self, limit): limit_file = "limit_in_bytes" From 8a7f6136c25624fdbfb490bbaa6a0dad3df7f6f0 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 30 Jan 2023 15:05:11 +0100 Subject: [PATCH 082/133] Fix broken merge of 314b32b1 intro branch cgroupsv2 --- .gitlab-ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 866c372b3..f0ca3c052 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -122,8 +122,7 @@ pytype: before_script: - apt update - apt install -y libsystemd-dev python-dev - # version due to https://github.com/google/pytype/issues/1130 - - pip install pystemd coloredlogs pytype==2022.2.8 + - pip install pystemd coloredlogs pytype script: - pytype -k 
cache: From a869c00048d50a2b17e1daf3905252226de59499 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 11:02:36 +0100 Subject: [PATCH 083/133] Remove pystemd as "build dependency" and make it an optional dependency There should be no reason why we need it during build. It is optional for now instead of a regular dependency because pystemd does not publish wheels, so users need a compiler and development headers to install it, or they need to install the package from their distribution. --- pyproject.toml | 1 - setup.cfg | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d6ce6962b..5d5803175 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,6 @@ requires = [ # Require versions that support our license files 'setuptools >= 42.0.0', 'wheel >= 0.32.0', - 'pystemd >= 0.7.0', ] build-backend = 'setuptools.build_meta' diff --git a/setup.cfg b/setup.cfg index 75a842150..76c30633a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,6 +62,8 @@ zip_safe = True dev = nose >= 1.0 lxml +systemd = + pystemd >= 0.7.0 [options.entry_points] console_scripts = From 7f8f5b4da8da5aafafdbea799c5e25dcdd1a0683 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 11:07:40 +0100 Subject: [PATCH 084/133] remove test adjustment that is now unnecessary --- benchexec/test_runexecutor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 75e4b14c2..472ea22ee 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -864,7 +864,6 @@ def test_frozen_process(self): class TestRunExecutorWithContainer(TestRunExecutor): def setUp(self, *args, **kwargs): - super().setUp(*args, **kwargs) try: container.execute_in_namespace(lambda: 0) except OSError as e: From 92ad4346e17f91529eaceeb5a439cb8f541bc054 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 12:57:05 +0100 Subject: [PATCH 085/133] Silence 
pytype error about potentially undefined name It can only be undefined if there is an ImportError, which is handled before. --- benchexec/cgroupsv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index e0944ab6f..e5f809945 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -151,7 +151,7 @@ def _create_systemd_scope_for_us(): except ImportError: logging.debug("pystemd could not be imported.") - except DBusFileNotFoundError as e: + except DBusFileNotFoundError as e: # pytype: disable=name-error logging.debug("No user DBus found, not using pystemd: %s", e) return False From 1596a6e13ace78fed90b3720abc0c1a272212ffd Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 16:50:32 +0100 Subject: [PATCH 086/133] Remove forgotten code from code move back to oomhandler.py in aa0b905f --- benchexec/cgroupsv1.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index a3945c5c0..353534e3e 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -20,13 +20,6 @@ from benchexec.cgroups import Cgroups from benchexec import systeminfo -from ctypes import cdll - -_libc = cdll.LoadLibrary("libc.so.6") -_EFD_CLOEXEC = 0x80000 # from : mark eventfd as close-on-exec - -_BYTE_FACTOR = 1000 # byte in kilobyte - _CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" From 51c23c4f6ecb795ee4ba98a9ccda40d941d80f0c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 17:01:12 +0100 Subject: [PATCH 087/133] Refactoring: Move code around This is only intended to make cgroupsv1.py more similar to the old cgroups.py in order to make reviewing easier. 
--- benchexec/cgroupsv1.py | 186 ++++++++++++++++++++++------------------- 1 file changed, 100 insertions(+), 86 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 353534e3e..157a814d1 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -16,15 +16,15 @@ import tempfile import time +from benchexec import systeminfo from benchexec import util from benchexec.cgroups import Cgroups -from benchexec import systeminfo -_CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" +CGROUP_FALLBACK_PATH = "system.slice/benchexec-cgroup.service" """If we do not have write access to the current cgroup, attempt to use this cgroup as fallback.""" -_CGROUP_NAME_PREFIX = "benchmark_" +CGROUP_NAME_PREFIX = "benchmark_" _PERMISSION_HINT_GROUPS = """ You need to add your account to the following groups: {0} @@ -56,6 +56,64 @@ If you are using BenchExec within a container, please make "/sys/fs/cgroup" available.""" +def find_my_cgroups(cgroup_paths=None, fallback=True): + """ + Return a dict with the cgroups of the current process. + Note that it is not guaranteed that all subsystems are available + in the returned object, as a subsystem may not be mounted. + Check with "subsystem in " before using. + A subsystem may also be present but we do not have the rights to create + child cgroups, this can be checked with require_subsystem(). + @param cgroup_paths: If given, use this instead of reading /proc/self/cgroup. + @param fallback: Whether to look for a default cgroup as fallback is our cgroup + is not accessible. + """ + logging.debug( + "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." + ) + if cgroup_paths is None: + my_cgroups = dict(_find_own_cgroups()) + else: + my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_paths)) + + cgroupsParents = {} + for subsystem, mount in _find_cgroup_mounts(): + # Ignore mount points where we do not have any access, + # e.g. 
because a parent directory has insufficient permissions + # (lxcfs mounts cgroups under /run/lxcfs in such a way). + if os.access(mount, os.F_OK): + cgroupPath = os.path.join(mount, my_cgroups[subsystem]) + fallbackPath = os.path.join(mount, CGROUP_FALLBACK_PATH) + if ( + fallback + and not os.access(cgroupPath, os.W_OK) + and os.path.isdir(fallbackPath) + ): + cgroupPath = fallbackPath + cgroupsParents[subsystem] = cgroupPath + + return cgroupsParents + + +def _find_cgroup_mounts(): + """ + Return the information which subsystems are mounted where. + @return a generator of tuples (subsystem, mountpoint) + """ + try: + with open("/proc/mounts", "rt") as mountsFile: + for mount in mountsFile: + mount = mount.split(" ") + if mount[2] == "cgroup": + mountpoint = mount[1] + options = mount[3] + for option in options.split(","): + if option in CgroupsV1.known_subsystems: + yield (option, mountpoint) + except OSError: + logging.exception("Cannot read /proc/mounts") + + def _find_own_cgroups(): """ For all subsystems, return the information in which (sub-)cgroup this process is in. @@ -209,88 +267,7 @@ def from_system(cls, cgroup_procinfo=None, fallback=True): @param fallback: Whether to look for a default cgroup as fallback if our cgroup is not accessible. """ - logging.debug( - "Analyzing /proc/mounts and /proc/self/cgroup for determining cgroups." - ) - if cgroup_procinfo is None: - my_cgroups = dict(_find_own_cgroups()) - else: - my_cgroups = dict(_parse_proc_pid_cgroup(cgroup_procinfo)) - - cgroupsParents = {} - for subsystem, mount in cls._find_cgroup_mounts(): - # Ignore mount points where we do not have any access, - # e.g. because a parent directory has insufficient permissions - # (lxcfs mounts cgroups under /run/lxcfs in such a way). 
- if os.access(mount, os.F_OK): - cgroupPath = os.path.join(mount, my_cgroups[subsystem]) - fallbackPath = os.path.join(mount, _CGROUP_FALLBACK_PATH) - if ( - fallback - and not os.access(cgroupPath, os.W_OK) - and os.path.isdir(fallbackPath) - ): - cgroupPath = fallbackPath - cgroupsParents[subsystem] = cgroupPath - - return cls(cgroupsParents) - - @classmethod - def _find_cgroup_mounts(cls): - """ - Return the information which subsystems are mounted where. - @return a generator of tuples (subsystem, mountpoint) - """ - try: - with open("/proc/mounts", "rt") as mountsFile: - for mount in mountsFile: - mount = mount.split(" ") - if mount[2] == "cgroup": - mountpoint = mount[1] - options = mount[3] - for option in options.split(","): - if option in cls.known_subsystems: - yield (option, mountpoint) - except OSError: - logging.exception("Cannot read /proc/mounts") - - def create_fresh_child_cgroup(self, subsystems): - """ - Create child cgroups of the current cgroup for at least the given subsystems. - @return: A Cgroup instance representing the new child cgroup(s). 
- """ - assert set(subsystems).issubset(self.subsystems.keys()) - createdCgroupsPerSubsystem = {} - createdCgroupsPerParent = {} - for subsystem in subsystems: - parentCgroup = self.subsystems[subsystem] - if parentCgroup in createdCgroupsPerParent: - # reuse already created cgroup - createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ - parentCgroup - ] - continue - - cgroup = tempfile.mkdtemp(prefix=_CGROUP_NAME_PREFIX, dir=parentCgroup) - createdCgroupsPerSubsystem[subsystem] = cgroup - createdCgroupsPerParent[parentCgroup] = cgroup - - # add allowed cpus and memory to cgroup if necessary - # (otherwise we can't add any tasks) - def copy_parent_to_child(name): - shutil.copyfile( - os.path.join(parentCgroup, name), # noqa: B023 - os.path.join(cgroup, name), # noqa: B023 - ) - - try: - copy_parent_to_child("cpuset.cpus") - copy_parent_to_child("cpuset.mems") - except OSError: - # expected to fail if cpuset subsystem is not enabled in this hierarchy - pass - - return CgroupsV1(createdCgroupsPerSubsystem) + return cls(find_my_cgroups(cgroup_procinfo, fallback)) def require_subsystem(self, subsystem, log_method=logging.warning): """ @@ -382,6 +359,44 @@ def get_group_name(gid): else: sys.exit(_ERROR_MSG_OTHER) # e.g., subsystem not mounted + def create_fresh_child_cgroup(self, subsystems): + """ + Create child cgroups of the current cgroup for at least the given subsystems. + @return: A Cgroup instance representing the new child cgroup(s). 
+ """ + assert set(subsystems).issubset(self.subsystems.keys()) + createdCgroupsPerSubsystem = {} + createdCgroupsPerParent = {} + for subsystem in subsystems: + parentCgroup = self.subsystems[subsystem] + if parentCgroup in createdCgroupsPerParent: + # reuse already created cgroup + createdCgroupsPerSubsystem[subsystem] = createdCgroupsPerParent[ + parentCgroup + ] + continue + + cgroup = tempfile.mkdtemp(prefix=CGROUP_NAME_PREFIX, dir=parentCgroup) + createdCgroupsPerSubsystem[subsystem] = cgroup + createdCgroupsPerParent[parentCgroup] = cgroup + + # add allowed cpus and memory to cgroup if necessary + # (otherwise we can't add any tasks) + def copy_parent_to_child(name): + shutil.copyfile( + os.path.join(parentCgroup, name), # noqa: B023 + os.path.join(cgroup, name), # noqa: B023 + ) + + try: + copy_parent_to_child("cpuset.cpus") + copy_parent_to_child("cpuset.mems") + except OSError: + # expected to fail if cpuset subsystem is not enabled in this hierarchy + pass + + return CgroupsV1(createdCgroupsPerSubsystem) + def add_task(self, pid): """ Add a process to the cgroups represented by this instance. @@ -448,7 +463,6 @@ def try_unfreeze(cgroup): if self.FREEZE in self.subsystems: cgroup = self.subsystems[self.FREEZE] freezer_file = os.path.join(cgroup, "freezer.state") - util.write_file("FROZEN", freezer_file) for child_cgroup in recursive_child_cgroups(cgroup): From 3dcb47c0750dc00efbc6610648c7182f2bbf3d2c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 17 Mar 2023 17:15:37 +0100 Subject: [PATCH 088/133] Remove more pathlib usage from cgroupsv1 code cf. 921929bd --- benchexec/cgroupsv1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 157a814d1..deae76ad0 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -328,8 +328,8 @@ def handle_errors(self, critical_cgroups): # to some groups to get access. But group 0 (root) of course does not count. 
groups = {} try: - if all(stat.S_IWGRP & path.stat().st_mode for path in paths): - groups = {path.stat().st_gid for path in paths} + if all(stat.S_IWGRP & os.stat(path).st_mode for path in paths): + groups = {os.stat(path).st_gid for path in paths} except OSError: pass if groups and 0 not in groups: From 81bfdbf3c75d1c10d8fde927e93a6a2956996048 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 16:07:45 +0200 Subject: [PATCH 089/133] Rename some methods to keep the code more similar with existing code --- benchexec/cgroups.py | 8 ++++---- benchexec/cgroupsv1.py | 4 ++-- benchexec/cgroupsv2.py | 4 ++-- benchexec/check_cgroups.py | 4 ++-- benchexec/resources.py | 6 +++--- benchexec/runexecutor.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 0cf81c476..7baedddd9 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -263,11 +263,11 @@ def read_usage_per_cpu(self): pass @abstractmethod - def read_available_cpus(self): + def read_allowed_cpus(self): pass @abstractmethod - def read_available_mems(self): + def read_allowed_memory_banks(self): pass @abstractmethod @@ -334,10 +334,10 @@ def read_io_pressure(self): def read_usage_per_cpu(self): pass - def read_available_cpus(self): + def read_allowed_cpus(self): pass - def read_available_mems(self): + def read_allowed_memory_banks(self): pass def read_io_stat(self): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index deae76ad0..aa6ca560f 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -543,10 +543,10 @@ def read_usage_per_cpu(self): return usage - def read_available_cpus(self): + def read_allowed_cpus(self): return util.parse_int_list(self.get_value(self.CPUSET, "cpus")) - def read_available_mems(self): + def read_allowed_memory_banks(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems")) def read_io_stat(self): diff --git a/benchexec/cgroupsv2.py 
b/benchexec/cgroupsv2.py index e5f809945..b6aa0a3ab 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -448,10 +448,10 @@ def read_usage_per_cpu(self): return {} - def read_available_cpus(self): + def read_allowed_cpus(self): return util.parse_int_list(self.get_value(self.CPUSET, "cpus.effective")) - def read_available_mems(self): + def read_allowed_memory_banks(self): return util.parse_int_list(self.get_value(self.CPUSET, "mems.effective")) def read_io_stat(self): diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 4e9520acc..0028a9734 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -45,8 +45,8 @@ def check_cgroup_availability(wait=1): tmp.name, memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET - cores=my_cgroups.read_available_cpus(), - memory_nodes=my_cgroups.read_available_mems(), + cores=my_cgroups.read_allowed_cpus(), + memory_nodes=my_cgroups.read_allowed_memory_banks(), ) lines = [] for line in tmp: diff --git a/benchexec/resources.py b/benchexec/resources.py index aa437505f..b9911e89d 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -61,7 +61,7 @@ def get_cpu_cores_per_run( """ try: # read list of available CPU cores - allCpus = my_cgroups.read_available_cpus() + allCpus = my_cgroups.read_allowed_cpus() # Filter CPU cores according to the list of identifiers provided by a user if coreSet: @@ -319,7 +319,7 @@ def get_memory_banks_per_run(coreAssignment, cgroups): to one of its CPU cores.""" try: # read list of available memory banks - allMems = set(cgroups.read_available_mems()) + allMems = set(cgroups.read_allowed_memory_banks()) result = [] for cores in coreAssignment: @@ -402,7 +402,7 @@ def check_limit(actualLimit): # Get list of all memory banks, either from memory assignment or from system. 
if not memoryAssignment: if my_cgroups.CPUSET in my_cgroups: - allMems = my_cgroups.read_available_mems() + allMems = my_cgroups.read_allowed_memory_banks() else: allMems = _get_memory_banks_listed_in_dir("/sys/devices/system/node/") memoryAssignment = [ diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 0bc3d5b08..1518ef77d 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -390,13 +390,13 @@ def _init_cgroups(self): if self.cgroups.CPUSET in self.cgroups: # Read available cpus/memory nodes: try: - self.cpus = self.cgroups.read_available_cpus() + self.cpus = self.cgroups.read_allowed_cpus() except ValueError as e: logging.warning("Could not read available CPU cores from kernel: %s", e) logging.debug("List of available CPU cores is %s.", self.cpus) try: - self.memory_nodes = self.cgroups.read_available_mems() + self.memory_nodes = self.cgroups.read_allowed_memory_banks() except ValueError as e: logging.warning( "Could not read available memory nodes from kernel: %s", str(e) From 4b4f939bf3b9e075fca29ffd265d7700bc4ffad8 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 16:16:07 +0200 Subject: [PATCH 090/133] Make comments and abstract methods for Cgroups class more consistent --- benchexec/cgroups.py | 13 +++++++++++++ benchexec/cgroupsv1.py | 4 ---- benchexec/cgroupsv2.py | 4 ---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 7baedddd9..199956639 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -242,6 +242,14 @@ def kill_all_tasks(self): def create_fresh_child_cgroup(self, subsystems): pass + @abstractmethod + def read_cputime(self): + """ + Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
+ @return cputime usage in seconds + """ + pass + @abstractmethod def read_max_mem_usage(self): pass @@ -264,10 +272,12 @@ def read_usage_per_cpu(self): @abstractmethod def read_allowed_cpus(self): + """Get the list of all CPU cores allowed by this cgroup.""" pass @abstractmethod def read_allowed_memory_banks(self): + """Get the list of all memory banks allowed by this cgroup.""" pass @abstractmethod @@ -319,6 +329,9 @@ def require_subsystem(self, subsystem, log_method=logging.warning): def handle_errors(self, critical_cgroups): pass + def read_cputime(self): + pass + def read_max_mem_usage(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index aa6ca560f..40fe7358a 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -488,10 +488,6 @@ def try_unfreeze(cgroup): kill_all_tasks_in_cgroup(cgroup) def read_cputime(self): - """ - Read the cputime usage of this cgroup. CPUACCT cgroup needs to be available. - @return cputime usage in seconds - """ # convert nano-seconds to seconds return float(self.get_value(self.CPU, "usage")) / 1_000_000_000 diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index b6aa0a3ab..363d7c8bf 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -408,10 +408,6 @@ def kill_all_tasks_in_cgroup_recursively(cgroup, delete): kill_all_tasks_in_cgroup_recursively(self.path, delete=True) def read_cputime(self): - """ - Read the cputime usage of this cgroup. CPU cgroup needs to be available. 
- @return cputime usage in seconds - """ cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) # TODO switch to Decimal together with all other float values From 040fcb6d976ea71d1e2f0156d498ee8976549e2a Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 17:30:13 +0200 Subject: [PATCH 091/133] Move print_decimal to benchexec.util RunExecutor needs it now, but RunExecutor must not have a dependency on table-generator, because it should work on systems where table-generator dependencies (like PyYaml) are not available. --- benchexec/runexecutor.py | 2 +- benchexec/tablegenerator/columns.py | 9 ++--- benchexec/tablegenerator/test_util.py | 47 --------------------------- benchexec/tablegenerator/util.py | 43 ------------------------ benchexec/test_util.py | 47 +++++++++++++++++++++++++++ benchexec/util.py | 43 ++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 95 deletions(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 1518ef77d..3b0979c27 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -28,7 +28,7 @@ from benchexec.filehierarchylimit import FileHierarchyLimitThread from benchexec import intel_cpu_energy from benchexec import oomhandler -from benchexec.tablegenerator.util import print_decimal +from benchexec.util import print_decimal from benchexec import resources from benchexec import systeminfo from benchexec import util diff --git a/benchexec/tablegenerator/columns.py b/benchexec/tablegenerator/columns.py index dfa7110e5..9557a6d08 100644 --- a/benchexec/tablegenerator/columns.py +++ b/benchexec/tablegenerator/columns.py @@ -13,6 +13,7 @@ import logging from typing import Tuple, Union +from benchexec.util import print_decimal from benchexec.tablegenerator import util __all__ = ["Column", "ColumnType", "ColumnMeasureType"] @@ -206,7 +207,7 @@ def format_value(self, value, format_target): number = Decimal(number_str) elif isinstance(value, Decimal): number = value - number_str = 
util.print_decimal(number) + number_str = print_decimal(number) else: raise TypeError(f"Unexpected number type {type(value)}") @@ -229,7 +230,7 @@ def format_value(self, value, format_target): ): # Column of type count (integral values) without specified sig. digits. # However, we need to round values like stdev, so we just round somehow. - return util.print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) + return print_decimal(round(number, DEFAULT_TOOLTIP_PRECISION)) number_of_significant_digits = self.get_number_of_significant_digits( format_target @@ -250,7 +251,7 @@ def format_value(self, value, format_target): format_target, ) else: - return util.print_decimal(number) + return print_decimal(number) def set_column_type_from(self, column_values): """ @@ -378,7 +379,7 @@ def _format_number( rounded_value = round(number, rounding_point) assert rounded_value == number.quantize(Decimal(1).scaleb(-rounding_point)) - formatted_value = util.print_decimal(rounded_value) + formatted_value = print_decimal(rounded_value) # Get the number of resulting significant digits. 
current_sig_digits = _get_significant_digits(formatted_value) diff --git a/benchexec/tablegenerator/test_util.py b/benchexec/tablegenerator/test_util.py index fa691604d..fff5d9409 100644 --- a/benchexec/tablegenerator/test_util.py +++ b/benchexec/tablegenerator/test_util.py @@ -5,7 +5,6 @@ # # SPDX-License-Identifier: Apache-2.0 -from decimal import Decimal import sys import unittest @@ -53,52 +52,6 @@ def test_split_string_at_suffix(self): self.assertEqualTextAndNumber("abc1abc", "abc1abc", "") self.assertEqualTextAndNumber("abc1abc1", "abc1abc", "1") - def test_print_decimal_roundtrip(self): - # These values should be printed exactly as in the input (with "+" removed) - test_values = [ - "NaN", - "Inf", - "-Inf", - "+Inf", - "0", - "-0", - "+0", - "0.0", - "-0.0", - "0.00000000000000000000", - "0.00000000000000000001", - "0.00000000123450000000", - "0.1", - "0.10000000000000000000", - "0.99999999999999999999", - "1", - "-1", - "+1", - "1000000000000000000000", - "10000000000.0000000000", - ] - for value in test_values: - expected = value.lstrip("+") - self.assertEqual(expected, util.print_decimal(Decimal(value))) - - def test_print_decimal_int(self): - # These values should be printed like Decimal prints them after quantizing - # to remove the exponent. - test_values = ["0e0", "-0e0", "0e20", "1e0", "1e20", "0e10"] - for value in test_values: - value = Decimal(value) - expected = str(value.quantize(1)) - assert "e" not in expected - self.assertEqual(expected, util.print_decimal(value)) - - def test_print_decimal_float(self): - # These values should be printed like str prints floats. 
- test_values = ["1e-4", "123e-4", "1234e-4", "1234e-5", "1234e-6"] - for value in test_values: - expected = str(float(value)) - assert "e" not in expected, expected - self.assertEqual(expected, util.print_decimal(Decimal(value))) - def test_roman_number_conversion(self): test_data = { 1: "I", diff --git a/benchexec/tablegenerator/util.py b/benchexec/tablegenerator/util.py index 7a9c55a82..0172798f4 100644 --- a/benchexec/tablegenerator/util.py +++ b/benchexec/tablegenerator/util.py @@ -164,49 +164,6 @@ def to_decimal(s): return None -def print_decimal(d): - """ - Print a Decimal instance in non-scientific (i.e., decimal) notation with full - precision, i.e., all digits are printed exactly as stored in the Decimal instance. - Note that str(d) always falls back to scientific notation for very small values. - """ - - if d.is_nan(): - return "NaN" - elif d.is_infinite(): - return "Inf" if d > 0 else "-Inf" - assert d.is_finite() - - sign, digits, exp = d.as_tuple() - # sign is 1 if negative - # digits is exactly the sequence of significant digits in the decimal representation - # exp tells us whether we need to shift digits (pos: left shift; neg: right shift). - # left shift can only add zeros, right shift adds decimal separator - - sign = "-" if sign == 1 else "" - digits = list(map(str, digits)) - - if exp >= 0: - if digits == ["0"]: - # special case: return "0" instead of "0000" for "0e4" - return sign + "0" - return sign + "".join(digits) + ("0" * exp) - - # Split digits into parts before and after decimal separator. - # If -exp > len(digits) the result needs to start with "0.", so we force a 0. - integral_part = digits[:exp] or ["0"] - decimal_part = digits[exp:] - assert decimal_part - - return ( - sign - + "".join(integral_part) - + "." 
- + ("0" * (-exp - len(decimal_part))) # additional zeros if necessary - + "".join(decimal_part) - ) - - def collapse_equal_values(values, counts): """ Take a tuple (values, counts), remove consecutive values and increment their count instead. diff --git a/benchexec/test_util.py b/benchexec/test_util.py index cf2d79bbb..523d7161a 100644 --- a/benchexec/test_util.py +++ b/benchexec/test_util.py @@ -5,6 +5,7 @@ # # SPDX-License-Identifier: Apache-2.0 +from decimal import Decimal import sys import unittest from benchexec.util import ProcessExitCode @@ -54,6 +55,52 @@ def test_parse_timespan_value(self): self.assertEqual(util.parse_timespan_value("1h"), 60 * 60) self.assertEqual(util.parse_timespan_value("1d"), 24 * 60 * 60) + def test_print_decimal_roundtrip(self): + # These values should be printed exactly as in the input (with "+" removed) + test_values = [ + "NaN", + "Inf", + "-Inf", + "+Inf", + "0", + "-0", + "+0", + "0.0", + "-0.0", + "0.00000000000000000000", + "0.00000000000000000001", + "0.00000000123450000000", + "0.1", + "0.10000000000000000000", + "0.99999999999999999999", + "1", + "-1", + "+1", + "1000000000000000000000", + "10000000000.0000000000", + ] + for value in test_values: + expected = value.lstrip("+") + self.assertEqual(expected, util.print_decimal(Decimal(value))) + + def test_print_decimal_int(self): + # These values should be printed like Decimal prints them after quantizing + # to remove the exponent. + test_values = ["0e0", "-0e0", "0e20", "1e0", "1e20", "0e10"] + for value in test_values: + value = Decimal(value) + expected = str(value.quantize(1)) + assert "e" not in expected + self.assertEqual(expected, util.print_decimal(value)) + + def test_print_decimal_float(self): + # These values should be printed like str prints floats. 
+ test_values = ["1e-4", "123e-4", "1234e-4", "1234e-5", "1234e-6"] + for value in test_values: + expected = str(float(value)) + assert "e" not in expected, expected + self.assertEqual(expected, util.print_decimal(Decimal(value))) + class TestProcessExitCode(unittest.TestCase): @classmethod diff --git a/benchexec/util.py b/benchexec/util.py index 84f129225..70531b90c 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -251,6 +251,49 @@ def non_empty_str(s): return s +def print_decimal(d): + """ + Print a Decimal instance in non-scientific (i.e., decimal) notation with full + precision, i.e., all digits are printed exactly as stored in the Decimal instance. + Note that str(d) always falls back to scientific notation for very small values. + """ + + if d.is_nan(): + return "NaN" + elif d.is_infinite(): + return "Inf" if d > 0 else "-Inf" + assert d.is_finite() + + sign, digits, exp = d.as_tuple() + # sign is 1 if negative + # digits is exactly the sequence of significant digits in the decimal representation + # exp tells us whether we need to shift digits (pos: left shift; neg: right shift). + # left shift can only add zeros, right shift adds decimal separator + + sign = "-" if sign == 1 else "" + digits = list(map(str, digits)) + + if exp >= 0: + if digits == ["0"]: + # special case: return "0" instead of "0000" for "0e4" + return sign + "0" + return sign + "".join(digits) + ("0" * exp) + + # Split digits into parts before and after decimal separator. + # If -exp > len(digits) the result needs to start with "0.", so we force a 0. + integral_part = digits[:exp] or ["0"] + decimal_part = digits[exp:] + assert decimal_part + + return ( + sign + + "".join(integral_part) + + "." + + ("0" * (-exp - len(decimal_part))) # additional zeros if necessary + + "".join(decimal_part) + ) + + def expand_filename_pattern(pattern, base_dir): """ Expand a file name pattern containing wildcards, environment variables etc. 
From 8ab811106a9aba166fddf3e144d9e60687bf6dbe Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 17:41:36 +0200 Subject: [PATCH 092/133] Fix missing log message of benchexec.check_cgroups in cgroupsv2 If there are required subsystems that are not delegated, the check_cgroups script would just silently terminate. Now it produces a warning. --- benchexec/cgroups.py | 19 ++++++++++++++----- benchexec/cgroupsv1.py | 10 +--------- benchexec/cgroupsv2.py | 2 +- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 199956639..b3db83718 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -113,6 +113,9 @@ def __init__(self, subsystems): logging.debug("Available Cgroups: %s", self.subsystems) + # for error messages: + self.unusable_subsystems = set() + def __contains__(self, key): return key in self.subsystems @@ -196,7 +199,6 @@ def set_value(self, subsystem, option, value): assert subsystem in self util.write_file(str(value), self.subsystems[subsystem], f"{subsystem}.{option}") - @abstractmethod def require_subsystem(self, subsystem, log_method=logging.warning): """ Check whether the given subsystem is enabled and is writable @@ -206,7 +208,17 @@ def require_subsystem(self, subsystem, log_method=logging.warning): this instance such that further checks with "in" will return "False". @return A boolean value. """ - pass + if subsystem not in self: + if subsystem not in self.unusable_subsystems: + self.unusable_subsystems.add(subsystem) + log_method( + "Cgroup subsystem %s is not available. 
" + "Please make sure it is supported by your kernel and available.", + subsystem, + ) + return False + + return True @abstractmethod def handle_errors(self, critical_cgroups): @@ -323,9 +335,6 @@ def kill_all_tasks(self): def create_fresh_child_cgroup(self, subsystems): pass - def require_subsystem(self, subsystem, log_method=logging.warning): - pass - def handle_errors(self, critical_cgroups): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 40fe7358a..267eba7c8 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -251,7 +251,6 @@ def __init__(self, subsystems): super(CgroupsV1, self).__init__(subsystems) # for error messages: - self.unusable_subsystems = set() self.denied_subsystems = {} @classmethod @@ -279,14 +278,7 @@ def require_subsystem(self, subsystem, log_method=logging.warning): @return A boolean value. """ if subsystem not in self: - if subsystem not in self.unusable_subsystems: - self.unusable_subsystems.add(subsystem) - log_method( - "Cgroup subsystem %s is not available. " - "Please make sure it is supported by your kernel and mounted.", - subsystem, - ) - return False + return super().require_subsystem(subsystem, log_method) try: test_cgroup = self.create_fresh_child_cgroup([subsystem]) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 363d7c8bf..e6e166796 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -312,7 +312,7 @@ def require_subsystem(self, subsystem, log_method=logging.warning): # We can assume that creation of child cgroups works, # because we only use cgroups if we were able to move the current process # into a child cgroup in initialize(). 
- return subsystem in self + return super().require_subsystem(subsystem, log_method) def handle_errors(self, critical_cgroups): """ From 39808b6de1078820219847d550f2739ef1cca64c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 17:47:44 +0200 Subject: [PATCH 093/133] Remove redundant Cgroups.initialize() call RunExecutor does this for us anyway. And we do not want to log before this call because that messes up the formatting of log messages. --- benchexec/check_cgroups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 0028a9734..20dd9445b 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -136,8 +136,6 @@ def main(argv=None): options = parser.parse_args(argv[1:]) - Cgroups.initialize() - if options.no_thread: check_cgroup_availability(options.wait) else: From e77404f71a175126e1ac22d261adc4b0a4572c94 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 18:27:31 +0200 Subject: [PATCH 094/133] Move methods around to put them in better order --- benchexec/cgroups.py | 120 +++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index b3db83718..099bf0ad9 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -125,24 +125,49 @@ def __getitem__(self, key): def __str__(self): return str(self.paths) - def _remove_cgroup(self, path): - if not os.path.exists(path): - logging.warning("Cannot remove CGroup %s, because it does not exist.", path) - return - assert not self.has_tasks(path) - try: - os.rmdir(path) - except OSError: - # sometimes this fails because the cgroup is still busy, we try again once - try: - os.rmdir(path) - except OSError as e: - logging.warning( - "Failed to remove cgroup %s: error %s (%s)", - path, - e.errno, - e.strerror, + def require_subsystem(self, subsystem, log_method=logging.warning): + """ + Check whether the given 
subsystem is enabled and is writable + (i.e., new cgroups can be created for it). + Produces a log message for the user if one of the conditions is not fulfilled. + If the subsystem is enabled but not writable, it will be removed from + this instance such that further checks with "in" will return "False". + @return A boolean value. + """ + if subsystem not in self: + if subsystem not in self.unusable_subsystems: + self.unusable_subsystems.add(subsystem) + log_method( + "Cgroup subsystem %s is not available. " + "Please make sure it is supported by your kernel and available.", + subsystem, ) + return False + + return True + + @abstractmethod + def handle_errors(self, critical_cgroups): + """ + If there were errors in calls to require_subsystem() and critical_cgroups + is not empty, terminate the program with an error message that explains how to + fix the problem. + + @param critical_cgroups: set of unusable but required cgroups + """ + pass + + @abstractmethod + def create_fresh_child_cgroup(self, subsystems): + pass + + @abstractmethod + def add_task(self, pid): + pass + + @abstractmethod + def kill_all_tasks(self): + pass def has_value(self, subsystem, option): """ @@ -199,38 +224,6 @@ def set_value(self, subsystem, option, value): assert subsystem in self util.write_file(str(value), self.subsystems[subsystem], f"{subsystem}.{option}") - def require_subsystem(self, subsystem, log_method=logging.warning): - """ - Check whether the given subsystem is enabled and is writable - (i.e., new cgroups can be created for it). - Produces a log message for the user if one of the conditions is not fulfilled. - If the subsystem is enabled but not writable, it will be removed from - this instance such that further checks with "in" will return "False". - @return A boolean value. - """ - if subsystem not in self: - if subsystem not in self.unusable_subsystems: - self.unusable_subsystems.add(subsystem) - log_method( - "Cgroup subsystem %s is not available. 
" - "Please make sure it is supported by your kernel and available.", - subsystem, - ) - return False - - return True - - @abstractmethod - def handle_errors(self, critical_cgroups): - """ - If there were errors in calls to require_subsystem() and critical_cgroups - is not empty, terminate the program with an error message that explains how to - fix the problem. - - @param critical_cgroups: set of unusable but required cgroups - """ - pass - def remove(self): """ Remove all cgroups this instance represents from the system. @@ -242,17 +235,24 @@ def remove(self): del self.paths del self.subsystems - @abstractmethod - def add_task(self, pid): - pass - - @abstractmethod - def kill_all_tasks(self): - pass - - @abstractmethod - def create_fresh_child_cgroup(self, subsystems): - pass + def _remove_cgroup(self, path): + if not os.path.exists(path): + logging.warning("Cannot remove CGroup %s, because it does not exist.", path) + return + assert not self.has_tasks(path) + try: + os.rmdir(path) + except OSError: + # sometimes this fails because the cgroup is still busy, we try again once + try: + os.rmdir(path) + except OSError as e: + logging.warning( + "Failed to remove cgroup %s: error %s (%s)", + path, + e.errno, + e.strerror, + ) @abstractmethod def read_cputime(self): From f787fae864bbbc91bdb56c2f1e0456dc7674a27f Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 30 Mar 2023 18:34:46 +0200 Subject: [PATCH 095/133] Simplify kill_all_tasks_in_cgroups This brings it back to the previous state of this code. It is not clear how a cgroup should vanish while we use it, we never had problems with this. And the cgroupsv2 variant also does not handle this.
--- benchexec/cgroupsv1.py | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 267eba7c8..6ff7e3ccc 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -167,29 +167,23 @@ def kill_all_tasks_in_cgroup(cgroup): # SIGKILL. We added this loop when killing sub-processes was not reliable # and we did not know why, but now it is reliable. for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: - task = None - try: - with _force_open_read(tasksFile) as tasks: - for task in tasks: - task = task.strip() - if i > 1: - logging.warning( - "Run has left-over process with pid %s " - "in cgroup %s, sending signal %s (try %s).", - task, - cgroup, - sig, - i, - ) - util.kill_process(int(task), sig) - except FileNotFoundError: - logging.warning( - "cgroup tasks file %s " "could no longer be found while killing", - tasksFile, - ) - - if task is None: - return # No process was hanging, exit + with _force_open_read(tasksFile) as tasks: + task = None + for task in tasks: + task = task.strip() + if i > 1: + logging.warning( + "Run has left-over process with pid %s " + "in cgroup %s, sending signal %s (try %s).", + task, + cgroup, + sig, + i, + ) + util.kill_process(int(task), sig) + + if task is None: + return # No process was hanging, exit # wait for the process to exit, this might take some time time.sleep(i * 0.5) From cd8dad9746e769759006c82681982bdbf415aab4 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 10:35:28 +0200 Subject: [PATCH 096/133] Simplify handling of process termination on cgroupsv2 On cgroupsv2, frozen processes can still be killed (cf. https://docs.kernel.org/admin-guide/cgroup-v2.html). This means we can simplify the handling and do not need to unfreeze the cgroups. Furthermore, more complex solutions for frozen sub-cgroups as in 87e2bcb6 are not needed. 
--- benchexec/cgroupsv2.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index e6e166796..c7ca8560d 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -199,7 +199,7 @@ def _parse_proc_pid_cgroup(cgroup_file): return path -def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): +def kill_all_tasks_in_cgroup(cgroup): tasksFile = cgroup / "cgroup.procs" i = 0 @@ -224,7 +224,7 @@ def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True): ) util.kill_process(int(task), sig) - if task is None or not ensure_empty: + if task is None: return # No process was hanging, exit # wait for the process to exit, this might take some time time.sleep(i * 0.5) @@ -377,35 +377,28 @@ def kill_all_tasks(self): Additionally, the children cgroups will be deleted. """ - def kill_all_tasks_in_cgroup_recursively(cgroup, delete): + def kill_all_tasks_in_cgroup_recursively(cgroup): for dirpath, dirs, _files in os.walk(cgroup, topdown=False): for subCgroup in dirs: subCgroup = pathlib.Path(dirpath) / subCgroup - kill_all_tasks_in_cgroup(subCgroup, ensure_empty=delete) + kill_all_tasks_in_cgroup(subCgroup) + self._remove_cgroup(subCgroup) - if delete: - self._remove_cgroup(subCgroup) - - kill_all_tasks_in_cgroup(cgroup, ensure_empty=delete) + kill_all_tasks_in_cgroup(cgroup) if self.KILL in self.subsystems: + # This will immediately terminate all processes recursively, even if frozen util.write_file("1", self.path / "cgroup.kill") return # First, we go through all cgroups recursively while they are frozen and kill # all processes. This helps against fork bombs and prevents processes from # creating new subgroups while we are trying to kill everything. - # All processes will stay until they are thawed (so we cannot check for cgroup - # emptiness and we cannot delete subgroups). + # On cgroupsv2, frozen processes can still be killed, so this is all we need to + # do. 
freezer_file = self.path / "cgroup.freeze" - util.write_file("1", freezer_file) - kill_all_tasks_in_cgroup_recursively(self.path, delete=False) - util.write_file("0", freezer_file) - - # Second, we go through all cgroups again, kill what is left, - # check for emptiness, and remove subgroups. - kill_all_tasks_in_cgroup_recursively(self.path, delete=True) + kill_all_tasks_in_cgroup_recursively(self.path) def read_cputime(self): cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) From e3241811fda06fa0bb9f70b8dc598517a7cf158d Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 11:22:18 +0200 Subject: [PATCH 097/133] Fix bug for cgroupsv2: child cgroups were not deleted --- benchexec/cgroupsv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index c7ca8560d..8c7249e97 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -389,7 +389,7 @@ def kill_all_tasks_in_cgroup_recursively(cgroup): if self.KILL in self.subsystems: # This will immediately terminate all processes recursively, even if frozen util.write_file("1", self.path / "cgroup.kill") - return + # We still need to clean up any child cgroups. # First, we go through all cgroups recursively while they are frozen and kill # all processes. This helps against fork bombs and prevents processes from From 6cb8fafcc985dcabdc6d73765abb6f66669eefc7 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 11:31:04 +0200 Subject: [PATCH 098/133] Guard against child cgroups with unexpected permissions for cgroupsv2 This ports a1515702 and 30a0e4bc to cgroupsv2. 
--- benchexec/cgroupsv2.py | 51 ++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 8c7249e97..0e630e560 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -10,6 +10,7 @@ import pathlib import secrets import signal +import stat import sys import tempfile import threading @@ -199,6 +200,18 @@ def _parse_proc_pid_cgroup(cgroup_file): return path +def _force_open_read(filename): + """ + Open a file for reading even if we have no read permission, + as long as we can grant it to us. + """ + try: + return open(filename, "rt") + except OSError: + os.chmod(filename, stat.S_IRUSR) + return open(filename, "rt") + + def kill_all_tasks_in_cgroup(cgroup): tasksFile = cgroup / "cgroup.procs" @@ -209,7 +222,7 @@ def kill_all_tasks_in_cgroup(cgroup): # SIGKILL. We added this loop when killing sub-processes was not reliable # and we did not know why, but now it is reliable. for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]: - with open(tasksFile, "rt") as tasks: + with _force_open_read(tasksFile) as tasks: task = None for task in tasks: task = task.strip() @@ -376,19 +389,30 @@ def kill_all_tasks(self): Kill all tasks in this cgroup and all its children cgroups forcefully. Additionally, the children cgroups will be deleted. """ + # In this method we should attempt to guard against child cgroups + # that have been created and manipulated by processes in the run. + # For example, they could have removed permissions from files and directories. 
- def kill_all_tasks_in_cgroup_recursively(cgroup): - for dirpath, dirs, _files in os.walk(cgroup, topdown=False): - for subCgroup in dirs: - subCgroup = pathlib.Path(dirpath) / subCgroup - kill_all_tasks_in_cgroup(subCgroup) - self._remove_cgroup(subCgroup) + def recursive_child_cgroups(cgroup): + def raise_error(e): + raise e - kill_all_tasks_in_cgroup(cgroup) + try: + for dirpath, dirs, _files in os.walk( + cgroup, topdown=False, onerror=raise_error + ): + for subCgroup in dirs: + yield pathlib.Path(os.path.join(dirpath, subCgroup)) + except OSError as e: + # some process might have made a child cgroup inaccessible + os.chmod(e.filename, stat.S_IRUSR | stat.S_IXUSR) + # restart, which might yield already yielded cgroups again, + # but this is ok for the callers of recursive_child_cgroups() + yield from recursive_child_cgroups(cgroup) if self.KILL in self.subsystems: # This will immediately terminate all processes recursively, even if frozen - util.write_file("1", self.path / "cgroup.kill") + util.write_file("1", self.path, "cgroup.kill", force=True) # We still need to clean up any child cgroups. # First, we go through all cgroups recursively while they are frozen and kill @@ -396,9 +420,12 @@ def kill_all_tasks_in_cgroup_recursively(cgroup): # creating new subgroups while we are trying to kill everything. # On cgroupsv2, frozen processes can still be killed, so this is all we need to # do. 
- freezer_file = self.path / "cgroup.freeze" - util.write_file("1", freezer_file) - kill_all_tasks_in_cgroup_recursively(self.path) + util.write_file("1", self.path, "cgroup.freeze", force=True) + for child_cgroup in recursive_child_cgroups(self.path): + kill_all_tasks_in_cgroup(child_cgroup) + self._remove_cgroup(child_cgroup) + + kill_all_tasks_in_cgroup(self.path) def read_cputime(self): cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) From ff28210f439e401ecf8b36cd9cf76b7d2f38d198 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 10:29:33 +0200 Subject: [PATCH 099/133] Port and enable test with frozen processes for cgroupsv2 --- benchexec/test_runexecutor.py | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index 793994629..fb037afc7 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -813,7 +813,7 @@ def test_frozen_process(self): # https://github.com/sosy-lab/benchexec/issues/840 if not os.path.exists(self.sleep): self.skipTest("missing sleep") - if not os.path.exists("/sys/fs/cgroup/freezer"): + if self.cgroups.version == 1 and not os.path.exists("/sys/fs/cgroup/freezer"): self.skipTest("missing freezer cgroup") self.setUp( dir_modes={ @@ -823,10 +823,7 @@ def test_frozen_process(self): "/sys/fs/cgroup": containerexecutor.DIR_FULL_ACCESS, } ) - (result, output) = self.execute_run( - "/bin/sh", - "-c", - """#!/bin/sh + script_v1 = """#!/bin/sh # create process, move it to sub-cgroup, and freeze it set -eu @@ -847,7 +844,33 @@ def test_frozen_process(self): chmod 000 "$cgroup/tasks" echo FROZEN wait $child_pid -""", +""" + script_v2 = """#!/bin/sh +# create process, move it to sub-cgroup, and freeze it +set -eu + +cgroup="/sys/fs/cgroup/$(cut -f 3 -d : /proc/self/cgroup)" +mkdir "$cgroup/tmp" +mkdir "$cgroup/tmp/tmp" + +sleep 10 & +child_pid=$! 
+ +echo $child_pid > "$cgroup/tmp/cgroup.procs" +echo 1 > "$cgroup/tmp/cgroup.freeze" +# remove permissions in order to test our handling of this case +chmod 000 "$cgroup/tmp/cgroup.freeze" +chmod 000 "$cgroup/tmp/cgroup.procs" +chmod 000 "$cgroup/tmp" +chmod 000 "$cgroup/cgroup.freeze" +chmod 000 "$cgroup/cgroup.kill" +echo FROZEN +wait $child_pid +""" + (result, output) = self.execute_run( + "/bin/sh", + "-c", + script_v1 if self.cgroups.version == 1 else script_v2, walltimelimit=1, expect_terminationreason="walltime", ) From cbf65433c0e973938103a7423a7c7836897b9ea9 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 14:40:04 +0200 Subject: [PATCH 100/133] Remove some unimportant logging that clutters the output --- benchexec/cgroupsv1.py | 6 +++--- benchexec/cgroupsv2.py | 2 -- benchexec/runexecutor.py | 6 +----- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index bf1b5726a..71468e509 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -494,13 +494,13 @@ def read_max_mem_usage(self): return None def read_mem_pressure(self): - logging.debug("Pressure metrics not supported in cgroups v1") + return None def read_cpu_pressure(self): - logging.debug("Pressure metrics not supported in cgroups v1") + return None def read_io_pressure(self): - logging.debug("Pressure metrics not supported in cgroups v1") + return None def read_usage_per_cpu(self): usage = {} diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 0e630e560..52b413c84 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -460,8 +460,6 @@ def read_io_pressure(self): return Decimal(stats_map["total"]) / 1_000_000 def read_usage_per_cpu(self): - logging.debug("Usage per CPU not supported in cgroups v2") - return {} def read_allowed_cpus(self): diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 3b0979c27..6b3981537 100644 --- a/benchexec/runexecutor.py +++ 
b/benchexec/runexecutor.py @@ -1066,11 +1066,7 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): if cgroups.MEMORY in cgroups: max_mem_usage = cgroups.read_max_mem_usage() - if max_mem_usage is None: - logging.warning( - "Memory-usage is not available for cgroups v2 or due to missing files." - ) - else: + if max_mem_usage is not None: result["memory"] = max_mem_usage oom_count = cgroups.read_oom_count() From 06ae55b32f40a76bca95fe3dbe085743337cd726 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 14:44:08 +0200 Subject: [PATCH 101/133] Implement TODO: improve interface of Cgroups.has_tasks() We split the internal method that takes a path from the one that checks the current cgroup (and which makes sense to expose to clients). --- benchexec/cgroups.py | 7 +++---- benchexec/cgroupsv1.py | 2 +- benchexec/cgroupsv2.py | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index f9b49f6c1..69d6f375b 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -241,7 +241,7 @@ def _remove_cgroup(self, path): if not os.path.exists(path): logging.warning("Cannot remove CGroup %s, because it does not exist.", path) return - assert not self.has_tasks(path) + assert not self._has_tasks(path) try: os.rmdir(path) except OSError: @@ -300,9 +300,8 @@ def read_allowed_memory_banks(self): def read_io_stat(self): pass - # TODO improve interface @abstractmethod - def has_tasks(self, path): + def _has_tasks(self, path): pass @abstractmethod @@ -369,7 +368,7 @@ def read_allowed_memory_banks(self): def read_io_stat(self): pass - def has_tasks(self, path): + def _has_tasks(self, path): pass def write_memory_limit(self, limit): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 71468e509..131a2b30c 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -540,7 +540,7 @@ def read_io_stat(self): pass # There are irrelevant lines in this file with a different 
structure return bytes_read, bytes_written - def has_tasks(self, path): + def _has_tasks(self, path): return util.read_file(path, "tasks") != "" def write_memory_limit(self, limit): diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 52b413c84..aa6b1a988 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -480,8 +480,10 @@ def read_io_stat(self): bytes_written += int(stats_map["wbytes"]) return bytes_read, bytes_written - def has_tasks(self, path=None): - path = path or self.path + def has_tasks(self): + return self._has_tasks(self.path) + + def _has_tasks(self, path): return bool((path / "cgroup.procs").read_bytes().strip()) def write_memory_limit(self, limit): From 419cae35bd353481837e9d005d333831412cd0d3 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 15:21:12 +0200 Subject: [PATCH 102/133] Avoid crash in read_memory_limit() if limit is "max" We never call this function in this case currently, but this might change. --- benchexec/cgroupsv2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index aa6b1a988..41bfbc309 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -490,7 +490,8 @@ def write_memory_limit(self, limit): self.set_value(self.MEMORY, "max", limit) def read_memory_limit(self): - return int(self.get_value(self.MEMORY, "max")) + limit = self.get_value(self.MEMORY, "max") + return None if limit == "max" else int(limit) def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") From 7aec1e20f90b357a26f1132ffb99bf64c06c329c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 15:43:10 +0200 Subject: [PATCH 103/133] Check preexisting memory limit also on cgroupsv2 --- benchexec/cgroups.py | 8 ++++++++ benchexec/cgroupsv1.py | 9 +++++++++ benchexec/cgroupsv2.py | 15 +++++++++++++++ benchexec/resources.py | 12 +++--------- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git 
a/benchexec/cgroups.py b/benchexec/cgroups.py index 69d6f375b..d30bcb544 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -312,6 +312,11 @@ def write_memory_limit(self, limit): def read_memory_limit(self): pass + @abstractmethod + def read_hierarchical_memory_limit(self): + """Read the memory limit that applies to the current cgroup or any parent.""" + pass + @abstractmethod def read_oom_count(self): pass @@ -377,6 +382,9 @@ def write_memory_limit(self, limit): def read_memory_limit(self): pass + def read_hierarchical_memory_limit(self): + pass + def read_oom_count(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 131a2b30c..0f8e9e914 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -571,6 +571,15 @@ def write_memory_limit(self, limit): def read_memory_limit(self): return int(self.get_value(self.MEMORY, "limit_in_bytes")) + def read_hierarchical_memory_limit(self): + limit = self.read_memory_limit() + # We also use the entries hierarchical_*_limit in memory.stat + # because it may be lower if memory.use_hierarchy is enabled. + for key, value in self.get_key_value_pairs(self.MEMORY, "stat"): + if key == "hierarchical_memory_limit" or key == "hierarchical_memsw_limit": + limit = min(limit, int(value)) + return limit + def disable_swap(self): # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 41bfbc309..2abd7c0fc 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -493,6 +493,21 @@ def read_memory_limit(self): limit = self.get_value(self.MEMORY, "max") return None if limit == "max" else int(limit) + def read_hierarchical_memory_limit(self): + # We do not know a way how to read the effective memory limit without looking at + # all parents. 
+ limit = self.read_memory_limit() + for parent_cgroup in self.path.parents: + try: + parent_limit = util.read_file(parent_cgroup, "memory.max") + if parent_limit != "max": + limit = min(limit, int(parent_limit)) + except OSError: + # reached parent directory of cgroupfs + return limit + + assert False # will never be reached + def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") diff --git a/benchexec/resources.py b/benchexec/resources.py index b9911e89d..8116ac9ca 100644 --- a/benchexec/resources.py +++ b/benchexec/resources.py @@ -389,15 +389,9 @@ def check_limit(actualLimit): return if my_cgroups.MEMORY in my_cgroups: - # We use the entries hierarchical_*_limit in memory.stat and not memory.*limit_in_bytes - # because the former may be lower if memory.use_hierarchy is enabled. - # FIXME v2 - for key, value in my_cgroups.get_key_value_pairs(my_cgroups.MEMORY, "stat"): - if ( - key == "hierarchical_memory_limit" - or key == "hierarchical_memsw_limit" - ): - check_limit(int(value)) + actual_limit = my_cgroups.read_hierarchical_memory_limit() + if actual_limit is not None: + check_limit(actual_limit) # Get list of all memory banks, either from memory assignment or from system. 
if not memoryAssignment: From 51f39fa528583ecc056fda89d4ea4dc78e108640 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:03:32 +0200 Subject: [PATCH 104/133] Refactoring: refactor out common logic --- benchexec/runexecutor.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 6b3981537..519562d49 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1021,6 +1021,10 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): cputime_wait = ru_child.ru_utime + ru_child.ru_stime if ru_child else 0 cputime_cgroups = None + def store_result(key, value): + if value is not None: + result[key] = value + if cgroups.CPU in cgroups: # We want to read the value from the cgroup. # The documentation warns about outdated values. @@ -1060,29 +1064,21 @@ def _get_cgroup_measurements(self, cgroups, ru_child, result): for core, coretime in cgroups.read_usage_per_cpu().items(): result[f"cputime-cpu{core}"] = coretime - cpu_pressure = cgroups.read_cpu_pressure() - if cpu_pressure is not None: - result["total-cpu-pressure-some"] = cpu_pressure + store_result("total-cpu-pressure-some", cgroups.read_cpu_pressure()) if cgroups.MEMORY in cgroups: - max_mem_usage = cgroups.read_max_mem_usage() - if max_mem_usage is not None: - result["memory"] = max_mem_usage + store_result("memory", cgroups.read_max_mem_usage()) oom_count = cgroups.read_oom_count() if oom_count: result["oom"] = oom_count - mem_pressure = cgroups.read_mem_pressure() - if mem_pressure is not None: - result["total-memory-pressure-some"] = mem_pressure + store_result("total-memory-pressure-some", cgroups.read_mem_pressure()) if cgroups.IO in cgroups: result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() - io_pressure = cgroups.read_io_pressure() - if io_pressure is not None: - result["total-io-pressure-some"] = io_pressure + store_result("total-io-pressure-some", 
cgroups.read_io_pressure()) logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", From 2271d1c8c8e0308354a235bf69fe09738caeaa79 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:07:37 +0200 Subject: [PATCH 105/133] Always read pressure stall information from cgroupsv2 This information is independent of the enabled controllers, so we can always output it. --- benchexec/cgroupsv2.py | 8 +++++--- benchexec/runexecutor.py | 10 +++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 2abd7c0fc..95e50b054 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -439,21 +439,23 @@ def read_max_mem_usage(self): return def read_mem_pressure(self): - mem_stats = dict(self.get_key_value_pairs(self.MEMORY, "pressure")) + mem_stats = dict( + util.read_key_value_pairs_from_file(self.path, "memory.pressure") + ) mem_some_stats = mem_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in mem_some_stats)} return Decimal(stats_map["total"]) / 1_000_000 def read_cpu_pressure(self): - cpu_stats = dict(self.get_key_value_pairs(self.CPU, "pressure")) + cpu_stats = dict(util.read_key_value_pairs_from_file(self.path, "cpu.pressure")) cpu_some_stats = cpu_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in cpu_some_stats)} return Decimal(stats_map["total"]) / 1_000_000 def read_io_pressure(self): - io_stats = dict(self.get_key_value_pairs(self.IO, "pressure")) + io_stats = dict(util.read_key_value_pairs_from_file(self.path, "io.pressure")) io_some_stats = io_stats["some"].split(" ") stats_map = {s[0]: s[1] for s in (s.split("=") for s in io_some_stats)} diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 519562d49..ebc9bd2af 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1064,8 +1064,6 @@ def store_result(key, value): for core, coretime in 
cgroups.read_usage_per_cpu().items(): result[f"cputime-cpu{core}"] = coretime - store_result("total-cpu-pressure-some", cgroups.read_cpu_pressure()) - if cgroups.MEMORY in cgroups: store_result("memory", cgroups.read_max_mem_usage()) @@ -1073,12 +1071,14 @@ def store_result(key, value): if oom_count: result["oom"] = oom_count - store_result("total-memory-pressure-some", cgroups.read_mem_pressure()) - if cgroups.IO in cgroups: result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() - store_result("total-io-pressure-some", cgroups.read_io_pressure()) + # Pressure information does not depend on enabled controllers: + # https://docs.kernel.org/accounting/psi.html + store_result("total-cpu-pressure-some", cgroups.read_cpu_pressure()) + store_result("total-memory-pressure-some", cgroups.read_mem_pressure()) + store_result("total-io-pressure-some", cgroups.read_io_pressure()) logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", From 9f1029c7d77ecba6e721004ca4a0fe3c9c9df77d Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:15:25 +0200 Subject: [PATCH 106/133] Refactoring: Implement reading of pressure information only once --- benchexec/cgroupsv2.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 95e50b054..1b26e8850 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -438,28 +438,23 @@ def read_max_mem_usage(self): return - def read_mem_pressure(self): - mem_stats = dict( - util.read_key_value_pairs_from_file(self.path, "memory.pressure") - ) - mem_some_stats = mem_stats["some"].split(" ") - stats_map = {s[0]: s[1] for s in (s.split("=") for s in mem_some_stats)} + def _read_pressure_stall_information(self, subsystem): + for line in open(self.path / (subsystem + ".pressure")): + if line.startswith("some "): + for item in line.split(" ")[1:]: + k, v = item.split("=") + if k == 
"total": + return Decimal(v) / 1_000_000 + return None - return Decimal(stats_map["total"]) / 1_000_000 + def read_mem_pressure(self): + return self._read_pressure_stall_information("memory") def read_cpu_pressure(self): - cpu_stats = dict(util.read_key_value_pairs_from_file(self.path, "cpu.pressure")) - cpu_some_stats = cpu_stats["some"].split(" ") - stats_map = {s[0]: s[1] for s in (s.split("=") for s in cpu_some_stats)} - - return Decimal(stats_map["total"]) / 1_000_000 + return self._read_pressure_stall_information("cpu") def read_io_pressure(self): - io_stats = dict(util.read_key_value_pairs_from_file(self.path, "io.pressure")) - io_some_stats = io_stats["some"].split(" ") - stats_map = {s[0]: s[1] for s in (s.split("=") for s in io_some_stats)} - - return Decimal(stats_map["total"]) / 1_000_000 + return self._read_pressure_stall_information("io") def read_usage_per_cpu(self): return {} From 327cd01fb785b0cd966995842a791cf408e81201 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:23:14 +0200 Subject: [PATCH 107/133] Rename and document the pressure values that runexec returns --- benchexec/runexecutor.py | 12 ++++++------ benchexec/test_runexecutor.py | 6 +++--- doc/run-results.md | 1 + 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index ebc9bd2af..d25055043 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -304,9 +304,9 @@ def print_optional_result(key, unit=""): print_optional_result("memory", "B") print_optional_result("blkio-read", "B") print_optional_result("blkio-write", "B") - print_optional_result("total-cpu-pressure-some", "s") - print_optional_result("total-io-pressure-some", "s") - print_optional_result("total-memory-pressure-some", "s") + print_optional_result("pressure-cpu-some", "s") + print_optional_result("pressure-io-some", "s") + print_optional_result("pressure-memory-some", "s") energy = 
intel_cpu_energy.format_energy_results(result.get("cpuenergy")) for energy_key, energy_value in energy.items(): print(f"{energy_key}={energy_value}J") @@ -1076,9 +1076,9 @@ def store_result(key, value): # Pressure information does not depend on enabled controllers: # https://docs.kernel.org/accounting/psi.html - store_result("total-cpu-pressure-some", cgroups.read_cpu_pressure()) - store_result("total-memory-pressure-some", cgroups.read_mem_pressure()) - store_result("total-io-pressure-some", cgroups.read_io_pressure()) + store_result("pressure-cpu-some", cgroups.read_cpu_pressure()) + store_result("pressure-memory-some", cgroups.read_mem_pressure()) + store_result("pressure-io-some", cgroups.read_io_pressure()) logging.debug( "Resource usage of run: walltime=%s, cputime=%s, cgroup-cputime=%s, memory=%s", diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py index fb037afc7..340164887 100644 --- a/benchexec/test_runexecutor.py +++ b/benchexec/test_runexecutor.py @@ -157,9 +157,9 @@ def check_result_keys(self, result, *additional_keys): "blkio-read", "blkio-write", "starttime", - "total-cpu-pressure-some", - "total-io-pressure-some", - "total-memory-pressure-some", + "pressure-cpu-some", + "pressure-io-some", + "pressure-memory-some", } expected_keys.update(additional_keys) for key in result.keys(): diff --git a/doc/run-results.md b/doc/run-results.md index 22f30cce4..8563e3ec0 100644 --- a/doc/run-results.md +++ b/doc/run-results.md @@ -57,6 +57,7 @@ The meanings of the current possible result values are as follows: The value might not accurately represent disk I/O due to caches or if virtual block devices such as LVM, RAID, RAM disks etc. are used. - **cpuenergy-pkg``**: Energy consumption of the CPU ([more information](resources.md#energy)). This is still experimental. 
+- **pressure-`*`-some**: Number of seconds (as decimal with suffix "s") that at least some process had to wait for the respective resource, e.g., the CPU becoming available ([more information](https://docs.kernel.org/accounting/psi.html)). - **returnvalue**: The return value of the process (between 0 and 255). Not present if process was killed. - **exitsignal**: The signal with which the process was killed (if any). From 2eb1f91a4905ba4c627070ba47fa669292a46d77 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:24:41 +0200 Subject: [PATCH 108/133] Fix missing units for pressure information in XML results --- benchexec/outputhandler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchexec/outputhandler.py b/benchexec/outputhandler.py index d1a52a3bc..f696fe73a 100644 --- a/benchexec/outputhandler.py +++ b/benchexec/outputhandler.py @@ -723,6 +723,8 @@ def add_column_to_xml(self, xml, title, value, prefix="", value_suffix=""): value_suffix = "B" elif title.startswith("mbm"): value_suffix = "B/s" + elif title.startswith("pressure-") and title.endswith("-some"): + value_suffix = "s" value = f"{value}{value_suffix}" From d3d1d744f234f503980ddd6533e997aa047663cb Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:28:35 +0200 Subject: [PATCH 109/133] Implement reading of memory usage for cgroupsv2 Since Linux 5.19 this is supported. 
--- benchexec/cgroupsv2.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 1b26e8850..4281733cc 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -434,9 +434,10 @@ def read_cputime(self): return float(cpu_stats["usage_usec"]) / 1_000_000 def read_max_mem_usage(self): - logging.debug("Memory-usage not supported in cgroups v2") - - return + # Was only added in Linux 5.19 + if self.has_value(self.MEMORY, "peak"): + return int(self.get_value(self.MEMORY, "peak")) + return None def _read_pressure_stall_information(self, subsystem): for line in open(self.path / (subsystem + ".pressure")): From 88c337e159037a98ffa1a7b206a102a91aaab489 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 16:34:45 +0200 Subject: [PATCH 110/133] Slight refactorings for reading cgroup values --- benchexec/cgroupsv1.py | 2 +- benchexec/cgroupsv2.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 0f8e9e914..e050c485e 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -585,7 +585,7 @@ def disable_swap(self): # https://www.kernel.org/doc/Documentation/cgroups/memory.txt # (unlike setting the global swappiness to 0). # Our process might get killed because of this. 
- return self.set_value(self.MEMORY, "swappiness", "0") + self.set_value(self.MEMORY, "swappiness", "0") def read_oom_count(self): # not supported in v1, see oomhandler and memory_used > memlimit impl diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 4281733cc..99669e83f 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -428,10 +428,11 @@ def raise_error(e): kill_all_tasks_in_cgroup(self.path) def read_cputime(self): - cpu_stats = dict(self.get_key_value_pairs(self.CPU, "stat")) - - # TODO switch to Decimal together with all other float values - return float(cpu_stats["usage_usec"]) / 1_000_000 + for k, v in self.get_key_value_pairs(self.CPU, "stat"): + if k == "usage_usec": + # TODO switch to Decimal together with all other float values + return int(v) / 1_000_000 + return None def read_max_mem_usage(self): # Was only added in Linux 5.19 @@ -510,9 +511,8 @@ def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") def read_oom_count(self): - for line in self.get_file_lines(self.MEMORY, "events"): - k, v = line.split(" ") + for k, v in self.get_key_value_pairs(self.MEMORY, "events"): if k == "oom_kill": return int(v) - return 0 + return None From 1c98bc40d196d42fd2e027e0f96be56cf7fc108c Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 17:09:40 +0200 Subject: [PATCH 111/133] Fix type error found by pytype --- benchexec/cgroups.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index d30bcb544..01af8e450 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -376,6 +376,9 @@ def read_io_stat(self): def _has_tasks(self, path): pass + def has_tasks(self): + pass + def write_memory_limit(self, limit): pass From 590ac51cd6087e3dd7fc0318f70319e92b02b5a5 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Mon, 3 Apr 2023 17:42:10 +0200 Subject: [PATCH 112/133] Fix warning about swapaccount being always shown for cgroupsv2 --- benchexec/cgroups.py | 8 
++++++++ benchexec/cgroupsv1.py | 3 +++ benchexec/cgroupsv2.py | 3 +++ benchexec/runexecutor.py | 7 +------ 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 01af8e450..f3f37f615 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -321,6 +321,11 @@ def read_hierarchical_memory_limit(self): def read_oom_count(self): pass + @abstractmethod + def can_limit_swap(self): + """Check wether cgroups can be used to limit swap usage.""" + pass + @abstractmethod def disable_swap(self): pass @@ -391,5 +396,8 @@ def read_hierarchical_memory_limit(self): def read_oom_count(self): pass + def can_limit_swap(self): + pass + def disable_swap(self): pass diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index e050c485e..71b12613a 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -580,6 +580,9 @@ def read_hierarchical_memory_limit(self): limit = min(limit, int(value)) return limit + def can_limit_swap(self): + return self.has_value(self.MEMORY, "memsw.max_usage_in_bytes") + def disable_swap(self): # Note that this disables swapping completely according to # https://www.kernel.org/doc/Documentation/cgroups/memory.txt diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 99669e83f..659f104bd 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -507,6 +507,9 @@ def read_hierarchical_memory_limit(self): assert False # will never be reached + def can_limit_swap(self): + return self.has_value(self.MEMORY, "swap.max") + def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index d25055043..280430b02 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -371,12 +371,7 @@ def _init_cgroups(self): if self.cgroups.MEMORY not in self.cgroups: logging.warning("Cannot measure memory consumption without memory cgroup.") else: - # FIXME - if systeminfo.has_swap() and ( - not 
self.cgroups.has_value( - self.cgroups.MEMORY, "memsw.max_usage_in_bytes" - ) - ): + if systeminfo.has_swap() and not self.cgroups.can_limit_swap(): logging.warning( "Kernel misses feature for accounting swap memory, but machine has swap. " "Memory usage may be measured inaccurately. " From ae71489d3b07f6e069ff07fb3cf234320c2a6b43 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 4 Apr 2023 07:24:50 +0200 Subject: [PATCH 113/133] Detect OOM kills only in our cgroup, not in child cgroups (for cgroupsv2) Otherwise for nested cgroup usage we would always mark a run as OOM if there was an OOM event in some child cgroup. --- benchexec/cgroupsv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 659f104bd..03bfafec4 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -514,7 +514,7 @@ def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") def read_oom_count(self): - for k, v in self.get_key_value_pairs(self.MEMORY, "events"): + for k, v in self.get_key_value_pairs(self.MEMORY, "events.local"): if k == "oom_kill": return int(v) From 6938ef83a8381e34d99859d5a4ba52a40f738afc Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 5 Apr 2023 07:16:58 +0200 Subject: [PATCH 114/133] Fix OOM handling on cgroupsv2 If only a subprocess of the run triggers the OOM, we still want to terminate the run, but so far we would not notice. On cgroupsv1 we need a thread that listens for kernel events, but on cgroupsv2 we can just tell the kernel to kill everything for us. 
--- benchexec/cgroupsv2.py | 4 ++++ benchexec/runexecutor.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 03bfafec4..5c1761ffb 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -487,6 +487,10 @@ def _has_tasks(self, path): def write_memory_limit(self, limit): self.set_value(self.MEMORY, "max", limit) + # On OOM we want to terminate the whole run, but we would not notice if the + # kernel kills only some random subprocess. So we tell it to kill all processes + # in the cgroup. This is available since Linux 4.19. + self.set_value(self.MEMORY, "oom.group", 1) def read_memory_limit(self): limit = self.get_value(self.MEMORY, "max") diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 280430b02..a6ac1892f 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -562,6 +562,8 @@ def _setup_cgroup_memory_limit_thread(self, memlimit, cgroups, pid_to_kill): """Start memory-limit handler. @return None or the memory-limit handler for calling cancel() """ + # On CgroupsV2, the kernel kills the whole cgroup for us on OOM + # and we can detect OOMs reliably after the fact. So no need to do anything. if memlimit is not None and cgroups.version == 1: try: oomThread = oomhandler.KillProcessOnOomThread( From f2839b7d5e15c6c0571e6260a877b13a5a9dbffe Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 5 Apr 2023 07:24:48 +0200 Subject: [PATCH 115/133] Improve OOM handling for cgroupsv2 - We report terminationfailure=memory only if kernel tells us about OOM and do not use the heuristic "memory_usage>=memory_limit" anymore. Let's see whether this works reliably. - Do not leak the uninteresting "oom" count in the RunExecutor result. - Rename "OOM count" to "OOM kill count", which it actually is. 
--- benchexec/cgroups.py | 4 ++-- benchexec/cgroupsv1.py | 2 +- benchexec/cgroupsv2.py | 2 +- benchexec/runexecutor.py | 15 ++++++++++----- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index f3f37f615..9343f0939 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -318,7 +318,7 @@ def read_hierarchical_memory_limit(self): pass @abstractmethod - def read_oom_count(self): + def read_oom_kill_count(self): pass @abstractmethod @@ -393,7 +393,7 @@ def read_memory_limit(self): def read_hierarchical_memory_limit(self): pass - def read_oom_count(self): + def read_oom_kill_count(self): pass def can_limit_swap(self): diff --git a/benchexec/cgroupsv1.py b/benchexec/cgroupsv1.py index 71b12613a..ff3d0f3d2 100644 --- a/benchexec/cgroupsv1.py +++ b/benchexec/cgroupsv1.py @@ -590,6 +590,6 @@ def disable_swap(self): # Our process might get killed because of this. self.set_value(self.MEMORY, "swappiness", "0") - def read_oom_count(self): + def read_oom_kill_count(self): # not supported in v1, see oomhandler and memory_used > memlimit impl return None diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 5c1761ffb..d6273f0c7 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -517,7 +517,7 @@ def can_limit_swap(self): def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") - def read_oom_count(self): + def read_oom_kill_count(self): for k, v in self.get_key_value_pairs(self.MEMORY, "events.local"): if k == "oom_kill": return int(v) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index a6ac1892f..bdf4438fb 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1001,11 +1001,19 @@ def preSubprocess(): } if self._termination_reason: result["terminationreason"] = self._termination_reason - elif result.get("oom") or (memlimit and result.get("memory", 0) >= memlimit): + elif self.cgroups.version == 2 and result.get("oom_kill_count"): + # At 
least one process was killed by the kernel due to OOM. + result["terminationreason"] = "memory" + elif self.cgroups.version == 1 or ( + memlimit and result.get("memory", 0) >= memlimit + ): # The kernel does not always issue OOM notifications and thus the OOMHandler # does not always run even in case of OOM. We detect this there and report OOM. result["terminationreason"] = "memory" + # Cleanup + del result["oom_kill_count"] + return result def _get_cgroup_measurements(self, cgroups, ru_child, result): @@ -1063,10 +1071,7 @@ def store_result(key, value): if cgroups.MEMORY in cgroups: store_result("memory", cgroups.read_max_mem_usage()) - - oom_count = cgroups.read_oom_count() - if oom_count: - result["oom"] = oom_count + store_result("oom_kill_count", cgroups.read_oom_kill_count()) if cgroups.IO in cgroups: result["blkio-read"], result["blkio-write"] = cgroups.read_io_stat() From e428fa2a99466cb41a58fed1b6c8f3c492a707c1 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 5 Apr 2023 08:24:06 +0200 Subject: [PATCH 116/133] fix failure in case key does not exist --- benchexec/runexecutor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index bdf4438fb..02c4b5e9f 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1012,7 +1012,7 @@ def preSubprocess(): result["terminationreason"] = "memory" # Cleanup - del result["oom_kill_count"] + result.pop("oom_kill_count", None) return result From 29d7cfac9cfc8bf3acee11b6c8f6459ff3748093 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 5 Apr 2023 08:45:28 +0200 Subject: [PATCH 117/133] fix logic bug --- benchexec/runexecutor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 02c4b5e9f..967b4b621 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -1004,7 +1004,7 @@ def preSubprocess(): elif self.cgroups.version == 2 and 
result.get("oom_kill_count"): # At least one process was killed by the kernel due to OOM. result["terminationreason"] = "memory" - elif self.cgroups.version == 1 or ( + elif self.cgroups.version == 1 and ( memlimit and result.get("memory", 0) >= memlimit ): # The kernel does not always issue OOM notifications and thus the OOMHandler From a5655f4be9c52d13607edadcae1c5bd5fd14af83 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 5 Apr 2023 08:54:53 +0200 Subject: [PATCH 118/133] Put our systemd scope into a separate slice This way users could configure some systemd settings that apply to all our scopes (with systemctl --user edit benchexec.slice) and we are not affected by any configuration for other slices like app.slice. --- benchexec/cgroupsv2.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index d6273f0c7..cbb916117 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -37,13 +37,13 @@ _ERROR_NO_PSYSTEMD = """ BenchExec was not able to use cgroups. Please either start it within a fresh systemd scope by prefixing your command line with - systemd-run --user --scope -p Delegate=yes + systemd-run --user --scope --slice=benchexec -p Delegate=yes or install the Python library pystemd such that BenchExec can do this automatically.""" _ERROR_MSG_OTHER = """ BenchExec was not able to use cgroups and did not manage to create a systemd scope. 
Please ensure that we can connect to systemd via DBus or try starting BenchExec within a fresh systemd scope by prefixing your command line with - systemd-run --user --scope -p Delegate=yes""" + systemd-run --user --scope --slice=benchexec -p Delegate=yes""" uid = os.getuid() CGROUP_NAME_PREFIX = "benchmark_" @@ -131,6 +131,8 @@ def _create_systemd_scope_for_us(): unit_params = { # workaround for not declared parameters, remove in the future b"_custom": (b"PIDs", b"au", [os.getpid()]), + # Put us in our own slice to be separate from other applications + b"Slice": b"benchexec.slice", b"Delegate": True, } From 2f3aa044b5a39333167fafd5fac9d23553203861 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 6 Apr 2023 15:34:27 +0200 Subject: [PATCH 119/133] Require that we are the only process in our cgroup for cgroupsv2 So far we accepted other processes from the same process group, but this would mean that we move these other processes into a different cgroup, and maybe we should not do this. Let's wait for whether some use case appears where it is relevant that we do not require to be the only process in the cgroup. --- benchexec/cgroupsv2.py | 6 ++++-- benchexec/util.py | 26 -------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index cbb916117..acd8d1cfa 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -73,9 +73,11 @@ def initialize(): cgroup = CgroupsV2.from_system() - allowed_pids = set(util.get_pgrp_pids(os.getpgid(0))) - if set(cgroup.get_all_tasks()) <= allowed_pids: + if list(cgroup.get_all_tasks()) == [os.getpid()]: # If we are the only process, somebody prepared a cgroup for us. Use it. + # We might be able to relax this check and for example allow child processes, + # but then we would also have to move them to another cgroup, + # which might not be a good idea. 
logging.debug("BenchExec was started in its own cgroup: %s", cgroup) elif _create_systemd_scope_for_us(): diff --git a/benchexec/util.py b/benchexec/util.py index 48ac9272c..f0a3df8b3 100644 --- a/benchexec/util.py +++ b/benchexec/util.py @@ -17,7 +17,6 @@ import glob import logging import os -import pathlib import re import shutil import signal as _signal @@ -816,28 +815,3 @@ def check_msr(): if all(os.access(f"/dev/cpu/{cpu}/msr", os.W_OK) for cpu in cpu_dirs): res["write"] = True return res - - -def get_pgrp_pids(pgid): - pids = [] - for proc_status_path in pathlib.Path("/proc").glob("[0-9]*/status"): - try: - with open(proc_status_path) as proc_status: - for line in proc_status: - key, value, *_ = line.split("\t") - if key == "Pid:": - pid = value - elif key == "NSpgid:": - status_pgid = value - if pgid == int(status_pgid): - pids.append(int(pid)) - except OSError: - # ignore race conditions with processes disappearing - # they aren't interesting to us anyway as processes - # related to us will continue running. Apart from that, this is - # used to move processes to a scope or check if moving to a cgroup - # makes sense so processes having terminated aren't useful in these - # cases anyway - pass - - return pids From 14d8210755bb02115311f2ffe235ecc19364c61d Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 13:03:03 +0200 Subject: [PATCH 120/133] Use cgroup namespace for runs with cgroupsv2 Cgroup namespaces allow us to properly isolate cgroups and make them usable inside the container. They exist for cgroups v1 and v2, but due to some v1 limitation make sense mostly on cgroupsv2. There are no disadvantages on using them, so we enable their use always when using cgroupsv2, and then provide a usable /sys/fs/cgroups directory. This implements most of #436 for runexec. The required prevention of modifying the resource limits from inside the container will come next. 
--- benchexec/container.py | 21 +++++++++++++++++++++ benchexec/containerexecutor.py | 11 ++++++++++- benchexec/libc.py | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/benchexec/container.py b/benchexec/container.py index 1fc7d0bd0..838217524 100644 --- a/benchexec/container.py +++ b/benchexec/container.py @@ -39,6 +39,7 @@ "drop_capabilities", "wait_for_child_and_forward_signals", "setup_container_system_config", + "setup_cgroup_namespace", "CONTAINER_UID", "CONTAINER_GID", "CONTAINER_HOME", @@ -958,3 +959,23 @@ def is_container_system_config_file(file): return file in ( os.path.join("/etc", f.decode()) for f in CONTAINER_ETC_FILE_OVERRIDE ) + + +def setup_cgroup_namespace(): + """Move the current process into a new cgroup namespace and setup /sys/fs/cgroup + appropriately. This method assumes that cgroupv2 is used. + It needs to be called from within the target process.""" + # Move us to new namespace. + libc.unshare(libc.CLONE_NEWCGROUP) + + # Mount /sys/fs/cgroup with view of new namespace. + # For some reason, mounting directly on top of /sys/fs/cgroup gives EBUSY, + # but mounting somewhere else and moving into the correct place works. 
+ libc.mount( + b"cgroup2", + b"/proc", + b"cgroup2", + libc.MS_NOSUID | libc.MS_NODEV | libc.MS_NOEXEC, + None, + ) + libc.mount(b"/proc", b"/sys/fs/cgroup", b"none", libc.MS_MOVE, None) diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 2d6270902..64423ba5b 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -601,6 +601,8 @@ def _start_execution_in_container( root_dir = os.path.abspath(root_dir) cwd = os.path.abspath(cwd) + use_cgroup_ns = cgroups.version == 2 + def grandchild(): """Setup everything inside the process that finally exec()s the tool.""" try: @@ -615,7 +617,6 @@ def grandchild(): my_outer_pid = container.get_my_pid_from_procfs() container.mount_proc(self._container_system_config) - container.drop_capabilities() container.reset_signal_handling() child_setup_fn() # Do some other setup the caller wants. @@ -624,6 +625,14 @@ def grandchild(): os.write(to_parent, str(my_outer_pid).encode()) received = os.read(from_parent, 1) assert received == MARKER_PARENT_COMPLETED, received + + # Finalize setup + # We want to do as little as possible here because measurements are + # already running, but we can only setup the cgroup namespace + # once we are in the desired cgroup. + if use_cgroup_ns: + container.setup_cgroup_namespace() + container.drop_capabilities() except BaseException as e: # When using runexec, this logging will end up in the output.log file, # where usually the tool output is. 
This is suboptimal, but probably diff --git a/benchexec/libc.py b/benchexec/libc.py index ef347e10b..2a808ec7e 100644 --- a/benchexec/libc.py +++ b/benchexec/libc.py @@ -58,6 +58,7 @@ def _check_errno(result, func, arguments): # /usr/include/linux/sched.h CLONE_NEWNS = 0x00020000 +CLONE_NEWCGROUP = 0x02000000 CLONE_NEWUTS = 0x04000000 CLONE_NEWIPC = 0x08000000 CLONE_NEWUSER = 0x10000000 From f63bb3da28c381518f90f59bd675c7d7565218e6 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 09:36:41 +0200 Subject: [PATCH 121/133] Refactoring: Move code to separate method to allow reuse --- benchexec/cgroupsv2.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index acd8d1cfa..65201ffe6 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -102,13 +102,7 @@ def initialize(): assert not cgroup.has_tasks() # Now that the cgroup is empty, we can enable controller delegation. - # We enable all controllers, even those that we do not need ourselves, - # in order to allow nesting of other cgroup-using software. - controllers = util.read_file(cgroup.path / "cgroup.controllers").split() - util.write_file( - " ".join(f"+{c}" for c in controllers), - cgroup.path / "cgroup.subtree_control", - ) + cgroup._delegate_controllers() _usable_cgroup = cgroup @@ -318,6 +312,20 @@ def create_fresh_child_cgroup(self, subsystems, prefix=CGROUP_NAME_PREFIX): return CgroupsV2({c: child_path for c in child_subsystems}) + def _delegate_controllers(self): + """ + Enable delegation of all controllers of this cgroup to child cgroups. + This is relevant if processes in child cgroups also want to use cgroup features. + The current cgroup needs to have no processes in order to do so! + """ + # We enable all controllers, even those that we do not need ourselves, + # in order to allow nesting of other cgroup-using software. 
+ controllers = util.read_file(self.path / "cgroup.controllers").split() + util.write_file( + " ".join(f"+{c}" for c in controllers), + self.path / "cgroup.subtree_control", + ) + def require_subsystem(self, subsystem, log_method=logging.warning): """ Check whether the given subsystem is enabled and is writable From a6d7d36de8ef66082d4be88183f1d1528bf16d34 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 11:27:34 +0200 Subject: [PATCH 122/133] Add another nesting level of cgroups in case of cgroup namespaces This prevents the benchmarked process from changing the configured limits. So far, the cgroup with the limits is the same where we add the benchmarked process. But if we then delegate it into the container, the benchmarked process can access it and change the limits. So now we create a child cgroup of the cgroup with the limits, move the benchmarked process into the child, and make the child the root cgroup of the container. Then the limits are configured outside of the container and cannot be changed. This finishes #436 for runexec. We just need to take care that for some special operations we also use the child cgroup instead of the main one. An alternative would be the "nsdelegate" mount option for the cgroup2 fs. However, this needs to be set in the initial namespace, so we cannot enforce this. And at least on my Ubuntu system, it is missing, so we also cannot just declare it as a requirement. --- benchexec/cgroupsv2.py | 42 +++++++++++++++++++++++++++++++++- benchexec/containerexecutor.py | 7 ++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 65201ffe6..73e74e79d 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -259,6 +259,9 @@ def __init__(self, subsystems): next(iter(self.subsystems.values())) if len(self.subsystems) else None ) + # Store reference to child cgroup if we delegated controllers to it. 
+ self._delegated_to = None + @classmethod def from_system(cls, cgroup_procinfo=None): logging.debug( @@ -312,6 +315,29 @@ def create_fresh_child_cgroup(self, subsystems, prefix=CGROUP_NAME_PREFIX): return CgroupsV2({c: child_path for c in child_subsystems}) + def create_fresh_child_cgroup_for_delegation(self, prefix="delegate_"): + """ + Create a special child cgroup and delegate all controllers to it. + The current cgroup must not have processes and may never have processes. + This method can be called only once because we remember what child cgroup + we create here and use it for some special purposes later on. + """ + assert not self._delegated_to + self._delegate_controllers() + child_cgroup = self.create_fresh_child_cgroup(self.subsystems.keys(), prefix) + assert ( + self.subsystems.keys() == child_cgroup.subsystems.keys() + ), "delegation failed for at least one controller" + self._delegated_to = child_cgroup + + if self.MEMORY in child_cgroup: + # Copy memory limit to child. This has no actual effect (limits apply + # recursively), but informs the users of the child cgroup about the limit + # (otherwise they would not see it). + child_cgroup.write_memory_limit(self.read_memory_limit() or "max") + + return child_cgroup + def _delegate_controllers(self): """ Enable delegation of all controllers of this cgroup to child cgroups. @@ -385,6 +411,7 @@ def add_task(self, pid): """ Add a process to the cgroups represented by this instance. """ + assert not self._delegated_to, "Delegated cgroups cannot have processes" with open(self.path / "cgroup.procs", "w") as tasksFile: tasksFile.write(str(pid)) @@ -433,9 +460,14 @@ def raise_error(e): # On cgroupsv2, frozen processes can still be killed, so this is all we need to # do. 
util.write_file("1", self.path, "cgroup.freeze", force=True) + keep_child = self._delegated_to.path if self._delegated_to else None for child_cgroup in recursive_child_cgroups(self.path): kill_all_tasks_in_cgroup(child_cgroup) - self._remove_cgroup(child_cgroup) + + # Remove child_cgroup, but not if it is our immediate child because of + # delegation. We need that cgroup to read the OOM kill count. + if child_cgroup != keep_child: + self._remove_cgroup(child_cgroup) kill_all_tasks_in_cgroup(self.path) @@ -530,6 +562,14 @@ def disable_swap(self): self.set_value(self.MEMORY, "swap.max", "0") def read_oom_kill_count(self): + # We read only the counter from memory.events.local to avoid reporting OOM + # if the process used cgroups internally and there was an OOM in some + # arbitrary nested child cgroup, but not for the main process itself. + # But if we have delegated, then our own cgroup has no processes and OOM count + # would remain zero, so we have to read it from the child cgroup. + if self._delegated_to: + return self._delegated_to.read_oom_kill_count() + for k, v in self.get_key_value_pairs(self.MEMORY, "events.local"): if k == "oom_kill": return int(v) diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 64423ba5b..9d71a4c2d 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -930,6 +930,13 @@ def check_child_exit_code(): child_pid, ) + # cgroups is the cgroups where we configure limits. + # So for isolation, we need to create a child cgroup that becomes the root + # of the cgroup ns, such that the limit settings are not accessible in the + # container and cannot be changed. 
+ if use_cgroup_ns: + cgroups = cgroups.create_fresh_child_cgroup_for_delegation() + # start measurements cgroups.add_task(grandchild_pid) parent_setup = parent_setup_fn() From f0ad1df4a80516fcac29feff5f10ee91a4666352 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 13:05:57 +0200 Subject: [PATCH 123/133] Add --cgroup-access to containerexec for cgroupsv2 The cgroup namespaces from the previous commit provide better isolation, so using them also makes sense for containerexec, not just for runexec. However, containerexec currently does not use and require cgroups at all, and we do not want to make them mandatory, such that containerexec stays usable for users without cgroup access. So we add a new argument --cgroup-access to containerexec that triggers use of cgroups for the run (without any limits etc.) and uses cgroup namespaces to provide a usable cgroup for the process inside the container. We support this flag only on systems with cgroupsv2 because with cgroupsv1 the cgroup namespaces do not work as well. runexec and benchexec do not get this argument, because they use cgroups anyway and thus --cgroup-access is implied. This closes #436. --- benchexec/cgroups.py | 11 +++++++++-- benchexec/containerexecutor.py | 29 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 9343f0939..42853155f 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -45,7 +45,7 @@ class Cgroups(ABC): """ @staticmethod - def initialize(): + def initialize(allowed_versions=None): """ Try to find or create a usable cgroup and return a Cgroups instance that represents it. @@ -69,8 +69,15 @@ def initialize(): Typically, callers should use the returned cgroup instance only for creating child cgroups and not call any other modifying method such as add_task(). + + @param allowed_versions: None, or a sequence of allowed cgroup versions (1 or 2). 
+ If the current system uses a different cgroup version, no attempt at + returning a usable Cgroups instance is made. """ version = _get_cgroup_version() + if allowed_versions is not None and version not in allowed_versions: + return Cgroups.dummy() + if version == CGROUPS_V1: from .cgroupsv1 import CgroupsV1 @@ -346,7 +353,7 @@ def kill_all_tasks(self): pass def create_fresh_child_cgroup(self, subsystems): - pass + return self def handle_errors(self, critical_cgroups): pass diff --git a/benchexec/containerexecutor.py b/benchexec/containerexecutor.py index 9d71a4c2d..1492a32f6 100644 --- a/benchexec/containerexecutor.py +++ b/benchexec/containerexecutor.py @@ -252,6 +252,14 @@ def main(argv=None): default=None, help="use given GID within container (default: current UID)", ) + parser.add_argument( + "--cgroup-access", + action="store_true", + help="Allow processes in the container to use cgroups. " + "This only works on cgroupsv2 systems and if containerexec is either started in" + " its own cgroup or can talk to systemd to create a cgroup (same requirements" + " as for runexec).", + ) add_basic_container_args(parser) add_container_output_args(parser) baseexecutor.add_basic_executor_options(parser) @@ -260,6 +268,7 @@ def main(argv=None): baseexecutor.handle_basic_executor_options(options, parser) logging.debug("This is containerexec %s.", __version__) container_options = handle_basic_container_args(options, parser) + container_options["cgroup_access"] = options.cgroup_access container_output_options = handle_container_output_args(options, parser) if options.root: @@ -307,6 +316,7 @@ def __init__( dir_modes={"/": DIR_OVERLAY, "/run": DIR_HIDDEN, "/tmp": DIR_HIDDEN}, container_system_config=True, container_tmpfs=True, + cgroup_access=False, *args, **kwargs, ): @@ -322,6 +332,9 @@ def __init__( @param container_system_config: Whether to use a special system configuration in the container that disables all remote host and user lookups, sets a custom hostname, etc. 
+ @param cgroup_access: + Whether to allow processes in the container to access cgroups. + Only supported on systems with cgroupsv2. """ super(ContainerExecutor, self).__init__(*args, **kwargs) self._use_namespaces = use_namespaces @@ -388,6 +401,17 @@ def is_accessible(path): "threads please read https://github.com/sosy-lab/benchexec/issues/435" ) + self._cgroups = Cgroups.dummy() + if cgroup_access: + self._cgroups = Cgroups.initialize(allowed_versions=[2]) + if self._cgroups.version != 2: + sys.exit( + "Cgroup access unsupported on this system, " + "BenchExec only supports this for cgroupsv2." + ) + if self._cgroups.CPU not in self._cgroups: + self._cgroups.handle_errors([self._cgroups.CPU]) + def _get_result_files_base(self, temp_dir): """Given the temp directory that is created for each run, return the path to the directory where files created by the tool are stored.""" @@ -433,6 +457,9 @@ def execute_run( if environ is None: environ = os.environ.copy() + cgroups = self._cgroups.create_fresh_child_cgroup( + self._cgroups.subsystems.keys() + ) pid = None returnvalue = 0 @@ -448,7 +475,7 @@ def execute_run( root_dir=rootDir, cwd=workingDir, temp_dir=temp_dir, - cgroups=Cgroups.dummy(), + cgroups=cgroups, output_dir=output_dir, result_files_patterns=result_files_patterns, child_setup_fn=util.dummy_fn, From 0cc4975a41085cec15af87941ec5c6f0b74bfd2e Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 16:09:11 +0200 Subject: [PATCH 124/133] add some type annotations and checks to help pytype --- benchexec/cgroups.py | 3 +++ benchexec/cgroupsv2.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/benchexec/cgroups.py b/benchexec/cgroups.py index 42853155f..d27e483d3 100644 --- a/benchexec/cgroups.py +++ b/benchexec/cgroups.py @@ -355,6 +355,9 @@ def kill_all_tasks(self): def create_fresh_child_cgroup(self, subsystems): return self + def create_fresh_child_cgroup_for_delegation(self): + return self + def handle_errors(self, 
critical_cgroups): pass diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 73e74e79d..1e0a0ea17 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -15,6 +15,7 @@ import tempfile import threading import time +import typing from decimal import Decimal from benchexec import systeminfo, util @@ -260,7 +261,7 @@ def __init__(self, subsystems): ) # Store reference to child cgroup if we delegated controllers to it. - self._delegated_to = None + self._delegated_to: typing.Optional[CgroupsV2] = None @classmethod def from_system(cls, cgroup_procinfo=None): @@ -325,6 +326,7 @@ def create_fresh_child_cgroup_for_delegation(self, prefix="delegate_"): assert not self._delegated_to self._delegate_controllers() child_cgroup = self.create_fresh_child_cgroup(self.subsystems.keys(), prefix) + assert isinstance(child_cgroup, CgroupsV2) assert ( self.subsystems.keys() == child_cgroup.subsystems.keys() ), "delegation failed for at least one controller" From 6cbd9fc5722cf763d141fbb5d1449f194d481b67 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 24 Aug 2023 16:10:05 +0200 Subject: [PATCH 125/133] Better error message for problem of missing cpuset delegation This is a kernel problem that is hopefully fixed in the future, but until then likely to cause problems for users who use BenchExec on their own machines (and not dedicated servers). So we at least give them a quick workaround. --- benchexec/cgroupsv2.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 1e0a0ea17..7e7a7774b 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -30,6 +30,14 @@ Please enable them, e.g., by setting up delegation. The cgroup that we attempted to use was: {}""" +_ERROR_MSG_MISSING_CPUSET = """ +The kernel has a bug where delegation of cpuset does not work if there are processes of other users in this user's cgroup. 
+This happens commonly if xdg-document-portal is running while such delegation is attempted for the first time. +For more information cf. https://github.com/systemd/systemd/issues/18293. + +As a quick workaround, execute this command, which forces the missing delegation as root user: + echo +cpuset | sudo tee {}""" + _ERROR_NO_SYSTEMD = """ System is using cgroups v2 but not systemd. If you are using BenchExec within a container, please ensure that cgroups are properly delegated into the container. @@ -390,6 +398,17 @@ def handle_errors(self, critical_cgroups): sys.exit( _ERROR_MSG_UNKNOWN_SUBSYSTEMS.format(", ".join(unknown_subsystems)) ) + elif critical_cgroups == {self.CPUSET}: + problem_cgroup = self.path + while not self.CPUSET in util.read_file( + problem_cgroup, "cgroup.controllers" + ): + problem_cgroup = problem_cgroup.parent + sys.exit( + _ERROR_MSG_MISSING_CPUSET.format( + problem_cgroup / "cgroup.subtree_control" + ) + ) else: sys.exit( _ERROR_MSG_MISSING_SUBSYSTEMS.format( From 2a0eec264d37b18b8bc15e72625a16edf2c54293 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Tue, 5 Sep 2023 10:08:28 +0200 Subject: [PATCH 126/133] code style --- benchexec/cgroupsv2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 7e7a7774b..250e18bc9 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -400,7 +400,7 @@ def handle_errors(self, critical_cgroups): ) elif critical_cgroups == {self.CPUSET}: problem_cgroup = self.path - while not self.CPUSET in util.read_file( + while self.CPUSET not in util.read_file( problem_cgroup, "cgroup.controllers" ): problem_cgroup = problem_cgroup.parent From d17198e8009bcbb1784c3b56e5c9016e16998f2f Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Oct 2023 14:06:10 +0200 Subject: [PATCH 127/133] Update Debian package config for cgroupsv2 We want to recommend installation of pystemd and provide appropriate documentation for users. 
--- debian/README.Debian | 6 +++++- debian/benchexec.postinst | 8 +++++--- debian/control | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/debian/README.Debian b/debian/README.Debian index 8eb82911f..c29638d1d 100644 --- a/debian/README.Debian +++ b/debian/README.Debian @@ -1,11 +1,15 @@ benchexec for Debian -------------------- -This package uses a systemd service (benchexec-cgroup) +For systems with cgroupsv1, +this package uses a systemd service (benchexec-cgroup) to configure the Linux kernel cgroups as necessary for BenchExec. Users that should be able to use BenchExec should be added to the group "benchexec". Alternatively, the permissions assigned to the cgroups can be changed by changing the BENCHEXEC_CGROUP_GROUP and BENCHEXEC_CGROUP_PERM environment variables of that service. +For systems with cgroupsv2 and systemd, +no special configuration is necessary. + -- Philipp Wendler Fri, 11 Sep 2015 10:29:29 +0200 diff --git a/debian/benchexec.postinst b/debian/benchexec.postinst index 2ddfec023..7fab8891e 100644 --- a/debian/benchexec.postinst +++ b/debian/benchexec.postinst @@ -13,9 +13,11 @@ GROUP=benchexec add_group() { addgroup --system "${GROUP}" - echo - echo "Please add those user accounts that should be able to use BenchExec to the group ${GROUP}." - echo + if [ ! -f /sys/fs/cgroup/cgroup.controllers ]; then + echo + echo "Please add those user accounts that should be able to use BenchExec to the group ${GROUP}." 
+ echo + fi } case "$1" in diff --git a/debian/control b/debian/control index 381832d94..7078ead5c 100644 --- a/debian/control +++ b/debian/control @@ -18,7 +18,7 @@ Vcs-Browser: https://github.com/sosy-lab/benchexec Package: benchexec Architecture: all Depends: ${python3:Depends}, python3-pkg-resources, ${misc:Depends} -Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs +Recommends: cpu-energy-meter, libseccomp2, lxcfs, python3-coloredlogs, python3-pystemd Description: Framework for Reliable Benchmarking and Resource Measurement BenchExec allows benchmarking non-interactive tools on Linux systems. It measures CPU time, wall time, and memory usage of a tool, From 40d3986f9cf625b49b3bfbb7803ea52226c1a914 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Oct 2023 14:45:16 +0200 Subject: [PATCH 128/133] Update installation instructions with cgroups v2 --- doc/INSTALL.md | 74 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 14 deletions(-) diff --git a/doc/INSTALL.md b/doc/INSTALL.md index ca0ed97ec..c07a2bc75 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -26,6 +26,7 @@ The following packages are optional but recommended dependencies: - [pqos_wrapper] and [pqos library][pqos] provide isolation of L3 cache and measurement of cache usage and memory bandwidth (only in `benchexec`). +- [pystemd] allows BenchExec to automatically configure cgroups on systems with systemd and cgroups v2. Note that the `table-generator` utility requires only Python and works on all platforms. 
@@ -68,11 +69,11 @@ To automatically download and install the latest stable version and its dependen from the [Python Packaging Index](https://pypi.python.org/pypi/BenchExec) with pip, run this command: - sudo pip3 install benchexec coloredlogs + sudo pip3 install benchexec[systemd] coloredlogs You can also install BenchExec only for your user with - pip3 install --user benchexec coloredlogs + pip3 install --user benchexec[systemd] coloredlogs In the latter case you probably need to add the directory where pip installs the commands to the PATH environment by adding the following line to your `~/.profile` file: @@ -81,6 +82,8 @@ to the PATH environment by adding the following line to your `~/.profile` file: Of course you can also install BenchExec in a virtualenv if you are familiar with Python tools. +On systems without systemd you can omit the `[systemd]` part. + Please make sure to configure cgroups as [described below](#setting-up-cgroups) and install [cpu-energy-meter], [libseccomp2], [LXCFS], and [pqos_wrapper] if desired. @@ -148,17 +151,18 @@ If container mode does not work, please check the [common problems](container.md ## Setting up Cgroups -If you have installed the Debian package and you are running systemd -(default since Debian 8 and Ubuntu 15.04), -the package should have configured everything automatically -as long as the system is using cgroups v1. -Just add your user to the group `benchexec` and reboot: +This depends on whether your system is using cgroups v1 or v2. +To find out, please check whether `/sys/fs/cgroup/cgroup.controllers` exists. +If yes, you are using v2, otherwise v1 +(for the purpose of BenchExec, a hybrid usage of v1 and v2 counts as v1). - adduser benchexec +Then please follow the instructions from the appropriate subsection +and the instructions for [testing and troubleshooting](#testing-cgroups-setup-and-known-problems). -Support for cgroups v2 is still under development for BenchExec. 
-On recent distributions (e.g., Ubuntu 22.04), -please switch back to cgroups v1 for now by putting +Note that support for cgroups v2 is available only since BenchExec 3.18 +and has received less testing than using cgroups v1 so far. +If you are on a distribution with cgroups v2 and want to switch to cgroups v1, +you can switch back to cgroups v1 for now by putting `systemd.unified_cgroup_hierarchy=0` on the kernel command line. On Debian/Ubuntu, this could be done with the following steps and rebooting afterwards: ``` @@ -166,7 +170,47 @@ echo 'GRUB_CMDLINE_LINUX_DEFAULT="${GRUB_CMDLINE_LINUX_DEFAULT} systemd.unified_ sudo update-grub ``` -### Setting up Cgroups on Machines with systemd +### Setting up Cgroups v2 on Machines with systemd + +This applies for example for Ubuntu 21.10 and newer, +Debian 11 and newer, and most other current Linux distributions. + +BenchExec can use systemd to automatically take care of any necessary cgroup configuration, +so no manual configuration is necessary. +However, the Python package `pystemd` needs to be installed, +which happens automatically if you installed our Ubuntu package +including its recommended dependencies or `benchexec[systemd]` via pip. +If missing, install the package with `sudo apt install python3-pystemd` +or `pip install pystemd` according to how you installed BenchExec. + +BenchExec also works without `pystemd` if you start BenchExec inside its own cgroup. +The easiest way to do so is using `systemd-run`: + + systemd-run --user --scope --slice=benchexec -p Delegate=yes benchexec ... + +If you want to use systemd to pre-configure the cgroups that BenchExec uses +(e.g., influence the allowed CPU cores etc.), +you can do so by configuring `benchexec.slice` in the user-specific systemd instance(s) +(all cgroups that BenchExec creates will be inside this systemd slice). 
+ +### Setting up Cgroups v2 on Machines without systemd + +This is possible if you ensure manually that +BenchExec is started in its own cgroup +(i.e., a cgroup with no other processes inside). +We recommend using systemd, though. + +### Setting up Cgroups v1 on Machines with systemd and BenchExec as Debian package + +This applies to Ubuntu 21.04 and older and Debian 10 and older +(if the Debian package for BenchExec was used). + +Our Debian package should have configured everything automatically. +Just add your user to the group `benchexec` and reboot: + + adduser benchexec + +### Setting up Cgroups v1 on Machines with systemd Most distributions today use systemd, and systemd makes extensive usage of cgroups and [claims that it should be the only process that accesses cgroups directly](https://wiki.freedesktop.org/www/Software/systemd/ControlGroupInterface/). @@ -207,7 +251,7 @@ echo $$ > /sys/fs/cgroup/freezer//tasks In any case, please check whether everything works or whether additional settings are necessary as [described below](#testing-cgroups-setup-and-known-problems). -### Setting up Cgroups on Machines without systemd +### Setting up Cgroups v1 on Machines without systemd The cgroup virtual file system is typically mounted at or below `/sys/fs/cgroup`. If it is not, you can mount it with @@ -259,7 +303,7 @@ which are explained in the [container documentation](container.md#using-benchexe ### Testing Cgroups Setup and Known Problems -After installing BenchExec and setting up the cgroups file system, please run +After installing BenchExec and configuring cgroups if appropriate, please run python3 -m benchexec.check_cgroups @@ -267,6 +311,7 @@ This will report warnings and exit with code 1 if something is missing. If you find that something does not work, please check the following list of solutions. +For cgroups v1: If your machine has swap, cgroups should be configured to also track swap memory. This is turned off by several distributions. 
If the file `memory.memsw.usage_in_bytes` does not exist in the directory @@ -296,3 +341,4 @@ Please refer to the [development instructions](DEVELOPMENT.md). [LXCFS]: https://github.com/lxc/lxcfs [pqos]: https://github.com/intel/intel-cmt-cat/tree/master/pqos [pqos_wrapper]: https://gitlab.com/sosy-lab/software/pqos-wrapper +[pystemd]: https://github.com/systemd/pystemd From fa8a85d20931517bf6dd07faeb7c93011e70017e Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Wed, 6 Sep 2023 07:37:56 +0200 Subject: [PATCH 129/133] Handle failure case of r/o cgroupfs for cgroupsv2 Instead of crashing, we continue and provide nice error messages if cgroups are required. For Podman we can even tell the user exactly what to do. --- benchexec/cgroupsv2.py | 36 +++++++++++++++++++++++++++++++----- doc/INSTALL.md | 9 +++++++-- doc/container.md | 2 +- 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 250e18bc9..3c06165b7 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -38,6 +38,15 @@ As a quick workaround, execute this command, which forces the missing delegation as root user: echo +cpuset | sudo tee {}""" +_ERROR_PODMAN = """ +BenchExec seems to be running in a Podman container without enabled cgroups. +Please pass "--security-opt unmask=/sys/fs/cgroup" to your "podman run" command.""" + +_ERROR_RO_CGROUPFS = """ +System is using cgroups v2 but the cgroupfs is mounted read-only. +This likely means that you are using BenchExec within a container. +Please ensure that cgroups are properly delegated into the container.""" + _ERROR_NO_SYSTEMD = """ System is using cgroups v2 but not systemd. If you are using BenchExec within a container, please ensure that cgroups are properly delegated into the container. @@ -102,9 +111,16 @@ def initialize(): # Now we are the only process in this cgroup. In order to make it usable for # benchmarking, we need to move ourselves into a child cgroup. 
- child_cgroup = cgroup.create_fresh_child_cgroup( - cgroup.subsystems.keys(), prefix="benchexec_process_" - ) + try: + child_cgroup = cgroup.create_fresh_child_cgroup( + cgroup.subsystems.keys(), prefix="benchexec_process_" + ) + except OSError as e: + # No usable cgroup, e.g., because of read-only cgroup fs. + # Continue as described above. + logging.debug("Cgroup found, but cannot create child cgroups: %s", e) + return CgroupsV2({}) + for pid in cgroup.get_all_tasks(): child_cgroup.add_task(pid) assert child_cgroup.has_tasks() @@ -417,8 +433,18 @@ def handle_errors(self, critical_cgroups): ) else: - # no cgroup available at all - if not systeminfo.has_systemd(): + # no cgroup available at all, likely a container + + # Podman detection from https://github.com/containers/podman/issues/3586 + if os.getenv("container") == "podman" or os.path.exists( + "/run/.containerenv" + ): + sys.exit(_ERROR_PODMAN) + + elif os.statvfs("/sys/fs/cgroup").f_flag & os.ST_RDONLY: + sys.exit(_ERROR_RO_CGROUPFS) + + elif not systeminfo.has_systemd(): sys.exit(_ERROR_NO_SYSTEMD) try: diff --git a/doc/INSTALL.md b/doc/INSTALL.md index c07a2bc75..4b6a9e698 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -290,8 +290,13 @@ or whether additional settings are necessary as [described below](#testing-cgrou ### Setting up Cgroups in a Docker/Podman Container -If you want to run benchmarks within a Docker/Podman container, -and the cgroups file system is not available within the container, +If you want to run BenchExec inside a container, +we recommend Podman and systems with cgroups v2. +Then pass `--security-opt unmask=/sys/fs/cgroup` to `podman run`. +This will work if BenchExec is the main process inside the container, +otherwise a separate cgroup needs to be created. 
+ +For other cases, if the cgroups file system is not available within the container, please use the following command line argument to mount the cgroup hierarchy within the container when starting it (same for Podman): diff --git a/doc/container.md b/doc/container.md index 72772799e..a01b40b27 100644 --- a/doc/container.md +++ b/doc/container.md @@ -185,7 +185,7 @@ because it provides "rootless" containers To use BenchExec within Podman, start it as a regular user (not root) and use the following arguments: ``` -podman run --security-opt unmask=/proc/* --security-opt seccomp=unconfined ... +podman run --security-opt unmask=/proc/* --security-opt unmask=/sys/fs/cgroup --security-opt seccomp=unconfined ... ``` You may additionally need the arguments documented for [cgroup usage](INSTALL.md#setting-up-cgroups-in-a-dockerpodman-container). From 93f22e7afb74270e1b5c5d7d6752bbe2cea57638 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Thu, 19 Oct 2023 16:43:13 +0200 Subject: [PATCH 130/133] more documentation for cgroups v2 --- benchexec/cgroupsv2.py | 1 + doc/INSTALL.md | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/benchexec/cgroupsv2.py b/benchexec/cgroupsv2.py index 3c06165b7..55ee170dd 100644 --- a/benchexec/cgroupsv2.py +++ b/benchexec/cgroupsv2.py @@ -34,6 +34,7 @@ The kernel has a bug where delegation of cpuset does not work if there are processes of other users in this user's cgroup. This happens commonly if xdg-document-portal is running while such delegation is attempted for the first time. For more information cf. https://github.com/systemd/systemd/issues/18293. +Linux 6.6 is expected to contain a fix for this bug. 
As a quick workaround, execute this command, which forces the missing delegation as root user: echo +cpuset | sudo tee {}""" diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 4b6a9e698..9e0173b31 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -15,7 +15,7 @@ SPDX-License-Identifier: Apache-2.0 - Python 3.7 or newer - Linux (cf. [Kernel Requirements](#kernel-requirements) below for details) -- Cgroups v1 (cf. [Setting up Cgroups](#setting-up-cgroups) below for details) +- Access to cgroups (cf. [Setting up Cgroups](#setting-up-cgroups) below for details) - x86 or ARM machine (please [contact us](https://github.com/sosy-lab/benchexec/issues/new) for other architectures) The following packages are optional but recommended dependencies: @@ -42,9 +42,11 @@ and install manually (note that the leading `./` is important, otherwise `apt` w apt install --install-recommends ./benchexec_*.deb -Our package automatically configures the necessary cgroup permissions -if the system uses cgroups v1. -Just add the users that should be able to use BenchExec to the group `benchexec` +On Ubuntu 21.10 and newer with the default cgroup config, this is all. + +On older Ubuntu versions or those configured for cgroups v1, +our package automatically configures the necessary cgroup permissions. +Then add the users that should be able to use BenchExec to the group `benchexec` (group membership will be effective after the next login of the respective user): adduser benchexec From d97af637c4d119f87d7262482cc133a77124aee1 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 20 Oct 2023 09:16:18 +0200 Subject: [PATCH 131/133] polish documentation for cgroups v2 Mention @globin as contributor as this work is from him. 
--- README.md | 8 ++++++-- doc/INSTALL.md | 8 +++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 02f556047..6171e26b1 100644 --- a/README.md +++ b/README.md @@ -57,8 +57,11 @@ Results from multiple runs can be combined into CSV and interactive HTML tables, of which the latter provide scatter and quantile plots (have a look at our [demo table](https://sosy-lab.github.io/benchexec/example-table/svcomp-simple-cbmc-cpachecker.table.html)). -BenchExec works only on Linux and needs a one-time setup of cgroups by the machine's administrator. -The actual benchmarking can be done by any user and does not need root access. +On modern Linux systems (e.g., Debian 11, Ubuntu 22.04, etc.), +BenchExec works out-of-the box and without the need for root access, +not even for installation. +On older Linux systems, a one-time setup of cgroups by the machine's administrator may be needed. +The actual benchmarking can always be done by any user and does not need root access. BenchExec was originally developed for use with the software verification framework [CPAchecker](https://cpachecker.sosy-lab.org) @@ -103,6 +106,7 @@ Contributors: - [Montgomery Carter](https://github.com/MontyCarter) - [Andreas Donig](https://github.com/adonig) - [Karlheinz Friedberger](https://www.sosy-lab.org/people/friedberger) +- [Robin Gloster](https://github.com/globin) - Peter Häring - [Florian Heck](https://github.com/fheck) - [Hugo](https://github.com/hugovk) diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 9e0173b31..fe401bbd2 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -235,9 +235,6 @@ The following steps are necessary: By default, this gives permissions to users of the group `benchexec`, this can be adjusted in the `Environment` line as necessary. - * If the system is using cgroups v2, you need to tell systemd to use cgroups v1 instead - as [described above](#setting-up-cgroups). 
- By default, BenchExec will automatically attempt to use the cgroup `system.slice/benchexec-cgroup.service` that is created by this service file. If you use a different cgroup structure, @@ -268,7 +265,7 @@ you can use Of course permissions can also be assigned in a more fine-grained way if necessary. Alternatively, software such as `cgrulesengd` from -the [cgroup-bin](http://libcg.sourceforge.net/) package +the [libcgroup](https://github.com/libcgroup/libcgroup) project can be used to setup the cgroups hierarchy. Note that `cgrulesengd` might interfere with the cgroups of processes, @@ -296,7 +293,8 @@ If you want to run BenchExec inside a container, we recommend Podman and systems with cgroups v2. Then pass `--security-opt unmask=/sys/fs/cgroup` to `podman run`. This will work if BenchExec is the main process inside the container, -otherwise a separate cgroup needs to be created. +otherwise you need to create an appropriate cgroup hierarchy inside the container, +i.e., one where BenchExec has its own separate cgroup. For other cases, if the cgroups file system is not available within the container, please use the following command line argument From 3b2a933d0bc7076beaa83c68c73e7a1f61676917 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 20 Oct 2023 11:07:54 +0200 Subject: [PATCH 132/133] Let check_cgroups produce better error message for missing CPUSET There is this kernel bug for cgroups v2 that prevents us from using CPUSET but everything else is working. In this case we provide a nice error message with a workaround (6cbd9fc5), but check_cgroups did not do this so far because it terminated itself too early. Now we continue and let RunExecutor print the message. 
--- benchexec/check_cgroups.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/benchexec/check_cgroups.py b/benchexec/check_cgroups.py index 20dd9445b..99dd032a5 100644 --- a/benchexec/check_cgroups.py +++ b/benchexec/check_cgroups.py @@ -33,20 +33,27 @@ def check_cgroup_availability(wait=1): if not ( my_cgroups.CPU in my_cgroups - and my_cgroups.CPUSET in my_cgroups # and FREEZER in my_cgroups # For now, we do not require freezer and my_cgroups.MEMORY in my_cgroups ): sys.exit(1) + if my_cgroups.CPUSET in my_cgroups: + cores = my_cgroups.read_allowed_cpus() + mems = my_cgroups.read_allowed_memory_banks() + else: + # Use dummy value (does not matter which) to let execute_run() fail. + cores = [0] + mems = [0] + with tempfile.NamedTemporaryFile(mode="rt") as tmp: runexecutor.execute_run( ["sh", "-c", f"sleep {wait}; cat /proc/self/cgroup"], tmp.name, memlimit=1024 * 1024, # set memlimit to force check for swapaccount # set cores and memory_nodes to force usage of CPUSET - cores=my_cgroups.read_allowed_cpus(), - memory_nodes=my_cgroups.read_allowed_memory_banks(), + cores=cores, + memory_nodes=mems, ) lines = [] for line in tmp: From f8348a79b1aeb0bc90d365b292caad975868da47 Mon Sep 17 00:00:00 2001 From: Philipp Wendler Date: Fri, 20 Oct 2023 11:10:31 +0200 Subject: [PATCH 133/133] Silence warning about missing CPUSET if not required So far, BenchExec prints a warning about every missing cgroup controller, and later on an error message if that controller is strictly required. But CPUSET is only required for core limits, and has no other use. This is different from e.g. MEMORY which is required for memory limits but also provides memory measurements. So let's silence the warning about missing CPUSET and keep only the error message. check_cgroups is not affected because it forces CPUSET to be required. 
--- benchexec/runexecutor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py index 5b79a894b..35350f39e 100644 --- a/benchexec/runexecutor.py +++ b/benchexec/runexecutor.py @@ -379,7 +379,9 @@ def _init_cgroups(self): '"sudo swapoff -a".' ) - self.cgroups.require_subsystem(self.cgroups.CPUSET) + # Do not warn about missing CPUSET here, it is only useful for core limits + # and if one is set we terminate with a better error message later. + self.cgroups.require_subsystem(self.cgroups.CPUSET, log_method=logging.debug) self.cpus = None # to indicate that we cannot limit cores self.memory_nodes = None # to indicate that we cannot limit cores if self.cgroups.CPUSET in self.cgroups: