From 0b514ae8abf8e6f4e07c5b724f18433dcfe525cf Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 30 Sep 2022 11:30:06 -0700 Subject: [PATCH 001/121] Run CI on schedule (#404) Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6afe451d2..32a514e00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,9 @@ on: pull_request: branches-ignore: - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) + schedule: + # * is a special character in YAML so you have to quote this string + - cron: '* */4 * * *' env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-core-ci From 2a1a13c6e541885d2a83fe9d682b017865922ab4 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 30 Sep 2022 11:37:04 -0700 Subject: [PATCH 002/121] Adjust consensus match frequency based on field sizes (#402) (#406) * Perform consensus match more frequently for bigger free fields * Minor cleanup --- legate/core/runtime.py | 39 ++++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index fa4fdaad9..b47624378 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -200,9 +200,9 @@ def add_free_field( ) -> None: self._freed_fields.append(FreeFieldInfo(manager, region, field_id)) - def issue_field_match(self) -> None: + def issue_field_match(self, credit: int) -> None: # Increment our match counter - self._match_counter += 1 + self._match_counter += credit if self._match_counter < self._match_frequency: return # If the match counter equals our match frequency then do an exchange @@ -342,9 +342,29 @@ def __init__( ) -> None: super().__init__(runtime, shape, field_size) self._field_match_manager = runtime.field_match_manager + self._update_match_credit() + + def _update_match_credit(self) -> None: + if self.shape.fixed: + size = self.shape.volume() * self.field_size + self._match_credit = ( + size + self.runtime.max_field_reuse_size - 1 + if size > self.runtime.max_field_reuse_size + else self.runtime.max_field_reuse_size + ) // self.runtime.max_field_reuse_size + # No need to update the credit as the exact size is known + self._need_to_update_match_credit = False + # If the shape is unknown, we set the credit such that every new + # free field leads to a consensus match, and ask the manager + # to update the credit. 
+ else: + self._match_credit = self.runtime.max_field_reuse_frequency + self._need_to_update_match_credit = True def try_reuse_field(self) -> Optional[tuple[Region, int]]: - self._field_match_manager.issue_field_match() + if self._need_to_update_match_credit: + self._update_match_credit() + self._field_match_manager.issue_field_match(self._match_credit) # First, if we have a free field then we know everyone has one of those if len(self.free_fields) > 0: @@ -915,6 +935,12 @@ def __init__(self, core_library: CoreLib) -> None: ty.uint32, ) ) + self.max_field_reuse_size = int( + self._core_context.get_tunable( + legion.LEGATE_CORE_TUNABLE_FIELD_REUSE_SIZE, + ty.uint64, + ) + ) self._field_manager_class = ( ConsensusMatchingFieldManager if self._num_nodes > 1 or self._args.consensus @@ -1246,12 +1272,7 @@ def find_region_manager(self, region: Region) -> RegionManager: return self.region_managers_by_region[region] def revive_manager(self, region_mgr: RegionManager) -> None: - lru_managers: Deque[RegionManager] = deque() - for to_check in self.lru_managers: - if to_check is not region_mgr: - lru_managers.append(to_check) - assert len(lru_managers) < len(self.lru_managers) - self.lru_managers = lru_managers + self.lru_managers.remove(region_mgr) def free_region_manager( self, shape: Shape, region: Region, unordered: bool = False From b66abe0f368ff0634bbd66b2c225ec61fe2bd3fd Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 30 Sep 2022 12:20:28 -0700 Subject: [PATCH 003/121] Driver verbose only for rank 0 or "none" launcher (#403) --- legate/driver/driver.py | 4 +++- tests/unit/legate/driver/test_driver.py | 31 ++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/legate/driver/driver.py b/legate/driver/driver.py index 9548f197b..57dd653d0 100644 --- a/legate/driver/driver.py +++ b/legate/driver/driver.py @@ -88,7 +88,9 @@ def run(self) -> int: """ if self.config.info.verbose: - print_verbose(self.system, self) + # we only want to print verbose output on a "head" node + if self.launcher.kind != "none" or self.launcher.rank_id == "0": + print_verbose(self.system, self) self._darwin_gdb_warn() diff --git a/tests/unit/legate/driver/test_driver.py b/tests/unit/legate/driver/test_driver.py index f6aea4a0b..e346210d3 100644 --- a/tests/unit/legate/driver/test_driver.py +++ b/tests/unit/legate/driver/test_driver.py @@ -23,7 +23,7 @@ import legate.driver.driver as m from legate.driver.args import LAUNCHERS from legate.driver.command import CMD_PARTS -from legate.driver.launcher import Launcher +from legate.driver.launcher import RANK_ENV_VARS, Launcher from legate.driver.system import System from legate.driver.types import LauncherType from legate.driver.ui import scrub @@ -129,6 +129,35 @@ def test_verbose( assert pv_out in run_out + @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) + def test_verbose_nonero_rank_id( + self, + monkeypatch: pytest.MonkeyPatch, + capsys: Capsys, + genconfig: GenConfig, + rank_var: str, + ) -> None: + for name in RANK_ENV_VARS: + monkeypatch.delenv(name, raising=False) + monkeypatch.setenv(name, "1") + + # set --dry-run to avoid needing to mock anything + config = genconfig( + ["--launcher", "none", "--verbose", "--dry-run"], multi_rank=(2, 2) + ) + system = System() + driver = m.Driver(config, system) + + driver.run() + + run_out = scrub(capsys.readouterr()[0]).strip() + + print_verbose(driver.system, driver) + + pv_out = scrub(capsys.readouterr()[0]).strip() + + assert pv_out not in run_out + @pytest.mark.parametrize("launch", 
LAUNCHERS) def test_darwin_gdb_warning( self, From 7671dd7ab5ca780baddb18b915f9d9faa29919bf Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 4 Oct 2022 16:50:04 -0700 Subject: [PATCH 004/121] Legion bug WAR: don't instantiate futures on framebuffer (#413) * Legion bug WAR: don't instantiate futures on framebuffer * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Wrap the workaround with a define Co-authored-by: Manolis Papadakis Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/core/data/store.cc | 6 +++++- src/core/mapping/base_mapper.cc | 6 +++++- src/legate_defines.h | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/core/data/store.cc b/src/core/data/store.cc index 76d4b405f..a4ca73f4b 100644 --- a/src/core/data/store.cc +++ b/src/core/data/store.cc @@ -136,9 +136,13 @@ FutureWrapper::FutureWrapper( #ifdef DEBUG_LEGATE assert(!initialize || future_.get_untyped_size() == field_size); #endif - auto proc = Processor::get_executing_processor(); + auto proc = Processor::get_executing_processor(); +#ifdef LEGATE_NO_FUTURES_ON_FB + auto mem_kind = find_memory_kind_for_executing_processor(); +#else auto mem_kind = proc.kind() == Processor::Kind::TOC_PROC ? Memory::Kind::GPU_FB_MEM : Memory::Kind::SYSTEM_MEM; +#endif if (initialize) { auto p_init_value = future_.get_buffer(mem_kind); #ifdef LEGATE_USE_CUDA diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 739cd8f97..f975fe3f8 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -610,7 +610,11 @@ void BaseMapper::map_task(const MapperContext ctx, if (req_indices.empty()) { // This is a mapping for futures - output.future_locations.push_back(get_target_memory(task.target_proc, mapping.policy.target)); + StoreTarget target = mapping.policy.target; +#ifdef LEGATE_NO_FUTURES_ON_FB + if (target == StoreTarget::FBMEM) target = StoreTarget::ZCMEM; +#endif + output.future_locations.push_back(get_target_memory(task.target_proc, target)); continue; } else if (mapping.for_unbound_stores()) { for (auto req_idx : req_indices) { diff --git a/src/legate_defines.h b/src/legate_defines.h index b7c3dacba..fa215e8e7 100644 --- a/src/legate_defines.h +++ b/src/legate_defines.h @@ -50,3 +50,6 @@ #define LEGATE_USE_NETWORK #endif #endif + +// TODO: 2022-10-04: Work around a Legion bug, by not instantiating futures on framebuffer. 
+#define LEGATE_NO_FUTURES_ON_FB From 3db762906a6948be29e50f41d3ebef555a3eab3e Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 5 Oct 2022 12:13:09 -0700 Subject: [PATCH 005/121] Adjust the schedule of the CI runs (#414) Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32a514e00..5a90f5520 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) schedule: # * is a special character in YAML so you have to quote this string - - cron: '* */4 * * *' + - cron: '0 */6 * * *' env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-core-ci From 9b9e59cdbaeb22661f11a2babc491620ea3362d5 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 5 Oct 2022 13:17:52 -0700 Subject: [PATCH 006/121] Consolidate driver and test driver codebases (#397) * initial import of test driver code * consolidate some utils and types * ignore vscode workspace for now at least * parse_command_args -> parse_library_command_args * factor out colorama * Consolidate types * consolidate ui modules * consolidate system classes * get rid of driver.util * temp compat imports * probable fix for https://github.com/nv-legate/legate.core/issues/393 * bail if legate_module cannot be determined * use singular util * use cwd for default test_root * move custom argparse action to util * fix test after merge --- .gitignore | 2 + legate/core/__init__.py | 3 +- legate/core/runtime.py | 5 +- legate/driver/__init__.py | 1 - legate/driver/args.py | 2 +- legate/driver/command.py | 6 +- legate/driver/config.py | 10 +- legate/driver/driver.py | 62 ++- legate/driver/launcher.py | 13 +- legate/driver/logs.py | 14 +- legate/driver/main.py | 7 +- legate/driver/ui.py | 246 ------------ legate/rc.py | 108 +---- legate/tester/__init__.py | 71 ++++ legate/tester/args.py | 223 +++++++++++ legate/tester/config.py | 163 ++++++++ legate/tester/logger.py | 67 ++++ legate/tester/stages/__init__.py | 41 ++ legate/tester/stages/_linux/__init__.py | 24 ++ legate/tester/stages/_linux/cpu.py | 83 ++++ legate/tester/stages/_linux/eager.py | 75 ++++ legate/tester/stages/_linux/gpu.py | 85 ++++ legate/tester/stages/_linux/omp.py | 87 ++++ legate/tester/stages/_osx/__init__.py | 24 ++ legate/tester/stages/_osx/cpu.py | 68 ++++ legate/tester/stages/_osx/eager.py | 68 ++++ legate/tester/stages/_osx/gpu.py | 54 +++ legate/tester/stages/_osx/omp.py | 74 ++++ legate/tester/stages/test_stage.py | 268 +++++++++++++ legate/tester/stages/util.py | 115 ++++++ legate/tester/test_plan.py | 132 ++++++ legate/tester/test_system.py | 123 ++++++ .../test_types.py => legate/util/__init__.py | 0 legate/util/args.py | 182 +++++++++ legate/util/colors.py | 95 +++++ legate/{driver/util.py => util/fs.py} | 103 +---- legate/{driver => util}/system.py | 53 ++- legate/{driver => util}/types.py | 51 ++- legate/util/ui.py | 345 ++++++++++++++++ tests/unit/__init__.py | 15 + tests/unit/legate/__init__.py | 15 + tests/unit/legate/driver/__init__.py | 15 + tests/unit/legate/driver/conftest.py | 6 +- tests/unit/legate/driver/test_command.py | 8 +- tests/unit/legate/driver/test_config.py | 7 +- tests/unit/legate/driver/test_driver.py | 62 ++- tests/unit/legate/driver/test_launcher.py | 8 +- tests/unit/legate/driver/test_logs.py | 6 +- tests/unit/legate/driver/test_main.py | 6 +- 
tests/unit/legate/driver/test_ui.py | 254 ------------ tests/unit/legate/driver/test_util.py | 131 ------ tests/unit/legate/driver/util.py | 16 +- tests/unit/legate/test_rc.py | 122 +----- tests/unit/legate/tester/__init__.py | 15 + tests/unit/legate/tester/stages/__init__.py | 38 ++ .../legate/tester/stages/_linux/__init__.py | 22 + .../legate/tester/stages/_linux/test_cpu.py | 132 ++++++ .../legate/tester/stages/_linux/test_eager.py | 82 ++++ .../legate/tester/stages/_linux/test_gpu.py | 101 +++++ .../legate/tester/stages/_linux/test_omp.py | 164 ++++++++ .../legate/tester/stages/test_test_stage.py | 88 ++++ tests/unit/legate/tester/stages/test_util.py | 48 +++ tests/unit/legate/tester/test___init__.py | 69 ++++ tests/unit/legate/tester/test_args.py | 89 +++++ tests/unit/legate/tester/test_config.py | 182 +++++++++ tests/unit/legate/tester/test_logger.py | 74 ++++ tests/unit/legate/tester/test_test_system.py | 65 +++ tests/unit/legate/util/__init__.py | 15 + .../{driver => util}/sample_cmake_cache.txt | 0 .../legate/{driver => util}/sample_header.h | 0 tests/unit/legate/util/test_args.py | 187 +++++++++ tests/unit/legate/util/test_colors.py | 103 +++++ tests/unit/legate/util/test_fs.py | 53 +++ .../legate/{driver => util}/test_system.py | 19 +- tests/unit/legate/util/test_types.py | 57 +++ tests/unit/legate/util/test_ui.py | 375 ++++++++++++++++++ tests/unit/util.py | 33 ++ 77 files changed, 4821 insertions(+), 1014 deletions(-) delete mode 100644 legate/driver/ui.py create mode 100644 legate/tester/__init__.py create mode 100644 legate/tester/args.py create mode 100644 legate/tester/config.py create mode 100644 legate/tester/logger.py create mode 100644 legate/tester/stages/__init__.py create mode 100644 legate/tester/stages/_linux/__init__.py create mode 100644 legate/tester/stages/_linux/cpu.py create mode 100644 legate/tester/stages/_linux/eager.py create mode 100644 legate/tester/stages/_linux/gpu.py create mode 100644 legate/tester/stages/_linux/omp.py create mode 100644 legate/tester/stages/_osx/__init__.py create mode 100644 legate/tester/stages/_osx/cpu.py create mode 100644 legate/tester/stages/_osx/eager.py create mode 100644 legate/tester/stages/_osx/gpu.py create mode 100644 legate/tester/stages/_osx/omp.py create mode 100644 legate/tester/stages/test_stage.py create mode 100644 legate/tester/stages/util.py create mode 100644 legate/tester/test_plan.py create mode 100644 legate/tester/test_system.py rename tests/unit/legate/driver/test_types.py => legate/util/__init__.py (100%) create mode 100644 legate/util/args.py create mode 100644 legate/util/colors.py rename legate/{driver/util.py => util/fs.py} (81%) rename legate/{driver => util}/system.py (51%) rename legate/{driver => util}/types.py (68%) create mode 100644 legate/util/ui.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/legate/__init__.py create mode 100644 tests/unit/legate/driver/__init__.py delete mode 100644 tests/unit/legate/driver/test_ui.py delete mode 100644 tests/unit/legate/driver/test_util.py create mode 100644 tests/unit/legate/tester/__init__.py create mode 100644 tests/unit/legate/tester/stages/__init__.py create mode 100644 tests/unit/legate/tester/stages/_linux/__init__.py create mode 100644 tests/unit/legate/tester/stages/_linux/test_cpu.py create mode 100644 tests/unit/legate/tester/stages/_linux/test_eager.py create mode 100644 tests/unit/legate/tester/stages/_linux/test_gpu.py create mode 100644 tests/unit/legate/tester/stages/_linux/test_omp.py create mode 100644 
tests/unit/legate/tester/stages/test_test_stage.py create mode 100644 tests/unit/legate/tester/stages/test_util.py create mode 100644 tests/unit/legate/tester/test___init__.py create mode 100644 tests/unit/legate/tester/test_args.py create mode 100644 tests/unit/legate/tester/test_config.py create mode 100644 tests/unit/legate/tester/test_logger.py create mode 100644 tests/unit/legate/tester/test_test_system.py create mode 100644 tests/unit/legate/util/__init__.py rename tests/unit/legate/{driver => util}/sample_cmake_cache.txt (100%) rename tests/unit/legate/{driver => util}/sample_header.h (100%) create mode 100644 tests/unit/legate/util/test_args.py create mode 100644 tests/unit/legate/util/test_colors.py create mode 100644 tests/unit/legate/util/test_fs.py rename tests/unit/legate/{driver => util}/test_system.py (83%) create mode 100644 tests/unit/legate/util/test_types.py create mode 100644 tests/unit/legate/util/test_ui.py create mode 100644 tests/unit/util.py diff --git a/.gitignore b/.gitignore index 42f7cc262..f7cd920b2 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,5 @@ config.mk .vscode _cmake_test_compile !cmake/versions.json +legate.core.code-workspace + diff --git a/legate/core/__init__.py b/legate/core/__init__.py index 4ad4c308b..8a6beee0a 100644 --- a/legate/core/__init__.py +++ b/legate/core/__init__.py @@ -14,7 +14,8 @@ # from __future__ import annotations -from ..rc import check_legion, parse_command_args +from ..rc import check_legion +from ..util.args import parse_library_command_args check_legion() diff --git a/legate/core/runtime.py b/legate/core/runtime.py index b47624378..c30bc6237 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -24,8 +24,7 @@ from legion_top import add_cleanup_item, top_level -from legate.rc import ArgSpec, Argument, parse_command_args - +from ..util.args import ArgSpec, Argument, parse_library_command_args from . import ffi # Make sure we only have one ffi instance from . import ( Fence, @@ -855,7 +854,7 @@ def __init__(self, core_library: CoreLib) -> None: focus on implementing their domain logic. """ - self._args = parse_command_args("legate", ARGS) + self._args = parse_library_command_args("legate", ARGS) try: self._legion_context = top_level.context[0] diff --git a/legate/driver/__init__.py b/legate/driver/__init__.py index f5803f8a8..b8496597d 100644 --- a/legate/driver/__init__.py +++ b/legate/driver/__init__.py @@ -18,4 +18,3 @@ from .driver import Driver from .launcher import Launcher from .main import main -from .system import System diff --git a/legate/driver/args.py b/legate/driver/args.py index 2e919a2bc..739722170 100755 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -18,8 +18,8 @@ from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser +from ..util.types import LauncherType from . 
import defaults -from .types import LauncherType __all__ = ("parser",) diff --git a/legate/driver/command.py b/legate/driver/command.py index 2c582e4b7..7d11c2c9b 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -16,13 +16,13 @@ from typing import TYPE_CHECKING -from .ui import warn +from ..util.ui import warn if TYPE_CHECKING: + from ..util.system import System + from ..util.types import CommandPart from .config import Config from .launcher import Launcher - from .system import System - from .types import CommandPart __all__ = ("CMD_PARTS",) diff --git a/legate/driver/config.py b/legate/driver/config.py index e90a3454c..c4acb3c41 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -23,10 +23,14 @@ from pathlib import Path from typing import Any +from ..util.types import ( + ArgList, + DataclassMixin, + LauncherType, + object_to_dataclass, +) +from ..util.ui import warn from .args import parser -from .types import ArgList, DataclassMixin, LauncherType -from .ui import warn -from .util import object_to_dataclass __all__ = ("Config",) diff --git a/legate/driver/driver.py b/legate/driver/driver.py index 57dd653d0..7a3e00c40 100644 --- a/legate/driver/driver.py +++ b/legate/driver/driver.py @@ -14,18 +14,22 @@ # from __future__ import annotations +from shlex import quote from subprocess import run +from textwrap import indent +from typing import TYPE_CHECKING +from ..util.system import System +from ..util.ui import kvtable, rule, section, value, warn from .command import CMD_PARTS from .config import Config from .launcher import Launcher from .logs import process_logs -from .system import System -from .types import Command, EnvDict -from .ui import warn -from .util import print_verbose -__all__ = ("Driver",) +if TYPE_CHECKING: + from ..util.types import Command, EnvDict + +__all__ = ("Driver", "print_verbose") _DARWIN_GDB_WARN = """\ You must start the debugging session with the following command, @@ -113,3 +117,51 @@ def _darwin_gdb_warn(self) -> None: ) ) ) + + +def print_verbose( + system: System, + driver: Driver | None = None, +) -> None: + """Print system and driver configuration values. 
+ + Parameters + ---------- + system : System + A System instance to obtain Legate and Legion paths from + + driver : Driver or None, optional + If not None, a Driver instance to obtain command invocation and + environment from (default: None) + + Returns + ------- + None + + """ + + print(f"\n{rule('Legion Python Configuration')}") + + print(section("\nLegate paths:")) + print(indent(str(system.legate_paths), prefix=" ")) + + print(section("\nLegion paths:")) + print(indent(str(system.legion_paths), prefix=" ")) + + if driver: + print(section("\nCommand:")) + cmd = " ".join(quote(t) for t in driver.cmd) + print(f" {value(cmd)}") + + if keys := sorted(driver.custom_env_vars): + print(section("\nCustomized Environment:")) + print( + indent( + kvtable(driver.env, delim="=", align=False, keys=keys), + prefix=" ", + ) + ) + + print(f"\n{rule()}") + + print(flush=True) diff --git a/legate/driver/launcher.py b/legate/driver/launcher.py index 922eb4f6f..009b0cf6b 100644 --- a/legate/driver/launcher.py +++ b/legate/driver/launcher.py @@ -17,12 +17,15 @@ import os import sys from pathlib import Path +from typing import TYPE_CHECKING -from .config import Config -from .system import System -from .types import Command, EnvDict, LauncherType -from .ui import warn -from .util import read_c_define +from ..util.fs import read_c_define +from ..util.ui import warn + +if TYPE_CHECKING: + from ..util.system import System + from ..util.types import Command, EnvDict, LauncherType + from .config import Config __all__ = ("Launcher",) diff --git a/legate/driver/logs.py b/legate/driver/logs.py index 1173e8486..261ab6dd5 100644 --- a/legate/driver/logs.py +++ b/legate/driver/logs.py @@ -22,13 +22,15 @@ from contextlib import contextmanager from shlex import quote from subprocess import run -from typing import Iterator +from typing import TYPE_CHECKING, Iterator -from .config import Config -from .launcher import Launcher -from .system import System -from .types import Command -from .ui import warn +from ..util.ui import warn + +if TYPE_CHECKING: + from ..util.system import System + from ..util.types import Command + from .config import Config + from .launcher import Launcher __all__ = ( "DebuggingHandler", diff --git a/legate/driver/main.py b/legate/driver/main.py index c2e0ac577..2ca3f04be 100644 --- a/legate/driver/main.py +++ b/legate/driver/main.py @@ -34,9 +34,10 @@ def main(argv: list[str]) -> int: int, a process return code """ - from . import Config, Driver, System - from .ui import error - from .util import print_verbose + from ..util.system import System + from ..util.ui import error + from . import Config, Driver + from .driver import print_verbose try: config = Config(argv) diff --git a/legate/driver/ui.py b/legate/driver/ui.py deleted file mode 100644 index e6f5ee37d..000000000 --- a/legate/driver/ui.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Helper functions for simple text UI output. 
- -The color functions in this module require ``colorama`` to be installed in -order to generate color output. If ``colorama`` is not available, plain -text output (i.e. without ANSI color codes) will be generated. - -""" -from __future__ import annotations - -import re -import sys -from typing import Any, Iterable - -__all__ = ( - "bright", - "cyan", - "dim", - "error", - "green", - "key", - "kvtable", - "magenta", - "red", - "rule", - "scrub", - "section", - "value", - "warn", - "white", - "yellow", -) - - -def _text(text: str) -> str: - return text - - -try: - import colorama # type: ignore[import] - - def bright(text: str) -> str: - return f"{colorama.Style.BRIGHT}{text}{colorama.Style.RESET_ALL}" - - def dim(text: str) -> str: - return f"{colorama.Style.DIM}{text}{colorama.Style.RESET_ALL}" - - def white(text: str) -> str: - return f"{colorama.Fore.WHITE}{text}{colorama.Style.RESET_ALL}" - - def cyan(text: str) -> str: - return f"{colorama.Fore.CYAN}{text}{colorama.Style.RESET_ALL}" - - def red(text: str) -> str: - return f"{colorama.Fore.RED}{text}{colorama.Style.RESET_ALL}" - - def magenta(text: str) -> str: - return f"{colorama.Fore.MAGENTA}{text}{colorama.Style.RESET_ALL}" - - def green(text: str) -> str: - return f"{colorama.Fore.GREEN}{text}{colorama.Style.RESET_ALL}" - - def yellow(text: str) -> str: - return f"{colorama.Fore.YELLOW}{text}{colorama.Style.RESET_ALL}" - - if sys.platform == "win32": - colorama.init() - -except ImportError: - - bright = dim = white = cyan = red = magenta = green = yellow = _text - -# ref: https://stackoverflow.com/a/14693789 -_ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") - - -def error(text: str) -> str: - """Format text as an error. - - Parameters - ---------- - text : str - The text to format - - Returns - ------- - str - - """ - return red(f"ERROR: {text}") - - -def key(text: str) -> str: - """Format a 'key' from a key-value pair. - - Parameters - ---------- - text : str - The key to format - - Returns - ------- - str - - """ - return dim(green(text)) - - -def value(text: str) -> str: - """Format a 'value' from of a key-value pair. - - Parameters - ---------- - text : str - The key to format - - Returns - ------- - str - - """ - return yellow(text) - - -def kvtable( - items: dict[str, Any], - *, - delim: str = " : ", - align: bool = True, - keys: Iterable[str] | None = None, -) -> str: - """Format a dictionay as a table of key-value pairs. 
- - Parameters - ---------- - items : dict[str, Any] - The dictionary of items to format - - delim : str, optional - A delimiter to display between keys and values (default: " : ") - - align : bool, optional - Whether to align delimiters to the longest key length (default: True) - - keys : Iterable[str] or None, optional - If not None, only the specified subset of keys is included in the - table output (default: None) - - Returns - ------- - str - - """ - # annoying but necessary to take len on color-formatted version - N = max(len(key(k)) for k in items) if align else 0 - - keys = items.keys() if keys is None else keys - - return "\n".join( - f"{key(k): <{N}}{delim}{value(str(items[k]))}" for k in keys - ) - - -def rule(text: str | None = None, *, char: str = "-", N: int = 80) -> str: - """Format a horizontal rule, optionally with text - - Parameters - ---------- - text : str or None, optional - If not None, display this text inline in the rule (default: None) - - char: str, optional - A character to use for the rule (default: "-") - - N : int, optional - Character width for the rule (default: 80) - - Returns - ------- - str - - """ - if text is None: - return cyan(char * N) - return cyan(char * 3 + f"{f' {text} ' :{char}<{N-3}}") - - -def section(text: str) -> str: - """Format text as a section header - - Parameters - ---------- - text : str - The text to format - - Returns - ------- - str - - """ - return bright(white(text)) - - -def scrub(text: str) -> str: - """Remove ANSI color codes from a text string. - - Parameters - ---------- - text : str - The text to scrub - - Returns - ------- - str - - """ - return _ANSI_ESCAPE.sub("", text) - - -def warn(text: str) -> str: - """Format text as a warning. - - Parameters - ---------- - text : str - The text to format - - Returns - ------- - str - - """ - return magenta(f"WARNING: {text}") diff --git a/legate/rc.py b/legate/rc.py index 6a54cc530..bd4abca51 100644 --- a/legate/rc.py +++ b/legate/rc.py @@ -14,14 +14,6 @@ # from __future__ import annotations -import sys -import warnings -from argparse import ArgumentParser, Namespace -from dataclasses import dataclass, fields -from typing import Any, Iterable, Literal, Sequence, Type, TypeVar, Union - -from typing_extensions import TypeAlias - LEGION_WARNING = """ All Legate programs must be run with a legion_python interperter. We @@ -35,6 +27,13 @@ legion_python directly. """ +# TODO (bv) temp transitive imports until cunumeric is updated +from .util.args import ( # noqa + ArgSpec, + Argument, + parse_library_command_args as parse_command_args, +) + def has_legion_context() -> bool: """Determine whether we are running in legion_python. 
@@ -55,96 +54,3 @@ def check_legion(msg: str = LEGION_WARNING) -> None: """Raise an error if we are not running in legion_python.""" if not has_legion_context(): raise RuntimeError(msg) - - -class _UnsetType: - pass - - -Unset = _UnsetType() - -_T = TypeVar("_T") -NotRequired = Union[_UnsetType, _T] - - -def entries(obj: Any) -> Iterable[tuple[str, Any]]: - for f in fields(obj): - value = getattr(obj, f.name) - if value is not Unset: - yield (f.name, value) - - -# https://docs.python.org/3/library/argparse.html#action -ActionType: TypeAlias = Literal[ - "store", - "store_const", - "store_true", - "append", - "append_const", - "count", - "help", - "version", - "extend", -] - -# https://docs.python.org/3/library/argparse.html#nargs -NargsType: TypeAlias = Literal["?", "*", "+", "..."] - - -@dataclass(frozen=True) -class ArgSpec: - dest: str - action: NotRequired[ActionType] = "store_true" - nargs: NotRequired[Union[int, NargsType]] = Unset - const: NotRequired[Any] = Unset - default: NotRequired[Any] = Unset - type: NotRequired[Type[Any]] = Unset - choices: NotRequired[Sequence[Any]] = Unset - help: NotRequired[str] = Unset - metavar: NotRequired[str] = Unset - - -@dataclass(frozen=True) -class Argument: - name: str - spec: ArgSpec - - -def parse_command_args(libname: str, args: Iterable[Argument]) -> Namespace: - """ """ - if not libname.isidentifier(): - raise ValueError( - f"Invalid library {libname!r} for command line arguments" - ) - - parser = ArgumentParser( - prog=f"<{libname} program>", add_help=False, allow_abbrev=False - ) - - lib_prefix = f"-{libname}:" - - argnames = [arg.name for arg in args] - - for arg in args: - argname = f"{lib_prefix}{arg.name}" - kwargs = dict(entries(arg.spec)) - parser.add_argument(argname, **kwargs) - - has_custom_help = "help" in argnames - - if f"{lib_prefix}help" in sys.argv and not has_custom_help: - parser.print_help() - sys.exit() - - args, extra = parser.parse_known_args() - - for item in extra: - if item.startswith(lib_prefix): - warnings.warn( - f"Unrecognized argument {item!r} for {libname} (passed on as-is)" # noqa: E501 - ) - break - - sys.argv = sys.argv[:1] + extra - - return args diff --git a/legate/tester/__init__.py b/legate/tester/__init__.py new file mode 100644 index 000000000..270abcf8d --- /dev/null +++ b/legate/tester/__init__.py @@ -0,0 +1,71 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utilities and helpers for implementing the Cunumeric custom test runner. + +""" +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, TypeAlias + +#: Define the available feature types for tests +FeatureType: TypeAlias = Union[ + Literal["cpus"], Literal["cuda"], Literal["eager"], Literal["openmp"] +] + +#: Value to use if --cpus is not specified. +DEFAULT_CPUS_PER_NODE = 4 + +#: Value to use if --gpus is not specified. 
+DEFAULT_GPUS_PER_NODE = 1 + +# Delay to introduce between GPU test invocations (ms) +DEFAULT_GPU_DELAY = 2000 + +# Value to use if --fbmem is not specified (MB) +DEFAULT_GPU_MEMORY_BUDGET = 4096 + +#: Value to use if --omps is not specified. +DEFAULT_OMPS_PER_NODE = 1 + +#: Value to use if --ompthreads is not specified. +DEFAULT_OMPTHREADS = 4 + +#: Default values to apply to normalize the testing environment. +DEFAULT_PROCESS_ENV = { + "LEGATE_TEST": "1", +} + +#: Feature values that are accepted for --use, in the relative order +#: that the corresponding test stages should always execute in +FEATURES: tuple[FeatureType, ...] = ( + "cpus", + "cuda", + "eager", + "openmp", +) + +#: Paths to example files that should be skipped. +SKIPPED_EXAMPLES = { + "examples/ingest.py", + "examples/kmeans_sort.py", + "examples/lstm_full.py", + "examples/wgrad.py", +} + +#: Extra arguments to supply when specific examples are executed. +PER_FILE_ARGS = { + "examples/lstm_full.py": ["--file", "resources/lstm_input.txt"], +} diff --git a/legate/tester/args.py b/legate/tester/args.py new file mode 100644 index 000000000..6c3f24962 --- /dev/null +++ b/legate/tester/args.py @@ -0,0 +1,223 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide an argparse ArgumentParser for the test runner. + +""" +from __future__ import annotations + +from argparse import ArgumentParser +from typing import Literal, Union + +from typing_extensions import TypeAlias + +from ..util.args import ExtendAction, MultipleChoices +from . import ( + DEFAULT_CPUS_PER_NODE, + DEFAULT_GPU_DELAY, + DEFAULT_GPU_MEMORY_BUDGET, + DEFAULT_GPUS_PER_NODE, + DEFAULT_OMPS_PER_NODE, + DEFAULT_OMPTHREADS, + FEATURES, +) + +PinOptionsType: TypeAlias = Union[ + Literal["partial"], + Literal["none"], + Literal["strict"], +] + +PIN_OPTIONS: tuple[PinOptionsType, ...] 
= ( + "partial", + "none", + "strict", +) + + +#: The argument parser for test.py +parser = ArgumentParser( + description="Run the Cunumeric test suite", + epilog="Any extra arguments will be forwarded to the Legate script", +) + + +stages = parser.add_argument_group("Feature stage selection") + + +stages.add_argument( + "--use", + dest="features", + action=ExtendAction, + choices=MultipleChoices(sorted(FEATURES)), + type=lambda s: s.split(","), # type: ignore + help="Test Legate with features (also via USE_*)", +) + + +selection = parser.add_argument_group("Test file selection") + + +selection.add_argument( + "--files", + nargs="+", + default=None, + help="Explicit list of test files to run", +) + + +selection.add_argument( + "--unit", + dest="unit", + action="store_true", + default=False, + help="Include unit tests", +) + + +feature_opts = parser.add_argument_group("Feature stage configuration options") + + +feature_opts.add_argument( + "--cpus", + dest="cpus", + type=int, + default=DEFAULT_CPUS_PER_NODE, + help="Number of CPUs per node to use", +) + + +feature_opts.add_argument( + "--gpus", + dest="gpus", + type=int, + default=DEFAULT_GPUS_PER_NODE, + help="Number of GPUs per node to use", +) + + +feature_opts.add_argument( + "--omps", + dest="omps", + type=int, + default=DEFAULT_OMPS_PER_NODE, + help="Number OpenMP processors per node to use", +) + + +feature_opts.add_argument( + "--utility", + dest="utility", + type=int, + default=1, + help="Number of of utility CPUs to reserve for runtime services", +) + + +feature_opts.add_argument( + "--cpu-pin", + dest="cpu_pin", + choices=PIN_OPTIONS, + default="partial", + help="CPU pinning behavior on platforms that support CPU pinning", +) + +feature_opts.add_argument( + "--gpu-delay", + dest="gpu_delay", + type=int, + default=DEFAULT_GPU_DELAY, + help="Delay to introduce between GPU tests (ms)", +) + + +feature_opts.add_argument( + "--fbmem", + dest="fbmem", + type=int, + default=DEFAULT_GPU_MEMORY_BUDGET, + help="GPU framebuffer memory (MB)", +) + + +feature_opts.add_argument( + "--ompthreads", + dest="ompthreads", + metavar="THREADS", + type=int, + default=DEFAULT_OMPTHREADS, + help="Number of threads per OpenMP processor", +) + + +test_opts = parser.add_argument_group("Test run configuration options") + + +test_opts.add_argument( + "--legate", + dest="legate_dir", + metavar="LEGATE_DIR", + action="store", + default=None, + required=False, + help="Path to Legate installation directory", +) + + +test_opts.add_argument( + "-C", + "--directory", + dest="test_root", + metavar="DIR", + action="store", + default=None, + required=False, + help="Root directory containing the tests subdirectory", +) + + +test_opts.add_argument( + "-j", + "--workers", + dest="workers", + type=int, + default=None, + help="Number of parallel workers for testing", +) + + +test_opts.add_argument( + "-v", + "--verbose", + dest="verbose", + action="count", + default=0, + help="Display verbose output. 
Use -vv for even more output (test stdout)", +) + + +test_opts.add_argument( + "--dry-run", + dest="dry_run", + action="store_true", + help="Print the test plan but don't run anything", +) + + +test_opts.add_argument( + "--debug", + dest="debug", + action="store_true", + help="Print out the commands that are to be executed", +) diff --git a/legate/tester/config.py b/legate/tester/config.py new file mode 100644 index 000000000..a621ba8c3 --- /dev/null +++ b/legate/tester/config.py @@ -0,0 +1,163 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import os +from argparse import Namespace +from pathlib import Path + +from ..util.types import ArgList, EnvDict +from . import DEFAULT_PROCESS_ENV, FEATURES, SKIPPED_EXAMPLES, FeatureType +from .args import parser + + +class Config: + """A centralized configuration object that provides the information + needed by test stages in order to run. + + Parameters + ---------- + argv : ArgList + command-line arguments to use when building the configuration + + """ + + def __init__(self, argv: ArgList) -> None: + args, self._extra_args = parser.parse_known_args(argv[1:]) + + # which tests to run + self.examples = True + self.integration = True + self.unit = args.unit + self.files = args.files + + # feature configuration + self.features = self._compute_features(args) + + # feature options for integration tests + self.cpus = args.cpus + self.gpus = args.gpus + self.omps = args.omps + self.utility = args.utility + self.cpu_pin = args.cpu_pin + self.fbmem = args.fbmem + self.gpu_delay = args.gpu_delay + self.ompthreads = args.ompthreads + + # test run configuration + self.debug = args.debug + self.dry_run = args.dry_run + self.verbose = args.verbose + self.test_root = args.test_root + self.requested_workers = args.workers + self.legate_dir = self._compute_legate_dir(args) + + @property + def env(self) -> EnvDict: + """Custom environment settings used for process exectution.""" + return dict(DEFAULT_PROCESS_ENV) + + @property + def extra_args(self) -> ArgList: + """Extra command-line arguments to pass on to individual test files.""" + return self._extra_args + + @property + def root_dir(self) -> Path: + """Path to the directory containing the tests.""" + if self.test_root: + return Path(self.test_root) + + # if not explicitly given, just use cwd assuming we are at a repo top + return Path(os.getcwd()) + + @property + def test_files(self) -> tuple[Path, ...]: + """List of all test files to use for each stage. + + An explicit list of files from the command line will take precedence. + + Otherwise, the files are computed based on command-line options, etc. 
+ + """ + if self.files: + return self.files + + files = [] + + if self.examples: + examples = ( + path.relative_to(self.root_dir) + for path in self.root_dir.joinpath("examples").glob("*.py") + if str(path.relative_to(self.root_dir)) not in SKIPPED_EXAMPLES + ) + files.extend(sorted(examples)) + + if self.integration: + integration_tests = ( + path.relative_to(self.root_dir) + for path in self.root_dir.joinpath("tests/integration").glob( + "*.py" + ) + ) + files.extend(sorted(integration_tests)) + + if self.unit: + unit_tests = ( + path.relative_to(self.root_dir) + for path in self.root_dir.joinpath("tests/unit").glob( + "**/*.py" + ) + ) + files.extend(sorted(unit_tests)) + + return tuple(files) + + @property + def legate_path(self) -> str: + """Computed path to the legate driver script""" + if self.legate_dir is None: + return "legate" + return str(self.legate_dir / "bin" / "legate") + + def _compute_features(self, args: Namespace) -> tuple[FeatureType, ...]: + if args.features is not None: + computed = args.features + else: + computed = [ + feature + for feature in FEATURES + if os.environ.get(f"USE_{feature.upper()}", None) == "1" + ] + + # if nothing is specified any other way, at least run CPU stage + if len(computed) == 0: + computed.append("cpus") + + return tuple(computed) + + def _compute_legate_dir(self, args: Namespace) -> Path | None: + # self._legate_source below is purely for testing + if args.legate_dir: + self._legate_source = "cmd" + return Path(args.legate_dir) + elif "LEGATE_DIR" in os.environ: + self._legate_source = "env" + return Path(os.environ["LEGATE_DIR"]) + self._legate_source = "install" + return None diff --git a/legate/tester/logger.py b/legate/tester/logger.py new file mode 100644 index 000000000..f40904219 --- /dev/null +++ b/legate/tester/logger.py @@ -0,0 +1,67 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide a basic logger that can scrub ANSI color codes. 
+ +""" +from __future__ import annotations + +import re + +# ref: https://stackoverflow.com/a/14693789 +_ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + + +class Log: + def __init__(self) -> None: + self._record: list[str] = [] + + def __call__(self, *lines: str) -> tuple[int, int]: + return self.record(*lines) + + def record(self, *lines: str) -> tuple[int, int]: + if len(lines) == 1 and "\n" in lines[0]: + lines = tuple(lines[0].split("\n")) + + start = len(self._record) + for line in lines: + self._record.append(line) + print(line, flush=True) + return (start, len(self._record)) + + def clear(self) -> None: + self._record = [] + + def dump( + self, + *, + start: int = 0, + end: int | None = None, + filter_ansi: bool = True, + ) -> str: + lines = self._record[start:end] + + if filter_ansi: + full_text = _ANSI_ESCAPE.sub("", "\n".join(lines)) + else: + full_text = "\n".join(lines) + + return full_text + + @property + def lines(self) -> tuple[str, ...]: + return tuple(self._record) + + +LOG = Log() diff --git a/legate/tester/stages/__init__.py b/legate/tester/stages/__init__.py new file mode 100644 index 000000000..fa8f916d5 --- /dev/null +++ b/legate/tester/stages/__init__.py @@ -0,0 +1,41 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide TestStage subclasses for running configured test files using +specific features. + +""" +from __future__ import annotations + +import sys +from typing import Dict, Type + +from .. import FeatureType +from .test_stage import TestStage +from .util import log_proc + +if sys.platform == "darwin": + from ._osx import CPU, Eager, GPU, OMP +elif sys.platform.startswith("linux"): + from ._linux import CPU, Eager, GPU, OMP +else: + raise RuntimeError(f"unsupported platform: {sys.platform}") + +#: All the available test stages that can be selected +STAGES: Dict[FeatureType, Type[TestStage]] = { + "cpus": CPU, + "cuda": GPU, + "openmp": OMP, + "eager": Eager, +} diff --git a/legate/tester/stages/_linux/__init__.py b/legate/tester/stages/_linux/__init__.py new file mode 100644 index 000000000..032305f9c --- /dev/null +++ b/legate/tester/stages/_linux/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide TestStage subclasses for running configured test files using +specific features on linux platforms. 
+ +""" +from __future__ import annotations + +from .cpu import CPU +from .gpu import GPU +from .eager import Eager +from .omp import OMP diff --git a/legate/tester/stages/_linux/cpu.py b/legate/tester/stages/_linux/cpu.py new file mode 100644 index 000000000..deb5610a6 --- /dev/null +++ b/legate/tester/stages/_linux/cpu.py @@ -0,0 +1,83 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import ( + CUNUMERIC_TEST_ARG, + UNPIN_ENV, + Shard, + StageSpec, + adjust_workers, +) + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class CPU(TestStage): + """A test stage for exercising CPU features. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "cpus" + + args = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV) + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + args = [ + "--cpus", + str(config.cpus), + ] + if config.cpu_pin != "none": + args += [ + "--cpu-bind", + ",".join(str(x) for x in shard), + ] + return args + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + cpus = system.cpus + + procs = config.cpus + config.utility + int(config.cpu_pin == "strict") + workers = adjust_workers(len(cpus) // procs, config.requested_workers) + + shards: list[tuple[int, ...]] = [] + for i in range(workers): + shard_cpus = range(i * procs, (i + 1) * procs) + shard = chain.from_iterable(cpus[j].ids for j in shard_cpus) + shards.append(tuple(sorted(shard))) + + return StageSpec(workers, shards) diff --git a/legate/tester/stages/_linux/eager.py b/legate/tester/stages/_linux/eager.py new file mode 100644 index 000000000..cc9a08d5a --- /dev/null +++ b/legate/tester/stages/_linux/eager.py @@ -0,0 +1,75 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import Shard, StageSpec, adjust_workers + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class Eager(TestStage): + """A test stage for exercising Eager Numpy execution features. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "eager" + + args: ArgList = [] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + # Raise min chunk sizes for deferred codepaths to force eager execution + env = { + "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", + "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", + "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", + } + return env + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return [ + "--cpus", + "1", + "--cpu-bind", + ",".join(str(x) for x in shard), + ] + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + N = len(system.cpus) + + degree = min(N, 60) # ~LEGION_MAX_NUM_PROCS just in case + workers = adjust_workers(degree, config.requested_workers) + + # Just put each worker on its own full CPU for eager tests + shards = [cpu.ids for cpu in system.cpus] + + return StageSpec(workers, shards) diff --git a/legate/tester/stages/_linux/gpu.py b/legate/tester/stages/_linux/gpu.py new file mode 100644 index 000000000..f1a222fc0 --- /dev/null +++ b/legate/tester/stages/_linux/gpu.py @@ -0,0 +1,85 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import time +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import CUNUMERIC_TEST_ARG, Shard, StageSpec, adjust_workers + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + +BLOAT_FACTOR = 1.5 # hard coded for now + + +class GPU(TestStage): + """A test stage for exercising GPU features. 
+ + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "cuda" + + args = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return {} + + def delay(self, shard: Shard, config: Config, system: TestSystem) -> None: + time.sleep(config.gpu_delay / 1000) + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return [ + "--fbmem", + str(config.fbmem), + "--gpus", + str(len(shard)), + "--gpu-bind", + ",".join(str(x) for x in shard), + ] + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + N = len(system.gpus) + degree = N // config.gpus + + fbsize = min(gpu.total for gpu in system.gpus) / (2 << 20) # MB + oversub_factor = int(fbsize // (config.fbmem * BLOAT_FACTOR)) + workers = adjust_workers( + degree * oversub_factor, config.requested_workers + ) + + # https://docs.python.org/3/library/itertools.html#itertools-recipes + # grouper('ABCDEF', 3) --> ABC DEF + args = [iter(range(degree * config.gpus))] * config.gpus + per_worker_shards = list(zip(*args)) + + shards = per_worker_shards * workers + + return StageSpec(workers, shards) diff --git a/legate/tester/stages/_linux/omp.py b/legate/tester/stages/_linux/omp.py new file mode 100644 index 000000000..f7af3e9d0 --- /dev/null +++ b/legate/tester/stages/_linux/omp.py @@ -0,0 +1,87 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import ( + CUNUMERIC_TEST_ARG, + UNPIN_ENV, + Shard, + StageSpec, + adjust_workers, +) + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class OMP(TestStage): + """A test stage for exercising OpenMP features. 
+ + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "openmp" + + args = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV) + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + args = [ + "--omps", + str(config.omps), + "--ompthreads", + str(config.ompthreads), + ] + if config.cpu_pin != "none": + args += [ + "--cpu-bind", + ",".join(str(x) for x in shard), + ] + return args + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + cpus = system.cpus + omps, threads = config.omps, config.ompthreads + procs = ( + omps * threads + config.utility + int(config.cpu_pin == "strict") + ) + workers = adjust_workers(len(cpus) // procs, config.requested_workers) + + shards: list[tuple[int, ...]] = [] + for i in range(workers): + shard_cpus = range(i * procs, (i + 1) * procs) + shard = chain.from_iterable(cpus[j].ids for j in shard_cpus) + shards.append(tuple(sorted(shard))) + + return StageSpec(workers, shards) diff --git a/legate/tester/stages/_osx/__init__.py b/legate/tester/stages/_osx/__init__.py new file mode 100644 index 000000000..80a7c368d --- /dev/null +++ b/legate/tester/stages/_osx/__init__.py @@ -0,0 +1,24 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide TestStage subclasses for running configured test files using +specific features on OSX. + +""" +from __future__ import annotations + +from .cpu import CPU +from .gpu import GPU +from .eager import Eager +from .omp import OMP diff --git a/legate/tester/stages/_osx/cpu.py b/legate/tester/stages/_osx/cpu.py new file mode 100644 index 000000000..182a6d76b --- /dev/null +++ b/legate/tester/stages/_osx/cpu.py @@ -0,0 +1,68 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import ( + CUNUMERIC_TEST_ARG, + UNPIN_ENV, + Shard, + StageSpec, + adjust_workers, +) + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class CPU(TestStage): + """A test stage for exercising CPU features. 
+ + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "cpus" + + args = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return UNPIN_ENV + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return ["--cpus", str(config.cpus)] + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + procs = config.cpus + config.utility + workers = adjust_workers( + len(system.cpus) // procs, config.requested_workers + ) + + # return a dummy set of shards just for the runner to iterate over + return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/legate/tester/stages/_osx/eager.py b/legate/tester/stages/_osx/eager.py new file mode 100644 index 000000000..b32feb17d --- /dev/null +++ b/legate/tester/stages/_osx/eager.py @@ -0,0 +1,68 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import UNPIN_ENV, Shard, StageSpec, adjust_workers + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class Eager(TestStage): + """A test stage for exercising Eager Numpy execution features. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "eager" + + args: ArgList = [] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + # Raise min chunk sizes for deferred codepaths to force eager execution + env = { + "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", + "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", + "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", + } + env.update(UNPIN_ENV) + return env + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return ["--cpus", "1"] + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + N = len(system.cpus) + degree = min(N, 60) # ~LEGION_MAX_NUM_PROCS just in case + workers = adjust_workers(degree, config.requested_workers) + + # return a dummy set of shards just for the runner to iterate over + return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/legate/tester/stages/_osx/gpu.py b/legate/tester/stages/_osx/gpu.py new file mode 100644 index 000000000..2a1597494 --- /dev/null +++ b/legate/tester/stages/_osx/gpu.py @@ -0,0 +1,54 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import time +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import CUNUMERIC_TEST_ARG, UNPIN_ENV, Shard + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class GPU(TestStage): + """A test stage for exercising GPU features. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "cuda" + + args: ArgList = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + raise RuntimeError("GPU test are not supported on OSX") + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return UNPIN_ENV + + def delay(self, shard: Shard, config: Config, system: TestSystem) -> None: + time.sleep(config.gpu_delay / 1000) diff --git a/legate/tester/stages/_osx/omp.py b/legate/tester/stages/_osx/omp.py new file mode 100644 index 000000000..eb279791a --- /dev/null +++ b/legate/tester/stages/_osx/omp.py @@ -0,0 +1,74 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ..test_stage import TestStage +from ..util import ( + CUNUMERIC_TEST_ARG, + UNPIN_ENV, + Shard, + StageSpec, + adjust_workers, +) + +if TYPE_CHECKING: + from ....util.types import ArgList, EnvDict + from ... import FeatureType + from ...config import Config + from ...test_system import TestSystem + + +class OMP(TestStage): + """A test stage for exercising OpenMP features. 
+ + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType = "openmp" + + args = [CUNUMERIC_TEST_ARG] + + def __init__(self, config: Config, system: TestSystem) -> None: + self._init(config, system) + + def env(self, config: Config, system: TestSystem) -> EnvDict: + return UNPIN_ENV + + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return [ + "--omps", + str(config.omps), + "--ompthreads", + str(config.ompthreads), + ] + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + omps, threads = config.omps, config.ompthreads + procs = omps * threads + config.utility + workers = adjust_workers( + len(system.cpus) // procs, config.requested_workers + ) + + # return a dummy set of shards just for the runner to iterate over + return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/legate/tester/stages/test_stage.py b/legate/tester/stages/test_stage.py new file mode 100644 index 000000000..c21fdd630 --- /dev/null +++ b/legate/tester/stages/test_stage.py @@ -0,0 +1,268 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import multiprocessing +from datetime import datetime +from pathlib import Path + +from typing_extensions import Protocol + +from ...util.colors import yellow +from ...util.types import ArgList, EnvDict +from ...util.ui import banner, summary +from .. import PER_FILE_ARGS, FeatureType +from ..config import Config +from ..test_system import ProcessResult, TestSystem +from .util import Shard, StageResult, StageSpec, log_proc + + +class TestStage(Protocol): + """Encapsulate running configured test files using specific features. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + kind: FeatureType + + #: The computed specification for processes to launch to run the + #: configured test files. + spec: StageSpec + + #: The computed sharding id sets to use for job runs + shards: multiprocessing.Queue[Shard] + + #: After the stage completes, results will be stored here + result: StageResult + + #: Any fixed stage-specific command-line args to pass + args: ArgList + + # --- Protocol methods + + def __init__(self, config: Config, system: TestSystem) -> None: + ... + + def env(self, config: Config, system: TestSystem) -> EnvDict: + """Generate stage-specific customizations to the process env + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + ... + + def delay(self, shard: Shard, config: Config, system: TestSystem) -> None: + """Wait any delay that should be applied before running the next + test. + + Parameters + ---------- + shard: Shard + The shard to be used for the next test that is run + + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + ... 
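[Editor's note] The TestStage protocol being defined here is easiest to see with a concrete instance. Below is a minimal sketch, not part of this patch; the class name ToyStage and its choices are hypothetical, and it assumes the legate.tester modules added by this patch series are importable. It implements the protocol methods declared around this point (env, delay, shard_args, compute_spec) in the simplest possible way.

    from legate.tester.stages.test_stage import TestStage
    from legate.tester.stages.util import StageSpec, adjust_workers

    class ToyStage(TestStage):
        """Hypothetical stage: run every test file on a single CPU."""

        kind = "cpus"
        args = []

        def __init__(self, config, system):
            self._init(config, system)  # computes self.spec and fills the shard queue

        def env(self, config, system):
            return {}  # no stage-specific environment overrides

        def delay(self, shard, config, system):
            pass  # no delay needed between test launches

        def shard_args(self, shard, config):
            return ["--cpus", "1"]  # fixed per-process launch arguments

        def compute_spec(self, config, system):
            # one dummy shard per worker, at most one worker per available CPU
            workers = adjust_workers(len(system.cpus), config.requested_workers)
            return StageSpec(workers, [(i,) for i in range(workers)])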
+ + def shard_args(self, shard: Shard, config: Config) -> ArgList: + """Generate the command line arguments necessary to launch + the next test process on the given shard. + + Parameters + ---------- + shard: Shard + The shard to be used for the next test that is run + + config: Config + Test runner configuration + + """ + ... + + def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: + """Compute the number of worker processes to launch and stage shards + to use for running the configured test files. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + ... + + # --- Shared implementation methods + + def __call__(self, config: Config, system: TestSystem) -> None: + """Execute this test stage. + + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + t0 = datetime.now() + procs = self._launch(config, system) + t1 = datetime.now() + + self.result = StageResult(procs, t1 - t0) + + @property + def name(self) -> str: + """A stage name to display for tests in this stage.""" + return self.__class__.__name__ + + @property + def intro(self) -> str: + """An informative banner to display at stage end.""" + workers = self.spec.workers + workers_text = f"{workers} worker{'s' if workers > 1 else ''}" + return ( + banner(f"Entering stage: {self.name} (with {workers_text})") + "\n" + ) + + @property + def outro(self) -> str: + """An informative banner to display at stage end.""" + total, passed = self.result.total, self.result.passed + + result = summary(self.name, total, passed, self.result.time) + + footer = banner( + f"Exiting stage: {self.name}", + details=( + "* Results : " + + yellow( + f"{passed} / {total} files passed " # noqa E500 + f"({passed/total*100:0.1f}%)" + if total > 0 + else "0 tests are running, Please check " + ), + "* Elapsed time : " + yellow(f"{self.result.time}"), + ), + ) + + return f"{result}\n{footer}" + + def file_args(self, test_file: Path, config: Config) -> ArgList: + """Extra command line arguments based on the test file. + + Parameters + ---------- + test_file : Path + Path to a test file + + config: Config + Test runner configuration + + """ + test_file_string = str(test_file) + args = PER_FILE_ARGS.get(test_file_string, []) + + # These are a bit ugly but necessary in order to make pytest generate + # more verbose output for integration tests when -v, -vv is specified + if "integration" in test_file_string and config.verbose > 0: + args += ["-v"] + if "integration" in test_file_string and config.verbose > 1: + args += ["-s"] + + return args + + def run( + self, test_file: Path, config: Config, system: TestSystem + ) -> ProcessResult: + """Execute a single test files with appropriate environment and + command-line options for a feature test stage. 
+ + Parameters + ---------- + test_file : Path + Test file to execute + + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + test_path = config.root_dir / test_file + + shard = self.shards.get() + + stage_args = self.args + self.shard_args(shard, config) + file_args = self.file_args(test_file, config) + + cmd = [str(config.legate_path), str(test_path)] + cmd += stage_args + file_args + config.extra_args + + self.delay(shard, config, system) + + result = system.run(cmd, test_file, env=self._env(config, system)) + log_proc(self.name, result, config, verbose=config.verbose) + + self.shards.put(shard) + + return result + + def _env(self, config: Config, system: TestSystem) -> EnvDict: + env = dict(config.env) + env.update(self.env(config, system)) + return env + + def _init(self, config: Config, system: TestSystem) -> None: + self.spec = self.compute_spec(config, system) + self.shards = system.manager.Queue(len(self.spec.shards)) + for shard in self.spec.shards: + self.shards.put(shard) + + def _launch( + self, config: Config, system: TestSystem + ) -> list[ProcessResult]: + + pool = multiprocessing.pool.ThreadPool(self.spec.workers) + + jobs = [ + pool.apply_async(self.run, (path, config, system)) + for path in config.test_files + ] + pool.close() + + return [job.get() for job in jobs] diff --git a/legate/tester/stages/util.py b/legate/tester/stages/util.py new file mode 100644 index 000000000..2d6514877 --- /dev/null +++ b/legate/tester/stages/util.py @@ -0,0 +1,115 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from dataclasses import dataclass +from datetime import timedelta +from typing import Tuple, Union + +from typing_extensions import TypeAlias + +from ...util.ui import failed, passed, shell, skipped +from ..config import Config +from ..logger import LOG +from ..test_system import ProcessResult + +CUNUMERIC_TEST_ARG = "-cunumeric:test" + +UNPIN_ENV = {"REALM_SYNTHETIC_CORE_MAP": ""} + +Shard: TypeAlias = Tuple[int, ...] + + +@dataclass(frozen=True) +class StageSpec: + """Specify the operation of a test run""" + + #: The number of worker processes to start for running tests + workers: int + + # A list of (cpu or gpu) shards to draw on for each test + shards: list[Shard] + + +@dataclass(frozen=True) +class StageResult: + """Collect results from all tests in a TestStage.""" + + #: Individual test process results including return code and stdout. + procs: list[ProcessResult] + + #: Cumulative execution time for all tests in a stage. 
+ time: timedelta + + @property + def total(self) -> int: + """The total number of tests run in this stage.""" + return len(self.procs) + + @property + def passed(self) -> int: + """The number of tests in this stage that passed.""" + return sum(p.returncode == 0 for p in self.procs) + + +def adjust_workers(workers: int, requested_workers: Union[int, None]) -> int: + """Adjust computed workers according to command line requested workers. + + The final number of workers will only be adjusted down by this function. + + Parameters + ---------- + workers: int + The computed number of workers to use + + requested_workers: int | None, optional + Requested number of workers from the user, if supplied (default: None) + + Returns + ------- + int + The number of workers to actually use + + """ + if requested_workers is not None and requested_workers < 0: + raise ValueError("requested workers must be non-negative") + + if requested_workers is not None: + if requested_workers > workers: + raise RuntimeError( + "Requested workers greater than assignable workers" + ) + workers = requested_workers + + if workers == 0: + raise RuntimeError("Current configuration results in zero workers") + + return workers + + +def log_proc( + name: str, proc: ProcessResult, config: Config, *, verbose: bool +) -> None: + """Log a process result according to the current configuration""" + if config.debug or config.dry_run: + LOG(shell(proc.invocation)) + msg = f"({name}) {proc.test_file}" + details = proc.output.split("\n") if verbose else None + if proc.skipped: + LOG(skipped(msg)) + elif proc.returncode == 0: + LOG(passed(msg, details=details)) + else: + LOG(failed(msg, details=details)) diff --git a/legate/tester/test_plan.py b/legate/tester/test_plan.py new file mode 100644 index 000000000..cc877f7a4 --- /dev/null +++ b/legate/tester/test_plan.py @@ -0,0 +1,132 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide a TestPlan class to coordinate multiple feature test stages. + +""" +from __future__ import annotations + +from datetime import timedelta +from itertools import chain + +from ..util.colors import yellow +from ..util.ui import banner, rule, summary +from .config import Config +from .logger import LOG +from .stages import STAGES, log_proc +from .test_system import TestSystem + + +class TestPlan: + """Encapsulate an entire test run with multiple feature test stages. 
+ + Parameters + ---------- + config: Config + Test runner configuration + + system: TestSystem + Process execution wrapper + + """ + + def __init__(self, config: Config, system: TestSystem) -> None: + self._config = config + self._system = system + self._stages = [ + STAGES[feature](config, system) for feature in config.features + ] + + def execute(self) -> int: + """Execute the entire test run with all configured feature stages.""" + LOG.clear() + + LOG(self.intro) + + for stage in self._stages: + LOG(stage.intro) + stage(self._config, self._system) + LOG(stage.outro) + + all_procs = tuple( + chain.from_iterable(s.result.procs for s in self._stages) + ) + total = len(all_procs) + passed = sum(proc.returncode == 0 for proc in all_procs) + + LOG(f"\n{rule(pad=4)}") + + self._log_failures(total, passed) + + LOG(self.outro(total, passed)) + + return int((total - passed) > 0) + + @property + def intro(self) -> str: + """An informative banner to display at test run start.""" + + cpus = len(self._system.cpus) + try: + gpus = len(self._system.gpus) + except ImportError: + gpus = 0 + + details = ( + f"* Feature stages : {', '.join(yellow(x) for x in self._config.features)}", # noqa E501 + f"* Test files per stage : {yellow(str(len(self._config.test_files)))}", # noqa E501 + f"* TestSystem description : {yellow(str(cpus) + ' cpus')} / {yellow(str(gpus) + ' gpus')}", # noqa E501 + ) + return banner("Test Suite Configuration", details=details) + + def outro(self, total: int, passed: int) -> str: + """An informative banner to display at test run end. + + Parameters + ---------- + total: int + Number of total tests that ran in all stages + + passed: int + Number of tests that passed in all stages + + """ + details = [ + f"* {s.name: <6}: " + + yellow( + f"{s.result.passed} / {s.result.total} passed in {s.result.time.total_seconds():0.2f}s" # noqa E501 + ) + for s in self._stages + ] + + time = sum((s.result.time for s in self._stages), timedelta(0, 0)) + details.append("") + details.append( + summary("All tests", total, passed, time, justify=False) + ) + + overall = banner("Overall summary", details=details) + + return f"{overall}\n" + + def _log_failures(self, total: int, passed: int) -> None: + if total == passed: + return + + LOG(f"{banner('FAILURES')}\n") + + for stage in self._stages: + procs = (proc for proc in stage.result.procs if proc.returncode) + for proc in procs: + log_proc(stage.name, proc, self._config, verbose=True) diff --git a/legate/tester/test_system.py b/legate/tester/test_system.py new file mode 100644 index 000000000..2c4e9949f --- /dev/null +++ b/legate/tester/test_system.py @@ -0,0 +1,123 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Provide a System class to encapsulate process execution and reporting +system information (number of CPUs present, etc). 
+ +""" +from __future__ import annotations + +import multiprocessing +import os +from dataclasses import dataclass +from pathlib import Path +from subprocess import PIPE, STDOUT, run as stdlib_run +from typing import Sequence + +from ..util.system import System +from ..util.types import EnvDict + +__all__ = ("TestSystem",) + + +@dataclass +class ProcessResult: + + #: The command invovation, including relevant environment vars + invocation: str + + # User-friendly test file path to use in reported output + test_file: Path + + #: Whether this process was actually invoked + skipped: bool = False + + #: The returncode from the process + returncode: int = 0 + + #: The collected stdout and stderr output from the process + output: str = "" + + +class TestSystem(System): + """A facade class for system-related functions. + + Parameters + ---------- + dry_run : bool, optional + If True, no commands will be executed, but a log of any commands + submitted to ``run`` will be made. (default: False) + + """ + + def __init__( + self, + *, + dry_run: bool = False, + ) -> None: + self.manager = multiprocessing.Manager() + self.dry_run: bool = dry_run + + def run( + self, + cmd: Sequence[str], + test_file: Path, + *, + env: EnvDict | None = None, + cwd: str | None = None, + ) -> ProcessResult: + """Wrapper for subprocess.run that encapsulates logging. + + Parameters + ---------- + cmd : sequence of str + The command to run, split on whitespace into a sequence + of strings + + test_file : Path + User-friendly test file path to use in reported output + + env : dict[str, str] or None, optional, default: None + Environment variables to apply when running the command + + cwd: str or None, optional, default: None + A current working directory to pass to stdlib ``run``. + + """ + + env = env or {} + + envstr = ( + " ".join(f"{k}={v}" for k, v in env.items()) + + min(len(env), 1) * " " + ) + + invocation = envstr + " ".join(cmd) + + if self.dry_run: + return ProcessResult(invocation, test_file, skipped=True) + + full_env = dict(os.environ) + full_env.update(env) + + proc = stdlib_run( + cmd, cwd=cwd, env=full_env, stdout=PIPE, stderr=STDOUT, text=True + ) + + return ProcessResult( + invocation, + test_file, + returncode=proc.returncode, + output=proc.stdout, + ) diff --git a/tests/unit/legate/driver/test_types.py b/legate/util/__init__.py similarity index 100% rename from tests/unit/legate/driver/test_types.py rename to legate/util/__init__.py diff --git a/legate/util/args.py b/legate/util/args.py new file mode 100644 index 000000000..e8fdc0c34 --- /dev/null +++ b/legate/util/args.py @@ -0,0 +1,182 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import sys +import warnings +from argparse import Action, ArgumentParser, Namespace +from dataclasses import dataclass, fields +from typing import ( + Any, + Generic, + Iterable, + Iterator, + Literal, + Sequence, + Type, + TypeVar, + Union, +) + +from typing_extensions import TypeAlias + + +class _UnsetType: + pass + + +Unset = _UnsetType() + + +T = TypeVar("T") + +NotRequired = Union[_UnsetType, T] + + +# https://docs.python.org/3/library/argparse.html#action +ActionType: TypeAlias = Literal[ + "store", + "store_const", + "store_true", + "append", + "append_const", + "count", + "help", + "version", + "extend", +] + +# https://docs.python.org/3/library/argparse.html#nargs +NargsType: TypeAlias = Literal["?", "*", "+", "..."] + + +@dataclass(frozen=True) +class ArgSpec: + dest: str + action: NotRequired[ActionType] = "store_true" + nargs: NotRequired[Union[int, NargsType]] = Unset + const: NotRequired[Any] = Unset + default: NotRequired[Any] = Unset + type: NotRequired[Type[Any]] = Unset + choices: NotRequired[Sequence[Any]] = Unset + help: NotRequired[str] = Unset + metavar: NotRequired[str] = Unset + + +@dataclass(frozen=True) +class Argument: + name: str + spec: ArgSpec + + +def entries(obj: Any) -> Iterable[tuple[str, Any]]: + for f in fields(obj): + value = getattr(obj, f.name) + if value is not Unset: + yield (f.name, value) + + +class MultipleChoices(Generic[T]): + """A container that reports True for any item or subset inclusion. + + Parameters + ---------- + choices: Iterable[T] + The values to populate the containter. + + Examples + -------- + + >>> choices = MultipleChoices(["a", "b", "c"]) + + >>> "a" in choices + True + + >>> ("b", "c") in choices + True + + """ + + def __init__(self, choices: Iterable[T]) -> None: + self._choices = set(choices) + + def __contains__(self, x: Union[T, Sequence[T]]) -> bool: + if isinstance(x, (list, tuple)): + return set(x).issubset(self._choices) + return x in self._choices + + def __iter__(self) -> Iterator[T]: + return self._choices.__iter__() + + +class ExtendAction(Action, Generic[T]): + """A custom argparse action to collect multiple values into a list.""" + + def __call__( + self, + parser: ArgumentParser, + namespace: Namespace, + values: Union[str, Sequence[T], None], + option_string: Union[str, None] = None, + ) -> None: + items = getattr(namespace, self.dest) or [] + if isinstance(values, (list, tuple)): + items.extend(values) + else: + items.append(values) + # removing any duplicates before storing + setattr(namespace, self.dest, list(set(items))) + + +def parse_library_command_args( + libname: str, args: Iterable[Argument] +) -> Namespace: + """ """ + if not libname.isidentifier(): + raise ValueError( + f"Invalid library {libname!r} for command line arguments" + ) + + parser = ArgumentParser( + prog=f"<{libname} program>", add_help=False, allow_abbrev=False + ) + + lib_prefix = f"-{libname}:" + + argnames = [arg.name for arg in args] + + for arg in args: + argname = f"{lib_prefix}{arg.name}" + kwargs = dict(entries(arg.spec)) + parser.add_argument(argname, **kwargs) + + has_custom_help = "help" in argnames + + if f"{lib_prefix}help" in sys.argv and not has_custom_help: + parser.print_help() + sys.exit() + + args, extra = parser.parse_known_args() + + for item in extra: + if item.startswith(lib_prefix): + warnings.warn( + f"Unrecognized argument {item!r} for {libname} (passed on as-is)" # noqa: E501 + ) + break + + sys.argv = sys.argv[:1] + extra + + return args diff --git 
a/legate/util/colors.py b/legate/util/colors.py new file mode 100644 index 000000000..5bb0b14b3 --- /dev/null +++ b/legate/util/colors.py @@ -0,0 +1,95 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Helper functions for adding colors to simple text UI output. + +The color functions in this module require ``colorama`` to be installed in +order to generate color output. If ``colorama`` is not available, plain +text output (i.e. without ANSI color codes) will be generated. + +""" +from __future__ import annotations + +import re +import sys + +__all__ = ( + "bright", + "cyan", + "dim", + "green", + "magenta", + "red", + "scrub", + "white", + "yellow", +) + + +def _text(text: str) -> str: + return text + + +try: + import colorama # type: ignore[import] + + def bright(text: str) -> str: + return f"{colorama.Style.BRIGHT}{text}{colorama.Style.RESET_ALL}" + + def dim(text: str) -> str: + return f"{colorama.Style.DIM}{text}{colorama.Style.RESET_ALL}" + + def white(text: str) -> str: + return f"{colorama.Fore.WHITE}{text}{colorama.Style.RESET_ALL}" + + def cyan(text: str) -> str: + return f"{colorama.Fore.CYAN}{text}{colorama.Style.RESET_ALL}" + + def red(text: str) -> str: + return f"{colorama.Fore.RED}{text}{colorama.Style.RESET_ALL}" + + def magenta(text: str) -> str: + return f"{colorama.Fore.MAGENTA}{text}{colorama.Style.RESET_ALL}" + + def green(text: str) -> str: + return f"{colorama.Fore.GREEN}{text}{colorama.Style.RESET_ALL}" + + def yellow(text: str) -> str: + return f"{colorama.Fore.YELLOW}{text}{colorama.Style.RESET_ALL}" + + if sys.platform == "win32": + colorama.init() + +except ImportError: + + bright = dim = white = cyan = red = magenta = green = yellow = _text + +# ref: https://stackoverflow.com/a/14693789 +_ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + + +def scrub(text: str) -> str: + """Remove ANSI color codes from a text string. 
+ + Parameters + ---------- + text : str + The text to scrub + + Returns + ------- + str + + """ + return _ANSI_ESCAPE.sub("", text) diff --git a/legate/driver/util.py b/legate/util/fs.py similarity index 81% rename from legate/driver/util.py rename to legate/util/fs.py index 499b250e3..e05e15279 100644 --- a/legate/driver/util.py +++ b/legate/util/fs.py @@ -17,100 +17,18 @@ import re import sys from pathlib import Path -from shlex import quote -from textwrap import indent -from typing import TYPE_CHECKING, Type, TypeVar -from .types import DataclassProtocol, LegatePaths, LegionPaths -from .ui import kvtable, rule, section, value - -if TYPE_CHECKING: - from .driver import Driver - from .system import System +from .types import LegatePaths, LegionPaths __all__ = ( "get_legate_build_dir", "get_legate_paths", "get_legion_paths", - "object_to_dataclass", - "print_verbose", "read_c_define", "read_cmake_cache_value", ) -T = TypeVar("T", bound=DataclassProtocol) - - -def object_to_dataclass(obj: object, typ: Type[T]) -> T: - """Automatically generate a dataclass from an object with appropriate - attributes. - - Parameters - ---------- - obj: object - An object to pull values from (e.g. an argparse Namespace) - - typ: - A dataclass type to generate from ``obj`` - - Returns - ------- - The generated dataclass instance - - """ - kws = {name: getattr(obj, name) for name in typ.__dataclass_fields__} - return typ(**kws) - - -def print_verbose( - system: System, - driver: Driver | None = None, -) -> None: - """Print system and driver configuration values. - - Parameters - ---------- - system : System - A System instance to obtain Legate and Legion paths from - - driver : Driver or None, optional - If not None, a Driver instance to obtain command invocation and - environment from (default: None) - - Returns - ------- - None - - """ - - print(f"\n{rule('Legion Python Configuration')}") - - print(section("\nLegate paths:")) - print(indent(str(system.legate_paths), prefix=" ")) - - print(section("\nLegion paths:")) - print(indent(str(system.legion_paths), prefix=" ")) - - if driver: - print(section("\nCommand:")) - cmd = " ".join(quote(t) for t in driver.cmd) - print(f" {value(cmd)}") - - if keys := sorted(driver.custom_env_vars): - print(section("\nCustomized Environment:")) - print( - indent( - kvtable(driver.env, delim="=", align=False, keys=keys), - prefix=" ", - ) - ) - - print(f"\n{rule()}") - - print(flush=True) - - def read_c_define(header_path: Path, name: str) -> str | None: """Open a C header file and read the value of a #define @@ -321,15 +239,16 @@ def get_legion_paths(legate_paths: LegatePaths) -> LegionPaths: # local builds over global installations. This allows devs to work in the # source tree and re-run without overwriting existing installations. - def installed_legion_paths( - legion_dir: Path, legion_module: Path | None = None - ) -> LegionPaths: - if legion_module is None: - legion_lib_dir = legion_dir / "lib" - for f in legion_lib_dir.iterdir(): - if f.joinpath("site-packages").exists(): - legion_module = f / "site-packages" - break + def installed_legion_paths(legion_dir: Path) -> LegionPaths: + legion_lib_dir = legion_dir / "lib" + for f in legion_lib_dir.iterdir(): + legion_module = f / "site-packages" + if legion_module.exists(): + break + + # NB: for-else clause! 
(executes if NO loop break) + else: + raise RuntimeError("could not determine legion module location") legion_bin_path = legion_dir / "bin" legion_include_path = legion_dir / "include" diff --git a/legate/driver/system.py b/legate/util/system.py similarity index 51% rename from legate/driver/system.py rename to legate/util/system.py index 57f9ec226..702514cc2 100644 --- a/legate/driver/system.py +++ b/legate/util/system.py @@ -14,11 +14,14 @@ # from __future__ import annotations +import multiprocessing import os import platform +import sys from functools import cached_property -from .util import LegatePaths, LegionPaths, get_legate_paths, get_legion_paths +from .fs import get_legate_paths, get_legion_paths +from .types import CPUInfo, GPUInfo, LegatePaths, LegionPaths __all__ = ("System",) @@ -78,3 +81,51 @@ def LIB_PATH(self) -> str: """ return "LD_LIBRARY_PATH" if self.os == "Linux" else "DYLD_LIBRARY_PATH" + + @cached_property + def cpus(self) -> tuple[CPUInfo, ...]: + """A list of CPUs on the system.""" + + N = multiprocessing.cpu_count() + + if sys.platform == "darwin": + return tuple(CPUInfo((i,)) for i in range(N)) + + sibling_sets: set[tuple[int, ...]] = set() + for i in range(N): + line = open( + f"/sys/devices/system/cpu/cpu{i}/topology/thread_siblings_list" + ).read() + sibling_sets.add( + tuple(sorted(int(x) for x in line.strip().split(","))) + ) + return tuple(CPUInfo(siblings) for siblings in sorted(sibling_sets)) + + @cached_property + def gpus(self) -> tuple[GPUInfo, ...]: + """A list of GPUs on the system, including total memory information.""" + + try: + # This pynvml import is protected inside this method so that in + # case pynvml is not installed, tests stages that don't need gpu + # info (e.g. cpus, eager) will proceed unaffected. Test stages + # that do require gpu info will fail here with an ImportError. + import pynvml # type: ignore[import] + + # Also a pynvml package is available on some platforms that won't + # have GPUs for some reason. In which case this init call will + # fail. + pynvml.nvmlInit() + except Exception: + return () + + num_gpus = pynvml.nvmlDeviceGetCount() + + results = [] + for i in range(num_gpus): + info = pynvml.nvmlDeviceGetMemoryInfo( + pynvml.nvmlDeviceGetHandleByIndex(i) + ) + results.append(GPUInfo(i, info.total)) + + return tuple(results) diff --git a/legate/driver/types.py b/legate/util/types.py similarity index 68% rename from legate/driver/types.py rename to legate/util/types.py index 0bde4643b..2a8166373 100644 --- a/legate/driver/types.py +++ b/legate/util/types.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # -"""Provide types that are useful throughout the driver code. +"""Provide types that are useful throughout the test driver code. """ from __future__ import annotations from dataclasses import Field, dataclass from pathlib import Path -from typing import Any, Dict, List, Protocol, Tuple, Union +from typing import Any, Dict, List, Protocol, Tuple, Type, TypeVar, Union from typing_extensions import Literal, TypeAlias @@ -29,14 +29,37 @@ "ArgList", "Command", "CommandPart", + "CPUInfo", "DataclassMixin", "DataclassProtocol", "EnvDict", + "GPUInfo", "LauncherType", "LegatePaths", "LegionPaths", + "object_to_dataclass", ) + +@dataclass(frozen=True) +class CPUInfo: + """Encapsulate information about a single CPU""" + + #: IDs of hypterthreading sibling cores for a given physscal core + ids: tuple[int, ...] 
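[Editor's note] A tiny usage sketch, not part of the patch, showing how the CPUInfo dataclass just added groups a physical core with its hyperthread siblings; the sibling IDs below are made up, and the join mirrors how stages build their --cpu-bind arguments.

    from legate.util.types import CPUInfo

    # hypothetical machine where logical CPUs 0 and 8 share a physical core
    core = CPUInfo(ids=(0, 8))
    bind = ",".join(str(x) for x in core.ids)  # "0,8"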
+
+
+@dataclass(frozen=True)
+class GPUInfo:
+    """Encapsulate information about a single GPU"""
+
+    #: ID of the GPU to specify in test shards
+    id: int
+
+    #: The total framebuffer memory of this GPU
+    total: int
+
+
 #: Define the available launcher for the driver to use
 LauncherType: TypeAlias = Union[
     Literal["mpirun"], Literal["jsrun"], Literal["srun"], Literal["none"]
 ]
@@ -73,6 +96,30 @@ def __str__(self) -> str:
         return kvtable(self.__dict__)
+
+T = TypeVar("T", bound=DataclassProtocol)
+
+
+def object_to_dataclass(obj: object, typ: Type[T]) -> T:
+    """Automatically generate a dataclass from an object with appropriate
+    attributes.
+
+    Parameters
+    ----------
+    obj: object
+        An object to pull values from (e.g. an argparse Namespace)
+
+    typ:
+        A dataclass type to generate from ``obj``
+
+    Returns
+    -------
+        The generated dataclass instance
+
+    """
+    kws = {name: getattr(obj, name) for name in typ.__dataclass_fields__}
+    return typ(**kws)
+
+
 @dataclass(frozen=True)
 class LegatePaths(DataclassMixin):
     """Collect all the filesystem paths relevant for Legate."""
diff --git a/legate/util/ui.py b/legate/util/ui.py
new file mode 100644
index 000000000..9cf74b094
--- /dev/null
+++ b/legate/util/ui.py
@@ -0,0 +1,345 @@
+# Copyright 2022 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Helper functions for simple text UI output.
+
+The color functions in this module require ``colorama`` to be installed in
+order to generate color output. If ``colorama`` is not available, plain
+text output (i.e. without ANSI color codes) will be generated.
+
+"""
+from __future__ import annotations
+
+from datetime import timedelta
+from typing import Any, Iterable
+
+from typing_extensions import TypeAlias
+
+from .colors import bright, cyan, dim, green, magenta, red, white, yellow
+
+Details: TypeAlias = Iterable[str]
+
+__all__ = (
+    "UI_WIDTH",
+    "banner",
+    "error",
+    "key",
+    "kvtable",
+    "rule",
+    "section",
+    "value",
+    "warn",
+)
+
+
+#: Width for terminal output headers and footers.
+UI_WIDTH = 80
+
+
+def _format_details(
+    details: Iterable[str] | None = None, pre: str = " "
+) -> str:
+    if details:
+        return f"{pre}" + f"\n{pre}".join(f"{line}" for line in details)
+    return ""
+
+
+def banner(
+    heading: str,
+    *,
+    char: str = "#",
+    width: int = UI_WIDTH,
+    details: Iterable[str] | None = None,
+) -> str:
+    """Generate a title banner, with optional details included.
+
+    Parameters
+    ----------
+    heading : str
+        Text to use for the title
+
+    char : str, optional
+        A character to use to frame the banner. (default: "#")
+
+    width : int, optional
+        How wide to draw the banner.
+        (Note: user-supplied heading or details will not be truncated
+        if they exceed this width)
+
+    details : Iterable[str], optional
+        A list of lines to display inside the banner area below the heading
+
+    """
+    pre = f"{char*3} "
+    divider = char * width
+    if not details:
+        return f"\n{divider}\n{pre}{heading}\n{divider}"
+    return f"""
+{divider}
+{pre}
+{pre}{heading}
+{pre}
+{_format_details(details, pre)}
+{pre}
+{divider}"""
+
+
+def error(text: str) -> str:
+    """Format text as an error.
+
+    Parameters
+    ----------
+    text : str
+        The text to format
+
+    Returns
+    -------
+    str
+
+    """
+    return red(f"ERROR: {text}")
+
+
+def failed(msg: str, *, details: Details | None = None) -> str:
+    """Report a failed test result with a bright red [FAIL].
+
+    Parameters
+    ----------
+    msg : str
+        Text to display after [FAIL]
+
+    details : Iterable[str], optional
+        A sequence of text lines to display below the ``msg`` line
+
+    """
+    if details:
+        return f"{bright(red('[FAIL]'))} {msg}\n{_format_details(details)}"
+    return f"{bright(red('[FAIL]'))} {msg}"
+
+
+def passed(msg: str, *, details: Details | None = None) -> str:
+    """Report a passed test result with a bright green [PASS].
+
+    Parameters
+    ----------
+    msg : str
+        Text to display after [PASS]
+
+    details : Iterable[str], optional
+        A sequence of text lines to display below the ``msg`` line
+
+    """
+    if details:
+        return f"{bright(green('[PASS]'))} {msg}\n{_format_details(details)}"
+    return f"{bright(green('[PASS]'))} {msg}"
+
+
+def key(text: str) -> str:
+    """Format a 'key' from a key-value pair.
+
+    Parameters
+    ----------
+    text : str
+        The key to format
+
+    Returns
+    -------
+    str
+
+    """
+    return dim(green(text))
+
+
+def value(text: str) -> str:
+    """Format a 'value' of a key-value pair.
+
+    Parameters
+    ----------
+    text : str
+        The value to format
+
+    Returns
+    -------
+    str
+
+    """
+    return yellow(text)
+
+
+def kvtable(
+    items: dict[str, Any],
+    *,
+    delim: str = " : ",
+    align: bool = True,
+    keys: Iterable[str] | None = None,
+) -> str:
+    """Format a dictionary as a table of key-value pairs.
+ + Parameters + ---------- + items : dict[str, Any] + The dictionary of items to format + + delim : str, optional + A delimiter to display between keys and values (default: " : ") + + align : bool, optional + Whether to align delimiters to the longest key length (default: True) + + keys : Iterable[str] or None, optional + If not None, only the specified subset of keys is included in the + table output (default: None) + + Returns + ------- + str + + """ + # annoying but necessary to take len on color-formatted version + N = max(len(key(k)) for k in items) if align else 0 + + keys = items.keys() if keys is None else keys + + return "\n".join( + f"{key(k): <{N}}{delim}{value(str(items[k]))}" for k in keys + ) + + +def rule( + text: str | None = None, + *, + pad: int = 0, + char: str = "-", + N: int = UI_WIDTH, +) -> str: + """Format a horizontal rule, optionally with text + + Parameters + ---------- + text : str or None, optional + If not None, display this text inline in the rule (default: None) + + pad : int, optional + An amount of padding to put in front of the rule + + char: str, optional + A character to use for the rule (default: "-") + + N : int, optional + Character width for the rule (default: 80) + + Returns + ------- + str + + """ + width = N - pad + if text is None: + return cyan(f"{char*width: >{N}}") + return cyan(" " * pad + char * 3 + f"{f' {text} ' :{char}<{width-3}}") + + +def section(text: str) -> str: + """Format text as a section header + + Parameters + ---------- + text : str + The text to format + + Returns + ------- + str + + """ + return bright(white(text)) + + +def shell(cmd: str, *, char: str = "+") -> str: + """Report a shell command in a dim white color. + + Parameters + ---------- + cmd : str + The shell command string to display + + char : str, optional + A character to prefix the ``cmd`` with. (default: "+") + + """ + return dim(white(f"{char}{cmd}")) + + +def skipped(msg: str) -> str: + """Report a skipped test with a cyan [SKIP] + + Parameters + ---------- + msg : str + Text to display after [SKIP] + + """ + return f"{cyan('[SKIP]')} {msg}" + + +def summary( + name: str, + total: int, + passed: int, + time: timedelta, + *, + justify: bool = True, +) -> str: + """Generate a test result summary line. + + The output is bright green if all tests passed, otherwise bright red. + + Parameters + ---------- + name : str + A name to display in this summary line. + + total : int + The total number of tests to report. + + passed : int + The number of passed tests to report. + + time : timedelta + The time taken to run the tests + + """ + summary = ( + f"{name}: Passed {passed} of {total} tests ({passed/total*100:0.1f}%) " + f"in {time.total_seconds():0.2f}s" + if total > 0 + else f"{name}: 0 tests are running, Please check" + ) + color = green if passed == total and total > 0 else red + return bright(color(f"{summary: >{UI_WIDTH}}" if justify else summary)) + + +def warn(text: str) -> str: + """Format text as a warning. + + Parameters + ---------- + text : str + The text to format + + Returns + ------- + str + + """ + return magenta(f"WARNING: {text}") diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations diff --git a/tests/unit/legate/__init__.py b/tests/unit/legate/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/legate/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations diff --git a/tests/unit/legate/driver/__init__.py b/tests/unit/legate/driver/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/legate/driver/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations diff --git a/tests/unit/legate/driver/conftest.py b/tests/unit/legate/driver/conftest.py index 1b1f31e48..09c8c7d18 100644 --- a/tests/unit/legate/driver/conftest.py +++ b/tests/unit/legate/driver/conftest.py @@ -19,10 +19,12 @@ from typing import Any, Callable, Iterable import pytest -from util import GenConfig, GenSystem -from legate.driver import Config, Launcher, System +from legate.driver import Config, Launcher from legate.driver.config import MultiNode +from legate.util.system import System + +from .util import GenConfig, GenSystem @pytest.fixture diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index f7188990f..29d4a8632 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -18,12 +18,14 @@ from pathlib import Path import pytest -from util import Capsys, GenObjs, powerset_nonempty import legate.driver.command as m from legate.driver.launcher import RANK_ENV_VARS -from legate.driver.types import LauncherType -from legate.driver.ui import scrub +from legate.util.colors import scrub +from legate.util.types import LauncherType + +from ...util import Capsys, powerset_nonempty +from .util import GenObjs def test___all__() -> None: diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 0523b1db9..536289221 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -20,12 +20,13 @@ import pytest from pytest_mock import MockerFixture -from util import Capsys, powerset, powerset_nonempty import legate.driver.config as m import legate.driver.defaults as defaults -from legate.driver.types import DataclassMixin -from legate.driver.ui import scrub +from legate.util.colors import scrub +from legate.util.types import DataclassMixin + +from ...util import Capsys, powerset, powerset_nonempty DEFAULTS_ENV_VARS = ( "LEGATE_EAGER_ALLOC_PERCENTAGE", diff --git a/tests/unit/legate/driver/test_driver.py b/tests/unit/legate/driver/test_driver.py index e346210d3..fad492a2f 100644 --- a/tests/unit/legate/driver/test_driver.py +++ b/tests/unit/legate/driver/test_driver.py @@ -15,19 +15,22 @@ from __future__ import annotations import re +from shlex import quote import pytest from pytest_mock import MockerFixture -from util import Capsys, GenConfig import legate.driver.driver as m from legate.driver.args import LAUNCHERS from legate.driver.command import CMD_PARTS +from legate.driver.config import Config from legate.driver.launcher import RANK_ENV_VARS, Launcher -from legate.driver.system import System -from legate.driver.types import LauncherType -from legate.driver.ui import scrub -from legate.driver.util import print_verbose +from legate.util.colors import scrub +from legate.util.system import System +from legate.util.types import LauncherType + +from ...util import Capsys +from .util import GenConfig SYSTEM = System() @@ -123,7 +126,7 @@ def test_verbose( run_out = scrub(capsys.readouterr()[0]).strip() - print_verbose(driver.system, driver) + m.print_verbose(driver.system, driver) pv_out = scrub(capsys.readouterr()[0]).strip() @@ -152,7 +155,7 @@ def test_verbose_nonero_rank_id( run_out = scrub(capsys.readouterr()[0]).strip() - print_verbose(driver.system, driver) + m.print_verbose(driver.system, driver) pv_out = scrub(capsys.readouterr()[0]).strip() @@ -180,3 +183,48 @@ def test_darwin_gdb_warning( out, _ = capsys.readouterr() assert re.search(DARWIN_GDB_WARN_EXPECTED_PAT, scrub(out)) 
+ + +class Test_print_verbose: + def test_system_only(self, capsys: Capsys) -> None: + system = System() + + m.print_verbose(system) + + out = scrub(capsys.readouterr()[0]).strip() + + assert out.startswith(f"{'--- Legion Python Configuration ':-<80}") + assert "Legate paths:" in out + for line in scrub(str(system.legate_paths)).split(): + assert line in out + + assert "Legion paths:" in out + for line in scrub(str(system.legion_paths)).split(): + assert line in out + + def test_system_and_driver(self, capsys: Capsys) -> None: + config = Config(["legate", "--no-replicate"]) + system = System() + driver = m.Driver(config, system) + + m.print_verbose(system, driver) + + out = scrub(capsys.readouterr()[0]).strip() + + assert out.startswith(f"{'--- Legion Python Configuration ':-<80}") + assert "Legate paths:" in out + for line in scrub(str(system.legate_paths)).split(): + assert line in out + + assert "Legion paths:" in out + for line in scrub(str(system.legion_paths)).split(): + assert line in out + + assert "Command:" in out + assert f" {' '.join(quote(t) for t in driver.cmd)}" in out + + assert "Customized Environment:" in out + for k in driver.custom_env_vars: + assert f"{k}={driver.env[k]}" in out + + assert out.endswith(f"\n{'-':-<80}") diff --git a/tests/unit/legate/driver/test_launcher.py b/tests/unit/legate/driver/test_launcher.py index 1c5b451af..ecf980d87 100644 --- a/tests/unit/legate/driver/test_launcher.py +++ b/tests/unit/legate/driver/test_launcher.py @@ -17,12 +17,14 @@ import os import pytest -from util import GenConfig, GenObjs, powerset_nonempty import legate.driver.launcher as m from legate.driver.args import LAUNCHERS -from legate.driver.system import System -from legate.driver.types import LauncherType +from legate.util.system import System +from legate.util.types import LauncherType + +from ...util import powerset_nonempty +from .util import GenConfig, GenObjs SYSTEM = System() diff --git a/tests/unit/legate/driver/test_logs.py b/tests/unit/legate/driver/test_logs.py index 918dfc283..44e89a364 100644 --- a/tests/unit/legate/driver/test_logs.py +++ b/tests/unit/legate/driver/test_logs.py @@ -16,12 +16,14 @@ import pytest from pytest_mock import MockerFixture -from util import Capsys, GenObjs, powerset_nonempty import legate.driver.logs as m from legate.driver.config import Config from legate.driver.launcher import RANK_ENV_VARS -from legate.driver.ui import scrub +from legate.util.colors import scrub + +from ...util import Capsys, powerset_nonempty +from .util import GenObjs class MockHandler(m.LogHandler): diff --git a/tests/unit/legate/driver/test_main.py b/tests/unit/legate/driver/test_main.py index 0784246a3..4c0260abb 100644 --- a/tests/unit/legate/driver/test_main.py +++ b/tests/unit/legate/driver/test_main.py @@ -28,10 +28,10 @@ def test_main(mocker: MockerFixture) -> None: import legate.driver.config import legate.driver.driver - import legate.driver.system + import legate.util.system config_spy = mocker.spy(legate.driver.config.Config, "__init__") - system_spy = mocker.spy(legate.driver.system.System, "__init__") + system_spy = mocker.spy(legate.util.system.System, "__init__") driver_spy = mocker.spy(legate.driver.driver.Driver, "__init__") mocker.patch("legate.driver.driver.Driver.run", return_value=123) @@ -48,7 +48,7 @@ def test_main(mocker: MockerFixture) -> None: assert driver_spy.call_count == 1 assert len(driver_spy.call_args[0]) == 3 assert isinstance(driver_spy.call_args[0][1], legate.driver.config.Config) - assert 
isinstance(driver_spy.call_args[0][2], legate.driver.system.System) + assert isinstance(driver_spy.call_args[0][2], legate.util.system.System) assert driver_spy.call_args[1] == {} assert result == 123 diff --git a/tests/unit/legate/driver/test_ui.py b/tests/unit/legate/driver/test_ui.py deleted file mode 100644 index 33b8b03eb..000000000 --- a/tests/unit/legate/driver/test_ui.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from typing import Any - -import pytest -from pytest_mock import MockerFixture -from typing_extensions import TypeAlias - -import legate.driver.ui as m - -try: - import colorama # type: ignore -except ImportError: - colorama = None - -UsePlainTextFixture: TypeAlias = Any - - -@pytest.fixture -def use_plain_text(mocker: MockerFixture) -> None: - mocker.patch.object(m, "bright", m._text) - mocker.patch.object(m, "dim", m._text) - mocker.patch.object(m, "white", m._text) - mocker.patch.object(m, "cyan", m._text) - mocker.patch.object(m, "red", m._text) - mocker.patch.object(m, "green", m._text) - mocker.patch.object(m, "yellow", m._text) - mocker.patch.object(m, "magenta", m._text) - - -COLOR_FUNCS = ( - "cyan", - "green", - "magenta", - "red", - "white", - "yellow", -) - -STYLE_FUNCS = ( - "bright", - "dim", -) - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -@pytest.mark.parametrize("color", COLOR_FUNCS) -def test_color_functions(color: str) -> None: - cfunc = getattr(m, color) - cprop = getattr(colorama.Fore, color.upper()) - - out = cfunc("some text") - - assert out == f"{cprop}some text{colorama.Style.RESET_ALL}" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -@pytest.mark.parametrize("style", STYLE_FUNCS) -def test_style_functions(style: str) -> None: - sfunc = getattr(m, style) - sprop = getattr(colorama.Style, style.upper()) - - out = sfunc("some text") - - assert out == f"{sprop}some text{colorama.Style.RESET_ALL}" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -def test_error(use_plain_text: UsePlainTextFixture) -> None: - assert m.error("some message") == m.red("ERROR: some message") - - -def test_error_plain(use_plain_text: UsePlainTextFixture) -> None: - assert m.error("some message") == "ERROR: some message" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -def test_key(use_plain_text: UsePlainTextFixture) -> None: - assert m.key("some key") == m.dim(m.green("some key")) - - -def test_key_plain(use_plain_text: UsePlainTextFixture) -> None: - assert m.key("some key") == "some key" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -def test_value(use_plain_text: UsePlainTextFixture) -> None: - assert m.value("some value") == m.yellow("some value") - - -def test_value_plain(use_plain_text: UsePlainTextFixture) -> None: - assert m.value("some value") == "some value" - - -class Test_kvtable: - ONE = {"foo": 10} - TWO = {"foo": 10, "barbaz": 
"some value"} - THREE = {"foo": 10, "barbaz": "some value", "a": 1.2} - - @pytest.mark.skipif(colorama is None, reason="colorama required") - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_default(self, items: dict[str, Any]) -> None: - N = max(len(m.key(k)) for k in items) - assert m.kvtable(items) == "\n".join( - f"{m.key(k): <{N}} : {m.value(str(items[k]))}" for k in items - ) - - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_default_plain( - self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] - ) -> None: - N = max(len(k) for k in items) - assert m.kvtable(items) == "\n".join( - f"{k: <{N}} : {items[k]}" for k in items - ) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_delim(self, items: dict[str, Any]) -> None: - N = max(len(m.key(k)) for k in items) - assert m.kvtable(items, delim="/") == "\n".join( - f"{m.key(k): <{N}}/{m.value(str(items[k]))}" for k in items - ) - - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_delim_plain( - self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] - ) -> None: - N = max(len(k) for k in items) - assert m.kvtable(items, delim="/") == "\n".join( - f"{k: <{N}}/{items[k]}" for k in items - ) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_align_False(self, items: dict[str, Any]) -> None: - assert m.kvtable(items, align=False) == "\n".join( - f"{m.key(k)} : {m.value(str(items[k]))}" for k in items - ) - - @pytest.mark.parametrize("items", (ONE, TWO, THREE)) - def test_align_False_plain( - self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] - ) -> None: - assert m.kvtable(items, align=False) == "\n".join( - f"{k} : {items[k]}" for k in items - ) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - def test_keys(self) -> None: - items = self.THREE - keys = ("foo", "a") - N = max(len(m.key(k)) for k in items) - - assert m.kvtable(self.THREE, keys=keys) == "\n".join( - f"{m.key(k): <{N}} : {m.value(str(items[k]))}" for k in keys - ) - - def test_keys_plain(self, use_plain_text: UsePlainTextFixture) -> None: - items = self.THREE - keys = ("foo", "a") - N = max(len(m.key(k)) for k in items) - - assert m.kvtable(items, keys=keys) == "\n".join( - f"{k: <{N}} : {items[k]}" for k in keys - ) - - -class Test_rule: - @pytest.mark.skipif(colorama is None, reason="colorama required") - def test_text(self) -> None: - assert m.rule("foo bar") == m.cyan("--- foo bar " + "-" * 68) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - def test_char(self) -> None: - assert m.rule(char="a") == m.cyan("a" * 80) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - def test_N(self) -> None: - assert m.rule(N=60) == m.cyan("-" * 60) - - @pytest.mark.skipif(colorama is None, reason="colorama required") - def test_N_with_text(self) -> None: - assert m.rule("foo bar", N=65) == m.cyan("--- foo bar " + "-" * 53) - - def test_text_plain(self, use_plain_text: UsePlainTextFixture) -> None: - assert m.rule("foo bar") == "--- foo bar " + "-" * 68 - - def test_char_plain(self, use_plain_text: UsePlainTextFixture) -> None: - assert m.rule(char="a") == "a" * 80 - - def test_N_plain(self, use_plain_text: UsePlainTextFixture) -> None: - assert m.rule(N=60) == "-" * 60 - - def test_N_with_text_plain( - self, use_plain_text: UsePlainTextFixture - ) -> None: - assert m.rule("foo 
bar", N=65) == "--- foo bar " + "-" * 53 - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -@pytest.mark.parametrize("color", COLOR_FUNCS) -@pytest.mark.parametrize("style", STYLE_FUNCS) -def test_scrub(style: str, color: str) -> None: - cfunc = getattr(m, color) - sfunc = getattr(m, style) - - assert m.scrub(cfunc(sfunc("some text"))) == "some text" - assert m.scrub(sfunc(cfunc("some text"))) == "some text" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -@pytest.mark.parametrize("color", COLOR_FUNCS) -@pytest.mark.parametrize("style", STYLE_FUNCS) -def test_scrub_plain( - use_plain_text: UsePlainTextFixture, style: str, color: str -) -> None: - cfunc = getattr(m, color) - sfunc = getattr(m, style) - - assert m.scrub(cfunc(sfunc("some text"))) == "some text" - assert m.scrub(sfunc(cfunc("some text"))) == "some text" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -def test_section(use_plain_text: UsePlainTextFixture) -> None: - assert m.section("some section") == m.bright(m.white("some section")) - - -def test_section_plain(use_plain_text: UsePlainTextFixture) -> None: - assert m.section("some section") == "some section" - - -@pytest.mark.skipif(colorama is None, reason="colorama required") -def test_warn(use_plain_text: UsePlainTextFixture) -> None: - assert m.warn("some message") == m.magenta("WARNING: some message") - - -def test_warn_plain(use_plain_text: UsePlainTextFixture) -> None: - assert m.warn("some message") == "WARNING: some message" diff --git a/tests/unit/legate/driver/test_util.py b/tests/unit/legate/driver/test_util.py deleted file mode 100644 index a864ddc8c..000000000 --- a/tests/unit/legate/driver/test_util.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path -from shlex import quote - -import pytest -from util import Capsys - -import legate.driver.util as m -from legate.driver.config import Config -from legate.driver.driver import Driver -from legate.driver.system import System -from legate.driver.ui import scrub - - -class Source: - foo = 10 - bar = 10.2 - baz = "test" - quux = ["a", "b", "c"] - extra = (1, 2, 3) - - -@dataclass(frozen=True) -class Target: - foo: int - bar: float - baz: str - quux: list[str] - - -def test_object_to_dataclass() -> None: - source = Source() - target = m.object_to_dataclass(source, Target) - - assert set(target.__dict__) == set(Target.__dataclass_fields__) - for k, v in target.__dict__.items(): - assert getattr(source, k) == v - - -class Test_print_verbose: - def test_system_only(self, capsys: Capsys) -> None: - system = System() - - m.print_verbose(system) - - out = scrub(capsys.readouterr()[0]).strip() - - assert out.startswith(f"{'--- Legion Python Configuration ':-<80}") - assert "Legate paths:" in out - for line in scrub(str(system.legate_paths)).split(): - assert line in out - - assert "Legion paths:" in out - for line in scrub(str(system.legion_paths)).split(): - assert line in out - - def test_system_and_driver(self, capsys: Capsys) -> None: - config = Config(["legate", "--no-replicate"]) - system = System() - driver = Driver(config, system) - - m.print_verbose(system, driver) - - out = scrub(capsys.readouterr()[0]).strip() - - assert out.startswith(f"{'--- Legion Python Configuration ':-<80}") - assert "Legate paths:" in out - for line in scrub(str(system.legate_paths)).split(): - assert line in out - - assert "Legion paths:" in out - for line in scrub(str(system.legion_paths)).split(): - assert line in out - - assert "Command:" in out - assert f" {' '.join(quote(t) for t in driver.cmd)}" in out - - assert "Customized Environment:" in out - for k in driver.custom_env_vars: - assert f"{k}={driver.env[k]}" in out - - assert out.endswith(f"\n{'-':-<80}") - - -HEADER_PATH = Path(__file__).parent / "sample_header.h" - - -def test_read_c_define_hit() -> None: - assert m.read_c_define(HEADER_PATH, "FOO") == "10" - assert m.read_c_define(HEADER_PATH, "BAR") == '"bar"' - - -def test_read_c_define_miss() -> None: - assert m.read_c_define(HEADER_PATH, "JUNK") is None - - -CMAKE_CACHE_PATH = Path(__file__).parent / "sample_cmake_cache.txt" - - -def test_read_cmake_cache_value_hit() -> None: - assert ( - m.read_cmake_cache_value(CMAKE_CACHE_PATH, "Legion_SOURCE_DIR:STATIC=") - == '"foo/bar"' - ) - assert ( - m.read_cmake_cache_value( - CMAKE_CACHE_PATH, "FIND_LEGATE_CORE_CPP:BOOL=OFF" - ) - == "OFF" - ) - - -def test_read_cmake_cache_value_miss() -> None: - with pytest.raises(RuntimeError): - assert m.read_cmake_cache_value(CMAKE_CACHE_PATH, "JUNK") is None diff --git a/tests/unit/legate/driver/util.py b/tests/unit/legate/driver/util.py index d91896977..fad7a9f76 100644 --- a/tests/unit/legate/driver/util.py +++ b/tests/unit/legate/driver/util.py @@ -14,26 +14,12 @@ # from __future__ import annotations -from itertools import chain, combinations -from typing import Any, Iterable, Iterator +from typing import Any -import pytest from typing_extensions import TypeAlias -Capsys: TypeAlias = pytest.CaptureFixture[str] - GenConfig: TypeAlias = Any GenSystem: TypeAlias = Any GenObjs: TypeAlias = Any - - -# ref: https://docs.python.org/3/library/itertools.html -def powerset(iterable: Iterable[Any]) -> Iterator[Any]: - s = 
list(iterable) - return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) - - -def powerset_nonempty(iterable: Iterable[Any]) -> Iterator[Any]: - return (x for x in powerset(iterable) if len(x)) diff --git a/tests/unit/legate/test_rc.py b/tests/unit/legate/test_rc.py index d497163ef..74cea3092 100644 --- a/tests/unit/legate/test_rc.py +++ b/tests/unit/legate/test_rc.py @@ -14,7 +14,6 @@ # import sys -from dataclasses import dataclass from unittest.mock import MagicMock import pytest @@ -30,27 +29,28 @@ def mock_has_legion_context(monkeypatch: pytest.MonkeyPatch) -> MagicMock: class Test_check_legion: - def test_True(self, mock_has_legion_context) -> None: + def test_True(self, mock_has_legion_context: MagicMock) -> None: mock_has_legion_context.return_value = True - assert m.check_legion() is None + assert m.check_legion() is None # type: ignore[func-returns-value] - def test_True_with_msg(self, mock_has_legion_context) -> None: + def test_True_with_msg(self, mock_has_legion_context: MagicMock) -> None: mock_has_legion_context.return_value = True - assert m.check_legion(msg="custom") is None + assert m.check_legion(msg="custom") is None # type: ignore[func-returns-value] # noqa - def test_False(self, mock_has_legion_context) -> None: + def test_False(self, mock_has_legion_context: MagicMock) -> None: mock_has_legion_context.return_value = False with pytest.raises(RuntimeError) as e: m.check_legion() assert str(e) == m.LEGION_WARNING - def test_False_with_msg(self, mock_has_legion_context) -> None: + def test_False_with_msg(self, mock_has_legion_context: MagicMock) -> None: mock_has_legion_context.return_value = False with pytest.raises(RuntimeError) as e: m.check_legion(msg="custom") assert str(e) == "custom" +@pytest.mark.skip class Test_has_legion_context: def test_True(self) -> None: assert m.has_legion_context() is True @@ -62,113 +62,5 @@ def test_False(self) -> None: pass -@dataclass(frozen=True) -class _TestObj: - a: int = 10 - b: m.NotRequired[int] = m.Unset - c: m.NotRequired[str] = "foo" - d: m.NotRequired[str] = m.Unset - - -def test_entries() -> None: - assert set(m.entries(_TestObj())) == {("a", 10), ("c", "foo")} - - -class TestArgSpec: - def test_dest_required(self): - with pytest.raises(TypeError) as e: - m.ArgSpec() - assert ( - str(e.value) - == "__init__() missing 1 required positional argument: 'dest'" - ) - - def test_default(self): - spec = m.ArgSpec("dest") - assert spec.dest == "dest" - assert spec.action == "store_true" - - # all others are unset - assert set(m.entries(spec)) == { - ("dest", "dest"), - ("action", "store_true"), - } - - -class Test_parse_command_args: - @pytest.mark.parametrize("name", ("1foo", "a.b", "a/b", "a[", "a(")) - def test_bad_libname(self, name): - with pytest.raises(ValueError): - m.parse_command_args(name, []) - - def test_default_help(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["app", "-foo:help"]) - with pytest.raises(SystemExit) as e: - m.parse_command_args("foo", []) - assert e.value.code is None - out, err = capsys.readouterr() - assert out.startswith("usage: ") - - def test_default_help_precedence(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["app", "-foo:help", "-foo:bar"]) - args = [m.Argument("bar", m.ArgSpec(dest="help"))] - with pytest.raises(SystemExit) as e: - m.parse_command_args("foo", args) - assert e.value.code is None - out, err = capsys.readouterr() - assert out.startswith("usage: ") - - def test_help_override(self, monkeypatch, capsys): - 
monkeypatch.setattr("sys.argv", ["app", "-foo:help"]) - args = [m.Argument("help", m.ArgSpec(dest="help"))] - ns = m.parse_command_args("foo", args) - out, err = capsys.readouterr() - assert out == "" - assert vars(ns) == {"help": True} - assert sys.argv == ["app"] - - def test_basic(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "-foo:quux", "1"]) - args = [ - m.Argument("bar", m.ArgSpec(dest="bar")), - m.Argument( - "quux", m.ArgSpec(dest="quux", action="store", type=int) - ), - ] - ns = m.parse_command_args("foo", args) - out, err = capsys.readouterr() - assert out == "" - assert vars(ns) == {"bar": True, "quux": 1} - assert sys.argv == ["app"] - - def test_extra_args_passed_on(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "--extra", "1"]) - args = [m.Argument("bar", m.ArgSpec(dest="bar"))] - ns = m.parse_command_args("foo", args) - out, err = capsys.readouterr() - assert out == "" - assert vars(ns) == {"bar": True} - assert sys.argv == ["app", "--extra", "1"] - - def test_unrecognized_libname_arg(self, monkeypatch, capsys): - monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "-foo:baz"]) - with pytest.warns(UserWarning) as record: - ns = m.parse_command_args("foo", []) - out, err = capsys.readouterr() - assert out == "" - assert vars(ns) == {} - assert sys.argv == ["app", "-foo:bar", "-foo:baz"] - - # issues one warning for the first encountered - assert len(record) == 1 - assert ( - record[0].message.args[0] - == "Unrecognized argument '-foo:bar' for foo (passed on as-is)" - ) - assert out == "" - assert vars(ns) == {} - assert sys.argv == ["app", "-foo:bar", "-foo:baz"] - - if __name__ == "__main__": sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/legate/tester/__init__.py b/tests/unit/legate/tester/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/legate/tester/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations diff --git a/tests/unit/legate/tester/stages/__init__.py b/tests/unit/legate/tester/stages/__init__.py new file mode 100644 index 000000000..a955e39e0 --- /dev/null +++ b/tests/unit/legate/tester/stages/__init__.py @@ -0,0 +1,38 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import Any + +from legate.tester.test_system import TestSystem +from legate.util.types import CPUInfo, GPUInfo + + +class FakeSystem(TestSystem): + def __init__( + self, cpus: int = 6, gpus: int = 6, fbmem: int = 6 << 32, **kwargs: Any + ) -> None: + self._cpus = cpus + self._gpus = gpus + self._fbmem = fbmem + super().__init__(**kwargs) + + @property + def cpus(self) -> tuple[CPUInfo, ...]: + return tuple(CPUInfo((i,)) for i in range(self._cpus)) + + @property + def gpus(self) -> tuple[GPUInfo, ...]: + return tuple(GPUInfo(i, self._fbmem) for i in range(self._gpus)) diff --git a/tests/unit/legate/tester/stages/_linux/__init__.py b/tests/unit/legate/tester/stages/_linux/__init__.py new file mode 100644 index 000000000..345983919 --- /dev/null +++ b/tests/unit/legate/tester/stages/_linux/__init__.py @@ -0,0 +1,22 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import sys + +import pytest + +if sys.platform != "linux": + pytestmark = pytest.mark.skip() diff --git a/tests/unit/legate/tester/stages/_linux/test_cpu.py b/tests/unit/legate/tester/stages/_linux/test_cpu.py new file mode 100644 index 000000000..24a4eef3d --- /dev/null +++ b/tests/unit/legate/tester/stages/_linux/test_cpu.py @@ -0,0 +1,132 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import pytest + +from legate.tester.config import Config +from legate.tester.stages._linux import cpu as m +from legate.tester.stages.util import UNPIN_ENV + +from .. 
import FakeSystem + + +def test_default() -> None: + c = Config([]) + s = FakeSystem(cpus=12) + stage = m.CPU(c, s) + assert stage.kind == "cpus" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == UNPIN_ENV + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" in stage.shard_args(shard, c) + + +def test_cpu_pin_strict() -> None: + c = Config(["test.py", "--cpu-pin", "strict"]) + s = FakeSystem(cpus=12) + stage = m.CPU(c, s) + assert stage.kind == "cpus" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == {} + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" in stage.shard_args(shard, c) + + +def test_cpu_pin_none() -> None: + c = Config(["test.py", "--cpu-pin", "none"]) + s = FakeSystem(cpus=12) + stage = m.CPU(c, s) + assert stage.kind == "cpus" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == UNPIN_ENV + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" not in stage.shard_args(shard, c) + + +@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) +def test_shard_args(shard: tuple[int, ...], expected: str) -> None: + c = Config([]) + s = FakeSystem() + stage = m.CPU(c, s) + result = stage.shard_args(shard, c) + assert result == ["--cpus", f"{c.cpus}", "--cpu-bind", expected] + + +def test_spec_with_cpus_1() -> None: + c = Config(["test.py", "--cpus", "1"]) + s = FakeSystem() + stage = m.CPU(c, s) + assert stage.spec.workers == 3 + assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] + + +def test_spec_with_cpus_2() -> None: + c = Config(["test.py", "--cpus", "2"]) + s = FakeSystem() + stage = m.CPU(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0, 1, 2), (3, 4, 5)] + + +def test_spec_with_utility() -> None: + c = Config(["test.py", "--cpus", "1", "--utility", "2"]) + s = FakeSystem() + stage = m.CPU(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0, 1, 2), (3, 4, 5)] + + +def test_spec_with_requested_workers() -> None: + c = Config(["test.py", "--cpus", "1", "-j", "2"]) + s = FakeSystem() + stage = m.CPU(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0, 1), (2, 3)] + + +def test_spec_with_requested_workers_zero() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", "0"]) + assert c.requested_workers == 0 + with pytest.raises(RuntimeError): + m.CPU(c, s) + + +def test_spec_with_requested_workers_bad() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) + assert c.requested_workers > len(s.cpus) + with pytest.raises(RuntimeError): + m.CPU(c, s) + + +def test_spec_with_verbose() -> None: + args = ["test.py", "--cpus", "2"] + c = Config(args) + cv = Config(args + ["--verbose"]) + s = FakeSystem() + + spec, vspec = m.CPU(c, s).spec, m.CPU(cv, s).spec + assert vspec == spec diff --git a/tests/unit/legate/tester/stages/_linux/test_eager.py b/tests/unit/legate/tester/stages/_linux/test_eager.py new file mode 100644 index 000000000..eb8c48629 --- /dev/null +++ b/tests/unit/legate/tester/stages/_linux/test_eager.py @@ -0,0 +1,82 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import pytest + +from legate.tester.config import Config +from legate.tester.stages._linux import eager as m + +from .. import FakeSystem + + +def test_default() -> None: + c = Config([]) + s = FakeSystem() + stage = m.Eager(c, s) + assert stage.kind == "eager" + assert stage.args == [] + assert stage.env(c, s) == { + "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", + "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", + "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", + } + assert stage.spec.workers > 0 + + +@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) +def test_shard_args(shard: tuple[int, ...], expected: str) -> None: + c = Config([]) + s = FakeSystem() + stage = m.Eager(c, s) + result = stage.shard_args(shard, c) + assert result == ["--cpus", "1", "--cpu-bind", expected] + + +def test_spec() -> None: + c = Config([]) + s = FakeSystem() + stage = m.Eager(c, s) + assert stage.spec.workers == len(s.cpus) + # [cpu.ids for cpu in system.cpus] + assert stage.spec.shards == [(i,) for i in range(stage.spec.workers)] + + +def test_spec_with_requested_workers_zero() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", "0"]) + assert c.requested_workers == 0 + with pytest.raises(RuntimeError): + m.Eager(c, s) + + +def test_spec_with_requested_workers_bad() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) + assert c.requested_workers > len(s.cpus) + with pytest.raises(RuntimeError): + m.Eager(c, s) + + +def test_spec_with_verbose() -> None: + c = Config(["test.py"]) + cv = Config(["test.py", "--verbose"]) + s = FakeSystem() + + spec, vspec = m.Eager(c, s).spec, m.Eager(cv, s).spec + assert vspec == spec diff --git a/tests/unit/legate/tester/stages/_linux/test_gpu.py b/tests/unit/legate/tester/stages/_linux/test_gpu.py new file mode 100644 index 000000000..df1441c65 --- /dev/null +++ b/tests/unit/legate/tester/stages/_linux/test_gpu.py @@ -0,0 +1,101 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import pytest + +from legate.tester.config import Config +from legate.tester.stages._linux import gpu as m + +from .. 
import FakeSystem + + +def test_default() -> None: + c = Config([]) + s = FakeSystem() + stage = m.GPU(c, s) + assert stage.kind == "cuda" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == {} + assert stage.spec.workers > 0 + + +@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) +def test_shard_args(shard: tuple[int, ...], expected: str) -> None: + c = Config([]) + s = FakeSystem() + stage = m.GPU(c, s) + result = stage.shard_args(shard, c) + assert result == [ + "--fbmem", + "4096", + "--gpus", + f"{len(shard)}", + "--gpu-bind", + expected, + ] + + +def test_spec_with_gpus_1() -> None: + c = Config(["test.py", "--gpus", "1"]) + s = FakeSystem() + stage = m.GPU(c, s) + assert stage.spec.workers == 12 + assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 12 + + +def test_spec_with_gpus_2() -> None: + c = Config(["test.py", "--gpus", "2"]) + s = FakeSystem() + stage = m.GPU(c, s) + assert stage.spec.workers == 6 + assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] * 6 + + +def test_spec_with_requested_workers() -> None: + c = Config(["test.py", "--gpus", "1", "-j", "2"]) + s = FakeSystem() + stage = m.GPU(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 2 + + +def test_spec_with_requested_workers_zero() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", "0"]) + assert c.requested_workers == 0 + with pytest.raises(RuntimeError): + m.GPU(c, s) + + +def test_spec_with_requested_workers_bad() -> None: + s = FakeSystem() + c = Config(["test.py", "-j", f"{len(s.gpus)+100}"]) + assert c.requested_workers > len(s.gpus) + with pytest.raises(RuntimeError): + m.GPU(c, s) + + +def test_spec_with_verbose() -> None: + args = ["test.py", "--gpus", "2"] + c = Config(args) + cv = Config(args + ["--verbose"]) + s = FakeSystem() + + spec, vspec = m.GPU(c, s).spec, m.GPU(cv, s).spec + assert vspec == spec diff --git a/tests/unit/legate/tester/stages/_linux/test_omp.py b/tests/unit/legate/tester/stages/_linux/test_omp.py new file mode 100644 index 000000000..a4d319fc0 --- /dev/null +++ b/tests/unit/legate/tester/stages/_linux/test_omp.py @@ -0,0 +1,164 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import pytest + +from legate.tester.config import Config +from legate.tester.stages._linux import omp as m +from legate.tester.stages.util import UNPIN_ENV + +from .. 
import FakeSystem + + +def test_default() -> None: + c = Config([]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.kind == "openmp" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == UNPIN_ENV + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" in stage.shard_args(shard, c) + + +def test_cpu_pin_strict() -> None: + c = Config(["test.py", "--cpu-pin", "strict"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.kind == "openmp" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == {} + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" in stage.shard_args(shard, c) + + +def test_cpu_pin_none() -> None: + c = Config(["test.py", "--cpu-pin", "none"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.kind == "openmp" + assert stage.args == ["-cunumeric:test"] + assert stage.env(c, s) == UNPIN_ENV + assert stage.spec.workers > 0 + + shard = (1, 2, 3) + assert "--cpu-bind" not in stage.shard_args(shard, c) + + +@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) +def test_shard_args(shard: tuple[int, ...], expected: str) -> None: + c = Config([]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + result = stage.shard_args(shard, c) + assert result == [ + "--omps", + f"{c.omps}", + "--ompthreads", + f"{c.ompthreads}", + "--cpu-bind", + expected, + ] + + +def test_spec_with_omps_1_threads_1() -> None: + c = Config(["test.py", "--omps", "1", "--ompthreads", "1"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 6 + assert stage.spec.shards == [ + (0, 1), + (2, 3), + (4, 5), + (6, 7), + (8, 9), + (10, 11), + ] + + +def test_spec_with_omps_1_threads_2() -> None: + c = Config(["test.py", "--omps", "1", "--ompthreads", "2"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 4 + assert stage.spec.shards == [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)] + + +def test_spec_with_omps_2_threads_1() -> None: + c = Config(["test.py", "--omps", "2", "--ompthreads", "1"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 4 + assert stage.spec.shards == [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)] + + +def test_spec_with_omps_2_threads_2() -> None: + c = Config(["test.py", "--omps", "2", "--ompthreads", "2"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9)] + + +def test_spec_with_utility() -> None: + c = Config( + ["test.py", "--omps", "2", "--ompthreads", "2", "--utility", "3"] + ) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 1 + assert stage.spec.shards == [(0, 1, 2, 3, 4, 5, 6)] + + +def test_spec_with_requested_workers() -> None: + c = Config(["test.py", "--omps", "1", "--ompthreads", "1", "-j", "2"]) + s = FakeSystem(cpus=12) + stage = m.OMP(c, s) + assert stage.spec.workers == 2 + assert stage.spec.shards == [(0, 1), (2, 3)] + + +def test_spec_with_requested_workers_zero() -> None: + s = FakeSystem(cpus=12) + c = Config(["test.py", "-j", "0"]) + assert c.requested_workers == 0 + with pytest.raises(RuntimeError): + m.OMP(c, s) + + +def test_spec_with_requested_workers_bad() -> None: + s = FakeSystem(cpus=12) + c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) + assert c.requested_workers > len(s.cpus) + with pytest.raises(RuntimeError): + m.OMP(c, s) + + +def test_spec_with_verbose() -> None: + args = ["test.py", "--cpus", "2"] + c = 
Config(args) + cv = Config(args + ["--verbose"]) + s = FakeSystem(cpus=12) + + spec, vspec = m.OMP(c, s).spec, m.OMP(cv, s).spec + assert vspec == spec diff --git a/tests/unit/legate/tester/stages/test_test_stage.py b/tests/unit/legate/tester/stages/test_test_stage.py new file mode 100644 index 000000000..90edfaed4 --- /dev/null +++ b/tests/unit/legate/tester/stages/test_test_stage.py @@ -0,0 +1,88 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +from datetime import timedelta +from pathlib import Path + +from legate.tester import FeatureType +from legate.tester.config import Config +from legate.tester.stages import test_stage as m +from legate.tester.stages.util import StageResult, StageSpec +from legate.tester.test_system import ProcessResult, TestSystem as _TestSystem + +from . import FakeSystem + +s = FakeSystem() + + +class MockTestStage(m.TestStage): + + kind: FeatureType = "eager" + + name = "mock" + + args = ["-foo", "-bar"] + + def __init__(self, config: Config, system: _TestSystem) -> None: + self._init(config, system) + + def compute_spec(self, config: Config, system: _TestSystem) -> StageSpec: + return StageSpec(2, [(0,), (1,), (2,)]) + + +class TestTestStage: + def test_name(self) -> None: + c = Config([]) + stage = MockTestStage(c, s) + assert stage.name == "mock" + + def test_intro(self) -> None: + c = Config([]) + stage = MockTestStage(c, s) + assert "Entering stage: mock" in stage.intro + + def test_outro(self) -> None: + c = Config([]) + stage = MockTestStage(c, s) + stage.result = StageResult( + [ProcessResult("invoke", Path("test/file"))], + timedelta(seconds=2.123), + ) + outro = stage.outro + assert "Exiting stage: mock" in outro + assert "Passed 1 of 1 tests (100.0%)" in outro + assert "2.123" in outro + + def test_file_args_default(self) -> None: + c = Config([]) + stage = MockTestStage(c, s) + assert stage.file_args(Path("integration/foo"), c) == [] + assert stage.file_args(Path("unit/foo"), c) == [] + + def test_file_args_v(self) -> None: + c = Config(["test.py", "-v"]) + stage = MockTestStage(c, s) + assert stage.file_args(Path("integration/foo"), c) == ["-v"] + assert stage.file_args(Path("unit/foo"), c) == [] + + def test_file_args_vv(self) -> None: + c = Config(["test.py", "-vv"]) + stage = MockTestStage(c, s) + assert stage.file_args(Path("integration/foo"), c) == ["-v", "-s"] + assert stage.file_args(Path("unit/foo"), c) == [] diff --git a/tests/unit/legate/tester/stages/test_util.py b/tests/unit/legate/tester/stages/test_util.py new file mode 100644 index 000000000..b4c528d06 --- /dev/null +++ b/tests/unit/legate/tester/stages/test_util.py @@ -0,0 +1,48 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. + +""" +from __future__ import annotations + +import pytest + +from legate.tester.stages import util as m + + +class Test_adjust_workers: + @pytest.mark.parametrize("n", (1, 5, 100)) + def test_None_requested(self, n: int) -> None: + assert m.adjust_workers(n, None) == n + + @pytest.mark.parametrize("n", (1, 2, 9)) + def test_requested(self, n: int) -> None: + assert m.adjust_workers(10, n) == n + + def test_negative_requested(self) -> None: + with pytest.raises(ValueError): + assert m.adjust_workers(10, -1) + + def test_zero_requested(self) -> None: + with pytest.raises(RuntimeError): + assert m.adjust_workers(10, 0) + + def test_zero_computed(self) -> None: + with pytest.raises(RuntimeError): + assert m.adjust_workers(0, None) + + def test_requested_too_large(self) -> None: + with pytest.raises(RuntimeError): + assert m.adjust_workers(10, 11) diff --git a/tests/unit/legate/tester/test___init__.py b/tests/unit/legate/tester/test___init__.py new file mode 100644 index 000000000..6431469ff --- /dev/null +++ b/tests/unit/legate/tester/test___init__.py @@ -0,0 +1,69 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. 
+ +""" +from __future__ import annotations + +from legate.tester import ( + DEFAULT_CPUS_PER_NODE, + DEFAULT_GPU_DELAY, + DEFAULT_GPU_MEMORY_BUDGET, + DEFAULT_GPUS_PER_NODE, + DEFAULT_OMPS_PER_NODE, + DEFAULT_OMPTHREADS, + DEFAULT_PROCESS_ENV, + FEATURES, + PER_FILE_ARGS, + SKIPPED_EXAMPLES, +) + + +class TestConsts: + def test_DEFAULT_CPUS_PER_NODE(self) -> None: + assert DEFAULT_CPUS_PER_NODE == 4 + + def test_DEFAULT_GPUS_PER_NODE(self) -> None: + assert DEFAULT_GPUS_PER_NODE == 1 + + def test_DEFAULT_GPU_DELAY(self) -> None: + assert DEFAULT_GPU_DELAY == 2000 + + def test_DEFAULT_GPU_MEMORY_BUDGET(self) -> None: + assert DEFAULT_GPU_MEMORY_BUDGET == 4096 + + def test_DEFAULT_OMPS_PER_NODE(self) -> None: + assert DEFAULT_OMPS_PER_NODE == 1 + + def test_DEFAULT_OMPTHREADS(self) -> None: + assert DEFAULT_OMPTHREADS == 4 + + def test_DEFAULT_PROCESS_ENV(self) -> None: + assert DEFAULT_PROCESS_ENV == { + "LEGATE_TEST": "1", + } + + def test_FEATURES(self) -> None: + assert FEATURES == ("cpus", "cuda", "eager", "openmp") + + def test_SKIPPED_EXAMPLES(self) -> None: + assert isinstance(SKIPPED_EXAMPLES, set) + assert all(isinstance(x, str) for x in SKIPPED_EXAMPLES) + assert all(x.startswith("examples") for x in SKIPPED_EXAMPLES) + + def test_PER_FILE_ARGS(self) -> None: + assert isinstance(PER_FILE_ARGS, dict) + assert all(isinstance(x, str) for x in PER_FILE_ARGS.keys()) + assert all(isinstance(x, list) for x in PER_FILE_ARGS.values()) diff --git a/tests/unit/legate/tester/test_args.py b/tests/unit/legate/tester/test_args.py new file mode 100644 index 000000000..c307a7080 --- /dev/null +++ b/tests/unit/legate/tester/test_args.py @@ -0,0 +1,89 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. 
+
+"""
+from __future__ import annotations
+
+from legate.tester import (
+    DEFAULT_CPUS_PER_NODE,
+    DEFAULT_GPU_DELAY,
+    DEFAULT_GPU_MEMORY_BUDGET,
+    DEFAULT_GPUS_PER_NODE,
+    DEFAULT_OMPS_PER_NODE,
+    DEFAULT_OMPTHREADS,
+    args as m,
+)
+
+
+class TestParserDefaults:
+    def test_features(self) -> None:
+        assert m.parser.get_default("features") is None
+
+    def test_files(self) -> None:
+        assert m.parser.get_default("files") is None
+
+    def test_unit(self) -> None:
+        assert m.parser.get_default("unit") is False
+
+    def test_cpus(self) -> None:
+        assert m.parser.get_default("cpus") == DEFAULT_CPUS_PER_NODE
+
+    def test_gpus(self) -> None:
+        assert m.parser.get_default("gpus") == DEFAULT_GPUS_PER_NODE
+
+    def test_cpu_pin(self) -> None:
+        assert m.parser.get_default("cpu_pin") == "partial"
+
+    def test_gpu_delay(self) -> None:
+        assert m.parser.get_default("gpu_delay") == DEFAULT_GPU_DELAY
+
+    def test_fbmem(self) -> None:
+        assert m.parser.get_default("fbmem") == DEFAULT_GPU_MEMORY_BUDGET
+
+    def test_omps(self) -> None:
+        assert m.parser.get_default("omps") == DEFAULT_OMPS_PER_NODE
+
+    def test_ompthreads(self) -> None:
+        assert m.parser.get_default("ompthreads") == DEFAULT_OMPTHREADS
+
+    def test_legate_dir(self) -> None:
+        assert m.parser.get_default("legate_dir") is None
+
+    def test_test_root(self) -> None:
+        assert m.parser.get_default("test_root") is None
+
+    def test_workers(self) -> None:
+        assert m.parser.get_default("workers") is None
+
+    def test_verbose(self) -> None:
+        assert m.parser.get_default("verbose") == 0
+
+    def test_dry_run(self) -> None:
+        assert m.parser.get_default("dry_run") is False
+
+    def test_debug(self) -> None:
+        assert m.parser.get_default("debug") is False
+
+
+class TestParserConfig:
+    def test_parser_epilog(self) -> None:
+        assert (
+            m.parser.epilog
+            == "Any extra arguments will be forwarded to the Legate script"
+        )
+
+    def test_parser_description(self) -> None:
+        assert m.parser.description == "Run the Cunumeric test suite"
diff --git a/tests/unit/legate/tester/test_config.py b/tests/unit/legate/tester/test_config.py
new file mode 100644
index 000000000..d55104980
--- /dev/null
+++ b/tests/unit/legate/tester/test_config.py
@@ -0,0 +1,182 @@
+# Copyright 2022 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Consolidate test configuration from command-line and environment.
+ +""" +from __future__ import annotations + +from pathlib import Path, PurePath + +import pytest + +from legate.tester import ( + DEFAULT_CPUS_PER_NODE, + DEFAULT_GPU_DELAY, + DEFAULT_GPU_MEMORY_BUDGET, + DEFAULT_GPUS_PER_NODE, + DEFAULT_OMPS_PER_NODE, + DEFAULT_OMPTHREADS, + FEATURES, + config as m, +) +from legate.tester.args import PIN_OPTIONS, PinOptionsType + + +class TestConfig: + def test_default_init(self) -> None: + c = m.Config([]) + + assert c.examples is True + assert c.integration is True + assert c.unit is False + assert c.files is None + + assert c.features == ("cpus",) + + assert c.cpus == DEFAULT_CPUS_PER_NODE + assert c.gpus == DEFAULT_GPUS_PER_NODE + assert c.cpu_pin == "partial" + assert c.gpu_delay == DEFAULT_GPU_DELAY + assert c.fbmem == DEFAULT_GPU_MEMORY_BUDGET + assert c.omps == DEFAULT_OMPS_PER_NODE + assert c.ompthreads == DEFAULT_OMPTHREADS + + assert c.debug is False + assert c.dry_run is False + assert c.verbose == 0 + assert c.test_root is None + assert c.requested_workers is None + assert c.legate_dir is None + + assert c.extra_args == [] + assert c.root_dir == PurePath(m.__file__).parents[2] + + # TODO (bv) restore when generalized + # assert len(c.test_files) > 0 + # assert any("examples" in str(x) for x in c.test_files) + # assert any("integration" in str(x) for x in c.test_files) + # assert all("unit" not in str(x) for x in c.test_files) + + assert c.legate_path == "legate" + + @pytest.mark.parametrize("feature", FEATURES) + def test_env_features( + self, monkeypatch: pytest.MonkeyPatch, feature: str + ) -> None: + monkeypatch.setenv(f"USE_{feature.upper()}", "1") + + # test default config + c = m.Config([]) + assert set(c.features) == {feature} + + # also test with a --use value provided + c = m.Config(["test.py", "--use", "cuda"]) + assert set(c.features) == {"cuda"} + + @pytest.mark.parametrize("feature", FEATURES) + def test_cmd_features(self, feature: str) -> None: + + # test a single value + c = m.Config(["test.py", "--use", feature]) + assert set(c.features) == {feature} + + # also test with multiple / duplication + c = m.Config(["test.py", "--use", f"cpus,{feature}"]) + assert set(c.features) == {"cpus", feature} + + # TODO (bv) restore when generalized + @pytest.mark.skip + def test_unit(self) -> None: + c = m.Config(["test.py", "--unit"]) + assert len(c.test_files) > 0 + assert any("examples" in str(x) for x in c.test_files) + assert any("integration" in str(x) for x in c.test_files) + assert any("unit" in str(x) for x in c.test_files) + + def test_files(self) -> None: + c = m.Config(["test.py", "--files", "a", "b", "c"]) + assert c.files == ["a", "b", "c"] + + @pytest.mark.parametrize( + "opt", ("cpus", "gpus", "gpu-delay", "fbmem", "omps", "ompthreads") + ) + def test_feature_options(self, opt: str) -> None: + c = m.Config(["test.py", f"--{opt}", "1234"]) + assert getattr(c, opt.replace("-", "_")) == 1234 + + @pytest.mark.parametrize("value", PIN_OPTIONS) + def test_cpu_pin(self, value: PinOptionsType) -> None: + c = m.Config(["test.py", "--cpu-pin", value]) + assert c.cpu_pin == value + + def test_workers(self) -> None: + c = m.Config(["test.py", "-j", "1234"]) + assert c.requested_workers == 1234 + + def test_debug(self) -> None: + c = m.Config(["test.py", "--debug"]) + assert c.debug is True + + def test_dry_run(self) -> None: + c = m.Config(["test.py", "--dry-run"]) + assert c.dry_run is True + + @pytest.mark.parametrize("arg", ("-v", "--verbose")) + def test_verbose1(self, arg: str) -> None: + c = m.Config(["test.py", arg]) + assert 
c.verbose == 1 + + def test_verbose2(self) -> None: + c = m.Config(["test.py", "-vv"]) + assert c.verbose == 2 + + @pytest.mark.parametrize("arg", ("-C", "--directory")) + def test_test_root(self, arg: str) -> None: + c = m.Config(["test.py", arg, "some/path"]) + assert c.test_root == "some/path" + + def test_legate_dir(self) -> None: + c = m.Config([]) + assert c.legate_dir is None + assert c.legate_path == "legate" + assert c._legate_source == "install" + + def test_cmd_legate_dir_good(self) -> None: + legate_dir = Path("/usr/local") + c = m.Config(["test.py", "--legate", str(legate_dir)]) + assert c.legate_dir == legate_dir + assert c.legate_path == str(legate_dir / "bin" / "legate") + assert c._legate_source == "cmd" + + def test_env_legate_dir_good( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + legate_dir = Path("/usr/local") + monkeypatch.setenv("LEGATE_DIR", str(legate_dir)) + c = m.Config([]) + assert c.legate_dir == legate_dir + assert c.legate_path == str(legate_dir / "bin" / "legate") + assert c._legate_source == "env" + + def test_extra_args(self) -> None: + extra = ["-foo", "--bar", "--baz", "10"] + c = m.Config(["test.py"] + extra) + assert c.extra_args == extra + + # also test with --files since that option collects arguments + c = m.Config(["test.py", "--files", "a", "b"] + extra) + assert c.extra_args == extra + c = m.Config(["test.py"] + extra + ["--files", "a", "b"]) + assert c.extra_args == extra diff --git a/tests/unit/legate/tester/test_logger.py b/tests/unit/legate/tester/test_logger.py new file mode 100644 index 000000000..40228c2f4 --- /dev/null +++ b/tests/unit/legate/tester/test_logger.py @@ -0,0 +1,74 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. 
+ +""" +from __future__ import annotations + +from legate.tester import logger as m + +TEST_LINES = ( + "line 1", + "\x1b[31mfoo\x1b[0m", # ui.red("foo") + "bar", + "last line", +) + + +class TestLogger: + def test_init(self) -> None: + log = m.Log() + assert log.lines == () + assert log.dump() == "" + + def test_record_lines(self) -> None: + log = m.Log() + log.record(*TEST_LINES) + assert log.lines == TEST_LINES + assert log.dump(filter_ansi=False) == "\n".join(TEST_LINES) + + def test_record_line_with_newlines(self) -> None: + log = m.Log() + log.record("\n".join(TEST_LINES)) + assert log.lines == TEST_LINES + assert log.dump(filter_ansi=False) == "\n".join(TEST_LINES) + + def test_call(self) -> None: + log = m.Log() + log(*TEST_LINES) + assert log.lines == TEST_LINES + assert log.dump() == "line 1\nfoo\nbar\nlast line" + + def test_dump_filter(self) -> None: + log = m.Log() + log.record(*TEST_LINES) + assert log.lines == TEST_LINES + assert log.dump() == "line 1\nfoo\nbar\nlast line" + + def test_dump_index(self) -> None: + log = m.Log() + log.record(*TEST_LINES) + assert log.dump(start=1, end=3) == "foo\nbar" + + def test_clear(self) -> None: + log = m.Log() + log.record(*TEST_LINES) + assert len(log.lines) > 0 + log.clear() + assert len(log.lines) == 0 + + +def test_LOG() -> None: + assert isinstance(m.LOG, m.Log) diff --git a/tests/unit/legate/tester/test_test_system.py b/tests/unit/legate/tester/test_test_system.py new file mode 100644 index 000000000..268a6a32f --- /dev/null +++ b/tests/unit/legate/tester/test_test_system.py @@ -0,0 +1,65 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. 
+ +""" +from __future__ import annotations + +from pathlib import Path +from subprocess import CompletedProcess +from unittest.mock import MagicMock + +import pytest +from pytest_mock import MockerFixture + +from legate.tester import test_system as m + + +@pytest.fixture +def mock_subprocess_run(mocker: MockerFixture) -> MagicMock: + return mocker.patch.object(m, "stdlib_run") + + +CMD = "legate script.py --cpus 4" + + +class TestSystem: + def test_init(self) -> None: + s = m.TestSystem() + assert s.dry_run is False + + def test_run(self, mock_subprocess_run: MagicMock) -> None: + s = m.TestSystem() + + expected = m.ProcessResult( + CMD, Path("test/file"), returncode=10, output="" + ) + mock_subprocess_run.return_value = CompletedProcess( + CMD, 10, stdout="" + ) + + result = s.run(CMD.split(), Path("test/file")) + mock_subprocess_run.assert_called() + + assert result == expected + + def test_dry_run(self, mock_subprocess_run: MagicMock) -> None: + s = m.TestSystem(dry_run=True) + + result = s.run(CMD.split(), Path("test/file")) + mock_subprocess_run.assert_not_called() + + assert result.output == "" + assert result.skipped diff --git a/tests/unit/legate/util/__init__.py b/tests/unit/legate/util/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/legate/util/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations diff --git a/tests/unit/legate/driver/sample_cmake_cache.txt b/tests/unit/legate/util/sample_cmake_cache.txt similarity index 100% rename from tests/unit/legate/driver/sample_cmake_cache.txt rename to tests/unit/legate/util/sample_cmake_cache.txt diff --git a/tests/unit/legate/driver/sample_header.h b/tests/unit/legate/util/sample_header.h similarity index 100% rename from tests/unit/legate/driver/sample_header.h rename to tests/unit/legate/util/sample_header.h diff --git a/tests/unit/legate/util/test_args.py b/tests/unit/legate/util/test_args.py new file mode 100644 index 000000000..02d01a58c --- /dev/null +++ b/tests/unit/legate/util/test_args.py @@ -0,0 +1,187 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys +from argparse import ArgumentParser +from dataclasses import dataclass +from typing import Iterable, TypeVar + +import pytest + +import legate.util.args as m + +from ...util import Capsys, powerset + +T = TypeVar("T") + + +class TestMultipleChoices: + @pytest.mark.parametrize("choices", ([1, 2, 3], range(4), ("a", "b"))) + def test_init(self, choices: Iterable[T]) -> None: + mc = m.MultipleChoices(choices) + assert mc._choices == set(choices) + + def test_contains_item(self) -> None: + choices = [1, 2, 3] + mc = m.MultipleChoices(choices) + for item in choices: + assert item in mc + + def test_contains_subset(self) -> None: + choices = [1, 2, 3] + mc = m.MultipleChoices(choices) + for subset in powerset(choices): + assert subset in mc + + def test_iter(self) -> None: + choices = [1, 2, 3] + mc = m.MultipleChoices(choices) + assert list(mc) == choices + + +class TestExtendAction: + parser = ArgumentParser() + parser.add_argument( + "--foo", dest="foo", action=m.ExtendAction, choices=("a", "b", "c") + ) + + def test_single(self) -> None: + ns = self.parser.parse_args(["--foo", "a"]) + assert ns.foo == ["a"] + + def test_multi(self) -> None: + ns = self.parser.parse_args(["--foo", "a", "--foo", "b"]) + assert sorted(ns.foo) == ["a", "b"] + + def test_repeat(self) -> None: + ns = self.parser.parse_args(["--foo", "a", "--foo", "a"]) + assert ns.foo == ["a"] + + +@dataclass(frozen=True) +class _TestObj: + a: int = 10 + b: m.NotRequired[int] = m.Unset + c: m.NotRequired[str] = "foo" + d: m.NotRequired[str] = m.Unset + + +class TestArgSpec: + def test_default(self) -> None: + spec = m.ArgSpec("dest") + assert spec.dest == "dest" + assert spec.action == "store_true" + + # all others are unset + assert set(m.entries(spec)) == { + ("dest", "dest"), + ("action", "store_true"), + } + + +def test_entries() -> None: + assert set(m.entries(_TestObj())) == {("a", 10), ("c", "foo")} + + +class Test_parse_library_command_args: + @pytest.mark.parametrize("name", ("1foo", "a.b", "a/b", "a[", "a(")) + def test_bad_libname(self, name: str) -> None: + with pytest.raises(ValueError): + m.parse_library_command_args(name, []) + + def test_default_help( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:help"]) + with pytest.raises(SystemExit) as e: + m.parse_library_command_args("foo", []) + assert e.value.code is None + out, err = capsys.readouterr() # type: ignore[unreachable] + assert out.startswith("usage: ") + + def test_default_help_precedence( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:help", "-foo:bar"]) + args = [m.Argument("bar", m.ArgSpec(dest="help"))] + with pytest.raises(SystemExit) as e: + m.parse_library_command_args("foo", args) + assert e.value.code is None + out, err = capsys.readouterr() # type: ignore[unreachable] + assert out.startswith("usage: ") + + def test_help_override( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:help"]) + args = [m.Argument("help", m.ArgSpec(dest="help"))] + ns = m.parse_library_command_args("foo", args) + out, err = capsys.readouterr() + assert out == "" + assert vars(ns) == {"help": True} + assert sys.argv == ["app"] + + def test_basic( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "-foo:quux", "1"]) + args = [ + m.Argument("bar", m.ArgSpec(dest="bar")), + m.Argument( 
+ "quux", m.ArgSpec(dest="quux", action="store", type=int) + ), + ] + ns = m.parse_library_command_args("foo", args) + out, err = capsys.readouterr() + assert out == "" + assert vars(ns) == {"bar": True, "quux": 1} + assert sys.argv == ["app"] + + def test_extra_args_passed_on( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "--extra", "1"]) + args = [m.Argument("bar", m.ArgSpec(dest="bar"))] + ns = m.parse_library_command_args("foo", args) + out, err = capsys.readouterr() + assert out == "" + assert vars(ns) == {"bar": True} + assert sys.argv == ["app", "--extra", "1"] + + def test_unrecognized_libname_arg( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "-foo:baz"]) + with pytest.warns(UserWarning) as record: + ns = m.parse_library_command_args("foo", []) + out, err = capsys.readouterr() + assert out == "" + assert vars(ns) == {} + assert sys.argv == ["app", "-foo:bar", "-foo:baz"] + + # issues one warning for the first encountered + assert len(record) == 1 + assert isinstance(record[0].message, Warning) + assert ( + record[0].message.args[0] + == "Unrecognized argument '-foo:bar' for foo (passed on as-is)" + ) + assert out == "" + assert vars(ns) == {} + assert sys.argv == ["app", "-foo:bar", "-foo:baz"] + + +if __name__ == "__main__": + sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/legate/util/test_colors.py b/tests/unit/legate/util/test_colors.py new file mode 100644 index 000000000..873f3dc53 --- /dev/null +++ b/tests/unit/legate/util/test_colors.py @@ -0,0 +1,103 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import Any + +import pytest +from pytest_mock import MockerFixture +from typing_extensions import TypeAlias + +import legate.util.colors as m + +try: + import colorama # type: ignore +except ImportError: + colorama = None + +UsePlainTextFixture: TypeAlias = Any + + +@pytest.fixture +def use_plain_text(mocker: MockerFixture) -> None: + mocker.patch.object(m, "bright", m._text) + mocker.patch.object(m, "dim", m._text) + mocker.patch.object(m, "white", m._text) + mocker.patch.object(m, "cyan", m._text) + mocker.patch.object(m, "red", m._text) + mocker.patch.object(m, "green", m._text) + mocker.patch.object(m, "yellow", m._text) + mocker.patch.object(m, "magenta", m._text) + + +COLOR_FUNCS = ( + "cyan", + "green", + "magenta", + "red", + "white", + "yellow", +) + +STYLE_FUNCS = ( + "bright", + "dim", +) + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +@pytest.mark.parametrize("color", COLOR_FUNCS) +def test_color_functions(color: str) -> None: + cfunc = getattr(m, color) + cprop = getattr(colorama.Fore, color.upper()) + + out = cfunc("some text") + + assert out == f"{cprop}some text{colorama.Style.RESET_ALL}" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +@pytest.mark.parametrize("style", STYLE_FUNCS) +def test_style_functions(style: str) -> None: + sfunc = getattr(m, style) + sprop = getattr(colorama.Style, style.upper()) + + out = sfunc("some text") + + assert out == f"{sprop}some text{colorama.Style.RESET_ALL}" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +@pytest.mark.parametrize("color", COLOR_FUNCS) +@pytest.mark.parametrize("style", STYLE_FUNCS) +def test_scrub(style: str, color: str) -> None: + cfunc = getattr(m, color) + sfunc = getattr(m, style) + + assert m.scrub(cfunc(sfunc("some text"))) == "some text" + assert m.scrub(sfunc(cfunc("some text"))) == "some text" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +@pytest.mark.parametrize("color", COLOR_FUNCS) +@pytest.mark.parametrize("style", STYLE_FUNCS) +def test_scrub_plain( + use_plain_text: UsePlainTextFixture, style: str, color: str +) -> None: + cfunc = getattr(m, color) + sfunc = getattr(m, style) + + assert m.scrub(cfunc(sfunc("some text"))) == "some text" + assert m.scrub(sfunc(cfunc("some text"))) == "some text" diff --git a/tests/unit/legate/util/test_fs.py b/tests/unit/legate/util/test_fs.py new file mode 100644 index 000000000..32cd452b3 --- /dev/null +++ b/tests/unit/legate/util/test_fs.py @@ -0,0 +1,53 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from pathlib import Path + +import pytest + +import legate.util.fs as m + +HEADER_PATH = Path(__file__).parent / "sample_header.h" + + +def test_read_c_define_hit() -> None: + assert m.read_c_define(HEADER_PATH, "FOO") == "10" + assert m.read_c_define(HEADER_PATH, "BAR") == '"bar"' + + +def test_read_c_define_miss() -> None: + assert m.read_c_define(HEADER_PATH, "JUNK") is None + + +CMAKE_CACHE_PATH = Path(__file__).parent / "sample_cmake_cache.txt" + + +def test_read_cmake_cache_value_hit() -> None: + assert ( + m.read_cmake_cache_value(CMAKE_CACHE_PATH, "Legion_SOURCE_DIR:STATIC=") + == '"foo/bar"' + ) + assert ( + m.read_cmake_cache_value( + CMAKE_CACHE_PATH, "FIND_LEGATE_CORE_CPP:BOOL=OFF" + ) + == "OFF" + ) + + +def test_read_cmake_cache_value_miss() -> None: + with pytest.raises(RuntimeError): + assert m.read_cmake_cache_value(CMAKE_CACHE_PATH, "JUNK") is None diff --git a/tests/unit/legate/driver/test_system.py b/tests/unit/legate/util/test_system.py similarity index 83% rename from tests/unit/legate/driver/test_system.py rename to tests/unit/legate/util/test_system.py index a1b905496..3ae242b6f 100644 --- a/tests/unit/legate/driver/test_system.py +++ b/tests/unit/legate/util/test_system.py @@ -15,11 +15,12 @@ from __future__ import annotations import os +import sys import pytest from pytest_mock import MockerFixture -import legate.driver.system as m +import legate.util.system as m def test___all__() -> None: @@ -73,7 +74,7 @@ def test_LIBPATH_Darwin(self, mocker: MockerFixture) -> None: def test_legate_paths(self, mocker: MockerFixture) -> None: mocker.patch( - "legate.driver.system.get_legate_paths", + "legate.util.system.get_legate_paths", return_value="legate paths", ) @@ -83,10 +84,22 @@ def test_legate_paths(self, mocker: MockerFixture) -> None: def test_legion_paths(self, mocker: MockerFixture) -> None: mocker.patch( - "legate.driver.system.get_legion_paths", + "legate.util.system.get_legion_paths", return_value="legion paths", ) s = m.System() assert s.legion_paths == "legion paths" # type: ignore + + def test_cpus(self) -> None: + s = m.System() + cpus = s.cpus + assert len(cpus) > 0 + assert all(len(cpu.ids) > 0 for cpu in cpus) + + @pytest.mark.skipif(sys.platform != "linux", reason="pynvml required") + def test_gpus(self) -> None: + s = m.System() + # can't really assume / test much here + s.gpus diff --git a/tests/unit/legate/util/test_types.py b/tests/unit/legate/util/test_types.py new file mode 100644 index 000000000..01835f882 --- /dev/null +++ b/tests/unit/legate/util/test_types.py @@ -0,0 +1,57 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate test configuration from command-line and environment. 
+ +""" +from __future__ import annotations + +from dataclasses import dataclass + +import legate.util.types as m + + +class TestCPUInfo: + def test_fields(self) -> None: + assert set(m.CPUInfo.__dataclass_fields__) == {"ids"} + + +class TestGPUInfo: + def test_fields(self) -> None: + assert set(m.GPUInfo.__dataclass_fields__) == {"id", "total"} + + +class Source: + foo = 10 + bar = 10.2 + baz = "test" + quux = ["a", "b", "c"] + extra = (1, 2, 3) + + +@dataclass(frozen=True) +class Target: + foo: int + bar: float + baz: str + quux: list[str] + + +def test_object_to_dataclass() -> None: + source = Source() + target = m.object_to_dataclass(source, Target) + + assert set(target.__dict__) == set(Target.__dataclass_fields__) + for k, v in target.__dict__.items(): + assert getattr(source, k) == v diff --git a/tests/unit/legate/util/test_ui.py b/tests/unit/legate/util/test_ui.py new file mode 100644 index 000000000..a9ac7d890 --- /dev/null +++ b/tests/unit/legate/util/test_ui.py @@ -0,0 +1,375 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from datetime import timedelta +from typing import Any + +import pytest +from pytest_mock import MockerFixture +from typing_extensions import TypeAlias + +from legate.util import colors, ui as m + +try: + import colorama # type: ignore +except ImportError: + colorama = None + +UsePlainTextFixture: TypeAlias = Any + + +@pytest.fixture +def use_plain_text(mocker: MockerFixture) -> None: + mocker.patch.object(m, "bright", colors._text) + mocker.patch.object(m, "dim", colors._text) + mocker.patch.object(m, "white", colors._text) + mocker.patch.object(m, "cyan", colors._text) + mocker.patch.object(m, "red", colors._text) + mocker.patch.object(m, "green", colors._text) + mocker.patch.object(m, "yellow", colors._text) + mocker.patch.object(m, "magenta", colors._text) + + +def test_UI_WIDTH() -> None: + assert m.UI_WIDTH == 80 + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_banner_simple() -> None: + assert ( + m.banner("some text") + == "\n" + "#" * m.UI_WIDTH + "\n### some text\n" + "#" * m.UI_WIDTH + ) + + +def test_banner_simple_plain(use_plain_text: UsePlainTextFixture) -> None: + assert ( + m.banner("some text") + == "\n" + "#" * m.UI_WIDTH + "\n### some text\n" + "#" * m.UI_WIDTH + ) + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_banner_full() -> None: + assert ( + m.banner("some text", char="*", width=100, details=["a", "b"]) + == "\n" + + "*" * 100 + + "\n*** \n*** some text\n*** \n*** a\n*** b\n*** \n" + + "*" * 100 + ) + + +def test_banner_full_plain(use_plain_text: UsePlainTextFixture) -> None: + assert ( + m.banner("some text", char="*", width=100, details=["a", "b"]) + == "\n" + + "*" * 100 + + "\n*** \n*** some text\n*** \n*** a\n*** b\n*** \n" + + "*" * 100 + ) + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_error() -> None: + assert m.error("some message") == 
colors.red("ERROR: some message") + + +def test_error_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.error("some message") == "ERROR: some message" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_key() -> None: + assert m.key("some key") == colors.dim(colors.green("some key")) + + +def test_key_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.key("some key") == "some key" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_value() -> None: + assert m.value("some value") == m.yellow("some value") + + +def test_value_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.value("some value") == "some value" + + +class Test_kvtable: + ONE = {"foo": 10} + TWO = {"foo": 10, "barbaz": "some value"} + THREE = {"foo": 10, "barbaz": "some value", "a": 1.2} + + @pytest.mark.skipif(colorama is None, reason="colorama required") + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_default(self, items: dict[str, Any]) -> None: + N = max(len(m.key(k)) for k in items) + assert m.kvtable(items) == "\n".join( + f"{m.key(k): <{N}} : {m.value(str(items[k]))}" for k in items + ) + + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_default_plain( + self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] + ) -> None: + N = max(len(k) for k in items) + assert m.kvtable(items) == "\n".join( + f"{k: <{N}} : {items[k]}" for k in items + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_delim(self, items: dict[str, Any]) -> None: + N = max(len(m.key(k)) for k in items) + assert m.kvtable(items, delim="/") == "\n".join( + f"{m.key(k): <{N}}/{m.value(str(items[k]))}" for k in items + ) + + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_delim_plain( + self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] + ) -> None: + N = max(len(k) for k in items) + assert m.kvtable(items, delim="/") == "\n".join( + f"{k: <{N}}/{items[k]}" for k in items + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_align_False(self, items: dict[str, Any]) -> None: + assert m.kvtable(items, align=False) == "\n".join( + f"{m.key(k)} : {m.value(str(items[k]))}" for k in items + ) + + @pytest.mark.parametrize("items", (ONE, TWO, THREE)) + def test_align_False_plain( + self, use_plain_text: UsePlainTextFixture, items: dict[str, Any] + ) -> None: + assert m.kvtable(items, align=False) == "\n".join( + f"{k} : {items[k]}" for k in items + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_keys(self) -> None: + items = self.THREE + keys = ("foo", "a") + N = max(len(m.key(k)) for k in items) + + assert m.kvtable(self.THREE, keys=keys) == "\n".join( + f"{m.key(k): <{N}} : {m.value(str(items[k]))}" for k in keys + ) + + def test_keys_plain(self, use_plain_text: UsePlainTextFixture) -> None: + items = self.THREE + keys = ("foo", "a") + N = max(len(m.key(k)) for k in items) + + assert m.kvtable(items, keys=keys) == "\n".join( + f"{k: <{N}} : {items[k]}" for k in keys + ) + + +class Test_rule: + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_pad(self) -> None: + assert m.rule(pad=4) == colors.cyan(" " + "-" * (m.UI_WIDTH - 4)) + + def test_pad_with_text( + self, + ) -> None: + front = " --- foo bar " + assert m.rule("foo bar", pad=4) == colors.cyan( + front + "-" * 
(m.UI_WIDTH - len(front)) + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_text(self) -> None: + front = "--- foo bar " + assert m.rule("foo bar") == colors.cyan( + front + "-" * (m.UI_WIDTH - len(front)) + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_char(self) -> None: + assert m.rule(char="a") == colors.cyan("a" * m.UI_WIDTH) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_N(self) -> None: + assert m.rule(N=60) == colors.cyan("-" * 60) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_N_with_text(self) -> None: + front = "--- foo bar " + assert m.rule("foo bar", N=65) == colors.cyan( + front + "-" * (65 - len(front)) + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") + def test_pad_plain(self, use_plain_text: UsePlainTextFixture) -> None: + assert m.rule(pad=4) == " " + "-" * (m.UI_WIDTH - 4) + + def test_pad_with_text_plain( + self, use_plain_text: UsePlainTextFixture + ) -> None: + front = " --- foo bar " + assert m.rule("foo bar", pad=4) == front + "-" * ( + m.UI_WIDTH - len(front) + ) + + def test_text_plain(self, use_plain_text: UsePlainTextFixture) -> None: + front = "--- foo bar " + assert m.rule("foo bar") == "--- foo bar " + "-" * ( + m.UI_WIDTH - len(front) + ) + + def test_char_plain(self, use_plain_text: UsePlainTextFixture) -> None: + assert m.rule(char="a") == "a" * m.UI_WIDTH + + def test_N_plain(self, use_plain_text: UsePlainTextFixture) -> None: + assert m.rule(N=60) == "-" * 60 + + def test_N_with_text_plain( + self, use_plain_text: UsePlainTextFixture + ) -> None: + front = "--- foo bar " + assert m.rule("foo bar", N=65) == front + "-" * (65 - len(front)) + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_section() -> None: + assert m.section("some section") == m.bright(m.white("some section")) + + +def test_section_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.section("some section") == "some section" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_warn() -> None: + assert m.warn("some message") == m.magenta("WARNING: some message") + + +def test_warn_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.warn("some message") == "WARNING: some message" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_shell() -> None: + assert m.shell("cmd --foo") == colors.dim(colors.white("+cmd --foo")) + + +def test_shell_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.shell("cmd --foo") == "+cmd --foo" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_shell_with_char() -> None: + assert m.shell("cmd --foo", char="") == colors.dim( + colors.white("cmd --foo") + ) + + +def test_shell_with_char_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.shell("cmd --foo", char="") == "cmd --foo" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_passed() -> None: + assert m.passed("msg") == f"{colors.bright(colors.green('[PASS]'))} msg" + + +def test_passed_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.passed("msg") == "[PASS] msg" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_passed_with_details() -> None: + assert ( + m.passed("msg", details=["a", "b"]) + == f"{colors.bright(colors.green('[PASS]'))} msg\n a\n b" + ) + + +def test_passed_with_details_plain( + use_plain_text: 
UsePlainTextFixture, +) -> None: + assert m.passed("msg", details=["a", "b"]) == "[PASS] msg\n a\n b" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_failed() -> None: + assert m.failed("msg") == f"{colors.bright(colors.red('[FAIL]'))} msg" + + +def test_failed_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.failed("msg") == "[FAIL] msg" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_failed_with_details() -> None: + assert ( + m.failed("msg", details=["a", "b"]) + == f"{colors.bright(colors.red('[FAIL]'))} msg\n a\n b" + ) + + +def test_failed_with_details_plain( + use_plain_text: UsePlainTextFixture, +) -> None: + assert m.failed("msg", details=["a", "b"]) == "[FAIL] msg\n a\n b" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_skipped() -> None: + assert m.skipped("msg") == f"{colors.cyan('[SKIP]')} msg" + + +def test_skipped_plain(use_plain_text: UsePlainTextFixture) -> None: + assert m.skipped("msg") == "[SKIP] msg" + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_summary() -> None: + assert m.summary("foo", 12, 11, timedelta(seconds=2.123)) == colors.bright( + colors.red( + f"{'foo: Passed 11 of 12 tests (91.7%) in 2.12s': >{m.UI_WIDTH}}" + ) + ) + + +def test_summary_plain(use_plain_text: UsePlainTextFixture) -> None: + assert ( + m.summary("foo", 12, 11, timedelta(seconds=2.123)) + == f"{'foo: Passed 11 of 12 tests (91.7%) in 2.12s': >{m.UI_WIDTH}}" + ) + + +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_summary_no_justify() -> None: + assert m.summary( + "foo", 12, 11, timedelta(seconds=2.123), justify=False + ) == colors.bright( + colors.red("foo: Passed 11 of 12 tests (91.7%) in 2.12s") + ) + + +def test_summary_no_justify_plain(use_plain_text: UsePlainTextFixture) -> None: + assert ( + m.summary("foo", 12, 11, timedelta(seconds=2.123), justify=False) + == "foo: Passed 11 of 12 tests (91.7%) in 2.12s" + ) diff --git a/tests/unit/util.py b/tests/unit/util.py new file mode 100644 index 000000000..b6ce793c0 --- /dev/null +++ b/tests/unit/util.py @@ -0,0 +1,33 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from itertools import chain, combinations +from typing import Any, Iterable, Iterator + +import pytest +from typing_extensions import TypeAlias + +Capsys: TypeAlias = pytest.CaptureFixture[str] + + +# ref: https://docs.python.org/3/library/itertools.html +def powerset(iterable: Iterable[Any]) -> Iterator[Any]: + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def powerset_nonempty(iterable: Iterable[Any]) -> Iterator[Any]: + return (x for x in powerset(iterable) if len(x)) From 7224c9de6d17044a07d0bdc425039da518a3f0d3 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 6 Oct 2022 18:28:25 -0700 Subject: [PATCH 007/121] Handle conflicts for library-level args (#416) * Handle conflicts for library-level args * fix copy-pasta --- legate/util/args.py | 42 ++++++++++++++++++++++++----- tests/unit/legate/util/test_args.py | 35 +++++++++++++++++++++++- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/legate/util/args.py b/legate/util/args.py index e8fdc0c34..88cd73193 100644 --- a/legate/util/args.py +++ b/legate/util/args.py @@ -14,6 +14,7 @@ # from __future__ import annotations +import re import sys import warnings from argparse import Action, ArgumentParser, Namespace @@ -73,6 +74,7 @@ class ArgSpec: choices: NotRequired[Sequence[Any]] = Unset help: NotRequired[str] = Unset metavar: NotRequired[str] = Unset + required: NotRequired[bool] = Unset @dataclass(frozen=True) @@ -80,6 +82,10 @@ class Argument: name: str spec: ArgSpec + @property + def kwargs(self) -> dict[str, Any]: + return dict(entries(self.spec)) + def entries(obj: Any) -> Iterable[tuple[str, Any]]: for f in fields(obj): @@ -153,25 +159,47 @@ def parse_library_command_args( prog=f"<{libname} program>", add_help=False, allow_abbrev=False ) - lib_prefix = f"-{libname}:" + # Some explanation is in order. Argparse treats arguments with a single + # dash differently, e.g. "-xyz" is interpreted as "-x -y -z". This can + # cause confusion and clashes when there are multiple single-dash args + # with identical prefixes. TLDR; we want "-legate:foo" to behave just + # as if it was "--legate:foo". In order to do this, we configure a parser + # for "long argumens" and then munge the values in sys.argv to update + # any "short prefix" arguments to be "long prefix" arguments first, before + # parsing. We also take care to update any output. The alternative here + # would be to abandon argparse entirely, and parse sys.argv manually. 
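+    # For example (purely illustrative; "foo" stands in for any libname):
+    #
+    #   sys.argv == ["app", "-foo:bar", "--extra"]
+    #
+    # is rewritten to ["app", "--foo:bar", "--extra"] before parsing, so
+    # argparse treats "-foo:bar" as a single long option rather than a
+    # cluster of short flags, and any unconsumed "--foo:*" leftovers are
+    # converted back to "-foo:*" afterwards so callers see the arguments
+    # exactly as they passed them.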
+ # + # ref: https://github.com/nv-legate/legate.core/issues/415 + + short_prefix = f"-{libname}:" + long_prefix = f"-{short_prefix}" argnames = [arg.name for arg in args] for arg in args: - argname = f"{lib_prefix}{arg.name}" - kwargs = dict(entries(arg.spec)) - parser.add_argument(argname, **kwargs) + argname = f"{long_prefix}{arg.name}" + parser.add_argument(argname, **arg.kwargs) has_custom_help = "help" in argnames - if f"{lib_prefix}help" in sys.argv and not has_custom_help: - parser.print_help() + if f"{short_prefix}help" in sys.argv and not has_custom_help: + help_string = parser.format_help() + + # this is a little sloppy but should suffice in practice + print(help_string.replace(long_prefix, short_prefix)) + sys.exit() + # convert any short-prefix args to be long-prefix + sys.argv = [re.sub(f"^{short_prefix}", long_prefix, x) for x in sys.argv] + args, extra = parser.parse_known_args() + # put any unconsumed args back they way they were + extra = [re.sub(f"^{long_prefix}", short_prefix, x) for x in extra] + for item in extra: - if item.startswith(lib_prefix): + if item.startswith(short_prefix): warnings.warn( f"Unrecognized argument {item!r} for {libname} (passed on as-is)" # noqa: E501 ) diff --git a/tests/unit/legate/util/test_args.py b/tests/unit/legate/util/test_args.py index 02d01a58c..83e3e02b3 100644 --- a/tests/unit/legate/util/test_args.py +++ b/tests/unit/legate/util/test_args.py @@ -91,6 +91,13 @@ def test_default(self) -> None: } +class TestArgument: + def test_kwargs(self) -> None: + arg = m.Argument("arg", m.ArgSpec("dest", default=2, help="help")) + + assert arg.kwargs == dict(m.entries(arg.spec)) + + def test_entries() -> None: assert set(m.entries(_TestObj())) == {("a", 10), ("c", "foo")} @@ -115,13 +122,26 @@ def test_default_help_precedence( self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys ) -> None: monkeypatch.setattr("sys.argv", ["app", "-foo:help", "-foo:bar"]) - args = [m.Argument("bar", m.ArgSpec(dest="help"))] + args = [m.Argument("bar", m.ArgSpec(dest="bar"))] with pytest.raises(SystemExit) as e: m.parse_library_command_args("foo", args) assert e.value.code is None out, err = capsys.readouterr() # type: ignore[unreachable] assert out.startswith("usage: ") + def test_default_help_patches_short_args( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr("sys.argv", ["app", "-foo:help", "-foo:bar"]) + args = [m.Argument("bar", m.ArgSpec(dest="bar"))] + with pytest.raises(SystemExit) as e: + m.parse_library_command_args("foo", args) + assert e.value.code is None + out, err = capsys.readouterr() # type: ignore[unreachable] + assert out.startswith("usage: ") + assert "-foo:bar" in out + assert "--foo:bar" not in out + def test_help_override( self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys ) -> None: @@ -182,6 +202,19 @@ def test_unrecognized_libname_arg( assert vars(ns) == {} assert sys.argv == ["app", "-foo:bar", "-foo:baz"] + def test_no_prefix_conflict( + self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys + ) -> None: + monkeypatch.setattr( + "sys.argv", ["app", "-foo:bar", "--foo", "-f", "1", "-ff"] + ) + args = [m.Argument("bar", m.ArgSpec(dest="bar"))] + ns = m.parse_library_command_args("foo", args) + out, err = capsys.readouterr() + assert out == "" + assert vars(ns) == {"bar": True} + assert sys.argv == ["app", "--foo", "-f", "1", "-ff"] + if __name__ == "__main__": sys.exit(pytest.main(sys.argv)) From c24487280d0046147ec3d2c9e38390dcafb57faf Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 6 
Oct 2022 20:43:37 -0700 Subject: [PATCH 008/121] Revive dead region managers on field allocations (#418) (#419) * Make sure we test LRU mechanism in debug mode * Make sure we don't create fields on a dead region manager --- legate/core/runtime.py | 32 ++++++++++++++++++++++---------- src/core/mapping/core_mapper.cc | 2 +- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index c30bc6237..22b3815e2 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -263,15 +263,21 @@ def destroy(self, unordered: bool) -> None: # unordered destructions self._region.destroy(unordered) - def increase_field_count(self) -> bool: + def increase_active_field_count(self) -> bool: revived = self._active_field_count == 0 self._active_field_count += 1 return revived - def decrease_field_count(self) -> bool: + def decrease_active_field_count(self) -> bool: self._active_field_count -= 1 return self._active_field_count == 0 + def increase_field_count(self) -> bool: + fresh = self._alloc_field_count == 0 + self._alloc_field_count += 1 + revived = self.increase_active_field_count() + return not fresh and revived + @property def has_space(self) -> bool: return self._alloc_field_count < LEGATE_MAX_FIELDS @@ -281,13 +287,12 @@ def get_next_field_id(self) -> int: self._next_field_id += 1 return field_id - def allocate_field(self, field_size: Any) -> tuple[Region, int]: + def allocate_field(self, field_size: Any) -> tuple[Region, int, bool]: field_id = self._region.field_space.allocate_field( field_size, self.get_next_field_id() ) - self._alloc_field_count += 1 - self.increase_field_count() - return self._region, field_id + revived = self.increase_field_count() + return self._region, field_id, revived # This class manages the allocation and reuse of fields @@ -315,18 +320,23 @@ def try_reuse_field(self) -> Optional[tuple[Region, int]]: def allocate_field(self) -> tuple[Region, int]: if (result := self.try_reuse_field()) is not None: region_manager = self.runtime.find_region_manager(result[0]) - if region_manager.increase_field_count(): + if region_manager.increase_active_field_count(): self.runtime.revive_manager(region_manager) return result region_manager = self.runtime.find_or_create_region_manager(self.shape) - return region_manager.allocate_field(self.field_size) + region, field_id, revived = region_manager.allocate_field( + self.field_size + ) + if revived: + self.runtime.revive_manager(region_manager) + return region, field_id def free_field( self, region: Region, field_id: int, ordered: bool = False ) -> None: self.free_fields.append((region, field_id)) region_manager = self.runtime.find_region_manager(region) - if region_manager.decrease_field_count(): + if region_manager.decrease_active_field_count(): self.runtime.free_region_manager( self.shape, region, unordered=not ordered ) @@ -1361,7 +1371,9 @@ def import_output_region( self.region_managers_by_region[region] = region_mgr self.find_or_create_field_manager(shape, dtype.size) - region_mgr.increase_field_count() + revived = region_mgr.increase_field_count() + if revived: + self.revive_manager(region_mgr) return RegionField.create(region, field_id, dtype.size, shape) def create_output_region( diff --git a/src/core/mapping/core_mapper.cc b/src/core/mapping/core_mapper.cc index 26e140879..da3f7414b 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -145,7 +145,7 @@ CoreMapper::CoreMapper(MapperRuntime* rt, Machine m, const LibraryContext& c) 
precise_exception_trace(static_cast(extract_env("LEGATE_PRECISE_EXCEPTION_TRACE", 0, 0))), field_reuse_frac(extract_env("LEGATE_FIELD_REUSE_FRAC", 256, 256)), field_reuse_freq(extract_env("LEGATE_FIELD_REUSE_FREQ", 32, 32)), - max_lru_length(extract_env("LEGATE_MAX_LRU_LENGTH", 5, 0)), + max_lru_length(extract_env("LEGATE_MAX_LRU_LENGTH", 5, 1)), has_socket_mem(false) { // Query to find all our local processors From 13f5f38f96ea64d3ea34de1fdc63a378b253d979 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 7 Oct 2022 09:11:09 -0700 Subject: [PATCH 009/121] Fix typo in driver script (#421) --- legate/driver/logs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legate/driver/logs.py b/legate/driver/logs.py index 261ab6dd5..70696d3e1 100644 --- a/legate/driver/logs.py +++ b/legate/driver/logs.py @@ -145,7 +145,7 @@ def process(self) -> bool: dflag = "d" if self.config.debugging.dataflow else "" eflag = "e" if self.config.debugging.event else "" if dflag or eflag: - cmd += ("-{dflag}{eflag}",) + cmd += (f"-{dflag}{eflag}",) cmd += tuple(f"legate_{n}.log" for n in range(ranks)) From 8c7552231f75ca548fec5865422a56fa43927c28 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 7 Oct 2022 16:11:07 -0700 Subject: [PATCH 010/121] On mapping failure retry after tightening non-RO reqs (#423) * Acquire instances eagerly * Add option to allow map_legate_store to fail * Fixes to 'handled' check * On failed mapping, retry with tight write requirements * Use original polarity for map_legate_store * Reduce the diff * Uninitialized variable * Some debugging output * Add more debug logging to mapper * Mapper name and rank id is obvious from log message header * Note task id on mapper debug messages * Skip non-existent (?) reqs when setting chosen_instances * Report sizes of newly created reduction instances Co-authored-by: Manolis Papadakis --- src/core/mapping/base_mapper.cc | 212 ++++++++++++++++++++++---------- src/core/mapping/base_mapper.h | 3 +- 2 files changed, 151 insertions(+), 64 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index f975fe3f8..66c1f4f80 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -18,6 +18,7 @@ #include #include "legion/legion_mapping.h" +#include "mappers/mapping_utilities.h" #include "core/data/store.h" #include "core/mapping/base_mapper.h" @@ -502,6 +503,10 @@ void BaseMapper::map_task(const MapperContext ctx, const MapTaskInput& input, MapTaskOutput& output) { +#ifdef DEBUG_LEGATE + logger.debug() << "Entering map_task for " << Utilities::to_string(runtime, ctx, task); +#endif + // Should never be mapping the top-level task here assert(task.get_depth() > 0); @@ -601,10 +606,51 @@ void BaseMapper::map_task(const MapperContext ctx, output.chosen_instances.resize(task.regions.size()); - // Map each field separately for each of the logical regions - std::vector needed_acquires; - std::map> instances_to_mappings; - for (uint32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { + bool can_fail = true; + std::map> instance_to_mappings; + std::map mapping_to_instance; + std::vector handled(mappings.size(), false); + + // See case of failed instance creation below + auto tighten_write_reqs = [&]() { + for (int32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { + auto& mapping = mappings[mapping_idx]; + PrivilegeMode priv = LEGION_NO_ACCESS; +#ifdef DEBUG_LEGATE + std::stringstream reqs_ss; +#endif + for (auto req_idx : 
mapping.requirement_indices()) { + const RegionRequirement& req = task.regions[req_idx]; + if (!req.region.exists()) continue; + priv |= req.privilege; +#ifdef DEBUG_LEGATE + reqs_ss << " " << req_idx; +#endif + } + if (!(priv & LEGION_WRITE_PRIV) || mapping.policy.exact) continue; +#ifdef DEBUG_LEGATE + logger.debug() << "Task " << task.get_unique_id() + << ": tightened mapping policy for reqs:" << reqs_ss.str(); +#endif + mapping.policy.exact = true; + if (!handled[mapping_idx]) continue; + handled[mapping_idx] = false; + auto m2i_it = mapping_to_instance.find(mapping_idx); + if (m2i_it == mapping_to_instance.end()) continue; + PhysicalInstance inst = m2i_it->second; + mapping_to_instance.erase(m2i_it); + auto i2m_it = instance_to_mappings.find(inst); + i2m_it->second.erase(mapping_idx); + if (i2m_it->second.empty()) { + runtime->release_instance(ctx, inst); + instance_to_mappings.erase(i2m_it); + } + } + }; + + // Mapping each field separately for each of the logical regions + for (int32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { + if (handled[mapping_idx]) continue; auto& mapping = mappings[mapping_idx]; auto req_indices = mapping.requirement_indices(); @@ -615,12 +661,14 @@ void BaseMapper::map_task(const MapperContext ctx, if (target == StoreTarget::FBMEM) target = StoreTarget::ZCMEM; #endif output.future_locations.push_back(get_target_memory(task.target_proc, target)); + handled[mapping_idx] = true; continue; - } else if (mapping.for_unbound_stores()) { + } + + if (mapping.for_unbound_stores()) { for (auto req_idx : req_indices) { output.output_targets[req_idx] = get_target_memory(task.target_proc, mapping.policy.target); auto ndim = mapping.stores.front().dim(); - // FIXME: Unbound stores can have more than one dimension later std::vector dimension_ordering; for (int32_t dim = ndim - 1; dim >= 0; --dim) @@ -630,65 +678,79 @@ void BaseMapper::map_task(const MapperContext ctx, output.output_constraints[req_idx].ordering_constraint = OrderingConstraint(dimension_ordering, false); } + handled[mapping_idx] = true; continue; } std::vector> reqs; +#ifdef DEBUG_LEGATE + std::stringstream reqs_ss; +#endif for (auto req_idx : req_indices) { const auto& req = task.regions[req_idx]; if (!req.region.exists()) continue; reqs.push_back(std::cref(req)); +#ifdef DEBUG_LEGATE + reqs_ss << " " << req_idx; +#endif + } + if (reqs.empty()) { + handled[mapping_idx] = true; + continue; } - if (reqs.empty()) continue; - - // Get the reference to our valid instances in case we decide to use them + // Get an instance and acquire it if necessary. If the acquire fails then prune it from the + // mapper's data structures and retry, until we succeed or map_legate_store fails with an out of + // memory error. 
PhysicalInstance result; - if (map_legate_store(ctx, task, mapping, reqs, task.target_proc, result)) - needed_acquires.push_back(result); - - for (auto req_idx : req_indices) output.chosen_instances[req_idx].push_back(result); - instances_to_mappings[result].insert(mapping_idx); - } - - // Do an acquire on all the instances so we have our result - // Keep doing this until we succed or we get an out of memory error - while (!needed_acquires.empty() && - !runtime->acquire_and_filter_instances(ctx, needed_acquires, true /*filter on acquire*/)) { - assert(!needed_acquires.empty()); - // If we failed to acquire any of the instances we need to prune them - // out of the mapper's data structure so do that first - std::set failed_acquires; - filter_failed_acquires(ctx, needed_acquires, failed_acquires); - - for (auto failed_acquire : failed_acquires) { - auto affected_mappings = instances_to_mappings[failed_acquire]; - instances_to_mappings.erase(failed_acquire); - - for (auto& mapping_idx : affected_mappings) { - auto& mapping = mappings[mapping_idx]; - auto req_indices = mapping.requirement_indices(); - - std::vector> reqs; - for (auto req_idx : req_indices) reqs.push_back(std::cref(task.regions[req_idx])); - - for (auto req_idx : req_indices) { - auto& instances = output.chosen_instances[req_idx]; - uint32_t inst_idx = 0; - for (; inst_idx < instances.size(); ++inst_idx) - if (instances[inst_idx] == failed_acquire) break; - instances.erase(instances.begin() + inst_idx); - } - - PhysicalInstance result; - if (map_legate_store(ctx, task, mapping, reqs, task.target_proc, result)) - needed_acquires.push_back(result); - - for (auto req_idx : req_indices) output.chosen_instances[req_idx].push_back(result); - instances_to_mappings[result].insert(mapping_idx); + while (map_legate_store(ctx, task, mapping, reqs, task.target_proc, result, can_fail)) { + if (result == PhysicalInstance()) break; + if (instance_to_mappings.count(result) > 0 || runtime->acquire_instance(ctx, result)) { +#ifdef DEBUG_LEGATE + logger.debug() << "Task " << task.get_unique_id() << ": acquired instance " << result + << " for reqs:" << reqs_ss.str(); +#endif + break; } +#ifdef DEBUG_LEGATE + logger.debug() << "Task " << task.get_unique_id() << ": failed to acquire instance " << result + << " for reqs:" << reqs_ss.str(); +#endif + AutoLock lock(ctx, local_instances->manager_lock()); + local_instances->erase(result); } + + // If instance creation failed we try mapping all stores again, but request tight instances for + // write requirements. The hope is that these write requirements cover the entire region (i.e. + // they use a complete partition), so the new tight instances will invalidate any pre-existing + // "bloated" instances for the same region, freeing up enough memory so that mapping can succeed + if (result == PhysicalInstance()) { +#ifdef DEBUG_LEGATE + logger.debug() << "Task " << task.get_unique_id() + << ": failed mapping for reqs:" << reqs_ss.str(); +#endif + assert(can_fail); + tighten_write_reqs(); + mapping_idx = -1; + can_fail = false; + continue; + } + + // Success; record the instance for this mapping. +#ifdef DEBUG_LEGATE + logger.debug() << "Task " << task.get_unique_id() + << ": completed mapping for reqs:" << reqs_ss.str(); +#endif + instance_to_mappings[result].insert(mapping_idx); + mapping_to_instance[mapping_idx] = result; + handled[mapping_idx] = true; } + + // Succeeded in mapping all stores, record it on map_task output. 
+ for (const auto& m2i : mapping_to_instance) + for (auto req_idx : mappings[m2i.first].requirement_indices()) + if (task.regions[req_idx].region.exists()) + output.chosen_instances[req_idx].push_back(m2i.second); } void BaseMapper::map_replicate_task(const MapperContext ctx, @@ -747,7 +809,8 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, const StoreMapping& mapping, std::vector> reqs, Processor target_proc, - PhysicalInstance& result) + PhysicalInstance& result, + bool can_fail) { const auto& policy = mapping.policy; std::vector regions; @@ -776,12 +839,29 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, // If we're making a reduction instance, we should just make it now if (redop != 0) { layout_constraints.add_constraint(SpecializedConstraint(REDUCTION_FOLD_SPECIALIZE, redop)); - - if (!runtime->create_physical_instance( - ctx, target_memory, layout_constraints, regions, result, true /*acquire*/)) + size_t footprint = 0; + if (runtime->create_physical_instance(ctx, + target_memory, + layout_constraints, + regions, + result, + true /*acquire*/, + LEGION_GC_DEFAULT_PRIORITY, + false /*tight bounds*/, + &footprint)) { +#ifdef DEBUG_LEGATE + Realm::LoggerMessage msg = logger.debug(); + msg << "Operation " << mappable.get_unique_id() << ": created reduction instance " << result + << " for"; + for (LogicalRegion r : regions) msg << " " << r; + msg << " (size: " << footprint << " bytes, memory: " << target_memory << ")"; +#endif + // We already did the acquire + return false; + } + if (!can_fail) report_failed_mapping(mappable, mapping.requirement_index(), target_memory, redop); - // We already did the acquire - return false; + return true; } auto& fields = layout_constraints.field_constraint.field_set; @@ -797,8 +877,8 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, local_instances->find_instance( regions.front(), fields.front(), target_memory, result, policy)) { #ifdef DEBUG_LEGATE - logger.debug() << get_mapper_name() << " found instance " << result << " for " - << regions.front(); + logger.debug() << "Operation " << mappable.get_unique_id() << ": reused cached instance " + << result << " for " << regions.front(); #endif runtime->enable_reentrant(ctx); // Needs acquire to keep the runtime happy @@ -861,8 +941,12 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, assert(result.exists()); #ifdef DEBUG_LEGATE if (created) { - logger.debug() << get_mapper_name() << " created instance " << result << " for " << *group - << " (size: " << footprint << " bytes, memory: " << target_memory << ")"; + logger.debug() << "Operation " << mappable.get_unique_id() << ": created instance " << result + << " for " << *group << " (size: " << footprint + << " bytes, memory: " << target_memory << ")"; + } else { + logger.debug() << "Operation " << mappable.get_unique_id() << ": found instance " << result + << " for " << *group; } #endif // Only save the result for future use if it is not an external instance @@ -879,8 +963,10 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, runtime->enable_reentrant(ctx); // If we make it here then we failed entirely - auto req_indices = mapping.requirement_indices(); - for (auto req_idx : req_indices) report_failed_mapping(mappable, req_idx, target_memory, redop); + if (!can_fail) { + auto req_indices = mapping.requirement_indices(); + for (auto req_idx : req_indices) report_failed_mapping(mappable, req_idx, target_memory, redop); + } return true; } diff --git a/src/core/mapping/base_mapper.h 
b/src/core/mapping/base_mapper.h index fac2c7304..d81898411 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -268,7 +268,8 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { const StoreMapping& mapping, std::vector> reqs, Legion::Processor target_proc, - Legion::Mapping::PhysicalInstance& result); + Legion::Mapping::PhysicalInstance& result, + bool can_fail); bool map_raw_array(const Legion::Mapping::MapperContext ctx, const Legion::Mappable& mappable, unsigned index, From 15cf1dcdafdc7da8b93668fded778f9633f3cb14 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Oct 2022 16:35:27 -0700 Subject: [PATCH 011/121] [pre-commit.ci] pre-commit autoupdate (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 22.8.0 → 22.10.0](https://github.com/psf/black/compare/22.8.0...22.10.0) - [github.com/pre-commit/mirrors-mypy: v0.971 → v0.982](https://github.com/pre-commit/mirrors-mypy/compare/v0.971...v0.982) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2adbe47d7..04478d01c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: isort - repo: https://github.com/psf/black - rev: 22.8.0 + rev: 22.10.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 @@ -18,7 +18,7 @@ repos: files: \.(cu|cuh|h|cc|inl)$ types_or: [] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.971' + rev: 'v0.982' hooks: - id: mypy pass_filenames: false From f1ee9680da0d0cbbecaadc7cdd232cb0febde1d4 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 11 Oct 2022 17:10:07 -0700 Subject: [PATCH 012/121] Fix Transform class hierarchy (#427) --- legate/core/transform.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/legate/core/transform.py b/legate/core/transform.py index 4cd7475c6..817f08e25 100644 --- a/legate/core/transform.py +++ b/legate/core/transform.py @@ -60,21 +60,6 @@ def invert_extent(self, extent: Shape) -> Shape: def invert_point(self, point: Shape) -> Shape: ... - def invert(self, partition: PartitionBase) -> PartitionBase: - ... - - def convert(self, partition: PartitionBase) -> PartitionBase: - ... - - def convert_partition(self, partition: PartitionBase) -> PartitionBase: - ... - - def _invert_partition(self, partition: PartitionBase) -> PartitionBase: - ... - - def invert_partition(self, partition: PartitionBase) -> PartitionBase: - ... - def invert_symbolic_point(self, dims: SymbolicPoint) -> SymbolicPoint: ... @@ -89,7 +74,11 @@ def get_inverse_transform(self, ndim: int) -> AffineTransform: class Transform(TransformProto, Protocol): - pass + def invert(self, partition: PartitionBase) -> PartitionBase: + ... + + def convert(self, partition: PartitionBase) -> PartitionBase: + ... 
class Shift(Transform): @@ -546,6 +535,9 @@ def invert_restrictions(self, restrictions: Restrictions) -> Restrictions: right = restrictions[self._dim + self._shape.ndim :] return left + right + def convert(self, partition: PartitionBase) -> PartitionBase: + raise NonInvertibleError() + def convert_restrictions(self, restrictions: Restrictions) -> Restrictions: left = restrictions[: self._dim] right = restrictions[self._dim + 1 :] @@ -585,6 +577,18 @@ class TransformStackBase(TransformProto, Protocol): def bottom(self) -> bool: ... + def stack(self, transform: Transform) -> TransformStack: + ... + + def convert_partition(self, partition: PartitionBase) -> PartitionBase: + ... + + def _invert_partition(self, partition: PartitionBase) -> PartitionBase: + ... + + def invert_partition(self, partition: PartitionBase) -> PartitionBase: + ... + class TransformStack(TransformStackBase): def __init__( From 054a589fb29dbfc7be1c7d36bef1b7e5998c72e9 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 11 Oct 2022 18:20:24 -0700 Subject: [PATCH 013/121] More changes for provenance (#417) * Provenance for inline mappings and attach ops * Show provenance in the mapping failure * Minor tweak to the error message --- legate/core/_legion/operation.py | 15 +++++++++++++++ legate/core/store.py | 3 +++ src/core/mapping/base_mapper.cc | 20 ++++++++++++++------ typings/legion_cffi/lib.pyi | 3 +++ 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/legate/core/_legion/operation.py b/legate/core/_legion/operation.py index 2ccde2f5c..e07b4ba8b 100644 --- a/legate/core/_legion/operation.py +++ b/legate/core/_legion/operation.py @@ -39,6 +39,7 @@ def __init__( tag: int = 0, parent: Optional[Region] = None, coherence: int = legion.LEGION_EXCLUSIVE, + provenance: Optional[str] = None, ) -> None: """ An InlineMapping object provides a mechanism for creating a mapped @@ -87,6 +88,10 @@ def __init__( mapper, tag, ) + if provenance is not None: + legion.legion_inline_launcher_set_provenance( + self.launcher, provenance.encode() + ) self.region = region self._launcher = ffi.gc( self.launcher, legion.legion_inline_launcher_destroy @@ -1091,6 +1096,7 @@ def __init__( mapper: int = 0, tag: int = 0, read_only: bool = False, + provenance: Optional[str] = None, ) -> None: """ An Attach object provides a mechanism for attaching external data to @@ -1115,6 +1121,10 @@ def __init__( self.launcher = legion.legion_attach_launcher_create( region.handle, region.handle, legion.LEGION_EXTERNAL_INSTANCE ) + if provenance is not None: + legion.legion_attach_launcher_set_provenance( + self.launcher, provenance.encode() + ) self.region = region self._launcher = ffi.gc( self.launcher, legion.legion_attach_launcher_destroy @@ -1232,6 +1242,7 @@ def __init__( shard_local_data: dict[Region, Any], mapper: int = 0, tag: int = 0, + provenance: Optional[str] = None, ) -> None: """ A variant of Attach that allows attaching multiple pieces of external @@ -1260,6 +1271,10 @@ def __init__( legion.LEGION_EXTERNAL_INSTANCE, True, # restricted ) + if provenance is not None: + legion.legion_index_attach_launcher_set_provenance( + self.launcher, provenance.encode() + ) self._launcher = ffi.gc( self.launcher, legion.legion_index_attach_launcher_destroy ) diff --git a/legate/core/store.py b/legate/core/store.py index 26e648989..4c947829b 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -178,6 +178,7 @@ def record_detach(detach: Union[Detach, IndexDetach]) -> None: self.field.field_id, alloc, mapper=context.mapper_id, + 
provenance=context.provenance, ) # If we're not sharing then there is no need to map or restrict the # attachment @@ -229,6 +230,7 @@ def record_detach(detach: Union[Detach, IndexDetach]) -> None: self.field.field_id, shard_local_data, mapper=context.mapper_id, + provenance=context.provenance, ) index_attach.set_deduplicate_across_shards(True) # If we're not sharing there is no need to restrict the attachment @@ -265,6 +267,7 @@ def get_inline_mapped_region(self, context: Context) -> PhysicalRegion: self.region, self.field.field_id, mapper=context.mapper_id, + provenance=context.provenance, ) self.physical_region = runtime.dispatch(mapping) self.physical_region_mapped = True diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 66c1f4f80..983043322 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -1152,27 +1152,31 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, REALM_MEMORY_KINDS(MEM_NAMES) #undef MEM_NAMES }; + std::string provenance = mappable.get_provenance_string(); + if (provenance.empty()) provenance = "unknown provenance"; switch (mappable.get_mappable_type()) { case Mappable::TASK_MAPPABLE: { const auto task = mappable.as_task(); if (redop > 0) logger.error( "Mapper %s failed to map reduction (%d) region " - "requirement %d of task %s (UID %lld) into %s memory " IDFMT, + "requirement %d of task %s [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), redop, index, task->get_task_name(), + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); else logger.error( "Mapper %s failed to map region requirement %d of " - "task %s (UID %lld) into %s memory " IDFMT, + "task %s [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), index, task->get_task_name(), + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); @@ -1182,19 +1186,21 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, if (redop > 0) logger.error( "Mapper %s failed to map reduction (%d) region " - "requirement %d of copy (UID %lld) into %s memory " IDFMT, + "requirement %d of copy [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), redop, index, + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); else logger.error( "Mapper %s failed to map region requirement %d of " - "copy (UID %lld) into %s memory " IDFMT, + "copy [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), index, + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); @@ -1204,19 +1210,21 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, if (redop > 0) logger.error( "Mapper %s failed to map reduction (%d) region " - "requirement %d of inline mapping (UID %lld) into %s memory " IDFMT, + "requirement %d of inline mapping [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), redop, index, + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); else logger.error( "Mapper %s failed to map region requirement %d of " - "inline mapping (UID %lld) into %s memory " IDFMT, + "inline mapping [%s] (UID %lld) into %s memory " IDFMT, get_mapper_name(), index, + provenance.c_str(), mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); diff --git a/typings/legion_cffi/lib.pyi b/typings/legion_cffi/lib.pyi index 11b43a823..2326b327c 100644 --- a/typings/legion_cffi/lib.pyi 
+++ b/typings/legion_cffi/lib.pyi @@ -93,6 +93,7 @@ def legion_attach_launcher_destroy(*args: Any) -> Any: ... def legion_attach_launcher_execute(*args: Any) -> Any: ... def legion_attach_launcher_set_mapped(*args: Any) -> Any: ... def legion_attach_launcher_set_restricted(*args: Any) -> Any: ... +def legion_attach_launcher_set_provenance(*args: Any) -> Any: ... def legion_auto_generate_id(*args: Any) -> Any: ... def legion_context_consensus_match(*args: Any) -> Any: ... def legion_context_progress_unordered_operations(*args: Any) -> Any: ... @@ -166,6 +167,7 @@ def legion_index_attach_launcher_set_deduplicate_across_shards( *args: Any, ) -> Any: ... def legion_index_attach_launcher_set_restricted(*args: Any) -> Any: ... +def legion_index_attach_launcher_set_provenance(*args: Any) -> Any: ... def legion_index_copy_launcher_add_dst_field(*args: Any) -> Any: ... def legion_index_copy_launcher_add_dst_indirect_region_requirement_logical_partition( *args: Any, @@ -268,6 +270,7 @@ def legion_inline_launcher_add_field(*args: Any) -> Any: ... def legion_inline_launcher_create_logical_region(*args: Any) -> Any: ... def legion_inline_launcher_destroy(*args: Any) -> Any: ... def legion_inline_launcher_execute(*args: Any) -> Any: ... +def legion_inline_launcher_set_provenance(*args: Any) -> Any: ... def legion_issue_timing_op_seconds(*args: Any) -> Any: ... def legion_issue_timing_op_microseconds(*args: Any) -> Any: ... def legion_issue_timing_op_nanoseconds(*args: Any) -> Any: ... From 07d272313526810ebc5a8d8968f3b0db2deb55f5 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 12 Oct 2022 00:10:31 -0700 Subject: [PATCH 014/121] Handle scalar outputs correctly in manual tasks (#432) * Handle scalar outputs correctly in manual tasks * Increase the max return size --- legate/core/operation.py | 2 ++ src/core/task/task.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/legate/core/operation.py b/legate/core/operation.py index 324c341e3..b53612d5e 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -694,6 +694,7 @@ def add_output( ) if arg.kind is Future: self._scalar_outputs.append(len(self._outputs)) + self._outputs.append(arg) self._output_parts.append(arg.partition(REPLICATE)) else: self._output_parts.append(arg) @@ -709,6 +710,7 @@ def add_reduction( if isinstance(arg, Store): if arg.kind is Future: self._scalar_reductions.append(len(self._reductions)) + self._reductions.append((arg, redop)) self._reduction_parts.append((arg.partition(REPLICATE), redop)) else: self._reduction_parts.append((arg, redop)) diff --git a/src/core/task/task.h b/src/core/task/task.h index 904d71f51..f86e9987c 100644 --- a/src/core/task/task.h +++ b/src/core/task/task.h @@ -32,8 +32,8 @@ namespace legate { -// We're going to allow for each task to use only up to 170 scalar output stores -constexpr size_t LEGATE_MAX_SIZE_SCALAR_RETURN = 2048; +// We're going to allow for each task to use only up to 341 scalar output stores +constexpr size_t LEGATE_MAX_SIZE_SCALAR_RETURN = 4096; using LegateVariantImpl = void (*)(TaskContext&); From c304e1f834a40fac8ab376ccaddfb7aacd3fa3a5 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Wed, 12 Oct 2022 10:11:30 -0700 Subject: [PATCH 015/121] force CPM to download Legion if legion_dir or legion_src_dir is not explicitly provided (#411) Co-authored-by: Bryan Van de Ven --- cmake/thirdparty/get_legion.cmake | 20 +++++++++++--------- install.py | 4 +++- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cmake/thirdparty/get_legion.cmake 
b/cmake/thirdparty/get_legion.cmake index 344fc2d2c..5faf54023 100644 --- a/cmake/thirdparty/get_legion.cmake +++ b/cmake/thirdparty/get_legion.cmake @@ -37,16 +37,18 @@ function(find_or_configure_legion) BUILD_EXPORT_SET legate-core-exports INSTALL_EXPORT_SET legate-core-exports) - # First try to find Legion via find_package() - # so the `Legion_USE_*` variables are visible - # Use QUIET find by default. - set(_find_mode QUIET) - # If Legion_DIR/Legion_ROOT are defined as something other than empty or NOTFOUND - # use a REQUIRED find so that the build does not silently download Legion. - if(Legion_DIR OR Legion_ROOT) - set(_find_mode REQUIRED) + if((NOT CPM_Legion_SOURCE) AND (NOT CPM_DOWNLOAD_Legion)) + # First try to find Legion via find_package() + # so the `Legion_USE_*` variables are visible + # Use QUIET find by default. + set(_find_mode QUIET) + # If Legion_DIR/Legion_ROOT are defined as something other than empty or NOTFOUND + # use a REQUIRED find so that the build does not silently download Legion. + if(Legion_DIR OR Legion_ROOT) + set(_find_mode REQUIRED) + endif() + rapids_find_package(Legion ${PKG_VERSION} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS}) endif() - rapids_find_package(Legion ${PKG_VERSION} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS}) if(Legion_FOUND) message(STATUS "CPM: using local package Legion@${PKG_VERSION}") diff --git a/install.py b/install.py index 6566ca68b..6cd0ca3a7 100755 --- a/install.py +++ b/install.py @@ -454,8 +454,10 @@ def validate_path(path): cmake_flags += ["-DThrust_ROOT=%s" % thrust_dir] if legion_dir: cmake_flags += ["-DLegion_ROOT=%s" % legion_dir] - if legion_src_dir: + elif legion_src_dir: cmake_flags += ["-DCPM_Legion_SOURCE=%s" % legion_src_dir] + else: + cmake_flags += ["-DCPM_DOWNLOAD_Legion=ON"] if legion_url: cmake_flags += ["-Dlegate_core_LEGION_REPOSITORY=%s" % legion_url] if legion_branch: From eb0fcc3bebb6b6682108cd9bad6d14f7e5ed277b Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 12 Oct 2022 11:21:01 -0700 Subject: [PATCH 016/121] remove --install-dir option (#430) * remove --install-dir option * remove --python-lib --- bin/legate | 23 ------------ install.py | 76 ++++++++++++--------------------------- legate/__main__.py | 24 ------------- legate/driver/__init__.py | 9 ++++- setup.py | 6 ++-- 5 files changed, 32 insertions(+), 106 deletions(-) delete mode 100644 bin/legate delete mode 100755 legate/__main__.py diff --git a/bin/legate b/bin/legate deleted file mode 100644 index caa4983ec..000000000 --- a/bin/legate +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/usr/bin/env python3 -# -*- coding: utf-8 -*- -import re -import sys -import os -import pathlib - -prefix_dir = os.path.dirname(os.path.dirname(__file__)) -for path in sys.path[:]: # slice to void inf append loop - parts = pathlib.Path(path).parts - if "lib" in parts: - relative_path = parts[parts.index("lib"):] - new_prefix_path = os.path.join(prefix_dir, *relative_path) - if os.path.exists(new_prefix_path): - sys.path.append(new_prefix_path) - -from legate.driver import main - -if __name__ == '__main__': - # if legate is installed into a non-standard location, the legate - # libraries may not be available in the python import path - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main(sys.argv)) diff --git a/install.py b/install.py index 6cd0ca3a7..6105411e3 100755 --- a/install.py +++ b/install.py @@ -19,7 +19,6 @@ import multiprocessing import os import platform -import re import shutil import subprocess import sys @@ -145,26 +144,26 @@ def was_previously_built_with_different_build_isolation( return False -def get_install_dir_or_default(install_dir): - # If no install dir was passed on the command line, infer the location - # of where to install the Legion Python bindings, otherwise they'll only - # be installed into the local scikit-build cmake-install dir - if install_dir is None: - # Install into conda prefix if defined - if "CONDA_PREFIX" in os.environ: - install_dir = os.environ["CONDA_PREFIX"] - else: - import site - - # Try to install into user site packages first? - if site.ENABLE_USER_SITE and os.path.exists( - site_pkgs := site.getusersitepackages() - ): - install_dir = site_pkgs - # Otherwise fallback to regular site-packages? - elif os.path.exists(site_pkgs := site.getsitepackages()): - install_dir = site_pkgs - return install_dir +def get_install_dir(): + # Infer the location where to install the Legion Python bindings, + # otherwise they'll only be installed into the local scikit-build + # cmake-install dir + + # Install into conda prefix if defined + if "CONDA_PREFIX" in os.environ: + return os.environ["CONDA_PREFIX"] + + import site + + # Try to install into user site packages first? + if site.ENABLE_USER_SITE and os.path.exists( + user_site_pkgs := site.getusersitepackages() + ): + return user_site_pkgs + + # Otherwise fallback to regular site-packages? 
+ if os.path.exists(site_pkgs := site.getsitepackages()): + return site_pkgs def install_legion_python_bindings( @@ -246,9 +245,7 @@ def install( nccl_dir, cmake_exe, cmake_generator, - install_dir, gasnet_dir, - pylib_name, cuda_dir, maxdim, maxfields, @@ -291,9 +288,7 @@ def install( print("nccl_dir:", nccl_dir) print("cmake_exe:", cmake_exe) print("cmake_generator:", cmake_generator) - print("install_dir:", install_dir) print("gasnet_dir:", gasnet_dir) - print("pylib_name:", pylib_name) print("cuda_dir:", cuda_dir) print("maxdim:", maxdim) print("maxfields:", maxfields) @@ -319,14 +314,7 @@ def install( legate_core_dir = dirname(realpath(__file__)) - if pylib_name is None: - pyversion, pylib_name = find_active_python_version_and_path() - else: - f_name = os.path.split(pylib_name)[-1] - match = re.match(r"^libpython(\d\d?\.\d\d?)", f_name) - e = "Unable to get version from library name {}".format(pylib_name) - assert match, e - pyversion = match.group(1) + pyversion, pylib_name = find_active_python_version_and_path() print("Using python lib and version: {}, {}".format(pylib_name, pyversion)) def validate_path(path): @@ -388,7 +376,7 @@ def validate_path(path): except Exception: pass - install_dir = get_install_dir_or_default(validate_path(install_dir)) + install_dir = get_install_dir() if verbose: print("install_dir: ", install_dir) @@ -485,14 +473,6 @@ def validate_path(path): def driver(): parser = argparse.ArgumentParser(description="Install Legate front end.") - parser.add_argument( - "--install-dir", - dest="install_dir", - metavar="DIR", - required=False, - default=None, - help="Path to install all Legate-related software", - ) parser.add_argument( "--debug", dest="debug", @@ -630,18 +610,6 @@ def driver(): default=os.environ.get("NCCL_PATH"), help="Path to NCCL installation directory.", ) - parser.add_argument( - "--python-lib", - dest="pylib_name", - action="store", - required=False, - default=None, - help=( - "Build Legate against the specified Python shared library. " - "Default is to use the Python library currently executing this " - "install script." - ), - ) parser.add_argument( "--with-cmake", dest="cmake_exe", diff --git a/legate/__main__.py b/legate/__main__.py deleted file mode 100755 index e7cc19d7b..000000000 --- a/legate/__main__.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from __future__ import annotations - -import sys - -from .driver import main - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/legate/driver/__init__.py b/legate/driver/__init__.py index b8496597d..67ce493b8 100644 --- a/legate/driver/__init__.py +++ b/legate/driver/__init__.py @@ -17,4 +17,11 @@ from .config import Config from .driver import Driver from .launcher import Launcher -from .main import main + + +def main() -> int: + import sys + + from .main import main as _main + + return _main(sys.argv) diff --git a/setup.py b/setup.py index a59dadd8f..24e358eb0 100755 --- a/setup.py +++ b/setup.py @@ -62,13 +62,11 @@ include_package_data=True, entry_points={ "console_scripts": [ + "legate = legate.driver:main", "lgpatch = legate.lgpatch:main", ], }, - scripts=[ - "bind.sh", - "bin/legate", - ], + scripts=["bind.sh"], cmdclass=versioneer.get_cmdclass(), install_requires=["numpy>=1.22"], zip_safe=False, From 1be0d978eb823ba233af4c35501d6c06bc7043be Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 12 Oct 2022 15:03:55 -0700 Subject: [PATCH 017/121] Fix GPU shard computation (#433) --- legate/tester/stages/_linux/gpu.py | 2 +- tests/unit/legate/tester/stages/_linux/test_gpu.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/legate/tester/stages/_linux/gpu.py b/legate/tester/stages/_linux/gpu.py index f1a222fc0..64f625c00 100644 --- a/legate/tester/stages/_linux/gpu.py +++ b/legate/tester/stages/_linux/gpu.py @@ -69,7 +69,7 @@ def compute_spec(self, config: Config, system: TestSystem) -> StageSpec: N = len(system.gpus) degree = N // config.gpus - fbsize = min(gpu.total for gpu in system.gpus) / (2 << 20) # MB + fbsize = min(gpu.total for gpu in system.gpus) / (1 << 20) # MB oversub_factor = int(fbsize // (config.fbmem * BLOAT_FACTOR)) workers = adjust_workers( degree * oversub_factor, config.requested_workers diff --git a/tests/unit/legate/tester/stages/_linux/test_gpu.py b/tests/unit/legate/tester/stages/_linux/test_gpu.py index df1441c65..8d792b7b3 100644 --- a/tests/unit/legate/tester/stages/_linux/test_gpu.py +++ b/tests/unit/legate/tester/stages/_linux/test_gpu.py @@ -55,16 +55,16 @@ def test_spec_with_gpus_1() -> None: c = Config(["test.py", "--gpus", "1"]) s = FakeSystem() stage = m.GPU(c, s) - assert stage.spec.workers == 12 - assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 12 + assert stage.spec.workers == 24 + assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 24 def test_spec_with_gpus_2() -> None: c = Config(["test.py", "--gpus", "2"]) s = FakeSystem() stage = m.GPU(c, s) - assert stage.spec.workers == 6 - assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] * 6 + assert stage.spec.workers == 12 + assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] * 12 def test_spec_with_requested_workers() -> None: From b2b6228cc30b58b72b6644a09866bb016063bc74 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Wed, 12 Oct 2022 15:28:08 -0700 Subject: [PATCH 018/121] Only set default CMake generator if Ninja is available: Issue #374 (#379) * Only set default generator if Ninja is available * Address PR comments, fix typos Co-authored-by: Manolis Papadakis Co-authored-by: Manolis Papadakis --- install.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/install.py b/install.py index 6105411e3..93cd215d2 100755 --- a/install.py +++ b/install.py @@ -401,7 +401,7 @@ def validate_path(path): cmake_flags = [] if cmake_generator: - cmake_flags += [f"-G{cmake_generator}"] + cmake_flags += 
[f"-G'{cmake_generator}'"] if debug or verbose: cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")] @@ -622,8 +622,8 @@ def driver(): "--cmake-generator", dest="cmake_generator", required=False, - default="Ninja", - choices=["Ninja", "Unix Makefiles"], + default=(None if shutil.which("ninja") is None else "Ninja"), + choices=["Ninja", "Unix Makefiles", None], help="The CMake makefiles generator", ) parser.add_argument( From c48b62cf427a05027bc75ea9a97149b8624362ef Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 12 Oct 2022 15:59:29 -0700 Subject: [PATCH 019/121] . (#434) --- legate/util/fs.py | 80 ++++++++++++++------------- tests/unit/legate/driver/test_main.py | 5 +- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/legate/util/fs.py b/legate/util/fs.py index e05e15279..e3ea9e958 100644 --- a/legate/util/fs.py +++ b/legate/util/fs.py @@ -281,50 +281,52 @@ def installed_legion_paths(legion_dir: Path) -> LegionPaths: cmake_cache_txt = legate_build_dir / "CMakeCache.txt" try: - # Test whether Legion_DIR is set. If it isn't, then we built Legion as - # a side-effect of building legate_core - read_cmake_cache_value( - cmake_cache_txt, "Legion_DIR:PATH=Legion_DIR-NOTFOUND" - ) - except Exception: - # If Legion_DIR is a valid path, check whether it's a - # Legion build dir, i.e. `-D Legion_ROOT=/legion/build` - legion_dir = Path( - read_cmake_cache_value(cmake_cache_txt, "Legion_DIR:PATH=") - ) - if legion_dir.joinpath("CMakeCache.txt").exists(): - cmake_cache_txt = legion_dir / "CMakeCache.txt" - - try: - # If Legion_SOURCE_DIR and Legion_BINARY_DIR are in CMakeCache.txt, - # return the paths to Legion in the legate_core build dir. - legion_source_dir = Path( + try: + # Test whether Legion_DIR is set. If it isn't, then we built + # Legion as a side-effect of building legate_core read_cmake_cache_value( - cmake_cache_txt, "Legion_SOURCE_DIR:STATIC=" + cmake_cache_txt, "Legion_DIR:PATH=Legion_DIR-NOTFOUND" ) - ) - legion_binary_dir = Path( - read_cmake_cache_value( - cmake_cache_txt, "Legion_BINARY_DIR:STATIC=" + except Exception: + # If Legion_DIR is a valid path, check whether it's a + # Legion build dir, i.e. `-D Legion_ROOT=/legion/build` + legion_dir = Path( + read_cmake_cache_value(cmake_cache_txt, "Legion_DIR:PATH=") ) - ) + if legion_dir.joinpath("CMakeCache.txt").exists(): + cmake_cache_txt = legion_dir / "CMakeCache.txt" - legion_runtime_dir = legion_binary_dir / "runtime" - legion_bindings_dir = legion_source_dir / "bindings" - - return LegionPaths( - legion_bin_path=legion_binary_dir / "bin", - legion_lib_path=legion_binary_dir / "lib", - realm_defines_h=legion_runtime_dir / "realm_defines.h", - legion_defines_h=legion_runtime_dir / "legion_defines.h", - legion_spy_py=legion_source_dir / "tools" / "legion_spy.py", - legion_prof_py=legion_source_dir / "tools" / "legion_prof.py", - legion_python=legion_binary_dir / "bin" / "legion_python", - legion_module=legion_bindings_dir / "python" / "build" / "lib", - legion_jupyter_module=legion_source_dir / "jupyter_notebook", - ) except Exception: - pass + try: + # If Legion_SOURCE_DIR and Legion_BINARY_DIR are in CMakeCache.txt, + # return the paths to Legion in the legate_core build dir. 
+ legion_source_dir = Path( + read_cmake_cache_value( + cmake_cache_txt, "Legion_SOURCE_DIR:STATIC=" + ) + ) + legion_binary_dir = Path( + read_cmake_cache_value( + cmake_cache_txt, "Legion_BINARY_DIR:STATIC=" + ) + ) + + legion_runtime_dir = legion_binary_dir / "runtime" + legion_bindings_dir = legion_source_dir / "bindings" + + return LegionPaths( + legion_bin_path=legion_binary_dir / "bin", + legion_lib_path=legion_binary_dir / "lib", + realm_defines_h=legion_runtime_dir / "realm_defines.h", + legion_defines_h=legion_runtime_dir / "legion_defines.h", + legion_spy_py=legion_source_dir / "tools" / "legion_spy.py", + legion_prof_py=legion_source_dir / "tools" / "legion_prof.py", + legion_python=legion_binary_dir / "bin" / "legion_python", + legion_module=legion_bindings_dir / "python" / "build" / "lib", + legion_jupyter_module=legion_source_dir / "jupyter_notebook", + ) + except Exception: + pass # Otherwise return the installation paths. return installed_legion_paths(Path(sys.argv[0]).parents[1]) diff --git a/tests/unit/legate/driver/test_main.py b/tests/unit/legate/driver/test_main.py index 4c0260abb..0992a226b 100644 --- a/tests/unit/legate/driver/test_main.py +++ b/tests/unit/legate/driver/test_main.py @@ -14,6 +14,8 @@ # from __future__ import annotations +import sys + from pytest_mock import MockerFixture import legate.driver as m @@ -34,8 +36,9 @@ def test_main(mocker: MockerFixture) -> None: system_spy = mocker.spy(legate.util.system.System, "__init__") driver_spy = mocker.spy(legate.driver.driver.Driver, "__init__") mocker.patch("legate.driver.driver.Driver.run", return_value=123) + mocker.patch.object(sys, "argv", ["foo", "bar"]) - result = m.main(["foo", "bar"]) + result = m.main() assert config_spy.call_count == 1 assert config_spy.call_args[0][1:] == (["foo", "bar"],) From 9f3894dff7010b02fc6bb8d771c4cf4c3680ca3a Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 13 Oct 2022 13:45:21 -0700 Subject: [PATCH 020/121] Allow only one of --legion-dir and --legion-src-dir (#387) * skip finding legion installation if given legion source override * check that only one of legion-dir and legion-src-dir is given * Revert "skip finding legion installation if given legion source override" This reverts commit 80a0f7ebdd7b6cc9eddfe3a8e4ebacfd08928845. 
Co-authored-by: Manolis Papadakis --- install.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/install.py b/install.py index 93cd215d2..e3303ae72 100755 --- a/install.py +++ b/install.py @@ -274,6 +274,9 @@ def install( if clean_first is None: clean_first = not editable + if legion_dir is not None and legion_src_dir is not None: + sys.exit("Cannot specify both --legion-dir and --legion-src-dir") + print("Verbose build is ", "on" if verbose else "off") if verbose: print("networks:", networks) From db41dbb0e578cc74b25c917cd6de6f9c66dcee74 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 13 Oct 2022 15:28:41 -0700 Subject: [PATCH 021/121] Report pytest exit code on fail (#436) * report process exit code on test failure * report process exit code on test failure --- legate/tester/stages/util.py | 2 +- legate/util/ui.py | 10 +- tests/unit/legate/tester/stages/test_util.py | 109 +++++++++++++++++++ tests/unit/legate/util/test_ui.py | 32 ++++++ 4 files changed, 149 insertions(+), 4 deletions(-) diff --git a/legate/tester/stages/util.py b/legate/tester/stages/util.py index 2d6514877..27d53bbd1 100644 --- a/legate/tester/stages/util.py +++ b/legate/tester/stages/util.py @@ -112,4 +112,4 @@ def log_proc( elif proc.returncode == 0: LOG(passed(msg, details=details)) else: - LOG(failed(msg, details=details)) + LOG(failed(msg, details=details, exit_code=proc.returncode)) diff --git a/legate/util/ui.py b/legate/util/ui.py index 9cf74b094..2e4af8fbc 100644 --- a/legate/util/ui.py +++ b/legate/util/ui.py @@ -110,7 +110,9 @@ def error(text: str) -> str: return red(f"ERROR: {text}") -def failed(msg: str, *, details: Details | None = None) -> str: +def failed( + msg: str, *, details: Details | None = None, exit_code: int | None = None +) -> str: """Report a failed test result with a bright red [FAIL]. 
Parameters @@ -122,9 +124,11 @@ def failed(msg: str, *, details: Details | None = None) -> str: A sequenece of text lines to diplay below the ``msg`` line """ + fail = f"{bright(red('[FAIL]'))}" + exit = f"{bright(white(f' (exit: {exit_code}) '))}" if exit_code else "" if details: - return f"{bright(red('[FAIL]'))} {msg}\n{_format_details(details)}" - return f"{bright(red('[FAIL]'))} {msg}" + return f"{fail} {msg}{exit}\n{_format_details(details)}" + return f"{fail} {msg}{exit}" def passed(msg: str, *, details: Details | None = None) -> str: diff --git a/tests/unit/legate/tester/stages/test_util.py b/tests/unit/legate/tester/stages/test_util.py index b4c528d06..f97174de8 100644 --- a/tests/unit/legate/tester/stages/test_util.py +++ b/tests/unit/legate/tester/stages/test_util.py @@ -17,9 +17,26 @@ """ from __future__ import annotations +from pathlib import Path + import pytest +from legate.tester.config import Config +from legate.tester.logger import LOG from legate.tester.stages import util as m +from legate.tester.test_system import ProcessResult +from legate.util.ui import failed, passed, shell, skipped + + +def test_StageResult() -> None: + procs = [ProcessResult(f"run{i}", Path(f"test{i}")) for i in range(10)] + procs[2].returncode = 10 + procs[7].returncode = -2 + + result = m.StageResult(procs=procs, time=0) + + assert result.total == 10 + assert result.passed == 8 class Test_adjust_workers: @@ -46,3 +63,95 @@ def test_zero_computed(self) -> None: def test_requested_too_large(self) -> None: with pytest.raises(RuntimeError): assert m.adjust_workers(10, 11) + + +class Test_log_proc: + @pytest.mark.parametrize("returncode", (-23, -1, 0, 1, 17)) + def test_skipped(self, returncode) -> None: + config = Config([]) + proc = ProcessResult( + "proc", Path("proc"), skipped=True, returncode=returncode + ) + + LOG.clear() + m.log_proc("foo", proc, config, verbose=False) + + assert LOG.lines == (skipped(f"(foo) {proc.test_file}"),) + + def test_passed(self) -> None: + config = Config([]) + proc = ProcessResult("proc", Path("proc")) + + LOG.clear() + m.log_proc("foo", proc, config, verbose=False) + + assert LOG.lines == (passed(f"(foo) {proc.test_file}"),) + + def test_passed_verbose(self) -> None: + config = Config([]) + proc = ProcessResult("proc", Path("proc"), output="foo\nbar") + details = proc.output.split("\n") + + LOG.clear() + m.log_proc("foo", proc, config, verbose=True) + + assert LOG.lines == tuple( + passed(f"(foo) {proc.test_file}", details=details).split("\n") + ) + + @pytest.mark.parametrize("returncode", (-23, -1, 1, 17)) + def test_failed(self, returncode) -> None: + config = Config([]) + proc = ProcessResult("proc", Path("proc"), returncode=returncode) + + LOG.clear() + m.log_proc("foo", proc, config, verbose=False) + + assert LOG.lines == ( + failed(f"(foo) {proc.test_file}", exit_code=returncode), + ) + + @pytest.mark.parametrize("returncode", (-23, -1, 1, 17)) + def test_failed_verbose(self, returncode) -> None: + config = Config([]) + proc = ProcessResult( + "proc", Path("proc"), returncode=returncode, output="foo\nbar" + ) + details = proc.output.split("\n") + + LOG.clear() + m.log_proc("foo", proc, config, verbose=True) + + assert LOG.lines == tuple( + failed( + f"(foo) {proc.test_file}", + details=details, + exit_code=returncode, + ).split("\n") + ) + + def test_dry_run(self) -> None: + config = Config([]) + config.dry_run = True + proc = ProcessResult("proc", Path("proc")) + + LOG.clear() + m.log_proc("foo", proc, config, verbose=False) + + assert LOG.lines == ( + 
shell(proc.invocation), + passed(f"(foo) {proc.test_file}"), + ) + + def test_debug(self) -> None: + config = Config([]) + config.debug = True + proc = ProcessResult("proc", Path("proc")) + + LOG.clear() + m.log_proc("foo", proc, config, verbose=False) + + assert LOG.lines == ( + shell(proc.invocation), + passed(f"(foo) {proc.test_file}"), + ) diff --git a/tests/unit/legate/util/test_ui.py b/tests/unit/legate/util/test_ui.py index a9ac7d890..4603c053c 100644 --- a/tests/unit/legate/util/test_ui.py +++ b/tests/unit/legate/util/test_ui.py @@ -320,6 +320,19 @@ def test_failed_plain(use_plain_text: UsePlainTextFixture) -> None: assert m.failed("msg") == "[FAIL] msg" +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_failed_with_exit_code() -> None: + fail = colors.bright(colors.red("[FAIL]")) + exit = colors.bright(colors.white(" (exit: 10) ")) + assert m.failed("msg", exit_code=10) == f"{fail} msg{exit}" # noqa + + +def test_failed_with_exit_code_plain( + use_plain_text: UsePlainTextFixture, +) -> None: + assert m.failed("msg", exit_code=10) == "[FAIL] msg (exit: 10) " + + @pytest.mark.skipif(colorama is None, reason="colorama required") def test_failed_with_details() -> None: assert ( @@ -334,6 +347,25 @@ def test_failed_with_details_plain( assert m.failed("msg", details=["a", "b"]) == "[FAIL] msg\n a\n b" +@pytest.mark.skipif(colorama is None, reason="colorama required") +def test_failed_with_details_and_exit_code() -> None: + fail = colors.bright(colors.red("[FAIL]")) + exit = colors.bright(colors.white(" (exit: 10) ")) + assert ( + m.failed("msg", details=["a", "b"], exit_code=10) + == f"{fail} msg{exit}\n a\n b" + ) + + +def test_failed_with_details_and_exit_code_plain( + use_plain_text: UsePlainTextFixture, +) -> None: + assert ( + m.failed("msg", details=["a", "b"], exit_code=10) + == "[FAIL] msg (exit: 10) \n a\n b" + ) + + @pytest.mark.skipif(colorama is None, reason="colorama required") def test_skipped() -> None: assert m.skipped("msg") == f"{colors.cyan('[SKIP]')} msg" From 39a3b4c9f0e9c9b5e4e4dd39ffe2e9394f7c6df1 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 13 Oct 2022 23:30:01 -0700 Subject: [PATCH 022/121] API to declare tasks with side effects (#437) --- legate/core/operation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/legate/core/operation.py b/legate/core/operation.py index b53612d5e..a3b2aef17 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -245,6 +245,7 @@ def __init__( self._comm_args: list[Communicator] = [] self._exn_types: list[type] = [] self._tb: Union[None, TracebackType] = None + self._side_effect = False @property def uses_communicator(self) -> bool: @@ -557,6 +558,7 @@ def launch(self, strategy: Strategy) -> None: self.context, self._task_id, self.mapper_id, + side_effect=self._side_effect, provenance=self.provenance, ) @@ -742,8 +744,9 @@ def launch(self, strategy: Strategy) -> None: self.context, self._task_id, self.mapper_id, - error_on_interference=False, tag=tag, + error_on_interference=False, + side_effect=self._side_effect, provenance=self.provenance, ) From 349182ba218603fcf43c73ed1c4d489f94084097 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Fri, 14 Oct 2022 11:40:35 -0700 Subject: [PATCH 023/121] legate/util: fix a mypy error on MacOS (#438) Fixes #435. 
Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/util/system.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/legate/util/system.py b/legate/util/system.py index 702514cc2..5fbabe1d0 100644 --- a/legate/util/system.py +++ b/legate/util/system.py @@ -90,16 +90,20 @@ def cpus(self) -> tuple[CPUInfo, ...]: if sys.platform == "darwin": return tuple(CPUInfo((i,)) for i in range(N)) - - sibling_sets: set[tuple[int, ...]] = set() - for i in range(N): - line = open( - f"/sys/devices/system/cpu/cpu{i}/topology/thread_siblings_list" - ).read() - sibling_sets.add( - tuple(sorted(int(x) for x in line.strip().split(","))) + else: + # This explicit else is needed for mypy to not raise a type + # error on MacOS. + sibling_sets: set[tuple[int, ...]] = set() + for i in range(N): + line = open( + f"/sys/devices/system/cpu/cpu{i}/topology/thread_siblings_list" # noqa E501 + ).read() + sibling_sets.add( + tuple(sorted(int(x) for x in line.strip().split(","))) + ) + return tuple( + CPUInfo(siblings) for siblings in sorted(sibling_sets) ) - return tuple(CPUInfo(siblings) for siblings in sorted(sibling_sets)) @cached_property def gpus(self) -> tuple[GPUInfo, ...]: From ad493f111c96a5d008c809f740a75c0995807575 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 17 Oct 2022 12:49:51 -0700 Subject: [PATCH 024/121] Improvements to legate.jupyter (#425) * Improvements to legate.jupyter * use absolute paths * define entry point * update docs * always print minimal status * first batch of tests * add kernelspec tests * Apply suggestions from code review * Update README.md --- README.md | 68 ++++--- legate/driver/args.py | 176 ++++-------------- legate/driver/command.py | 54 +++--- legate/driver/config.py | 24 ++- legate/driver/driver.py | 4 +- legate/driver/launcher.py | 16 +- legate/driver/logs.py | 8 +- legate/jupyter/__init__.py | 22 ++- legate/jupyter/__main__.py | 39 ---- legate/jupyter/_legion_kernel.py | 60 +++++++ legate/jupyter/_magic_cmd.py | 78 -------- legate/jupyter/args.py | 107 +++++++++++ legate/jupyter/config.py | 87 +++++++++ legate/jupyter/kernel.py | 128 +++++++++++++ legate/jupyter/magic.py | 103 +++++++++++ legate/jupyter/main.py | 37 ++++ legate/tester/config.py | 4 +- legate/util/args.py | 2 +- legate/util/shared_args.py | 207 ++++++++++++++++++++++ setup.py | 1 + tests/unit/legate/driver/test_args.py | 4 - tests/unit/legate/driver/test_driver.py | 2 +- tests/unit/legate/driver/test_launcher.py | 2 +- tests/unit/legate/driver/test_main.py | 2 +- tests/unit/legate/jupyter/__init__.py | 15 ++ tests/unit/legate/jupyter/test_args.py | 104 +++++++++++ tests/unit/legate/jupyter/test_config.py | 129 ++++++++++++++ tests/unit/legate/jupyter/test_kernel.py | 172 ++++++++++++++++++ tests/unit/legate/jupyter/test_main.py | 73 ++++++++ tests/unit/legate/util/test_args.py | 13 +- typings/IPython/__init__.pyi | 20 +++ typings/IPython/core/magic.pyi | 28 +++ typings/jupyter_client/__init__.pyi | 0 typings/jupyter_client/kernelspec.pyi | 40 +++++ 34 files changed, 1469 insertions(+), 360 deletions(-) delete mode 100644 legate/jupyter/__main__.py create mode 100644 legate/jupyter/_legion_kernel.py delete mode 100644 legate/jupyter/_magic_cmd.py create mode 100755 legate/jupyter/args.py create mode 100644 legate/jupyter/config.py create mode 100644 legate/jupyter/kernel.py create mode 100644 legate/jupyter/magic.py create mode 100644 legate/jupyter/main.py create mode 100644 legate/util/shared_args.py create mode 100644 
tests/unit/legate/jupyter/__init__.py create mode 100644 tests/unit/legate/jupyter/test_args.py create mode 100644 tests/unit/legate/jupyter/test_config.py create mode 100644 tests/unit/legate/jupyter/test_kernel.py create mode 100644 tests/unit/legate/jupyter/test_main.py create mode 100644 typings/IPython/__init__.pyi create mode 100644 typings/IPython/core/magic.pyi create mode 100644 typings/jupyter_client/__init__.pyi create mode 100644 typings/jupyter_client/kernelspec.pyi diff --git a/README.md b/README.md index 019655092..ff1142695 100644 --- a/README.md +++ b/README.md @@ -452,28 +452,22 @@ that can adversely effect the performance of the application. Same as normal Python programs, Legate programs can be run using Jupyter Notebook. Currently we support single node execution with multiple CPUs and GPUs, and plan to support multi-node execution in the future. -We leverage Legion's Jupyter support, so you may want to refer to the +We leverage Legion's Jupyter support, so you may want to refer to the [relevant section in Legion's README](https://github.com/StanfordLegion/legion/blob/master/jupyter_notebook/README.md). -To simplify the installation, we provide a script specifically for Legate libraries. +To simplify the installation, we provide a script specifically for Legate libraries. ### Installation of the Legate IPython Kernel -Please install Legate, then run the following command to install the IPython -kernel: +Please install Legate, then run the following command to install a default +Jupyter kernel: ``` -python -m legate.jupyter --json=legate_jupyter.json +legate-jupyter ``` -If `--json=` is not provided, the installation script will look for a file -named `legate_jupyter.json` in the current directory. A sample -`legate_jupyter.json` file is provided in the legate.core source directory. - If installation is successful, you will see some output like the following: ``` -IPython kernel: legate_kernel_nocr(Legate_SM_GPU) has been installed +Jupyter kernel spec Legate_SM_GPU (Legate_SM_GPU) has been installed ``` -`Legate_SM_GPU` is the kernel name, and you will need to provide it -when starting the Jupyter Notebook. `SM` means the kernel is only for -shared memory execution; `GPU` means GPU support is enabled. +`Legate_SM_GPU` is the default kernel name. ### Running with Jupyter Notebook @@ -486,22 +480,13 @@ the Legion Jupyter Notebook extension: ### Configuring the Jupyter Notebook -The Legate IPython kernel is configured according to the json file provided at -install time. Here is an example of an entry in the json file: +The Legate Jupyter kernel is configured according to the command line arguments +provided at install time. Standard `legate` options for Core, Memory, and +Mult-node configuration may be provided, as well as a name for the kernel: ``` -"cpus": { - "cmd": "--cpus", - "value": 1 -} +legate-jupyter --name legate_cpus_2 --cpus 2 ``` -* `cpus` is the name of the field. - -* `cmd` is used to tell Jupyter how to pass the value for that field to Legate through the -CLI, in this case using `--cpus` to set the number of CPUs. - -* `value` is the value of the field. - -Other configuration options can be added by using the `other_options` field of the json file. +Other configuration options can be seen by using the `--help` command line option. ### Magic Command @@ -509,17 +494,24 @@ We provide a Jupyter magic command to display the IPython kernel configuration. 
``` %load_ext legate.jupyter %legate_info -Number of CPUs to use per rank: 4 -Number of GPUs to use per rank: 1 -Number of OpenMP groups to use per rank: 0 -Number of threads per OpenMP group: 4 -Number of Utility processors per rank: 2 -Amount of DRAM memory per rank (in MBs): 4000 -Amount of DRAM memory per NUMA domain per rank (in MBs): 0 -Amount of framebuffer memory per GPU (in MBs): 4000 -Amount of zero-copy memory per rank (in MBs): 32 -Amount of registered CPU-side pinned memory per rank (in MBs): 0 -Number of nodes to use: 1 +``` +results in output: +``` +Kernel 'Legate_SM_GPU' configured for 1 node(s) + +Cores: + CPUs to use per rank : 4 + GPUs to use per rank : 0 + OpenMP groups to use per rank : 0 + Threads per OpenMP group : 4 + Utility processors per rank : 2 + +Memory: + DRAM memory per rank (in MBs) : 4000 + DRAM memory per NUMA domain per rank (in MBs) : 0 + Framebuffer memory per GPU (in MBs) : 4000 + Zero-copy memory per rank (in MBs) : 32 + Registered CPU-side pinned memory per rank (in MBs) : 0 ``` ## Other FAQs diff --git a/legate/driver/args.py b/legate/driver/args.py index 739722170..cc8667384 100755 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -18,13 +18,27 @@ from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser -from ..util.types import LauncherType +from ..util.shared_args import ( + CPUS, + FBMEM, + GPUS, + LAUNCHER, + LAUNCHER_EXTRA, + NOCR, + NODES, + NUMAMEM, + OMPS, + OMPTHREADS, + RANKS_PER_NODE, + REGMEM, + SYSMEM, + UTILITY, + ZCMEM, +) from . import defaults __all__ = ("parser",) -LAUNCHERS: tuple[LauncherType, ...] = ("mpirun", "jsrun", "srun", "none") - parser = ArgumentParser( description="Legate Driver", allow_abbrev=False, @@ -33,58 +47,11 @@ multi_node = parser.add_argument_group("Multi-node configuration") - - -multi_node.add_argument( - "--nodes", - type=int, - default=defaults.LEGATE_NODES, - dest="nodes", - help="Number of nodes to use", -) - - -multi_node.add_argument( - "--ranks-per-node", - type=int, - default=defaults.LEGATE_RANKS_PER_NODE, - dest="ranks_per_node", - help="Number of ranks (processes running copies of the program) to " - "launch per node. The default (1 rank per node) will typically result " - "in the best performance.", -) - - -multi_node.add_argument( - "--no-replicate", - dest="not_control_replicable", - action="store_true", - required=False, - help="Execute this program without control replication. Most of the " - "time, this is not recommended. This option should be used for " - "debugging. 
The -lg:safe_ctrlrepl Legion option may be helpful " - "with discovering issues with replicated control.", -) - -multi_node.add_argument( - "--launcher", - dest="launcher", - choices=LAUNCHERS, - default="none", - help='launcher program to use (set to "none" for local runs, or if ' - "the launch has already happened by the time legate is invoked)", -) - - -multi_node.add_argument( - "--launcher-extra", - dest="launcher_extra", - action="append", - default=[], - required=False, - help="additional argument to pass to the launcher (can appear more " - "than once)", -) +multi_node.add_argument(NODES.name, **NODES.kwargs) +multi_node.add_argument(RANKS_PER_NODE.name, **RANKS_PER_NODE.kwargs) +multi_node.add_argument(NOCR.name, **NOCR.kwargs) +multi_node.add_argument(LAUNCHER.name, **LAUNCHER.kwargs) +multi_node.add_argument(LAUNCHER_EXTRA.name, **LAUNCHER_EXTRA.kwargs) binding = parser.add_argument_group("Hardware binding") @@ -124,98 +91,19 @@ core = parser.add_argument_group("Core alloction") - - -core.add_argument( - "--cpus", - type=int, - default=defaults.LEGATE_CPUS, - dest="cpus", - help="Number of CPUs to use per rank", -) - - -core.add_argument( - "--gpus", - type=int, - default=defaults.LEGATE_GPUS, - dest="gpus", - help="Number of GPUs to use per rank", -) - - -core.add_argument( - "--omps", - type=int, - default=defaults.LEGATE_OMP_PROCS, - dest="openmp", - help="Number of OpenMP groups to use per rank", -) - - -core.add_argument( - "--ompthreads", - type=int, - default=defaults.LEGATE_OMP_THREADS, - dest="ompthreads", - help="Number of threads per OpenMP group", -) - - -core.add_argument( - "--utility", - type=int, - default=defaults.LEGATE_UTILITY_CORES, - dest="utility", - help="Number of Utility processors per rank to request for meta-work", -) +core.add_argument(CPUS.name, **CPUS.kwargs) +core.add_argument(GPUS.name, **GPUS.kwargs) +core.add_argument(OMPS.name, **OMPS.kwargs) +core.add_argument(OMPTHREADS.name, **OMPTHREADS.kwargs) +core.add_argument(UTILITY.name, **UTILITY.kwargs) memory = parser.add_argument_group("Memory alloction") - -memory.add_argument( - "--sysmem", - type=int, - default=defaults.LEGATE_SYSMEM, - dest="sysmem", - help="Amount of DRAM memory per rank (in MBs)", -) - - -memory.add_argument( - "--numamem", - type=int, - default=defaults.LEGATE_NUMAMEM, - dest="numamem", - help="Amount of DRAM memory per NUMA domain per rank (in MBs)", -) - - -memory.add_argument( - "--fbmem", - type=int, - default=defaults.LEGATE_FBMEM, - dest="fbmem", - help="Amount of framebuffer memory per GPU (in MBs)", -) - - -memory.add_argument( - "--zcmem", - type=int, - default=defaults.LEGATE_ZCMEM, - dest="zcmem", - help="Amount of zero-copy memory per rank (in MBs)", -) - - -memory.add_argument( - "--regmem", - type=int, - default=defaults.LEGATE_REGMEM, - dest="regmem", - help="Amount of registered CPU-side pinned memory per rank (in MBs)", -) +memory.add_argument(SYSMEM.name, **SYSMEM.kwargs) +memory.add_argument(NUMAMEM.name, **NUMAMEM.kwargs) +memory.add_argument(FBMEM.name, **FBMEM.kwargs) +memory.add_argument(ZCMEM.name, **ZCMEM.kwargs) +memory.add_argument(REGMEM.name, **REGMEM.kwargs) # FIXME: We set the eager pool size to 50% of the total size for now. 
diff --git a/legate/driver/command.py b/legate/driver/command.py index 7d11c2c9b..f45a10c7c 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -21,14 +21,14 @@ if TYPE_CHECKING: from ..util.system import System from ..util.types import CommandPart - from .config import Config + from .config import ConfigProtocol from .launcher import Launcher __all__ = ("CMD_PARTS",) def cmd_bind( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: cpu_bind = config.binding.cpu_bind mem_bind = config.binding.mem_bind @@ -69,7 +69,9 @@ def check_bind_ranks(name: str, binding: str) -> None: return opts -def cmd_gdb(config: Config, system: System, launcher: Launcher) -> CommandPart: +def cmd_gdb( + config: ConfigProtocol, system: System, launcher: Launcher +) -> CommandPart: if not config.debugging.gdb: return () @@ -81,7 +83,7 @@ def cmd_gdb(config: Config, system: System, launcher: Launcher) -> CommandPart: def cmd_cuda_gdb( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: if not config.debugging.cuda_gdb: return () @@ -94,7 +96,7 @@ def cmd_cuda_gdb( def cmd_nvprof( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: if not config.profiling.nvprof: return () @@ -105,7 +107,7 @@ def cmd_nvprof( def cmd_nsys( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: if not config.profiling.nsys: return () @@ -123,7 +125,7 @@ def cmd_nsys( def cmd_memcheck( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: memcheck = config.debugging.memcheck @@ -131,7 +133,7 @@ def cmd_memcheck( def cmd_nocr( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: control_replicable = not config.multi_node.not_control_replicable @@ -139,7 +141,7 @@ def cmd_nocr( def cmd_module( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: module = config.other.module @@ -147,26 +149,26 @@ def cmd_module( def cmd_rlwrap( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: return ("rlwrap",) if config.other.rlwrap else () def cmd_legion( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: return (str(system.legion_paths.legion_python),) def cmd_processor( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: # We always need one python processor per rank and no local fields return ("-ll:py", "1", "-lg:local", "0") def cmd_kthreads( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: freeze_on_error = config.debugging.freeze_on_error gdb = config.debugging.gdb @@ -181,7 +183,7 @@ def cmd_kthreads( def cmd_cpus( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: cpus = config.core.cpus @@ -189,7 +191,7 @@ def cmd_cpus( def cmd_gpus( - config: Config, system: System, launcher: 
Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: gpus = config.core.gpus @@ -198,7 +200,7 @@ def cmd_gpus( def cmd_openmp( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: openmp = config.core.openmp ompthreads = config.core.ompthreads @@ -228,7 +230,7 @@ def cmd_openmp( def cmd_utility( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: utility = config.core.utility ranks = config.multi_node.ranks @@ -247,20 +249,22 @@ def cmd_utility( return opts -def cmd_mem(config: Config, system: System, launcher: Launcher) -> CommandPart: +def cmd_mem( + config: ConfigProtocol, system: System, launcher: Launcher +) -> CommandPart: # Always specify the csize return ("-ll:csize", str(config.memory.sysmem)) def cmd_numamem( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: numamem = config.memory.numamem return () if numamem == 0 else ("-ll:nsize", str(numamem)) def cmd_fbmem( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: if config.core.gpus == 0: return () @@ -270,14 +274,14 @@ def cmd_fbmem( def cmd_regmem( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: regmem = config.memory.regmem return () if regmem == 0 else ("-ll:rsize", str(regmem)) def cmd_log_levels( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: log_dir = config.logging.logdir @@ -308,7 +312,7 @@ def cmd_log_levels( def cmd_log_file( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: log_dir = config.logging.logdir log_to_file = config.logging.log_to_file @@ -320,7 +324,7 @@ def cmd_log_file( def cmd_eager_alloc( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: eager_alloc = config.memory.eager_alloc @@ -328,7 +332,7 @@ def cmd_eager_alloc( def cmd_user_opts( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: return config.user_opts diff --git a/legate/driver/config.py b/legate/driver/config.py index c4acb3c41..5e42bc584 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -21,7 +21,7 @@ from dataclasses import dataclass from functools import cached_property from pathlib import Path -from typing import Any +from typing import Any, Protocol from ..util.types import ( ArgList, @@ -123,6 +123,24 @@ class Other(DataclassMixin): rlwrap: bool +class ConfigProtocol(Protocol): + + _args: Namespace + + argv: ArgList + + user_opts: tuple[str, ...] + multi_node: MultiNode + binding: Binding + core: Core + memory: Memory + profiling: Profiling + logging: Logging + debugging: Debugging + info: Info + other: Other + + class Config: """A centralized configuration object that provides the information needed by the Legate driver in order to run. 
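The new ConfigProtocol is a typing.Protocol, so any object exposing the listed attributes satisfies the Driver and Launcher signatures without inheriting from the driver's Config; this is what lets the Jupyter-specific Config added later in this patch be handed straight to Driver. A self-contained sketch of the idea, using toy attribute and class names rather than the actual legate fields:

```
from dataclasses import dataclass
from typing import Protocol


class HasCpus(Protocol):
    """Structural type: anything exposing a matching `cpus` attribute conforms."""

    cpus: int


@dataclass
class DriverStyleConfig:
    cpus: int = 4


@dataclass
class JupyterStyleConfig:
    cpus: int = 2
    display_name: str = "Legate_SM_GPU"  # extra attributes are allowed


def describe(config: HasCpus) -> str:
    # mypy accepts both classes below because the check is structural,
    # not inheritance-based; neither class subclasses HasCpus.
    return f"cpus={config.cpus}"


print(describe(DriverStyleConfig()))   # cpus=4
print(describe(JupyterStyleConfig()))  # cpus=2
```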
@@ -135,7 +153,9 @@ class Config: """ def __init__(self, argv: ArgList) -> None: - args, extra = parser.parse_known_args(argv[1:]) + self.argv = argv + + args, extra = parser.parse_known_args(self.argv[1:]) # only saving this for help with testing self._args = args diff --git a/legate/driver/driver.py b/legate/driver/driver.py index 7a3e00c40..5329b951f 100644 --- a/legate/driver/driver.py +++ b/legate/driver/driver.py @@ -22,7 +22,7 @@ from ..util.system import System from ..util.ui import kvtable, rule, section, value, warn from .command import CMD_PARTS -from .config import Config +from .config import ConfigProtocol from .launcher import Launcher from .logs import process_logs @@ -53,7 +53,7 @@ class Driver: """ - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: self.config = config self.system = system self.launcher = Launcher.create(config, system) diff --git a/legate/driver/launcher.py b/legate/driver/launcher.py index 009b0cf6b..e41b0a2e1 100644 --- a/legate/driver/launcher.py +++ b/legate/driver/launcher.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from ..util.system import System from ..util.types import Command, EnvDict, LauncherType - from .config import Config + from .config import ConfigProtocol __all__ = ("Launcher",) @@ -71,7 +71,7 @@ class Launcher: cmd: Command - _config: Config + _config: ConfigProtocol _system: System @@ -79,7 +79,7 @@ class Launcher: _custom_env_vars: set[str] | None = None - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: self._config = config self._system = system @@ -95,7 +95,7 @@ def __eq__(self, other: object) -> bool: ) @classmethod - def create(cls, config: Config, system: System) -> Launcher: + def create(cls, config: ConfigProtocol, system: System) -> Launcher: """Factory method for creating appropriate Launcher subclass based on user configuration. 
@@ -291,7 +291,7 @@ class SimpleLauncher(Launcher): kind: LauncherType = "none" - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) if config.multi_node.ranks == 1: @@ -319,7 +319,7 @@ class MPILauncher(Launcher): kind: LauncherType = "mpirun" - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) self.rank_id = "%q{OMPI_COMM_WORLD_RANK}" @@ -349,7 +349,7 @@ class JSRunLauncher(Launcher): kind: LauncherType = "jsrun" - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) self.rank_id = "%q{OMPI_COMM_WORLD_RANK}" @@ -377,7 +377,7 @@ class SRunLauncher(Launcher): kind: LauncherType = "srun" - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) self.rank_id = "%q{SLURM_PROCID}" diff --git a/legate/driver/logs.py b/legate/driver/logs.py index 70696d3e1..95b2ed46f 100644 --- a/legate/driver/logs.py +++ b/legate/driver/logs.py @@ -29,7 +29,7 @@ if TYPE_CHECKING: from ..util.system import System from ..util.types import Command - from .config import Config + from .config import ConfigProtocol from .launcher import Launcher __all__ = ( @@ -53,10 +53,10 @@ class LogHandler(metaclass=ABCMeta): """ - config: Config + config: ConfigProtocol system: System - def __init__(self, config: Config, system: System) -> None: + def __init__(self, config: ConfigProtocol, system: System) -> None: self.config = config self.system = system @@ -164,7 +164,7 @@ def cleanup(self, keep_logs: bool) -> None: @contextmanager def process_logs( - config: Config, system: System, launcher: Launcher + config: ConfigProtocol, system: System, launcher: Launcher ) -> Iterator[tuple[LogHandler, ...]]: """A context manager for log initializion and processing/cleanup, based on the user configuration. diff --git a/legate/jupyter/__init__.py b/legate/jupyter/__init__.py index 318a79cc5..c9530a071 100644 --- a/legate/jupyter/__init__.py +++ b/legate/jupyter/__init__.py @@ -12,11 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from __future__ import annotations -# mypy: ignore-errors -from ._magic_cmd import LegateInfoMagics +from typing import TYPE_CHECKING +from legate.jupyter.magic import LegateInfoMagics -def load_ipython_extension(ipython) -> None: - legate_info_magic = LegateInfoMagics(ipython) - ipython.register_magics(legate_info_magic) +if TYPE_CHECKING: + from IPython import InteractiveShell + + +def load_ipython_extension(ipython: InteractiveShell) -> None: + ipython.register_magics(LegateInfoMagics(ipython)) + + +def main() -> int: + import sys + + from .main import main as _main + + return _main(sys.argv) diff --git a/legate/jupyter/__main__.py b/legate/jupyter/__main__.py deleted file mode 100644 index 26cc4e4cf..000000000 --- a/legate/jupyter/__main__.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# mypy: ignore-errors -import shutil -from pathlib import Path - -import install_jupyter -from jupyter_client.kernelspec import KernelSpecManager - -if __name__ == "__main__": - legate_exe = Path(shutil.which("legate")) - legate_dir = legate_exe.parent.absolute() - args, opts = install_jupyter.parse_args() - if args.json == "legion_python.json": - # override the default one - args.json = "legate_jupyter.json" - args.legion_prefix = str(legate_dir) - legion_jupyter_file = Path(install_jupyter.__file__) - kernel_file_dir = str(legion_jupyter_file.parent.absolute()) - kernel_name = install_jupyter.driver(args, opts, kernel_file_dir) - # copy the json file into ipython kernel directory - ksm = KernelSpecManager() - spec = ksm.get_kernel_spec(kernel_name) - shutil.copy(args.json, spec.resource_dir) diff --git a/legate/jupyter/_legion_kernel.py b/legate/jupyter/_legion_kernel.py new file mode 100644 index 000000000..b88d23f30 --- /dev/null +++ b/legate/jupyter/_legion_kernel.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +# Copyright 2022 Los Alamos National Laboratory, NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import sys +from contextlib import contextmanager +from typing import Any, Iterator, TextIO + +from ipykernel.ipkernel import IPythonKernel # type: ignore + +__version__ = "0.1" + + +@contextmanager +def reset_stdout(stdout: TextIO) -> Iterator[None]: + _stdout = sys.stdout + sys.stdout = stdout + yield + sys.stdout = _stdout + + +class LegionKernel(IPythonKernel): # type: ignore + implementation = "legion_kernel" + implementation_version = __version__ + banner = "Legion IPython Kernel for SM" + language = "python" + language_version = __version__ + language_info = { + "name": "legion_kernel", + "mimetype": "text/x-python", + "codemirror_mode": {"name": "ipython", "version": 3}, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py", + } + + def __init__(self, **kwargs: Any) -> None: + with reset_stdout(open("/dev/stdout", "w")): + print("Initializing Legion kernel for single- or multi-node.") + super().__init__(**kwargs) + + +if __name__ == "__main__": + from ipykernel.kernelapp import IPKernelApp # type: ignore + + IPKernelApp.launch_instance(kernel_class=LegionKernel) diff --git a/legate/jupyter/_magic_cmd.py b/legate/jupyter/_magic_cmd.py deleted file mode 100644 index 06e653a51..000000000 --- a/legate/jupyter/_magic_cmd.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# mypy: ignore-errors -import json -import os -import sys - -from IPython.core.magic import Magics, line_magic, magics_class -from jupyter_client.kernelspec import KernelSpecManager, NoSuchKernel - -cmd_dict = { - "cpus": "Number of CPUs to use per rank", - "gpus": "Number of GPUs to use per rank", - "omps": "Number of OpenMP groups to use per rank", - "ompthreads": "Number of threads per OpenMP group", - "utility": "Number of Utility processors per rank", - "sysmem": "Amount of DRAM memory per rank (in MBs)", - "numamem": "Amount of DRAM memory per NUMA domain per rank (in MBs)", - "fbmem": "Amount of framebuffer memory per GPU (in MBs)", - "zcmem": "Amount of zero-copy memory per rank (in MBs)", - "regmem": "Amount of registered CPU-side pinned memory per rank (in MBs)", - "nodes": "Number of nodes to use", -} - - -class LegateInfo(object): - def __init__(self, filename: str) -> None: - self.config_dict = dict() - # check if the json file is in the ipython kernel directory - try: - ksm = KernelSpecManager() - spec = ksm.get_kernel_spec("legate_kernel_nocr") - except NoSuchKernel: - print( - "Can not find the json file in the " - "IPython kernel directory, please " - "make sure the kernel has been installed." 
- ) - sys.exit(1) - filename_with_path = os.path.join(spec.resource_dir, filename) - with open(filename_with_path) as json_file: - json_dict = json.load(json_file) - if missing := (set(cmd_dict) - set(json_dict)): - raise RuntimeError(f"Expected keys {missing!r} are missing") - for key in cmd_dict.keys(): - self.config_dict[key] = json_dict[key]["value"] - - def __repr__(self) -> str: - out_str = "" - for key, value in self.config_dict.items(): - out_str += f"{cmd_dict[key]}: {value}\n" - return out_str[:-1] - - -@magics_class -class LegateInfoMagics(Magics): - __slots__ = ["legate_json"] - - def __init__(self, shell): - super(LegateInfoMagics, self).__init__(shell) - self.legate_json = LegateInfo("legate_jupyter.json") - - @line_magic - def legate_info(self, line: str) -> None: - print(self.legate_json) diff --git a/legate/jupyter/args.py b/legate/jupyter/args.py new file mode 100755 index 000000000..77c16b66a --- /dev/null +++ b/legate/jupyter/args.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python + +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser + +from legate.driver import defaults +from legate.util import shared_args as sa + +__all__ = ("parser",) + + +parser = ArgumentParser( + description="Install a Legate Jupyter Kernel", + allow_abbrev=False, + formatter_class=ArgumentDefaultsHelpFormatter, +) + +kernel = parser.add_argument_group("Kernel configuration") + +kernel.add_argument( + "--user", + action="store_true", + default=True, + dest="user", + help="Install the kernel in user home directory", +) + +kernel.add_argument( + "--name", + default="Legate_SM_GPU", + dest="spec_name", + help="A name for the kernel spec", +) + +kernel.add_argument( + "--display-name", + default=None, + dest="display_name", + help="A display name for the kernel (if not provided, --name is used)", +) + +kernel.add_argument( + "--prefix", + default=None, + dest="prefix", + help="A prefix to install the kernel into", +) + + +multi_node = parser.add_argument_group("Multi-node configuration") +multi_node.add_argument(sa.NODES.name, **sa.NODES.kwargs) +multi_node.add_argument(sa.RANKS_PER_NODE.name, **sa.RANKS_PER_NODE.kwargs) +multi_node.add_argument(sa.NOCR.name, **sa.NOCR.kwargs) +multi_node.add_argument(sa.LAUNCHER.name, **sa.LAUNCHER.kwargs) +multi_node.add_argument(sa.LAUNCHER_EXTRA.name, **sa.LAUNCHER_EXTRA.kwargs) + + +core = parser.add_argument_group("Core alloction") +core.add_argument(sa.CPUS.name, **sa.CPUS.kwargs) +core.add_argument(sa.GPUS.name, **sa.GPUS.kwargs) +core.add_argument(sa.OMPS.name, **sa.OMPS.kwargs) +core.add_argument(sa.OMPTHREADS.name, **sa.OMPTHREADS.kwargs) +core.add_argument(sa.UTILITY.name, **sa.UTILITY.kwargs) + + +memory = parser.add_argument_group("Memory alloction") +memory.add_argument(sa.SYSMEM.name, **sa.SYSMEM.kwargs) +memory.add_argument(sa.NUMAMEM.name, **sa.NUMAMEM.kwargs) +memory.add_argument(sa.FBMEM.name, **sa.FBMEM.kwargs) 
+memory.add_argument(sa.ZCMEM.name, **sa.ZCMEM.kwargs) +memory.add_argument(sa.REGMEM.name, **sa.REGMEM.kwargs) + +# FIXME: We set the eager pool size to 50% of the total size for now. +# This flag will be gone once we roll out a new allocation scheme. +memory.add_argument( + "--eager-alloc-percentage", + dest="eager_alloc", + default=defaults.LEGATE_EAGER_ALLOC_PERCENTAGE, + required=False, + help="Specify the size of eager allocation pool in percentage", +) + +info = parser.add_argument_group("Informational") + +info.add_argument( + "-v", + "--verbose", + dest="verbose", + action="count", + default=0, + help="Display verbose output. Use -vv for even more output (test stdout)", +) diff --git a/legate/jupyter/config.py b/legate/jupyter/config.py new file mode 100644 index 000000000..52c44c00f --- /dev/null +++ b/legate/jupyter/config.py @@ -0,0 +1,87 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate driver configuration from command-line and environment. + +""" +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +from legate.driver.config import ( + Binding, + Core, + Debugging, + Info, + Logging, + Memory, + MultiNode, + Other, + Profiling, +) +from legate.jupyter.args import parser +from legate.util.types import ArgList, DataclassMixin, object_to_dataclass + +__all__ = ("Config",) + + +@dataclass(frozen=True) +class Kernel(DataclassMixin): + user: bool + prefix: str | None + spec_name: str + display_name: str + + +class Config: + """A Jupyter-specific configuration object that provides the information + needed by the Legate driver in order to run. + + Parameters + ---------- + argv : ArgList + command-line arguments to use when building the configuration + + """ + + def __init__(self, argv: ArgList) -> None: + self.argv = argv + + args = parser.parse_args(self.argv[1:]) + + # only saving these for help with testing + self._args = args + + if args.display_name is None: + args.display_name = args.spec_name + + self.kernel = object_to_dataclass(args, Kernel) + self.verbose = args.verbose + + # these are the values we leave configurable for the kernel + self.multi_node = object_to_dataclass(args, MultiNode) + self.core = object_to_dataclass(args, Core) + self.memory = object_to_dataclass(args, Memory) + + # turn everything else off + self.user_opts: tuple[str, ...] 
= () + self.binding = Binding(None, None, None, None) + self.profiling = Profiling(False, False, False, "", []) + self.logging = Logging(None, Path(), False, False) + self.debugging = Debugging( + False, False, False, False, False, False, False + ) + self.info = Info(False, False, self.verbose > 0) + self.other = Other(None, False, False) diff --git a/legate/jupyter/kernel.py b/legate/jupyter/kernel.py new file mode 100644 index 000000000..e71604ed8 --- /dev/null +++ b/legate/jupyter/kernel.py @@ -0,0 +1,128 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Consolidate driver configuration from command-line and environment. + +""" +from __future__ import annotations + +import json +import os +import sys +from dataclasses import asdict +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Any, Literal, TypedDict + +from jupyter_client.kernelspec import ( + KernelSpec, + KernelSpecManager, + NoSuchKernel, +) + +from legate.driver import Driver +from legate.jupyter.config import Config +from legate.util.types import ArgList +from legate.util.ui import error + + +class LegateMetadata(TypedDict): + argv: ArgList + multi_node: dict[str, Any] + memory: dict[str, Any] + core: dict[str, Any] + + +LEGATE_JUPYTER_KERNEL_SPEC_KEY = "__LEGATE_JUPYTER_KERNEL_SPEC__" +LEGATE_JUPYTER_METADATA_KEY: Literal["legate"] = "legate" + + +def generate_kernel_spec(driver: Driver, config: Config) -> KernelSpec: + legion_kernel = Path(__file__).parent / "_legion_kernel.py" + argv = list(driver.cmd) + [str(legion_kernel), "-f", "{connection_file}"] + + env = {k: v for k, v in driver.env.items() if k in driver.custom_env_vars} + + # Inexplicably, there is apparently no reasonable or supported way to + # determine the name of the currently running/connected Jupyter kernel. + # Instead, tunnel an env var with the name through, so that our LegateInfo + # line magic can actually find the right kernel spec to report on. + assert LEGATE_JUPYTER_KERNEL_SPEC_KEY not in env + env[LEGATE_JUPYTER_KERNEL_SPEC_KEY] = config.kernel.spec_name + + return KernelSpec( + display_name=config.kernel.display_name, + language="python", + argv=argv, + env=env, + metadata={ + LEGATE_JUPYTER_METADATA_KEY: LegateMetadata( + { + "argv": config.argv[1:], + "multi_node": asdict(config.multi_node), + "memory": asdict(config.memory), + "core": asdict(config.core), + } + ) + }, + ) + + +def install_kernel_spec(spec: KernelSpec, config: Config) -> None: + ksm = KernelSpecManager() + + spec_name = config.kernel.spec_name + display_name = spec.display_name + + try: + ksm.get_kernel_spec(spec_name) + except NoSuchKernel: + pass + else: + msg = error( + f"kernel spec {spec_name!r} already exists. Remove it by " + f"running 'jupyter kernelspec uninstall {spec_name!r}, " + "or choose a new kernel name." 
+ ) + print(msg) + sys.exit(1) + + with TemporaryDirectory() as tmpdir: + os.chmod(tmpdir, 0o755) + with open(Path(tmpdir).joinpath("kernel.json"), "w") as f: + out = json.dumps(spec.to_dict(), sort_keys=True, indent=2) + if config.verbose > 0: + print(f"Wrote kernel spec file {spec_name}/kernel.json\n") + if config.verbose > 1: + print(f"\n{out}\n") + f.write(out) + + try: + ksm.install_kernel_spec( + tmpdir, + spec_name, + user=config.kernel.user, + prefix=config.kernel.prefix, + ) + print( + f"Jupyter kernel spec {spec_name} ({display_name}) " + "has been installed" + ) + except Exception as e: + msg = error( + "Failed to install the Jupyter kernel spec " + f"{spec_name} ({display_name}) with error: {e}" + ) + print(msg) + sys.exit(1) diff --git a/legate/jupyter/magic.py b/legate/jupyter/magic.py new file mode 100644 index 000000000..b5b82784c --- /dev/null +++ b/legate/jupyter/magic.py @@ -0,0 +1,103 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import os +from textwrap import indent +from typing import TYPE_CHECKING + +from IPython.core.magic import Magics, line_magic, magics_class +from jupyter_client.kernelspec import KernelSpecManager, NoSuchKernel + +from legate.jupyter.kernel import ( + LEGATE_JUPYTER_KERNEL_SPEC_KEY, + LEGATE_JUPYTER_METADATA_KEY, + LegateMetadata, +) +from legate.util.colors import scrub +from legate.util.ui import kvtable + +if TYPE_CHECKING: + from IPython import InteractiveShell + + +core = { + "cpus": "CPUs to use per rank", + "gpus": "GPUs to use per rank", + "openmp": "OpenMP groups to use per rank", + "ompthreads": "Threads per OpenMP group", + "utility": "Utility processors per rank", +} + +memory = { + "sysmem": "DRAM memory per rank (in MBs)", + "numamem": "DRAM memory per NUMA domain per rank (in MBs)", + "fbmem": "Framebuffer memory per GPU (in MBs)", + "zcmem": "Zero-copy memory per rank (in MBs)", + "regmem": "Registered CPU-side pinned memory per rank (in MBs)", +} + + +class LegateInfo(object): + config: LegateMetadata + + def __init__(self) -> None: + if LEGATE_JUPYTER_KERNEL_SPEC_KEY not in os.environ: + raise RuntimeError("Cannot determine currently running kernel") + + spec_name = os.environ[LEGATE_JUPYTER_KERNEL_SPEC_KEY] + + try: + spec = KernelSpecManager().get_kernel_spec(spec_name) + except NoSuchKernel: + raise RuntimeError( + f"Cannot find a Legate Jupyter kernel named {spec_name!r}" + ) + + self.spec_name = spec_name + self.config = spec.metadata[LEGATE_JUPYTER_METADATA_KEY] + + def __str__(self) -> str: + nodes = self.config["multi_node"]["nodes"] + header = f"Kernel {self.spec_name!r} configured for {nodes} node(s)" + core_table = { + desc: self.config["core"][field] for field, desc in core.items() + } + memory_table = { + desc: self.config["memory"][field] + for field, desc in memory.items() + } + + out = f"""{header} + +Cores: +{indent(kvtable(core_table, align=False), prefix=' ')} + +Memory: +{indent(kvtable(memory_table, align=False), prefix=' 
')} +""" + # remove any text colors in notebook + return scrub(out) + + +@magics_class +class LegateInfoMagics(Magics): + def __init__(self, shell: InteractiveShell | None = None) -> None: + super().__init__(shell=shell) + self.info = LegateInfo() + + @line_magic + def legate_info(self, line: str) -> None: + print(self.info) diff --git a/legate/jupyter/main.py b/legate/jupyter/main.py new file mode 100644 index 000000000..494fdf421 --- /dev/null +++ b/legate/jupyter/main.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python + +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from legate.driver import Driver +from legate.jupyter.config import Config +from legate.jupyter.kernel import generate_kernel_spec, install_kernel_spec +from legate.util.system import System + +__all__ = ("main",) + + +def main(argv: list[str]) -> int: + config = Config(argv) + system = System() + + driver = Driver(config, system) + + spec = generate_kernel_spec(driver, config) + + install_kernel_spec(spec, config) + + return 0 diff --git a/legate/tester/config.py b/legate/tester/config.py index a621ba8c3..497c3a385 100644 --- a/legate/tester/config.py +++ b/legate/tester/config.py @@ -38,7 +38,9 @@ class Config: """ def __init__(self, argv: ArgList) -> None: - args, self._extra_args = parser.parse_known_args(argv[1:]) + self.argv = argv + + args, self._extra_args = parser.parse_known_args(self.argv[1:]) # which tests to run self.examples = True diff --git a/legate/util/args.py b/legate/util/args.py index 88cd73193..4485d6db2 100644 --- a/legate/util/args.py +++ b/legate/util/args.py @@ -66,7 +66,7 @@ class _UnsetType: @dataclass(frozen=True) class ArgSpec: dest: str - action: NotRequired[ActionType] = "store_true" + action: NotRequired[ActionType] = Unset nargs: NotRequired[Union[int, NargsType]] = Unset const: NotRequired[Any] = Unset default: NotRequired[Any] = Unset diff --git a/legate/util/shared_args.py b/legate/util/shared_args.py new file mode 100644 index 000000000..46def8642 --- /dev/null +++ b/legate/util/shared_args.py @@ -0,0 +1,207 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
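
The patch defines the LegateInfoMagics class above but does not show where it gets registered with IPython. A hypothetical wiring sketch, assuming the standard IPython extension hook (the hook name and module layout here are illustrative, not part of the patch):

    # Hypothetical registration sketch: exposing %legate_info in a session.
    from __future__ import annotations

    from typing import TYPE_CHECKING

    from legate.jupyter.magic import LegateInfoMagics

    if TYPE_CHECKING:
        from IPython import InteractiveShell


    def load_ipython_extension(shell: InteractiveShell) -> None:
        # Standard IPython extension hook; after loading, %legate_info prints
        # the core/memory settings that `legate-jupyter` recorded in the
        # kernel spec metadata. This is only meaningful inside a kernel
        # installed by legate-jupyter, since LegateInfo reads the spec name
        # from the tunneled environment variable.
        shell.register_magics(LegateInfoMagics)
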
+#
+from __future__ import annotations
+
+from ..driver import defaults
+from .args import ArgSpec, Argument
+from .types import LauncherType
+
+__all__ = (
+    "CPUS",
+    "FBMEM",
+    "GPUS",
+    "LAUNCHER_EXTRA",
+    "LAUNCHER",
+    "LAUNCHERS",
+    "NOCR",
+    "NODES",
+    "NUMAMEM",
+    "OMPS",
+    "OMPTHREADS",
+    "RANKS_PER_NODE",
+    "REGMEM",
+    "SYSMEM",
+    "UTILITY",
+    "ZCMEM",
+)
+
+LAUNCHERS: tuple[LauncherType, ...] = ("mpirun", "jsrun", "srun", "none")
+
+NODES = Argument(
+    "--nodes",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_NODES,
+        dest="nodes",
+        help="Number of nodes to use",
+    ),
+)
+
+
+RANKS_PER_NODE = Argument(
+    "--ranks-per-node",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_RANKS_PER_NODE,
+        dest="ranks_per_node",
+        help="Number of ranks (processes running copies of the program) to "
+        "launch per node. The default (1 rank per node) will typically result "
+        "in the best performance.",
+    ),
+)
+
+
+NOCR = Argument(
+    "--no-replicate",
+    ArgSpec(
+        dest="not_control_replicable",
+        action="store_true",
+        required=False,
+        help="Execute this program without control replication. Most of the "
+        "time, this is not recommended. This option should be used for "
+        "debugging. The -lg:safe_ctrlrepl Legion option may be helpful "
+        "with discovering issues with replicated control.",
+    ),
+)
+
+LAUNCHER = Argument(
+    "--launcher",
+    ArgSpec(
+        dest="launcher",
+        choices=LAUNCHERS,
+        default="none",
+        help='launcher program to use (set to "none" for local runs, or if '
+        "the launch has already happened by the time legate is invoked)",
+    ),
+)
+
+
+LAUNCHER_EXTRA = Argument(
+    "--launcher-extra",
+    ArgSpec(
+        dest="launcher_extra",
+        action="append",
+        default=[],
+        required=False,
+        help="additional argument to pass to the launcher (can appear more "
+        "than once)",
+    ),
+)
+
+
+CPUS = Argument(
+    "--cpus",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_CPUS,
+        dest="cpus",
+        help="Number of CPUs to use per rank",
+    ),
+)
+
+GPUS = Argument(
+    "--gpus",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_GPUS,
+        dest="gpus",
+        help="Number of GPUs to use per rank",
+    ),
+)
+
+OMPS = Argument(
+    "--omps",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_OMP_PROCS,
+        dest="openmp",
+        help="Number of OpenMP groups to use per rank",
+    ),
+)
+
+
+OMPTHREADS = Argument(
+    "--ompthreads",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_OMP_THREADS,
+        dest="ompthreads",
+        help="Number of threads per OpenMP group",
+    ),
+)
+
+UTILITY = Argument(
+    "--utility",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_UTILITY_CORES,
+        dest="utility",
+        help="Number of Utility processors per rank to request for meta-work",
+    ),
+)
+
+SYSMEM = Argument(
+    "--sysmem",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_SYSMEM,
+        dest="sysmem",
+        help="Amount of DRAM memory per rank (in MBs)",
+    ),
+)
+
+
+NUMAMEM = Argument(
+    "--numamem",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_NUMAMEM,
+        dest="numamem",
+        help="Amount of DRAM memory per NUMA domain per rank (in MBs)",
+    ),
+)
+
+
+FBMEM = Argument(
+    "--fbmem",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_FBMEM,
+        dest="fbmem",
+        help="Amount of framebuffer memory per GPU (in MBs)",
+    ),
+)
+
+
+ZCMEM = Argument(
+    "--zcmem",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_ZCMEM,
+        dest="zcmem",
+        help="Amount of zero-copy memory per rank (in MBs)",
+    ),
+)
+
+
+REGMEM = Argument(
+    "--regmem",
+    ArgSpec(
+        type=int,
+        default=defaults.LEGATE_REGMEM,
+        dest="regmem",
+        help="Amount of registered CPU-side pinned memory per rank (in MBs)",
+ ), +) diff --git a/setup.py b/setup.py index 24e358eb0..89583411a 100755 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ entry_points={ "console_scripts": [ "legate = legate.driver:main", + "legate-jupyter = legate.jupyter:main", "lgpatch = legate.lgpatch:main", ], }, diff --git a/tests/unit/legate/driver/test_args.py b/tests/unit/legate/driver/test_args.py index 76a29fd9b..fa9d36929 100644 --- a/tests/unit/legate/driver/test_args.py +++ b/tests/unit/legate/driver/test_args.py @@ -18,10 +18,6 @@ import legate.driver.defaults as defaults -def test_LAUNCHERS() -> None: - assert m.LAUNCHERS == ("mpirun", "jsrun", "srun", "none") - - class TestParserDefaults: def test_allow_abbrev(self) -> None: assert not m.parser.allow_abbrev diff --git a/tests/unit/legate/driver/test_driver.py b/tests/unit/legate/driver/test_driver.py index fad492a2f..652f73627 100644 --- a/tests/unit/legate/driver/test_driver.py +++ b/tests/unit/legate/driver/test_driver.py @@ -21,11 +21,11 @@ from pytest_mock import MockerFixture import legate.driver.driver as m -from legate.driver.args import LAUNCHERS from legate.driver.command import CMD_PARTS from legate.driver.config import Config from legate.driver.launcher import RANK_ENV_VARS, Launcher from legate.util.colors import scrub +from legate.util.shared_args import LAUNCHERS from legate.util.system import System from legate.util.types import LauncherType diff --git a/tests/unit/legate/driver/test_launcher.py b/tests/unit/legate/driver/test_launcher.py index ecf980d87..ebfc793c5 100644 --- a/tests/unit/legate/driver/test_launcher.py +++ b/tests/unit/legate/driver/test_launcher.py @@ -19,7 +19,7 @@ import pytest import legate.driver.launcher as m -from legate.driver.args import LAUNCHERS +from legate.util.shared_args import LAUNCHERS from legate.util.system import System from legate.util.types import LauncherType diff --git a/tests/unit/legate/driver/test_main.py b/tests/unit/legate/driver/test_main.py index 0992a226b..a5537afba 100644 --- a/tests/unit/legate/driver/test_main.py +++ b/tests/unit/legate/driver/test_main.py @@ -41,7 +41,7 @@ def test_main(mocker: MockerFixture) -> None: result = m.main() assert config_spy.call_count == 1 - assert config_spy.call_args[0][1:] == (["foo", "bar"],) + assert config_spy.call_args[0][1] == sys.argv assert config_spy.call_args[1] == {} assert system_spy.call_count == 1 diff --git a/tests/unit/legate/jupyter/__init__.py b/tests/unit/legate/jupyter/__init__.py new file mode 100644 index 000000000..f0b271624 --- /dev/null +++ b/tests/unit/legate/jupyter/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
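
For reference, the console script declared in setup.py above is roughly equivalent to the small wrapper below (a sketch; `legate.jupyter:main` is the entry point named in setup.py and returns an exit code):

    # Rough equivalent of the new `legate-jupyter` console script.
    import sys

    from legate.jupyter import main

    if __name__ == "__main__":
        sys.exit(main(sys.argv))
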
+# +from __future__ import annotations diff --git a/tests/unit/legate/jupyter/test_args.py b/tests/unit/legate/jupyter/test_args.py new file mode 100644 index 000000000..c0904927a --- /dev/null +++ b/tests/unit/legate/jupyter/test_args.py @@ -0,0 +1,104 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import legate.driver.args as m +import legate.driver.defaults as defaults + + +class TestParserDefaults: + def test_allow_abbrev(self) -> None: + assert not m.parser.allow_abbrev + + # kernel + + def test_no_user(self) -> None: + assert m.parser.get_default("user") is None + + def test_name(self) -> None: + assert m.parser.get_default("name") is None + + def test_display_name(self) -> None: + assert m.parser.get_default("display_name") is None + + def test_prefix(self) -> None: + assert m.parser.get_default("prefix") is None + + # multi_node + + def test_nodes(self) -> None: + assert m.parser.get_default("nodes") == defaults.LEGATE_NODES + + def test_ranks_per_node(self) -> None: + assert ( + m.parser.get_default("ranks_per_node") + == defaults.LEGATE_RANKS_PER_NODE + ) + + def test_no_replicate(self) -> None: + assert m.parser.get_default("not_control_replicable") is False + + def test_launcher(self) -> None: + assert m.parser.get_default("launcher") == "none" + + def test_launcher_extra(self) -> None: + assert m.parser.get_default("launcher_extra") == [] + + # core + + def test_cpus(self) -> None: + assert m.parser.get_default("cpus") == defaults.LEGATE_CPUS + + def test_gpus(self) -> None: + assert m.parser.get_default("gpus") == defaults.LEGATE_GPUS + + def test_omps(self) -> None: + assert m.parser.get_default("openmp") == defaults.LEGATE_OMP_PROCS + + def test_ompthreads(self) -> None: + assert ( + m.parser.get_default("ompthreads") == defaults.LEGATE_OMP_THREADS + ) + + def test_utility(self) -> None: + assert m.parser.get_default("utility") == defaults.LEGATE_UTILITY_CORES + + # memory + + def test_sysmem(self) -> None: + assert m.parser.get_default("sysmem") == defaults.LEGATE_SYSMEM + + def test_numamem(self) -> None: + assert m.parser.get_default("numamem") == defaults.LEGATE_NUMAMEM + + def test_fbmem(self) -> None: + assert m.parser.get_default("fbmem") == defaults.LEGATE_FBMEM + + def test_zcmem(self) -> None: + assert m.parser.get_default("zcmem") == defaults.LEGATE_ZCMEM + + def test_regmem(self) -> None: + assert m.parser.get_default("regmem") == defaults.LEGATE_REGMEM + + def test_eager_alloc(self) -> None: + assert ( + m.parser.get_default("eager_alloc") + == defaults.LEGATE_EAGER_ALLOC_PERCENTAGE + ) + + # info + + def test_verbose(self) -> None: + assert m.parser.get_default("verbose") is False diff --git a/tests/unit/legate/jupyter/test_config.py b/tests/unit/legate/jupyter/test_config.py new file mode 100644 index 000000000..3ee258a14 --- /dev/null +++ b/tests/unit/legate/jupyter/test_config.py @@ -0,0 +1,129 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache 
License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import call
+
+from pytest_mock import MockerFixture
+
+import legate.driver.defaults as defaults
+import legate.jupyter.config as m
+from legate.driver.config import Core, Memory, MultiNode
+from legate.util.types import DataclassMixin
+
+
+class TestKernel:
+    def test_fields(self) -> None:
+        assert set(m.Kernel.__dataclass_fields__) == {
+            "user",
+            "prefix",
+            "spec_name",
+            "display_name",
+        }
+
+    def test_mixin(self) -> None:
+        assert issubclass(m.Kernel, DataclassMixin)
+
+
+class TestConfig:
+    def test_default_init(self) -> None:
+
+        # Note this test does not clear the environment. Default values from
+        # the defaults module can depend on the environment, but what matters
+        # is that the generated config matches those values, whatever they are.
+
+        c = m.Config(["legate-jupyter"])
+
+        assert c.multi_node == m.MultiNode(
+            nodes=defaults.LEGATE_NODES,
+            ranks_per_node=defaults.LEGATE_RANKS_PER_NODE,
+            not_control_replicable=False,
+            launcher="none",
+            launcher_extra=[],
+        )
+        assert c.core == m.Core(
+            cpus=4,
+            gpus=0,
+            openmp=defaults.LEGATE_OMP_PROCS,
+            ompthreads=defaults.LEGATE_OMP_THREADS,
+            utility=defaults.LEGATE_UTILITY_CORES,
+        )
+        assert c.memory == m.Memory(
+            sysmem=defaults.LEGATE_SYSMEM,
+            numamem=defaults.LEGATE_NUMAMEM,
+            fbmem=defaults.LEGATE_FBMEM,
+            zcmem=defaults.LEGATE_ZCMEM,
+            regmem=defaults.LEGATE_REGMEM,
+            eager_alloc=defaults.LEGATE_EAGER_ALLOC_PERCENTAGE,
+        )
+
+        # These are all "turned off"
+
+        assert c.binding == m.Binding(
+            cpu_bind=None,
+            mem_bind=None,
+            gpu_bind=None,
+            nic_bind=None,
+        )
+
+        assert c.profiling == m.Profiling(
+            profile=False,
+            nvprof=False,
+            nsys=False,
+            nsys_targets="",
+            nsys_extra=[],
+        )
+
+        assert c.logging == m.Logging(
+            user_logging_levels=None,
+            logdir=Path("."),
+            log_to_file=False,
+            keep_logs=False,
+        )
+
+        assert c.debugging == m.Debugging(
+            gdb=False,
+            cuda_gdb=False,
+            memcheck=False,
+            freeze_on_error=False,
+            gasnet_trace=False,
+            dataflow=False,
+            event=False,
+        )
+
+        assert c.info == m.Info(progress=False, mem_usage=False, verbose=False)
+
+        assert c.other == m.Other(module=None, dry_run=False, rlwrap=False)
+
+    def test_arg_conversions(self, mocker: MockerFixture) -> None:
+
+        # This is kind of a dumb short-cut test, but if we believe that
+        # object_to_dataclass works as advertised, then this test ensures that
+        # it is being used for all the sub-configs that it should be used for
+
+        spy = mocker.spy(m, "object_to_dataclass")
+
+        c = m.Config(["legate"])
+
+        assert spy.call_count == 4
+        spy.assert_has_calls(
+            [
+                call(c._args, m.Kernel),
+                call(c._args, MultiNode),
+                call(c._args, Core),
+                call(c._args, Memory),
+            ]
+        )
diff --git a/tests/unit/legate/jupyter/test_kernel.py b/tests/unit/legate/jupyter/test_kernel.py
new file mode 100644
index 000000000..42925387b
--- /dev/null
+++ b/tests/unit/legate/jupyter/test_kernel.py
@@ -0,0 +1,172 @@
+# Copyright 2021-2022 NVIDIA Corporation
+#
+# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import json +from dataclasses import asdict + +from pytest_mock import MockerFixture + +import legate.jupyter.kernel as m +from legate.driver import Driver +from legate.jupyter.config import Config +from legate.util.system import System + +from ...util import Capsys + + +def test_LEGATE_JUPYTER_KERNEL_SPEC_KEY() -> None: + assert m.LEGATE_JUPYTER_KERNEL_SPEC_KEY == "__LEGATE_JUPYTER_KERNEL_SPEC__" + + +def test_LEGATE_JUPYTER_METADATA_KEY() -> None: + assert m.LEGATE_JUPYTER_METADATA_KEY == "legate" + + +system = System() + + +class Test_generate_kernel_spec: + def test_defatul(self) -> None: + config = Config([]) + driver = Driver(config, system) + + spec = m.generate_kernel_spec(driver, config) + + expected_env = { + k: v for k, v in driver.env.items() if k in driver.custom_env_vars + } + expected_env[ + m.LEGATE_JUPYTER_KERNEL_SPEC_KEY + ] = config.kernel.spec_name + + assert spec.display_name == config.kernel.display_name + assert spec.language == "python" + assert spec.argv[:-3] == list(driver.cmd) + assert spec.argv[-3].endswith("_legion_kernel.py") + assert spec.argv[-2:] == ["-f", "{connection_file}"] + assert spec.env == expected_env + assert m.LEGATE_JUPYTER_METADATA_KEY in spec.metadata + metadata = spec.metadata[m.LEGATE_JUPYTER_METADATA_KEY] + assert metadata == { + "argv": config.argv[1:], + "multi_node": asdict(config.multi_node), + "memory": asdict(config.memory), + "core": asdict(config.core), + } + + +class Test_install_kernel_spec: + def test_install(self, mocker: MockerFixture, capsys: Capsys) -> None: + install_mock = mocker.patch( + "jupyter_client.kernelspec.KernelSpecManager.install_kernel_spec" + ) + + config = Config( + ["legate-jupyter", "--name", "____fake_test_kernel_123abc_____"] + ) + driver = Driver(config, system) + + spec = m.generate_kernel_spec(driver, config) + + m.install_kernel_spec(spec, config) + + assert install_mock.call_count == 1 + assert install_mock.call_args[0][1] == config.kernel.spec_name + assert install_mock.call_args[1] == { + "user": config.kernel.user, + "prefix": config.kernel.prefix, + } + + out, _ = capsys.readouterr() + assert out == ( + f"Jupyter kernel spec {config.kernel.spec_name} " + f"({config.kernel.display_name}) " + "has been installed\n" + ) + + def test_install_verbose( + self, mocker: MockerFixture, capsys: Capsys + ) -> None: + install_mock = mocker.patch( + "jupyter_client.kernelspec.KernelSpecManager.install_kernel_spec" + ) + + config = Config( + [ + "legate-jupyter", + "-v", + "--name", + "____fake_test_kernel_123abc_____", + ] + ) + driver = Driver(config, system) + + spec = m.generate_kernel_spec(driver, config) + + m.install_kernel_spec(spec, config) + + assert install_mock.call_count == 1 + assert install_mock.call_args[0][1] == config.kernel.spec_name + assert install_mock.call_args[1] == { + "user": config.kernel.user, + "prefix": config.kernel.prefix, + } + + out, _ = capsys.readouterr() + assert out == ( + f"Wrote kernel spec file 
{config.kernel.spec_name}/kernel.json\n\n" + f"Jupyter kernel spec {config.kernel.spec_name} " + f"({config.kernel.display_name}) " + "has been installed\n" + ) + + def test_install_verbose2( + self, mocker: MockerFixture, capsys: Capsys + ) -> None: + install_mock = mocker.patch( + "jupyter_client.kernelspec.KernelSpecManager.install_kernel_spec" + ) + + config = Config( + [ + "legate-jupyter", + "-vv", + "--name", + "____fake_test_kernel_123abc_____", + ] + ) + driver = Driver(config, system) + + spec = m.generate_kernel_spec(driver, config) + + m.install_kernel_spec(spec, config) + + assert install_mock.call_count == 1 + assert install_mock.call_args[0][1] == config.kernel.spec_name + assert install_mock.call_args[1] == { + "user": config.kernel.user, + "prefix": config.kernel.prefix, + } + + out, _ = capsys.readouterr() + spec_json = json.dumps(spec.to_dict(), sort_keys=True, indent=2) + assert out == ( + f"Wrote kernel spec file {config.kernel.spec_name}/kernel.json\n\n" + f"\n{spec_json}\n\n" + f"Jupyter kernel spec {config.kernel.spec_name} " + f"({config.kernel.display_name}) " + "has been installed\n" + ) diff --git a/tests/unit/legate/jupyter/test_main.py b/tests/unit/legate/jupyter/test_main.py new file mode 100644 index 000000000..0e0159dc9 --- /dev/null +++ b/tests/unit/legate/jupyter/test_main.py @@ -0,0 +1,73 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
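
The tests above drive install_kernel_spec end-to-end with the KernelSpecManager mocked out. Against a real installation, the recorded spec can be inspected the same way the %legate_info magic does; a sketch, where the kernel name is illustrative and assumes a prior `legate-jupyter --name legate_test` run:

    # Sketch: inspecting a previously installed Legate kernel spec.
    from jupyter_client.kernelspec import KernelSpecManager

    spec = KernelSpecManager().get_kernel_spec("legate_test")
    print(spec.display_name)                # equals the spec name unless overridden
    print(spec.metadata["legate"]["core"])  # cpus/gpus/... captured at install time
    print(spec.metadata["legate"]["memory"])
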
+# +from __future__ import annotations + +import sys + +from pytest_mock import MockerFixture + +import legate.jupyter as m + +# main function shadows main module +# def test___all__() -> None: + +# The main() function is very simple, this test just confirms that +# all the expected plumbing is hooked up as it is supposed to be + + +def test_main(mocker: MockerFixture) -> None: + import legate.driver.driver + import legate.jupyter.config + import legate.util.system + + config_spy = mocker.spy(legate.jupyter.config.Config, "__init__") + system_spy = mocker.spy(legate.util.system.System, "__init__") + driver_spy = mocker.spy(legate.driver.driver.Driver, "__init__") + generate_spy = mocker.spy(legate.jupyter.kernel, "generate_kernel_spec") + install_mock = mocker.patch("legate.jupyter.kernel.install_kernel_spec") + mocker.patch.object(sys, "argv", ["legate-jupyter", "--name", "foo"]) + + m.main() + + assert config_spy.call_count == 1 + assert config_spy.call_args[0][1] == sys.argv + assert config_spy.call_args[1] == {} + + assert system_spy.call_count == 1 + assert system_spy.call_args[0][1:] == () + assert system_spy.call_args[1] == {} + + assert driver_spy.call_count == 1 + assert len(driver_spy.call_args[0]) == 3 + assert isinstance(driver_spy.call_args[0][1], legate.jupyter.config.Config) + assert isinstance(driver_spy.call_args[0][2], legate.util.system.System) + assert driver_spy.call_args[1] == {} + + assert generate_spy.call_count == 1 + assert len(generate_spy.call_args[0]) == 2 + assert isinstance( + generate_spy.call_args[0][0], legate.driver.driver.Driver + ) + assert isinstance( + generate_spy.call_args[0][1], legate.jupyter.config.Config + ) + assert generate_spy.call_args[1] == {} + + assert install_mock.call_count == 1 + assert install_mock.call_args[0][0] == generate_spy.spy_return + assert isinstance( + install_mock.call_args[0][1], legate.jupyter.config.Config + ) + assert install_mock.call_args[1] == {} diff --git a/tests/unit/legate/util/test_args.py b/tests/unit/legate/util/test_args.py index 83e3e02b3..f6c97f4ed 100644 --- a/tests/unit/legate/util/test_args.py +++ b/tests/unit/legate/util/test_args.py @@ -82,12 +82,11 @@ class TestArgSpec: def test_default(self) -> None: spec = m.ArgSpec("dest") assert spec.dest == "dest" - assert spec.action == "store_true" + assert spec.action == m.Unset # all others are unset assert set(m.entries(spec)) == { ("dest", "dest"), - ("action", "store_true"), } @@ -146,7 +145,9 @@ def test_help_override( self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys ) -> None: monkeypatch.setattr("sys.argv", ["app", "-foo:help"]) - args = [m.Argument("help", m.ArgSpec(dest="help"))] + args = [ + m.Argument("help", m.ArgSpec(action="store_true", dest="help")) + ] ns = m.parse_library_command_args("foo", args) out, err = capsys.readouterr() assert out == "" @@ -158,7 +159,7 @@ def test_basic( ) -> None: monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "-foo:quux", "1"]) args = [ - m.Argument("bar", m.ArgSpec(dest="bar")), + m.Argument("bar", m.ArgSpec(action="store_true", dest="bar")), m.Argument( "quux", m.ArgSpec(dest="quux", action="store", type=int) ), @@ -173,7 +174,7 @@ def test_extra_args_passed_on( self, monkeypatch: pytest.MonkeyPatch, capsys: Capsys ) -> None: monkeypatch.setattr("sys.argv", ["app", "-foo:bar", "--extra", "1"]) - args = [m.Argument("bar", m.ArgSpec(dest="bar"))] + args = [m.Argument("bar", m.ArgSpec(action="store_true", dest="bar"))] ns = m.parse_library_command_args("foo", args) out, err = capsys.readouterr() assert out 
== "" @@ -208,7 +209,7 @@ def test_no_prefix_conflict( monkeypatch.setattr( "sys.argv", ["app", "-foo:bar", "--foo", "-f", "1", "-ff"] ) - args = [m.Argument("bar", m.ArgSpec(dest="bar"))] + args = [m.Argument("bar", m.ArgSpec(action="store_true", dest="bar"))] ns = m.parse_library_command_args("foo", args) out, err = capsys.readouterr() assert out == "" diff --git a/typings/IPython/__init__.pyi b/typings/IPython/__init__.pyi new file mode 100644 index 000000000..13b35e47e --- /dev/null +++ b/typings/IPython/__init__.pyi @@ -0,0 +1,20 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from .core.magic import Magics + +class InteractiveShell: + def register_magics(self, *objs: Magics) -> None: ... diff --git a/typings/IPython/core/magic.pyi b/typings/IPython/core/magic.pyi new file mode 100644 index 000000000..354c7ce2c --- /dev/null +++ b/typings/IPython/core/magic.pyi @@ -0,0 +1,28 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import Any, Callable, TypeVar + +from typing_extensions import ParamSpec + +class Magics: + def __init__(self, shell: Any) -> None: ... + +R = TypeVar("R") +P = ParamSpec("P") + +line_magic: Callable[[Callable[P, R]], Callable[P, R]] +magics_class: Callable[[Callable[P, R]], Callable[P, R]] diff --git a/typings/jupyter_client/__init__.pyi b/typings/jupyter_client/__init__.pyi new file mode 100644 index 000000000..e69de29bb diff --git a/typings/jupyter_client/kernelspec.pyi b/typings/jupyter_client/kernelspec.pyi new file mode 100644 index 000000000..d69b0e3b7 --- /dev/null +++ b/typings/jupyter_client/kernelspec.pyi @@ -0,0 +1,40 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
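
The test updates above follow from the ArgSpec change earlier in this series: `action` no longer defaults to "store_true", so boolean flags must request it explicitly, while value-taking options can leave `action` unset and fall back to argparse's default "store" behavior. A small sketch of the distinction, reusing the option names from the tests:

    # Sketch of the two kinds of options after the ArgSpec default change.
    from legate.util.args import ArgSpec, Argument

    # A boolean flag now has to ask for "store_true" explicitly ...
    BAR = Argument("bar", ArgSpec(dest="bar", action="store_true"))

    # ... while a value option may leave `action` unset (argparse "store").
    QUUX = Argument("quux", ArgSpec(dest="quux", type=int))
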
+# +from __future__ import annotations + +from typing import Any + +class KernelSpec: + display_name: str + metadata: dict[str, Any] + + def __init__( + self, + argv: list[str], + env: dict[str, str], + display_name: str, + language: str, + metadata: dict[str, Any], + ) -> None: ... + def to_dict(self) -> dict[str, Any]: ... + +class NoSuchKernel(Exception): ... + +class KernelSpecManager: + def __init__(self, **kwargs: Any) -> None: ... + def get_kernel_spec(self, kernel_name: str) -> KernelSpec: ... + def install_kernel_spec( + self, source_dir: str, kernel_name: str, user: bool, prefix: str | None + ) -> None: ... From 2a9617c3af0d4a2e0eed54b45b1e5160dcb351bc Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 17 Oct 2022 20:13:57 -0700 Subject: [PATCH 025/121] APIs that GH 437 should have included (#443) --- legate/core/operation.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/legate/core/operation.py b/legate/core/operation.py index a3b2aef17..fcd626acb 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -247,6 +247,13 @@ def __init__( self._tb: Union[None, TracebackType] = None self._side_effect = False + @property + def side_effect(self) -> bool: + return self._side_effect + + def set_side_effect(self, side_effect: bool) -> None: + self._side_effect = side_effect + @property def uses_communicator(self) -> bool: return len(self._comm_args) > 0 From 180c23ed8a63d1646c9b1b0be65a1ba2bd1da23c Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 18 Oct 2022 16:38:17 +0800 Subject: [PATCH 026/121] Support python coverage test. (#431) * Support python coverage test. * Address comments * Address comments - part-2 * Add unit tests for cov_args related. --- legate/tester/args.py | 27 +++++++++++++++++ legate/tester/config.py | 5 +++- legate/tester/stages/test_stage.py | 23 ++++++++++++++- .../legate/tester/stages/test_test_stage.py | 29 +++++++++++++++++++ tests/unit/legate/tester/test_config.py | 9 ++++++ 5 files changed, 91 insertions(+), 2 deletions(-) diff --git a/legate/tester/args.py b/legate/tester/args.py index 6c3f24962..fcb680db1 100644 --- a/legate/tester/args.py +++ b/legate/tester/args.py @@ -187,6 +187,33 @@ ) +test_opts.add_argument( + "--cov-bin", + default=None, + help=( + "coverage binary location, " + "e.g. /conda_path/envs/env_name/bin/coverage" + ), +) + + +test_opts.add_argument( + "--cov-args", + default="run -a --branch", + help="coverage run command arguments, e.g. run -a --branch", +) + + +test_opts.add_argument( + "--cov-src-path", + default=None, + help=( + "path value of --source in coverage run command, " + "e.g. 
/project_path/cunumeric/cunumeric" + ), +) + + test_opts.add_argument( "-j", "--workers", diff --git a/legate/tester/config.py b/legate/tester/config.py index 497c3a385..e5cf412fc 100644 --- a/legate/tester/config.py +++ b/legate/tester/config.py @@ -43,7 +43,7 @@ def __init__(self, argv: ArgList) -> None: args, self._extra_args = parser.parse_known_args(self.argv[1:]) # which tests to run - self.examples = True + self.examples = False if args.cov_bin else True self.integration = True self.unit = args.unit self.files = args.files @@ -68,6 +68,9 @@ def __init__(self, argv: ArgList) -> None: self.test_root = args.test_root self.requested_workers = args.workers self.legate_dir = self._compute_legate_dir(args) + self.cov_bin = args.cov_bin + self.cov_args = args.cov_args + self.cov_src_path = args.cov_src_path @property def env(self) -> EnvDict: diff --git a/legate/tester/stages/test_stage.py b/legate/tester/stages/test_stage.py index c21fdd630..5962500bf 100644 --- a/legate/tester/stages/test_stage.py +++ b/legate/tester/stages/test_stage.py @@ -205,6 +205,24 @@ def file_args(self, test_file: Path, config: Config) -> ArgList: return args + def cov_args(self, config: Config) -> ArgList: + """Coverage binary and coverage arguments. + + Parameters + ---------- + config: Config + Test runner configuration + + """ + if config.cov_bin: + args = [str(config.cov_bin)] + config.cov_args.split() + if config.cov_src_path: + args += ["--source", str(config.cov_src_path)] + else: + args = [] + + return args + def run( self, test_file: Path, config: Config, system: TestSystem ) -> ProcessResult: @@ -227,10 +245,13 @@ def run( shard = self.shards.get() + cov_args = self.cov_args(config) + + cmd = [str(config.legate_path)] + cov_args + [str(test_path)] + stage_args = self.args + self.shard_args(shard, config) file_args = self.file_args(test_file, config) - cmd = [str(config.legate_path), str(test_path)] cmd += stage_args + file_args + config.extra_args self.delay(shard, config, system) diff --git a/tests/unit/legate/tester/stages/test_test_stage.py b/tests/unit/legate/tester/stages/test_test_stage.py index 90edfaed4..6a5678c22 100644 --- a/tests/unit/legate/tester/stages/test_test_stage.py +++ b/tests/unit/legate/tester/stages/test_test_stage.py @@ -86,3 +86,32 @@ def test_file_args_vv(self) -> None: stage = MockTestStage(c, s) assert stage.file_args(Path("integration/foo"), c) == ["-v", "-s"] assert stage.file_args(Path("unit/foo"), c) == [] + + def test_cov_args_without_cov_bin(self) -> None: + c = m.Config(["test.py", "--cov-args", "run -a"]) + stage = MockTestStage(c, s) + assert stage.cov_args(c) == [] + + def test_cov_args_with_cov_bin(self) -> None: + cov_bin = "conda/envs/legate/bin/coverage" + args = ["--cov-bin", cov_bin] + c = m.Config(["test.py"] + args) + expected_result = [cov_bin] + c.cov_args.split() + stage = MockTestStage(c, s) + assert stage.cov_args(c) == expected_result + + def test_cov_args_with_cov_bin_args_and_src_path(self) -> None: + cov_bin = "conda/envs/legate/bin/coverage" + cov_args = "run -a" + cov_src_path = "source_path" + args = ( + ["--cov-bin", cov_bin] + + ["--cov-args", cov_args] + + ["--cov-src-path", cov_src_path] + ) + c = m.Config(["test.py"] + args) + expected_result = ( + [cov_bin] + cov_args.split() + ["--source", cov_src_path] + ) + stage = MockTestStage(c, s) + assert stage.cov_args(c) == expected_result diff --git a/tests/unit/legate/tester/test_config.py b/tests/unit/legate/tester/test_config.py index d55104980..2d4326d69 100644 --- 
a/tests/unit/legate/tester/test_config.py +++ b/tests/unit/legate/tester/test_config.py @@ -71,6 +71,10 @@ def test_default_init(self) -> None: assert c.legate_path == "legate" + assert c.cov_bin is None + assert c.cov_args == "run -a --branch" + assert c.cov_src_path is None + @pytest.mark.parametrize("feature", FEATURES) def test_env_features( self, monkeypatch: pytest.MonkeyPatch, feature: str @@ -180,3 +184,8 @@ def test_extra_args(self) -> None: assert c.extra_args == extra c = m.Config(["test.py"] + extra + ["--files", "a", "b"]) assert c.extra_args == extra + + def test_cov_args(self) -> None: + cov_args = ["--cov-args", "run -a"] + c = m.Config(["test.py"] + cov_args) + assert c.cov_args == "run -a" From d30f6ca47b0bfa0843833413f1d008d1ae030fbd Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 18 Oct 2022 15:32:08 -0700 Subject: [PATCH 027/121] Make terminal colors explicitly opt-in (#445) * checkpoint * Make terminal colors explicitly opt-in --- legate/driver/args.py | 8 +++++ legate/driver/config.py | 3 ++ legate/jupyter/args.py | 8 +++++ legate/jupyter/config.py | 3 ++ legate/tester/args.py | 8 +++++ legate/tester/config.py | 3 ++ legate/util/colors.py | 22 +++++++++++++ tests/unit/legate/driver/test_config.py | 8 +++++ tests/unit/legate/jupyter/test_config.py | 8 +++++ tests/unit/legate/tester/test_config.py | 8 +++++ tests/unit/legate/util/test_colors.py | 42 ++++++++++++++++++++++-- 11 files changed, 119 insertions(+), 2 deletions(-) diff --git a/legate/driver/args.py b/legate/driver/args.py index cc8667384..f281d22e3 100755 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -330,3 +330,11 @@ required=False, help="Whether to run with rlwrap to improve readline ability", ) + +other.add_argument( + "--color", + dest="color", + action="store_true", + required=False, + help="Whether to use color terminal output (if colorama is installed)", +) diff --git a/legate/driver/config.py b/legate/driver/config.py index 5e42bc584..b1192b665 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -23,6 +23,7 @@ from pathlib import Path from typing import Any, Protocol +from ..util import colors from ..util.types import ( ArgList, DataclassMixin, @@ -157,6 +158,8 @@ def __init__(self, argv: ArgList) -> None: args, extra = parser.parse_known_args(self.argv[1:]) + colors.ENABLED = args.color + # only saving this for help with testing self._args = args diff --git a/legate/jupyter/args.py b/legate/jupyter/args.py index 77c16b66a..7f80a49a1 100755 --- a/legate/jupyter/args.py +++ b/legate/jupyter/args.py @@ -105,3 +105,11 @@ default=0, help="Display verbose output. 
Use -vv for even more output (test stdout)", ) + +info.add_argument( + "--color", + dest="color", + action="store_true", + required=False, + help="Whether to use color terminal output (if colorama is installed)", +) diff --git a/legate/jupyter/config.py b/legate/jupyter/config.py index 52c44c00f..745238b63 100644 --- a/legate/jupyter/config.py +++ b/legate/jupyter/config.py @@ -20,6 +20,7 @@ from dataclasses import dataclass from pathlib import Path +import legate.util.colors as colors from legate.driver.config import ( Binding, Core, @@ -64,6 +65,8 @@ def __init__(self, argv: ArgList) -> None: # only saving these for help with testing self._args = args + colors.ENABLED = args.color + if args.display_name is None: args.display_name = args.spec_name diff --git a/legate/tester/args.py b/legate/tester/args.py index fcb680db1..0645fea9e 100644 --- a/legate/tester/args.py +++ b/legate/tester/args.py @@ -248,3 +248,11 @@ action="store_true", help="Print out the commands that are to be executed", ) + +parser.add_argument( + "--color", + dest="color", + action="store_true", + required=False, + help="Whether to use color terminal output (if colorama is installed)", +) diff --git a/legate/tester/config.py b/legate/tester/config.py index e5cf412fc..39441e433 100644 --- a/legate/tester/config.py +++ b/legate/tester/config.py @@ -21,6 +21,7 @@ from argparse import Namespace from pathlib import Path +from ..util import colors from ..util.types import ArgList, EnvDict from . import DEFAULT_PROCESS_ENV, FEATURES, SKIPPED_EXAMPLES, FeatureType from .args import parser @@ -42,6 +43,8 @@ def __init__(self, argv: ArgList) -> None: args, self._extra_args = parser.parse_known_args(self.argv[1:]) + colors.ENABLED = args.color + # which tests to run self.examples = False if args.cov_bin else True self.integration = True diff --git a/legate/util/colors.py b/legate/util/colors.py index 5bb0b14b3..6c417c221 100644 --- a/legate/util/colors.py +++ b/legate/util/colors.py @@ -37,6 +37,12 @@ ) +# Color terminal output needs to be explicitly opt-in. 
Applications that want +# to enable it should set this global flag to True, e.g based on a command line +# argument or other user-supplied configuration +ENABLED = False + + def _text(text: str) -> str: return text @@ -45,27 +51,43 @@ def _text(text: str) -> str: import colorama # type: ignore[import] def bright(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Style.BRIGHT}{text}{colorama.Style.RESET_ALL}" def dim(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Style.DIM}{text}{colorama.Style.RESET_ALL}" def white(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.WHITE}{text}{colorama.Style.RESET_ALL}" def cyan(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.CYAN}{text}{colorama.Style.RESET_ALL}" def red(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.RED}{text}{colorama.Style.RESET_ALL}" def magenta(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.MAGENTA}{text}{colorama.Style.RESET_ALL}" def green(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.GREEN}{text}{colorama.Style.RESET_ALL}" def yellow(text: str) -> str: + if not ENABLED: + return text return f"{colorama.Fore.YELLOW}{text}{colorama.Style.RESET_ALL}" if sys.platform == "win32": diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 536289221..104f95f58 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -23,6 +23,7 @@ import legate.driver.config as m import legate.driver.defaults as defaults +from legate.util import colors from legate.util.colors import scrub from legate.util.types import DataclassMixin @@ -173,6 +174,8 @@ def test_default_init(self) -> None: c = m.Config(["legate"]) + assert colors.ENABLED is False + assert c.multi_node == m.MultiNode( nodes=defaults.LEGATE_NODES, ranks_per_node=defaults.LEGATE_RANKS_PER_NODE, @@ -232,6 +235,11 @@ def test_default_init(self) -> None: assert c.other == m.Other(module=None, dry_run=False, rlwrap=False) + def test_color_arg(self) -> None: + m.Config(["legate", "--color"]) + + assert colors.ENABLED is True + def test_arg_conversions(self, mocker: MockerFixture) -> None: # This is kind of a dumb short-cut test, but if we believe that diff --git a/tests/unit/legate/jupyter/test_config.py b/tests/unit/legate/jupyter/test_config.py index 3ee258a14..4e956ff85 100644 --- a/tests/unit/legate/jupyter/test_config.py +++ b/tests/unit/legate/jupyter/test_config.py @@ -22,6 +22,7 @@ import legate.driver.defaults as defaults import legate.jupyter.config as m from legate.driver.config import Core, Memory, MultiNode +from legate.util import colors from legate.util.types import DataclassMixin @@ -47,6 +48,8 @@ def test_default_init(self) -> None: c = m.Config(["legate-jupyter"]) + assert colors.ENABLED is False + assert c.multi_node == m.MultiNode( nodes=defaults.LEGATE_NODES, ranks_per_node=defaults.LEGATE_RANKS_PER_NODE, @@ -108,6 +111,11 @@ def test_default_init(self) -> None: assert c.other == m.Other(module=None, dry_run=False, rlwrap=False) + def test_color_arg(self) -> None: + m.Config(["legate-jupyter", "--color"]) + + assert colors.ENABLED is True + def test_arg_conversions(self, mocker: MockerFixture) -> None: # This is kind of a dumb short-cut test, but if we believe that diff --git a/tests/unit/legate/tester/test_config.py b/tests/unit/legate/tester/test_config.py index 2d4326d69..f0e351caf 100644 --- 
a/tests/unit/legate/tester/test_config.py +++ b/tests/unit/legate/tester/test_config.py @@ -32,12 +32,15 @@ config as m, ) from legate.tester.args import PIN_OPTIONS, PinOptionsType +from legate.util import colors class TestConfig: def test_default_init(self) -> None: c = m.Config([]) + assert colors.ENABLED is False + assert c.examples is True assert c.integration is True assert c.unit is False @@ -75,6 +78,11 @@ def test_default_init(self) -> None: assert c.cov_args == "run -a --branch" assert c.cov_src_path is None + def test_color_arg(self) -> None: + m.Config(["test.py", "--color"]) + + assert colors.ENABLED is True + @pytest.mark.parametrize("feature", FEATURES) def test_env_features( self, monkeypatch: pytest.MonkeyPatch, feature: str diff --git a/tests/unit/legate/util/test_colors.py b/tests/unit/legate/util/test_colors.py index 873f3dc53..60dce0ec3 100644 --- a/tests/unit/legate/util/test_colors.py +++ b/tests/unit/legate/util/test_colors.py @@ -57,9 +57,17 @@ def use_plain_text(mocker: MockerFixture) -> None: ) +def test_default_ENABLED() -> None: + assert m.ENABLED is False + + @pytest.mark.skipif(colorama is None, reason="colorama required") @pytest.mark.parametrize("color", COLOR_FUNCS) -def test_color_functions(color: str) -> None: +def test_color_functions_ENABLED_True( + mocker: MockerFixture, color: str +) -> None: + mocker.patch.object(m, "ENABLED", True) + cfunc = getattr(m, color) cprop = getattr(colorama.Fore, color.upper()) @@ -68,9 +76,26 @@ def test_color_functions(color: str) -> None: assert out == f"{cprop}some text{colorama.Style.RESET_ALL}" +@pytest.mark.parametrize("color", COLOR_FUNCS) +def test_color_functions_ENABLED_False( + mocker: MockerFixture, color: str +) -> None: + mocker.patch.object(m, "ENABLED", False) + + cfunc = getattr(m, color) + + out = cfunc("some text") + + assert out == "some text" + + @pytest.mark.skipif(colorama is None, reason="colorama required") @pytest.mark.parametrize("style", STYLE_FUNCS) -def test_style_functions(style: str) -> None: +def test_style_functions_ENABLED_True( + mocker: MockerFixture, style: str +) -> None: + mocker.patch.object(m, "ENABLED", True) + sfunc = getattr(m, style) sprop = getattr(colorama.Style, style.upper()) @@ -79,6 +104,19 @@ def test_style_functions(style: str) -> None: assert out == f"{sprop}some text{colorama.Style.RESET_ALL}" +@pytest.mark.parametrize("style", STYLE_FUNCS) +def test_style_functions_ENABLED_False( + mocker: MockerFixture, style: str +) -> None: + mocker.patch.object(m, "ENABLED", False) + + sfunc = getattr(m, style) + + out = sfunc("some text") + + assert out == "some text" + + @pytest.mark.skipif(colorama is None, reason="colorama required") @pytest.mark.parametrize("color", COLOR_FUNCS) @pytest.mark.parametrize("style", STYLE_FUNCS) From 90fb1ee91eaa6492a7d5fa709c5e8f138aa4459c Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 19 Oct 2022 10:05:07 -0700 Subject: [PATCH 028/121] Allow launcher_extra to split quoted values (#444) * Allow launcher_extra to split quoted values * use shlex.split to preserve sub-quotes * avoid over-quoting * same treatment for nsys_extra * docs --- legate/driver/args.py | 4 +- legate/driver/config.py | 19 +++++ legate/util/shared_args.py | 3 +- tests/unit/legate/driver/test_config.py | 100 ++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 2 deletions(-) diff --git a/legate/driver/args.py b/legate/driver/args.py index f281d22e3..c473efa5f 100755 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -162,7 +162,9 @@ 
action="append", default=[], required=False, - help="Specify extra flags for Nsight Systems", + help="Specify extra flags for Nsight Systems (can appear more than once). " + "Multiple arguments may be provided together in a quoted string " + "(arguments with spaces inside must be additionally quoted)", ) logging = parser.add_argument_group("Logging") diff --git a/legate/driver/config.py b/legate/driver/config.py index b1192b665..470cca123 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -17,6 +17,7 @@ """ from __future__ import annotations +import shlex from argparse import Namespace from dataclasses import dataclass from functools import cached_property @@ -44,6 +45,16 @@ class MultiNode(DataclassMixin): launcher: LauncherType launcher_extra: list[str] + def __post_init__(self, **kw: dict[str, Any]) -> None: + # fix up launcher_extra to automaticaly handle quoted strings with + # internal whitespace, have to use __setattr__ for frozen + # https://docs.python.org/3/library/dataclasses.html#frozen-instances + if self.launcher_extra: + ex: list[str] = sum( + (shlex.split(x) for x in self.launcher_extra), [] + ) + object.__setattr__(self, "launcher_extra", ex) + @property def ranks(self) -> int: return self.nodes * self.ranks_per_node @@ -84,6 +95,14 @@ class Profiling(DataclassMixin): nsys_targets: str # TODO: multi-choice nsys_extra: list[str] + def __post_init__(self, **kw: dict[str, Any]) -> None: + # fix up nsys_extra to automaticaly handle quoted strings with + # internal whitespace, have to use __setattr__ for frozen + # https://docs.python.org/3/library/dataclasses.html#frozen-instances + if self.nsys_extra: + ex: list[str] = sum((shlex.split(x) for x in self.nsys_extra), []) + object.__setattr__(self, "nsys_extra", ex) + @dataclass(frozen=True) class Logging(DataclassMixin): diff --git a/legate/util/shared_args.py b/legate/util/shared_args.py index 46def8642..688c0bfa3 100644 --- a/legate/util/shared_args.py +++ b/legate/util/shared_args.py @@ -96,7 +96,8 @@ default=[], required=False, help="additional argument to pass to the launcher (can appear more " - "than once)", + "than once). 
Multiple arguments may be provided together in a quoted " + "string (arguments with spaces inside must be additionally quoted)", ), ) diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 104f95f58..2408bfe08 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -55,6 +55,56 @@ def test_fields(self) -> None: def test_mixin(self) -> None: assert issubclass(m.MultiNode, DataclassMixin) + @pytest.mark.parametrize( + "extra", + (["a"], ["a", "b c"], ["a", "b c", "d e"], ["a", "b c", "d e", "f"]), + ) + def test_launcher_extra_fixup_basic(self, extra) -> None: + mn = m.MultiNode( + nodes=1, + ranks_per_node=1, + not_control_replicable=False, + launcher="launcher", + launcher_extra=extra, + ) + assert mn.launcher_extra == sum((x.split() for x in extra), []) + + def test_launcher_extra_fixup_complex(self) -> None: + mn = m.MultiNode( + nodes=1, + ranks_per_node=1, + not_control_replicable=False, + launcher="launcher", + launcher_extra=[ + "-H g0002,g0002 -X SOMEENV --fork", + "-bind-to none", + ], + ) + assert mn.launcher_extra == [ + "-H", + "g0002,g0002", + "-X", + "SOMEENV", + "--fork", + "-bind-to", + "none", + ] + + def test_launcher_extra_fixup_quoted(self) -> None: + mn = m.MultiNode( + nodes=1, + ranks_per_node=1, + not_control_replicable=False, + launcher="launcher", + launcher_extra=[ + "-f 'some path with spaces/foo.txt'", + ], + ) + assert mn.launcher_extra == [ + "-f", + "some path with spaces/foo.txt", + ] + class TestBinding: def test_fields(self) -> None: @@ -111,6 +161,56 @@ def test_fields(self) -> None: def test_mixin(self) -> None: assert issubclass(m.Profiling, DataclassMixin) + @pytest.mark.parametrize( + "extra", + (["a"], ["a", "b c"], ["a", "b c", "d e"], ["a", "b c", "d e", "f"]), + ) + def test_nsys_extra_fixup_basic(self, extra) -> None: + p = m.Profiling( + profile=True, + nvprof=True, + nsys=True, + nsys_targets="foo,bar", + nsys_extra=extra, + ) + assert p.nsys_extra == sum((x.split() for x in extra), []) + + def test_nsys_extra_fixup_complex(self) -> None: + p = m.Profiling( + profile=True, + nvprof=True, + nsys=True, + nsys_targets="foo,bar", + nsys_extra=[ + "-H g0002,g0002 -X SOMEENV --fork", + "-bind-to none", + ], + ) + assert p.nsys_extra == [ + "-H", + "g0002,g0002", + "-X", + "SOMEENV", + "--fork", + "-bind-to", + "none", + ] + + def test_nsys_extra_fixup_quoted(self) -> None: + p = m.Profiling( + profile=True, + nvprof=True, + nsys=True, + nsys_targets="foo,bar", + nsys_extra=[ + "-f 'some path with spaces/foo.txt'", + ], + ) + assert p.nsys_extra == [ + "-f", + "some path with spaces/foo.txt", + ] + class TestLogging: def test_fields(self) -> None: From 6792d4d2ae8a6f0c79a5370ce40b91edb9c8d335 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 19 Oct 2022 11:23:53 -0700 Subject: [PATCH 029/121] Update CMakeLists.txt (#446) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 74e640445..fa2ce2cf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -65,7 +65,7 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -set(legate_core_version 22.10.00) +set(legate_core_version 22.12.00) # For now we want the optimization flags to match on both normal make and cmake # builds so we override the cmake defaults here for release, this changes From b5ce4428719a41c1113ec536a63acf68767fa9fd Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 21 Oct 2022 14:30:09 -0700 Subject: 
[PATCH 030/121] remove unncecessary exec bit (#451) --- legate/driver/args.py | 0 legate/jupyter/args.py | 0 legate/lgpatch.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 legate/driver/args.py mode change 100755 => 100644 legate/jupyter/args.py mode change 100755 => 100644 legate/lgpatch.py diff --git a/legate/driver/args.py b/legate/driver/args.py old mode 100755 new mode 100644 diff --git a/legate/jupyter/args.py b/legate/jupyter/args.py old mode 100755 new mode 100644 diff --git a/legate/lgpatch.py b/legate/lgpatch.py old mode 100755 new mode 100644 From bbc69e5383a0abcb5e2099a6eb8912578bf099db Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 25 Oct 2022 08:49:57 -0700 Subject: [PATCH 031/121] Better error when GPU detection fails (#448) --- legate/tester/test_plan.py | 6 +++--- legate/util/system.py | 8 +++++++- tests/unit/legate/util/test_system.py | 14 +++++++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/legate/tester/test_plan.py b/legate/tester/test_plan.py index cc877f7a4..634790758 100644 --- a/legate/tester/test_plan.py +++ b/legate/tester/test_plan.py @@ -79,9 +79,9 @@ def intro(self) -> str: cpus = len(self._system.cpus) try: - gpus = len(self._system.gpus) - except ImportError: - gpus = 0 + gpus: int | str = len(self._system.gpus) + except RuntimeError: + gpus = "N/A" details = ( f"* Feature stages : {', '.join(yellow(x) for x in self._config.features)}", # noqa E501 diff --git a/legate/util/system.py b/legate/util/system.py index 5fbabe1d0..ba48e6ac5 100644 --- a/legate/util/system.py +++ b/legate/util/system.py @@ -121,7 +121,13 @@ def gpus(self) -> tuple[GPUInfo, ...]: # fail. pynvml.nvmlInit() except Exception: - return () + if platform.system() == "Darwin": + raise RuntimeError("GPU execution is not available on OSX.") + else: + raise RuntimeError( + "GPU detection failed. Make sure nvml and pynvml are " + "both installed." + ) num_gpus = pynvml.nvmlDeviceGetCount() diff --git a/tests/unit/legate/util/test_system.py b/tests/unit/legate/util/test_system.py index 3ae242b6f..c3a5d6184 100644 --- a/tests/unit/legate/util/test_system.py +++ b/tests/unit/legate/util/test_system.py @@ -15,7 +15,7 @@ from __future__ import annotations import os -import sys +import platform import pytest from pytest_mock import MockerFixture @@ -98,8 +98,16 @@ def test_cpus(self) -> None: assert len(cpus) > 0 assert all(len(cpu.ids) > 0 for cpu in cpus) - @pytest.mark.skipif(sys.platform != "linux", reason="pynvml required") - def test_gpus(self) -> None: + @pytest.mark.skipif(platform.system() != "Linux", reason="Linux test") + def test_gpus_linux(self) -> None: s = m.System() # can't really assume / test much here s.gpus + + @pytest.mark.skipif(platform.system() != "Darwin", reason="OSX test") + def test_gpus_osx(self) -> None: + s = m.System() + + msg = "GPU execution is not available on OSX." 
+ with pytest.raises(RuntimeError, msg=msg): + s.gpus From 5512ffb0e2c7bdecb522fa9b98c9276111c4cf61 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 25 Oct 2022 10:44:32 -0700 Subject: [PATCH 032/121] Add script to generate conda envs (#367) * Add script to generate conda envs * remove gcc and sysroot pkgs * split out openmpi and compilers options * Adjust consensus match frequency based on field sizes (#402) * Perform consensus match more frequently for bigger free fields * Minor cleanup * add command line args for selection * help wording * Make script executable * Fixes for python 3.8 * Remove old environment files * Unify file naming for "compilers" and "openmpi" * Fix typo * Remove optional ninja dependency * Not just for the core, include cunumeric also * Update build documentation * Fix a file link * Fix formatting * remove typing_extensions dependency * remove jinja dependency * slight vertical whitespace improvemetn * Use custom BooleanFlag action, for the benefit of py3.8 * Update build instructions * Fix intra-document reference * Revise file naming scheme * Update BUILD.md Co-authored-by: Wonchan Lee Co-authored-by: Manolis Papadakis Co-authored-by: Manolis Papadakis --- BUILD.md | 240 ++++++++++++++++++-- README.md | 140 ++---------- conda/environment-test-3.10.yml | 58 ----- conda/environment-test-3.8.yml | 58 ----- conda/environment-test-3.9.yml | 58 ----- scripts/generate-conda-envs.py | 381 ++++++++++++++++++++++++++++++++ 6 files changed, 618 insertions(+), 317 deletions(-) delete mode 100644 conda/environment-test-3.10.yml delete mode 100644 conda/environment-test-3.8.yml delete mode 100644 conda/environment-test-3.9.yml create mode 100755 scripts/generate-conda-envs.py diff --git a/BUILD.md b/BUILD.md index 8bf5f1ac7..5abb0af8d 100644 --- a/BUILD.md +++ b/BUILD.md @@ -15,40 +15,227 @@ limitations under the License. --> -# Overview +# TL;DR -The build system is designed to enable two different modes of use: -1. Simple `install.py` helper script or `pip install` for users -2. Highly customizable incremental builds for developers +1) Check if there are specialized scripts available for your cluster at https://github.com/nv-legate/quickstart. +2) [Install dependencies from conda](#getting-dependencies-through-conda) +3) [Build using install.py](#using-installpy) -We review each of these modes with examples. +# Getting dependencies +## Getting dependencies through conda + +The primary method of retrieving dependencies for Legate Core and downstream +libraries is through [conda](https://conda.io). You will need an installation of +conda to follow the instructions below. + +Please use the `scripts/generate-conda-envs.py` script to create a conda +environment file listing all the packages that are required to build, run and +test Legate Core and all downstream libraries. For example: + +``` +$ ./scripts/generate-conda-envs.py --python 3.10 --ctk 11.7 --os linux --compilers --openmpi +--- generating: environment-test-linux-py310-cuda-11.7-compilers-openmpi.yaml +``` + +Run this script with `-h` to see all available configuration options for the +generated environment file (e.g. all the supported Python versions). See the +[Notable Dependencies](#notable-dependencies) section for more details. 
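If you need environment files for several configurations at once, the generator can also be driven from a small script. The sketch below is purely illustrative (it is not part of the repository); it only assumes the script location and the flag names shown above.

```python
#!/usr/bin/env python3
# Illustrative helper (not part of the repository): batch-generate environment
# files for a few Python/CTK combinations by shelling out to the generator.
import subprocess

for python in ("3.9", "3.10"):
    for ctk in ("none", "11.7"):
        subprocess.run(
            [
                "./scripts/generate-conda-envs.py",
                "--python", python,
                "--ctk", ctk,
                "--os", "linux",
                "--compilers",
                "--openmpi",
            ],
            check=True,
        )
```

In the common case a single invocation like the one shown earlier is all you need; each run prints the name of the file it generates.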
+ +Once you have this environment file, you can install the required packages by +creating a new conda environment: + +``` +conda env create -n legate -f .yaml +``` + +or by updating an existing environment: + +``` +conda env update -f .yaml +``` + +## Notable dependencies + +### OS (`--os` option) + +Legate has been tested on Linux and MacOS, although only a few flavors of Linux +such as Ubuntu have been thoroughly tested. There is currently no support for +Windows. + +### Python >= 3.8 (`--python` option) + +In terms of Python compatibility, Legate *roughly* follows the timeline outlined +in [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html). + +### C++17 compatible compiler (`--compilers` option) + +For example: g++, clang, or nvc++. When creating an environment using the +`--compilers` flag, an appropriate compiler for the current system will be +pulled from conda. + +If you need/prefer to use the system-provided compilers (typical for HPC +installations), please use a conda environment generated with `--no-compilers`. +Note that this will likely result in a +[conda/system library conflict](#alternative-sources-for-dependencies), +since the system compilers will typically produce executables +that link against the system-provided libraries, which can shadow the +conda-provided equivalents. + +### CUDA >= 10.2 (`--ctk` flag; optional) + +Only necessary if you wish to run with Nvidia GPUs. + +Some CUDA components necessary for building, e.g. the `nvcc` compiler and driver +stubs, are not distributed through conda. These must instead be installed using +[system-level packages](https://developer.nvidia.com/cuda-downloads). + +Independent of the system-level CUDA installation, conda will need to install an +environment-local copy of the CUDA toolkit (which is what the `--ctk` option +controls). To avoid versioning conflicts it is safest to match the version of +CUDA installed system-wide on your machine + +Legate is tested and guaranteed to be compatible with Volta and later GPU +architectures. You can use Legate with Pascal GPUs as well, but there could +be issues due to lack of independent thread scheduling. Please report any such +issues on GitHub. + +### Fortran compiler (optional) + +Only necessary if you wish to build OpenBLAS from source. + +Not included by default in the generated conda environment files; install +`fortran-compiler` from `conda-forge` if you need it. + +### Numactl (optional) + +Required to support CPU and memory binding in the Legate launcher. + +Not available on conda; typically available through the system-level package +manager. + +### MPI (`--openmpi` option) + +Only necessary if you wish to run on multiple nodes. + +Conda distributes a generic build of OpenMPI, but you may need to use a more +specialized build, e.g. the one distributed by +[MOFED](https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/), +or one provided by your HPC vendor. In that case you should use an environment +file generated with `--no-openmpi`. + +Legate requires a build of MPI that supports `MPI_THREAD_MULTIPLE`. + +### Networking libraries (e.g. Infiniband, RoCE, UCX; optional) + +Only necessary if you wish to run on multiple nodes. + +Not available on conda; typically available through MOFED or the system-level +package manager. + +If using UCX, a build configured with `--enable-mt` is required. 
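If you are unsure whether a given MPI build satisfies the `MPI_THREAD_MULTIPLE` requirement above, one quick sanity check is to query the provided thread level. The snippet below is only an illustration and assumes `mpi4py` is installed alongside that MPI build (`mpi4py` is not one of the dependencies listed here).

```python
# Illustrative check only; requires mpi4py, which is not part of the generated
# environments. Reports the thread support level provided by the MPI library.
from mpi4py import MPI

provided = MPI.Query_thread()
if provided < MPI.THREAD_MULTIPLE:
    raise SystemExit(f"MPI thread level {provided} is below MPI_THREAD_MULTIPLE")
print("MPI build supports MPI_THREAD_MULTIPLE")
```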
+ +## Alternative sources for dependencies + +If you do not wish to use conda for some (or all) of the dependencies, you can +remove the corresponding entries from the environment file before passing it to +conda. See [the `install.py` section](#using-installpy) for instructions on how +to provide alternative locations for these dependencies to the build process. + +Note that this is likely to result in conflicts between conda-provided and +system-provided libraries. + +Conda distributes its own version of certain common libraries (in particular the +C++ standard library), which are also typically available system-wide. Any +system package you include will typically link to the system version, while +conda packages link to the conda version. Often these two different versions, +although incompatible, carry the same version number (`SONAME`), and are +therefore indistinguishable to the dynamic linker. Then, the first component to +specify a link location for this library will cause it to be loaded from there, +and any subsequent link requests for the same library, even if suggesting a +different link location, will get served using the previously linked version. + +This can cause link failures at runtime, e.g. when a system-level library +happens to be the first to load GLIBC, causing any conda library that comes +after to trip GLIBC's internal version checks, since the conda library expects +to find symbols with more recent version numbers than what is available on the +system-wide GLIBC: + +``` +/lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /opt/conda/envs/legate/lib/libarrow.so) +``` + +You can usually work around this issue by putting the conda library directory +first in the dynamic library resolution path: + +``` +LD_LIBRARY_PATH="$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" +``` + +This way you can make sure that the (typically more recent) conda version of any +common library will be preferred over the system-wide one, no matter which +component requests it first. # Building for Users ## Using install.py -For releases <= 22.07, the main method for building Legate was the `install.py` script. -Although the underlying implementation has significantly changed, `install.py` still supports the -same usage and same set of flags. For a full list of flags, users can run: +The Legate Core repository comes with a helper `install.py` script in the +top-level directory, that will build the C++ parts of the library and install +the C++ and Python components under the currently active Python environment. + +To add GPU support, use the `--cuda` flag: + +``` +./install.py --cuda +``` + +You can specify the CUDA toolkit directory and the CUDA architecture you want to +target using the `--with-cuda` and `--arch` flags, e.g.: ``` -$ ./install.py --help +./install.py --cuda --with-cuda /usr/local/cuda/ --arch ampere ``` -## Using Conda +By default the script relies on CMake's auto-detection for these settings. +CMake will first search the currently active Python/conda environment +for dependencies, then any common system-wide installation directories (e.g. +`/usr/lib`). If a dependency cannot be found but is publicly available in source +form (e.g. OpenBLAS), cmake will fetch and build it automatically. You can +override this search by providing an install location for any dependency +explicitly, using a `--with-dep` flag, e.g. `--with-nccl` and +`--with-openblas`. 
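As a concrete (hypothetical) illustration of the `--with-dep` flags mentioned above, the following drives `install.py` with NCCL and OpenBLAS locations taken from the active conda environment; the flag names are the ones named above, and the paths are placeholders to adapt to wherever your copies actually live.

```python
# Hypothetical invocation of install.py with explicit dependency locations;
# CONDA_PREFIX is used only as a placeholder for wherever the libraries live.
import os
import subprocess

prefix = os.environ["CONDA_PREFIX"]
subprocess.run(
    ["./install.py", "--cuda", "--with-nccl", prefix, "--with-openblas", prefix],
    check=True,
)
```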
+ +For multi-node execution Legate uses [GASNet](https://gasnet.lbl.gov/) which can be +requested using the `--network gasnet1` or `--network gasnetex` flag. By default +GASNet will be automatically downloaded and built, but if you have an existing +installation then you can inform the install script using the `--with-gasnet` flag. +You also need to specify the interconnect network of the target machine using the +`--conduit` flag. + +For example this would be an installation for a +[DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): +``` +./install.py --network gasnet1 --conduit ibv --cuda --arch ampere +``` +Alternatively, here is an install line for the +[Piz-Daint](https://www.cscs.ch/computers/dismissed/piz-daint-piz-dora/) supercomputer: +``` +./install.py --network gasnet1 --conduit aries --cuda --arch pascal +``` -Legate can be installed using Conda by pointing to the required channels (`-c`): +To see all available configuration options, run with the `--help` flag: ``` -conda install -c nvidia -c conda-forge -c legate legate-core +./install.py --help ``` ## Using pip -Legate is not yet registered in a standard pip repository. However, users can still use the -pip installer to build and install Legate. After downloading or cloning the legate.core source, -users can run the following in the legate.core folder: +Legate Core is not yet registered in a standard pip repository. However, users +can still use the pip installer to build and install Legate Core. The following +command will trigger a single-node, CPU-only build of Legate Core, then install +it into the currently active Python environment: ``` $ pip install . @@ -58,18 +245,20 @@ or $ python3 -m pip install . ``` -This will install Legate in the standard packages directory for the environment Python. +## Advanced Customization -### Advanced Customization - -If users need to customize details of the underlying CMake build, they can pass -CMake flags through the `SKBUILD_CONFIGURE_OPTIONS` environment variable: +Legate relies on CMake to select its toolchain and build flags. Users can set +the environment variables `CXX` or `CXXFLAGS` prior to building to override the +CMake defaults. Alternatively, CMake values can be overridden through the +`SKBUILD_CONFIGURE_OPTIONS` variable: ``` $ SKBUILD_CONFIGURE_OPTIONS="-D Legion_USE_CUDA:BOOL=ON" \ pip install . ``` + An alternative syntax using `setup.py` with `scikit-build` is + ``` $ python setup.py install -- -DLegion_USE_CUDA:BOOL=ON ``` @@ -86,15 +275,17 @@ in `setup.py` to drive the build and installation. A `pip install` will trigger 3. pip installation of Python files The CMake build can be configured independently of `pip`, allowing incremental C++ builds directly through CMake. -This simplifies rebuilding `libcunumeric.so` either via command-line or via IDE. +This simplifies rebuilding the C++ shared libraries either via command-line or via IDE. After building the C++ libraries, the `pip install` can be done in "editable" mode using the `-e` flag. This configures the Python site packages to import the Python source tree directly. The Python source can then be edited and used directly for testing without requiring another `pip install`. ## Example -There are several examples in the `scripts` folder. We walk through the steps in the `build-separately-no-install.sh` here. -First, the CMake build needs to be configured, e.g.: +There are several examples in the `scripts` folder. We walk through the steps in +`build-separately-no-install.sh` here. 
+ +First, the CMake build needs to be configured: ``` $ cmake -S . -B build -GNinja -D Legion_USE_CUDA=ON @@ -118,6 +309,7 @@ $ SKBUILD_BUILD_OPTIONS="-D FIND_LEGATE_CORE_CPP=ON -D legate_core_ROOT=$(pwd)/b The Python source tree and CMake build tree are now available with the environment Python for running Legate programs. The diagram below illustrates the -complete workflow for building both Legate core and a downstream package [cuNumeric]() +complete workflow for building both Legate core and a downstream package, +[cuNumeric](https://github.com/nv-legate/cunumeric) drawing diff --git a/README.md b/README.md index ff1142695..fe0d5b5e4 100644 --- a/README.md +++ b/README.md @@ -50,15 +50,23 @@ Pull requests are welcomed. If you have questions, please contact us at legate(at)nvidia.com. -1. [Why Legate?](#why-legate) -1. [What is the Legate Core?](#what-is-the-legate-core) -1. [How Does Legate Work?](#how-does-legate-work) -1. [How Do I Install Legate?](#how-do-i-install-legate) -1. [How Do I Use Legate?](#how-do-i-use-legate) -1. [Other FAQs](#other-faqs) -1. [Contributing](#contributing) -1. [Documentation](#documentation) -1. [Next Steps](#next-steps) +- [Legate](#legate) + - [Why Legate?](#why-legate) + - [What is the Legate Core?](#what-is-the-legate-core) + - [How Does Legate Work?](#how-does-legate-work) + - [How Do I Install Legate?](#how-do-i-install-legate) + - [How Do I Use Legate?](#how-do-i-use-legate) + - [Distributed Launch](#distributed-launch) + - [Debugging and Profiling](#debugging-and-profiling) + - [Running Legate programs with Jupyter Notebook](#running-legate-programs-with-jupyter-notebook) + - [Installation of the Legate IPython Kernel](#installation-of-the-legate-ipython-kernel) + - [Running with Jupyter Notebook](#running-with-jupyter-notebook) + - [Configuring the Jupyter Notebook](#configuring-the-jupyter-notebook) + - [Magic Command](#magic-command) + - [Other FAQs](#other-faqs) + - [Contributing](#contributing) + - [Documentation](#documentation) + - [Next Steps](#next-steps) ## Why Legate? @@ -215,120 +223,14 @@ Legate Core is available [on conda](https://anaconda.org/legate/legate-core): conda install -c nvidia -c conda-forge -c legate legate-core ``` +The conda package is compatible with CUDA >= 11.4 (CUDA driver version >= r470), +and Volta or later GPU architectures. + Docker image build scripts, as well as specialized install scripts for supported clusters are available on the [quickstart](https://github.com/nv-legate/quickstart) repo. -Read on for general instructions on building Legate Core from source. - -### Dependencies - -Legate has been tested on Linux and MacOS, although only a few flavors of Linux -such as Ubuntu have been thoroughly tested. There is currently no support for -Windows. 
- -Legate Core requires the following: - - - Python >= 3.8 - - [CUDA](https://developer.nvidia.com/cuda-downloads) >= 10.2 - - GNU Make - - C++17 compatible compiler (g++, clang, or nvc++) - - numactl (optional, to support CPU and memory binding) - - the Python packages listed in any one of the conda environment files: - - `conda/environment-test-3.8.yml` - - `conda/environment-test-3.9.yml` - - `conda/environment-test-3.10.yml` - -You can install the required Python packages by creating a new conda environment: - -``` -conda env create -n legate -f conda/environment-test-3.10.yml -``` - -or by updating an existing environment: - -``` -conda env update -f conda/environment-test-3.10.yml -``` - -Note that conda will need to install an environment-local copy of the CUDA -toolkit, and by default it will choose the latest available version. To avoid -versioning conflicts, however, it is safer to match the version of CUDA -installed system-wide on your machine. Therefore, we suggest that you add this -as an explicit dependency at the bottom of the conda environment file. For -example, if your system-wide CUDA installation is at version 10.2, add: - -``` - - cudatoolkit=10.2 -``` - -### Installation - -The Legate Core library comes with both a standard `setup.py` script and a -custom `install.py` script in the top-level directory of the repository that -will build and install the Legate Core library. Users can use either script -to install Legate as they will produce the same effect. Users can do a simple -pip installation of a single-node, CPU-only Legate configuration by navigating -to the Legate source directory and running: -``` -pip install . -``` -or -``` -python3 -m pip install . -``` - -This will install Legate into the standard packages of the Python environment. - -To add GPU support or do more complicated customization, Legate provides a -helper `install.py` script. For GPU support, simply use the `--cuda` flag: - -``` -./install.py --cuda -``` - -The first time you request GPU support you may need to use the `--with-cuda` flag to -specify the location of your CUDA installation and the `--with-nccl` flag to specify -the path to your NCCL installation, if these cannot be automatically located by the build system. -You can also specify the name of the CUDA architecture you want to target with the `--arch` -flag. By default the script relies on CMake's auto-detection. -``` -./install.py --cuda --with-cuda /usr/local/cuda/ --with-nccl "$CONDA_PREFIX" --arch ampere -``` -For multi-node support Legate uses [GASNet](https://gasnet.lbl.gov/) which can be -requested using the `--network gasnet1` or `--network gasnetex` flag. By default -GASNet will be automatically downloaded and built, but if you have an existing -installation then you can inform the install script using the `--with-gasnet` flag. -You also need to specify the interconnect network of the target machine using the -`--conduit` flag. 
- -For example this would be an installation for a -[DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): -``` -./install.py --network gasnet1 --conduit ibv --cuda --arch ampere -``` -Alternatively here is an install line for the -[Piz-Daint](https://www.cscs.ch/computers/dismissed/piz-daint-piz-dora/) supercomputer: -``` -./install.py --network gasnet1 --conduit aries --cuda --arch pascal -``` -To see all the options available for installing Legate, run with the `--help` flag: -``` -./install.py --help -``` - -### Toolchain Selection - -Legate relies on CMake to select its toolchain and build flags. -Users can set the environment variables `CXX` or `CXXFLAGS` -prior to building to override the CMake defaults. Alternatively, CMake values -can be overriden through the `SKBUILD_CONFIGURE_OPTIONS` variable, -which is discussed in more detail in the [developer build instructions](BUILD.md). - -### Developer Workflow - -Details on doing incremental CMake builds and editable pip installations can be -found in the [developer build instructions](BUILD.md). +See [BUILD.md]() for instructions on building Legate Core from source. ## How Do I Use Legate? diff --git a/conda/environment-test-3.10.yml b/conda/environment-test-3.10.yml deleted file mode 100644 index 736e4c5e0..000000000 --- a/conda/environment-test-3.10.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: legate-core-test -channels: - - conda-forge -dependencies: - - python=3.10 - - # build - - git - - nccl - - make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile diff --git a/conda/environment-test-3.8.yml b/conda/environment-test-3.8.yml deleted file mode 100644 index 9f58e9b5d..000000000 --- a/conda/environment-test-3.8.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: legate-core-test -channels: - - conda-forge -dependencies: - - python=3.8 - - # build - - git - - nccl - - make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile diff --git a/conda/environment-test-3.9.yml b/conda/environment-test-3.9.yml deleted file mode 100644 index 9d4eea27d..000000000 --- a/conda/environment-test-3.9.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: legate-core-test -channels: - - conda-forge -dependencies: - - python=3.9 - - # build - - git - - nccl - - 
make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py new file mode 100755 index 000000000..a5cd426ee --- /dev/null +++ b/scripts/generate-conda-envs.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# +# See the LICENSE file for details. +# +from __future__ import annotations + +from argparse import Action, ArgumentParser +from dataclasses import dataclass +from textwrap import indent +from typing import Literal, Protocol, Tuple + +# --- Types ------------------------------------------------------------------- + +Req = str +Reqs = Tuple[Req, ...] +OSType = Literal["linux", "darwin"] + + +class SectionConfig(Protocol): + header: str + + @property + def conda(self) -> Reqs: + return () + + @property + def pip(self) -> Reqs: + return () + + def __str__(self) -> str: + return self.header + + def format(self, kind: str) -> str: + return SECTION_TEMPLATE.format( + header=self.header, + reqs="- " + + "\n- ".join(self.conda if kind == "conda" else self.pip), + ) + + +@dataclass(frozen=True) +class CUDAConfig(SectionConfig): + ctk_version: str | None + + header = "cuda" + + @property + def conda(self) -> Reqs: + if self.ctk_version is None: + return () + + return ( + f"cudatoolkit={self.ctk_version}", # runtime + "cutensor>=1.3.3", # runtime + "nccl", # runtime + "pynvml", # tests + ) + + def __str__(self) -> str: + if self.ctk_version == "none": + return "" + + return f"-cuda{self.ctk_version}" + + +@dataclass(frozen=True) +class BuildConfig(SectionConfig): + compilers: bool = True + openmpi: bool = True + + header = "build" + + @property + def conda(self) -> Reqs: + pkgs = ( + "cmake>=3.24", + "git", + "make", + "scikit-build>=0.13.1", + "setuptools>=60", + "zlib", + ) + if self.compilers: + pkgs += ("c-compiler", "cxx-compiler") + if self.openmpi: + pkgs += ("openmpi",) + return sorted(pkgs) + + def __str__(self) -> str: + val = "-compilers" if self.compilers else "" + val += "-openmpi" if self.openmpi else "" + return val + + +@dataclass(frozen=True) +class RuntimeConfig(SectionConfig): + header = "runtime" + + @property + def conda(self) -> Reqs: + return ( + "cffi", + "llvm-openmp", + "numpy>=1.22", + "openblas=*=*openmp*", + "opt_einsum", + "pyarrow>=5", + "scipy", + "typing_extensions", + ) + + +@dataclass(frozen=True) +class TestsConfig(SectionConfig): + header = "tests" + + @property + def conda(self) -> Reqs: + return ( + 
"clang-tools>=8", + "clang>=8", + "colorama", + "coverage", + "mock", + "mypy>=0.961", + "pre-commit", + "pytest-cov", + "pytest-lazy-fixture", + "pytest-mock", + "pytest", + "types-docutils", + ) + + @property + def pip(self) -> Reqs: + return ("tifffile",) + + +@dataclass(frozen=True) +class DocsConfig(SectionConfig): + header = "docs" + + @property + def pip(self) -> Reqs: + return ( + "jinja2", + "markdown<3.4.0", + "pydata-sphinx-theme", + "recommonmark", + "sphinx-copybutton", + "sphinx-markdown-tables", + "sphinx>=4.4.0", + ) + + +@dataclass(frozen=True) +class EnvConfig: + use: str + python: str + os: OSType + ctk: str | None + compilers: bool + openmpi: bool + + @property + def sections(self) -> Tuple[SectionConfig, ...]: + return ( + self.cuda, + self.build, + self.runtime, + self.tests, + self.docs, + ) + + @property + def cuda(self) -> CUDAConfig: + return CUDAConfig(self.ctk) + + @property + def build(self) -> BuildConfig: + return BuildConfig(self.compilers, self.openmpi) + + @property + def runtime(self) -> RuntimeConfig: + return RuntimeConfig() + + @property + def tests(self) -> TestsConfig: + return TestsConfig() + + @property + def docs(self) -> DocsConfig: + return DocsConfig() + + @property + def filename(self) -> str: + return f"environment-{self.use}-{self.os}-py{self.python}{self.cuda}{self.build}.yaml" # noqa + + +# --- Setup ------------------------------------------------------------------- + +PYTHON_VERSIONS = ("3.8", "3.9", "3.10") + +CTK_VERSIONS = ( + "none", + "10.2", + "11.0", + "11.1", + "11.2", + "11.3", + "11.4", + "11.5", + "11.6", + "11.7", +) + +OS_NAMES: Tuple[OSType, ...] = ("linux", "osx") + + +ENV_TEMPLATE = """\ +name: legate-{use} +channels: + - conda-forge +dependencies: + + - python={python} + +{conda_sections}{pip} +""" + +SECTION_TEMPLATE = """\ +# {header} +{reqs} + +""" + +PIP_TEMPLATE = """\ + - pip + - pip: +{pip_sections} +""" + +ALL_CONFIGS = [ + EnvConfig("test", python, "linux", ctk, compilers, openmpi) + for python in PYTHON_VERSIONS + for ctk in CTK_VERSIONS + for compilers in (True, False) + for openmpi in (True, False) +] + [ + EnvConfig("test", python, "darwin", "none", compilers, openmpi) + for python in PYTHON_VERSIONS + for compilers in (True, False) + for openmpi in (True, False) +] + +# --- Code -------------------------------------------------------------------- + + +class BooleanFlag(Action): + def __init__( + self, + option_strings, + dest, + default, + required=False, + help="", + metavar=None, + ): + assert all(not opt.startswith("--no") for opt in option_strings) + + def flatten(list): + return [item for sublist in list for item in sublist] + + option_strings = flatten( + [ + [opt, "--no-" + opt[2:], "--no" + opt[2:]] + if opt.startswith("--") + else [opt] + for opt in option_strings + ] + ) + super().__init__( + option_strings, + dest, + nargs=0, + const=None, + default=default, + type=bool, + choices=None, + required=required, + help=help, + metavar=metavar, + ) + + def __call__(self, parser, namespace, values, option_string): + setattr(namespace, self.dest, not option_string.startswith("--no")) + + +if __name__ == "__main__": + + import sys + + parser = ArgumentParser() + parser.add_argument( + "--python", + choices=PYTHON_VERSIONS, + default=None, + help="Python version to generate for, (default: all python versions)", + ) + parser.add_argument( + "--ctk", + choices=CTK_VERSIONS, + default=None, + dest="ctk_version", + help="CTK version to generate for (default: all CTK versions)", + ) + parser.add_argument( + 
"--os", + choices=OS_NAMES, + default=None, + help="OS to generate for (default: all OSes)", + ) + parser.add_argument( + "--compilers", + action=BooleanFlag, + dest="compilers", + default=None, + help="Whether to include conda compilers or not (default: both)", + ) + parser.add_argument( + "--openmpi", + action=BooleanFlag, + dest="openmpi", + default=None, + help="Whether to include openmpi or not (default: both)", + ) + + args = parser.parse_args(sys.argv[1:]) + + configs = ALL_CONFIGS + + if args.python is not None: + configs = (x for x in configs if x.python == args.python) + if args.ctk_version is not None: + configs = ( + x for x in configs if x.cuda.ctk_version == args.ctk_version + ) + if args.compilers is not None: + configs = (x for x in configs if x.build.compilers == args.compilers) + if args.os is not None: + configs = (x for x in configs if x.os == args.os) + if args.openmpi is not None: + configs = (x for x in configs if x.build.openmpi == args.openmpi) + + for config in configs: + conda_sections = indent( + "".join(s.format("conda") for s in config.sections if s.conda), + " ", + ) + + pip_sections = indent( + "".join(s.format("pip") for s in config.sections if s.pip), " " + ) + + print(f"--- generating: {config.filename}") + out = ENV_TEMPLATE.format( + use=config.use, + python=config.python, + conda_sections=conda_sections, + pip=PIP_TEMPLATE.format(pip_sections=pip_sections), + ) + with open(f"{config.filename}", "w") as f: + f.write(out) From 5c430039ffe4accf89149ea5ae42c7b68dd73ddf Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 25 Oct 2022 11:01:52 -0700 Subject: [PATCH 033/121] Minor fix in documentation --- BUILD.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BUILD.md b/BUILD.md index 5abb0af8d..320059f9f 100644 --- a/BUILD.md +++ b/BUILD.md @@ -114,7 +114,7 @@ Required to support CPU and memory binding in the Legate launcher. Not available on conda; typically available through the system-level package manager. -### MPI (`--openmpi` option) +### MPI (`--openmpi` option; optional) Only necessary if you wish to run on multiple nodes. From b7cb465758d1c91537eddf31f849147a4c10382c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 25 Oct 2022 13:45:43 -0700 Subject: [PATCH 034/121] Fix for cunumeric#668 (#453) Co-authored-by: Manolis Papadakis --- legate_core_cpp.cmake | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index 3860d85dc..bf89ff01b 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -117,7 +117,7 @@ if(Legion_USE_Python AND (NOT Python3_FOUND)) endif() if(Legion_NETWORKS) - find_package(MPI REQUIRED) + find_package(MPI REQUIRED COMPONENTS CXX) endif() if(Legion_USE_CUDA) @@ -266,8 +266,8 @@ target_link_libraries(legate_core PUBLIC Legion::Legion legate::Thrust $ - PRIVATE $ - $) + $ + PRIVATE $) target_compile_options(legate_core PRIVATE "$<$:${legate_core_CXX_OPTIONS}>" @@ -394,6 +394,11 @@ endif() "set(Legion_USE_Python ${Legion_USE_Python})" "set(Legion_NETWORKS ${Legion_NETWORKS})" "set(Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS})" +[=[ +if(Legion_NETWORKS) + find_package(MPI REQUIRED COMPONENTS CXX) +endif() +]=] ) rapids_export( From fcc0dcc505645a2c1696d6402c8ea662ac7e995d Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 26 Oct 2022 06:23:46 -0700 Subject: [PATCH 035/121] Update upload artifact action version (#454) v2 -> v3 to avoid GitHub warnings. 
--- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a90f5520..11b700eab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: if: always() - name: Upload Build Log if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: build-log path: ./**/${{ env.COMMIT }}-build.log.gpg \ No newline at end of file From e4b94ce4e231b74f96946df59351091c11057051 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 26 Oct 2022 13:26:50 -0700 Subject: [PATCH 036/121] Only keep traceback reprs, to avoid cycles (#447) * only keep traceback reprs, to avoid cycles * fix attr name * use format_tb instead of repr * Small format change Co-authored-by: Manolis Papadakis --- legate/core/exception.py | 9 +++++---- legate/core/operation.py | 16 +++++++--------- legate/core/runtime.py | 6 ++---- legate/core/utils.py | 6 +++--- 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/legate/core/exception.py b/legate/core/exception.py index a1d4daae9..5b8bace1e 100644 --- a/legate/core/exception.py +++ b/legate/core/exception.py @@ -19,7 +19,6 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from types import TracebackType from typing import Optional from ._legion import Future @@ -30,11 +29,11 @@ def __init__( self, exn_types: list[type], future: Future, - tb: Optional[TracebackType] = None, + tb_repr: Optional[str] = None, ): self._exn_types = exn_types self._future = future - self._tb = tb + self._tb_repr = tb_repr def raise_exception(self) -> None: buf = self._future.get_buffer() @@ -45,5 +44,7 @@ def raise_exception(self) -> None: error_message = buf[9 : 9 + error_size].decode() exn_type = self._exn_types[exn_index] exn_reraised = exn_type(error_message) - exn_original = exn_type(error_message).with_traceback(self._tb) + if self._tb_repr is not None: + error_message += "\n" + self._tb_repr[:-1] # remove extra newline + exn_original = exn_type(error_message) raise exn_reraised from exn_original diff --git a/legate/core/operation.py b/legate/core/operation.py index fcd626acb..c079afd0d 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -32,11 +32,9 @@ from .partition import REPLICATE, Weighted from .shape import Shape from .store import Store, StorePartition -from .utils import OrderedSet, capture_traceback +from .utils import OrderedSet, capture_traceback_repr if TYPE_CHECKING: - from types import TracebackType - from .communicator import Communicator from .constraints import Constraint from .context import Context @@ -244,7 +242,7 @@ def __init__( self._scalar_args: list[tuple[Any, Union[DTType, tuple[DTType]]]] = [] self._comm_args: list[Communicator] = [] self._exn_types: list[type] = [] - self._tb: Union[None, TracebackType] = None + self._tb_repr: Union[None, str] = None self._side_effect = False @property @@ -279,7 +277,7 @@ def can_raise_exception(self) -> bool: return len(self._exn_types) > 0 def capture_traceback(self) -> None: - self._tb = capture_traceback() + self._tb_repr = capture_traceback_repr() def _add_scalar_args_to_launcher(self, launcher: TaskLauncher) -> None: for (arg, dtype) in self._scalar_args: @@ -309,7 +307,7 @@ def _demux_scalar_stores_future(self, result: Future) -> None: output.set_storage(result) elif self.can_raise_exception: runtime.record_pending_exception( - self._exn_types, result, self._tb + self._exn_types, result, self._tb_repr ) else: assert num_unbound_outs == 
1 @@ -327,7 +325,7 @@ def _demux_scalar_stores_future(self, result: Future) -> None: runtime.record_pending_exception( self._exn_types, runtime.extract_scalar(result, idx), - self._tb, + self._tb_repr, ) def _demux_scalar_stores_future_map( @@ -366,7 +364,7 @@ def _demux_scalar_stores_future_map( runtime.record_pending_exception( self._exn_types, runtime.reduce_exception_future_map(result), - self._tb, + self._tb_repr, ) else: assert False @@ -400,7 +398,7 @@ def _demux_scalar_stores_future_map( runtime.record_pending_exception( self._exn_types, runtime.reduce_exception_future_map(exn_fut_map), - self._tb, + self._tb_repr, ) def _demux_scalar_stores( diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 22b3815e2..4d12a6591 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -51,8 +51,6 @@ from .shape import Shape if TYPE_CHECKING: - from types import TracebackType - from . import ArgumentMap, Detach, IndexDetach, IndexPartition, Library from ._legion import FieldListLike, PhysicalRegion from .communicator import Communicator @@ -1551,9 +1549,9 @@ def record_pending_exception( self, exn_types: list[type], future: Future, - tb: Optional[TracebackType] = None, + tb_repr: Optional[str] = None, ) -> None: - exn = PendingException(exn_types, future, tb) + exn = PendingException(exn_types, future, tb_repr) self._pending_exceptions.append(exn) def raise_exceptions(self) -> None: diff --git a/legate/core/utils.py b/legate/core/utils.py index a9fa0e9e1..6a59ca02f 100644 --- a/legate/core/utils.py +++ b/legate/core/utils.py @@ -72,9 +72,9 @@ def cast_tuple(value: Any) -> tuple[Any, ...]: return value if isinstance(value, tuple) else tuple(value) -def capture_traceback( +def capture_traceback_repr( skip_core_frames: bool = True, -) -> Optional[TracebackType]: +) -> Optional[str]: tb = None for frame, _ in traceback.walk_stack(None): if frame.f_globals["__name__"].startswith("legate.core"): @@ -85,4 +85,4 @@ def capture_traceback( tb_lasti=frame.f_lasti, tb_lineno=frame.f_lineno, ) - return tb + return "".join(traceback.format_tb(tb)) if tb is not None else None From 95b91eab63ffad606d0884b355593925a879d9d0 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 26 Oct 2022 14:47:15 -0700 Subject: [PATCH 037/121] Fix up mypy errors in tests (#456) --- .pre-commit-config.yaml | 4 ++-- legate/tester/stages/test_stage.py | 2 +- tests/unit/legate/driver/test_config.py | 10 +++++----- tests/unit/legate/jupyter/test_kernel.py | 10 +++++----- tests/unit/legate/tester/stages/test_test_stage.py | 9 ++++++++- tests/unit/legate/tester/stages/test_util.py | 9 +++++---- tests/unit/legate/util/test_system.py | 2 +- 7 files changed, 27 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04478d01c..402bffb64 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ repos: hooks: - id: mypy pass_filenames: false - args: ['legate'] - additional_dependencies: [numpy] + args: ['legate', 'tests'] + additional_dependencies: [numpy,pytest,pytest_mock] default_language_version: python: python3 diff --git a/legate/tester/stages/test_stage.py b/legate/tester/stages/test_stage.py index 5962500bf..f9c871461 100644 --- a/legate/tester/stages/test_stage.py +++ b/legate/tester/stages/test_stage.py @@ -92,7 +92,7 @@ def delay(self, shard: Shard, config: Config, system: TestSystem) -> None: Process execution wrapper """ - ... 
+ return def shard_args(self, shard: Shard, config: Config) -> ArgList: """Generate the command line arguments necessary to launch diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 2408bfe08..249107ed2 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -59,12 +59,12 @@ def test_mixin(self) -> None: "extra", (["a"], ["a", "b c"], ["a", "b c", "d e"], ["a", "b c", "d e", "f"]), ) - def test_launcher_extra_fixup_basic(self, extra) -> None: + def test_launcher_extra_fixup_basic(self, extra: list[str]) -> None: mn = m.MultiNode( nodes=1, ranks_per_node=1, not_control_replicable=False, - launcher="launcher", + launcher="mpirun", launcher_extra=extra, ) assert mn.launcher_extra == sum((x.split() for x in extra), []) @@ -74,7 +74,7 @@ def test_launcher_extra_fixup_complex(self) -> None: nodes=1, ranks_per_node=1, not_control_replicable=False, - launcher="launcher", + launcher="mpirun", launcher_extra=[ "-H g0002,g0002 -X SOMEENV --fork", "-bind-to none", @@ -95,7 +95,7 @@ def test_launcher_extra_fixup_quoted(self) -> None: nodes=1, ranks_per_node=1, not_control_replicable=False, - launcher="launcher", + launcher="mpirun", launcher_extra=[ "-f 'some path with spaces/foo.txt'", ], @@ -165,7 +165,7 @@ def test_mixin(self) -> None: "extra", (["a"], ["a", "b c"], ["a", "b c", "d e"], ["a", "b c", "d e", "f"]), ) - def test_nsys_extra_fixup_basic(self, extra) -> None: + def test_nsys_extra_fixup_basic(self, extra: list[str]) -> None: p = m.Profiling( profile=True, nvprof=True, diff --git a/tests/unit/legate/jupyter/test_kernel.py b/tests/unit/legate/jupyter/test_kernel.py index 42925387b..d176ba23d 100644 --- a/tests/unit/legate/jupyter/test_kernel.py +++ b/tests/unit/legate/jupyter/test_kernel.py @@ -53,11 +53,11 @@ def test_defatul(self) -> None: ] = config.kernel.spec_name assert spec.display_name == config.kernel.display_name - assert spec.language == "python" - assert spec.argv[:-3] == list(driver.cmd) - assert spec.argv[-3].endswith("_legion_kernel.py") - assert spec.argv[-2:] == ["-f", "{connection_file}"] - assert spec.env == expected_env + assert spec.language == "python" # type: ignore + assert spec.argv[:-3] == list(driver.cmd) # type: ignore + assert spec.argv[-3].endswith("_legion_kernel.py") # type: ignore + assert spec.argv[-2:] == ["-f", "{connection_file}"] # type: ignore + assert spec.env == expected_env # type: ignore assert m.LEGATE_JUPYTER_METADATA_KEY in spec.metadata metadata = spec.metadata[m.LEGATE_JUPYTER_METADATA_KEY] assert metadata == { diff --git a/tests/unit/legate/tester/stages/test_test_stage.py b/tests/unit/legate/tester/stages/test_test_stage.py index 6a5678c22..fcdc7a934 100644 --- a/tests/unit/legate/tester/stages/test_test_stage.py +++ b/tests/unit/legate/tester/stages/test_test_stage.py @@ -23,8 +23,9 @@ from legate.tester import FeatureType from legate.tester.config import Config from legate.tester.stages import test_stage as m -from legate.tester.stages.util import StageResult, StageSpec +from legate.tester.stages.util import Shard, StageResult, StageSpec from legate.tester.test_system import ProcessResult, TestSystem as _TestSystem +from legate.util.types import ArgList, EnvDict from . 
import FakeSystem @@ -45,6 +46,12 @@ def __init__(self, config: Config, system: _TestSystem) -> None: def compute_spec(self, config: Config, system: _TestSystem) -> StageSpec: return StageSpec(2, [(0,), (1,), (2,)]) + def shard_args(self, shard: Shard, config: Config) -> ArgList: + return [] + + def env(self, config: Config, system: _TestSystem) -> EnvDict: + return {} + class TestTestStage: def test_name(self) -> None: diff --git a/tests/unit/legate/tester/stages/test_util.py b/tests/unit/legate/tester/stages/test_util.py index f97174de8..0729253dd 100644 --- a/tests/unit/legate/tester/stages/test_util.py +++ b/tests/unit/legate/tester/stages/test_util.py @@ -17,6 +17,7 @@ """ from __future__ import annotations +from datetime import timedelta from pathlib import Path import pytest @@ -33,7 +34,7 @@ def test_StageResult() -> None: procs[2].returncode = 10 procs[7].returncode = -2 - result = m.StageResult(procs=procs, time=0) + result = m.StageResult(procs=procs, time=timedelta(0)) assert result.total == 10 assert result.passed == 8 @@ -67,7 +68,7 @@ def test_requested_too_large(self) -> None: class Test_log_proc: @pytest.mark.parametrize("returncode", (-23, -1, 0, 1, 17)) - def test_skipped(self, returncode) -> None: + def test_skipped(self, returncode: int) -> None: config = Config([]) proc = ProcessResult( "proc", Path("proc"), skipped=True, returncode=returncode @@ -100,7 +101,7 @@ def test_passed_verbose(self) -> None: ) @pytest.mark.parametrize("returncode", (-23, -1, 1, 17)) - def test_failed(self, returncode) -> None: + def test_failed(self, returncode: int) -> None: config = Config([]) proc = ProcessResult("proc", Path("proc"), returncode=returncode) @@ -112,7 +113,7 @@ def test_failed(self, returncode) -> None: ) @pytest.mark.parametrize("returncode", (-23, -1, 1, 17)) - def test_failed_verbose(self, returncode) -> None: + def test_failed_verbose(self, returncode: int) -> None: config = Config([]) proc = ProcessResult( "proc", Path("proc"), returncode=returncode, output="foo\nbar" diff --git a/tests/unit/legate/util/test_system.py b/tests/unit/legate/util/test_system.py index c3a5d6184..38db9cc0b 100644 --- a/tests/unit/legate/util/test_system.py +++ b/tests/unit/legate/util/test_system.py @@ -109,5 +109,5 @@ def test_gpus_osx(self) -> None: s = m.System() msg = "GPU execution is not available on OSX." - with pytest.raises(RuntimeError, msg=msg): + with pytest.raises(RuntimeError, msg=msg): # type: ignore s.gpus From f6fb68fc64d962a6878a621f662bc9546db2b1af Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 27 Oct 2022 07:20:56 -0700 Subject: [PATCH 038/121] Fix returned legion paths for editable install with separate legion build (#442) --- legate/util/fs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/legate/util/fs.py b/legate/util/fs.py index e3ea9e958..15338d783 100644 --- a/legate/util/fs.py +++ b/legate/util/fs.py @@ -295,8 +295,9 @@ def installed_legion_paths(legion_dir: Path) -> LegionPaths: ) if legion_dir.joinpath("CMakeCache.txt").exists(): cmake_cache_txt = legion_dir / "CMakeCache.txt" - - except Exception: + finally: + # Hopefully at this point we have a valid cmake_cache_txt with a + # valid Legion_SOURCE_DIR and Legion_BINARY_DIR try: # If Legion_SOURCE_DIR and Legion_BINARY_DIR are in CMakeCache.txt, # return the paths to Legion in the legate_core build dir. 
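For context on the fix above: `installed_legion_paths` works by pulling `Legion_SOURCE_DIR` and `Legion_BINARY_DIR` out of a `CMakeCache.txt`. A simplified sketch of that kind of cache lookup (not the actual helper in `legate/util/fs.py`) looks like this:

```python
# Simplified sketch, not the actual implementation in legate/util/fs.py:
# read a single KEY:TYPE=VALUE entry out of a CMakeCache.txt.
from pathlib import Path
from typing import Optional


def read_cmake_cache_value(cache: Path, key: str) -> Optional[str]:
    # CMakeCache.txt entries look like: Legion_SOURCE_DIR:STATIC=/path/to/legion
    for line in cache.read_text().splitlines():
        if line.startswith(f"{key}:"):
            return line.split("=", 1)[1]
    return None


# Example usage (hypothetical paths):
# legion_src = read_cmake_cache_value(build_dir / "CMakeCache.txt", "Legion_SOURCE_DIR")
```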
From 7a20ea85a4ea9c5aa4c201ccd7dfc5073c0bd47f Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 27 Oct 2022 10:10:43 -0700 Subject: [PATCH 039/121] Fix BUILD.md link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fe0d5b5e4..f713191e9 100644 --- a/README.md +++ b/README.md @@ -230,7 +230,7 @@ Docker image build scripts, as well as specialized install scripts for supported clusters are available on the [quickstart](https://github.com/nv-legate/quickstart) repo. -See [BUILD.md]() for instructions on building Legate Core from source. +See [BUILD.md](BUILD.md) for instructions on building Legate Core from source. ## How Do I Use Legate? From f8052ccda03b860e5f1dfa963909a515bdef8c21 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 27 Oct 2022 23:21:08 -0700 Subject: [PATCH 040/121] Mapper improvements (#452) * Make mapping::RegionField agnostic to the owning operator * Rename MapperDeserializer to mapping::TaskDeserializer * Plumbing to use Legate Copy objects in the mapper * Rename mapping/task.* to mapping/operation.* * Use the right target processors for point copies * Use requirements directly instead of their indices in the store mapping * Make sure there is only up to one indirection field in a copy * Assign the right requirement index in CopyReqAnalyzer * Refactor map_copy to use map_legate_store instead of the legacy mapping call * Use map_legate_store in map_inline * Use map_legate_store in map_partition * Remove the obsolete map_raw_array finally * Clean up default store target lookup * Refactor several switch statements into dispatch templates * Unify sharding functor handling * Minor tweak to task slicing for manually parallelized tasks * Clean up variant lookups using std::optional * Forbid colocations on unbound stores (reduction stores can colocate though) * Massive clean-up on map_task * Unify the code to map multiple legate stores * One last DRY * Missing include * Remove dead code * Make sure the output variable is initialized before calling legate_map_store --- legate/core/_legion/operation.py | 16 + legate/core/launcher.py | 260 ++-- legate/core/operation.py | 10 + legate_core_cpp.cmake | 6 +- src/core/mapping/base_mapper.cc | 1309 ++++++------------ src/core/mapping/base_mapper.h | 73 +- src/core/mapping/mapping.cc | 35 +- src/core/mapping/mapping.h | 9 +- src/core/mapping/{task.cc => operation.cc} | 79 +- src/core/mapping/{task.h => operation.h} | 44 +- src/core/mapping/{task.inl => operation.inl} | 0 src/core/utilities/deserializer.cc | 76 +- src/core/utilities/deserializer.h | 50 +- src/core/utilities/deserializer.inl | 8 +- typings/legion_cffi/lib.pyi | 2 + 15 files changed, 916 insertions(+), 1061 deletions(-) rename src/core/mapping/{task.cc => operation.cc} (57%) rename src/core/mapping/{task.h => operation.h} (77%) rename src/core/mapping/{task.inl => operation.inl} (100%) diff --git a/legate/core/_legion/operation.py b/legate/core/_legion/operation.py index e07b4ba8b..cf13a7bff 100644 --- a/legate/core/_legion/operation.py +++ b/legate/core/_legion/operation.py @@ -648,6 +648,14 @@ def set_sharding_space(self, space: IndexSpace) -> None: self.launcher, space.handle ) + def set_mapper_arg(self, data: Any, size: int) -> None: + legion.legion_copy_launcher_set_mapper_arg( + self.launcher, + (ffi.from_buffer(data), size), + ) + # Hold a reference to the data to prevent collection + self.data = data + @dispatch def launch( self, @@ -1070,6 +1078,14 @@ def set_sharding_space(self, space: 
IndexSpace) -> None: self.launcher, space.handle ) + def set_mapper_arg(self, data: Any, size: int) -> None: + legion.legion_index_copy_launcher_set_mapper_arg( + self.launcher, + (ffi.from_buffer(data), size), + ) + # Hold a reference to the data to prevent collection + self.data = data + @dispatch def launch( self, diff --git a/legate/core/launcher.py b/legate/core/launcher.py index 8b4e01fbd..ce87ffada 100644 --- a/legate/core/launcher.py +++ b/legate/core/launcher.py @@ -24,7 +24,6 @@ Sequence, Tuple, Union, - overload, ) from . import ( @@ -104,6 +103,13 @@ def _pack(buf: BufferBuilder, value: Any, dtype: Any, is_tuple: bool) -> None: serializer(buf, value) +class RequirementIndexer(Protocol): + def get_requirement_index( + self, req: Union[RegionReq, OutputReq], field_id: int + ) -> int: + ... + + class LauncherArg(Protocol): def pack(self, buf: BufferBuilder) -> None: ... @@ -164,40 +170,16 @@ def __str__(self) -> str: class RegionFieldArg: - @overload def __init__( self, - analyzer: RequirementAnalyzer, - store: Store, - dim: int, - req: RegionReq, - field_id: int, - redop: int, - ) -> None: - ... - - @overload - def __init__( - self, - analyzer: OutputAnalyzer, - store: Store, - dim: int, - req: OutputReq, - field_id: int, - redop: int, - ) -> None: - ... - - def __init__( - self, - analyzer: Union[OutputAnalyzer, RequirementAnalyzer], + indexer: RequirementIndexer, store: Store, dim: int, req: Union[OutputReq, RegionReq], field_id: int, redop: int, ) -> None: - self._analyzer = analyzer + self._indexer = indexer self._store = store self._dim = dim self._req = req @@ -210,9 +192,7 @@ def pack(self, buf: BufferBuilder) -> None: buf.pack_32bit_int(self._dim) buf.pack_32bit_uint( - self._analyzer.get_requirement_index( - self._req, self._field_id # type: ignore [arg-type] - ) + self._indexer.get_requirement_index(self._req, self._field_id) ) buf.pack_32bit_uint(self._field_id) @@ -220,6 +200,15 @@ def __str__(self) -> str: return f"RegionFieldArg({self._dim}, {self._req}, {self._field_id})" +def pack_args( + argbuf: BufferBuilder, + args: Sequence[LauncherArg], +) -> None: + argbuf.pack_32bit_uint(len(args)) + for arg in args: + arg.pack(argbuf) + + AddReqMethod = Any @@ -575,7 +564,7 @@ def coalesce(self, error_on_interference: bool) -> dict[Any, list[int]]: return coalesced -class RequirementAnalyzer: +class RequirementAnalyzer(RequirementIndexer): def __init__(self, error_on_interference: bool = True) -> None: self._field_sets: dict[Region, FieldSet] = {} self._requirements: list[tuple[RegionReq, list[int]]] = [] @@ -614,7 +603,10 @@ def analyze_requirements(self) -> None: self._requirement_map[(req, field_id)] = req_idx self._requirements.append((req, fields)) - def get_requirement_index(self, req: RegionReq, field_id: int) -> int: + def get_requirement_index( + self, req: Union[RegionReq, OutputReq], field_id: int + ) -> int: + assert isinstance(req, RegionReq) try: return self._requirement_map[(req, field_id)] except KeyError: @@ -622,7 +614,7 @@ def get_requirement_index(self, req: RegionReq, field_id: int) -> int: return self._requirement_map[(req, field_id)] -class OutputAnalyzer: +class OutputAnalyzer(RequirementIndexer): def __init__(self) -> None: self._groups: dict[Any, OrderedSet[tuple[int, Store]]] = {} self._requirements: list[tuple[OutputReq, list[int]]] = [] @@ -664,7 +656,10 @@ def analyze_requirements(self) -> None: self._requirements.append((req, fields)) - def get_requirement_index(self, req: OutputReq, field_id: int) -> int: + def get_requirement_index( + self, 
req: Union[RegionReq, OutputReq], field_id: int + ) -> int: + assert isinstance(req, OutputReq) return self._requirement_map[(req, field_id)] def update_storages(self) -> None: @@ -673,6 +668,28 @@ def update_storages(self) -> None: req.update_storage(store, field_id) +# A simple analyzer that does not coalesce requirements +class CopyReqAnalyzer(RequirementIndexer): + def __init__(self) -> None: + self._requirements: list[tuple[RegionReq, int]] = [] + self._requirement_map: dict[tuple[RegionReq, int], int] = {} + + @property + def requirements(self) -> list[tuple[RegionReq, int]]: + return self._requirements + + def insert(self, req: RegionReq, field_id: int) -> None: + entry = (req, field_id) + self._requirement_map[entry] = len(self._requirements) + self._requirements.append(entry) + + def get_requirement_index( + self, req: Union[RegionReq, OutputReq], field_id: int + ) -> int: + assert isinstance(req, RegionReq) + return self._requirement_map[(req, field_id)] + + class TaskLauncher: def __init__( self, @@ -864,25 +881,16 @@ def set_sharding_space(self, space: IndexSpace) -> None: def set_point(self, point: Point) -> None: self._point = point - @staticmethod - def pack_args( - argbuf: BufferBuilder, - args: Sequence[LauncherArg], - ) -> None: - argbuf.pack_32bit_uint(len(args)) - for arg in args: - arg.pack(argbuf) - def build_task( self, launch_domain: Rect, argbuf: BufferBuilder ) -> IndexTask: self._req_analyzer.analyze_requirements() self._out_analyzer.analyze_requirements() - self.pack_args(argbuf, self._inputs) - self.pack_args(argbuf, self._outputs) - self.pack_args(argbuf, self._reductions) - self.pack_args(argbuf, self._scalars) + pack_args(argbuf, self._inputs) + pack_args(argbuf, self._outputs) + pack_args(argbuf, self._reductions) + pack_args(argbuf, self._scalars) argbuf.pack_bool(self._can_raise_exception) argbuf.pack_bool(self._insert_barrier) argbuf.pack_32bit_uint(len(self._comms)) @@ -921,10 +929,10 @@ def build_single_task(self, argbuf: BufferBuilder) -> SingleTask: self._req_analyzer.analyze_requirements() self._out_analyzer.analyze_requirements() - self.pack_args(argbuf, self._inputs) - self.pack_args(argbuf, self._outputs) - self.pack_args(argbuf, self._reductions) - self.pack_args(argbuf, self._scalars) + pack_args(argbuf, self._inputs) + pack_args(argbuf, self._outputs) + pack_args(argbuf, self._reductions) + pack_args(argbuf, self._scalars) argbuf.pack_bool(self._can_raise_exception) assert len(self._comms) == 0 @@ -982,7 +990,15 @@ def __init__( assert type(tag) != bool self._context = context self._mapper_id = mapper_id - self._req_analyzer = RequirementAnalyzer() + self._inputs: list[LauncherArg] = [] + self._outputs: list[LauncherArg] = [] + self._reductions: list[LauncherArg] = [] + self._source_indirects: list[LauncherArg] = [] + self._target_indirects: list[LauncherArg] = [] + self._input_reqs = CopyReqAnalyzer() + self._output_reqs = CopyReqAnalyzer() + self._source_indirect_reqs = CopyReqAnalyzer() + self._target_indirect_reqs = CopyReqAnalyzer() self._tag = tag self._sharding_space: Union[IndexSpace, None] = None self._point: Union[Point, None] = None @@ -998,11 +1014,15 @@ def library_mapper_id(self) -> int: def legion_mapper_id(self) -> int: return self._context.get_mapper_id(self._mapper_id) - def __del__(self) -> None: - del self._req_analyzer - def add_store( - self, store: Store, proj: Proj, perm: Permission, tag: int, flags: int + self, + args: list[LauncherArg], + req_analyzer: CopyReqAnalyzer, + store: Store, + proj: Proj, + perm: Permission, 
+ tag: int, + flags: int, ) -> None: assert store.kind is not Future assert store._transform.bottom @@ -1015,37 +1035,97 @@ def add_store( req = RegionReq(region, perm, proj, tag, flags) - self._req_analyzer.insert(req, field_id) + req_analyzer.insert(req, field_id) + + redop = -1 if proj.redop is None else proj.redop + args.append( + RegionFieldArg( + req_analyzer, + store, + region.index_space.get_dim(), + req, + field_id, + redop, + ) + ) def add_input( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.READ, tag, flags) + self.add_store( + self._inputs, + self._input_reqs, + store, + proj, + Permission.READ, + tag, + flags, + ) def add_output( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.WRITE, tag, flags) + self.add_store( + self._outputs, + self._output_reqs, + store, + proj, + Permission.WRITE, + tag, + flags, + ) def add_inout( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.READ_WRITE, tag, flags) + self.add_store( + self._outputs, + self._output_reqs, + store, + proj, + Permission.READ_WRITE, + tag, + flags, + ) def add_reduction( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.REDUCTION, tag, flags) + self.add_store( + self._reductions, + self._output_reqs, + store, + proj, + Permission.REDUCTION, + tag, + flags, + ) def add_source_indirect( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.SOURCE_INDIRECT, tag, flags) + self.add_store( + self._source_indirects, + self._source_indirect_reqs, + store, + proj, + Permission.SOURCE_INDIRECT, + tag, + flags, + ) def add_target_indirect( self, store: Store, proj: Proj, tag: int = 0, flags: int = 0 ) -> None: - self.add_store(store, proj, Permission.TARGET_INDIRECT, tag, flags) + self.add_store( + self._target_indirects, + self._target_indirect_reqs, + store, + proj, + Permission.TARGET_INDIRECT, + tag, + flags, + ) def set_sharding_space(self, space: IndexSpace) -> None: self._sharding_space = space @@ -1054,7 +1134,11 @@ def set_point(self, point: Point) -> None: self._point = point def build_copy(self, launch_domain: Rect) -> IndexCopy: - self._req_analyzer.analyze_requirements() + argbuf = BufferBuilder() + pack_args(argbuf, self._inputs) + pack_args(argbuf, self._outputs + self._reductions) + pack_args(argbuf, self._source_indirects) + pack_args(argbuf, self._target_indirects) copy = IndexCopy( launch_domain, @@ -1062,39 +1146,48 @@ def build_copy(self, launch_domain: Rect) -> IndexCopy: tag=self._tag, provenance=self._provenance, ) - for (req, fields) in self._req_analyzer.requirements: - if req.permission in ( - Permission.SOURCE_INDIRECT, - Permission.TARGET_INDIRECT, - ): - assert len(fields) == 1 - req.proj.add(copy, req, fields[0], _index_copy_calls) - else: - req.proj.add(copy, req, fields, _index_copy_calls) + + def add_requirements( + requirements: list[tuple[RegionReq, int]] + ) -> None: + for (req, field) in requirements: + req.proj.add(copy, req, field, _index_copy_calls) + + add_requirements(self._input_reqs.requirements) + add_requirements(self._output_reqs.requirements) + add_requirements(self._source_indirect_reqs.requirements) + add_requirements(self._target_indirect_reqs.requirements) if self._sharding_space is not None: copy.set_sharding_space(self._sharding_space) 
copy.set_possible_src_indirect_out_of_range(self._source_oor) copy.set_possible_dst_indirect_out_of_range(self._target_oor) + copy.set_mapper_arg(argbuf.get_string(), argbuf.get_size()) return copy def build_single_copy(self) -> SingleCopy: - self._req_analyzer.analyze_requirements() + argbuf = BufferBuilder() + pack_args(argbuf, self._inputs) + pack_args(argbuf, self._outputs + self._reductions) + pack_args(argbuf, self._source_indirects) + pack_args(argbuf, self._target_indirects) copy = SingleCopy( mapper=self.legion_mapper_id, tag=self._tag, provenance=self._provenance, ) - for (req, fields) in self._req_analyzer.requirements: - if req.permission in ( - Permission.SOURCE_INDIRECT, - Permission.TARGET_INDIRECT, - ): - assert len(fields) == 1 - req.proj.add_single(copy, req, fields[0], _single_copy_calls) - else: - req.proj.add_single(copy, req, fields, _single_copy_calls) + + def add_requirements( + requirements: list[tuple[RegionReq, int]] + ) -> None: + for (req, field) in requirements: + req.proj.add_single(copy, req, field, _single_copy_calls) + + add_requirements(self._input_reqs.requirements) + add_requirements(self._output_reqs.requirements) + add_requirements(self._source_indirect_reqs.requirements) + add_requirements(self._target_indirect_reqs.requirements) if self._sharding_space is not None: copy.set_sharding_space(self._sharding_space) @@ -1102,6 +1195,7 @@ def build_single_copy(self) -> SingleCopy: copy.set_point(self._point) copy.set_possible_src_indirect_out_of_range(self._source_oor) copy.set_possible_dst_indirect_out_of_range(self._target_oor) + copy.set_mapper_arg(argbuf.get_string(), argbuf.get_size()) return copy def execute( diff --git a/legate/core/operation.py b/legate/core/operation.py index c079afd0d..a158ece7d 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -837,6 +837,11 @@ def add_reduction( def add_source_indirect( self, store: Store, partition: Optional[PartSym] = None ) -> None: + if len(self._source_indirects) != 0: + raise RuntimeError( + "There can be only up to one source indirection store for " + "a Copy operation" + ) self._check_store(store) if partition is None: partition = self._get_unique_partition(store) @@ -846,6 +851,11 @@ def add_source_indirect( def add_target_indirect( self, store: Store, partition: Optional[PartSym] = None ) -> None: + if len(self._target_indirects) != 0: + raise RuntimeError( + "There can be only up to one target indirection store for " + "a Copy operation" + ) self._check_store(store) if partition is None: partition = self._get_unique_partition(store) diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index bf89ff01b..6150a1908 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -196,7 +196,7 @@ list(APPEND legate_core_SOURCES src/core/mapping/core_mapper.cc src/core/mapping/instance_manager.cc src/core/mapping/mapping.cc - src/core/mapping/task.cc + src/core/mapping/operation.cc src/core/runtime/context.cc src/core/runtime/projection.cc src/core/runtime/runtime.cc @@ -344,8 +344,8 @@ install( install( FILES src/core/mapping/base_mapper.h src/core/mapping/mapping.h - src/core/mapping/task.h - src/core/mapping/task.inl + src/core/mapping/operation.h + src/core/mapping/operation.inl DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/legate/core/mapping) install( diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 983043322..9ae61f62b 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -16,6 +16,7 @@ #include #include 
+#include #include "legion/legion_mapping.h" #include "mappers/mapping_utilities.h" @@ -23,13 +24,14 @@ #include "core/data/store.h" #include "core/mapping/base_mapper.h" #include "core/mapping/instance_manager.h" -#include "core/mapping/task.h" +#include "core/mapping/operation.h" #include "core/runtime/projection.h" #include "core/runtime/shard.h" #include "core/utilities/linearize.h" #include "legate_defines.h" using LegionTask = Legion::Task; +using LegionCopy = Legion::Copy; using namespace Legion; using namespace Legion::Mapping; @@ -37,6 +39,42 @@ using namespace Legion::Mapping; namespace legate { namespace mapping { +namespace { + +const std::vector& default_store_targets(Processor::Kind kind) +{ + static const std::map> defaults = { + {Processor::LOC_PROC, {StoreTarget::SYSMEM}}, + {Processor::TOC_PROC, {StoreTarget::FBMEM, StoreTarget::ZCMEM}}, + {Processor::OMP_PROC, {StoreTarget::SOCKETMEM, StoreTarget::SYSMEM}}, + }; + + auto finder = defaults.find(kind); + if (defaults.end() == finder) LEGATE_ABORT; + return finder->second; +} + +std::string log_mappable(const Mappable& mappable, bool prefix_only = false) +{ + static const std::map prefixes = { + {LEGION_TASK_MAPPABLE, "Task "}, + {LEGION_COPY_MAPPABLE, "Copy "}, + {LEGION_INLINE_MAPPABLE, "Inline mapping "}, + {LEGION_PARTITION_MAPPABLE, "Partition "}, + }; + auto finder = prefixes.find(mappable.get_mappable_type()); +#ifdef DEBUG_LEGATE + assert(finder != prefixes.end()); +#endif + if (prefix_only) return finder->second; + + std::stringstream ss; + ss << finder->second << mappable.get_unique_id(); + return ss.str(); +} + +} // namespace + BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) : Mapper(rt->get_mapper_runtime()), legion_runtime(rt), @@ -65,14 +103,6 @@ BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) local_omps.push_back(local_proc); break; } - case Processor::IO_PROC: { - local_ios.push_back(local_proc); - break; - } - case Processor::PY_PROC: { - local_pys.push_back(local_proc); - break; - } default: break; } } @@ -187,21 +217,8 @@ void BaseMapper::select_task_options(const MapperContext ctx, Task legate_task(&task, context, runtime, ctx); auto target = task_target(legate_task, options); + dispatch(target, [&output](auto& procs) { output.initial_proc = procs.front(); }); // We never want valid instances - switch (target) { - case TaskTarget::CPU: { - output.initial_proc = local_cpus.front(); - break; - } - case TaskTarget::GPU: { - output.initial_proc = local_gpus.front(); - break; - } - case TaskTarget::OMP: { - output.initial_proc = local_omps.front(); - break; - } - } output.valid_instances = false; } @@ -256,21 +273,7 @@ void BaseMapper::slice_auto_task(const MapperContext ctx, } }; - switch (task.target_proc.kind()) { - case Processor::LOC_PROC: { - round_robin(local_cpus); - break; - } - case Processor::TOC_PROC: { - round_robin(local_gpus); - break; - } - case Processor::OMP_PROC: { - round_robin(local_omps); - break; - } - default: LEGATE_ABORT; - } + dispatch(task.target_proc.kind(), round_robin); } void BaseMapper::generate_prime_factor(const std::vector& processors, @@ -306,22 +309,7 @@ const std::vector BaseMapper::get_processor_grid(Legion::Processor::Kin auto finder = proc_grids.find(key); if (finder != proc_grids.end()) return finder->second; - int32_t num_procs = 1; - switch (kind) { - case Processor::LOC_PROC: { - num_procs = static_cast(local_cpus.size()); - break; - } - case Processor::TOC_PROC: { - num_procs = static_cast(local_gpus.size()); - 
break; - } - case Processor::OMP_PROC: { - num_procs = static_cast(local_omps.size()); - break; - } - default: LEGATE_ABORT; - } + int32_t num_procs = dispatch(kind, [](auto& procs) { return procs.size(); }); std::vector grid; auto factor_it = all_factors[kind].begin(); @@ -348,11 +336,6 @@ void BaseMapper::slice_manual_task(const MapperContext ctx, { output.slices.reserve(input.domain.get_volume()); - // Get the domain for the sharding space also - Domain sharding_domain = task.index_domain; - if (task.sharding_space.exists()) - sharding_domain = runtime->get_index_space_domain(ctx, task.sharding_space); - auto distribute = [&](auto& procs) { auto ndim = input.domain.dim; auto& proc_grid = get_processor_grid(task.target_proc.kind(), ndim); @@ -364,21 +347,7 @@ void BaseMapper::slice_manual_task(const MapperContext ctx, } }; - switch (task.target_proc.kind()) { - case Processor::LOC_PROC: { - distribute(local_cpus); - break; - } - case Processor::TOC_PROC: { - distribute(local_gpus); - break; - } - case Processor::OMP_PROC: { - distribute(local_omps); - break; - } - default: LEGATE_ABORT; - } + dispatch(task.target_proc.kind(), distribute); } void BaseMapper::slice_round_robin_task(const MapperContext ctx, @@ -405,21 +374,7 @@ void BaseMapper::slice_round_robin_task(const MapperContext ctx, } }; - switch (task.target_proc.kind()) { - case Processor::LOC_PROC: { - distribute(local_cpus); - break; - } - case Processor::TOC_PROC: { - distribute(local_gpus); - break; - } - case Processor::OMP_PROC: { - distribute(local_omps); - break; - } - default: LEGATE_ABORT; - } + dispatch(task.target_proc.kind(), distribute); } void BaseMapper::slice_task(const MapperContext ctx, @@ -427,74 +382,46 @@ void BaseMapper::slice_task(const MapperContext ctx, const SliceTaskInput& input, SliceTaskOutput& output) { - if (task.tag == LEGATE_CORE_MANUAL_PARALLEL_LAUNCH_TAG) { - if (task.regions.size() == 0) - slice_round_robin_task(ctx, task, input, output); - else - slice_manual_task(ctx, task, input, output); - } else + if (task.tag == LEGATE_CORE_MANUAL_PARALLEL_LAUNCH_TAG) + slice_manual_task(ctx, task, input, output); + else if (task.regions.size() == 0) + slice_round_robin_task(ctx, task, input, output); + else slice_auto_task(ctx, task, input, output); } bool BaseMapper::has_variant(const MapperContext ctx, const LegionTask& task, Processor::Kind kind) { - const std::pair key(task.task_id, kind); - // Check to see if we already have it - auto finder = leaf_variants.find(key); - if ((finder != leaf_variants.end()) && (finder->second != 0)) return true; - std::vector variants; - runtime->find_valid_variants(ctx, key.first, variants, key.second); - // Process all the results, record if we found what we were looking for - bool has_leaf = false; - for (auto vid : variants) { - assert(vid > 0); - switch (vid) { - case LEGATE_CPU_VARIANT: - case LEGATE_OMP_VARIANT: - case LEGATE_GPU_VARIANT: { - has_leaf = true; - leaf_variants[key] = vid; - break; - } - default: // TODO: handle vectorized variants - LEGATE_ABORT; // unhandled variant kind - } - } - if (!has_leaf) leaf_variants[key] = 0; - return has_leaf; + return find_variant(ctx, task, kind).has_value(); } -VariantID BaseMapper::find_variant(const MapperContext ctx, - const LegionTask& task, - Processor::Kind kind) +std::optional BaseMapper::find_variant(const MapperContext ctx, + const LegionTask& task, + Processor::Kind kind) { - const std::pair key(task.task_id, kind); - auto finder = leaf_variants.find(key); - if ((finder != leaf_variants.end()) && 
(finder->second != 0)) return finder->second; + const VariantCacheKey key(task.task_id, kind); + auto finder = variants.find(key); + if (finder != variants.end()) return finder->second; + // Haven't seen it before so let's look it up to make sure it exists - std::vector variants; - runtime->find_valid_variants(ctx, key.first, variants, key.second); - VariantID result = 0; // 0 is reserved - bool has_leaf = false; - // Process all the results, record if we found what we were looking for - for (auto vid : variants) { + std::vector avail_variants; + runtime->find_valid_variants(ctx, key.first, avail_variants, key.second); + std::optional result; + for (auto vid : avail_variants) { +#ifdef DEBUG_LEGATE assert(vid > 0); +#endif switch (vid) { case LEGATE_CPU_VARIANT: case LEGATE_OMP_VARIANT: case LEGATE_GPU_VARIANT: { - has_leaf = true; - leaf_variants[key] = vid; - result = vid; + result = vid; break; } - default: // TODO: handle vectorized variants - LEGATE_ABORT; // unhandled variant kind + default: LEGATE_ABORT; // unhandled variant kind } } - if (!has_leaf) leaf_variants[key] = 0; - // We must always be able to find the variant; - assert(result != 0); + variants[key] = result; return result; } @@ -511,246 +438,139 @@ void BaseMapper::map_task(const MapperContext ctx, assert(task.get_depth() > 0); // Let's populate easy outputs first - output.chosen_variant = find_variant(ctx, task, task.target_proc.kind()); + auto variant = find_variant(ctx, task, task.target_proc.kind()); +#ifdef DEBUG_LEGATE + assert(variant.has_value()); +#endif + output.chosen_variant = *variant; // Just put our target proc in the target processors for now output.target_procs.push_back(task.target_proc); Task legate_task(&task, context, runtime, ctx); - std::vector options; - switch (task.target_proc.kind()) { - case Processor::LOC_PROC: { - options = {StoreTarget::SYSMEM}; - break; - } - case Processor::TOC_PROC: { - options = {StoreTarget::FBMEM, StoreTarget::ZCMEM}; - break; - } - case Processor::OMP_PROC: { - options = {StoreTarget::SOCKETMEM, StoreTarget::SYSMEM}; - break; - } - default: LEGATE_ABORT; - } + const auto& options = default_store_targets(task.target_proc.kind()); auto mappings = store_mappings(legate_task, options); - std::map client_mapped_regions; - std::map client_mapped_futures; - for (uint32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { - auto& mapping = mappings[mapping_idx]; - - assert(mapping.stores.size() > 0); - for (uint32_t store_idx = 1; store_idx < mapping.stores.size(); ++store_idx) { - if (!mapping.stores[store_idx].can_colocate_with(mapping.stores[0])) { - logger.error("Mapper %s tried to colocate stores that cannot colocate", get_mapper_name()); - LEGATE_ABORT; - } + auto validate_colocation = [this](const auto& mapping) { + if (mapping.stores.empty()) { + logger.error("Store mapping must contain at least one store"); + LEGATE_ABORT; } - if (mapping.stores.size() > 1 && mapping.policy.ordering.relative) { logger.error("Colocation with relative dimension ordering is illegal"); LEGATE_ABORT; } - - for (auto& store : mapping.stores) { - if (store.is_future()) { - auto fut_idx = store.future().index(); - client_mapped_futures[fut_idx] = mapping_idx; - continue; + auto& first_store = mapping.stores.front(); + for (auto it = mapping.stores.begin() + 1; it != mapping.stores.end(); ++it) { + if (!it->can_colocate_with(first_store)) { + logger.error("Mapper %s tried to colocate stores that cannot colocate", get_mapper_name()); + LEGATE_ABORT; } + } + 
assert(!(mapping.for_future() || mapping.for_unbound_store()) || mapping.stores.size() == 1); + }; + +#ifdef DEBUG_LEGATE + for (auto& mapping : mappings) validate_colocation(mapping); +#endif - auto& rf = store.region_field(); - auto key = rf.unique_id(); - - auto finder = client_mapped_regions.find(key); - // If this is the first store mapping for this requirement, - // we record the mapping index for future reference. - if (finder == client_mapped_regions.end()) client_mapped_regions[key] = mapping_idx; - // If we're still in the same store mapping, we know for sure - // that the mapping is consistent. - else { - if (finder->second == mapping_idx) continue; - // Otherwise, we do consistency checking - auto& other_mapping = mappings[finder->second]; - if (mapping.policy != other_mapping.policy) { + std::vector for_futures, for_unbound_stores, for_stores; + std::set mapped_futures; + std::set mapped_regions; + + for (auto& mapping : mappings) { + if (mapping.for_future()) { + mapped_futures.insert(mapping.store().future_index()); + for_futures.push_back(std::move(mapping)); + } else if (mapping.for_unbound_store()) { + mapped_regions.insert(mapping.store().unique_region_field_id()); + for_unbound_stores.push_back(std::move(mapping)); + } else { + for (auto& store : mapping.stores) mapped_regions.insert(store.unique_region_field_id()); + for_stores.push_back(std::move(mapping)); + } + } + + auto check_consistency = [this](const auto& mappings) { + std::map policies; + for (const auto& mapping : mappings) + for (auto& store : mapping.stores) { + auto key = store.unique_region_field_id(); + auto finder = policies.find(key); + if (policies.end() == finder) + policies[key] = mapping.policy; + else if (mapping.policy != finder->second) { logger.error("Mapper %s returned inconsistent store mappings", get_mapper_name()); LEGATE_ABORT; } } - } - } + }; +#ifdef DEBUG_LEGATE + check_consistency(for_stores); +#endif // Generate default mappings for stores that are not yet mapped by the client mapper auto default_option = options.front(); auto generate_default_mappings = [&](auto& stores, bool exact) { for (auto& store : stores) { + auto mapping = StoreMapping::default_mapping(store, default_option, exact); if (store.is_future()) { - auto fut_idx = store.future().index(); - if (client_mapped_futures.find(fut_idx) == client_mapped_futures.end()) - mappings.push_back(StoreMapping::default_mapping(store, default_option, exact)); - continue; + auto fut_idx = store.future_index(); + if (mapped_futures.find(fut_idx) != mapped_futures.end()) continue; + mapped_futures.insert(fut_idx); + for_futures.push_back(std::move(mapping)); } else { - auto key = store.region_field().unique_id(); - if (client_mapped_regions.find(key) != client_mapped_regions.end()) continue; - client_mapped_regions[key] = static_cast(mappings.size()); - mappings.push_back(StoreMapping::default_mapping(store, default_option, exact)); + auto key = store.unique_region_field_id(); + if (mapped_regions.find(key) != mapped_regions.end()) continue; + mapped_regions.insert(key); + if (store.unbound()) + for_unbound_stores.push_back(std::move(mapping)); + else + for_stores.push_back(std::move(mapping)); } } }; - generate_default_mappings(legate_task.inputs(), false); generate_default_mappings(legate_task.outputs(), false); generate_default_mappings(legate_task.reductions(), false); - output.chosen_instances.resize(task.regions.size()); - - bool can_fail = true; - std::map> instance_to_mappings; - std::map mapping_to_instance; - std::vector 
handled(mappings.size(), false); - - // See case of failed instance creation below - auto tighten_write_reqs = [&]() { - for (int32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { - auto& mapping = mappings[mapping_idx]; - PrivilegeMode priv = LEGION_NO_ACCESS; -#ifdef DEBUG_LEGATE - std::stringstream reqs_ss; -#endif - for (auto req_idx : mapping.requirement_indices()) { - const RegionRequirement& req = task.regions[req_idx]; - if (!req.region.exists()) continue; - priv |= req.privilege; -#ifdef DEBUG_LEGATE - reqs_ss << " " << req_idx; -#endif - } - if (!(priv & LEGION_WRITE_PRIV) || mapping.policy.exact) continue; -#ifdef DEBUG_LEGATE - logger.debug() << "Task " << task.get_unique_id() - << ": tightened mapping policy for reqs:" << reqs_ss.str(); -#endif - mapping.policy.exact = true; - if (!handled[mapping_idx]) continue; - handled[mapping_idx] = false; - auto m2i_it = mapping_to_instance.find(mapping_idx); - if (m2i_it == mapping_to_instance.end()) continue; - PhysicalInstance inst = m2i_it->second; - mapping_to_instance.erase(m2i_it); - auto i2m_it = instance_to_mappings.find(inst); - i2m_it->second.erase(mapping_idx); - if (i2m_it->second.empty()) { - runtime->release_instance(ctx, inst); - instance_to_mappings.erase(i2m_it); - } - } - }; - - // Mapping each field separately for each of the logical regions - for (int32_t mapping_idx = 0; mapping_idx < mappings.size(); ++mapping_idx) { - if (handled[mapping_idx]) continue; - auto& mapping = mappings[mapping_idx]; - auto req_indices = mapping.requirement_indices(); - - if (req_indices.empty()) { - // This is a mapping for futures + // Map future-backed stores + auto map_futures = [&](auto& mappings) { + for (auto& mapping : mappings) { StoreTarget target = mapping.policy.target; #ifdef LEGATE_NO_FUTURES_ON_FB if (target == StoreTarget::FBMEM) target = StoreTarget::ZCMEM; #endif output.future_locations.push_back(get_target_memory(task.target_proc, target)); - handled[mapping_idx] = true; - continue; } - - if (mapping.for_unbound_stores()) { - for (auto req_idx : req_indices) { - output.output_targets[req_idx] = get_target_memory(task.target_proc, mapping.policy.target); - auto ndim = mapping.stores.front().dim(); - // FIXME: Unbound stores can have more than one dimension later - std::vector dimension_ordering; - for (int32_t dim = ndim - 1; dim >= 0; --dim) - dimension_ordering.push_back( - static_cast(static_cast(DimensionKind::LEGION_DIM_X) + dim)); - dimension_ordering.push_back(DimensionKind::LEGION_DIM_F); - output.output_constraints[req_idx].ordering_constraint = - OrderingConstraint(dimension_ordering, false); - } - handled[mapping_idx] = true; - continue; - } - - std::vector> reqs; -#ifdef DEBUG_LEGATE - std::stringstream reqs_ss; -#endif - for (auto req_idx : req_indices) { - const auto& req = task.regions[req_idx]; - if (!req.region.exists()) continue; - reqs.push_back(std::cref(req)); -#ifdef DEBUG_LEGATE - reqs_ss << " " << req_idx; -#endif - } - if (reqs.empty()) { - handled[mapping_idx] = true; - continue; - } - - // Get an instance and acquire it if necessary. If the acquire fails then prune it from the - // mapper's data structures and retry, until we succeed or map_legate_store fails with an out of - // memory error. 
- PhysicalInstance result; - while (map_legate_store(ctx, task, mapping, reqs, task.target_proc, result, can_fail)) { - if (result == PhysicalInstance()) break; - if (instance_to_mappings.count(result) > 0 || runtime->acquire_instance(ctx, result)) { -#ifdef DEBUG_LEGATE - logger.debug() << "Task " << task.get_unique_id() << ": acquired instance " << result - << " for reqs:" << reqs_ss.str(); -#endif - break; - } -#ifdef DEBUG_LEGATE - logger.debug() << "Task " << task.get_unique_id() << ": failed to acquire instance " << result - << " for reqs:" << reqs_ss.str(); -#endif - AutoLock lock(ctx, local_instances->manager_lock()); - local_instances->erase(result); - } - - // If instance creation failed we try mapping all stores again, but request tight instances for - // write requirements. The hope is that these write requirements cover the entire region (i.e. - // they use a complete partition), so the new tight instances will invalidate any pre-existing - // "bloated" instances for the same region, freeing up enough memory so that mapping can succeed - if (result == PhysicalInstance()) { -#ifdef DEBUG_LEGATE - logger.debug() << "Task " << task.get_unique_id() - << ": failed mapping for reqs:" << reqs_ss.str(); -#endif - assert(can_fail); - tighten_write_reqs(); - mapping_idx = -1; - can_fail = false; - continue; + }; + map_futures(for_futures); + + // Map unbound stores + auto map_unbound_stores = [&](auto& mappings) { + for (auto& mapping : mappings) { + auto req_idx = mapping.requirement_index(); + output.output_targets[req_idx] = get_target_memory(task.target_proc, mapping.policy.target); + auto ndim = mapping.store().dim(); + // FIXME: Unbound stores can have more than one dimension later + std::vector dimension_ordering; + for (int32_t dim = ndim - 1; dim >= 0; --dim) + dimension_ordering.push_back( + static_cast(static_cast(DimensionKind::LEGION_DIM_X) + dim)); + dimension_ordering.push_back(DimensionKind::LEGION_DIM_F); + output.output_constraints[req_idx].ordering_constraint = + OrderingConstraint(dimension_ordering, false); } + }; + map_unbound_stores(for_unbound_stores); - // Success; record the instance for this mapping. -#ifdef DEBUG_LEGATE - logger.debug() << "Task " << task.get_unique_id() - << ": completed mapping for reqs:" << reqs_ss.str(); -#endif - instance_to_mappings[result].insert(mapping_idx); - mapping_to_instance[mapping_idx] = result; - handled[mapping_idx] = true; - } + output.chosen_instances.resize(task.regions.size()); + std::map*> output_map; + for (uint32_t idx = 0; idx < task.regions.size(); ++idx) + output_map[&task.regions[idx]] = &output.chosen_instances[idx]; - // Succeeded in mapping all stores, record it on map_task output. - for (const auto& m2i : mapping_to_instance) - for (auto req_idx : mappings[m2i.first].requirement_indices()) - if (task.regions[req_idx].region.exists()) - output.chosen_instances[req_idx].push_back(m2i.second); + map_legate_stores(ctx, task, for_stores, task.target_proc, output_map); } void BaseMapper::map_replicate_task(const MapperContext ctx, @@ -762,35 +582,6 @@ void BaseMapper::map_replicate_task(const MapperContext ctx, LEGATE_ABORT; } -bool BaseMapper::find_existing_instance(const MapperContext ctx, - LogicalRegion region, - FieldID fid, - Memory target_memory, - PhysicalInstance& result, - Strictness strictness, - bool acquire_instance_lock) -{ - std::unique_ptr lock = - acquire_instance_lock ? 
std::make_unique(ctx, local_instances->manager_lock()) - : nullptr; - // See if we already have it in our local instances - if (local_instances->find_instance(region, fid, target_memory, result)) - return true; - else if (strictness == Strictness::strict) - return false; - - // See if we can find an existing instance in any memory - if (local_instances->find_instance(region, fid, local_system_memory, result)) return true; - - for (auto& pair : local_frame_buffers) - if (local_instances->find_instance(region, fid, pair.second, result)) return true; - - for (auto& pair : local_numa_domains) - if (local_instances->find_instance(region, fid, pair.second, result)) return true; - - return false; -} - Memory BaseMapper::get_target_memory(Processor proc, StoreTarget target) { switch (target) { @@ -804,26 +595,120 @@ Memory BaseMapper::get_target_memory(Processor proc, StoreTarget target) return Memory::NO_MEMORY; } +void BaseMapper::map_legate_stores(const MapperContext ctx, + const Mappable& mappable, + std::vector& mappings, + Processor target_proc, + OutputMap& output_map) +{ + auto try_mapping = [&](bool can_fail) { + const PhysicalInstance NO_INST{}; + std::vector instances; + for (auto& mapping : mappings) { + PhysicalInstance result = NO_INST; + auto reqs = mapping.requirements(); + while (map_legate_store(ctx, mappable, mapping, reqs, target_proc, result, can_fail)) { + if (NO_INST == result) { +#ifdef DEBUG_LEGATE + assert(can_fail); +#endif + for (auto& instance : instances) runtime->release_instance(ctx, instance); + return false; + } +#ifdef DEBUG_LEGATE + std::stringstream reqs_ss; + for (auto req_idx : mapping.requirement_indices()) reqs_ss << " " << req_idx; +#endif + if (runtime->acquire_instance(ctx, result)) { +#ifdef DEBUG_LEGATE + logger.debug() << log_mappable(mappable) << ": acquired instance " << result + << " for reqs:" << reqs_ss.str(); +#endif + break; + } +#ifdef DEBUG_LEGATE + logger.debug() << log_mappable(mappable) << ": failed to acquire instance " << result + << " for reqs:" << reqs_ss.str(); +#endif + AutoLock lock(ctx, local_instances->manager_lock()); + local_instances->erase(result); + result = NO_INST; + } + instances.push_back(result); + } + + // If we're here, all stores are mapped and instances are all acquired + for (uint32_t idx = 0; idx < mappings.size(); ++idx) { + auto& mapping = mappings[idx]; + auto& instance = instances[idx]; + for (auto& req : mapping.requirements()) output_map[req]->push_back(instance); + } + return true; + }; + + // We can retry the mapping with tightened policies only if at least one of the policies + // is lenient + bool can_fail = false; + for (auto& mapping : mappings) can_fail = can_fail || !mapping.policy.exact; + + if (!try_mapping(can_fail)) { +#ifdef DEBUG_LEGATE + logger.debug() << log_mappable(mappable) << " failed to map all stores, retrying with " + << "tighter policies"; +#endif + // If instance creation failed we try mapping all stores again, but request tight instances for + // write requirements. The hope is that these write requirements cover the entire region (i.e. 
+ // they use a complete partition), so the new tight instances will invalidate any pre-existing + // "bloated" instances for the same region, freeing up enough memory so that mapping can succeed + tighten_write_policies(mappable, mappings); + try_mapping(false); + } +} + +void BaseMapper::tighten_write_policies(const Mappable& mappable, + std::vector& mappings) +{ + for (auto& mapping : mappings) { + // If the policy is exact, there's nothing we can tighten + if (mapping.policy.exact) continue; + + PrivilegeMode priv = LEGION_NO_ACCESS; + for (auto* req : mapping.requirements()) priv |= req->privilege; + // We tighten only write requirements + if (!(priv & LEGION_WRITE_PRIV)) continue; + +#ifdef DEBUG_LEGATE + std::stringstream reqs_ss; + for (auto req_idx : mapping.requirement_indices()) reqs_ss << " " << req_idx; + logger.debug() << log_mappable(mappable) + << ": tightened mapping policy for reqs:" << reqs_ss.str(); +#endif + mapping.policy.exact = true; + } +} + bool BaseMapper::map_legate_store(const MapperContext ctx, const Mappable& mappable, const StoreMapping& mapping, - std::vector> reqs, + const std::set& reqs, Processor target_proc, PhysicalInstance& result, bool can_fail) { + if (reqs.empty()) return false; + const auto& policy = mapping.policy; std::vector regions; - for (auto& req : reqs) regions.push_back(req.get().region); + for (auto* req : reqs) regions.push_back(req->region); auto target_memory = get_target_memory(target_proc, policy.target); ReductionOpID redop = 0; bool first = true; - for (auto& req : reqs) { + for (auto* req : reqs) { if (first) - redop = req.get().redop; + redop = req->redop; else { - if (redop != req.get().redop) { + if (redop != req->redop) { logger.error( "Colocated stores should be either non-reduction arguments " "or reductions with the same reduction operator."); @@ -970,281 +855,41 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, return true; } -bool BaseMapper::map_raw_array(const MapperContext ctx, - const Mappable& mappable, - uint32_t index, - LogicalRegion region, - FieldID fid, - Memory target_memory, - Processor target_proc, - const std::vector& valid, - PhysicalInstance& result, - bool memoize_result, - ReductionOpID redop /*=0*/) -{ - // If we're making a reduction instance, we should just make it now - if (redop != 0) { - // Switch the target memory if we're going to a GPU because - // Realm's DMA system still does not support reductions - const std::vector regions(1, region); - LayoutConstraintSet layout_constraints; - // No specialization - layout_constraints.add_constraint(SpecializedConstraint(REDUCTION_FOLD_SPECIALIZE, redop)); - // SOA-C dimension ordering - std::vector dimension_ordering(4); - dimension_ordering[0] = DIM_Z; - dimension_ordering[1] = DIM_Y; - dimension_ordering[2] = DIM_X; - dimension_ordering[3] = DIM_F; - layout_constraints.add_constraint(OrderingConstraint(dimension_ordering, false /*contiguous*/)); - // Constraint for the kind of memory - layout_constraints.add_constraint(MemoryConstraint(target_memory.kind())); - // Make sure we have our field - const std::vector fields(1, fid); - layout_constraints.add_constraint(FieldConstraint(fields, true /*contiguous*/)); - if (!runtime->create_physical_instance( - ctx, target_memory, layout_constraints, regions, result, true /*acquire*/)) - report_failed_mapping(mappable, index, target_memory, redop); - // We already did the acquire - return false; - } - - AutoLock lock(ctx, local_instances->manager_lock()); - - // See if we already have it in our 
local instances - if (local_instances->find_instance(region, fid, target_memory, result)) - // Needs acquire to keep the runtime happy - return true; - - // There's a little asymmetry here between CPUs and GPUs for NUMA effects - // For CPUs NUMA-effects are within a factor of 2X additional latency and - // reduced bandwidth, so it's better to just use data where it is rather - // than move it. For GPUs though, the difference between local framebuffer - // and remote can be on the order of 800 GB/s versus 20 GB/s over NVLink - // so it's better to move things local, so we'll always try to make a local - // instance before checking for a nearby instance in a different GPU. - if (target_proc.exists() && ((target_proc.kind() == Processor::LOC_PROC) || - (target_proc.kind() == Processor::OMP_PROC))) { - Machine::MemoryQuery affinity_mems(machine); - affinity_mems.has_affinity_to(target_proc); - for (auto memory : affinity_mems) { - if (local_instances->find_instance(region, fid, memory, result)) - // Needs acquire to keep the runtime happy - return true; - } - } - // This whole process has to appear atomic - runtime->disable_reentrant(ctx); - // Haven't made this instance before, so make it now - // We can do an interesting optimization here to try to reduce unnecessary - // inter-memory copies. For logical regions that are overlapping we try - // to accumulate as many as possible into one physical instance and use - // that instance for all the tasks for the different regions. - // First we have to see if there is anything we overlap with - const IndexSpace is = region.get_index_space(); - const Domain domain = runtime->get_index_space_domain(ctx, is); - auto group = local_instances->find_region_group(region, domain, fid, target_memory); - - // We're going to need some of this constraint information no matter - // which path we end up taking below - LayoutConstraintSet layout_constraints; - // No specialization - layout_constraints.add_constraint(SpecializedConstraint()); - // SOA-C dimension ordering - std::vector dimension_ordering(4); - dimension_ordering[0] = DIM_Z; - dimension_ordering[1] = DIM_Y; - dimension_ordering[2] = DIM_X; - dimension_ordering[3] = DIM_F; - layout_constraints.add_constraint(OrderingConstraint(dimension_ordering, false /*contiguous*/)); - // Constraint for the kind of memory - layout_constraints.add_constraint(MemoryConstraint(target_memory.kind())); - // Make sure we have our field - const std::vector fields(1, fid); - layout_constraints.add_constraint(FieldConstraint(fields, true /*contiguous*/)); - - bool created; - size_t footprint; - if (runtime->find_or_create_physical_instance(ctx, - target_memory, - layout_constraints, - group->get_regions(), - result, - created, - true /*acquire*/, - memoize_result ? 
GC_NEVER_PRIORITY : 0, - false /*tight bounds*/, - &footprint)) { - // We succeeded in making the instance where we want it - assert(result.exists()); - if (created) - logger.info("%s created instance %lx containing %zd bytes in memory " IDFMT, - get_mapper_name(), - result.get_instance_id(), - footprint, - target_memory.id); - // Only save the result for future use if it is not an external instance - if (memoize_result && !result.is_external_instance()) { - auto replaced = local_instances->record_instance(group, fid, result); - for (auto& instance : replaced) { - if (!instance.is_external_instance()) - runtime->set_garbage_collection_priority(ctx, instance, 0); - } - } - // We made it so no need for an acquire - runtime->enable_reentrant(ctx); - return false; - } - // Done with the atomic part - runtime->enable_reentrant(ctx); - - // If we get here it's because we failed to make the instance, we still - // have a few more tricks that we can try - // First see if we can find an existing valid instance that we can use - // with affinity to our target processor - if (!valid.empty()) - for (auto& instance : valid) { - // If it doesn't have the field then we don't care - if (instance.has_field(fid)) continue; - if (!target_proc.exists() || machine.has_affinity(target_proc, instance.get_location())) { - result = instance; - return true; - } - } - - // Still couldn't find an instance, see if we can find any instances - // in memories that are local to our node that we can use - if (target_proc.exists()) { - Machine::MemoryQuery affinity_mems(machine); - affinity_mems.has_affinity_to(target_proc); - for (auto mem : affinity_mems) - if (local_instances->find_instance(region, fid, mem, result)) - // Needs acquire to keep the runtime happy - return true; - } else if (find_existing_instance( - ctx, region, fid, target_memory, result, Strictness::strict, false)) - return true; - // If we make it here then we failed entirely - report_failed_mapping(mappable, index, target_memory, redop); - return true; -} - -void BaseMapper::filter_failed_acquires(const MapperContext ctx, - std::vector& needed_acquires, - std::set& failed_acquires) -{ - AutoLock lock(ctx, local_instances->manager_lock()); - for (auto& instance : needed_acquires) { - if (failed_acquires.find(instance) != failed_acquires.end()) continue; - failed_acquires.insert(instance); - local_instances->erase(instance); - } - needed_acquires.clear(); -} - void BaseMapper::report_failed_mapping(const Mappable& mappable, uint32_t index, Memory target_memory, ReductionOpID redop) { - const char* memory_kinds[] = { + static const char* memory_kinds[] = { #define MEM_NAMES(name, desc) desc, REALM_MEMORY_KINDS(MEM_NAMES) #undef MEM_NAMES }; + + std::string opname = ""; + if (mappable.get_mappable_type() == Mappable::TASK_MAPPABLE) { + const auto task = mappable.as_task(); + opname = task->get_task_name(); + } + std::string provenance = mappable.get_provenance_string(); if (provenance.empty()) provenance = "unknown provenance"; - switch (mappable.get_mappable_type()) { - case Mappable::TASK_MAPPABLE: { - const auto task = mappable.as_task(); - if (redop > 0) - logger.error( - "Mapper %s failed to map reduction (%d) region " - "requirement %d of task %s [%s] (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - redop, - index, - task->get_task_name(), - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - else - logger.error( - "Mapper %s failed to map region requirement %d of " - "task %s [%s] 
(UID %lld) into %s memory " IDFMT, - get_mapper_name(), - index, - task->get_task_name(), - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - break; - } - case Mappable::COPY_MAPPABLE: { - if (redop > 0) - logger.error( - "Mapper %s failed to map reduction (%d) region " - "requirement %d of copy [%s] (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - redop, - index, - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - else - logger.error( - "Mapper %s failed to map region requirement %d of " - "copy [%s] (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - index, - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - break; - } - case Mappable::INLINE_MAPPABLE: { - if (redop > 0) - logger.error( - "Mapper %s failed to map reduction (%d) region " - "requirement %d of inline mapping [%s] (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - redop, - index, - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - else - logger.error( - "Mapper %s failed to map region requirement %d of " - "inline mapping [%s] (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - index, - provenance.c_str(), - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - break; - } - case Mappable::PARTITION_MAPPABLE: { - assert(redop == 0); - logger.error( - "Mapper %s failed to map region requirement %d of " - "partition (UID %lld) into %s memory " IDFMT, - get_mapper_name(), - index, - mappable.get_unique_id(), - memory_kinds[target_memory.kind()], - target_memory.id); - break; - } - default: LEGATE_ABORT; // should never get here - } - LEGATE_ABORT; + + std::stringstream req_ss; + if (redop > 0) + req_ss << "reduction (" << redop << ") requirement " << index; + else + req_ss << "region requirement " << index; + + logger.error("Mapper %s failed to map %s of %s%s[%s] (UID %lld) into %s memory " IDFMT, + get_mapper_name(), + req_ss.str().c_str(), + log_mappable(mappable, true /*prefix_only*/).c_str(), + opname.c_str(), + provenance.c_str(), + mappable.get_unique_id(), + memory_kinds[target_memory.kind()], + target_memory.id); } void BaseMapper::select_task_variant(const MapperContext ctx, @@ -1252,7 +897,11 @@ void BaseMapper::select_task_variant(const MapperContext ctx, const SelectVariantInput& input, SelectVariantOutput& output) { - output.chosen_variant = find_variant(ctx, task, input.processor.kind()); + auto variant = find_variant(ctx, task, input.processor.kind()); +#ifdef DEBUG_LEGATE + assert(variant.has_value()); +#endif + output.chosen_variant = *variant; } void BaseMapper::postmap_task(const MapperContext ctx, @@ -1307,14 +956,6 @@ void BaseMapper::legate_select_sources(const MapperContext ctx, if (!affinity.empty()) { assert(affinity.size() == 1); memory_bandwidth = affinity[0].bandwidth; -#if 0 - } else { - // TODO: More graceful way of dealing with multi-hop copies - logger.warning("Legate mapper is potentially " - "requesting a multi-hop copy between memories " - IDFMT " and " IDFMT "!", location.id, - destination_memory.id); -#endif } source_memories[location] = memory_bandwidth; band_ranking.push_back(std::pair(instance, memory_bandwidth)); @@ -1349,18 +990,26 @@ void BaseMapper::report_profiling(const MapperContext ctx, LEGATE_ABORT; } +ShardingID BaseMapper::find_sharding_functor_by_key_store_projection( + const 
std::vector& requirements) +{ + ProjectionID proj_id = 0; + for (auto& requirement : requirements) + if (LEGATE_CORE_KEY_STORE_TAG == requirement.tag) { + proj_id = requirement.projection; + break; + } + return find_sharding_functor_by_projection_functor(proj_id); +} + void BaseMapper::select_sharding_functor(const MapperContext ctx, const LegionTask& task, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { - for (auto& req : task.regions) - if (req.tag == LEGATE_CORE_KEY_STORE_TAG) { - output.chosen_functor = find_sharding_functor_by_projection_functor(req.projection); - return; - } - - output.chosen_functor = 0; + output.chosen_functor = task.is_index_space + ? find_sharding_functor_by_key_store_projection(task.regions) + : find_sharding_functor_by_projection_functor(0); } void BaseMapper::map_inline(const MapperContext ctx, @@ -1368,49 +1017,26 @@ void BaseMapper::map_inline(const MapperContext ctx, const MapInlineInput& input, MapInlineOutput& output) { - const std::vector& valid = input.valid_instances; - const RegionRequirement& req = inline_op.requirement; - output.chosen_instances.resize(req.privilege_fields.size()); - uint32_t index = 0; - std::vector needed_acquires; - for (auto fid : req.privilege_fields) { - if (map_raw_array(ctx, - inline_op, - 0, - req.region, - fid, - local_system_memory, - inline_op.parent_task->current_proc, - valid, - output.chosen_instances[index], - false /*memoize*/, - req.redop)) - needed_acquires.push_back(output.chosen_instances[index]); - ++index; - } - while (!needed_acquires.empty() && - !runtime->acquire_and_filter_instances(ctx, needed_acquires, true /*filter on acquire*/)) { - assert(!needed_acquires.empty()); - std::set failed_instances; - filter_failed_acquires(ctx, needed_acquires, failed_instances); - // Now go through all the fields for the instances and try and remap - std::set::const_iterator fit = req.privilege_fields.begin(); - for (uint32_t idx = 0; idx < output.chosen_instances.size(); idx++, fit++) { - if (failed_instances.find(output.chosen_instances[idx]) == failed_instances.end()) continue; - // Now try to remap it - if (map_raw_array(ctx, - inline_op, - 0 /*idx*/, - req.region, - *fit, - local_system_memory, - inline_op.parent_task->current_proc, - valid, - output.chosen_instances[idx], - false /*memoize*/)) - needed_acquires.push_back(output.chosen_instances[idx]); - } - } + Processor target_proc{Processor::NO_PROC}; + if (!local_omps.empty()) + target_proc = local_omps.front(); + else + target_proc = local_cpus.front(); + + auto store_target = default_store_targets(target_proc.kind()).front(); + +#ifdef DEBUG_LEGATE + assert(inline_op.requirement.instance_fields.size() == 1); +#endif + + Store store(legion_runtime->get_mapper_runtime(), ctx, &inline_op.requirement); + std::vector mappings; + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + + std::map*> output_map; + for (auto* req : mappings.front().requirements()) output_map[req] = &output.chosen_instances; + + map_legate_stores(ctx, inline_op, mappings, target_proc, output_map); } void BaseMapper::select_inline_sources(const MapperContext ctx, @@ -1430,181 +1056,97 @@ void BaseMapper::report_profiling(const MapperContext ctx, } void BaseMapper::map_copy(const MapperContext ctx, - const Copy& copy, + const LegionCopy& copy, const MapCopyInput& input, MapCopyOutput& output) { - // We should always be able to materialize instances of the things - // we are copying so make concrete source instances - std::vector 
needed_acquires; - Memory target_memory = local_system_memory; - /* - if (copy.is_index_space) { - // If we've got GPUs, assume we're using them - if (!local_gpus.empty() || !local_omps.empty()) { - const ShardingID sid = select_sharding_functor(copy); - NumPyShardingFunctor* functor = find_sharding_functor(sid); - Domain sharding_domain = copy.index_domain; - if (copy.sharding_space.exists()) - sharding_domain = runtime->get_index_space_domain(ctx, copy.sharding_space); - const uint32_t local_index = - functor->localize(copy.index_point, sharding_domain, total_nodes, local_node); - if (!local_gpus.empty()) { - const Processor proc = local_gpus[local_index % local_gpus.size()]; - target_memory = local_frame_buffers[proc]; - } else { - const Processor proc = local_omps[local_index % local_omps.size()]; - target_memory = local_numa_domains[proc]; - } - } - } else { - */ - { - // If we have just one local GPU then let's use it, otherwise punt to CPU - // since it's not clear which one we should use - if (local_frame_buffers.size() == 1) target_memory = local_frame_buffers.begin()->second; - } + Processor target_proc{Processor::NO_PROC}; - auto map_stores = [&](auto idx, auto& req, auto& inputs, auto& outputs) { - auto& region = req.region; - outputs.resize(req.privilege_fields.size()); - const auto& valid = inputs; - uint32_t fidx = 0; - const bool memoize = req.privilege != LEGION_REDUCE; - for (auto fid : req.privilege_fields) { - if (req.redop != 0) { - ++fidx; - continue; - } - if (find_existing_instance(ctx, region, fid, target_memory, outputs[fidx]) || - map_raw_array(ctx, - copy, - idx, - region, - fid, - target_memory, - Processor::NO_PROC, - valid, - outputs[fidx], - memoize)) - needed_acquires.push_back(outputs[fidx]); - ++fidx; - } - }; + uint32_t proc_id = 0; + if (copy.is_index_space) { + Domain sharding_domain = copy.index_domain; + if (copy.sharding_space.exists()) + sharding_domain = runtime->get_index_space_domain(ctx, copy.sharding_space); - auto dst_offset = copy.src_requirements.size(); - auto src_indirect_offset = dst_offset + copy.dst_requirements.size(); - auto dst_indirect_offset = src_indirect_offset + copy.src_indirect_requirements.size(); - - for (uint32_t idx = 0; idx < copy.src_requirements.size(); idx++) { - map_stores( - idx, copy.src_requirements[idx], input.src_instances[idx], output.src_instances[idx]); - - map_stores(idx + dst_offset, - copy.dst_requirements[idx], - input.dst_instances[idx], - output.dst_instances[idx]); - - if (idx < copy.src_indirect_requirements.size()) { - std::vector outputs; - map_stores(idx + src_indirect_offset, - copy.src_indirect_requirements[idx], - input.src_indirect_instances[idx], - outputs); - output.src_indirect_instances[idx] = outputs[0]; - } + // FIXME: We might later have non-identity projections for copy requirements, + // in which case we should find the key store and use its projection functor + // for the linearization + auto* key_functor = find_legate_projection_functor(0); - if (idx < copy.dst_indirect_requirements.size()) { - std::vector outputs; - map_stores(idx + dst_indirect_offset, - copy.dst_indirect_requirements[idx], - input.dst_indirect_instances[idx], - outputs); - output.dst_indirect_instances[idx] = outputs[0]; + if (key_functor != nullptr) { + auto lo = key_functor->project_point(sharding_domain.lo(), sharding_domain); + auto hi = key_functor->project_point(sharding_domain.hi(), sharding_domain); + auto p = key_functor->project_point(copy.index_point, sharding_domain); + proc_id = linearize(lo, hi, p); 
+ } else { + proc_id = linearize(sharding_domain.lo(), sharding_domain.hi(), copy.index_point); } } + if (!local_gpus.empty()) + target_proc = local_gpus[proc_id % local_gpus.size()]; + else if (!local_omps.empty()) + target_proc = local_omps[proc_id % local_omps.size()]; + else + target_proc = local_cpus[proc_id % local_cpus.size()]; - auto remap_stores = [&](auto idx, auto& req, auto& inputs, auto& outputs, auto& failed_acquires) { - auto& region = req.region; - const auto& valid = inputs; - uint32_t fidx = 0; - const bool memoize = req.privilege != LEGION_REDUCE; - for (auto fid : req.privilege_fields) { - if (failed_acquires.find(outputs[fidx]) == failed_acquires.end()) { - ++fidx; - continue; - } - if (map_raw_array(ctx, - copy, - idx, - region, - fid, - target_memory, - Processor::NO_PROC, - valid, - outputs[fidx], - memoize)) - needed_acquires.push_back(outputs[fidx]); - ++fidx; - } + auto store_target = default_store_targets(target_proc.kind()).front(); + + Copy legate_copy(&copy, runtime, ctx); + + std::map*> output_map; + auto add_to_output_map = [&output_map](auto& reqs, auto& instances) { + instances.resize(reqs.size()); + for (uint32_t idx = 0; idx < reqs.size(); ++idx) output_map[&reqs[idx]] = &instances[idx]; }; + add_to_output_map(copy.src_requirements, output.src_instances); + add_to_output_map(copy.dst_requirements, output.dst_instances); - while (!needed_acquires.empty() && - !runtime->acquire_and_filter_instances(ctx, needed_acquires, true /*filter on acquire*/)) { - assert(!needed_acquires.empty()); - // If we failed to acquire any of the instances we need to prune them - // out of the mapper's data structure so do that first - std::set failed_acquires; - filter_failed_acquires(ctx, needed_acquires, failed_acquires); - - // Now go through and try to remap region requirements with failed acquisitions - for (uint32_t idx = 0; idx < copy.src_requirements.size(); idx++) { - remap_stores(idx, - copy.src_requirements[idx], - input.src_instances[idx], - output.src_instances[idx], - failed_acquires); - - remap_stores(idx + dst_offset, - copy.dst_requirements[idx], - input.dst_instances[idx], - output.dst_instances[idx], - failed_acquires); - if (idx < copy.src_indirect_requirements.size()) { - std::vector outputs(1, output.src_indirect_instances[idx]); - remap_stores(idx + src_indirect_offset, - copy.src_indirect_requirements[idx], - input.src_indirect_instances[idx], - outputs, - failed_acquires); - } - if (idx < copy.dst_indirect_requirements.size()) { - std::vector outputs(1, output.dst_indirect_instances[idx]); - remap_stores(idx + dst_indirect_offset, - copy.dst_indirect_requirements[idx], - input.dst_indirect_instances[idx], - outputs, - failed_acquires); - } - } +#ifdef DEBUG_LEGATE + assert(copy.src_indirect_requirements.size() <= 1); + assert(copy.dst_indirect_requirements.size() <= 1); +#endif + if (!copy.src_indirect_requirements.empty()) { + // This is to make the push_back call later add the instance to the right place + output.src_indirect_instances.clear(); + output_map[&copy.src_indirect_requirements.front()] = &output.src_indirect_instances; } + if (!copy.dst_indirect_requirements.empty()) { + // This is to make the push_back call later add the instance to the right place + output.dst_indirect_instances.clear(); + output_map[&copy.dst_indirect_requirements.front()] = &output.dst_indirect_instances; + } + + std::vector mappings; + + for (auto& store : legate_copy.inputs()) + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + for (auto& store : 
legate_copy.outputs()) + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + for (auto& store : legate_copy.input_indirections()) + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + for (auto& store : legate_copy.output_indirections()) + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + + map_legate_stores(ctx, copy, mappings, target_proc, output_map); } void BaseMapper::select_copy_sources(const MapperContext ctx, - const Copy& copy, + const LegionCopy& copy, const SelectCopySrcInput& input, SelectCopySrcOutput& output) { legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); } -void BaseMapper::speculate(const MapperContext ctx, const Copy& copy, SpeculativeOutput& output) +void BaseMapper::speculate(const MapperContext ctx, + const LegionCopy& copy, + SpeculativeOutput& output) { output.speculate = false; } void BaseMapper::report_profiling(const MapperContext ctx, - const Copy& copy, + const LegionCopy& copy, const CopyProfilingInfo& input) { // No profiling for copies yet @@ -1612,11 +1154,12 @@ void BaseMapper::report_profiling(const MapperContext ctx, } void BaseMapper::select_sharding_functor(const MapperContext ctx, - const Copy& copy, + const LegionCopy& copy, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { - output.chosen_functor = 0; + // TODO: Copies can have key stores in the future + output.chosen_functor = find_sharding_functor_by_projection_functor(0); } void BaseMapper::select_close_sources(const MapperContext ctx, @@ -1730,56 +1273,26 @@ void BaseMapper::map_partition(const MapperContext ctx, const MapPartitionInput& input, MapPartitionOutput& output) { - const RegionRequirement& req = partition.requirement; - output.chosen_instances.resize(req.privilege_fields.size()); - const std::vector& valid = input.valid_instances; - std::vector needed_acquires; - uint32_t fidx = 0; - const bool memoize = true; - for (auto fid : req.privilege_fields) { - if (find_existing_instance(ctx, - req.region, - fid, - local_system_memory, - output.chosen_instances[fidx], - Strictness::strict) || - map_raw_array(ctx, - partition, - 0, - req.region, - fid, - local_system_memory, - Processor::NO_PROC, - valid, - output.chosen_instances[fidx], - memoize)) { - needed_acquires.push_back(output.chosen_instances[fidx]); - } - ++fidx; - } - while (!needed_acquires.empty() && - !runtime->acquire_and_filter_instances(ctx, needed_acquires, true /*filter on acquire*/)) { - assert(!needed_acquires.empty()); - std::set failed_instances; - filter_failed_acquires(ctx, needed_acquires, failed_instances); - // Now go through all the fields for the instances and try and remap - auto fit = req.privilege_fields.begin(); - for (uint32_t idx = 0; idx < output.chosen_instances.size(); idx++, fit++) { - if (failed_instances.find(output.chosen_instances[idx]) == failed_instances.end()) continue; - // Now try to remap it - if (map_raw_array(ctx, - partition, - 0 /*idx*/, - req.region, - *fit, - local_system_memory, - Processor::NO_PROC, - valid, - output.chosen_instances[idx], - memoize)) - needed_acquires.push_back(output.chosen_instances[idx]); - } - } + Processor target_proc{Processor::NO_PROC}; + if (!local_omps.empty()) + target_proc = local_omps.front(); + else + target_proc = local_cpus.front(); + + auto store_target = default_store_targets(target_proc.kind()).front(); + +#ifdef DEBUG_LEGATE + assert(partition.requirement.instance_fields.size() == 1); 
+#endif + + Store store(legion_runtime->get_mapper_runtime(), ctx, &partition.requirement); + std::vector mappings; + mappings.push_back(StoreMapping::default_mapping(store, store_target, false)); + + std::map*> output_map; + for (auto* req : mappings.front().requirements()) output_map[req] = &output.chosen_instances; + + map_legate_stores(ctx, partition, mappings, target_proc, output_map); } void BaseMapper::select_partition_sources(const MapperContext ctx, @@ -1803,7 +1316,7 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { - output.chosen_functor = 0; + output.chosen_functor = find_sharding_functor_by_projection_functor(0); } void BaseMapper::select_sharding_functor(const MapperContext ctx, @@ -1811,7 +1324,9 @@ void BaseMapper::select_sharding_functor(const MapperContext ctx, const SelectShardingFunctorInput& input, SelectShardingFunctorOutput& output) { - output.chosen_functor = 0; + output.chosen_functor = fill.is_index_space + ? find_sharding_functor_by_key_store_projection({fill.requirement}) + : find_sharding_functor_by_projection_functor(0); } void BaseMapper::configure_context(const MapperContext ctx, diff --git a/src/core/mapping/base_mapper.h b/src/core/mapping/base_mapper.h index d81898411..850427b6d 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -18,6 +18,7 @@ #include #include +#include #include "legion.h" @@ -256,34 +257,22 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { protected: Legion::Memory get_target_memory(Legion::Processor proc, StoreTarget target); - bool find_existing_instance(const Legion::Mapping::MapperContext ctx, - Legion::LogicalRegion region, - Legion::FieldID fid, - Legion::Memory target_memory, - Legion::Mapping::PhysicalInstance& result, - Strictness strictness = Strictness::hint, - bool acquire_instance_lock = true); + using OutputMap = + std::map*>; + void map_legate_stores(const Legion::Mapping::MapperContext ctx, + const Legion::Mappable& mappable, + std::vector& mappings, + Legion::Processor target_proc, + OutputMap& output_map); + void tighten_write_policies(const Legion::Mappable& mappable, + std::vector& mappings); bool map_legate_store(const Legion::Mapping::MapperContext ctx, const Legion::Mappable& mappable, const StoreMapping& mapping, - std::vector> reqs, + const std::set& reqs, Legion::Processor target_proc, Legion::Mapping::PhysicalInstance& result, bool can_fail); - bool map_raw_array(const Legion::Mapping::MapperContext ctx, - const Legion::Mappable& mappable, - unsigned index, - Legion::LogicalRegion region, - Legion::FieldID fid, - Legion::Memory target_memory, - Legion::Processor target_proc, - const std::vector& valid, - Legion::Mapping::PhysicalInstance& result, - bool memoize, - Legion::ReductionOpID redop = 0); - void filter_failed_acquires(const Legion::Mapping::MapperContext ctx, - std::vector& needed_acquires, - std::set& failed_acquires); void report_failed_mapping(const Legion::Mappable& mappable, unsigned index, Legion::Memory target_memory, @@ -297,15 +286,40 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { bool has_variant(const Legion::Mapping::MapperContext ctx, const Legion::Task& task, Legion::Processor::Kind kind); - Legion::VariantID find_variant(const Legion::Mapping::MapperContext ctx, - const Legion::Task& task, - Legion::Processor::Kind kind); + std::optional find_variant(const Legion::Mapping::MapperContext ctx, + const 
Legion::Task& task, + Legion::Processor::Kind kind); private: void generate_prime_factors(); void generate_prime_factor(const std::vector& processors, Legion::Processor::Kind kind); + protected: + template + decltype(auto) dispatch(TaskTarget target, Functor functor) + { + switch (target) { + case TaskTarget::CPU: return functor(local_cpus); + case TaskTarget::GPU: return functor(local_gpus); + case TaskTarget::OMP: return functor(local_omps); + } + assert(false); + return functor(local_cpus); + } + template + decltype(auto) dispatch(Legion::Processor::Kind kind, Functor functor) + { + switch (kind) { + case Legion::Processor::LOC_PROC: return functor(local_cpus); + case Legion::Processor::TOC_PROC: return functor(local_gpus); + case Legion::Processor::OMP_PROC: return functor(local_omps); + default: LEGATE_ABORT; + } + assert(false); + return functor(local_cpus); + } + protected: const std::vector get_processor_grid(Legion::Processor::Kind kind, int32_t ndim); void slice_auto_task(const Legion::Mapping::MapperContext ctx, @@ -321,6 +335,10 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { const SliceTaskInput& input, SliceTaskOutput& output); + protected: + Legion::ShardingID find_sharding_functor_by_key_store_projection( + const std::vector& requirements); + protected: static inline bool physical_sort_func( const std::pair& left, @@ -343,15 +361,14 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { std::vector local_cpus; std::vector local_gpus; std::vector local_omps; // OpenMP processors - std::vector local_ios; // I/O processors - std::vector local_pys; // Python processors protected: Legion::Memory local_system_memory, local_zerocopy_memory; std::map local_frame_buffers; std::map local_numa_domains; protected: - std::map, Legion::VariantID> leaf_variants; + using VariantCacheKey = std::pair; + std::map> variants; protected: InstanceManager* local_instances; diff --git a/src/core/mapping/mapping.cc b/src/core/mapping/mapping.cc index bbd75ee53..5d1aa971c 100644 --- a/src/core/mapping/mapping.cc +++ b/src/core/mapping/mapping.cc @@ -112,23 +112,42 @@ void InstanceMappingPolicy::populate_layout_constraints( return std::move(policy); } -bool StoreMapping::for_unbound_stores() const +bool StoreMapping::for_future() const +{ + for (auto& store : stores) return store.is_future(); + assert(false); + return false; +} + +bool StoreMapping::for_unbound_store() const { for (auto& store : stores) return store.unbound(); assert(false); return false; } +const Store& StoreMapping::store() const +{ +#ifdef DEBUG_LEGATE + assert(stores.size() == 1); +#endif + return stores.front(); +} + uint32_t StoreMapping::requirement_index() const { +#ifdef DEBUG_LEGATE assert(stores.size() > 0); uint32_t result = -1U; for (auto& store : stores) { - auto idx = store.region_field().index(); + auto idx = store.requirement_index(); assert(result == -1U || result == idx); result = idx; } return result; +#else + return stores.front().requirement_index(); +#endif } std::set StoreMapping::requirement_indices() const @@ -141,6 +160,18 @@ std::set StoreMapping::requirement_indices() const return std::move(indices); } +std::set StoreMapping::requirements() const +{ + std::set reqs; + for (auto& store : stores) { + if (store.is_future()) continue; + auto* req = store.region_field().get_requirement(); + if (!req->region.exists()) continue; + reqs.insert(req); + } + return std::move(reqs); +} + void StoreMapping::populate_layout_constraints( Legion::LayoutConstraintSet& 
layout_constraints) const { diff --git a/src/core/mapping/mapping.h b/src/core/mapping/mapping.h index 5d5ab8466..2d56d1a67 100644 --- a/src/core/mapping/mapping.h +++ b/src/core/mapping/mapping.h @@ -16,7 +16,7 @@ #pragma once -#include "core/mapping/task.h" +#include "core/mapping/operation.h" namespace legate { namespace mapping { @@ -133,9 +133,14 @@ struct StoreMapping { StoreMapping& operator=(StoreMapping&&) = default; public: - bool for_unbound_stores() const; + bool for_future() const; + bool for_unbound_store() const; + const Store& store() const; + + public: uint32_t requirement_index() const; std::set requirement_indices() const; + std::set requirements() const; public: void populate_layout_constraints(Legion::LayoutConstraintSet& layout_constraints) const; diff --git a/src/core/mapping/task.cc b/src/core/mapping/operation.cc similarity index 57% rename from src/core/mapping/task.cc rename to src/core/mapping/operation.cc index 42549b826..03f34d5b2 100644 --- a/src/core/mapping/task.cc +++ b/src/core/mapping/operation.cc @@ -14,27 +14,28 @@ * */ -#include "core/mapping/task.h" +#include "core/mapping/operation.h" #include "core/utilities/deserializer.h" namespace legate { namespace mapping { using LegionTask = Legion::Task; +using LegionCopy = Legion::Copy; using namespace Legion; using namespace Legion::Mapping; -RegionField::RegionField(const LegionTask* task, int32_t dim, uint32_t idx, FieldID fid) - : task_(task), dim_(dim), idx_(idx), fid_(fid) +RegionField::RegionField(const RegionRequirement* req, int32_t dim, uint32_t idx, FieldID fid) + : req_(req), dim_(dim), idx_(idx), fid_(fid) { } bool RegionField::can_colocate_with(const RegionField& other) const { - auto& my_req = get_requirement(); - auto& other_req = other.get_requirement(); - return my_req.region.get_tree_id() == other_req.region.get_tree_id(); + auto* my_req = get_requirement(); + auto* other_req = other.get_requirement(); + return my_req->region.get_tree_id() == other_req->region.get_tree_id(); } Domain RegionField::domain(MapperRuntime* runtime, const MapperContext context) const @@ -42,15 +43,7 @@ Domain RegionField::domain(MapperRuntime* runtime, const MapperContext context) return runtime->get_index_space_domain(context, get_index_space()); } -const RegionRequirement& RegionField::get_requirement() const -{ - return dim_ > 0 ? 
task_->regions[idx_] : task_->output_regions[idx_]; -} - -IndexSpace RegionField::get_index_space() const -{ - return get_requirement().region.get_index_space(); -} +IndexSpace RegionField::get_index_space() const { return req_->region.get_index_space(); } FutureWrapper::FutureWrapper(uint32_t idx, const Domain& domain) : idx_(idx), domain_(domain) {} @@ -90,27 +83,53 @@ Store::Store(Legion::Mapping::MapperRuntime* runtime, { } +Store::Store(Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context, + const Legion::RegionRequirement* requirement) + : is_future_(false), + is_output_store_(false), + dim_(requirement->region.get_dim()), + code_(LegateTypeCode::MAX_TYPE_NUMBER), + redop_id_(-1), + runtime_(runtime), + context_(context) +{ + region_field_ = RegionField(requirement, dim_, 0, requirement->instance_fields.front()); +} + bool Store::can_colocate_with(const Store& other) const { if (is_future() || other.is_future()) return false; - else if (is_reduction() || other.is_reduction()) + else if (unbound() || other.unbound()) return false; + else if (is_reduction() || other.is_reduction()) + return redop() == other.redop() && region_field_.can_colocate_with(other.region_field_); return region_field_.can_colocate_with(other.region_field_); } const RegionField& Store::region_field() const { +#ifdef DEBUG_LEGATE assert(!is_future()); +#endif return region_field_; } const FutureWrapper& Store::future() const { +#ifdef DEBUG_LEGATE assert(is_future()); +#endif return future_; } +RegionField::Id Store::unique_region_field_id() const { return region_field().unique_id(); } + +uint32_t Store::requirement_index() const { return region_field().index(); } + +uint32_t Store::future_index() const { return future().index(); } + Domain Store::domain() const { assert(!unbound()); @@ -126,7 +145,7 @@ Task::Task(const LegionTask* task, const MapperContext context) : task_(task), library_(library) { - MapperDeserializer dez(task, runtime, context); + TaskDeserializer dez(task, runtime, context); inputs_ = dez.unpack>(); outputs_ = dez.unpack>(); reductions_ = dez.unpack>(); @@ -135,5 +154,31 @@ Task::Task(const LegionTask* task, int64_t Task::task_id() const { return library_.get_local_task_id(task_->task_id); } +Copy::Copy(const LegionCopy* copy, MapperRuntime* runtime, const MapperContext context) + : copy_(copy) +{ + CopyDeserializer dez(copy->mapper_data, + copy->mapper_data_size, + {copy->src_requirements, + copy->dst_requirements, + copy->src_indirect_requirements, + copy->dst_indirect_requirements}, + runtime, + context); + inputs_ = dez.unpack>(); + dez.next_requirement_list(); + outputs_ = dez.unpack>(); + dez.next_requirement_list(); + input_indirections_ = dez.unpack>(); + dez.next_requirement_list(); + output_indirections_ = dez.unpack>(); +#ifdef DEBUG_LEGATE + for (auto& input : inputs_) assert(!input.is_future()); + for (auto& output : outputs_) assert(!output.is_future()); + for (auto& input_indirection : input_indirections_) assert(!input_indirection.is_future()); + for (auto& output_indirection : output_indirections_) assert(!output_indirection.is_future()); +#endif +} + } // namespace mapping } // namespace legate diff --git a/src/core/mapping/task.h b/src/core/mapping/operation.h similarity index 77% rename from src/core/mapping/task.h rename to src/core/mapping/operation.h index 2d3ad9d6b..0cc5dc267 100644 --- a/src/core/mapping/task.h +++ b/src/core/mapping/operation.h @@ -32,7 +32,7 @@ class RegionField { public: RegionField() {} - RegionField(const 
Legion::Task* task, int32_t dim, uint32_t idx, Legion::FieldID fid); + RegionField(const Legion::RegionRequirement* req, int32_t dim, uint32_t idx, Legion::FieldID fid); public: RegionField(const RegionField& other) = default; @@ -62,12 +62,12 @@ class RegionField { Legion::FieldID field_id() const { return fid_; } bool unbound() const { return dim_ < 0; } - private: - const Legion::RegionRequirement& get_requirement() const; + public: + const Legion::RegionRequirement* get_requirement() const { return req_; } Legion::IndexSpace get_index_space() const; private: - const Legion::Task* task_{nullptr}; + const Legion::RegionRequirement* req_{nullptr}; int32_t dim_{-1}; uint32_t idx_{-1U}; Legion::FieldID fid_{-1U}; @@ -111,6 +111,10 @@ class Store { const RegionField& region_field, bool is_output_store = false, std::shared_ptr&& transform = nullptr); + // A special constructor to create a mapper view of a store from a region requirement + Store(Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context, + const Legion::RegionRequirement* requirement); public: Store(const Store& other) = default; @@ -134,6 +138,11 @@ class Store { const RegionField& region_field() const; const FutureWrapper& future() const; + public: + RegionField::Id unique_region_field_id() const; + uint32_t requirement_index() const; + uint32_t future_index() const; + public: template Legion::Rect shape() const; @@ -188,7 +197,32 @@ class Task { std::vector scalars_; }; +class Copy { + public: + Copy(const Legion::Copy* copy, + Legion::Mapping::MapperRuntime* runtime, + const Legion::Mapping::MapperContext context); + + public: + const std::vector& inputs() const { return inputs_; } + const std::vector& outputs() const { return outputs_; } + const std::vector& input_indirections() const { return input_indirections_; } + const std::vector& output_indirections() const { return output_indirections_; } + + public: + Legion::DomainPoint point() const { return copy_->index_point; } + + private: + const Legion::Copy* copy_; + + private: + std::vector inputs_; + std::vector outputs_; + std::vector input_indirections_; + std::vector output_indirections_; +}; + } // namespace mapping } // namespace legate -#include "core/mapping/task.inl" +#include "core/mapping/operation.inl" diff --git a/src/core/mapping/task.inl b/src/core/mapping/operation.inl similarity index 100% rename from src/core/mapping/task.inl rename to src/core/mapping/operation.inl diff --git a/src/core/utilities/deserializer.cc b/src/core/utilities/deserializer.cc index 1fb0c8b0e..f62f5b50a 100644 --- a/src/core/utilities/deserializer.cc +++ b/src/core/utilities/deserializer.cc @@ -17,7 +17,6 @@ #include "core/utilities/deserializer.h" #include "core/data/scalar.h" #include "core/data/store.h" -#include "core/mapping/task.h" #include "core/utilities/machine.h" #include "legion/legion_c.h" @@ -32,7 +31,7 @@ namespace legate { TaskDeserializer::TaskDeserializer(const LegionTask* task, const std::vector& regions) - : BaseDeserializer(task), + : BaseDeserializer(static_cast(task->args), task->arglen), futures_{task->futures.data(), task->futures.size()}, regions_{regions.data(), regions.size()}, outputs_() @@ -128,15 +127,19 @@ void TaskDeserializer::_unpack(Legion::PhaseBarrier& barrier) namespace mapping { -MapperDeserializer::MapperDeserializer(const LegionTask* task, - MapperRuntime* runtime, - MapperContext context) - : BaseDeserializer(task), runtime_(runtime), context_(context), future_index_(0) +TaskDeserializer::TaskDeserializer(const 
Legion::Task* task, + MapperRuntime* runtime, + MapperContext context) + : BaseDeserializer(static_cast(task->args), task->arglen), + task_(task), + runtime_(runtime), + context_(context), + future_index_(0) { first_task_ = false; } -void MapperDeserializer::_unpack(Store& value) +void TaskDeserializer::_unpack(Store& value) { auto is_future = unpack(); auto is_output_region = unpack(); @@ -159,7 +162,7 @@ void MapperDeserializer::_unpack(Store& value) } } -void MapperDeserializer::_unpack(FutureWrapper& value) +void TaskDeserializer::_unpack(FutureWrapper& value) { // We still need to deserialize these fields to get to the domain unpack(); @@ -177,13 +180,66 @@ void MapperDeserializer::_unpack(FutureWrapper& value) value = FutureWrapper(future_index_++, domain); } -void MapperDeserializer::_unpack(RegionField& value, bool is_output_region) +void TaskDeserializer::_unpack(RegionField& value, bool is_output_region) +{ + auto dim = unpack(); + auto idx = unpack(); + auto fid = unpack(); + + auto req = is_output_region ? &task_->output_regions[idx] : &task_->regions[idx]; + value = RegionField(req, dim, idx, fid); +} + +CopyDeserializer::CopyDeserializer(const void* args, + size_t arglen, + std::vector&& all_requirements, + MapperRuntime* runtime, + MapperContext context) + : BaseDeserializer(static_cast(args), arglen), + all_reqs_(std::forward>(all_requirements)), + curr_reqs_(all_reqs_.begin()), + runtime_(runtime), + context_(context), + req_index_offset_(0) +{ +} + +void CopyDeserializer::next_requirement_list() +{ +#ifdef DEBUG_LEGATE + assert(curr_reqs_ != all_reqs_.end()); +#endif + req_index_offset_ += curr_reqs_->get().size(); + ++curr_reqs_; +} + +void CopyDeserializer::_unpack(Store& value) +{ + auto is_future = unpack(); + auto is_output_region = unpack(); + auto dim = unpack(); + auto code = unpack(); + + auto transform = unpack_transform(); + +#ifdef DEBUG_LEGATE + assert(!is_future && !is_output_region); +#endif + auto redop_id = unpack(); + RegionField rf; + _unpack(rf); + value = + Store(runtime_, context_, dim, code, redop_id, rf, is_output_region, std::move(transform)); +} + +void CopyDeserializer::_unpack(RegionField& value) { auto dim = unpack(); auto idx = unpack(); auto fid = unpack(); - value = RegionField(task_, dim, idx, fid); + auto req = &curr_reqs_->get()[idx]; + value = RegionField(req, dim, idx + req_index_offset_, fid); } } // namespace mapping diff --git a/src/core/utilities/deserializer.h b/src/core/utilities/deserializer.h index 72ed67852..cd0e6aba7 100644 --- a/src/core/utilities/deserializer.h +++ b/src/core/utilities/deserializer.h @@ -23,7 +23,7 @@ #include "core/comm/communicator.h" #include "core/data/scalar.h" #include "core/data/store.h" -#include "core/mapping/task.h" +#include "core/mapping/operation.h" #include "core/utilities/span.h" #include "core/utilities/type_traits.h" #include "core/utilities/typedefs.h" @@ -34,7 +34,7 @@ namespace legate { template class BaseDeserializer { public: - BaseDeserializer(const Legion::Task* task); + BaseDeserializer(const int8_t* args, size_t arglen); public: template @@ -49,8 +49,8 @@ class BaseDeserializer { template != MAX_TYPE_NUMBER>* = nullptr> void _unpack(T& value) { - value = *reinterpret_cast(task_args_.ptr()); - task_args_ = task_args_.subspan(sizeof(T)); + value = *reinterpret_cast(args_.ptr()); + args_ = args_.subspan(sizeof(T)); } public: @@ -69,11 +69,10 @@ class BaseDeserializer { std::shared_ptr unpack_transform(); protected: - const Legion::Task* task_; bool first_task_; private: - Span 
task_args_; + Span args_; }; class TaskDeserializer : public BaseDeserializer { @@ -99,11 +98,11 @@ class TaskDeserializer : public BaseDeserializer { namespace mapping { -class MapperDeserializer : public BaseDeserializer { +class TaskDeserializer : public BaseDeserializer { public: - MapperDeserializer(const Legion::Task* task, - Legion::Mapping::MapperRuntime* runtime, - Legion::Mapping::MapperContext context); + TaskDeserializer(const Legion::Task* task, + Legion::Mapping::MapperRuntime* runtime, + Legion::Mapping::MapperContext context); public: using BaseDeserializer::_unpack; @@ -114,11 +113,42 @@ class MapperDeserializer : public BaseDeserializer { void _unpack(RegionField& value, bool is_output_region); private: + const Legion::Task* task_; Legion::Mapping::MapperRuntime* runtime_; Legion::Mapping::MapperContext context_; uint32_t future_index_; }; +class CopyDeserializer : public BaseDeserializer { + private: + using Requirements = std::vector; + using ReqsRef = std::reference_wrapper; + + public: + CopyDeserializer(const void* args, + size_t arglen, + std::vector&& all_requirements, + Legion::Mapping::MapperRuntime* runtime, + Legion::Mapping::MapperContext context); + + public: + using BaseDeserializer::_unpack; + + public: + void next_requirement_list(); + + public: + void _unpack(Store& value); + void _unpack(RegionField& value); + + private: + std::vector all_reqs_; + std::vector::iterator curr_reqs_; + Legion::Mapping::MapperRuntime* runtime_; + Legion::Mapping::MapperContext context_; + uint32_t req_index_offset_; +}; + } // namespace mapping } // namespace legate diff --git a/src/core/utilities/deserializer.inl b/src/core/utilities/deserializer.inl index caebd1a0d..feeed5238 100644 --- a/src/core/utilities/deserializer.inl +++ b/src/core/utilities/deserializer.inl @@ -17,8 +17,8 @@ namespace legate { template -BaseDeserializer::BaseDeserializer(const Legion::Task* task) - : task_(task), task_args_{static_cast(task->args), task->arglen} +BaseDeserializer::BaseDeserializer(const int8_t* args, size_t arglen) + : args_(Span(args, arglen)) { } @@ -33,8 +33,8 @@ void BaseDeserializer::_unpack(Scalar& value) { auto tuple = unpack(); auto code = unpack(); - value = Scalar(tuple, code, task_args_.ptr()); - task_args_ = task_args_.subspan(value.size()); + value = Scalar(tuple, code, args_.ptr()); + args_ = args_.subspan(value.size()); } template diff --git a/typings/legion_cffi/lib.pyi b/typings/legion_cffi/lib.pyi index 2326b327c..364d43414 100644 --- a/typings/legion_cffi/lib.pyi +++ b/typings/legion_cffi/lib.pyi @@ -126,6 +126,7 @@ def legion_copy_launcher_set_possible_src_indirect_out_of_range( ) -> Any: ... def legion_copy_launcher_set_sharding_space(*args: Any) -> Any: ... def legion_copy_launcher_set_provenance(*args: Any) -> Any: ... +def legion_copy_launcher_set_mapper_arg(*args: Any) -> Any: ... def legion_detach_external_resources(*args: Any) -> Any: ... def legion_domain_affine_transform_identity(*args: Any) -> Any: ... def legion_domain_empty(*args: Any) -> Any: ... @@ -211,6 +212,7 @@ def legion_index_copy_launcher_set_possible_src_indirect_out_of_range( ) -> Any: ... def legion_index_copy_launcher_set_sharding_space(*args: Any) -> Any: ... def legion_index_copy_launcher_set_provenance(*args: Any) -> Any: ... +def legion_index_copy_launcher_set_mapper_arg(*args: Any) -> Any: ... def legion_index_fill_launcher_create_from_future_with_domain( *args: Any, ) -> Any: ... 
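For reference, the reworked copy mapping in the patch above picks a target processor by flattening the copy's index point within its sharding domain (via `linearize(lo, hi, point)`) and indexing the local processor list with that value modulo its size. The sketch below illustrates that scheme in isolation; the standalone `linearize` helper, its row-major ordering, and the GPU ids used here are assumptions made for the example, not the actual Legate/Legion implementation.

```cpp
// Minimal sketch (not the actual Legate helper): flatten a 2-D point that lies
// inside the inclusive rectangle [lo, hi] in row-major order, then pick a local
// processor round-robin from the flattened index.
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

using Point2 = std::array<int64_t, 2>;

int64_t linearize(const Point2& lo, const Point2& hi, const Point2& p)
{
  int64_t idx = 0;
  for (int dim = 0; dim < 2; ++dim) {
    const int64_t extent = hi[dim] - lo[dim] + 1;  // bounds are inclusive
    idx = idx * extent + (p[dim] - lo[dim]);       // row-major order (an assumption)
  }
  return idx;
}

int main()
{
  const Point2 lo{0, 0}, hi{1, 2};                // a 2 x 3 launch domain
  const std::vector<int> local_gpus{0, 1, 2, 3};  // hypothetical local GPU ids
  for (int64_t i = lo[0]; i <= hi[0]; ++i)
    for (int64_t j = lo[1]; j <= hi[1]; ++j) {
      const int64_t proc_id = linearize(lo, hi, {i, j});
      std::cout << "point (" << i << "," << j << ") -> gpu "
                << local_gpus[proc_id % local_gpus.size()] << "\n";
    }
  return 0;
}
```

Under these assumptions each point of a small launch domain lands on a distinct local processor and wraps around once the domain outgrows the processor list, which mirrors the `proc_id % local_gpus.size()` selection in the mapper change above.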
From eae28ac46a421d395ddc6bdf338a5357574444c5 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 1 Nov 2022 11:02:06 -0700 Subject: [PATCH 041/121] Support for concurrent launches (#459) * Fixes to use concurrent task launches: * Start using concurrent launches for communicators * Extend the variant registration API for concurrent variants * Python API to mark concurrent tasks * Helper methods in VariantOptions * Mark communicator metatasks concurrent and remove obsolete fences --- legate/core/_legion/task.py | 16 +++++++ legate/core/communicator.py | 7 ++- legate/core/launcher.py | 5 +++ legate/core/operation.py | 28 ++++-------- src/core/task/task.cc | 14 +++--- src/core/task/task.h | 90 ++++++++++++++++++++++--------------- typings/legion_cffi/lib.pyi | 2 + 7 files changed, 96 insertions(+), 66 deletions(-) diff --git a/legate/core/_legion/task.py b/legate/core/_legion/task.py index 7e463abec..670b2796c 100644 --- a/legate/core/_legion/task.py +++ b/legate/core/_legion/task.py @@ -1028,6 +1028,22 @@ def set_sharding_space(self, space: IndexSpace) -> None: self.launcher, space.handle ) + def set_concurrent(self, concurrent: bool) -> None: + """ + Set a flag indicating whether point tasks must execute + concurrently. Setting true to the flag directs the runtime + to make sure the tasks are using a concurrent variant and + also mapped to distinct processors with concurrent + execution guarantee (i.e., no subset of the processors execute + other tasks). + + Parameters + ---------- + concurrent : bool + Whether the point tasks must run concurrently + """ + legion.legion_index_launcher_set_concurrent(self.launcher, concurrent) + @dispatch def launch( self, diff --git a/legate/core/communicator.py b/legate/core/communicator.py index a794ab478..019258a51 100644 --- a/legate/core/communicator.py +++ b/legate/core/communicator.py @@ -101,14 +101,16 @@ def _initialize(self, volume: int) -> FutureMap: task = Task(self._context, self._init_nccl, tag=self._tag) task.add_future(nccl_id) + task.set_concurrent(True) handle = task.execute(Rect([volume])) - self._runtime.issue_execution_fence() return handle def _finalize(self, volume: int, handle: FutureMap) -> None: from .launcher import TaskLauncher as Task task = Task(self._context, self._finalize_nccl, tag=self._tag) + # Finalize may not need to be concurrent, but set it just in case + task.set_concurrent(True) task.add_future_map(handle) task.execute(Rect([volume])) @@ -161,8 +163,8 @@ def _initialize(self, volume: int) -> FutureMap: for i in range(volume): f = mapping_table_fm.get_future(Point([i])) task.add_future(f) + task.set_concurrent(True) handle = task.execute(Rect([volume])) - self._runtime.issue_execution_fence() return handle def _finalize(self, volume: int, handle: FutureMap) -> None: @@ -170,5 +172,6 @@ def _finalize(self, volume: int, handle: FutureMap) -> None: task = Task(self._context, self._finalize_cpucoll, tag=self._tag) task.add_future_map(handle) + task.set_concurrent(True) task.execute(Rect([volume])) self._runtime.issue_execution_fence() diff --git a/legate/core/launcher.py b/legate/core/launcher.py index ce87ffada..b19b357c0 100644 --- a/legate/core/launcher.py +++ b/legate/core/launcher.py @@ -724,6 +724,7 @@ def __init__( self._insert_barrier = False self._can_raise_exception = False self._provenance = provenance + self._concurrent = False @property def library_task_id(self) -> int: @@ -875,6 +876,9 @@ def insert_barrier(self) -> None: def set_can_raise_exception(self, can_raise_exception: bool) -> None: 
self._can_raise_exception = can_raise_exception + def set_concurrent(self, concurrent: bool) -> None: + self._concurrent = concurrent + def set_sharding_space(self, space: IndexSpace) -> None: self._sharding_space = space @@ -921,6 +925,7 @@ def build_task( out_req.add(task, fields) for comm in self._comms: task.add_point_future(ArgumentMap(future_map=comm)) + task.set_concurrent(len(self._comms) > 0 or self._concurrent) for future_map in self._future_map_args: task.add_point_future(ArgumentMap(future_map=future_map)) return task diff --git a/legate/core/operation.py b/legate/core/operation.py index a158ece7d..998276e6a 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -244,6 +244,7 @@ def __init__( self._exn_types: list[type] = [] self._tb_repr: Union[None, str] = None self._side_effect = False + self._concurrent = False @property def side_effect(self) -> bool: @@ -253,8 +254,11 @@ def set_side_effect(self, side_effect: bool) -> None: self._side_effect = side_effect @property - def uses_communicator(self) -> bool: - return len(self._comm_args) > 0 + def concurrent(self) -> bool: + return self._concurrent + + def set_concurrent(self, concurrent: bool) -> None: + self._concurrent = concurrent def get_name(self) -> str: libname = self.context.library.get_name() @@ -615,25 +619,17 @@ def get_requirement( self._add_scalar_args_to_launcher(launcher) launcher.set_can_raise_exception(self.can_raise_exception) + launcher.set_concurrent(self.concurrent) launch_domain = strategy.launch_domain if strategy.parallel else None self._add_communicators(launcher, launch_domain) - # TODO: For now we make sure no other operations are interleaved with - # the set of tasks that use a communicator. In the future, the - # communicator monad will do this for us. - if self.uses_communicator: - self._context.issue_execution_fence() - result: Union[Future, FutureMap] if launch_domain is not None: result = launcher.execute(launch_domain) else: result = launcher.execute_single() - if self.uses_communicator: - self._context.issue_execution_fence() - self._demux_scalar_stores(result, launch_domain) @@ -776,20 +772,12 @@ def launch(self, strategy: Strategy) -> None: self._add_scalar_args_to_launcher(launcher) launcher.set_can_raise_exception(self.can_raise_exception) + launcher.set_concurrent(self.concurrent) self._add_communicators(launcher, self._launch_domain) - # TODO: For now we make sure no other operations are interleaved with - # the set of tasks that use a communicator. In the future, the - # communicator monad will do this for us. 
- if self.uses_communicator: - self._context.issue_execution_fence() - result = launcher.execute(self._launch_domain) - if self.uses_communicator: - self._context.issue_execution_fence() - self._demux_scalar_stores(result, self._launch_domain) diff --git a/src/core/task/task.cc b/src/core/task/task.cc index 014c6f801..51ac3e1c1 100644 --- a/src/core/task/task.cc +++ b/src/core/task/task.cc @@ -27,10 +27,7 @@ void LegateTaskRegistrar::record_variant(TaskID tid, TaskLayoutConstraintSet& layout_constraints, LegateVariantCode var, Processor::Kind kind, - bool leaf, - bool inner, - bool idempotent, - size_t ret_size) + const VariantOptions& options) { assert((kind == Processor::LOC_PROC) || (kind == Processor::TOC_PROC) || (kind == Processor::OMP_PROC)); @@ -44,15 +41,16 @@ void LegateTaskRegistrar::record_variant(TaskID tid, task_name, descriptor, var, - ret_size)); + options.return_size)); auto& registrar = pending_task_variants_.back(); registrar.execution_constraints.swap(execution_constraints); registrar.layout_constraints.swap(layout_constraints); registrar.add_constraint(ProcessorConstraint(kind)); - registrar.set_leaf(leaf); - registrar.set_inner(inner); - registrar.set_idempotent(idempotent); + registrar.set_leaf(options.leaf); + registrar.set_inner(options.inner); + registrar.set_idempotent(options.idempotent); + registrar.set_concurrent(options.concurrent); } void LegateTaskRegistrar::register_all_tasks(Runtime* runtime, LibraryContext& context) diff --git a/src/core/task/task.h b/src/core/task/task.h index f86e9987c..c06006f49 100644 --- a/src/core/task/task.h +++ b/src/core/task/task.h @@ -35,6 +35,35 @@ namespace legate { // We're going to allow for each task to use only up to 341 scalar output stores constexpr size_t LEGATE_MAX_SIZE_SCALAR_RETURN = 4096; +struct VariantOptions { + bool leaf{true}; + bool inner{false}; + bool idempotent{false}; + bool concurrent{false}; + size_t return_size{LEGATE_MAX_SIZE_SCALAR_RETURN}; + + VariantOptions& with_leaf(bool _leaf) + { + leaf = _leaf; + return *this; + } + VariantOptions& with_inner(bool _inner) + { + inner = _inner; + return *this; + } + VariantOptions& with_idempotent(bool _idempotent) + { + idempotent = _idempotent; + return *this; + } + VariantOptions& with_concurrent(bool _concurrent) + { + concurrent = _concurrent; + return *this; + } +}; + using LegateVariantImpl = void (*)(TaskContext&); template @@ -65,11 +94,6 @@ class LegateTask { static const bool value = (sizeof(test(0)) == sizeof(__yes)); }; - public: - static void register_variants(); - template - static void register_variants_with_return(); - public: static const char* task_name() { @@ -129,33 +153,24 @@ class LegateTask { Legion::TaskLayoutConstraintSet& layout_constraints, LegateVariantCode var, Legion::Processor::Kind kind, - bool leaf = false, - bool inner = false, - bool idempotent = false) + const VariantOptions& options) { // Construct the code descriptor for this task so that the library // can register it later when it is ready Legion::CodeDescriptor desc(legate_task_wrapper); auto task_id = T::TASK_ID; - T::Registrar::record_variant(task_id, - T::task_name(), - desc, - execution_constraints, - layout_constraints, - var, - kind, - leaf, - inner, - idempotent, - LEGATE_MAX_SIZE_SCALAR_RETURN); + T::Registrar::record_variant( + task_id, T::task_name(), desc, execution_constraints, layout_constraints, var, kind, options); } + static void register_variants( + const std::map& all_options = {}); }; template class RegisterCPUVariant { public: - static void 
register_variant() + static void register_variant(const VariantOptions& options) { Legion::ExecutionConstraintSet execution_constraints; Legion::TaskLayoutConstraintSet layout_constraints; @@ -163,14 +178,14 @@ class RegisterCPUVariant { layout_constraints, LEGATE_CPU_VARIANT, Legion::Processor::LOC_PROC, - true /*leaf*/); + options); } }; template class RegisterCPUVariant { public: - static void register_variant() + static void register_variant(const VariantOptions& options) { // Do nothing } @@ -179,7 +194,7 @@ class RegisterCPUVariant { template class RegisterOMPVariant { public: - static void register_variant() + static void register_variant(const VariantOptions& options) { Legion::ExecutionConstraintSet execution_constraints; Legion::TaskLayoutConstraintSet layout_constraints; @@ -187,14 +202,14 @@ class RegisterOMPVariant { layout_constraints, LEGATE_OMP_VARIANT, Legion::Processor::OMP_PROC, - true /*leaf*/); + options); } }; template class RegisterOMPVariant { public: - static void register_variant() + static void register_variant(const VariantOptions& options) { // Do nothing } @@ -203,7 +218,7 @@ class RegisterOMPVariant { template class RegisterGPUVariant { public: - static void register_variant() + static void register_variant(const VariantOptions& options) { Legion::ExecutionConstraintSet execution_constraints; Legion::TaskLayoutConstraintSet layout_constraints; @@ -211,25 +226,31 @@ class RegisterGPUVariant { layout_constraints, LEGATE_GPU_VARIANT, Legion::Processor::TOC_PROC, - true /*leaf*/); + options); } }; template class RegisterGPUVariant { public: - static void register_variant() + static void register_variant(const VariantOptions& options) { // Do nothing } }; template -/*static*/ void LegateTask::register_variants() +/*static*/ void LegateTask::register_variants( + const std::map& all_options) { - RegisterCPUVariant, HasCPUVariant::value>::register_variant(); - RegisterOMPVariant, HasOMPVariant::value>::register_variant(); - RegisterGPUVariant, HasGPUVariant::value>::register_variant(); + // Make a copy of the map of options so that we can do find-or-create on it + auto all_options_copy = all_options; + RegisterCPUVariant, HasCPUVariant::value>::register_variant( + all_options_copy[LEGATE_CPU_VARIANT]); + RegisterOMPVariant, HasOMPVariant::value>::register_variant( + all_options_copy[LEGATE_OMP_VARIANT]); + RegisterGPUVariant, HasGPUVariant::value>::register_variant( + all_options_copy[LEGATE_GPU_VARIANT]); } class LegateTaskRegistrar { @@ -241,10 +262,7 @@ class LegateTaskRegistrar { Legion::TaskLayoutConstraintSet& layout_constraints, LegateVariantCode var, Legion::Processor::Kind kind, - bool leaf, - bool inner, - bool idempotent, - size_t ret_size); + const VariantOptions& options); public: void register_all_tasks(Legion::Runtime* runtime, LibraryContext& context); diff --git a/typings/legion_cffi/lib.pyi b/typings/legion_cffi/lib.pyi index 364d43414..6ae3b017a 100644 --- a/typings/legion_cffi/lib.pyi +++ b/typings/legion_cffi/lib.pyi @@ -249,6 +249,7 @@ def legion_index_launcher_execute_outputs(*args: Any) -> Any: ... def legion_index_launcher_execute_reduction_and_outputs(*args: Any) -> Any: ... def legion_index_launcher_set_sharding_space(*args: Any) -> Any: ... def legion_index_launcher_set_provenance(*args: Any) -> Any: ... +def legion_index_launcher_set_concurrent(*args: Any) -> Any: ... def legion_index_partition_create_by_domain(*args: Any) -> Any: ... def legion_index_partition_create_by_domain_future_map(*args: Any) -> Any: ... 
def legion_index_partition_create_by_image(*args: Any) -> Any: ... @@ -501,6 +502,7 @@ __all__ = ( "legion_index_launcher_execute_reduction_and_outputs", "legion_index_launcher_set_sharding_space", "legion_index_launcher_set_provenance", + "legion_index_launcher_set_concurrent", "legion_index_partition_create_by_domain", "legion_index_partition_create_by_domain_future_map", "legion_index_partition_create_by_image", From f934afea8123ba10ee45f9afd8754207e9f5c17e Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 1 Nov 2022 15:44:17 -0700 Subject: [PATCH 042/121] Some quality-of-life changes (#458) * Make is_complex take type codes just like the others and add is_complex_type instead * Minimize the header dependencies for span.h --- src/core/utilities/span.h | 3 ++- src/core/utilities/type_traits.h | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/core/utilities/span.h b/src/core/utilities/span.h index f35caf41c..a4fd12a8c 100644 --- a/src/core/utilities/span.h +++ b/src/core/utilities/span.h @@ -16,7 +16,8 @@ #pragma once -#include "legion.h" +#include +#include namespace legate { diff --git a/src/core/utilities/type_traits.h b/src/core/utilities/type_traits.h index b172f5b2c..4d8b324b4 100644 --- a/src/core/utilities/type_traits.h +++ b/src/core/utilities/type_traits.h @@ -171,16 +171,28 @@ struct is_floating_point { static constexpr bool value = std::is_floating_point>::value; }; -template +template struct is_complex : std::false_type { }; template <> -struct is_complex> : std::true_type { +struct is_complex : std::true_type { +}; + +template <> +struct is_complex : std::true_type { +}; + +template +struct is_complex_type : std::false_type { +}; + +template <> +struct is_complex_type> : std::true_type { }; template <> -struct is_complex> : std::true_type { +struct is_complex_type> : std::true_type { }; } // namespace legate From 1c616ccfb65b5eb0a3775f6728b868a9ffc3a76f Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 2 Nov 2022 11:23:44 -0700 Subject: [PATCH 043/121] Missing LEGATE_ABORT (#462) --- src/core/mapping/base_mapper.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 9ae61f62b..dcc393023 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -890,6 +890,7 @@ void BaseMapper::report_failed_mapping(const Mappable& mappable, mappable.get_unique_id(), memory_kinds[target_memory.kind()], target_memory.id); + LEGATE_ABORT; } void BaseMapper::select_task_variant(const MapperContext ctx, From 2ede57f728a74479c207cd12c5e68924ab6b6185 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 2 Nov 2022 16:49:25 -0700 Subject: [PATCH 044/121] Call bind.sh unconditionally (#461) --- bind.sh | 193 ++++++++++++++--------- legate/driver/command.py | 28 ++-- tests/unit/legate/driver/test_command.py | 30 +++- 3 files changed, 155 insertions(+), 96 deletions(-) diff --git a/bind.sh b/bind.sh index edb5fd05b..7e23b7acf 100755 --- a/bind.sh +++ b/bind.sh @@ -17,88 +17,131 @@ set -euo pipefail -# Usage: bind.sh [--cpus ] [--gpus ] [--mems ] [--nics ] ... -# specifies the resources to bind each node-local rank to, with ranks -# separated by /, e.g. 0,1/2,3/4,5/6,7 for 4 ranks per node. 
- -# Detect node-local rank based on launcher -IDX=none -case "$1" in - mpirun) IDX="$OMPI_COMM_WORLD_LOCAL_RANK" ;; - jsrun) IDX="$OMPI_COMM_WORLD_LOCAL_RANK" ;; - srun) IDX="$SLURM_LOCALID" ;; - local) IDX=0 ;; - none) IDX="${SLURM_LOCALID:-${OMPI_COMM_WORLD_LOCAL_RANK:-${MV2_COMM_WORLD_LOCAL_RANK:-none}}}" ;; +help() { + cat 1>&2 <&2 + help + ;; + esac + shift 2 +done + +case "$launcher" in + mpirun) rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" ;; + jsrun ) rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" ;; + srun ) rank="${SLURM_LOCALID:-unknown}" ;; + auto ) rank="${SLURM_LOCALID:-${OMPI_COMM_WORLD_LOCAL_RANK:-${MV2_COMM_WORLD_LOCAL_RANK:-unknown}}}" ;; + local ) rank="0" ;; + *) + echo "Unexpected launcher value: $launcher" 1>&2 + help + ;; esac -shift -if [[ "$IDX" == "none" ]]; then - echo "Error: Cannot detect node-local rank" 1>&2 + +if [[ "$rank" == "unknown" ]]; then + echo "Error: Could not determine node-local rank" 1>&2 exit 1 fi -# Read binding specifications -while [[ $# -gt 0 ]]; do - case "$1" in - --cpus) - CPUS=(${2//\// }) - if [[ "$IDX" -ge "${#CPUS[@]}" ]]; then - echo "Error: Incomplete CPU binding specification" 1>&2 - exit 1 - fi - ;; - --gpus) - GPUS=(${2//\// }) - if [[ "$IDX" -ge "${#GPUS[@]}" ]]; then - echo "Error: Incomplete GPU binding specification" 1>&2 - exit 1 - fi - ;; - --mems) - MEMS=(${2//\// }) - if [[ "$IDX" -ge "${#MEMS[@]}" ]]; then - echo "Error: Incomplete MEM binding specification" 1>&2 - exit 1 - fi - ;; - --nics) - NICS=(${2//\// }) - if [[ "$IDX" -ge "${#NICS[@]}" ]]; then - echo "Error: Incomplete NIC binding specification" 1>&2 - exit 1 - fi - ;; - *) - break - ;; - esac - shift 2 -done +export LEGATE_RANK="$rank" + +if [ -n "${cpus+x}" ]; then + cpus=(${cpus//\// }) + if [[ "$rank" -ge "${#cpus[@]}" ]]; then + echo "Error: Incomplete CPU binding specification" 1>&2 + exit 1 + fi +fi + +if [ -n "${gpus+x}" ]; then + gpus=(${gpus//\// }) + if [[ "$rank" -ge "${#gpus[@]}" ]]; then + echo "Error: Incomplete GPU binding specification" 1>&2 + exit 1 + fi + export CUDA_VISIBLE_DEVICES="${gpus[$rank]}" +fi -# Prepare environment -if [[ -n "${GPUS+x}" ]]; then - export CUDA_VISIBLE_DEVICES="${GPUS[$IDX]}" +if [ -n "${mems+x}" ]; then + mems=(${mems//\// }) + if [[ "$rank" -ge "${#mems[@]}" ]]; then + echo "Error: Incomplete MEM binding specification" 1>&2 + exit 1 + fi fi -if [[ -n "${NICS+x}" ]]; then - # Set all potentially relevant variables, hopefully they are ignored if we - # are not using the corresponding network. 
- NIC="${NICS[$IDX]}" - export UCX_NET_DEVICES="${NIC//,/:1,}":1 - export NCCL_IB_HCA="$NIC" - NIC_ARR=(${NIC//,/ }) - export GASNET_NUM_QPS="${#NIC_ARR[@]}" - export GASNET_IBV_PORTS="${NIC//,/+}" + +if [ -n "${nics+x}" ]; then + nics=(${nics//\// }) + if [[ "$rank" -ge "${#nics[@]}" ]]; then + echo "Error: Incomplete NIC binding specification" 1>&2 + exit 1 + fi + + # set all potentially relevant variables (hopefully they are ignored if we + # are not using the corresponding network) + nic="${nics[$rank]}" + nic_array=(${nic//,/ }) + export UCX_NET_DEVICES="${nic//,/:1,}":1 + export NCCL_IB_HCA="$nic" + export GASNET_NUM_QPS="${#nic_array[@]}" + export GASNET_IBV_PORTS="${nic//,/+}" fi -# Prepare command -if command -v numactl &> /dev/null; then - if [[ -n "${CPUS+x}" ]]; then - set -- --physcpubind "${CPUS[$IDX]}" "$@" - fi - if [[ -n "${MEMS+x}" ]]; then - set -- --membind "${MEMS[$IDX]}" "$@" - fi - set -- numactl "$@" -elif [[ -n "${CPUS+x}" || -n "${MEMS+x}" ]]; then - echo "Warning: numactl is not available, cannot bind to cores or memories" 1>&2 +# numactl is only needed if cpu or memory pinning was requested +if [[ -n "${cpus+x}" || -n "${mems+x}" ]]; then + if command -v numactl &> /dev/null; then + if [[ -n "${cpus+x}" ]]; then + set -- --physcpubind "${cpus[$rank]}" "$@" + fi + if [[ -n "${mems+x}" ]]; then + set -- --membind "${mems[$rank]}" "$@" + fi + set -- numactl "$@" + else + echo "Warning: numactl is not available, cannot bind to cores or memories" 1>&2 + fi fi + exec "$@" diff --git a/legate/driver/command.py b/legate/driver/command.py index f45a10c7c..0e72cfe7b 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -30,21 +30,17 @@ def cmd_bind( config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: - cpu_bind = config.binding.cpu_bind - mem_bind = config.binding.mem_bind - gpu_bind = config.binding.gpu_bind - nic_bind = config.binding.nic_bind - - if all(x is None for x in (cpu_bind, mem_bind, gpu_bind, nic_bind)): - return () - ranks = config.multi_node.ranks + if launcher.kind == "none": + bind_launcher_arg = "local" if ranks == 1 else "auto" + else: + bind_launcher_arg = launcher.kind + opts: CommandPart = ( str(system.legate_paths.bind_sh_path), - "local" - if launcher.kind == "none" and ranks == 1 - else str(launcher.kind), + "--launcher", + bind_launcher_arg, ) ranks_per_node = config.multi_node.ranks_per_node @@ -56,17 +52,17 @@ def check_bind_ranks(name: str, binding: str) -> None: raise RuntimeError(errmsg.format(name=name)) bindings = ( - ("cpu", cpu_bind), - ("gpu", gpu_bind), - ("mem", mem_bind), - ("nic", nic_bind), + ("cpu", config.binding.cpu_bind), + ("gpu", config.binding.gpu_bind), + ("mem", config.binding.mem_bind), + ("nic", config.binding.nic_bind), ) for name, binding in bindings: if binding is not None: check_bind_ranks(name, binding) opts += (f"--{name}s", binding) - return opts + return opts + ("--",) def cmd_gdb( diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index 29d4a8632..739dd7f9c 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -67,7 +67,8 @@ def test_default(self, genobjs: GenObjs) -> None: result = m.cmd_bind(config, system, launcher) - assert result == () + bind_sh = str(system.legate_paths.bind_sh_path) + assert result == (bind_sh, "--launcher", "local", "--") @pytest.mark.parametrize("kind", ("cpu", "gpu", "mem", "nic")) def test_basic_local(self, genobjs: GenObjs, kind: str) -> None: @@ 
-76,7 +77,14 @@ def test_basic_local(self, genobjs: GenObjs, kind: str) -> None: result = m.cmd_bind(config, system, launcher) bind_sh = str(system.legate_paths.bind_sh_path) - assert result == (bind_sh, "local", f"--{kind}s", "1") + assert result == ( + bind_sh, + "--launcher", + "local", + f"--{kind}s", + "1", + "--", + ) @pytest.mark.parametrize("launch", ("none", "mpirun", "jsrun", "srun")) def test_combo_local( @@ -101,14 +109,17 @@ def test_combo_local( result = m.cmd_bind(config, system, launcher) bind_sh = str(system.legate_paths.bind_sh_path) - assert result[:2] == ( + assert result[:3] == ( bind_sh, + "--launcher", "local" if launch == "none" else launch, ) - x = iter(result[2:]) + x = iter(result[3:]) for name, binding in zip(x, x): # pairwise assert f"{name} {binding}" in "--cpus 1 --gpus 1 --nics 1 --mems 1" + assert result[-1] == "--" + @pytest.mark.parametrize("launch", ("none", "mpirun", "jsrun", "srun")) @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) @pytest.mark.parametrize("kind", ("cpu", "gpu", "mem", "nic")) @@ -127,8 +138,17 @@ def test_ranks_good( result = m.cmd_bind(config, system, launcher) + launcher_arg = "auto" if launch == "none" else launch + bind_sh = str(system.legate_paths.bind_sh_path) - assert result == (bind_sh, launch, f"--{kind}s", "1/2") + assert result == ( + bind_sh, + "--launcher", + launcher_arg, + f"--{kind}s", + "1/2", + "--", + ) @pytest.mark.parametrize("binding", ("1", "1/2/3")) @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) From 2a1ca972d5bb942fc6c3a2cc84777844a75b20b9 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 4 Nov 2022 13:18:29 -0700 Subject: [PATCH 045/121] Make `install.py` reconfigure editable installs when build type changes (#455) * pass -mindepth 1 so we don't accidentally delete the search root if it matches one of the `-d` names * pass unknown flags to `pip install` command * use CMAKE_ARGS instead of SKBUILD_CONFIGURE_OPTIONS to work around scikit-build bug (fixes #372) * replace SKBUILD_CONFIGURE_OPTIONS with CMAKE_ARGS everywhere --- BUILD.md | 50 +++++++++++-------- install.py | 14 ++++-- legate/util/fs.py | 4 +- scripts/build-install.sh | 4 +- scripts/build-no-install.sh | 4 +- scripts/build-separately-no-install.sh | 4 +- scripts/build-with-legion-no-install.sh | 4 +- ...build-with-legion-separately-no-install.sh | 4 +- ...uninstall-global-legion-and-legate-core.sh | 36 ++++++------- 9 files changed, 70 insertions(+), 54 deletions(-) diff --git a/BUILD.md b/BUILD.md index 320059f9f..2495f4d3b 100644 --- a/BUILD.md +++ b/BUILD.md @@ -17,7 +17,7 @@ limitations under the License. # TL;DR -1) Check if there are specialized scripts available for your cluster at https://github.com/nv-legate/quickstart. +1) Check if there are specialized scripts available for your cluster at [nv-legate/quickstart](https://github.com/nv-legate/quickstart). 2) [Install dependencies from conda](#getting-dependencies-through-conda) 3) [Build using install.py](#using-installpy) @@ -33,7 +33,7 @@ Please use the `scripts/generate-conda-envs.py` script to create a conda environment file listing all the packages that are required to build, run and test Legate Core and all downstream libraries. For example: -``` +```shell $ ./scripts/generate-conda-envs.py --python 3.10 --ctk 11.7 --os linux --compilers --openmpi --- generating: environment-test-linux-py310-cuda-11.7-compilers-openmpi.yaml ``` @@ -45,13 +45,13 @@ generated environment file (e.g. all the supported Python versions). 
See the Once you have this environment file, you can install the required packages by creating a new conda environment: -``` +```shell conda env create -n legate -f .yaml ``` or by updating an existing environment: -``` +```shell conda env update -f .yaml ``` @@ -161,14 +161,14 @@ after to trip GLIBC's internal version checks, since the conda library expects to find symbols with more recent version numbers than what is available on the system-wide GLIBC: -``` +```shell /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /opt/conda/envs/legate/lib/libarrow.so) ``` You can usually work around this issue by putting the conda library directory first in the dynamic library resolution path: -``` +```shell LD_LIBRARY_PATH="$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" ``` @@ -186,14 +186,14 @@ the C++ and Python components under the currently active Python environment. To add GPU support, use the `--cuda` flag: -``` +```shell ./install.py --cuda ``` You can specify the CUDA toolkit directory and the CUDA architecture you want to target using the `--with-cuda` and `--arch` flags, e.g.: -``` +```shell ./install.py --cuda --with-cuda /usr/local/cuda/ --arch ampere ``` @@ -215,18 +215,21 @@ You also need to specify the interconnect network of the target machine using th For example this would be an installation for a [DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): -``` + +```shell ./install.py --network gasnet1 --conduit ibv --cuda --arch ampere ``` + Alternatively, here is an install line for the [Piz-Daint](https://www.cscs.ch/computers/dismissed/piz-daint-piz-dora/) supercomputer: -``` + +```shell ./install.py --network gasnet1 --conduit aries --cuda --arch pascal ``` To see all available configuration options, run with the `--help` flag: -``` +```shell ./install.py --help ``` @@ -237,11 +240,13 @@ can still use the pip installer to build and install Legate Core. The following command will trigger a single-node, CPU-only build of Legate Core, then install it into the currently active Python environment: -``` +```shell $ pip install . ``` + or -``` + +```shell $ python3 -m pip install . ``` @@ -249,17 +254,20 @@ $ python3 -m pip install . Legate relies on CMake to select its toolchain and build flags. Users can set the environment variables `CXX` or `CXXFLAGS` prior to building to override the -CMake defaults. Alternatively, CMake values can be overridden through the -`SKBUILD_CONFIGURE_OPTIONS` variable: +CMake defaults. -``` -$ SKBUILD_CONFIGURE_OPTIONS="-D Legion_USE_CUDA:BOOL=ON" \ +Alternatively, CMake and build tool arguments can be passed via the +`CMAKE_ARGS`/`SKBUILD_CONFIGURE_OPTIONS` and `SKBUILD_BUILD_OPTIONS` +[environment variables](https://scikit-build.readthedocs.io/en/latest/usage.html#environment-variable-configuration): + +```shell +$ CMAKE_ARGS="${CMAKE_ARGS:-} -D Legion_USE_CUDA:BOOL=ON" \ pip install . ``` An alternative syntax using `setup.py` with `scikit-build` is -``` +```shell $ python setup.py install -- -DLegion_USE_CUDA:BOOL=ON ``` @@ -287,20 +295,20 @@ There are several examples in the `scripts` folder. We walk through the steps in First, the CMake build needs to be configured: -``` +```shell $ cmake -S . -B build -GNinja -D Legion_USE_CUDA=ON ``` Once configured, we can build the C++ libraries: -``` +```shell $ cmake --build build ``` This will invoke Ninja (or make) to execute the build. Once the C++ libraries are available, we can do an editable (development) pip installation. 
-``` +```shell $ SKBUILD_BUILD_OPTIONS="-D FIND_LEGATE_CORE_CPP=ON -D legate_core_ROOT=$(pwd)/build" \ python3 -m pip install \ --root / --no-deps --no-build-isolation diff --git a/install.py b/install.py index e3303ae72..dad033c42 100755 --- a/install.py +++ b/install.py @@ -309,6 +309,7 @@ def install( print("legion_src_dir:", legion_src_dir) print("legion_url:", legion_url) print("legion_branch:", legion_branch) + print("unknown:", str(unknown)) join = os.path.join exists = os.path.exists @@ -396,15 +397,22 @@ def validate_path(path): pip_install_cmd += ["--no-deps", "--no-build-isolation"] pip_install_cmd += ["--upgrade"] + if unknown is not None: + pip_install_cmd += unknown + pip_install_cmd += ["."] if verbose: pip_install_cmd += ["-vv"] - cmake_flags = [] + # Also use preexisting CMAKE_ARGS from conda if set + cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ") if cmake_generator: - cmake_flags += [f"-G'{cmake_generator}'"] + if " " not in cmake_generator: + cmake_flags += [f"-G{cmake_generator}"] + else: + cmake_flags += [f"-G'{cmake_generator}'"] if debug or verbose: cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")] @@ -458,7 +466,7 @@ def validate_path(path): cmd_env.update( { "SKBUILD_BUILD_OPTIONS": f"-j{str(thread_count)}", - "SKBUILD_CONFIGURE_OPTIONS": "\n".join(cmake_flags), + "CMAKE_ARGS": " ".join(cmake_flags), } ) diff --git a/legate/util/fs.py b/legate/util/fs.py index 15338d783..4b7465799 100644 --- a/legate/util/fs.py +++ b/legate/util/fs.py @@ -220,13 +220,13 @@ def get_legion_paths(legate_paths: LegatePaths) -> LegionPaths: # 1. Legion was found in a standard system location (/usr, $CONDA_PREFIX) # 2. Legion was built as a side-effect of building legate_core: # ``` - # SKBUILD_CONFIGURE_OPTIONS="" python -m pip install . + # CMAKE_ARGS="" python -m pip install . # ``` # 3. Legion was built in a separate directory independent of legate_core # and the path to its build directory was given when configuring # legate_core: # ``` - # SKBUILD_CONFIGURE_OPTIONS="-D Legion_ROOT=/legion/build" \ + # CMAKE_ARGS="-D Legion_ROOT=/legion/build" \ # python -m pip install . 
# ``` # diff --git a/scripts/build-install.sh b/scripts/build-install.sh index b0aa91925..f7b5a3854 100755 --- a/scripts/build-install.sh +++ b/scripts/build-install.sh @@ -13,7 +13,7 @@ source ./scripts/util/uninstall-global-legion-and-legate-core.sh rm -rf ./{build,_skbuild,dist,legate.core.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -30,7 +30,7 @@ ninja_args="-j$(nproc --ignore=2)" # Build legion_core + legion_core_python and install into the current Python environment SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ --no-deps --no-build-isolation \ diff --git a/scripts/build-no-install.sh b/scripts/build-no-install.sh index b6ced5da5..8cb6665e4 100755 --- a/scripts/build-no-install.sh +++ b/scripts/build-no-install.sh @@ -11,7 +11,7 @@ source ./scripts/util/compiler-flags.sh rm -rf ./{build,_skbuild,dist,legate.core.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -28,7 +28,7 @@ ninja_args="-j$(nproc --ignore=2)" # Build legion_core + legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-separately-no-install.sh b/scripts/build-separately-no-install.sh index f2b0188c7..1ffacde26 100755 --- a/scripts/build-separately-no-install.sh +++ b/scripts/build-separately-no-install.sh @@ -11,7 +11,7 @@ source ./scripts/util/compiler-flags.sh rm -rf ./{build,_skbuild,dist,legate.core.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -48,7 +48,7 @@ cmake_args+=" # Build legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-with-legion-no-install.sh b/scripts/build-with-legion-no-install.sh index 5d52c2c00..5cc03b624 100755 --- a/scripts/build-with-legion-no-install.sh +++ b/scripts/build-with-legion-no-install.sh @@ -26,7 +26,7 @@ if [[ -f "$Legion_ROOT/CMakeCache.txt" ]]; then fi # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -41,7 +41,7 @@ cmake_args+=" # Build legion_core + legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-with-legion-separately-no-install.sh b/scripts/build-with-legion-separately-no-install.sh index 200b67aa9..a497af581 100755 --- a/scripts/build-with-legion-separately-no-install.sh +++ b/scripts/build-with-legion-separately-no-install.sh @@ -26,7 +26,7 @@ if [[ -f "$Legion_ROOT/CMakeCache.txt" ]]; then fi 
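# Seeding cmake_args from the environment below means any CMAKE_ARGS the
# caller has already exported, e.g. from a conda compiler activation script or
# set by hand, is extended rather than discarded. A sketch of a hypothetical
# invocation relying on this behavior:
#
#   CMAKE_ARGS="-D Legion_USE_CUDA:BOOL=ON" ./scripts/build-with-legion-separately-no-install.sh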
# Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -49,7 +49,7 @@ cmake_args+=" # Build legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/util/uninstall-global-legion-and-legate-core.sh b/scripts/util/uninstall-global-legion-and-legate-core.sh index 17e17bd5d..916f3e993 100755 --- a/scripts/util/uninstall-global-legion-and-legate-core.sh +++ b/scripts/util/uninstall-global-legion-and-legate-core.sh @@ -1,21 +1,21 @@ #! /usr/bin/env bash -rm -rf $(find "$CONDA_PREFIX" -type d -name '*realm*') \ - $(find "$CONDA_PREFIX" -type d -name '*legion*') \ - $(find "$CONDA_PREFIX" -type d -name '*legate*') \ - $(find "$CONDA_PREFIX" -type d -name '*Legion*') \ - $(find "$CONDA_PREFIX" -type f -name 'realm*.h') \ - $(find "$CONDA_PREFIX" -type f -name 'legion*.h') \ - $(find "$CONDA_PREFIX" -type f -name 'pygion.py') \ - $(find "$CONDA_PREFIX" -type f -name 'legion_top.py') \ - $(find "$CONDA_PREFIX" -type f -name 'legion_cffi.py') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'librealm*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'libregent*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'liblegion*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'liblgcore*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'legate.core.egg-link') \ - $(find "$CONDA_PREFIX/bin" -type f -name '*legion*') \ - $(find "$CONDA_PREFIX/bin" -type f -name 'legate') \ - $(find "$CONDA_PREFIX/bin" -type f -name 'bind.sh') \ - $(find "$CONDA_PREFIX/bin" -type f -name 'lgpatch') \ +rm -rf $(find "$CONDA_PREFIX" -mindepth 1 -type d -name '*realm*') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type d -name '*legion*') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type d -name '*legate*') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type d -name '*Legion*') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type f -name 'realm*.h') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type f -name 'legion*.h') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type f -name 'pygion.py') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type f -name 'legion_top.py') \ + $(find "$CONDA_PREFIX" -mindepth 1 -type f -name 'legion_cffi.py') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'librealm*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'libregent*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'liblegion*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'liblgcore*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'legate.core.egg-link') \ + $(find "$CONDA_PREFIX/bin" -mindepth 1 -type f -name '*legion*') \ + $(find "$CONDA_PREFIX/bin" -mindepth 1 -type f -name 'legate') \ + $(find "$CONDA_PREFIX/bin" -mindepth 1 -type f -name 'bind.sh') \ + $(find "$CONDA_PREFIX/bin" -mindepth 1 -type f -name 'lgpatch') \ ; From dbe9ebec522a3bef4f427a3aa4f4b6bd4ffa9058 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Fri, 4 Nov 2022 21:07:05 -0700 Subject: [PATCH 046/121] legate/core/types: add missing `to_pandas_type` on Complex types (#467) Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/types.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/legate/core/types.py b/legate/core/types.py index fdce689de..0b226275e 100644 --- a/legate/core/types.py +++ 
b/legate/core/types.py @@ -17,6 +17,7 @@ from enum import IntEnum, unique from typing import Any, Iterable, Type, Union +import numpy as np import pyarrow as pa from . import legion @@ -41,6 +42,9 @@ def __arrow_ext_deserialize__( def __hash__(self) -> int: return hash(self.__class__) + def to_pandas_dtype(self) -> np.dtype[Any]: + return np.dtype(np.complex64) + class Complex128Dtype(pa.ExtensionType): def __init__(self) -> None: @@ -58,6 +62,9 @@ def __arrow_ext_deserialize__( def __hash__(self) -> int: return hash(self.__class__) + def to_pandas_dtype(self) -> np.dtype[Any]: + return np.dtype(np.complex128) + bool_ = pa.bool_() int8 = pa.int8() From d7f8f99952f32486b0292fa448bf9679a099563e Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 7 Nov 2022 11:53:19 -0800 Subject: [PATCH 047/121] Python optimization experiments (#460) * checkpoint * only compute PartSym hash once * don't define unneeded closure on every call * try caching get_subregion_size? --- legate/core/constraints.py | 4 +- legate/core/operation.py | 64 ++++++++++++------------ legate/core/partition.py | 2 + legate/core/shape.py | 100 +++++++++++++++++++++++++------------ 4 files changed, 104 insertions(+), 66 deletions(-) diff --git a/legate/core/constraints.py b/legate/core/constraints.py index b8fdc6c49..d18b5fab0 100644 --- a/legate/core/constraints.py +++ b/legate/core/constraints.py @@ -102,6 +102,8 @@ def __init__( self._disjoint = disjoint self._complete = complete + self._hash = hash((self._op_hash, self._id)) + @property def ndim(self) -> int: return self._store.ndim @@ -120,7 +122,7 @@ def __repr__(self) -> str: return f"X{self._id}({disj},{comp})@{self._op_name}" def __hash__(self) -> int: - return hash((self._op_hash, self._id)) + return self._hash def subst(self, mapping: dict[PartSym, PartitionBase]) -> Expr: return Lit(mapping[self]) diff --git a/legate/core/operation.py b/legate/core/operation.py index 998276e6a..d6788cf60 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -457,6 +457,14 @@ def __init__( self._output_parts: list[PartSym] = [] self._reduction_parts: list[PartSym] = [] + def get_requirement( + self, store: Store, part_symb: PartSym, strategy: Strategy + ) -> tuple[Proj, int, StorePartition]: + store_part = store.partition(strategy.get_partition(part_symb)) + req = store_part.get_requirement(strategy.launch_ndim) + tag = self.get_tag(strategy, part_symb) + return req, tag, store_part + def add_input( self, store: Store, partition: Optional[PartSym] = None ) -> None: @@ -571,18 +579,10 @@ def launch(self, strategy: Strategy) -> None: provenance=self.provenance, ) - def get_requirement( - store: Store, part_symb: PartSym - ) -> tuple[Proj, int, StorePartition]: - store_part = store.partition(strategy.get_partition(part_symb)) - req = store_part.get_requirement(strategy.launch_ndim) - tag = self.get_tag(strategy, part_symb) - return req, tag, store_part - self.find_all_reusable_store_pairs(strategy) for store, part_symb in zip(self._inputs, self._input_parts): - req, tag, _ = get_requirement(store, part_symb) + req, tag, _ = self.get_requirement(store, part_symb, strategy) launcher.add_input(store, req, tag=tag) for idx, (store, part_symb) in enumerate( @@ -592,7 +592,9 @@ def get_requirement( continue if idx in self._reuse_map: store.move_data(self._reuse_map[idx]) - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) launcher.add_output(store, req, tag=tag) # We update the key 
partition of a store only when it gets updated store.set_key_partition(store_part.partition) @@ -600,7 +602,9 @@ def get_requirement( for ((store, redop), part_symb) in zip( self._reductions, self._reduction_parts ): - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) can_read_write = store_part.is_disjoint_for(strategy.launch_domain) req.redop = store.type.reduction_op_id(redop) @@ -941,21 +945,15 @@ def launch(self, strategy: Strategy) -> None: # will need to be extended accordingly. scatter = len(self._target_indirects) > 0 - def get_requirement( - store: Store, part_symb: PartSym - ) -> tuple[Proj, int, StorePartition]: - store_part = store.partition(strategy.get_partition(part_symb)) - req = store_part.get_requirement(strategy.launch_ndim) - tag = self.get_tag(strategy, part_symb) - return req, tag, store_part - for store, part_symb in zip(self._inputs, self._input_parts): - req, tag, _ = get_requirement(store, part_symb) + req, tag, _ = self.get_requirement(store, part_symb, strategy) launcher.add_input(store, req, tag=tag) for store, part_symb in zip(self._outputs, self._output_parts): assert not store.unbound - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) if scatter: launcher.add_inout(store, req, tag=tag) else: @@ -964,18 +962,24 @@ def get_requirement( for ((store, redop), part_symb) in zip( self._reductions, self._reduction_parts ): - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) req.redop = store.type.reduction_op_id(redop) launcher.add_reduction(store, req, tag=tag) for store, part_symb in zip( self._source_indirects, self._source_indirect_parts ): - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) launcher.add_source_indirect(store, req, tag=tag) for store, part_symb in zip( self._target_indirects, self._target_indirect_parts ): - req, tag, store_part = get_requirement(store, part_symb) + req, tag, store_part = self.get_requirement( + store, part_symb, strategy + ) launcher.add_target_indirect(store, req, tag=tag) if strategy.launch_domain is not None: @@ -1040,17 +1044,11 @@ def add_reduction( raise TypeError("No reductions can be added to fills") def launch(self, strategy: Strategy) -> None: - def get_requirement( - store: Store, part_symb: PartSym - ) -> tuple[Proj, int, StorePartition]: - store_part = store.partition(strategy.get_partition(part_symb)) - req = store_part.get_requirement(strategy.launch_ndim) - tag = self.get_tag(strategy, part_symb) - return req, tag, store_part - lhs = self._outputs[0] lhs_part_sym = self._output_parts[0] - lhs_proj, _, lhs_part = get_requirement(lhs, lhs_part_sym) + lhs_proj, _, lhs_part = self.get_requirement( + lhs, lhs_part_sym, strategy + ) lhs.set_key_partition(lhs_part.partition) launcher = FillLauncher( self.context, diff --git a/legate/core/partition.py b/legate/core/partition.py index ba13b5351..162e7fb6a 100644 --- a/legate/core/partition.py +++ b/legate/core/partition.py @@ -15,6 +15,7 @@ from __future__ import annotations from abc import ABC, abstractmethod, abstractproperty +from functools import lru_cache from typing import TYPE_CHECKING, Optional, Sequence, Type, Union from . 
import ( @@ -237,6 +238,7 @@ def is_disjoint_for(self, launch_domain: Optional[Rect]) -> bool: def has_color(self, color: Shape) -> bool: return color >= 0 and color < self._color_shape + @lru_cache def get_subregion_size(self, extents: Shape, color: Shape) -> Shape: lo = self._tile_shape * color + self._offset hi = self._tile_shape * (color + 1) + self._offset diff --git a/legate/core/shape.py b/legate/core/shape.py index 9147d0865..98207191f 100644 --- a/legate/core/shape.py +++ b/legate/core/shape.py @@ -26,15 +26,10 @@ ExtentLike: TypeAlias = Union["Shape", int, Iterable[int]] -def _cast_tuple(value: ExtentLike, ndim: int) -> tuple[int, ...]: - if isinstance(value, Shape): - return value.extents - elif isinstance(value, Iterable): - return tuple(value) - elif isinstance(value, int): +def _cast_tuple(value: int | Iterable[int], ndim: int) -> tuple[int, ...]: + if isinstance(value, int): return (value,) * ndim - else: - raise ValueError(f"Cannot cast {type(value).__name__} to tuple") + return tuple(value) class Shape: @@ -46,8 +41,11 @@ def __init__( extents: Optional[ExtentLike] = None, ispace: Optional[IndexSpace] = None, ) -> None: - if extents is not None: - self._extents = _cast_tuple(extents, 1) + if isinstance(extents, int): + self._extents = (extents,) + self._ispace = None + elif extents is not None: + self._extents = tuple(extents) self._ispace = None else: assert ispace is not None @@ -59,9 +57,7 @@ def extents(self) -> tuple[int, ...]: if self._extents is None: assert self._ispace is not None bounds = self._ispace.get_bounds() - lo = bounds.lo hi = bounds.hi - assert all(lo[idx] == 0 for idx in range(lo.dim)) self._extents = tuple(hi[idx] + 1 for idx in range(hi.dim)) return self._extents @@ -148,55 +144,95 @@ def __eq__(self, other: object) -> bool: else: return self.extents == other.extents elif isinstance(other, (int, Iterable)): - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return lh == rh else: return False def __le__(self, other: ExtentLike) -> bool: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return len(lh) == len(rh) and lh <= rh def __lt__(self, other: ExtentLike) -> bool: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return len(lh) == len(rh) and lh < rh def __ge__(self, other: ExtentLike) -> bool: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return len(lh) == len(rh) and lh >= rh def __gt__(self, other: ExtentLike) -> bool: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return len(lh) == len(rh) and lh > rh def __add__(self, other: ExtentLike) -> Shape: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return Shape(tuple(a + b for (a, b) in zip(lh, rh))) def __sub__(self, other: ExtentLike) -> Shape: - lh = _cast_tuple(self, 
self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return Shape(tuple(a - b for (a, b) in zip(lh, rh))) def __mul__(self, other: ExtentLike) -> Shape: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return Shape(tuple(a * b for (a, b) in zip(lh, rh))) def __mod__(self, other: ExtentLike) -> Shape: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return Shape(tuple(a % b for (a, b) in zip(lh, rh))) def __floordiv__(self, other: ExtentLike) -> Shape: - lh = _cast_tuple(self, self.ndim) - rh = _cast_tuple(other, self.ndim) + lh = self.extents + rh = ( + other.extents + if isinstance(other, Shape) + else _cast_tuple(other, self.ndim) + ) return Shape(tuple(a // b for (a, b) in zip(lh, rh))) def drop(self, dim: int) -> Shape: From 1ec34b41696b0c712358b13c34b073cdf55856e6 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Mon, 7 Nov 2022 15:47:52 -0700 Subject: [PATCH 048/121] fix for -ll:networks none, we will init MPI if it has not been initialized (#465) * fix for -ll:networks none, we will init MPI if it has not been initialized. * add the self mpi finalize * fix for LEGATE_NEED_NETWORK --- src/core/comm/coll.cc | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/core/comm/coll.cc b/src/core/comm/coll.cc index 8f0f14104..6ca5d6787 100644 --- a/src/core/comm/coll.cc +++ b/src/core/comm/coll.cc @@ -59,6 +59,8 @@ static int current_unique_id = 0; static bool coll_inited = false; +static bool self_mpi_init = false; + // functions start here #ifdef LEGATE_USE_NETWORK static inline std::pair mostFrequent(const int* arr, int n); @@ -242,23 +244,31 @@ int collInit(int argc, char* argv[]) { current_unique_id = 0; #ifdef LEGATE_USE_NETWORK - int provided, init_flag = 0; + int init_flag = 0; CHECK_MPI(MPI_Initialized(&init_flag)); if (!init_flag) { - log_coll.fatal( - "MPI has not been initialized, it should be initialized by " - "the networking backend"); - LEGATE_ABORT; - } else { - int mpi_thread_model; - MPI_Query_thread(&mpi_thread_model); - if (mpi_thread_model != MPI_THREAD_MULTIPLE) { + char* network = getenv("LEGATE_NEED_NETWORK"); + int need_network = 0; + if (network != nullptr) { need_network = atoi(network); } + if (need_network) { log_coll.fatal( - "MPI has been initialized by others, but is not initialized with " - "MPI_THREAD_MULTIPLE"); + "MPI has not been initialized, it should be initialized by " + "the networking backend."); LEGATE_ABORT; + } else { + int provided; + MPI_Init_thread(0, 0, MPI_THREAD_MULTIPLE, &provided); + self_mpi_init = true; } } + int mpi_thread_model; + MPI_Query_thread(&mpi_thread_model); + if (mpi_thread_model != MPI_THREAD_MULTIPLE) { + log_coll.fatal( + "MPI has been initialized by others, but is not initialized with " + "MPI_THREAD_MULTIPLE"); + LEGATE_ABORT; + } // check int *tag_ub, flag; CHECK_MPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &flag)); @@ -285,6 +295,7 @@ int collFinalize() log_coll.fatal("MPI should not have been finalized"); LEGATE_ABORT; } + if (self_mpi_init) { CHECK_MPI(MPI_Finalize()); } #else for (ThreadComm* thread_comm : thread_comms) { assert(!thread_comm->ready_flag); From 
734c2da289ccb863db24754df0026fcbc4b0df50 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 8 Nov 2022 13:21:21 -0800 Subject: [PATCH 049/121] Add option to have per-file, per-stage test overrides (#469) * leave override config to client projects * Add option to have per-file, per-stage test overrides * remove example custom test --- legate/tester/__init__.py | 38 +++++++++++++++++++++--------- legate/tester/stages/test_stage.py | 23 +++++++++++++++--- 2 files changed, 47 insertions(+), 14 deletions(-) diff --git a/legate/tester/__init__.py b/legate/tester/__init__.py index 270abcf8d..045eca19d 100644 --- a/legate/tester/__init__.py +++ b/legate/tester/__init__.py @@ -17,9 +17,12 @@ """ from __future__ import annotations +from dataclasses import dataclass from typing import Union from typing_extensions import Literal, TypeAlias +from ..util.types import ArgList + #: Define the available feature types for tests FeatureType: TypeAlias = Union[ Literal["cpus"], Literal["cuda"], Literal["eager"], Literal["openmp"] @@ -57,15 +60,28 @@ "openmp", ) -#: Paths to example files that should be skipped. -SKIPPED_EXAMPLES = { - "examples/ingest.py", - "examples/kmeans_sort.py", - "examples/lstm_full.py", - "examples/wgrad.py", -} +#: Paths to test files that should be skipped entirely in all stages. +#: +#: Client test scripts should udpate this set with their own customizations. +SKIPPED_EXAMPLES: set[str] = set() -#: Extra arguments to supply when specific examples are executed. -PER_FILE_ARGS = { - "examples/lstm_full.py": ["--file", "resources/lstm_input.txt"], -} +#: Extra arguments to add when specific test files are executed (in any stage). +#: +#: Client test scripts should udpate this dict with their own customizations. +PER_FILE_ARGS: dict[str, ArgList] = {} + + +@dataclass +class CustomTest: + file: str + kind: FeatureType + args: ArgList + + +#: Customized configurations for specific test files. Each entry will result +#: in the specified test file being run in the specified stage, with the given +#: command line arguments appended (overriding default stage arguments). These +#: files are run serially, after the sharded, parallelized tests. +#: +#: Client test scripts should udpate this set with their own customizations. +CUSTOM_FILES: list[CustomTest] = [] diff --git a/legate/tester/stages/test_stage.py b/legate/tester/stages/test_stage.py index f9c871461..ed24ae461 100644 --- a/legate/tester/stages/test_stage.py +++ b/legate/tester/stages/test_stage.py @@ -23,7 +23,7 @@ from ...util.colors import yellow from ...util.types import ArgList, EnvDict from ...util.ui import banner, summary -from .. import PER_FILE_ARGS, FeatureType +from .. import CUSTOM_FILES, PER_FILE_ARGS, FeatureType from ..config import Config from ..test_system import ProcessResult, TestSystem from .util import Shard, StageResult, StageSpec, log_proc @@ -224,7 +224,12 @@ def cov_args(self, config: Config) -> ArgList: return args def run( - self, test_file: Path, config: Config, system: TestSystem + self, + test_file: Path, + config: Config, + system: TestSystem, + *, + custom_args: ArgList | None = None, ) -> ProcessResult: """Execute a single test files with appropriate environment and command-line options for a feature test stage. 
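# A sketch of how a client project's test script might use the hooks defined
# above; the file names and arguments here are invented purely for
# illustration:
#
#     from legate.tester import (
#         CUSTOM_FILES,
#         PER_FILE_ARGS,
#         SKIPPED_EXAMPLES,
#         CustomTest,
#     )
#
#     SKIPPED_EXAMPLES.add("examples/not_ported_yet.py")
#     PER_FILE_ARGS["examples/needs_input.py"] = ["--file", "resources/input.txt"]
#     CUSTOM_FILES.append(
#         CustomTest("examples/big_case.py", "cuda", ["--gpus", "2"])
#     )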
@@ -254,6 +259,9 @@ def run( cmd += stage_args + file_args + config.extra_args + if custom_args: + cmd += custom_args + self.delay(shard, config, system) result = system.run(cmd, test_file, env=self._env(config, system)) @@ -286,4 +294,13 @@ def _launch( ] pool.close() - return [job.get() for job in jobs] + sharded_results = [job.get() for job in jobs] + + custom = (x for x in CUSTOM_FILES if x.kind == self.kind) + + custom_results = [ + self.run(Path(x.file), config, system, custom_args=x.args) + for x in custom + ] + + return sharded_results + custom_results From e6cc081336982a916678bb822b446244e1b273b4 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 8 Nov 2022 23:38:52 -0800 Subject: [PATCH 050/121] Construct region-backed 0D stores in a correct way (#450) --- legate/core/launcher.py | 9 ++++++++- legate/core/runtime.py | 19 ++++++++++++++++++- legate/core/store.py | 17 +++++++---------- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/legate/core/launcher.py b/legate/core/launcher.py index b19b357c0..2f8ff886d 100644 --- a/legate/core/launcher.py +++ b/legate/core/launcher.py @@ -1030,7 +1030,14 @@ def add_store( flags: int, ) -> None: assert store.kind is not Future - assert store._transform.bottom + assert ( + store._transform.bottom + # Although we should not allow any transformed stores for copies, + # as affine transformations in copies are not yet supported, + # the 0D-to-1D case is benign and the backing region is guaranteed + # to be singleton, so we can accept (i.e., ignore) it. + or (store.ndim == 0 and store._storage.ndim == 1) + ) if TYPE_CHECKING: assert isinstance(store.storage, RegionField) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 4d12a6591..6e0796a7a 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -1266,10 +1266,27 @@ def create_store( if optimize_scalar and shape is not None and shape.volume() == 1 else RegionField ) - storage = Storage(shape, 0, dtype, data=data, kind=kind) + + sanitized_shape: Optional[Shape] + if kind is RegionField and shape is not None and shape.ndim == 0: + from .transform import Project, identity + + # If the client requested a 0D region-backed store, we need to + # promote the shape to 1D to create the storage, as Legion + # doesn't allow 0D regions. And we also need to set up a transform + # to map "0D" points back to 1D so that the store looks like 0D + # to the client. + sanitized_shape = Shape([1]) + transform = identity.stack(Project(0, 0)) + else: + sanitized_shape = shape + transform = None + + storage = Storage(sanitized_shape, 0, dtype, data=data, kind=kind) return Store( dtype, storage, + transform=transform, shape=shape, ndim=ndim, ) diff --git a/legate/core/store.py b/legate/core/store.py index 4c947829b..a8d01546b 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -18,6 +18,7 @@ from typing import TYPE_CHECKING, Any, Optional, Sequence, Type, Union from . import ( + AffineTransform, Attach, Detach, Future, @@ -42,7 +43,6 @@ Project, Promote, Shift, - TransformStack, Transpose, identity, ) @@ -50,7 +50,6 @@ if TYPE_CHECKING: from . 
import ( - AffineTransform, BufferBuilder, Partition as LegionPartition, PhysicalRegion, @@ -824,9 +823,7 @@ def get_child_store(self, *indices: int) -> Store: child_storage = self._storage_partition.get_child(color) child_transform = self.transform for dim, offset in enumerate(child_storage.offsets): - child_transform = TransformStack( - Shift(dim, -offset), child_transform - ) + child_transform = child_transform.stack(Shift(dim, -offset)) return Store( self._store.type, child_storage, @@ -1077,7 +1074,7 @@ def promote(self, extra_dim: int, dim_size: int = 1) -> Store: return Store( self._dtype, self._storage, - TransformStack(transform, self._transform), + self._transform.stack(transform), shape=shape, ) @@ -1116,7 +1113,7 @@ def project(self, dim: int, index: int) -> Store: return Store( self._dtype, storage, - TransformStack(transform, self._transform), + self._transform.stack(transform), shape=shape, ) @@ -1160,7 +1157,7 @@ def slice(self, dim: int, sl: slice) -> Store: transform = ( self._transform if start == 0 - else TransformStack(Shift(dim, -start), self._transform) + else self._transform.stack(Shift(dim, -start)) ) return Store( self._dtype, @@ -1192,7 +1189,7 @@ def transpose(self, axes: tuple[int, ...]) -> Store: return Store( self._dtype, self._storage, - TransformStack(transform, self._transform), + self._transform.stack(transform), shape=shape, ) @@ -1218,7 +1215,7 @@ def delinearize(self, dim: int, shape: tuple[int, ...]) -> Store: return Store( self._dtype, self._storage, - TransformStack(transform, self._transform), + self._transform.stack(transform), shape=new_shape, ) From bdcb603ee19fc6f36e265475cc9075ac8f73849d Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 9 Nov 2022 01:26:15 -0800 Subject: [PATCH 051/121] Show provenance strings in the progress logs (#473) --- src/core/runtime/runtime.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/runtime/runtime.cc b/src/core/runtime/runtime.cc index 78329649b..4ef4d831d 100644 --- a/src/core/runtime/runtime.cc +++ b/src/core/runtime/runtime.cc @@ -123,9 +123,10 @@ static void extract_scalar_task( point_str << point[0]; for (int32_t dim = 1; dim < task->index_point.dim; ++dim) point_str << "," << point[dim]; - log_legate.print("%s %s task, pt = (%s), proc = " IDFMT, + log_legate.print("%s %s task [%s], pt = (%s), proc = " IDFMT, task_name, proc_kind_str, + task->get_provenance_string().c_str(), point_str.str().c_str(), exec_proc.id); } From eabdbefc2b66c91d76ab2e1c307df908fd0c070b Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 9 Nov 2022 11:22:02 -0800 Subject: [PATCH 052/121] Use bind.sh for all rank detection (#471) * use bind.sh for all rank detection * add tests for detected_rank * add --bind-detail option * address review comments * remove Launcher.rank_id * more careful debug echo * docs --- bind.sh | 80 +++++++++++++++++------ legate/driver/args.py | 9 +++ legate/driver/command.py | 16 ++++- legate/driver/config.py | 1 + legate/driver/driver.py | 5 +- legate/driver/launcher.py | 41 +++++------- legate/driver/logs.py | 2 +- legate/jupyter/config.py | 2 +- tests/unit/legate/driver/test_args.py | 3 + tests/unit/legate/driver/test_command.py | 50 +++++++++++--- tests/unit/legate/driver/test_config.py | 5 +- tests/unit/legate/driver/test_launcher.py | 40 ++++++------ tests/unit/legate/jupyter/test_config.py | 4 +- 13 files changed, 179 insertions(+), 79 deletions(-) diff --git a/bind.sh b/bind.sh index 7e23b7acf..394deed3c 100755 --- a/bind.sh +++ b/bind.sh @@ -32,6 +32,7 @@ 
Options: --nics=SPEC Network interface binding specification, used to set all of: UCX_NET_DEVICES, NCCL_IB_HCA, GASNET_NUM_QPS, and GASNET_IBV_PORTS + --debug print out the final computed invocation before exectuting SPEC specifies the resources to bind each node-local rank to, with ranks separated by /, e.g. '0,1/2,3/4,5/6,7' for 4 ranks per node. @@ -46,15 +47,17 @@ EOM exit 2 } +debug="0" launcher=auto while : do case "$1" in - --launcher) launcher="$2" ;; - --cpus) cpus="$2" ;; - --gpus) gpus="$2" ;; - --mems) mems="$2" ;; - --nics) nics="$2" ;; + --launcher) launcher="$2"; shift 2 ;; + --cpus) cpus="$2"; shift 2 ;; + --gpus) gpus="$2"; shift 2 ;; + --mems) mems="$2"; shift 2 ;; + --nics) nics="$2"; shift 2 ;; + --debug) debug="1"; shift ;; --help) help ;; --) shift; @@ -65,31 +68,51 @@ do help ;; esac - shift 2 done case "$launcher" in - mpirun) rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" ;; - jsrun ) rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" ;; - srun ) rank="${SLURM_LOCALID:-unknown}" ;; - auto ) rank="${SLURM_LOCALID:-${OMPI_COMM_WORLD_LOCAL_RANK:-${MV2_COMM_WORLD_LOCAL_RANK:-unknown}}}" ;; - local ) rank="0" ;; + mpirun) + local_rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" + global_rank="${OMPI_COMM_WORLD_RANK:-unknown}" + ;; + jsrun ) + local_rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" + gloabl_rank="${OMPI_COMM_WORLD_RANK:-unknown}" + ;; + srun ) + local_rank="${SLURM_LOCALID:-unknown}" + global_rank="${SLURM_PROCID:-unknown}" + ;; + auto ) + local_rank="${SLURM_LOCALID:-${OMPI_COMM_WORLD_LOCAL_RANK:-${MV2_COMM_WORLD_LOCAL_RANK:-unknown}}}" + global_rank="${OMPI_COMM_WORLD_RANK:-${PMI_RANK:-${MV2_COMM_WORLD_RANK:-${SLURM_PROCID:-unknown}}}}" + ;; + local ) + local_rank="0" + global_rank="0" + ;; *) echo "Unexpected launcher value: $launcher" 1>&2 help ;; esac -if [[ "$rank" == "unknown" ]]; then +if [[ "$local_rank" == "unknown" ]]; then echo "Error: Could not determine node-local rank" 1>&2 exit 1 fi -export LEGATE_RANK="$rank" +if [[ "$global_rank" == "unknown" ]]; then + echo "Error: Could not determine global rank" 1>&2 + exit 1 +fi + +export LEGATE_LOCAL_RANK="$local_rank" +export LEGATE_GLOBAL_RANK="$global_rank" if [ -n "${cpus+x}" ]; then cpus=(${cpus//\// }) - if [[ "$rank" -ge "${#cpus[@]}" ]]; then + if [[ "$local_rank" -ge "${#cpus[@]}" ]]; then echo "Error: Incomplete CPU binding specification" 1>&2 exit 1 fi @@ -97,16 +120,16 @@ fi if [ -n "${gpus+x}" ]; then gpus=(${gpus//\// }) - if [[ "$rank" -ge "${#gpus[@]}" ]]; then + if [[ "$local_rank" -ge "${#gpus[@]}" ]]; then echo "Error: Incomplete GPU binding specification" 1>&2 exit 1 fi - export CUDA_VISIBLE_DEVICES="${gpus[$rank]}" + export CUDA_VISIBLE_DEVICES="${gpus[$local_rank]}" fi if [ -n "${mems+x}" ]; then mems=(${mems//\// }) - if [[ "$rank" -ge "${#mems[@]}" ]]; then + if [[ "$local_rank" -ge "${#mems[@]}" ]]; then echo "Error: Incomplete MEM binding specification" 1>&2 exit 1 fi @@ -114,14 +137,14 @@ fi if [ -n "${nics+x}" ]; then nics=(${nics//\// }) - if [[ "$rank" -ge "${#nics[@]}" ]]; then + if [[ "$local_rank" -ge "${#nics[@]}" ]]; then echo "Error: Incomplete NIC binding specification" 1>&2 exit 1 fi # set all potentially relevant variables (hopefully they are ignored if we # are not using the corresponding network) - nic="${nics[$rank]}" + nic="${nics[$local_rank]}" nic_array=(${nic//,/ }) export UCX_NET_DEVICES="${nic//,/:1,}":1 export NCCL_IB_HCA="$nic" @@ -133,10 +156,10 @@ fi if [[ -n "${cpus+x}" || -n "${mems+x}" ]]; then if command -v numactl &> /dev/null; then if [[ -n "${cpus+x}" 
]]; then - set -- --physcpubind "${cpus[$rank]}" "$@" + set -- --physcpubind "${cpus[$local_rank]}" "$@" fi if [[ -n "${mems+x}" ]]; then - set -- --membind "${mems[$rank]}" "$@" + set -- --membind "${mems[$local_rank]}" "$@" fi set -- numactl "$@" else @@ -144,4 +167,19 @@ if [[ -n "${cpus+x}" || -n "${mems+x}" ]]; then fi fi +# arguments may contain the substring %%LEGATE_GLOBAL_RANK%% which needs to be +# be replaced with the actual computed rank for downstream processes to use +updated=() +for arg in "$@"; do + updated+=("${arg/\%\%LEGATE_GLOBAL_RANK\%\%/$LEGATE_GLOBAL_RANK}") +done + +set -- "${updated[@]}" + +if [ "$debug" == "1" ]; then + echo -n "bind.sh: $@" 1>&2 + for TOK in "$@"; do printf " %q" "$TOK" 1>&2; done + echo +fi + exec "$@" diff --git a/legate/driver/args.py b/legate/driver/args.py index c473efa5f..e36f783b1 100644 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -304,6 +304,15 @@ ) +info.add_argument( + "--bind-detail", + dest="bind_detail", + action="store_true", + required=False, + help="print out the final invocation run by bind.sh", +) + + other = parser.add_argument_group("Other options") diff --git a/legate/driver/command.py b/legate/driver/command.py index 0e72cfe7b..0c7909564 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -27,6 +27,10 @@ __all__ = ("CMD_PARTS",) +# this will be replaced by bind.sh with the actual computed rank at runtime +LEGATE_GLOBAL_RANK_SUBSTITUTION = "%%LEGATE_GLOBAL_RANK%%" + + def cmd_bind( config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: @@ -62,6 +66,9 @@ def check_bind_ranks(name: str, binding: str) -> None: check_bind_ranks(name, binding) opts += (f"--{name}s", binding) + if config.info.bind_detail: + opts += ("--debug",) + return opts + ("--",) @@ -97,7 +104,10 @@ def cmd_nvprof( if not config.profiling.nvprof: return () - log_path = str(config.logging.logdir / f"legate_{launcher.rank_id}.nvvp") + log_path = str( + config.logging.logdir + / f"legate_{LEGATE_GLOBAL_RANK_SUBSTITUTION}.nvvp" + ) return ("nvprof", "-o", log_path) @@ -108,7 +118,9 @@ def cmd_nsys( if not config.profiling.nsys: return () - log_path = str(config.logging.logdir / f"legate_{launcher.rank_id}") + log_path = str( + config.logging.logdir / f"legate_{LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) targets = config.profiling.nsys_targets extra = config.profiling.nsys_extra diff --git a/legate/driver/config.py b/legate/driver/config.py index 470cca123..711162ac5 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -134,6 +134,7 @@ class Info(DataclassMixin): progress: bool mem_usage: bool verbose: bool + bind_detail: bool @dataclass(frozen=True) diff --git a/legate/driver/driver.py b/legate/driver/driver.py index 5329b951f..7f3e17d33 100644 --- a/legate/driver/driver.py +++ b/legate/driver/driver.py @@ -93,7 +93,10 @@ def run(self) -> int: """ if self.config.info.verbose: # we only want to print verbose output on a "head" node - if self.launcher.kind != "none" or self.launcher.rank_id == "0": + if ( + self.launcher.kind != "none" + or self.launcher.detected_rank_id == "0" + ): print_verbose(self.system, self) self._darwin_gdb_warn() diff --git a/legate/driver/launcher.py b/legate/driver/launcher.py index e41b0a2e1..1f67046b5 100644 --- a/legate/driver/launcher.py +++ b/legate/driver/launcher.py @@ -54,8 +54,8 @@ class Launcher: """A base class for custom launch handlers for Legate. - Subclasses should set ``kind``, ``rank_id``, and ``cmd`` properties during - their initialization. 
+ Subclasses should set ``kind`` and ``cmd`` properties during their + initialization. Parameters ---------- @@ -67,10 +67,11 @@ class Launcher: kind: LauncherType - rank_id: str - cmd: Command + # base class will attempt to set this + detected_rank_id: str | None = None + _config: ConfigProtocol _system: System @@ -83,13 +84,20 @@ def __init__(self, config: ConfigProtocol, system: System) -> None: self._config = config self._system = system + if config.multi_node.ranks == 1: + self.detected_rank_id = "0" + else: + for var in RANK_ENV_VARS: + if var in system.env: + self.detected_rank_id = system.env[var] + break + self._check_realm_python() def __eq__(self, other: object) -> bool: return ( isinstance(other, type(self)) and self.kind == other.kind - and self.rank_id == other.rank_id and self.cmd == other.cmd and self.env == other.env ) @@ -294,18 +302,11 @@ class SimpleLauncher(Launcher): def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) - if config.multi_node.ranks == 1: - self.rank_id = "0" - - else: - for var in RANK_ENV_VARS: - if var in system.env: - self.rank_id = system.env[var] - break - - # NB: for-else clause! (executes if NO loop break) - else: - raise RuntimeError(RANK_ERR_MSG) + # bind.sh handles computing local and global rank id, even in the + # simple case, just for consistency. But we do still check the known + # rank env vars below in order to issue RANK_ERR_MSG if needed + if config.multi_node.ranks > 1 and self.detected_rank_id is None: + raise RuntimeError(RANK_ERR_MSG) self.cmd = () @@ -322,8 +323,6 @@ class MPILauncher(Launcher): def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) - self.rank_id = "%q{OMPI_COMM_WORLD_RANK}" - ranks = config.multi_node.ranks ranks_per_node = config.multi_node.ranks_per_node @@ -352,8 +351,6 @@ class JSRunLauncher(Launcher): def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) - self.rank_id = "%q{OMPI_COMM_WORLD_RANK}" - ranks = config.multi_node.ranks ranks_per_node = config.multi_node.ranks_per_node @@ -380,8 +377,6 @@ class SRunLauncher(Launcher): def __init__(self, config: ConfigProtocol, system: System) -> None: super().__init__(config, system) - self.rank_id = "%q{SLURM_PROCID}" - ranks = config.multi_node.ranks ranks_per_node = config.multi_node.ranks_per_node diff --git a/legate/driver/logs.py b/legate/driver/logs.py index 95b2ed46f..8dad3a3b4 100644 --- a/legate/driver/logs.py +++ b/legate/driver/logs.py @@ -185,7 +185,7 @@ def process_logs( handlers: list[LogHandler] = [] - if launcher.kind != "none" or launcher.rank_id == "0": + if launcher.kind != "none" or launcher.detected_rank_id == "0": if config.profiling.profile: handlers.append(ProfilingHandler(config, system)) diff --git a/legate/jupyter/config.py b/legate/jupyter/config.py index 745238b63..77ee521cd 100644 --- a/legate/jupyter/config.py +++ b/legate/jupyter/config.py @@ -86,5 +86,5 @@ def __init__(self, argv: ArgList) -> None: self.debugging = Debugging( False, False, False, False, False, False, False ) - self.info = Info(False, False, self.verbose > 0) + self.info = Info(False, False, self.verbose > 0, False) self.other = Other(None, False, False) diff --git a/tests/unit/legate/driver/test_args.py b/tests/unit/legate/driver/test_args.py index fa9d36929..4881521e8 100644 --- a/tests/unit/legate/driver/test_args.py +++ b/tests/unit/legate/driver/test_args.py @@ -166,6 +166,9 @@ def test_mem_usage(self) -> None: def 
test_verbose(self) -> None: assert m.parser.get_default("verbose") is False + def test_bind_detail(self) -> None: + assert m.parser.get_default("bind_detail") is False + # other def test_module(self) -> None: diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index 739dd7f9c..fede85b11 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -32,6 +32,10 @@ def test___all__() -> None: assert m.__all__ == ("CMD_PARTS",) +def test_LEGATE_GLOBAL_RANK_SUBSTITUTION() -> None: + assert m.LEGATE_GLOBAL_RANK_SUBSTITUTION == "%%LEGATE_GLOBAL_RANK%%" + + def test_CMD_PARTS() -> None: assert m.CMD_PARTS == ( m.cmd_bind, @@ -70,6 +74,14 @@ def test_default(self, genobjs: GenObjs) -> None: bind_sh = str(system.legate_paths.bind_sh_path) assert result == (bind_sh, "--launcher", "local", "--") + def test_bind_detail(self, genobjs: GenObjs) -> None: + config, system, launcher = genobjs(["--bind-detail"]) + + result = m.cmd_bind(config, system, launcher) + + bind_sh = str(system.legate_paths.bind_sh_path) + assert result == (bind_sh, "--launcher", "local", "--debug", "--") + @pytest.mark.parametrize("kind", ("cpu", "gpu", "mem", "nic")) def test_basic_local(self, genobjs: GenObjs, kind: str) -> None: config, system, launcher = genobjs([f"--{kind}-bind", "1"]) @@ -261,7 +273,10 @@ def test_with_option(self, genobjs: GenObjs) -> None: result = m.cmd_nvprof(config, system, launcher) - log_path = str(config.logging.logdir / "legate_0.nvvp") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}.nvvp" + ) assert result == ("nvprof", "-o", log_path) @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) @@ -277,7 +292,10 @@ def test_multi_rank_no_launcher( result = m.cmd_nvprof(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{rank}.nvvp") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}.nvvp" + ) assert result == ("nvprof", "-o", log_path) @pytest.mark.parametrize("launch", ("mpirun", "jsrun", "srun")) @@ -294,7 +312,8 @@ def test_multi_rank_with_launcher( result = m.cmd_nvprof(config, system, launcher) log_path = str( - config.logging.logdir / f"legate_{launcher.rank_id}.nvvp" + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}.nvvp" ) assert result == ("nvprof", "-o", log_path) @@ -320,7 +339,10 @@ def test_multi_rank_no_launcher( result = m.cmd_nsys(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{rank}") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) assert result == ( "nsys", "profile", @@ -343,7 +365,10 @@ def test_multi_rank_with_launcher( result = m.cmd_nsys(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{launcher.rank_id}") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) assert result == ( "nsys", "profile", @@ -376,7 +401,10 @@ def test_multi_rank_extra_no_s( result = m.cmd_nsys(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{rank}") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) assert result == ( "nsys", "profile", @@ -414,7 +442,10 @@ def test_multi_rank_extra_with_s( result = m.cmd_nsys(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{rank}") + log_path = str( + config.logging.logdir + / 
f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) assert result == ( "nsys", "profile", @@ -447,7 +478,10 @@ def test_multi_rank_targets( result = m.cmd_nsys(config, system, launcher) - log_path = str(config.logging.logdir / f"legate_{rank}") + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}" + ) assert result == ( "nsys", "profile", diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 249107ed2..67e0d2473 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -247,6 +247,7 @@ def test_fields(self) -> None: "progress", "mem_usage", "verbose", + "bind_detail", } def test_mixin(self) -> None: @@ -331,7 +332,9 @@ def test_default_init(self) -> None: event=False, ) - assert c.info == m.Info(progress=False, mem_usage=False, verbose=False) + assert c.info == m.Info( + progress=False, mem_usage=False, verbose=False, bind_detail=False + ) assert c.other == m.Other(module=None, dry_run=False, rlwrap=False) diff --git a/tests/unit/legate/driver/test_launcher.py b/tests/unit/legate/driver/test_launcher.py index ebfc793c5..f9a1b9d1a 100644 --- a/tests/unit/legate/driver/test_launcher.py +++ b/tests/unit/legate/driver/test_launcher.py @@ -126,7 +126,7 @@ def test_identical_config( assert launcher1 == launcher2 assert launcher1.kind == launcher2.kind - assert launcher1.rank_id == launcher2.rank_id + assert launcher1.detected_rank_id == launcher2.detected_rank_id assert launcher1.cmd == launcher2.cmd assert launcher1.env == launcher2.env @@ -358,7 +358,7 @@ def test_single_rank(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "0" + assert launcher.detected_rank_id == "0" assert launcher.cmd == () def test_single_rank_launcher_extra_ignored( @@ -370,7 +370,7 @@ def test_single_rank_launcher_extra_ignored( launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "0" + assert launcher.detected_rank_id == "0" assert launcher.cmd == () @pytest.mark.parametrize("rank_var", m.RANK_ENV_VARS) @@ -388,7 +388,7 @@ def test_multi_rank( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "123" + assert launcher.detected_rank_id == "123" assert launcher.cmd == () def test_multi_rank_bad(self, genconfig: GenConfig) -> None: @@ -416,7 +416,7 @@ def test_multi_rank_launcher_extra_ignored( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "123" + assert launcher.detected_rank_id == "123" assert launcher.cmd == () @@ -465,7 +465,7 @@ def test_single_rank(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "0" # TODO (bv) -x env args currnetly too fragile to test assert launcher.cmd[:10] == ( @@ -490,9 +490,9 @@ def test_single_rank_launcher_extra(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "0" - # TODO (bv) -x env args currnetly too fragile to test + # TODO (bv) -x env args currently too fragile to test assert launcher.cmd[:10] == ( ("mpirun",) + ("-n", "1", "--npernode", "1", "--bind-to", "none") @@ -517,7 +517,7 @@ def test_multi_rank( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert 
launcher.detected_rank_id == "123" # TODO (bv) -x env args currnetly too fragile to test assert launcher.cmd[:10] == ( @@ -555,7 +555,7 @@ def test_multi_rank_launcher_extra( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "123" # TODO (bv) -x env args currnetly too fragile to test assert launcher.cmd[:10] == ( @@ -575,7 +575,7 @@ def test_single_rank(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "0" assert launcher.cmd == ( ("jsrun",) + ("-n", "1", "-r", "1", "-a", "1") @@ -596,7 +596,7 @@ def test_single_rank_launcher_extra(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "0" assert launcher.cmd == ( ("jsrun",) + ("-n", "1", "-r", "1", "-a", "1") @@ -619,7 +619,7 @@ def test_multi_rank( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "123" assert launcher.cmd == ( ("jsrun",) + ("-n", "100", "-r", "1", "-a", "2") @@ -652,7 +652,7 @@ def test_multi_rank_launcher_extra( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{OMPI_COMM_WORLD_RANK}" + assert launcher.detected_rank_id == "123" assert launcher.cmd == ( ("jsrun",) + ("-n", "100", "-r", "1", "-a", "2") @@ -667,7 +667,7 @@ def test_single_rank(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "0" assert launcher.cmd == ("srun", "-n", "1", "--ntasks-per-node", "1") def test_single_rank_launcher_extra(self, genconfig: GenConfig) -> None: @@ -684,7 +684,7 @@ def test_single_rank_launcher_extra(self, genconfig: GenConfig) -> None: launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "0" assert launcher.cmd == ( "srun", "-n", @@ -705,7 +705,7 @@ def test_single_rank_debugging( launcher = m.Launcher.create(config, SYSTEM) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "0" assert launcher.cmd == ( "srun", "-n", @@ -730,7 +730,7 @@ def test_multi_rank( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "123" assert launcher.cmd == ("srun", "-n", "200", "--ntasks-per-node", "2") @pytest.mark.parametrize("rank_var", m.RANK_ENV_VARS) @@ -758,7 +758,7 @@ def test_multi_rank_launcher_extra( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "123" assert launcher.cmd == ( "srun", "-n", @@ -790,7 +790,7 @@ def test_multi_rank_debugging( system = System() launcher = m.Launcher.create(config, system) - assert launcher.rank_id == "%q{SLURM_PROCID}" + assert launcher.detected_rank_id == "123" assert launcher.cmd == ( "srun", "-n", diff --git a/tests/unit/legate/jupyter/test_config.py b/tests/unit/legate/jupyter/test_config.py index 4e956ff85..17931ed5c 100644 --- a/tests/unit/legate/jupyter/test_config.py +++ b/tests/unit/legate/jupyter/test_config.py @@ -107,7 +107,9 @@ def test_default_init(self) -> None: event=False, ) 
- assert c.info == m.Info(progress=False, mem_usage=False, verbose=False) + assert c.info == m.Info( + progress=False, mem_usage=False, verbose=False, bind_detail=False + ) assert c.other == m.Other(module=None, dry_run=False, rlwrap=False) From d23493efbe1a1be3f0029ee80ae830185a750264 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Thu, 10 Nov 2022 11:47:45 -0800 Subject: [PATCH 053/121] Print build start and end time (#474) Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index 96fbdc851..27b5aead1 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -1,7 +1,5 @@ #!/bin/bash -set -x; - # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" @@ -30,6 +28,8 @@ export CUDAFLAGS="-UNDEBUG" export CMAKE_GENERATOR=Ninja export CUDAHOSTCXX=${CXX} +echo "Build starting on $(date)" + cmake -S . -B build ${CMAKE_ARGS} cmake --build build -j$CPU_COUNT cmake --install build --prefix "$PREFIX" @@ -49,6 +49,8 @@ $PYTHON -m pip install \ --disable-pip-version-check \ . -vv +echo "Build ending on $(date)" + # Legion leaves an egg-info file which will confuse conda trying to pick up the information # Remove it so the legate-core is the only egg-info file added rm -rf $SP_DIR/legion*egg-info From 3cdc2cc6f755d0a3eb0d72aa802c865cba831592 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 10 Nov 2022 13:15:14 -0800 Subject: [PATCH 054/121] Pass a sufficiently high default value for gasnet's ibv-max-hcas (#477) Previously we would pass this setting through an envvar in quickstart build.sh, but Legion's cmake workflow overrides this value, so we need to pass it as a cmake flag. Co-authored-by: Manolis Papadakis --- install.py | 1 + 1 file changed, 1 insertion(+) diff --git a/install.py b/install.py index dad033c42..fdff8e692 100755 --- a/install.py +++ b/install.py @@ -439,6 +439,7 @@ def validate_path(path): -DLegion_REDOP_HALF=ON -DLegion_BUILD_BINDINGS=ON -DLegion_BUILD_JUPYTER=ON +-DLegion_EMBED_GASNet_CONFIGURE_ARGS="--with-ibv-max-hcas=8" """.splitlines() if nccl_dir: From 5792d5f376c12c8481272b4fd61fd0a2a4d8e1a3 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 11 Nov 2022 10:08:22 -0800 Subject: [PATCH 055/121] Clarification in docs --- BUILD.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BUILD.md b/BUILD.md index 2495f4d3b..f14dd8710 100644 --- a/BUILD.md +++ b/BUILD.md @@ -133,7 +133,7 @@ Only necessary if you wish to run on multiple nodes. Not available on conda; typically available through MOFED or the system-level package manager. -If using UCX, a build configured with `--enable-mt` is required. +If using UCX, a build of UCX configured with `--enable-mt` is required. 
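If you are unsure whether an existing UCX build was configured with this flag,
one way to check (assuming the `ucx_info` utility from that installation is on
your `PATH` and reports its configure line, as recent UCX releases do) is:

```shell
$ ucx_info -v | grep -- --enable-mt
```

An empty result suggests the build does not have `--enable-mt` enabled.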
## Alternative sources for dependencies From 002f828b469d8bc287ce5b87aac8f38f8f15ebe0 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 14 Nov 2022 11:32:08 -0800 Subject: [PATCH 056/121] Support for library specific annotations (#464) * Support for library specific annotations * Address review comments * Replace itertools.chain with a simpler code --- legate/core/__init__.py | 2 +- legate/core/context.py | 63 +++++++++++++++++++++++++++++++++++----- legate/core/operation.py | 7 ++++- 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/legate/core/__init__.py b/legate/core/__init__.py index 8a6beee0a..0c5eda106 100644 --- a/legate/core/__init__.py +++ b/legate/core/__init__.py @@ -69,7 +69,7 @@ # Import select types for Legate library construction from .allocation import DistributedAllocation -from .context import track_provenance +from .context import Annotation, track_provenance from .legate import ( Array, Library, diff --git a/legate/core/context.py b/legate/core/context.py index d242d3718..e1aac4536 100644 --- a/legate/core/context.py +++ b/legate/core/context.py @@ -66,6 +66,34 @@ def find_last_user_frame(libname: str) -> str: return f"{frame.f_code.co_filename}:{frame.f_lineno}" +class LibraryAnnotations: + def __init__(self) -> None: + self._entries: dict[str, str] = {} + self._provenance: Union[str, None] = None + + @property + def provenance(self) -> Optional[str]: + return self._provenance + + def set_provenance(self, provenance: str) -> None: + self._provenance = provenance + + def reset_provenance(self) -> None: + self._provenance = None + + def update(self, **kwargs: Any) -> None: + self._entries.update(**kwargs) + + def remove(self, key: str) -> None: + del self._entries[key] + + def __repr__(self) -> str: + pairs = [f"{key},{value}" for key, value in self._entries.items()] + if self._provenance is not None: + pairs.append(f"Provenance,{self._provenance}") + return "|".join(pairs) + + class Context: def __init__( self, @@ -125,7 +153,7 @@ def _create_scope( ) self._libname = library.get_name() - self._provenance: list[Union[str, None]] = [None] + self._annotations: list[LibraryAnnotations] = [LibraryAnnotations()] def destroy(self) -> None: self._library.destroy() @@ -162,9 +190,16 @@ def empty_argmap(self) -> ArgumentMap: def type_system(self) -> TypeSystem: return self._type_system + @property + def annotation(self) -> LibraryAnnotations: + return self._annotations[-1] + + def get_all_annotations(self) -> str: + return str(self.annotation) + @property def provenance(self) -> Optional[str]: - return self._provenance[-1] + return self.annotation.provenance def get_task_id(self, task_id: int) -> int: return self._task_scope.translate(task_id) @@ -209,18 +244,19 @@ def get_unique_op_id(self) -> int: return self._runtime.get_unique_op_id() def set_provenance(self, provenance: str) -> None: - self._provenance[-1] = provenance + self._annotations[-1].set_provenance(provenance) def reset_provenance(self) -> None: - self._provenance[-1] = None + self._annotations[-1].reset_provenance() def push_provenance(self, provenance: str) -> None: - self._provenance.append(provenance) + self._annotations.append(LibraryAnnotations()) + self.set_provenance(provenance) def pop_provenance(self) -> None: - if len(self._provenance) == 1: + if len(self._annotations) == 1: raise ValueError("Provenance stack underflow") - self._provenance.pop(-1) + self._annotations.pop(-1) def track_provenance( self, func: AnyCallable, nested: bool = False @@ -380,3 +416,16 @@ def decorator(func: 
AnyCallable) -> AnyCallable: return context.track_provenance(func, nested=nested) return decorator + + +class Annotation: + def __init__(self, context: Context, pairs: dict[str, str]) -> None: + self._annotation = context.annotation + self._pairs = pairs + + def __enter__(self) -> None: + self._annotation.update(**self._pairs) + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + for key in self._pairs.keys(): + self._annotation.remove(key) diff --git a/legate/core/operation.py b/legate/core/operation.py index d6788cf60..07738aa51 100644 --- a/legate/core/operation.py +++ b/legate/core/operation.py @@ -140,10 +140,15 @@ def __init__( self._all_parts: list[PartSym] = [] self._launch_domain: Union[Rect, None] = None self._error_on_interference = True + self._provenance = ( + None + if context.provenance is None + else (f"{context.provenance}$" f"{context.get_all_annotations()}") + ) @property def provenance(self) -> Optional[str]: - return self._context.provenance + return self._provenance def get_all_stores(self) -> OrderedSet[Store]: result: OrderedSet[Store] = OrderedSet() From 75e46db05a341c8fe6cc85c8d6645e8adff56036 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 14 Nov 2022 14:32:17 -0800 Subject: [PATCH 057/121] Add --cprofile driver option (#475) * stdout for debug output * add --cprofile driver option --- bind.sh | 4 ++-- legate/driver/args.py | 9 +++++++++ legate/driver/command.py | 16 +++++++++++++++- legate/driver/config.py | 1 + legate/jupyter/config.py | 2 +- tests/unit/legate/driver/test_command.py | 18 ++++++++++++++++++ tests/unit/legate/driver/test_config.py | 6 ++++++ tests/unit/legate/jupyter/test_config.py | 1 + 8 files changed, 53 insertions(+), 4 deletions(-) diff --git a/bind.sh b/bind.sh index 394deed3c..86a9bf44b 100755 --- a/bind.sh +++ b/bind.sh @@ -177,8 +177,8 @@ done set -- "${updated[@]}" if [ "$debug" == "1" ]; then - echo -n "bind.sh: $@" 1>&2 - for TOK in "$@"; do printf " %q" "$TOK" 1>&2; done + echo -n "bind.sh: $@" + for TOK in "$@"; do printf " %q" "$TOK"; done echo fi diff --git a/legate/driver/args.py b/legate/driver/args.py index e36f783b1..9d6758a07 100644 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -129,6 +129,15 @@ ) +profiling.add_argument( + "--cprofile", + dest="cprofile", + action="store_true", + required=False, + help="profile Python execution with the cprofile module", +) + + profiling.add_argument( "--nvprof", dest="nvprof", diff --git a/legate/driver/command.py b/legate/driver/command.py index 0c7909564..3ff4bbef7 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -152,8 +152,22 @@ def cmd_module( config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: module = config.other.module + cprofile = config.profiling.cprofile - return () if module is None else ("-m", module) + if cprofile and module is not None: + raise ValueError("Only one of --module or --cprofile may be used") + + if module is not None: + return ("-m", module) + + if cprofile: + log_path = str( + config.logging.logdir + / f"legate_{LEGATE_GLOBAL_RANK_SUBSTITUTION}.cprof" + ) + return ("-m", "cProfile", "-o", log_path) + + return () def cmd_rlwrap( diff --git a/legate/driver/config.py b/legate/driver/config.py index 711162ac5..6a526214f 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -90,6 +90,7 @@ class Memory(DataclassMixin): @dataclass(frozen=True) class Profiling(DataclassMixin): profile: bool + cprofile: bool nvprof: bool nsys: bool nsys_targets: str # TODO: 
multi-choice diff --git a/legate/jupyter/config.py b/legate/jupyter/config.py index 77ee521cd..2acbc6dcb 100644 --- a/legate/jupyter/config.py +++ b/legate/jupyter/config.py @@ -81,7 +81,7 @@ def __init__(self, argv: ArgList) -> None: # turn everything else off self.user_opts: tuple[str, ...] = () self.binding = Binding(None, None, None, None) - self.profiling = Profiling(False, False, False, "", []) + self.profiling = Profiling(False, False, False, False, "", []) self.logging = Logging(None, Path(), False, False) self.debugging = Debugging( False, False, False, False, False, False, False diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index fede85b11..e79310c93 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -548,6 +548,24 @@ def test_with_module(self, genobjs: GenObjs) -> None: assert result == ("-m", "foo") + def test_with_cprofile(self, genobjs: GenObjs) -> None: + config, system, launcher = genobjs(["--cprofile"]) + + result = m.cmd_module(config, system, launcher) + + log_path = str( + config.logging.logdir + / f"legate_{m.LEGATE_GLOBAL_RANK_SUBSTITUTION}.cprof" + ) + assert result == ("-m", "cProfile", "-o", log_path) + + def test_module_and_cprofile_error(self, genobjs: GenObjs) -> None: + config, system, launcher = genobjs(["--module", "foo", "--cprofile"]) + + err = "Only one of --module or --cprofile may be used" + with pytest.raises(ValueError, match=err): + m.cmd_module(config, system, launcher) + class Test_cmd_rlwrap: def test_default(self, genobjs: GenObjs) -> None: diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 67e0d2473..483719a6e 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -152,6 +152,7 @@ class TestProfiling: def test_fields(self) -> None: assert set(m.Profiling.__dataclass_fields__) == { "profile", + "cprofile", "nvprof", "nsys", "nsys_targets", @@ -168,6 +169,7 @@ def test_mixin(self) -> None: def test_nsys_extra_fixup_basic(self, extra: list[str]) -> None: p = m.Profiling( profile=True, + cprofile=True, nvprof=True, nsys=True, nsys_targets="foo,bar", @@ -178,6 +180,7 @@ def test_nsys_extra_fixup_basic(self, extra: list[str]) -> None: def test_nsys_extra_fixup_complex(self) -> None: p = m.Profiling( profile=True, + cprofile=True, nvprof=True, nsys=True, nsys_targets="foo,bar", @@ -199,6 +202,7 @@ def test_nsys_extra_fixup_complex(self) -> None: def test_nsys_extra_fixup_quoted(self) -> None: p = m.Profiling( profile=True, + cprofile=True, nvprof=True, nsys=True, nsys_targets="foo,bar", @@ -309,6 +313,7 @@ def test_default_init(self) -> None: c.profiling == m.Profiling( profile=False, + cprofile=False, nvprof=False, nsys=False, nsys_targets="", @@ -414,6 +419,7 @@ def test_log_to_file_fixup( "--gdb", "--keep-logs", "--profile", + "--cprofile", ) ), ) diff --git a/tests/unit/legate/jupyter/test_config.py b/tests/unit/legate/jupyter/test_config.py index 17931ed5c..d1c425237 100644 --- a/tests/unit/legate/jupyter/test_config.py +++ b/tests/unit/legate/jupyter/test_config.py @@ -84,6 +84,7 @@ def test_default_init(self) -> None: c.profiling == m.Profiling( profile=False, + cprofile=False, nvprof=False, nsys=False, nsys_targets="", From ad2c8fe5cdd6b8ef7da17474caf36b9a2b148bb0 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 14 Nov 2022 23:15:27 -0800 Subject: [PATCH 058/121] Optimize scalar extraction (#472) * Make the scalar extraction a constant time 
operation * Comment about the format for packed return values * Use a read-write accessor to extract the pointer of a ReturnValue * Pick the right variant of extract_scalar based on machine configuration --- legate/core/runtime.py | 13 ++- src/core/runtime/runtime.cc | 23 +++-- src/core/task/return.cc | 165 +++++++++++++++++++++--------------- src/core/task/return.h | 30 ++++++- src/core/task/task.h | 5 ++ 5 files changed, 158 insertions(+), 78 deletions(-) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 6e0796a7a..56c106471 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -1045,6 +1045,15 @@ def num_omps(self) -> int: def num_gpus(self) -> int: return self._num_gpus + @property + def core_task_variant_id(self) -> int: + if self.num_gpus > 0: + return self.core_library.LEGATE_GPU_VARIANT + elif self.num_omps > 0: + return self.core_library.LEGATE_OMP_VARIANT + else: + return self.core_library.LEGATE_CPU_VARIANT + @property def attachment_manager(self) -> AttachmentManager: return self._attachment_manager @@ -1468,7 +1477,7 @@ def extract_scalar(self, future: Future, idx: int) -> Future: launcher = TaskLauncher( self.core_context, self.core_library.LEGATE_CORE_EXTRACT_SCALAR_TASK_ID, - tag=self.core_library.LEGATE_CPU_VARIANT, + tag=self.core_task_variant_id, ) launcher.add_future(future) launcher.add_scalar_arg(idx, ty.int32) @@ -1482,7 +1491,7 @@ def extract_scalar_with_domain( launcher = TaskLauncher( self.core_context, self.core_library.LEGATE_CORE_EXTRACT_SCALAR_TASK_ID, - tag=self.core_library.LEGATE_CPU_VARIANT, + tag=self.core_task_variant_id, ) launcher.add_future_map(future) launcher.add_scalar_arg(idx, ty.int32) diff --git a/src/core/runtime/runtime.cc b/src/core/runtime/runtime.cc index 4ef4d831d..fb1549cf7 100644 --- a/src/core/runtime/runtime.cc +++ b/src/core/runtime/runtime.cc @@ -22,6 +22,7 @@ #include "core/task/exception.h" #include "core/task/task.h" #include "core/utilities/deserializer.h" +#include "core/utilities/machine.h" #include "legate.h" namespace legate { @@ -95,11 +96,11 @@ static void extract_scalar_task( Core::show_progress(task, legion_context, runtime, task->get_task_name()); TaskContext context(task, *regions, legion_context, runtime); - auto values = task->futures[0].get_result(); - auto idx = context.scalars()[0].value(); + auto idx = context.scalars()[0].value(); + auto value_and_size = ReturnValues::extract(task->futures[0], idx); // Legion postamble - ReturnValues({values[idx]}).finalize(legion_context); + value_and_size.finalize(legion_context); } /*static*/ void Core::shutdown(void) @@ -159,13 +160,19 @@ void register_legate_core_tasks(Machine machine, Runtime* runtime, const Library }; // Register the task variants - { - auto registrar = - make_registrar(extract_scalar_task_id, extract_scalar_task_name, Processor::LOC_PROC); + auto register_extract_scalar = [&](auto proc_kind, auto variant_id) { + auto registrar = make_registrar(extract_scalar_task_id, extract_scalar_task_name, proc_kind); Legion::CodeDescriptor desc(extract_scalar_task); runtime->register_task_variant( - registrar, desc, nullptr, 0, LEGATE_MAX_SIZE_SCALAR_RETURN, LEGATE_CPU_VARIANT); - } + registrar, desc, nullptr, 0, LEGATE_MAX_SIZE_SCALAR_RETURN, variant_id); + }; + register_extract_scalar(Processor::LOC_PROC, LEGATE_CPU_VARIANT); +#ifdef LEGATE_USE_CUDA + register_extract_scalar(Processor::TOC_PROC, LEGATE_GPU_VARIANT); +#endif +#ifdef LEGATE_USE_OPENMP + register_extract_scalar(Processor::OMP_PROC, LEGATE_OMP_VARIANT); +#endif 
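The variants registered above let the extract_scalar task run on whichever processor kind the machine is using, and the new ReturnValues layout (see the comment diagram further below) makes pulling one scalar out of a packed future a constant-time operation: a count, a table of cumulative end offsets, then the raw values. A minimal Python sketch of that multi-value layout, editorial rather than part of the patch (the single-value case is written to the future without a header and is not modeled here):

```python
import struct

def pack_return_values(values: list[bytes]) -> bytes:
    # Count, then one cumulative end offset per value, then the raw bytes.
    header = struct.pack("<I", len(values))
    end, ends = 0, []
    for v in values:
        end += len(v)
        ends.append(end)
    header += struct.pack(f"<{len(values)}I", *ends)
    return header + b"".join(values)

def extract_scalar(buf: bytes, idx: int) -> bytes:
    # Constant time: two offset lookups and one slice, regardless of idx.
    (num,) = struct.unpack_from("<I", buf, 0)
    ends = struct.unpack_from(f"<{num}I", buf, 4)
    values_start = 4 + 4 * num
    begin = 0 if idx == 0 else ends[idx - 1]
    return buf[values_start + begin : values_start + ends[idx]]

# The second of two packed values comes back untouched:
assert extract_scalar(pack_return_values([b"ab", b"cdef"]), 1) == b"cdef"
```

The offsets are 32-bit here only because the sketch mirrors the uint32_t fields used in the patch; the width is otherwise incidental.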
comm::register_tasks(machine, runtime, context); } diff --git a/src/core/task/return.cc b/src/core/task/return.cc index 3a9cd9216..c44365516 100644 --- a/src/core/task/return.cc +++ b/src/core/task/return.cc @@ -35,6 +35,40 @@ using namespace Legion; namespace legate { +ReturnValue::ReturnValue(Legion::UntypedDeferredValue value, size_t size) + : value_(value), size_(size) +{ + is_device_value_ = value.get_instance().get_location().kind() == Memory::Kind::GPU_FB_MEM; +} + +/*static*/ ReturnValue ReturnValue::unpack(const void* ptr, size_t size, Memory::Kind memory_kind) +{ + ReturnValue result(UntypedDeferredValue(size, memory_kind), size); +#ifdef DEBUG_LEGATE + assert(!result.is_device_value()); +#endif + memcpy(result.ptr(), ptr, size); + + return result; +} + +void ReturnValue::finalize(Legion::Context legion_context) const +{ + value_.finalize(legion_context); +} + +void* ReturnValue::ptr() +{ + AccessorRW acc(value_, size_, false); + return acc.ptr(0); +} + +const void* ReturnValue::ptr() const +{ + AccessorRO acc(value_, size_, false); + return acc.ptr(0); +} + struct JoinReturnedException { using LHS = ReturnedException; using RHS = LHS; @@ -145,53 +179,6 @@ ReturnValue ReturnedException::pack() const return ReturnValue(buffer, buffer_size); } -namespace { - -template -int8_t* pack_return_value(int8_t* target, const ReturnValue& value) -{ - if constexpr (PACK_SIZE) { - *reinterpret_cast(target) = value.second; - target += sizeof(uint32_t); - } - - AccessorRO acc(value.first, value.second, false); - memcpy(target, acc.ptr(0), value.second); - return target + value.second; -} - -#ifdef LEGATE_USE_CUDA - -template -int8_t* pack_return_value(int8_t* target, const ReturnValue& value, cuda::StreamView& stream) -{ - if constexpr (PACK_SIZE) { - *reinterpret_cast(target) = value.second; - target += sizeof(uint32_t); - } - - AccessorRO acc(value.first, value.second, false); - CHECK_CUDA(cudaMemcpyAsync(target, acc.ptr(0), value.second, cudaMemcpyDeviceToHost, stream)); - return target + value.second; -} - -#endif - -ReturnValue unpack_return_value(const int8_t*& ptr, Memory::Kind memory_kind) -{ - auto size = *reinterpret_cast(ptr); - ptr += sizeof(uint32_t); - - UntypedDeferredValue value(size, memory_kind); - AccessorWO acc(value, size, false); - memcpy(acc.ptr(0), ptr, size); - ptr += size; - - return ReturnValue(value, size); -} - -} // namespace - ReturnValues::ReturnValues() {} ReturnValues::ReturnValues(std::vector&& return_values) @@ -199,9 +186,9 @@ ReturnValues::ReturnValues(std::vector&& return_values) { if (return_values_.size() > 1) { buffer_size_ += sizeof(uint32_t); - for (auto& ret : return_values_) buffer_size_ += sizeof(uint32_t) + ret.second; + for (auto& ret : return_values_) buffer_size_ += sizeof(uint32_t) + ret.size(); } else if (return_values_.size() > 0) - buffer_size_ = return_values_[0].second; + buffer_size_ = return_values_[0].size(); } ReturnValue ReturnValues::operator[](int32_t idx) const { return return_values_[idx]; } @@ -210,31 +197,51 @@ size_t ReturnValues::legion_buffer_size() const { return buffer_size_; } void ReturnValues::legion_serialize(void* buffer) const { + // We pack N return values into the buffer in the following format: + // + // +--------+-----------+-----+------------+-------+-------+-------+----- + // | # | offset to | | offset to | total | value | value | ... + // | values | scalar 1 | ... 
| scalar N-1 | value | 1 | 2 | + // | | | | | size | | | + // +--------+-----------+-----+------------+-------+-------+-------+----- + // <============ offsets ===============> <==== values =======> + // + // the size of value i is computed by offsets[i] - (i == 0 ? 0 : offsets[i-1]) + #ifdef LEGATE_USE_CUDA auto stream = cuda::StreamPool::get_stream_pool().get_stream(); #endif - auto ptr = static_cast(buffer); if (return_values_.size() == 1) { auto& ret = return_values_.front(); #ifdef LEGATE_USE_CUDA - if (ret.first.get_instance().get_location().kind() == Memory::Kind::GPU_FB_MEM) - ptr = pack_return_value(ptr, ret, stream); + if (ret.is_device_value()) + CHECK_CUDA(cudaMemcpyAsync(buffer, ret.ptr(), ret.size(), cudaMemcpyDeviceToHost, stream)); else #endif - ptr = pack_return_value(ptr, ret); - } else { - *reinterpret_cast(ptr) = return_values_.size(); - ptr += sizeof(uint32_t); + memcpy(buffer, ret.ptr(), ret.size()); + return; + } + + *static_cast(buffer) = return_values_.size(); + auto ptr = static_cast(buffer) + sizeof(uint32_t); - for (auto& ret : return_values_) { + uint32_t offset = 0; + for (auto ret : return_values_) { + offset += ret.size(); + *reinterpret_cast(ptr) = offset; + ptr = ptr + sizeof(uint32_t); + } + + for (auto ret : return_values_) { + uint32_t size = ret.size(); #ifdef LEGATE_USE_CUDA - if (ret.first.get_instance().get_location().kind() == Memory::Kind::GPU_FB_MEM) - ptr = pack_return_value(ptr, ret, stream); - else + if (ret.is_device_value()) + CHECK_CUDA(cudaMemcpyAsync(ptr, ret.ptr(), size, cudaMemcpyDeviceToHost, stream)); + else #endif - ptr = pack_return_value(ptr, ret); - } + memcpy(ptr, ret.ptr(), size); + ptr += size; } } @@ -244,11 +251,35 @@ void ReturnValues::legion_deserialize(const void* buffer) auto ptr = static_cast(buffer); auto num_values = *reinterpret_cast(ptr); - ptr += sizeof(uint32_t); - return_values_.resize(num_values); - for (auto& ret : return_values_) ret = unpack_return_value(ptr, mem_kind); - buffer_size_ = ptr - static_cast(buffer); + auto offsets = reinterpret_cast(ptr + sizeof(uint32_t)); + auto values = ptr + sizeof(uint32_t) + sizeof(uint32_t) * num_values; + + uint32_t offset = 0; + for (uint32_t idx = 0; idx < num_values; ++idx) { + uint32_t next_offset = offsets[idx]; + uint32_t size = next_offset - offset; + return_values_.push_back(ReturnValue::unpack(values + offset, size, mem_kind)); + offset = next_offset; + } +} + +/*static*/ ReturnValue ReturnValues::extract(Legion::Future future, uint32_t to_extract) +{ + auto kind = find_memory_kind_for_executing_processor(); + const auto* buffer = future.get_buffer(kind); + + auto ptr = static_cast(buffer); + auto num_values = *reinterpret_cast(ptr); + + auto offsets = reinterpret_cast(ptr + sizeof(uint32_t)); + auto values = ptr + sizeof(uint32_t) + sizeof(uint32_t) * num_values; + + uint32_t next_offset = offsets[to_extract]; + uint32_t offset = to_extract == 0 ? 
0 : offsets[to_extract - 1]; + uint32_t size = next_offset - offset; + + return ReturnValue::unpack(values + offset, size, kind); } void ReturnValues::finalize(Context legion_context) const @@ -257,7 +288,7 @@ void ReturnValues::finalize(Context legion_context) const Runtime::legion_task_postamble(legion_context); return; } else if (return_values_.size() == 1) { - return_values_.front().first.finalize(legion_context); + return_values_.front().finalize(legion_context); return; } diff --git a/src/core/task/return.h b/src/core/task/return.h index f767f6161..9fa558e64 100644 --- a/src/core/task/return.h +++ b/src/core/task/return.h @@ -20,7 +20,32 @@ namespace legate { -using ReturnValue = std::pair; +struct ReturnValue { + public: + ReturnValue(Legion::UntypedDeferredValue value, size_t size); + + public: + ReturnValue(const ReturnValue&) = default; + ReturnValue& operator=(const ReturnValue&) = default; + + public: + static ReturnValue unpack(const void* ptr, size_t size, Legion::Memory::Kind memory_kind); + + public: + void* ptr(); + const void* ptr() const; + const size_t size() const { return size_; } + const bool is_device_value() const { return is_device_value_; } + + public: + // Calls the Legion postamble with an instance + void finalize(Legion::Context legion_context) const; + + private: + Legion::UntypedDeferredValue value_{}; + size_t size_{0}; + bool is_device_value_{false}; +}; struct ReturnedException { public: @@ -65,6 +90,9 @@ struct ReturnValues { void legion_serialize(void* buffer) const; void legion_deserialize(const void* buffer); + public: + static ReturnValue extract(Legion::Future future, uint32_t to_extract); + public: // Calls the Legion postamble with an instance that packs all return values void finalize(Legion::Context legion_context) const; diff --git a/src/core/task/task.h b/src/core/task/task.h index c06006f49..b90a4e86b 100644 --- a/src/core/task/task.h +++ b/src/core/task/task.h @@ -62,6 +62,11 @@ struct VariantOptions { concurrent = _concurrent; return *this; } + VariantOptions& with_return_size(size_t _return_size) + { + return_size = _return_size; + return *this; + } }; using LegateVariantImpl = void (*)(TaskContext&); From 878b6b893f9b73dc57f7e24ada38ea1667c70b0e Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 15 Nov 2022 10:16:16 -0800 Subject: [PATCH 059/121] Make overlap check tight (#479) Co-authored-by: Manolis Papadakis --- legate/core/store.py | 42 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/legate/core/store.py b/legate/core/store.py index a8d01546b..7b09516f1 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -655,42 +655,18 @@ def overlaps(self, other: Storage) -> bool: lhs = self rhs = other - lhs_root = lhs.get_root() - rhs_root = rhs.get_root() - - if lhs_root is not rhs_root: + if lhs.get_root() is not rhs.get_root(): return False - lhs_lvl = lhs.level - rhs_lvl = rhs.level - - if lhs_lvl > rhs_lvl: - lhs, rhs = rhs, lhs - lhs_lvl, rhs_lvl = rhs_lvl, lhs_lvl - - while lhs_lvl < rhs_lvl: - rhs_parent = rhs.parent - assert rhs_parent is not None - rhs = rhs_parent.parent - rhs_lvl -= 2 + if lhs.volume() == 0 or rhs.volume() == 0: + return False - if lhs is rhs: - return True - else: - assert lhs.has_parent and rhs.has_parent - assert self.parent is not None - # Legion doesn't allow passing aliased partitions to a task - if lhs.parent is not rhs.parent: - return True - else: - # TODO: This check is incorrect if the partition is aliased. 
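The replacement check, shown just below in the added lines, first rules out storages with different roots or zero volume and then reduces overlap testing to a per-dimension intersection of half-open intervals [offset, offset + extent). A small editorial example of that per-dimension condition:

```python
def intervals_overlap(loff: int, lext: int, roff: int, rext: int) -> bool:
    # Same condition as the new per-dimension test: whichever interval
    # starts first must extend past the start of the other.
    return roff < loff + lext if loff <= roff else loff < roff + rext

# Adjacent 1-D tiles [0, 4) and [4, 8) do not overlap; [0, 4) and [3, 6) do.
assert not intervals_overlap(0, 4, 4, 4)
assert intervals_overlap(0, 4, 3, 3)
```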
- # Since we only have a tiling, which is a disjoint - # partition, we put this assertion here to remember - # that we need to exdtend this logic if we have other - # partitions. (We need to carry around the disjointness - # of each partition.) - assert isinstance(self.parent._partition, Tiling) - return lhs.color == rhs.color + return all( + roff < loff + lext if loff <= roff else loff < roff + rext + for (loff, lext, roff, rext) in zip( + lhs.offsets, lhs.extents, rhs.offsets, rhs.extents + ) + ) def attach_external_allocation( self, context: Context, alloc: Attachable, share: bool From 59e1a1ca4fd31e709c360200b59dcebfea9c4398 Mon Sep 17 00:00:00 2001 From: Wei Wu Date: Tue, 15 Nov 2022 13:41:24 -0700 Subject: [PATCH 060/121] Refactor CPU collective communicator (#468) * fix for -ll:networks none, we will init MPI if it has not been initialized. * refactor mpi comm * add the missing file * refactor local comm * remove unused files * always build local comm * now the both mpi and local comms are working * move common used functions into the base class * add a virtual destructor for BackendNetwork base class * select network based on LEGATE_NEED_NETWORK * minor fix for local network * mute printf * use debug instead of info --- legate_core_cpp.cmake | 11 +- src/core/comm/allgather_thread_local.cc | 82 ---- src/core/comm/allgather_thread_mpi.cc | 55 --- src/core/comm/alltoall_thread_local.cc | 87 ---- src/core/comm/alltoall_thread_mpi.cc | 94 ---- src/core/comm/alltoallv_thread_local.cc | 100 ----- src/core/comm/alltoallv_thread_mpi.cc | 103 ----- src/core/comm/bcast_thread_mpi.cc | 85 ---- src/core/comm/coll.cc | 406 +---------------- src/core/comm/coll.h | 243 ++++++---- src/core/comm/comm_cpu.cc | 27 +- src/core/comm/gather_thread_mpi.cc | 100 ----- src/core/comm/local_comm.cc | 351 +++++++++++++++ src/core/comm/mpi_comm.cc | 575 ++++++++++++++++++++++++ 14 files changed, 1124 insertions(+), 1195 deletions(-) delete mode 100644 src/core/comm/allgather_thread_local.cc delete mode 100644 src/core/comm/allgather_thread_mpi.cc delete mode 100644 src/core/comm/alltoall_thread_local.cc delete mode 100644 src/core/comm/alltoall_thread_mpi.cc delete mode 100644 src/core/comm/alltoallv_thread_local.cc delete mode 100644 src/core/comm/alltoallv_thread_mpi.cc delete mode 100644 src/core/comm/bcast_thread_mpi.cc delete mode 100644 src/core/comm/gather_thread_mpi.cc create mode 100644 src/core/comm/local_comm.cc create mode 100644 src/core/comm/mpi_comm.cc diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index 6150a1908..7502e501d 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -211,16 +211,11 @@ list(APPEND legate_core_SOURCES if(Legion_NETWORKS) list(APPEND legate_core_SOURCES - src/core/comm/alltoall_thread_mpi.cc - src/core/comm/alltoallv_thread_mpi.cc - src/core/comm/gather_thread_mpi.cc - src/core/comm/allgather_thread_mpi.cc - src/core/comm/bcast_thread_mpi.cc) + src/core/comm/mpi_comm.cc + src/core/comm/local_comm.cc) else() list(APPEND legate_core_SOURCES - src/core/comm/alltoall_thread_local.cc - src/core/comm/alltoallv_thread_local.cc - src/core/comm/allgather_thread_local.cc) + src/core/comm/local_comm.cc) endif() if(Legion_USE_CUDA) diff --git a/src/core/comm/allgather_thread_local.cc b/src/core/comm/allgather_thread_local.cc deleted file mode 100644 index c2a3587b9..000000000 --- a/src/core/comm/allgather_thread_local.cc +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); 
- * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int allgatherLocal( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) -{ - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - int type_extent = getDtypeSize(type); - - const void* sendbuf_tmp = sendbuf; - - // MPI_IN_PLACE - if (sendbuf == recvbuf) { sendbuf_tmp = allocateInplaceBuffer(recvbuf, type_extent * count); } - - global_comm->comm->buffers[global_rank] = sendbuf_tmp; - __sync_synchronize(); - - for (int recvfrom_global_rank = 0; recvfrom_global_rank < total_size; recvfrom_global_rank++) { - // wait for other threads to update the buffer address - while (global_comm->comm->buffers[recvfrom_global_rank] == nullptr) - ; - const void* src = global_comm->comm->buffers[recvfrom_global_rank]; - char* dst = static_cast(recvbuf) + - static_cast(recvfrom_global_rank) * type_extent * count; -#ifdef DEBUG_LEGATE - log_coll.debug( - "AllgatherLocal i: %d === global_rank %d, dtype %d, copy rank %d (%p) to rank %d (%p)", - recvfrom_global_rank, - global_rank, - type_extent, - recvfrom_global_rank, - src, - global_rank, - dst); -#endif - memcpy(dst, src, count * type_extent); - } - - barrierLocal(global_comm); - if (sendbuf == recvbuf) { free(const_cast(sendbuf_tmp)); } - - __sync_synchronize(); - - resetLocalBuffer(global_comm); - barrierLocal(global_comm); - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/allgather_thread_mpi.cc b/src/core/comm/allgather_thread_mpi.cc deleted file mode 100644 index 4e256ce12..000000000 --- a/src/core/comm/allgather_thread_mpi.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -#include -#include -#include -#include - -#include "coll.h" - -namespace legate { -namespace comm { -namespace coll { - -int allgatherMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) -{ - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - MPI_Datatype mpi_type = dtypeToMPIDtype(type); - - MPI_Aint lb, type_extent; - MPI_Type_get_extent(mpi_type, &lb, &type_extent); - - void* sendbuf_tmp = const_cast(sendbuf); - - // MPI_IN_PLACE - if (sendbuf == recvbuf) { sendbuf_tmp = allocateInplaceBuffer(recvbuf, type_extent * count); } - - gatherMPI(sendbuf_tmp, recvbuf, count, type, 0, global_comm); - - bcastMPI(recvbuf, count * total_size, type, 0, global_comm); - - if (sendbuf == recvbuf) { free(sendbuf_tmp); } - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/alltoall_thread_local.cc b/src/core/comm/alltoall_thread_local.cc deleted file mode 100644 index bffb1061a..000000000 --- a/src/core/comm/alltoall_thread_local.cc +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int alltoallLocal( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) -{ - int res; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - int type_extent = getDtypeSize(type); - - global_comm->comm->buffers[global_rank] = sendbuf; - __sync_synchronize(); - - int recvfrom_global_rank; - int recvfrom_seg_id = global_rank; - const void* src_base = nullptr; - for (int i = 1; i < total_size + 1; i++) { - recvfrom_global_rank = (global_rank + total_size - i) % total_size; - // wait for other threads to update the buffer address - while (global_comm->comm->buffers[recvfrom_global_rank] == nullptr) - ; - src_base = global_comm->comm->buffers[recvfrom_global_rank]; - char* src = static_cast(const_cast(src_base)) + - static_cast(recvfrom_seg_id) * type_extent * count; - char* dst = static_cast(recvbuf) + - static_cast(recvfrom_global_rank) * type_extent * count; -#ifdef DEBUG_LEGATE - log_coll.debug( - "AlltoallLocal i: %d === global_rank %d, dtype %d, copy rank %d (seg %d, %p) to rank %d (seg " - "%d, %p)", - i, - global_rank, - type_extent, - recvfrom_global_rank, - recvfrom_seg_id, - src, - global_rank, - recvfrom_global_rank, - dst); -#endif - memcpy(dst, src, count * type_extent); - } - - barrierLocal(global_comm); - - __sync_synchronize(); - - resetLocalBuffer(global_comm); - barrierLocal(global_comm); - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/alltoall_thread_mpi.cc 
b/src/core/comm/alltoall_thread_mpi.cc deleted file mode 100644 index d151dd5d7..000000000 --- a/src/core/comm/alltoall_thread_mpi.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int alltoallMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) -{ - MPI_Status status; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - MPI_Datatype mpi_type = dtypeToMPIDtype(type); - - MPI_Aint lb, type_extent; - MPI_Type_get_extent(mpi_type, &lb, &type_extent); - - int sendto_global_rank, recvfrom_global_rank, sendto_mpi_rank, recvfrom_mpi_rank; - for (int i = 1; i < total_size + 1; i++) { - sendto_global_rank = (global_rank + i) % total_size; - recvfrom_global_rank = (global_rank + total_size - i) % total_size; - char* src = static_cast(const_cast(sendbuf)) + - static_cast(sendto_global_rank) * type_extent * count; - char* dst = static_cast(recvbuf) + - static_cast(recvfrom_global_rank) * type_extent * count; - sendto_mpi_rank = global_comm->mapping_table.mpi_rank[sendto_global_rank]; - recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[recvfrom_global_rank]; - assert(sendto_global_rank == global_comm->mapping_table.global_rank[sendto_global_rank]); - assert(recvfrom_global_rank == global_comm->mapping_table.global_rank[recvfrom_global_rank]); - // tag: seg idx + rank_idx + tag - int send_tag = generateAlltoallTag(sendto_global_rank, global_rank, global_comm); - int recv_tag = generateAlltoallTag(global_rank, recvfrom_global_rank, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug( - "AlltoallMPI i: %d === global_rank %d, mpi rank %d, send to %d (%d), send_tag %d, " - "recv from %d (%d), " - "recv_tag %d", - i, - global_rank, - global_comm->mpi_rank, - sendto_global_rank, - sendto_mpi_rank, - send_tag, - recvfrom_global_rank, - recvfrom_mpi_rank, - recv_tag); -#endif - CHECK_MPI(MPI_Sendrecv(src, - count, - mpi_type, - sendto_mpi_rank, - send_tag, - dst, - count, - mpi_type, - recvfrom_mpi_rank, - recv_tag, - global_comm->comm, - &status)); - } - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/alltoallv_thread_local.cc b/src/core/comm/alltoallv_thread_local.cc deleted file mode 100644 index 6615e7459..000000000 --- a/src/core/comm/alltoallv_thread_local.cc +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int alltoallvLocal(const void* sendbuf, - const int sendcounts[], - const int sdispls[], - void* recvbuf, - const int recvcounts[], - const int rdispls[], - CollDataType type, - CollComm global_comm) -{ - int res; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - int type_extent = getDtypeSize(type); - - global_comm->comm->displs[global_rank] = sdispls; - global_comm->comm->buffers[global_rank] = sendbuf; - __sync_synchronize(); - - int recvfrom_global_rank; - int recvfrom_seg_id = global_rank; - const void* src_base = nullptr; - const int* displs = nullptr; - for (int i = 1; i < total_size + 1; i++) { - recvfrom_global_rank = (global_rank + total_size - i) % total_size; - // wait for other threads to update the buffer address - while (global_comm->comm->buffers[recvfrom_global_rank] == nullptr || - global_comm->comm->displs[recvfrom_global_rank] == nullptr) - ; - src_base = global_comm->comm->buffers[recvfrom_global_rank]; - displs = global_comm->comm->displs[recvfrom_global_rank]; - char* src = static_cast(const_cast(src_base)) + - static_cast(displs[recvfrom_seg_id]) * type_extent; - char* dst = static_cast(recvbuf) + - static_cast(rdispls[recvfrom_global_rank]) * type_extent; -#ifdef DEBUG_LEGATE - log_coll.debug( - "AlltoallvLocal i: %d === global_rank %d, dtype %d, copy rank %d (seg %d, sdispls %d, %p) to " - "rank %d (seg " - "%d, rdispls %d, %p)", - i, - global_rank, - type_extent, - recvfrom_global_rank, - recvfrom_seg_id, - sdispls[recvfrom_seg_id], - src, - global_rank, - recvfrom_global_rank, - rdispls[recvfrom_global_rank], - dst); -#endif - memcpy(dst, src, recvcounts[recvfrom_global_rank] * type_extent); - } - - barrierLocal(global_comm); - - __sync_synchronize(); - - resetLocalBuffer(global_comm); - barrierLocal(global_comm); - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/alltoallv_thread_mpi.cc b/src/core/comm/alltoallv_thread_mpi.cc deleted file mode 100644 index 1bcc8806e..000000000 --- a/src/core/comm/alltoallv_thread_mpi.cc +++ /dev/null @@ -1,103 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -#include -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int alltoallvMPI(const void* sendbuf, - const int sendcounts[], - const int sdispls[], - void* recvbuf, - const int recvcounts[], - const int rdispls[], - CollDataType type, - CollComm global_comm) -{ - MPI_Status status; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - MPI_Datatype mpi_type = dtypeToMPIDtype(type); - - MPI_Aint lb, type_extent; - MPI_Type_get_extent(mpi_type, &lb, &type_extent); - - int sendto_global_rank, recvfrom_global_rank, sendto_mpi_rank, recvfrom_mpi_rank; - for (int i = 1; i < total_size + 1; i++) { - sendto_global_rank = (global_rank + i) % total_size; - recvfrom_global_rank = (global_rank + total_size - i) % total_size; - char* src = static_cast(const_cast(sendbuf)) + - static_cast(sdispls[sendto_global_rank]) * type_extent; - char* dst = static_cast(recvbuf) + - static_cast(rdispls[recvfrom_global_rank]) * type_extent; - int scount = sendcounts[sendto_global_rank]; - int rcount = recvcounts[recvfrom_global_rank]; - sendto_mpi_rank = global_comm->mapping_table.mpi_rank[sendto_global_rank]; - recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[recvfrom_global_rank]; - assert(sendto_global_rank == global_comm->mapping_table.global_rank[sendto_global_rank]); - assert(recvfrom_global_rank == global_comm->mapping_table.global_rank[recvfrom_global_rank]); - // tag: seg idx + rank_idx + tag - int send_tag = generateAlltoallvTag(sendto_global_rank, global_rank, global_comm); - int recv_tag = generateAlltoallvTag(global_rank, recvfrom_global_rank, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug( - "AlltoallvMPI i: %d === global_rank %d, mpi rank %d, send to %d (%d), send_tag %d, " - "recv from %d (%d), " - "recv_tag %d", - i, - global_rank, - global_comm->mpi_rank, - sendto_global_rank, - sendto_mpi_rank, - send_tag, - recvfrom_global_rank, - recvfrom_mpi_rank, - recv_tag); -#endif - CHECK_MPI(MPI_Sendrecv(src, - scount, - mpi_type, - sendto_mpi_rank, - send_tag, - dst, - rcount, - mpi_type, - recvfrom_mpi_rank, - recv_tag, - global_comm->comm, - &status)); - } - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/bcast_thread_mpi.cc b/src/core/comm/bcast_thread_mpi.cc deleted file mode 100644 index 6c7f77092..000000000 --- a/src/core/comm/bcast_thread_mpi.cc +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int bcastMPI(void* buf, int count, CollDataType type, int root, CollComm global_comm) -{ - int tag; - MPI_Status status; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - int root_mpi_rank = global_comm->mapping_table.mpi_rank[root]; - assert(root == global_comm->mapping_table.global_rank[root]); - - MPI_Datatype mpi_type = dtypeToMPIDtype(type); - - // non-root - if (global_rank != root) { - tag = generateBcastTag(global_rank, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug("BcastMPI: non-root recv global_rank %d, mpi rank %d, send to %d (%d), tag %d", - global_rank, - global_comm->mpi_rank, - root, - root_mpi_rank, - tag); -#endif - CHECK_MPI(MPI_Recv(buf, count, mpi_type, root_mpi_rank, tag, global_comm->comm, &status)); - return CollSuccess; - } - - // root - int sendto_mpi_rank; - for (int i = 0; i < total_size; i++) { - sendto_mpi_rank = global_comm->mapping_table.mpi_rank[i]; - assert(i == global_comm->mapping_table.global_rank[i]); - tag = generateBcastTag(i, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug("BcastMPI: root i %d === global_rank %d, mpi rank %d, send to %d (%d), tag %d", - i, - global_rank, - global_comm->mpi_rank, - i, - sendto_mpi_rank, - tag); -#endif - if (global_rank != i) { - CHECK_MPI(MPI_Send(buf, count, mpi_type, sendto_mpi_rank, tag, global_comm->comm)); - } - } - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/coll.cc b/src/core/comm/coll.cc index 6ca5d6787..5f1a1f4e9 100644 --- a/src/core/comm/coll.cc +++ b/src/core/comm/coll.cc @@ -38,128 +38,20 @@ namespace coll { using namespace Legion; Logger log_coll("coll"); -#ifdef LEGATE_USE_NETWORK - -enum CollTag : int { - BCAST_TAG = 0, - GATHER_TAG = 1, - ALLTOALL_TAG = 2, - ALLTOALLV_TAG = 3, - MAX_TAG = 10, -}; - -static int mpi_tag_ub = 0; - -static std::vector mpi_comms; -#else // undef LEGATE_USE_NETWORK -static std::vector thread_comms; -#endif - -static int current_unique_id = 0; - -static bool coll_inited = false; - -static bool self_mpi_init = false; +BackendNetwork* backend_network = nullptr; // functions start here -#ifdef LEGATE_USE_NETWORK -static inline std::pair mostFrequent(const int* arr, int n); -static inline int match2ranks(int rank1, int rank2, CollComm global_comm); -#endif - int collCommCreate(CollComm global_comm, int global_comm_size, int global_rank, int unique_id, const int* mapping_table) { - global_comm->global_comm_size = global_comm_size; - global_comm->global_rank = global_rank; - global_comm->status = true; - global_comm->unique_id = unique_id; -#ifdef LEGATE_USE_NETWORK - int mpi_rank, mpi_comm_size; - int *tag_ub, flag; - int compare_result; - MPI_Comm comm = mpi_comms[unique_id]; - CHECK_MPI(MPI_Comm_compare(comm, MPI_COMM_WORLD, &compare_result)); - assert(MPI_CONGRUENT == compare_result); - - CHECK_MPI(MPI_Comm_rank(comm, &mpi_rank)); - CHECK_MPI(MPI_Comm_size(comm, &mpi_comm_size)); - global_comm->mpi_comm_size = mpi_comm_size; - global_comm->mpi_rank = mpi_rank; - global_comm->comm = comm; - assert(mapping_table != nullptr); - global_comm->mapping_table.global_rank = (int*)malloc(sizeof(int) * global_comm_size); - global_comm->mapping_table.mpi_rank = (int*)malloc(sizeof(int) * global_comm_size); - 
memcpy(global_comm->mapping_table.mpi_rank, mapping_table, sizeof(int) * global_comm_size); - for (int i = 0; i < global_comm_size; i++) { global_comm->mapping_table.global_rank[i] = i; } - std::pair p = mostFrequent(mapping_table, global_comm_size); - global_comm->nb_threads = p.first; - global_comm->mpi_comm_size_actual = p.second; -#else - assert(mapping_table == nullptr); - global_comm->mpi_comm_size = 1; - global_comm->mpi_comm_size_actual = 1; - global_comm->mpi_rank = 0; - if (global_comm->global_rank == 0) { - pthread_barrier_init((pthread_barrier_t*)&(thread_comms[global_comm->unique_id]->barrier), - nullptr, - global_comm->global_comm_size); - thread_comms[global_comm->unique_id]->buffers = - (const void**)malloc(sizeof(void*) * global_comm_size); - thread_comms[global_comm->unique_id]->displs = - (const int**)malloc(sizeof(int*) * global_comm_size); - for (int i = 0; i < global_comm_size; i++) { - thread_comms[global_comm->unique_id]->buffers[i] = nullptr; - thread_comms[global_comm->unique_id]->displs[i] = nullptr; - } - __sync_synchronize(); - thread_comms[global_comm->unique_id]->ready_flag = true; - } - __sync_synchronize(); - volatile ThreadComm* data = thread_comms[global_comm->unique_id]; - while (data->ready_flag != true) { data = thread_comms[global_comm->unique_id]; } - global_comm->comm = thread_comms[global_comm->unique_id]; - barrierLocal(global_comm); - assert(global_comm->comm->ready_flag == true); - assert(global_comm->comm->buffers != nullptr); - assert(global_comm->comm->displs != nullptr); - global_comm->nb_threads = global_comm->global_comm_size; -#endif - return CollSuccess; + return backend_network->comm_create( + global_comm, global_comm_size, global_rank, unique_id, mapping_table); } -int collCommDestroy(CollComm global_comm) -{ -#ifdef LEGATE_USE_NETWORK - if (global_comm->mapping_table.global_rank != nullptr) { - free(global_comm->mapping_table.global_rank); - global_comm->mapping_table.global_rank = nullptr; - } - if (global_comm->mapping_table.mpi_rank != nullptr) { - free(global_comm->mapping_table.mpi_rank); - global_comm->mapping_table.mpi_rank = nullptr; - } -#else - barrierLocal(global_comm); - if (global_comm->global_rank == 0) { - pthread_barrier_destroy((pthread_barrier_t*)&(thread_comms[global_comm->unique_id]->barrier)); - free(thread_comms[global_comm->unique_id]->buffers); - thread_comms[global_comm->unique_id]->buffers = nullptr; - free(thread_comms[global_comm->unique_id]->displs); - thread_comms[global_comm->unique_id]->displs = nullptr; - __sync_synchronize(); - thread_comms[global_comm->unique_id]->ready_flag = false; - } - __sync_synchronize(); - volatile ThreadComm* data = thread_comms[global_comm->unique_id]; - while (data->ready_flag != false) { data = thread_comms[global_comm->unique_id]; } -#endif - global_comm->status = false; - return CollSuccess; -} +int collCommDestroy(CollComm global_comm) { return backend_network->comm_destroy(global_comm); } int collAlltoallv(const void* sendbuf, const int sendcounts[], @@ -185,13 +77,8 @@ int collAlltoallv(const void* sendbuf, global_comm->mpi_comm_size, global_comm->mpi_comm_size_actual, global_comm->nb_threads); -#ifdef LEGATE_USE_NETWORK - return alltoallvMPI( - sendbuf, sendcounts, sdispls, recvbuf, recvcounts, rdispls, type, global_comm); -#else - return alltoallvLocal( + return backend_network->alltoallv( sendbuf, sendcounts, sdispls, recvbuf, recvcounts, rdispls, type, global_comm); -#endif } int collAlltoall( @@ -212,11 +99,7 @@ int collAlltoall( global_comm->mpi_comm_size, 
global_comm->mpi_comm_size_actual, global_comm->nb_threads); -#ifdef LEGATE_USE_NETWORK - return alltoallMPI(sendbuf, recvbuf, count, type, global_comm); -#else - return alltoallLocal(sendbuf, recvbuf, count, type, global_comm); -#endif + return backend_network->alltoall(sendbuf, recvbuf, count, type, global_comm); } int collAllgather( @@ -232,288 +115,47 @@ int collAllgather( global_comm->mpi_comm_size, global_comm->mpi_comm_size_actual, global_comm->nb_threads); -#ifdef LEGATE_USE_NETWORK - return allgatherMPI(sendbuf, recvbuf, count, type, global_comm); -#else - return allgatherLocal(sendbuf, recvbuf, count, type, global_comm); -#endif + return backend_network->allgather(sendbuf, recvbuf, count, type, global_comm); } // called from main thread int collInit(int argc, char* argv[]) { - current_unique_id = 0; #ifdef LEGATE_USE_NETWORK - int init_flag = 0; - CHECK_MPI(MPI_Initialized(&init_flag)); - if (!init_flag) { - char* network = getenv("LEGATE_NEED_NETWORK"); - int need_network = 0; - if (network != nullptr) { need_network = atoi(network); } - if (need_network) { - log_coll.fatal( - "MPI has not been initialized, it should be initialized by " - "the networking backend."); - LEGATE_ABORT; - } else { - int provided; - MPI_Init_thread(0, 0, MPI_THREAD_MULTIPLE, &provided); - self_mpi_init = true; - } - } - int mpi_thread_model; - MPI_Query_thread(&mpi_thread_model); - if (mpi_thread_model != MPI_THREAD_MULTIPLE) { - log_coll.fatal( - "MPI has been initialized by others, but is not initialized with " - "MPI_THREAD_MULTIPLE"); - LEGATE_ABORT; + char* network = getenv("LEGATE_NEED_NETWORK"); + int need_network = 0; + if (network != nullptr) { need_network = atoi(network); } + if (need_network) { + backend_network = new MPINetwork(argc, argv); + } else { + backend_network = new LocalNetwork(argc, argv); } - // check - int *tag_ub, flag; - CHECK_MPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &flag)); - assert(flag); - mpi_tag_ub = *tag_ub; - assert(mpi_comms.empty()); #else - assert(thread_comms.empty()); + backend_network = new LocalNetwork(argc, argv); #endif - coll_inited = true; return CollSuccess; } int collFinalize() { - assert(coll_inited == true); - coll_inited = false; -#ifdef LEGATE_USE_NETWORK - for (MPI_Comm& mpi_comm : mpi_comms) { CHECK_MPI(MPI_Comm_free(&mpi_comm)); } - mpi_comms.clear(); - int fina_flag = 0; - CHECK_MPI(MPI_Finalized(&fina_flag)); - if (fina_flag == 1) { - log_coll.fatal("MPI should not have been finalized"); - LEGATE_ABORT; - } - if (self_mpi_init) { CHECK_MPI(MPI_Finalize()); } -#else - for (ThreadComm* thread_comm : thread_comms) { - assert(!thread_comm->ready_flag); - free(thread_comm); - } - thread_comms.clear(); -#endif + delete backend_network; return CollSuccess; } -int collGetUniqueId(int* id) -{ - *id = current_unique_id; - current_unique_id++; - return CollSuccess; -} +int collInitComm() { return backend_network->init_comm(); } -int collInitComm() -{ - int id = 0; - collGetUniqueId(&id); -#ifdef LEGATE_USE_NETWORK -#ifdef DEBUG_LEGATE - int mpi_rank; - int send_id = id; - // check if all ranks get the same unique id - CHECK_MPI(MPI_Bcast(&send_id, 1, MPI_INT, 0, MPI_COMM_WORLD)); - assert(send_id == id); -#endif - assert(mpi_comms.size() == id); - // create mpi comm - MPI_Comm mpi_comm; - CHECK_MPI(MPI_Comm_dup(MPI_COMM_WORLD, &mpi_comm)); - mpi_comms.push_back(mpi_comm); -#else - assert(thread_comms.size() == id); - // create thread comm - ThreadComm* thread_comm = (ThreadComm*)malloc(sizeof(ThreadComm)); - thread_comm->ready_flag = 
false; - thread_comm->buffers = nullptr; - thread_comm->displs = nullptr; - thread_comms.push_back(thread_comm); -#endif - log_coll.debug("Init comm id %d", id); - return id; -} - -#ifdef LEGATE_USE_NETWORK -static inline std::pair mostFrequent(const int* arr, int n) -{ - std::unordered_map hash; - for (int i = 0; i < n; i++) hash[arr[i]]++; - - // find the max frequency - int max_count = 0; - std::unordered_map::iterator it; - for (it = hash.begin(); it != hash.end(); it++) { - if (max_count < it->second) { max_count = it->second; } - } - - return std::make_pair(max_count, hash.size()); -} - -static inline int match2ranks(int rank1, int rank2, CollComm global_comm) -{ - // tag: seg idx + rank_idx + tag - // send_tag = sendto_global_rank * 10000 + global_rank (concat 2 ranks) - // which dst seg it sends to (in dst rank) - // recv_tag = global_rank * 10000 + recvfrom_global_rank (concat 2 ranks) - // idx of current seg we are receving (in src/my rank) - // example: - // 00 | 01 | 02 | 03 - // 10 | 11 | 12 | 13 - // 20 | 21 | 22 | 23 - // 30 | 31 | 32 | 33 - // 01's send_tag = 10, 10's recv_tag = 10, match - // 12's send_tag = 21, 21's recv_tag = 21, match - - int tag; - // old tagging system for debug - // constexpr int const max_ranks = 10000; - // tag = rank1 * max_ranks + rank2; - - // new tagging system, if crash, switch to the old one - - tag = rank1 % global_comm->nb_threads * global_comm->global_comm_size + rank2; - - // Szudzik's Function, two numbers < 32768 - // if (rank1 >= rank2) { - // tag = rank1*rank1 + rank1 + rank2; - // } else { - // tag = rank1 + rank2*rank2; - // } - - // Cantor Pairing Function, two numbers < 32768 - // tag = (rank1 + rank2) * (rank1 + rank2 + 1) / 2 + rank1; - - return tag; -} - -MPI_Datatype dtypeToMPIDtype(CollDataType dtype) -{ - switch (dtype) { - case CollDataType::CollInt8: { - return MPI_INT8_T; - } - case CollDataType::CollChar: { - return MPI_CHAR; - } - case CollDataType::CollUint8: { - return MPI_UINT8_T; - } - case CollDataType::CollInt: { - return MPI_INT; - } - case CollDataType::CollUint32: { - return MPI_UINT32_T; - } - case CollDataType::CollInt64: { - return MPI_INT64_T; - } - case CollDataType::CollUint64: { - return MPI_UINT64_T; - } - case CollDataType::CollFloat: { - return MPI_FLOAT; - } - case CollDataType::CollDouble: { - return MPI_DOUBLE; - } - default: { - log_coll.fatal("Unknown datatype"); - LEGATE_ABORT; - return MPI_BYTE; - } - } -} - -int generateAlltoallTag(int rank1, int rank2, CollComm global_comm) -{ - int tag = match2ranks(rank1, rank2, global_comm) * CollTag::MAX_TAG + CollTag::ALLTOALL_TAG; - assert(tag <= mpi_tag_ub && tag > 0); - return tag; -} - -int generateAlltoallvTag(int rank1, int rank2, CollComm global_comm) -{ - int tag = match2ranks(rank1, rank2, global_comm) * CollTag::MAX_TAG + CollTag::ALLTOALLV_TAG; - assert(tag <= mpi_tag_ub && tag > 0); - return tag; -} - -int generateBcastTag(int rank, CollComm global_comm) -{ - int tag = rank * CollTag::MAX_TAG + CollTag::BCAST_TAG; - assert(tag <= mpi_tag_ub && tag >= 0); - return tag; -} - -int generateGatherTag(int rank, CollComm global_comm) -{ - int tag = rank * CollTag::MAX_TAG + CollTag::GATHER_TAG; - assert(tag <= mpi_tag_ub && tag > 0); - return tag; -} - -#else // undef LEGATE_USE_NETWORK -size_t getDtypeSize(CollDataType dtype) -{ - switch (dtype) { - case CollDataType::CollInt8: - case CollDataType::CollChar: { - return sizeof(char); - } - case CollDataType::CollUint8: { - return sizeof(uint8_t); - } - case CollDataType::CollInt: { - return 
sizeof(int); - } - case CollDataType::CollUint32: { - return sizeof(uint32_t); - } - case CollDataType::CollInt64: { - return sizeof(int64_t); - } - case CollDataType::CollUint64: { - return sizeof(uint64_t); - } - case CollDataType::CollFloat: { - return sizeof(float); - } - case CollDataType::CollDouble: { - return sizeof(double); - } - default: { - log_coll.fatal("Unknown datatype"); - LEGATE_ABORT; - return 0; - } - } -} +BackendNetwork::BackendNetwork() : coll_inited(false), current_unique_id(0) {} -void resetLocalBuffer(CollComm global_comm) -{ - int global_rank = global_comm->global_rank; - global_comm->comm->buffers[global_rank] = nullptr; - global_comm->comm->displs[global_rank] = nullptr; -} +BackendNetwork::~BackendNetwork() {} -void barrierLocal(CollComm global_comm) +int BackendNetwork::collGetUniqueId(int* id) { - assert(coll_inited == true); - pthread_barrier_wait(const_cast(&(global_comm->comm->barrier))); + *id = current_unique_id; + current_unique_id++; + return CollSuccess; } -#endif -void* allocateInplaceBuffer(const void* recvbuf, size_t size) +void* BackendNetwork::allocateInplaceBuffer(const void* recvbuf, size_t size) { void* sendbuf_tmp = malloc(size); assert(sendbuf_tmp != nullptr); diff --git a/src/core/comm/coll.h b/src/core/comm/coll.h index 0efd39add..08397f53e 100644 --- a/src/core/comm/coll.h +++ b/src/core/comm/coll.h @@ -22,7 +22,8 @@ #ifdef LEGATE_USE_NETWORK #include -#else +#endif + // If we aren't building with networking, we'll use pthread_barrier to // construct a communicator for thread-local communication. Mac OS // does not implement pthread barriers, so we need to include an @@ -32,26 +33,17 @@ #if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0) #include "core/comm/pthread_barrier.h" #endif -#endif namespace legate { namespace comm { namespace coll { #ifdef LEGATE_USE_NETWORK - -#define CHECK_MPI(expr) \ - do { \ - int result = (expr); \ - check_mpi(result, __FILE__, __LINE__); \ - } while (false) - struct RankMappingTable { int* mpi_rank; int* global_rank; }; - -#else +#endif struct ThreadComm { pthread_barrier_t barrier; @@ -59,7 +51,6 @@ struct ThreadComm { const void** buffers; const int** displs; }; -#endif enum class CollDataType : int { CollInt8 = 0, @@ -78,13 +69,17 @@ enum CollStatus : int { CollError = 1, }; +enum CollCommType : int { + CollMPI = 0, + CollLocal = 1, +}; + struct Coll_Comm { #ifdef LEGATE_USE_NETWORK - MPI_Comm comm; + MPI_Comm mpi_comm; RankMappingTable mapping_table; -#else - volatile ThreadComm* comm; #endif + volatile ThreadComm* local_comm; int mpi_rank; int mpi_comm_size; int mpi_comm_size_actual; @@ -97,6 +92,151 @@ struct Coll_Comm { typedef Coll_Comm* CollComm; +class BackendNetwork { + public: + BackendNetwork(); + virtual ~BackendNetwork(); + virtual int init_comm() = 0; + + virtual int comm_create(CollComm global_comm, + int global_comm_size, + int global_rank, + int unique_id, + const int* mapping_table) = 0; + + virtual int comm_destroy(CollComm global_comm) = 0; + + virtual int alltoallv(const void* sendbuf, + const int sendcounts[], + const int sdispls[], + void* recvbuf, + const int recvcounts[], + const int rdispls[], + CollDataType type, + CollComm global_comm) = 0; + + virtual int alltoall( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) = 0; + + virtual int allgather( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) = 0; + + protected: + int collGetUniqueId(int* id); + + void* allocateInplaceBuffer(const void* 
recvbuf, size_t size); + + public: + CollCommType comm_type; + + protected: + bool coll_inited; + int current_unique_id; +}; + +#ifdef LEGATE_USE_NETWORK +class MPINetwork : public BackendNetwork { + public: + MPINetwork(int argc, char* argv[]); + + ~MPINetwork(); + + int init_comm(); + + int comm_create(CollComm global_comm, + int global_comm_size, + int global_rank, + int unique_id, + const int* mapping_table); + + int comm_destroy(CollComm global_comm); + + int alltoallv(const void* sendbuf, + const int sendcounts[], + const int sdispls[], + void* recvbuf, + const int recvcounts[], + const int rdispls[], + CollDataType type, + CollComm global_comm); + + int alltoall( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); + + int allgather( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); + + protected: + int gather(const void* sendbuf, + void* recvbuf, + int count, + CollDataType type, + int root, + CollComm global_comm); + + int bcast(void* buf, int count, CollDataType type, int root, CollComm global_comm); + + MPI_Datatype dtypeToMPIDtype(CollDataType dtype); + + int generateAlltoallTag(int rank1, int rank2, CollComm global_comm); + + int generateAlltoallvTag(int rank1, int rank2, CollComm global_comm); + + int generateBcastTag(int rank, CollComm global_comm); + + int generateGatherTag(int rank, CollComm global_comm); + + private: + int mpi_tag_ub; + bool self_init_mpi; + std::vector mpi_comms; +}; +#endif + +class LocalNetwork : public BackendNetwork { + public: + LocalNetwork(int argc, char* argv[]); + + ~LocalNetwork(); + + int init_comm(); + + int comm_create(CollComm global_comm, + int global_comm_size, + int global_rank, + int unique_id, + const int* mapping_table); + + int comm_destroy(CollComm global_comm); + + int alltoallv(const void* sendbuf, + const int sendcounts[], + const int sdispls[], + void* recvbuf, + const int recvcounts[], + const int rdispls[], + CollDataType type, + CollComm global_comm); + + int alltoall( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); + + int allgather( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); + + protected: + size_t getDtypeSize(CollDataType dtype); + + void resetLocalBuffer(CollComm global_comm); + + void barrierLocal(CollComm global_comm); + + private: + std::vector thread_comms; +}; + +extern BackendNetwork* backend_network; + int collCommCreate(CollComm global_comm, int global_comm_size, int global_rank, @@ -124,81 +264,8 @@ int collInit(int argc, char* argv[]); int collFinalize(); -int collGetUniqueId(int* id); - int collInitComm(); -// The following functions should not be called by users -#ifdef LEGATE_USE_NETWORK -int alltoallvMPI(const void* sendbuf, - const int sendcounts[], - const int sdispls[], - void* recvbuf, - const int recvcounts[], - const int rdispls[], - CollDataType type, - CollComm global_comm); - -int alltoallMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); - -int gatherMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, int root, CollComm global_comm); - -int allgatherMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); - -int bcastMPI(void* buf, int count, CollDataType type, int root, CollComm global_comm); - -MPI_Datatype dtypeToMPIDtype(CollDataType dtype); - -int generateAlltoallTag(int rank1, int rank2, CollComm global_comm); - -int 
generateAlltoallvTag(int rank1, int rank2, CollComm global_comm); - -int generateBcastTag(int rank, CollComm global_comm); - -int generateGatherTag(int rank, CollComm global_comm); -#else -size_t getDtypeSize(CollDataType dtype); - -int alltoallvLocal(const void* sendbuf, - const int sendcounts[], - const int sdispls[], - void* recvbuf, - const int recvcounts[], - const int rdispls[], - CollDataType type, - CollComm global_comm); - -int alltoallLocal( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); - -int allgatherLocal( - const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm); - -void resetLocalBuffer(CollComm global_comm); - -void barrierLocal(CollComm global_comm); -#endif - -void* allocateInplaceBuffer(const void* recvbuf, size_t size); - -#ifdef LEGATE_USE_NETWORK -inline void check_mpi(int error, const char* file, int line) -{ - if (error != MPI_SUCCESS) { - fprintf( - stderr, "Internal MPI failure with error code %d in file %s at line %d\n", error, file, line); -#ifdef DEBUG_LEGATE - assert(false); -#else - exit(error); -#endif - } -} -#endif - } // namespace coll } // namespace comm } // namespace legate diff --git a/src/core/comm/comm_cpu.cc b/src/core/comm/comm_cpu.cc index 161bce446..05c2f6283 100644 --- a/src/core/comm/comm_cpu.cc +++ b/src/core/comm/comm_cpu.cc @@ -33,7 +33,9 @@ static int init_cpucoll_mapping(const Legion::Task* task, Core::show_progress(task, context, runtime, task->get_task_name()); int mpi_rank = 0; #if defined(LEGATE_USE_NETWORK) - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + if (coll::backend_network->comm_type == coll::CollCommType::CollMPI) { + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + } #endif return mpi_rank; @@ -55,17 +57,20 @@ static coll::CollComm init_cpucoll(const Legion::Task* task, coll::CollComm comm = (coll::CollComm)malloc(sizeof(coll::Coll_Comm)); #ifdef LEGATE_USE_NETWORK - int* mapping_table = (int*)malloc(sizeof(int) * num_ranks); - for (int i = 0; i < num_ranks; i++) { - const int mapping_table_element = task->futures[i + 1].get_result(); - mapping_table[i] = mapping_table_element; - } - coll::collCommCreate(comm, num_ranks, point, unique_id, mapping_table); - assert(mapping_table[point] == comm->mpi_rank); - free(mapping_table); -#else - coll::collCommCreate(comm, num_ranks, point, unique_id, nullptr); + if (coll::backend_network->comm_type == coll::CollCommType::CollMPI) { + int* mapping_table = (int*)malloc(sizeof(int) * num_ranks); + for (int i = 0; i < num_ranks; i++) { + const int mapping_table_element = task->futures[i + 1].get_result(); + mapping_table[i] = mapping_table_element; + } + coll::collCommCreate(comm, num_ranks, point, unique_id, mapping_table); + assert(mapping_table[point] == comm->mpi_rank); + free(mapping_table); + } else #endif + { + coll::collCommCreate(comm, num_ranks, point, unique_id, nullptr); + } return comm; } diff --git a/src/core/comm/gather_thread_mpi.cc b/src/core/comm/gather_thread_mpi.cc deleted file mode 100644 index 4ba7fe455..000000000 --- a/src/core/comm/gather_thread_mpi.cc +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2022 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include -#include -#include -#include - -#include "coll.h" -#include "legion.h" - -namespace legate { -namespace comm { -namespace coll { - -using namespace Legion; -extern Logger log_coll; - -int gatherMPI( - const void* sendbuf, void* recvbuf, int count, CollDataType type, int root, CollComm global_comm) -{ - MPI_Status status; - - int total_size = global_comm->global_comm_size; - int global_rank = global_comm->global_rank; - - MPI_Datatype mpi_type = dtypeToMPIDtype(type); - - // Should not see inplace here - if (sendbuf == recvbuf) { assert(0); } - - int root_mpi_rank = global_comm->mapping_table.mpi_rank[root]; - assert(root == global_comm->mapping_table.global_rank[root]); - - int tag; - - // non-root - if (global_rank != root) { - tag = generateGatherTag(global_rank, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug("GatherMPI: non-root send global_rank %d, mpi rank %d, send to %d (%d), tag %d", - global_rank, - global_comm->mpi_rank, - root, - root_mpi_rank, - tag); -#endif - CHECK_MPI(MPI_Send(sendbuf, count, mpi_type, root_mpi_rank, tag, global_comm->comm)); - return CollSuccess; - } - - // root - MPI_Aint incr, lb, type_extent; - MPI_Type_get_extent(mpi_type, &lb, &type_extent); - incr = type_extent * static_cast(count); - char* dst = static_cast(recvbuf); - int recvfrom_mpi_rank; - for (int i = 0; i < total_size; i++) { - recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[i]; - assert(i == global_comm->mapping_table.global_rank[i]); - tag = generateGatherTag(i, global_comm); -#ifdef DEBUG_LEGATE - log_coll.debug( - "GatherMPI: root i %d === global_rank %d, mpi rank %d, recv %p, from %d (%d), tag %d", - i, - global_rank, - global_comm->mpi_rank, - dst, - i, - recvfrom_mpi_rank, - tag); -#endif - assert(dst != nullptr); - if (global_rank == i) { - memcpy(dst, sendbuf, incr); - } else { - CHECK_MPI(MPI_Recv(dst, count, mpi_type, recvfrom_mpi_rank, tag, global_comm->comm, &status)); - } - dst += incr; - } - - return CollSuccess; -} - -} // namespace coll -} // namespace comm -} // namespace legate \ No newline at end of file diff --git a/src/core/comm/local_comm.cc b/src/core/comm/local_comm.cc new file mode 100644 index 000000000..8adc4a2f3 --- /dev/null +++ b/src/core/comm/local_comm.cc @@ -0,0 +1,351 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include + +#include "coll.h" +#include "legate.h" +#include "legion.h" + +namespace legate { +namespace comm { +namespace coll { + +using namespace Legion; +extern Logger log_coll; + +// public functions start from here + +LocalNetwork::LocalNetwork(int argc, char* argv[]) : BackendNetwork() +{ + log_coll.debug("Enable LocalNetwork"); + assert(current_unique_id == 0); + assert(thread_comms.empty()); + BackendNetwork::coll_inited = true; + BackendNetwork::comm_type = CollCommType::CollLocal; +} + +LocalNetwork::~LocalNetwork() +{ + log_coll.debug("Finalize LocalNetwork"); + assert(BackendNetwork::coll_inited == true); + for (ThreadComm* thread_comm : thread_comms) { + assert(!thread_comm->ready_flag); + free(thread_comm); + } + thread_comms.clear(); + BackendNetwork::coll_inited = false; +} + +int LocalNetwork::comm_create(CollComm global_comm, + int global_comm_size, + int global_rank, + int unique_id, + const int* mapping_table) +{ + global_comm->global_comm_size = global_comm_size; + global_comm->global_rank = global_rank; + global_comm->status = true; + global_comm->unique_id = unique_id; + assert(mapping_table == nullptr); + global_comm->mpi_comm_size = 1; + global_comm->mpi_comm_size_actual = 1; + global_comm->mpi_rank = 0; + if (global_comm->global_rank == 0) { + pthread_barrier_init((pthread_barrier_t*)&(thread_comms[global_comm->unique_id]->barrier), + nullptr, + global_comm->global_comm_size); + thread_comms[global_comm->unique_id]->buffers = + (const void**)malloc(sizeof(void*) * global_comm_size); + thread_comms[global_comm->unique_id]->displs = + (const int**)malloc(sizeof(int*) * global_comm_size); + for (int i = 0; i < global_comm_size; i++) { + thread_comms[global_comm->unique_id]->buffers[i] = nullptr; + thread_comms[global_comm->unique_id]->displs[i] = nullptr; + } + __sync_synchronize(); + thread_comms[global_comm->unique_id]->ready_flag = true; + } + __sync_synchronize(); + volatile ThreadComm* data = thread_comms[global_comm->unique_id]; + while (data->ready_flag != true) { data = thread_comms[global_comm->unique_id]; } + global_comm->local_comm = thread_comms[global_comm->unique_id]; + barrierLocal(global_comm); + assert(global_comm->local_comm->ready_flag == true); + assert(global_comm->local_comm->buffers != nullptr); + assert(global_comm->local_comm->displs != nullptr); + global_comm->nb_threads = global_comm->global_comm_size; + return CollSuccess; +} + +int LocalNetwork::comm_destroy(CollComm global_comm) +{ + barrierLocal(global_comm); + if (global_comm->global_rank == 0) { + pthread_barrier_destroy((pthread_barrier_t*)&(thread_comms[global_comm->unique_id]->barrier)); + free(thread_comms[global_comm->unique_id]->buffers); + thread_comms[global_comm->unique_id]->buffers = nullptr; + free(thread_comms[global_comm->unique_id]->displs); + thread_comms[global_comm->unique_id]->displs = nullptr; + __sync_synchronize(); + thread_comms[global_comm->unique_id]->ready_flag = false; + } + __sync_synchronize(); + volatile ThreadComm* data = thread_comms[global_comm->unique_id]; + while (data->ready_flag != false) { data = thread_comms[global_comm->unique_id]; } + global_comm->status = false; + return CollSuccess; +} + +int LocalNetwork::init_comm() +{ + int id = 0; + collGetUniqueId(&id); + assert(thread_comms.size() == id); + // create thread comm + ThreadComm* thread_comm = (ThreadComm*)malloc(sizeof(ThreadComm)); + thread_comm->ready_flag = false; + thread_comm->buffers = nullptr; + thread_comm->displs = nullptr; + 
thread_comms.push_back(thread_comm); + log_coll.debug("Init comm id %d", id); + return id; +} + +int LocalNetwork::alltoallv(const void* sendbuf, + const int sendcounts[], + const int sdispls[], + void* recvbuf, + const int recvcounts[], + const int rdispls[], + CollDataType type, + CollComm global_comm) +{ + int res; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + int type_extent = getDtypeSize(type); + + global_comm->local_comm->displs[global_rank] = sdispls; + global_comm->local_comm->buffers[global_rank] = sendbuf; + __sync_synchronize(); + + int recvfrom_global_rank; + int recvfrom_seg_id = global_rank; + const void* src_base = nullptr; + const int* displs = nullptr; + for (int i = 1; i < total_size + 1; i++) { + recvfrom_global_rank = (global_rank + total_size - i) % total_size; + // wait for other threads to update the buffer address + while (global_comm->local_comm->buffers[recvfrom_global_rank] == nullptr || + global_comm->local_comm->displs[recvfrom_global_rank] == nullptr) + ; + src_base = global_comm->local_comm->buffers[recvfrom_global_rank]; + displs = global_comm->local_comm->displs[recvfrom_global_rank]; + char* src = static_cast(const_cast(src_base)) + + static_cast(displs[recvfrom_seg_id]) * type_extent; + char* dst = static_cast(recvbuf) + + static_cast(rdispls[recvfrom_global_rank]) * type_extent; +#ifdef DEBUG_LEGATE + log_coll.debug( + "AlltoallvLocal i: %d === global_rank %d, dtype %d, copy rank %d (seg %d, sdispls %d, %p) to " + "rank %d (seg " + "%d, rdispls %d, %p)", + i, + global_rank, + type_extent, + recvfrom_global_rank, + recvfrom_seg_id, + sdispls[recvfrom_seg_id], + src, + global_rank, + recvfrom_global_rank, + rdispls[recvfrom_global_rank], + dst); +#endif + memcpy(dst, src, recvcounts[recvfrom_global_rank] * type_extent); + } + + barrierLocal(global_comm); + + __sync_synchronize(); + + resetLocalBuffer(global_comm); + barrierLocal(global_comm); + + return CollSuccess; +} + +int LocalNetwork::alltoall( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) +{ + int res; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + int type_extent = getDtypeSize(type); + + global_comm->local_comm->buffers[global_rank] = sendbuf; + __sync_synchronize(); + + int recvfrom_global_rank; + int recvfrom_seg_id = global_rank; + const void* src_base = nullptr; + for (int i = 1; i < total_size + 1; i++) { + recvfrom_global_rank = (global_rank + total_size - i) % total_size; + // wait for other threads to update the buffer address + while (global_comm->local_comm->buffers[recvfrom_global_rank] == nullptr) + ; + src_base = global_comm->local_comm->buffers[recvfrom_global_rank]; + char* src = static_cast(const_cast(src_base)) + + static_cast(recvfrom_seg_id) * type_extent * count; + char* dst = static_cast(recvbuf) + + static_cast(recvfrom_global_rank) * type_extent * count; +#ifdef DEBUG_LEGATE + log_coll.debug( + "AlltoallLocal i: %d === global_rank %d, dtype %d, copy rank %d (seg %d, %p) to rank %d (seg " + "%d, %p)", + i, + global_rank, + type_extent, + recvfrom_global_rank, + recvfrom_seg_id, + src, + global_rank, + recvfrom_global_rank, + dst); +#endif + memcpy(dst, src, count * type_extent); + } + + barrierLocal(global_comm); + + __sync_synchronize(); + + resetLocalBuffer(global_comm); + barrierLocal(global_comm); + + return CollSuccess; +} + +int LocalNetwork::allgather( + const void* sendbuf, void* recvbuf, int count, CollDataType 
type, CollComm global_comm) +{ + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + int type_extent = getDtypeSize(type); + + const void* sendbuf_tmp = sendbuf; + + // MPI_IN_PLACE + if (sendbuf == recvbuf) { sendbuf_tmp = allocateInplaceBuffer(recvbuf, type_extent * count); } + + global_comm->local_comm->buffers[global_rank] = sendbuf_tmp; + __sync_synchronize(); + + for (int recvfrom_global_rank = 0; recvfrom_global_rank < total_size; recvfrom_global_rank++) { + // wait for other threads to update the buffer address + while (global_comm->local_comm->buffers[recvfrom_global_rank] == nullptr) + ; + const void* src = global_comm->local_comm->buffers[recvfrom_global_rank]; + char* dst = static_cast(recvbuf) + + static_cast(recvfrom_global_rank) * type_extent * count; +#ifdef DEBUG_LEGATE + log_coll.debug( + "AllgatherLocal i: %d === global_rank %d, dtype %d, copy rank %d (%p) to rank %d (%p)", + recvfrom_global_rank, + global_rank, + type_extent, + recvfrom_global_rank, + src, + global_rank, + dst); +#endif + memcpy(dst, src, count * type_extent); + } + + barrierLocal(global_comm); + if (sendbuf == recvbuf) { free(const_cast(sendbuf_tmp)); } + + __sync_synchronize(); + + resetLocalBuffer(global_comm); + barrierLocal(global_comm); + + return CollSuccess; +} + +// protected functions start from here + +size_t LocalNetwork::getDtypeSize(CollDataType dtype) +{ + switch (dtype) { + case CollDataType::CollInt8: + case CollDataType::CollChar: { + return sizeof(char); + } + case CollDataType::CollUint8: { + return sizeof(uint8_t); + } + case CollDataType::CollInt: { + return sizeof(int); + } + case CollDataType::CollUint32: { + return sizeof(uint32_t); + } + case CollDataType::CollInt64: { + return sizeof(int64_t); + } + case CollDataType::CollUint64: { + return sizeof(uint64_t); + } + case CollDataType::CollFloat: { + return sizeof(float); + } + case CollDataType::CollDouble: { + return sizeof(double); + } + default: { + log_coll.fatal("Unknown datatype"); + LEGATE_ABORT; + return 0; + } + } +} + +void LocalNetwork::resetLocalBuffer(CollComm global_comm) +{ + int global_rank = global_comm->global_rank; + global_comm->local_comm->buffers[global_rank] = nullptr; + global_comm->local_comm->displs[global_rank] = nullptr; +} + +void LocalNetwork::barrierLocal(CollComm global_comm) +{ + assert(BackendNetwork::coll_inited == true); + pthread_barrier_wait(const_cast(&(global_comm->local_comm->barrier))); +} + +} // namespace coll +} // namespace comm +} // namespace legate \ No newline at end of file diff --git a/src/core/comm/mpi_comm.cc b/src/core/comm/mpi_comm.cc new file mode 100644 index 000000000..1761701ff --- /dev/null +++ b/src/core/comm/mpi_comm.cc @@ -0,0 +1,575 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include + +#include "coll.h" +#include "legate.h" +#include "legion.h" + +namespace legate { +namespace comm { +namespace coll { + +using namespace Legion; +extern Logger log_coll; + +enum CollTag : int { + BCAST_TAG = 0, + GATHER_TAG = 1, + ALLTOALL_TAG = 2, + ALLTOALLV_TAG = 3, + MAX_TAG = 10, +}; + +static inline std::pair mostFrequent(const int* arr, int n); +static inline int match2ranks(int rank1, int rank2, CollComm global_comm); + +inline void check_mpi(int error, const char* file, int line) +{ + if (error != MPI_SUCCESS) { + fprintf( + stderr, "Internal MPI failure with error code %d in file %s at line %d\n", error, file, line); +#ifdef DEBUG_LEGATE + assert(false); +#else + exit(error); +#endif + } +} + +#define CHECK_MPI(expr) \ + do { \ + int result = (expr); \ + check_mpi(result, __FILE__, __LINE__); \ + } while (false) + +// public functions start from here + +MPINetwork::MPINetwork(int argc, char* argv[]) + : BackendNetwork(), mpi_tag_ub(0), self_init_mpi(false) +{ + log_coll.debug("Enable MPINetwork"); + assert(current_unique_id == 0); + int provided, init_flag = 0; + CHECK_MPI(MPI_Initialized(&init_flag)); + if (!init_flag) { + log_coll.fatal( + "MPI has not been initialized, it should be initialized by " + "the networking backend."); + LEGATE_ABORT; + } + int mpi_thread_model; + MPI_Query_thread(&mpi_thread_model); + if (mpi_thread_model != MPI_THREAD_MULTIPLE) { + log_coll.fatal( + "MPI has been initialized by others, but is not initialized with " + "MPI_THREAD_MULTIPLE"); + LEGATE_ABORT; + } + // check + int *tag_ub, flag; + CHECK_MPI(MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &flag)); + assert(flag); + mpi_tag_ub = *tag_ub; + assert(mpi_comms.empty()); + BackendNetwork::coll_inited = true; + BackendNetwork::comm_type = CollCommType::CollMPI; +} + +MPINetwork::~MPINetwork() +{ + log_coll.debug("Finalize MPINetwork"); + assert(BackendNetwork::coll_inited == true); + for (MPI_Comm& mpi_comm : mpi_comms) { CHECK_MPI(MPI_Comm_free(&mpi_comm)); } + mpi_comms.clear(); + int fina_flag = 0; + CHECK_MPI(MPI_Finalized(&fina_flag)); + if (fina_flag == 1) { + log_coll.fatal("MPI should not have been finalized"); + LEGATE_ABORT; + } + if (self_init_mpi) { + MPI_Finalize(); + printf("finalize mpi\n"); + } + BackendNetwork::coll_inited = false; +} + +int MPINetwork::init_comm() +{ + int id = 0; + collGetUniqueId(&id); +#ifdef DEBUG_LEGATE + int mpi_rank; + int send_id = id; + // check if all ranks get the same unique id + CHECK_MPI(MPI_Bcast(&send_id, 1, MPI_INT, 0, MPI_COMM_WORLD)); + assert(send_id == id); +#endif + assert(mpi_comms.size() == id); + // create mpi comm + MPI_Comm mpi_comm; + CHECK_MPI(MPI_Comm_dup(MPI_COMM_WORLD, &mpi_comm)); + mpi_comms.push_back(mpi_comm); + log_coll.debug("Init comm id %d", id); + return id; +} + +int MPINetwork::comm_create(CollComm global_comm, + int global_comm_size, + int global_rank, + int unique_id, + const int* mapping_table) +{ + global_comm->global_comm_size = global_comm_size; + global_comm->global_rank = global_rank; + global_comm->status = true; + global_comm->unique_id = unique_id; + int mpi_rank, mpi_comm_size; + int *tag_ub, flag; + int compare_result; + MPI_Comm comm = mpi_comms[unique_id]; + CHECK_MPI(MPI_Comm_compare(comm, MPI_COMM_WORLD, &compare_result)); + assert(MPI_CONGRUENT == compare_result); + + CHECK_MPI(MPI_Comm_rank(comm, &mpi_rank)); + CHECK_MPI(MPI_Comm_size(comm, &mpi_comm_size)); + global_comm->mpi_comm_size = mpi_comm_size; + global_comm->mpi_rank = mpi_rank; 
+ global_comm->mpi_comm = comm; + assert(mapping_table != nullptr); + global_comm->mapping_table.global_rank = (int*)malloc(sizeof(int) * global_comm_size); + global_comm->mapping_table.mpi_rank = (int*)malloc(sizeof(int) * global_comm_size); + memcpy(global_comm->mapping_table.mpi_rank, mapping_table, sizeof(int) * global_comm_size); + for (int i = 0; i < global_comm_size; i++) { global_comm->mapping_table.global_rank[i] = i; } + std::pair p = mostFrequent(mapping_table, global_comm_size); + global_comm->nb_threads = p.first; + global_comm->mpi_comm_size_actual = p.second; + return CollSuccess; +} + +int MPINetwork::comm_destroy(CollComm global_comm) +{ + if (global_comm->mapping_table.global_rank != nullptr) { + free(global_comm->mapping_table.global_rank); + global_comm->mapping_table.global_rank = nullptr; + } + if (global_comm->mapping_table.mpi_rank != nullptr) { + free(global_comm->mapping_table.mpi_rank); + global_comm->mapping_table.mpi_rank = nullptr; + } + global_comm->status = false; + return CollSuccess; +} + +int MPINetwork::alltoallv(const void* sendbuf, + const int sendcounts[], + const int sdispls[], + void* recvbuf, + const int recvcounts[], + const int rdispls[], + CollDataType type, + CollComm global_comm) +{ + MPI_Status status; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + MPI_Datatype mpi_type = dtypeToMPIDtype(type); + + MPI_Aint lb, type_extent; + MPI_Type_get_extent(mpi_type, &lb, &type_extent); + + int sendto_global_rank, recvfrom_global_rank, sendto_mpi_rank, recvfrom_mpi_rank; + for (int i = 1; i < total_size + 1; i++) { + sendto_global_rank = (global_rank + i) % total_size; + recvfrom_global_rank = (global_rank + total_size - i) % total_size; + char* src = static_cast(const_cast(sendbuf)) + + static_cast(sdispls[sendto_global_rank]) * type_extent; + char* dst = static_cast(recvbuf) + + static_cast(rdispls[recvfrom_global_rank]) * type_extent; + int scount = sendcounts[sendto_global_rank]; + int rcount = recvcounts[recvfrom_global_rank]; + sendto_mpi_rank = global_comm->mapping_table.mpi_rank[sendto_global_rank]; + recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[recvfrom_global_rank]; + assert(sendto_global_rank == global_comm->mapping_table.global_rank[sendto_global_rank]); + assert(recvfrom_global_rank == global_comm->mapping_table.global_rank[recvfrom_global_rank]); + // tag: seg idx + rank_idx + tag + int send_tag = generateAlltoallvTag(sendto_global_rank, global_rank, global_comm); + int recv_tag = generateAlltoallvTag(global_rank, recvfrom_global_rank, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug( + "AlltoallvMPI i: %d === global_rank %d, mpi rank %d, send to %d (%d), send_tag %d, " + "recv from %d (%d), " + "recv_tag %d", + i, + global_rank, + global_comm->mpi_rank, + sendto_global_rank, + sendto_mpi_rank, + send_tag, + recvfrom_global_rank, + recvfrom_mpi_rank, + recv_tag); +#endif + CHECK_MPI(MPI_Sendrecv(src, + scount, + mpi_type, + sendto_mpi_rank, + send_tag, + dst, + rcount, + mpi_type, + recvfrom_mpi_rank, + recv_tag, + global_comm->mpi_comm, + &status)); + } + + return CollSuccess; +} + +int MPINetwork::alltoall( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) +{ + MPI_Status status; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + MPI_Datatype mpi_type = dtypeToMPIDtype(type); + + MPI_Aint lb, type_extent; + MPI_Type_get_extent(mpi_type, &lb, &type_extent); + + int 
sendto_global_rank, recvfrom_global_rank, sendto_mpi_rank, recvfrom_mpi_rank; + for (int i = 1; i < total_size + 1; i++) { + sendto_global_rank = (global_rank + i) % total_size; + recvfrom_global_rank = (global_rank + total_size - i) % total_size; + char* src = static_cast(const_cast(sendbuf)) + + static_cast(sendto_global_rank) * type_extent * count; + char* dst = static_cast(recvbuf) + + static_cast(recvfrom_global_rank) * type_extent * count; + sendto_mpi_rank = global_comm->mapping_table.mpi_rank[sendto_global_rank]; + recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[recvfrom_global_rank]; + assert(sendto_global_rank == global_comm->mapping_table.global_rank[sendto_global_rank]); + assert(recvfrom_global_rank == global_comm->mapping_table.global_rank[recvfrom_global_rank]); + // tag: seg idx + rank_idx + tag + int send_tag = generateAlltoallTag(sendto_global_rank, global_rank, global_comm); + int recv_tag = generateAlltoallTag(global_rank, recvfrom_global_rank, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug( + "AlltoallMPI i: %d === global_rank %d, mpi rank %d, send to %d (%d), send_tag %d, " + "recv from %d (%d), " + "recv_tag %d", + i, + global_rank, + global_comm->mpi_rank, + sendto_global_rank, + sendto_mpi_rank, + send_tag, + recvfrom_global_rank, + recvfrom_mpi_rank, + recv_tag); +#endif + CHECK_MPI(MPI_Sendrecv(src, + count, + mpi_type, + sendto_mpi_rank, + send_tag, + dst, + count, + mpi_type, + recvfrom_mpi_rank, + recv_tag, + global_comm->mpi_comm, + &status)); + } + + return CollSuccess; +} + +int MPINetwork::allgather( + const void* sendbuf, void* recvbuf, int count, CollDataType type, CollComm global_comm) +{ + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + MPI_Datatype mpi_type = dtypeToMPIDtype(type); + + MPI_Aint lb, type_extent; + MPI_Type_get_extent(mpi_type, &lb, &type_extent); + + void* sendbuf_tmp = const_cast(sendbuf); + + // MPI_IN_PLACE + if (sendbuf == recvbuf) { sendbuf_tmp = allocateInplaceBuffer(recvbuf, type_extent * count); } + + gather(sendbuf_tmp, recvbuf, count, type, 0, global_comm); + + bcast(recvbuf, count * total_size, type, 0, global_comm); + + if (sendbuf == recvbuf) { free(sendbuf_tmp); } + + return CollSuccess; +} + +int MPINetwork::gather( + const void* sendbuf, void* recvbuf, int count, CollDataType type, int root, CollComm global_comm) +{ + MPI_Status status; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + MPI_Datatype mpi_type = dtypeToMPIDtype(type); + + // Should not see inplace here + if (sendbuf == recvbuf) { assert(0); } + + int root_mpi_rank = global_comm->mapping_table.mpi_rank[root]; + assert(root == global_comm->mapping_table.global_rank[root]); + + int tag; + + // non-root + if (global_rank != root) { + tag = generateGatherTag(global_rank, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug("GatherMPI: non-root send global_rank %d, mpi rank %d, send to %d (%d), tag %d", + global_rank, + global_comm->mpi_rank, + root, + root_mpi_rank, + tag); +#endif + CHECK_MPI(MPI_Send(sendbuf, count, mpi_type, root_mpi_rank, tag, global_comm->mpi_comm)); + return CollSuccess; + } + + // root + MPI_Aint incr, lb, type_extent; + MPI_Type_get_extent(mpi_type, &lb, &type_extent); + incr = type_extent * static_cast(count); + char* dst = static_cast(recvbuf); + int recvfrom_mpi_rank; + for (int i = 0; i < total_size; i++) { + recvfrom_mpi_rank = global_comm->mapping_table.mpi_rank[i]; + assert(i == 
global_comm->mapping_table.global_rank[i]); + tag = generateGatherTag(i, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug( + "GatherMPI: root i %d === global_rank %d, mpi rank %d, recv %p, from %d (%d), tag %d", + i, + global_rank, + global_comm->mpi_rank, + dst, + i, + recvfrom_mpi_rank, + tag); +#endif + assert(dst != nullptr); + if (global_rank == i) { + memcpy(dst, sendbuf, incr); + } else { + CHECK_MPI( + MPI_Recv(dst, count, mpi_type, recvfrom_mpi_rank, tag, global_comm->mpi_comm, &status)); + } + dst += incr; + } + + return CollSuccess; +} + +int MPINetwork::bcast(void* buf, int count, CollDataType type, int root, CollComm global_comm) +{ + int tag; + MPI_Status status; + + int total_size = global_comm->global_comm_size; + int global_rank = global_comm->global_rank; + + int root_mpi_rank = global_comm->mapping_table.mpi_rank[root]; + assert(root == global_comm->mapping_table.global_rank[root]); + + MPI_Datatype mpi_type = dtypeToMPIDtype(type); + + // non-root + if (global_rank != root) { + tag = generateBcastTag(global_rank, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug("BcastMPI: non-root recv global_rank %d, mpi rank %d, send to %d (%d), tag %d", + global_rank, + global_comm->mpi_rank, + root, + root_mpi_rank, + tag); +#endif + CHECK_MPI(MPI_Recv(buf, count, mpi_type, root_mpi_rank, tag, global_comm->mpi_comm, &status)); + return CollSuccess; + } + + // root + int sendto_mpi_rank; + for (int i = 0; i < total_size; i++) { + sendto_mpi_rank = global_comm->mapping_table.mpi_rank[i]; + assert(i == global_comm->mapping_table.global_rank[i]); + tag = generateBcastTag(i, global_comm); +#ifdef DEBUG_LEGATE + log_coll.debug("BcastMPI: root i %d === global_rank %d, mpi rank %d, send to %d (%d), tag %d", + i, + global_rank, + global_comm->mpi_rank, + i, + sendto_mpi_rank, + tag); +#endif + if (global_rank != i) { + CHECK_MPI(MPI_Send(buf, count, mpi_type, sendto_mpi_rank, tag, global_comm->mpi_comm)); + } + } + + return CollSuccess; +} + +static inline std::pair mostFrequent(const int* arr, int n) +{ + std::unordered_map hash; + for (int i = 0; i < n; i++) hash[arr[i]]++; + + // find the max frequency + int max_count = 0; + std::unordered_map::iterator it; + for (it = hash.begin(); it != hash.end(); it++) { + if (max_count < it->second) { max_count = it->second; } + } + + return std::make_pair(max_count, hash.size()); +} + +static inline int match2ranks(int rank1, int rank2, CollComm global_comm) +{ + // tag: seg idx + rank_idx + tag + // send_tag = sendto_global_rank * 10000 + global_rank (concat 2 ranks) + // which dst seg it sends to (in dst rank) + // recv_tag = global_rank * 10000 + recvfrom_global_rank (concat 2 ranks) + // idx of current seg we are receving (in src/my rank) + // example: + // 00 | 01 | 02 | 03 + // 10 | 11 | 12 | 13 + // 20 | 21 | 22 | 23 + // 30 | 31 | 32 | 33 + // 01's send_tag = 10, 10's recv_tag = 10, match + // 12's send_tag = 21, 21's recv_tag = 21, match + + int tag; + // old tagging system for debug + // constexpr int const max_ranks = 10000; + // tag = rank1 * max_ranks + rank2; + + // new tagging system, if crash, switch to the old one + + tag = rank1 % global_comm->nb_threads * global_comm->global_comm_size + rank2; + + // Szudzik's Function, two numbers < 32768 + // if (rank1 >= rank2) { + // tag = rank1*rank1 + rank1 + rank2; + // } else { + // tag = rank1 + rank2*rank2; + // } + + // Cantor Pairing Function, two numbers < 32768 + // tag = (rank1 + rank2) * (rank1 + rank2 + 1) / 2 + rank1; + + return tag; +} + +// protected functions start from 
here + +MPI_Datatype MPINetwork::dtypeToMPIDtype(CollDataType dtype) +{ + switch (dtype) { + case CollDataType::CollInt8: { + return MPI_INT8_T; + } + case CollDataType::CollChar: { + return MPI_CHAR; + } + case CollDataType::CollUint8: { + return MPI_UINT8_T; + } + case CollDataType::CollInt: { + return MPI_INT; + } + case CollDataType::CollUint32: { + return MPI_UINT32_T; + } + case CollDataType::CollInt64: { + return MPI_INT64_T; + } + case CollDataType::CollUint64: { + return MPI_UINT64_T; + } + case CollDataType::CollFloat: { + return MPI_FLOAT; + } + case CollDataType::CollDouble: { + return MPI_DOUBLE; + } + default: { + log_coll.fatal("Unknown datatype"); + LEGATE_ABORT; + return MPI_BYTE; + } + } +} + +int MPINetwork::generateAlltoallTag(int rank1, int rank2, CollComm global_comm) +{ + int tag = match2ranks(rank1, rank2, global_comm) * CollTag::MAX_TAG + CollTag::ALLTOALL_TAG; + assert(tag <= mpi_tag_ub && tag > 0); + return tag; +} + +int MPINetwork::generateAlltoallvTag(int rank1, int rank2, CollComm global_comm) +{ + int tag = match2ranks(rank1, rank2, global_comm) * CollTag::MAX_TAG + CollTag::ALLTOALLV_TAG; + assert(tag <= mpi_tag_ub && tag > 0); + return tag; +} + +int MPINetwork::generateBcastTag(int rank, CollComm global_comm) +{ + int tag = rank * CollTag::MAX_TAG + CollTag::BCAST_TAG; + assert(tag <= mpi_tag_ub && tag >= 0); + return tag; +} + +int MPINetwork::generateGatherTag(int rank, CollComm global_comm) +{ + int tag = rank * CollTag::MAX_TAG + CollTag::GATHER_TAG; + assert(tag <= mpi_tag_ub && tag > 0); + return tag; +} + +} // namespace coll +} // namespace comm +} // namespace legate \ No newline at end of file From 4d5417e6295464d867a5a2ba8f1ea7dd5bfd4fae Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 17 Nov 2022 12:35:11 -0800 Subject: [PATCH 061/121] Conda env script fixes (#481) * Disallow cmake 3.25.0 * CUDAConfig.ctk can be "none" but not None --- scripts/generate-conda-envs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index a5cd426ee..494b408ba 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -48,13 +48,13 @@ def format(self, kind: str) -> str: @dataclass(frozen=True) class CUDAConfig(SectionConfig): - ctk_version: str | None + ctk_version: str header = "cuda" @property def conda(self) -> Reqs: - if self.ctk_version is None: + if self.ctk_version == "none": return () return ( @@ -81,7 +81,8 @@ class BuildConfig(SectionConfig): @property def conda(self) -> Reqs: pkgs = ( - "cmake>=3.24", + # 3.25.0 triggers gitlab.kitware.com/cmake/cmake/-/issues/24119 + "cmake>=3.24,!=3.25.0", "git", "make", "scikit-build>=0.13.1", @@ -166,7 +167,7 @@ class EnvConfig: use: str python: str os: OSType - ctk: str | None + ctk: str compilers: bool openmpi: bool From 8f6fc0697418a3719814e9f9c6842746b5c5b7ec Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 17 Nov 2022 13:41:45 -0800 Subject: [PATCH 062/121] Refactoring changes (#478) * Start assigning unique ids to stores and storages * Move partition caches to the runtime * Two mapper changes * Start using a special Legate functor for identity projections * Refactor the core mapper's slice_task using dispatch * Make constructors fetch the unique ids instead of passing them as arguments * Stop using offsets in linearization --- legate/core/partition.py | 11 ++-- legate/core/runtime.py | 103 ++++++++++++++++++++++++++------ legate/core/store.py | 74 ++++++++++++----------- 
src/core/mapping/base_mapper.cc | 71 +++++----------------- src/core/mapping/base_mapper.h | 4 -- src/core/mapping/core_mapper.cc | 91 ++++++++++------------------ src/core/runtime/projection.cc | 36 +++++++---- 7 files changed, 196 insertions(+), 194 deletions(-) diff --git a/legate/core/partition.py b/legate/core/partition.py index 162e7fb6a..be243911d 100644 --- a/legate/core/partition.py +++ b/legate/core/partition.py @@ -38,6 +38,9 @@ RequirementType = Union[Type[Broadcast], Type[Partition]] +part_mgr = runtime.partition_manager + + class PartitionBase(ABC): @abstractproperty def color_shape(self) -> Optional[Shape]: @@ -295,7 +298,7 @@ def construct( self, region: Region, complete: bool = False ) -> Optional[LegionPartition]: index_space = region.index_space - index_partition = runtime.find_partition(index_space, self) + index_partition = part_mgr.find_index_partition(index_space, self) if index_partition is None: tile_shape = self._tile_shape transform = Transform(tile_shape.ndim, tile_shape.ndim) @@ -322,7 +325,7 @@ def construct( kind=kind, keep=True, # export this partition functor to other libraries ) - runtime.record_partition(index_space, self, index_partition) + part_mgr.record_index_partition(index_space, self, index_partition) return region.get_child(index_partition) @@ -406,7 +409,7 @@ def construct( assert complete index_space = region.index_space - index_partition = runtime.find_partition(index_space, self) + index_partition = part_mgr.find_index_partition(index_space, self) if index_partition is None: color_space = runtime.find_or_create_index_space(self._color_shape) functor = PartitionByWeights(self._weights) @@ -420,5 +423,5 @@ def construct( kind=kind, keep=True, # export this partition functor to other libraries ) - runtime.record_partition(index_space, self, index_partition) + part_mgr.record_index_partition(index_space, self, index_partition) return region.get_child(index_partition) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 56c106471..3b6c5ddec 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -52,7 +52,11 @@ if TYPE_CHECKING: from . import ArgumentMap, Detach, IndexDetach, IndexPartition, Library - from ._legion import FieldListLike, PhysicalRegion + from ._legion import ( + FieldListLike, + PhysicalRegion, + Partition as LegionPartition, + ) from .communicator import Communicator from .context import Context from .corelib import CoreLib @@ -619,6 +623,12 @@ def __init__(self, runtime: Runtime) -> None: self._index_partitions: dict[ tuple[IndexSpace, PartitionBase], IndexPartition ] = {} + # Maps storage id-partition pairs to Legion partitions + self._legion_partitions: dict[ + tuple[int, PartitionBase], Union[None, LegionPartition] + ] = {} + self._storage_key_partitions: dict[int, PartitionBase] = {} + self._store_key_partitions: dict[int, PartitionBase] = {} def compute_launch_shape( self, store: Store, restrictions: tuple[Restriction, ...] 
@@ -815,13 +825,13 @@ def use_complete_tiling(self, shape: Shape, tile_shape: Shape) -> bool: num_tiles = (shape // tile_shape).volume() return not (num_tiles > 256 and num_tiles > 16 * self._num_pieces) - def find_partition( + def find_index_partition( self, index_space: IndexSpace, functor: PartitionBase ) -> Union[IndexPartition, None]: key = (index_space, functor) return self._index_partitions.get(key) - def record_partition( + def record_index_partition( self, index_space: IndexSpace, functor: PartitionBase, @@ -831,6 +841,59 @@ def record_partition( assert key not in self._index_partitions self._index_partitions[key] = index_partition + def find_store_key_partition( + self, store_id: int, restrictions: tuple[Restriction, ...] + ) -> Union[None, PartitionBase]: + partition = self._store_key_partitions.get(store_id) + if partition is not None and not partition.satisfies_restriction( + restrictions + ): + partition = None + return partition + + def record_store_key_partition( + self, store_id: int, key_partition: PartitionBase + ) -> None: + self._store_key_partitions[store_id] = key_partition + + def reset_store_key_partition(self, store_id: int) -> None: + del self._store_key_partitions[store_id] + + def find_storage_key_partition( + self, storage_id: int, restrictions: tuple[Restriction, ...] + ) -> Union[None, PartitionBase]: + partition = self._storage_key_partitions.get(storage_id) + if partition is not None and not partition.satisfies_restriction( + restrictions + ): + partition = None + return partition + + def record_storage_key_partition( + self, storage_id: int, key_partition: PartitionBase + ) -> None: + self._storage_key_partitions[storage_id] = key_partition + + def reset_storage_key_partition(self, storage_id: int) -> None: + del self._storage_key_partitions[storage_id] + + def find_legion_partition( + self, storage_id: int, functor: PartitionBase + ) -> tuple[Optional[LegionPartition], bool]: + key = (storage_id, functor) + found = key in self._legion_partitions + part = self._legion_partitions.get(key) + return part, found + + def record_legion_partition( + self, + storage_id: int, + functor: PartitionBase, + legion_partition: Optional[LegionPartition], + ) -> None: + key = (storage_id, functor) + self._legion_partitions[key] = legion_partition + class CommunicatorManager: def __init__(self, runtime: Runtime) -> None: @@ -904,6 +967,9 @@ def __init__(self, core_library: CoreLib) -> None: ty.uint32, ) + self._next_store_id = 0 + self._next_storage_id = 0 + self._barriers: List[legion.legion_phase_barrier_t] = [] self.nccl_needs_barrier = bool( self._core_context.get_tunable( @@ -1138,6 +1204,14 @@ def get_unique_op_id(self) -> int: self._unique_op_id += 1 return op_id + def get_next_store_id(self) -> int: + self._next_store_id += 1 + return self._next_store_id + + def get_next_storage_id(self) -> int: + self._next_storage_id += 1 + return self._next_storage_id + def dispatch(self, op: Dispatchable[T]) -> T: self._attachment_manager.perform_detachments() self._attachment_manager.prune_detachments() @@ -1291,7 +1365,13 @@ def create_store( sanitized_shape = shape transform = None - storage = Storage(sanitized_shape, 0, dtype, data=data, kind=kind) + storage = Storage( + sanitized_shape, + 0, + dtype, + data=data, + kind=kind, + ) return Store( dtype, storage, @@ -1456,21 +1536,6 @@ def create_region( handle, ) - def find_partition( - self, index_space: IndexSpace, functor: PartitionBase - ) -> Union[IndexPartition, None]: - return 
self._partition_manager.find_partition(index_space, functor) - - def record_partition( - self, - index_space: IndexSpace, - functor: PartitionBase, - index_partition: IndexPartition, - ) -> None: - self._partition_manager.record_partition( - index_space, functor, index_partition - ) - def extract_scalar(self, future: Future, idx: int) -> Future: from .launcher import TaskLauncher diff --git a/legate/core/store.py b/legate/core/store.py index 7b09516f1..be7e44ea1 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -511,6 +511,7 @@ def __init__( ) assert not isinstance(data, Future) or parent is None assert parent is None or color is not None + self._unique_id = runtime.get_next_storage_id() self._extents = extents self._offsets = offsets self._level = level @@ -519,8 +520,6 @@ def __init__( self._kind = kind self._parent = parent self._color = color - self._partitions: dict[PartitionBase, Optional[LegionPartition]] = {} - self._key_partition: Union[None, PartitionBase] = None if self._offsets is None and self._extents is not None: self._offsets = Shape((0,) * self._extents.ndim) @@ -738,21 +737,20 @@ def get_inline_allocation( def find_key_partition( self, restrictions: tuple[Restriction, ...] ) -> Optional[PartitionBase]: - if ( - self._key_partition is not None - and self._key_partition.satisfies_restriction(restrictions) - ): - return self._key_partition - elif self._parent is not None: - return self._parent.find_key_partition(restrictions) - else: - return None + partition = partition_manager.find_storage_key_partition( + self._unique_id, restrictions + ) + if partition is None and self._parent is not None: + partition = self._parent.find_key_partition(restrictions) + return partition def set_key_partition(self, partition: PartitionBase) -> None: - self._key_partition = partition + partition_manager.record_storage_key_partition( + self._unique_id, partition + ) def reset_key_partition(self) -> None: - self._key_partition = None + partition_manager.reset_storage_key_partition(self._unique_id) def find_or_create_legion_partition( self, functor: PartitionBase, complete: bool @@ -762,12 +760,14 @@ def find_or_create_legion_partition( assert isinstance(self.data, RegionField) - if functor in self._partitions: - return self._partitions[functor] - - part = functor.construct(self.data.region, complete=complete) - self._partitions[functor] = part - + part, found = partition_manager.find_legion_partition( + self._unique_id, functor + ) + if not found: + part = functor.construct(self.data.region, complete=complete) + partition_manager.record_legion_partition( + self._unique_id, functor, part + ) return part @@ -862,12 +862,12 @@ def __init__( else: sanitized_transform = identity assert isinstance(shape, Shape) or shape is None + self._unique_id = runtime.get_next_store_id() self._shape = shape self._ndim = ndim self._dtype = dtype self._storage = storage self._transform: TransformStackBase = sanitized_transform - self._key_partition: Union[None, PartitionBase] = None # This is a cache for the projection functor id # when no custom functor is given self._projection: Union[None, int] = None @@ -1022,6 +1022,7 @@ def invert_partition(self, partition: PartitionBase) -> PartitionBase: def __str__(self) -> str: return ( f"Store(" + f"id: {self._unique_id}, " f"shape: {self._shape}, " f"ndim: {self._ndim}, " f"type: {self._dtype}, " @@ -1220,23 +1221,24 @@ def get_key_partition(self) -> Optional[PartitionBase]: # registered correctly runtime.flush_scheduling_window() - restrictions = 
self.find_restrictions() - - if ( - self._key_partition is not None - and self._key_partition.satisfies_restriction(restrictions) - ): - return self._key_partition - - return None + return partition_manager.find_store_key_partition( + self._unique_id, self.find_restrictions() + ) def has_key_partition(self, restrictions: tuple[Restriction, ...]) -> bool: + key_partition = partition_manager.find_store_key_partition( + self._unique_id, restrictions + ) + if key_partition is not None: + return True restrictions = self._transform.invert_restrictions(restrictions) part = self._storage.find_key_partition(restrictions) return (part is not None) and (part.even or self._transform.bottom) def set_key_partition(self, partition: PartitionBase) -> None: - self._key_partition = partition + partition_manager.record_store_key_partition( + self._unique_id, partition + ) # We also update the storage's key partition for other stores # sharing the same storage self._storage.set_key_partition( @@ -1244,16 +1246,16 @@ def set_key_partition(self, partition: PartitionBase) -> None: ) def reset_key_partition(self) -> None: - self._storage.reset_key_partition() + partition_manager.reset_store_key_partition(self._unique_id) def compute_key_partition( self, restrictions: tuple[Restriction, ...] ) -> PartitionBase: - if ( - self._key_partition is not None - and self._key_partition.satisfies_restriction(restrictions) - ): - return self._key_partition + key_partition = partition_manager.find_store_key_partition( + self._unique_id, restrictions + ) + if key_partition is not None: + return key_partition # If this is effectively a scalar store, we don't need to partition it if self.kind is Future or self.ndim == 0: diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index dcc393023..2f5f5788d 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -235,12 +235,13 @@ void BaseMapper::slice_auto_task(const MapperContext ctx, const SliceTaskInput& input, SliceTaskOutput& output) { - LegateProjectionFunctor* key_functor = nullptr; + ProjectionID projection = 0; for (auto& req : task.regions) if (req.tag == LEGATE_CORE_KEY_STORE_TAG) { - key_functor = find_legate_projection_functor(req.projection); + projection = req.projection; break; } + auto key_functor = find_legate_projection_functor(projection); // For multi-node cases we should already have been sharded so we // should just have one or a few points here on this node, so iterate @@ -253,23 +254,13 @@ void BaseMapper::slice_auto_task(const MapperContext ctx, sharding_domain = runtime->get_index_space_domain(ctx, task.sharding_space); auto round_robin = [&](auto& procs) { - if (nullptr != key_functor) { - auto lo = key_functor->project_point(sharding_domain.lo(), sharding_domain); - auto hi = key_functor->project_point(sharding_domain.hi(), sharding_domain); - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - auto p = key_functor->project_point(itr.p, sharding_domain); - auto idx = linearize(lo, hi, p); - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), procs[idx % procs.size()], false /*recurse*/, false /*stealable*/)); - } - } else { - auto lo = sharding_domain.lo(); - auto hi = sharding_domain.hi(); - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - auto idx = linearize(lo, hi, itr.p); - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), procs[idx % procs.size()], false /*recurse*/, false /*stealable*/)); - } + auto lo = 
key_functor->project_point(sharding_domain.lo(), sharding_domain); + auto hi = key_functor->project_point(sharding_domain.hi(), sharding_domain); + for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { + auto p = key_functor->project_point(itr.p, sharding_domain); + auto idx = linearize(lo, hi, p); + output.slices.push_back(TaskSlice( + Domain(itr.p, itr.p), procs[idx % procs.size()], false /*recurse*/, false /*stealable*/)); } }; @@ -350,33 +341,6 @@ void BaseMapper::slice_manual_task(const MapperContext ctx, dispatch(task.target_proc.kind(), distribute); } -void BaseMapper::slice_round_robin_task(const MapperContext ctx, - const LegionTask& task, - const SliceTaskInput& input, - SliceTaskOutput& output) -{ - // If we're here, that means that the task has no region that we can key off - // to distribute them reasonably. In this case, we just do a round-robin - // assignment. - - output.slices.reserve(input.domain.get_volume()); - - // Get the domain for the sharding space also - Domain sharding_domain = task.index_domain; - if (task.sharding_space.exists()) - sharding_domain = runtime->get_index_space_domain(ctx, task.sharding_space); - - auto distribute = [&](auto& procs) { - size_t idx = 0; - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), procs[idx++ % procs.size()], false /*recurse*/, false /*stealable*/)); - } - }; - - dispatch(task.target_proc.kind(), distribute); -} - void BaseMapper::slice_task(const MapperContext ctx, const LegionTask& task, const SliceTaskInput& input, @@ -384,8 +348,6 @@ void BaseMapper::slice_task(const MapperContext ctx, { if (task.tag == LEGATE_CORE_MANUAL_PARALLEL_LAUNCH_TAG) slice_manual_task(ctx, task, input, output); - else if (task.regions.size() == 0) - slice_round_robin_task(ctx, task, input, output); else slice_auto_task(ctx, task, input, output); } @@ -1073,15 +1035,10 @@ void BaseMapper::map_copy(const MapperContext ctx, // in which case we should find the key store and use its projection functor // for the linearization auto* key_functor = find_legate_projection_functor(0); - - if (key_functor != nullptr) { - auto lo = key_functor->project_point(sharding_domain.lo(), sharding_domain); - auto hi = key_functor->project_point(sharding_domain.hi(), sharding_domain); - auto p = key_functor->project_point(copy.index_point, sharding_domain); - proc_id = linearize(lo, hi, p); - } else { - proc_id = linearize(sharding_domain.lo(), sharding_domain.hi(), copy.index_point); - } + auto lo = key_functor->project_point(sharding_domain.lo(), sharding_domain); + auto hi = key_functor->project_point(sharding_domain.hi(), sharding_domain); + auto p = key_functor->project_point(copy.index_point, sharding_domain); + proc_id = linearize(lo, hi, p); } if (!local_gpus.empty()) target_proc = local_gpus[proc_id % local_gpus.size()]; diff --git a/src/core/mapping/base_mapper.h b/src/core/mapping/base_mapper.h index 850427b6d..17fdb2045 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -330,10 +330,6 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { const Legion::Task& task, const SliceTaskInput& input, SliceTaskOutput& output); - void slice_round_robin_task(const Legion::Mapping::MapperContext ctx, - const Legion::Task& task, - const SliceTaskInput& input, - SliceTaskOutput& output); protected: Legion::ShardingID find_sharding_functor_by_key_store_projection( diff --git a/src/core/mapping/core_mapper.cc 
b/src/core/mapping/core_mapper.cc index da3f7414b..ccc738983 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -23,6 +23,7 @@ #include "core/comm/comm_nccl.h" #endif #include "core/task/task.h" +#include "core/utilities/linearize.h" namespace legate { @@ -94,6 +95,20 @@ class CoreMapper : public Legion::Mapping::NullMapper { const SelectTunableInput& input, SelectTunableOutput& output); + protected: + template + decltype(auto) dispatch(Legion::Processor::Kind kind, Functor functor) + { + switch (kind) { + case Legion::Processor::LOC_PROC: return functor(local_cpus); + case Legion::Processor::TOC_PROC: return functor(local_gpus); + case Legion::Processor::OMP_PROC: return functor(local_omps); + default: LEGATE_ABORT; + } + assert(false); + return functor(local_cpus); + } + public: const AddressSpace local_node; const size_t total_nodes; @@ -258,68 +273,22 @@ void CoreMapper::slice_task(const MapperContext ctx, { assert(context.valid_task_id(task.task_id)); output.slices.reserve(input.domain.get_volume()); - // Check to see if we're control replicated or not. If we are then - // we'll already have been sharded. - Machine::ProcessorQuery all_procs(machine); - all_procs.only_kind(task.target_proc.kind()); - if (all_procs.count() == input.domain.get_volume()) { - Machine::ProcessorQuery::iterator pit = all_procs.begin(); - for (Domain::DomainPointIterator itr(input.domain); itr; itr++, pit++) - output.slices.push_back( - TaskSlice(Domain(itr.p, itr.p), *pit, false /*recurse*/, false /*stealable*/)); - } else { - // Control-replicated because we've already been sharded - Domain sharding_domain = task.index_domain; - if (task.sharding_space.exists()) - sharding_domain = runtime->get_index_space_domain(ctx, task.sharding_space); - assert(sharding_domain.get_dim() == 1); - assert(input.domain.get_dim() == 1); - const Rect<1> space = sharding_domain; - const Rect<1> local = input.domain; - const size_t size = (space.hi[0] - space.lo[0]) + 1; - // Assume that if we're control replicated there is one shard per space - const coord_t chunk = (size + total_nodes - 1) / total_nodes; - const coord_t start = local_node * chunk + space.lo[0]; - switch (task.target_proc.kind()) { - case Processor::LOC_PROC: { - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - const Point<1> point = itr.p; - assert(point[0] >= start); - assert(point[0] < (start + chunk)); - const unsigned local_index = point[0] - start; - assert(local_index < local_cpus.size()); - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), local_cpus[local_index], false /*recurse*/, false /*stealable*/)); - } - break; - } - case Processor::TOC_PROC: { - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - const Point<1> point = itr.p; - assert(point[0] >= start); - assert(point[0] < (start + chunk)); - const unsigned local_index = point[0] - start; - assert(local_index < local_gpus.size()); - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), local_gpus[local_index], false /*recurse*/, false /*stealable*/)); - } - break; - } - case Processor::OMP_PROC: { - for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { - const Point<1> point = itr.p; - assert(point[0] >= start); - assert(point[0] < (start + chunk)); - const unsigned local_index = point[0] - start; - assert(local_index < local_omps.size()); - output.slices.push_back(TaskSlice( - Domain(itr.p, itr.p), local_omps[local_index], false /*recurse*/, false /*stealable*/)); - } - break; - } - default: 
LEGATE_ABORT; + + Domain sharding_domain = task.index_domain; + if (task.sharding_space.exists()) + sharding_domain = runtime->get_index_space_domain(ctx, task.sharding_space); + + auto round_robin = [&](auto& procs) { + auto lo = sharding_domain.lo(); + auto hi = sharding_domain.hi(); + for (Domain::DomainPointIterator itr(input.domain); itr; itr++) { + auto idx = linearize(lo, hi, itr.p); + output.slices.push_back(TaskSlice( + Domain(itr.p, itr.p), procs[idx % procs.size()], false /*recurse*/, false /*stealable*/)); } - } + }; + + dispatch(task.target_proc.kind(), round_robin); } void CoreMapper::map_task(const MapperContext ctx, diff --git a/src/core/runtime/projection.cc b/src/core/runtime/projection.cc index 1fde46ea0..5b5809866 100644 --- a/src/core/runtime/projection.cc +++ b/src/core/runtime/projection.cc @@ -74,14 +74,6 @@ LogicalRegion DelinearizationFunctor::project(LogicalPartition upper_bound, return LogicalRegion::NO_REGION; } -void register_legate_core_projection_functors(Legion::Runtime* runtime, - const LibraryContext& context) -{ - auto proj_id = context.get_projection_id(LEGATE_CORE_DELINEARIZE_PROJ_ID); - auto functor = new DelinearizationFunctor(runtime); - runtime->register_projection_functor(proj_id, functor, true /*silence warnings*/); -} - LegateProjectionFunctor::LegateProjectionFunctor(Runtime* rt) : ProjectionFunctor(rt) {} LogicalRegion LegateProjectionFunctor::project(LogicalPartition upper_bound, @@ -101,8 +93,7 @@ class AffineFunctor : public LegateProjectionFunctor { AffineFunctor(Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets); public: - virtual DomainPoint project_point(const DomainPoint& point, - const Domain& launch_domain) const override + DomainPoint project_point(const DomainPoint& point, const Domain& launch_domain) const override { return DomainPoint(transform_ * Point(point) + offsets_); } @@ -142,8 +133,17 @@ template return transform; } -static std::unordered_map functor_table; -static std::mutex functor_table_lock; +struct IdentityFunctor : public LegateProjectionFunctor { + IdentityFunctor(Runtime* runtime) : LegateProjectionFunctor(runtime) {} + DomainPoint project_point(const DomainPoint& point, const Domain&) const override + { + return point; + } +}; + +static LegateProjectionFunctor* identity_functor{nullptr}; +static std::unordered_map functor_table{}; +static std::mutex functor_table_lock{}; struct create_affine_functor_fn { template @@ -158,9 +158,19 @@ struct create_affine_functor_fn { } }; +void register_legate_core_projection_functors(Legion::Runtime* runtime, + const LibraryContext& context) +{ + auto proj_id = context.get_projection_id(LEGATE_CORE_DELINEARIZE_PROJ_ID); + auto functor = new DelinearizationFunctor(runtime); + runtime->register_projection_functor(proj_id, functor, true /*silence warnings*/); + + identity_functor = new IdentityFunctor(runtime); +} + LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id) { - if (0 == proj_id) return nullptr; + if (0 == proj_id) return identity_functor; const std::lock_guard lock(functor_table_lock); return functor_table[proj_id]; } From c3cc9fb58fd3491bdf4fc9b1fede086f72cdfcd4 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 17 Nov 2022 17:41:43 -0800 Subject: [PATCH 063/121] Cycle detection check (#361) * WIP on cycle detection check * Don't consider cycles going through sys.modules * Return True if cycles are found * Remove some dead code * Add -legate:cycle-check flag, to perform cycle check at exit * Print out list indices on 
cycle listings * Ignore getset_descriptor objects in cycle check They seem to be triggering false positives * Instead just skip the __globals__ pointer * Skip based on ModuleType, vs inclusion sys.modules * Print name for functions and class objects * Ignore cycles going through types Self-type references and class.__init__.__closure__ seem to produce a lot of false positives. * Look for ny Store, not just RegionField * Filter for more specific Legion handle classes * Add unit test for cycle checker * Restrict to RegionField's, as some Futures naturally leak * Update documentation Co-authored-by: Manolis Papadakis --- legate/core/cycle_detector.py | 153 ++++++++++++++++++++++++++ legate/core/runtime.py | 45 +++++++- tests/unit/legate/test_cycle_check.py | 47 ++++++++ 3 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 legate/core/cycle_detector.py create mode 100644 tests/unit/legate/test_cycle_check.py diff --git a/legate/core/cycle_detector.py b/legate/core/cycle_detector.py new file mode 100644 index 000000000..a90a6531a --- /dev/null +++ b/legate/core/cycle_detector.py @@ -0,0 +1,153 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import gc +import inspect +from collections import deque +from types import FunctionType, ModuleType +from typing import Any, Set, Union + + +def _skip(src: Any, dst: Any) -> bool: + return ( + isinstance(src, type) + or isinstance(src, ModuleType) + or isinstance(src, FunctionType) + and hasattr(src, "__globals__") + and src.__globals__ is dst + ) + + +def _find_cycles(root: Any, all_ids: Set[int]) -> bool: + opened: dict[int, int] = {} + closed: Set[int] = set() + stack = [root] + while len(stack) > 0: + dst = stack[-1] + if id(dst) in opened: + if opened[id(dst)] == len(stack): + del opened[id(dst)] + closed.add(id(dst)) + else: + print("found cycle!") + print(" tail:") + _bfs(dst, root, all_ids) + print(" cycle:") + _bfs(dst, dst, all_ids) + return True + stack.pop() + elif id(dst) in closed: + stack.pop() + else: + opened[id(dst)] = len(stack) + for src in gc.get_referrers(dst): + if id(src) in all_ids and not _skip(src, dst): + stack.append(src) + return False + + +def _find_field(src: Any, dst: Any) -> Union[str, None]: + if type(src) == dict: + for k, v in src.items(): + if v is dst and isinstance(k, str): + return f'["{k}"]' + if type(src) == tuple: + for k, v in enumerate(src): + if v is dst: + return f"[{k}]" + if type(src) == list: + for i, v in enumerate(src): + if v is dst: + return f"[{i}]" + try: + for fld in dir(src): + try: + if hasattr(src, fld) and getattr(src, fld) is dst: + return "." + fld + except Exception: + pass + except Exception: + pass + try: + for fld in vars(src): + try: + if hasattr(src, fld) and getattr(src, fld) is dst: + return "." + fld + except Exception: + pass + except Exception: + pass + try: + for fld, val in inspect.getmembers(src): + if val is dst: + return "." 
+ fld + except Exception: + pass + return None + + +def _obj_str(obj: Any) -> str: + res = f"{hex(id(obj))}: {type(obj)}" + if hasattr(obj, "__name__"): + res += f" {obj.__name__}" + return res + + +def _bfs(begin: Any, end: Any, all_ids: Set[int]) -> None: + parent = {} + q = deque([begin]) + while len(q) > 0: + src = q.popleft() + for dst in gc.get_referents(src): + if id(dst) not in all_ids or id(dst) in parent or _skip(src, dst): + continue + parent[id(dst)] = src + if dst is end: + print(f" {_obj_str(dst)}") + while True: + src = parent[id(dst)] + fld = _find_field(src, dst) + if fld is None: + print(" ^") + else: + print(f" ^ {fld}") + print(f" {_obj_str(src)}") + dst = src + if dst is begin: + break + return + q.append(dst) + print(f" {_obj_str(end)}") + print(" ^") + print(" ???") + print(" ^") + print(f" {_obj_str(begin)}") + + +def find_cycles() -> bool: + from .store import RegionField + + found_cycles = False + all_objs = gc.get_objects() + all_ids = set(id(obj) for obj in all_objs) + for obj in all_objs: + if isinstance(obj, RegionField): + print( + f"looking for cycles involving {hex(id(obj))}, " + f"of type {type(obj)}" + ) + if _find_cycles(obj, all_ids): + found_cycles = True + return found_cycles diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 3b6c5ddec..534862b61 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -17,9 +17,11 @@ import gc import math import struct +import sys import weakref from collections import deque from dataclasses import dataclass +from types import ModuleType from typing import TYPE_CHECKING, Any, Deque, List, Optional, TypeVar, Union from legion_top import add_cleanup_item, top_level @@ -45,6 +47,7 @@ from .allocation import Attachable from .communicator import CPUCommunicator, NCCLCommunicator from .corelib import core_library +from .cycle_detector import find_cycles from .exception import PendingException from .projection import is_identity_projection, pack_symbolic_projection_repr from .restriction import Restriction @@ -83,7 +86,24 @@ action="store_true", default=False, dest="consensus", - help="Turn on consensus match on single node. (for testing)", + help="Turn on consensus match on single node (for testing).", + ), + ), + Argument( + "cycle-check", + ArgSpec( + action="store_true", + default=False, + dest="cycle_check", + help=( + "Check for reference cycles involving RegionField objects on " + "program exit (developer option). Such cycles have the effect " + "of stopping used RegionFields from being repurposed for " + "other Stores, thus increasing memory pressure. By default " + "this mode will miss any cycles already collected by the " + "garbage collector; run gc.disable() at the beginning of the " + "program to avoid this." 
+ ), ), ), ] @@ -927,11 +947,6 @@ def __init__(self, core_library: CoreLib) -> None: self._args = parse_library_command_args("legate", ARGS) - try: - self._legion_context = top_level.context[0] - except AttributeError: - pass - # Record whether we need to run finalize tasks # Key off whether we are being loaded in a context or not try: @@ -1665,6 +1680,24 @@ def _cleanup_legate_runtime() -> None: add_cleanup_item(_cleanup_legate_runtime) +class _CycleCheckWrapper(ModuleType): + def __init__(self, wrapped_mod: ModuleType): + self._wrapped_mod = wrapped_mod + + def __getattr__(self, attr: str) -> Any: + return getattr(self._wrapped_mod, attr) + + def __del__(self) -> None: + find_cycles() + + +if runtime._args.cycle_check: + # The first thing that legion_top does after executing the user script + # is to remove the newly created "__main__" module. We intercept this + # deletion operation to perform our check. + sys.modules["__main__"] = _CycleCheckWrapper(sys.modules["__main__"]) + + def get_legion_runtime() -> legion.legion_runtime_t: return runtime.legion_runtime diff --git a/tests/unit/legate/test_cycle_check.py b/tests/unit/legate/test_cycle_check.py new file mode 100644 index 000000000..5839bd917 --- /dev/null +++ b/tests/unit/legate/test_cycle_check.py @@ -0,0 +1,47 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import subprocess +from pathlib import Path + +import pytest + +PROG_TEXT = """ +import numpy as np +from legate.core import get_legate_runtime, types as ty +store = get_legate_runtime().core_context.create_store( + ty.int32, shape=(4,), optimize_scalar=False +) +# initialize the RegionField backing the store +store.storage +# create a cycle +x = [store] +x.append(x) +""" + + +def test_cycle_check(tmp_path: Path) -> None: + prog_file = tmp_path / "prog.py" + prog_file.write_text(PROG_TEXT) + output = subprocess.check_output( + ["legate", prog_file, "--cpus", "1", "-legate:cycle-check"] + ) + assert "found cycle!" 
in output.decode("utf-8") + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) From 278259aa091f820503d23794e630b2bb65f4f064 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 17 Nov 2022 20:01:56 -0800 Subject: [PATCH 064/121] Fixes for mypy>=0.990 (#482) * Fixes for mypy>=0.990 * Bump up the mypy version * Fix the mypy error by inheriting DataclassMixin explicitly --- .pre-commit-config.yaml | 2 +- legate/core/_legion/util.py | 2 ++ legate/core/constraints.py | 2 +- pyproject.toml | 1 - tests/unit/legate/util/test_args.py | 6 +++--- tests/unit/legate/util/test_types.py | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 402bffb64..a22015b82 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: files: \.(cu|cuh|h|cc|inl)$ types_or: [] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.982' + rev: 'v0.991' hooks: - id: mypy pass_filenames: false diff --git a/legate/core/_legion/util.py b/legate/core/_legion/util.py index af4d07cf2..c4b2e9705 100644 --- a/legate/core/_legion/util.py +++ b/legate/core/_legion/util.py @@ -15,6 +15,7 @@ from __future__ import annotations import struct +from abc import abstractmethod from typing import TYPE_CHECKING, Any, Generic, List, Optional, TypeVar, Union import numpy as np @@ -159,6 +160,7 @@ def launch( class Dispatchable(Generic[T]): + @abstractmethod def launch( self, runtime: legion.legion_runtime_t, diff --git a/legate/core/constraints.py b/legate/core/constraints.py index d18b5fab0..5b910c3e3 100644 --- a/legate/core/constraints.py +++ b/legate/core/constraints.py @@ -82,7 +82,7 @@ def reduce(self) -> Lit: return self def unknowns(self) -> Iterator[PartSym]: - pass + return iter([]) class PartSym(Expr): diff --git a/pyproject.toml b/pyproject.toml index 18325b3c6..89a6da552 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,6 @@ warn_no_return = true warn_return_any = false warn_unreachable = true -show_none_errors = true ignore_errors = false allow_untyped_globals = false diff --git a/tests/unit/legate/util/test_args.py b/tests/unit/legate/util/test_args.py index f6c97f4ed..190ff3c6b 100644 --- a/tests/unit/legate/util/test_args.py +++ b/tests/unit/legate/util/test_args.py @@ -114,7 +114,7 @@ def test_default_help( with pytest.raises(SystemExit) as e: m.parse_library_command_args("foo", []) assert e.value.code is None - out, err = capsys.readouterr() # type: ignore[unreachable] + out, err = capsys.readouterr() assert out.startswith("usage: ") def test_default_help_precedence( @@ -125,7 +125,7 @@ def test_default_help_precedence( with pytest.raises(SystemExit) as e: m.parse_library_command_args("foo", args) assert e.value.code is None - out, err = capsys.readouterr() # type: ignore[unreachable] + out, err = capsys.readouterr() assert out.startswith("usage: ") def test_default_help_patches_short_args( @@ -136,7 +136,7 @@ def test_default_help_patches_short_args( with pytest.raises(SystemExit) as e: m.parse_library_command_args("foo", args) assert e.value.code is None - out, err = capsys.readouterr() # type: ignore[unreachable] + out, err = capsys.readouterr() assert out.startswith("usage: ") assert "-foo:bar" in out assert "--foo:bar" not in out diff --git a/tests/unit/legate/util/test_types.py b/tests/unit/legate/util/test_types.py index 01835f882..070bc458f 100644 --- a/tests/unit/legate/util/test_types.py +++ b/tests/unit/legate/util/test_types.py @@ -41,7 +41,7 @@ class Source: 
@dataclass(frozen=True) -class Target: +class Target(m.DataclassMixin): foo: int bar: float baz: str From 9d7b0cf0daf259fa22b6f18abecb1169107e6ae9 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 21 Nov 2022 11:36:57 -0800 Subject: [PATCH 065/121] Fix some typos (#485) Co-authored-by: Manolis Papadakis --- legate/core/launcher.py | 22 +++++++++++++++++++--- legate/core/store.py | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/legate/core/launcher.py b/legate/core/launcher.py index 2f8ff886d..69d6833c7 100644 --- a/legate/core/launcher.py +++ b/legate/core/launcher.py @@ -277,7 +277,15 @@ def add( parent_partition = parent.parent parent = parent_partition.parent if req.permission != Permission.REDUCTION: - f(task, req.region, fields, 0, parent=parent, tag=req.tag) + f( + task, + req.region, + fields, + 0, + parent=parent, + tag=req.tag, + flags=req.flags, + ) else: f( task, @@ -287,6 +295,7 @@ def add( 0, parent=parent, tag=req.tag, + flags=req.flags, ) def add_single( @@ -355,9 +364,16 @@ def add_single( ) -> None: f = methods[req.permission] if req.permission != Permission.REDUCTION: - f(task, req.region, fields, tag=req.tag) + f(task, req.region, fields, tag=req.tag, flags=req.flags) else: - f(task, req.region, fields, self.redop, tag=req.tag) + f( + task, + req.region, + fields, + self.redop, + tag=req.tag, + flags=req.flags, + ) def __hash__(self) -> int: return hash((self.part, self.proj, self.redop)) diff --git a/legate/core/store.py b/legate/core/store.py index be7e44ea1..63040901b 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -374,7 +374,7 @@ def register_consumer(self, consumer: Any) -> None: # so that we don't create reference cycles. def callback() -> None: - self.decrement_inline_mapped_ref_count() + self.decrement_inline_mapped_ref_count(unordered=True) weakref.finalize(consumer, callback) From 97e2edfabb01667d28dd003e7a3d5b6976a7647a Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Mon, 21 Nov 2022 11:39:13 -0800 Subject: [PATCH 066/121] fix several reference cycle / leak related bugs (#488) * legate/core: don't hoist partition manager to top level These declarations appear to cause the partition manager to not be cleaned up, leading to leaks of futures and future maps in the partitions that it points to. Signed-off-by: Rohan Yadav * legate/core/runtime: remove PartitionManager reference on shutdown This enables the `PartitionManager` to get collected on runtime shutdown, even if there are cycles that point to the core's runtime. 
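Both changes above address the same lifetime problem: any long-lived strong reference to the PartitionManager, whether a module-level alias created at import time or the runtime's own attribute surviving shutdown, keeps its cached Futures and FutureMaps reachable after the runtime is destroyed. A minimal sketch of the pattern, using hypothetical stand-in classes rather than the real legate Runtime/PartitionManager:

import gc
import weakref


class Manager:
    def __init__(self) -> None:
        # Stands in for the caches of Futures/FutureMaps held by the real manager.
        self.cached = ["future-like objects"]


class Runtime:
    def __init__(self) -> None:
        self.manager = Manager()

    def destroy(self) -> None:
        # Dropping the attribute lets the manager (and everything it caches)
        # be collected even if the runtime object itself stays reachable.
        del self.manager


runtime = Runtime()

# A module-level alias such as `mgr = runtime.manager` would pin the manager
# for the life of the module; looking it up through `runtime` at each use
# (as the patch does) adds no extra strong reference.
ref = weakref.ref(runtime.manager)

runtime.destroy()
gc.collect()
assert ref() is None  # the manager became collectable once the runtime let go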
Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/partition.py | 19 ++++++++++++------- legate/core/runtime.py | 3 +++ legate/core/store.py | 32 +++++++++++++++++--------------- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/legate/core/partition.py b/legate/core/partition.py index be243911d..d5904c319 100644 --- a/legate/core/partition.py +++ b/legate/core/partition.py @@ -38,9 +38,6 @@ RequirementType = Union[Type[Broadcast], Type[Partition]] -part_mgr = runtime.partition_manager - - class PartitionBase(ABC): @abstractproperty def color_shape(self) -> Optional[Shape]: @@ -298,7 +295,9 @@ def construct( self, region: Region, complete: bool = False ) -> Optional[LegionPartition]: index_space = region.index_space - index_partition = part_mgr.find_index_partition(index_space, self) + index_partition = runtime.partition_manager.find_index_partition( + index_space, self + ) if index_partition is None: tile_shape = self._tile_shape transform = Transform(tile_shape.ndim, tile_shape.ndim) @@ -325,7 +324,9 @@ def construct( kind=kind, keep=True, # export this partition functor to other libraries ) - part_mgr.record_index_partition(index_space, self, index_partition) + runtime.partition_manager.record_index_partition( + index_space, self, index_partition + ) return region.get_child(index_partition) @@ -409,7 +410,9 @@ def construct( assert complete index_space = region.index_space - index_partition = part_mgr.find_index_partition(index_space, self) + index_partition = runtime.partition_manager.find_index_partition( + index_space, self + ) if index_partition is None: color_space = runtime.find_or_create_index_space(self._color_shape) functor = PartitionByWeights(self._weights) @@ -423,5 +426,7 @@ def construct( kind=kind, keep=True, # export this partition functor to other libraries ) - part_mgr.record_index_partition(index_space, self, index_partition) + runtime.partition_manager.record_index_partition( + index_space, self, index_partition + ) return region.get_child(index_partition) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 534862b61..8306f8b8a 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -1206,6 +1206,9 @@ def destroy(self) -> None: self.region_managers_by_region = {} self.field_managers = {} self.index_spaces = {} + # Explicitly release the reference to the partition manager so that + # it may be collected, releasing references to Futures and FutureMaps. + self._partition_manager = None # type: ignore if self._finalize_tasks: # Run a gc and then end the legate task diff --git a/legate/core/store.py b/legate/core/store.py index 63040901b..2f4e2f9a7 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -64,7 +64,6 @@ from math import prod attachment_manager = runtime.attachment_manager -partition_manager = runtime.partition_manager # A Field holds a reference to a field in a region tree @@ -695,8 +694,11 @@ def slice(self, tile_shape: Shape, offsets: Shape) -> Storage: shape % tile_shape ).sum() == 0 - if can_tile_completely and partition_manager.use_complete_tiling( - shape, tile_shape + if ( + can_tile_completely + and runtime.partition_manager.use_complete_tiling( + shape, tile_shape + ) ): color_shape = shape // tile_shape color = offsets // tile_shape @@ -737,7 +739,7 @@ def get_inline_allocation( def find_key_partition( self, restrictions: tuple[Restriction, ...] 
) -> Optional[PartitionBase]: - partition = partition_manager.find_storage_key_partition( + partition = runtime.partition_manager.find_storage_key_partition( self._unique_id, restrictions ) if partition is None and self._parent is not None: @@ -745,12 +747,12 @@ def find_key_partition( return partition def set_key_partition(self, partition: PartitionBase) -> None: - partition_manager.record_storage_key_partition( + runtime.partition_manager.record_storage_key_partition( self._unique_id, partition ) def reset_key_partition(self) -> None: - partition_manager.reset_storage_key_partition(self._unique_id) + runtime.partition_manager.reset_storage_key_partition(self._unique_id) def find_or_create_legion_partition( self, functor: PartitionBase, complete: bool @@ -760,12 +762,12 @@ def find_or_create_legion_partition( assert isinstance(self.data, RegionField) - part, found = partition_manager.find_legion_partition( + part, found = runtime.partition_manager.find_legion_partition( self._unique_id, functor ) if not found: part = functor.construct(self.data.region, complete=complete) - partition_manager.record_legion_partition( + runtime.partition_manager.record_legion_partition( self._unique_id, functor, part ) return part @@ -1221,12 +1223,12 @@ def get_key_partition(self) -> Optional[PartitionBase]: # registered correctly runtime.flush_scheduling_window() - return partition_manager.find_store_key_partition( + return runtime.partition_manager.find_store_key_partition( self._unique_id, self.find_restrictions() ) def has_key_partition(self, restrictions: tuple[Restriction, ...]) -> bool: - key_partition = partition_manager.find_store_key_partition( + key_partition = runtime.partition_manager.find_store_key_partition( self._unique_id, restrictions ) if key_partition is not None: @@ -1236,7 +1238,7 @@ def has_key_partition(self, restrictions: tuple[Restriction, ...]) -> bool: return (part is not None) and (part.even or self._transform.bottom) def set_key_partition(self, partition: PartitionBase) -> None: - partition_manager.record_store_key_partition( + runtime.partition_manager.record_store_key_partition( self._unique_id, partition ) # We also update the storage's key partition for other stores @@ -1246,12 +1248,12 @@ def set_key_partition(self, partition: PartitionBase) -> None: ) def reset_key_partition(self) -> None: - partition_manager.reset_store_key_partition(self._unique_id) + runtime.partition_manager.reset_store_key_partition(self._unique_id) def compute_key_partition( self, restrictions: tuple[Restriction, ...] 
) -> PartitionBase: - key_partition = partition_manager.find_store_key_partition( + key_partition = runtime.partition_manager.find_store_key_partition( self._unique_id, restrictions ) if key_partition is not None: @@ -1274,14 +1276,14 @@ def compute_key_partition( partition = self._transform.convert_partition(partition) return partition else: - launch_shape = partition_manager.compute_launch_shape( + launch_shape = runtime.partition_manager.compute_launch_shape( self, restrictions, ) if launch_shape is None: partition = REPLICATE else: - tile_shape = partition_manager.compute_tile_shape( + tile_shape = runtime.partition_manager.compute_tile_shape( self.shape, launch_shape ) partition = Tiling(tile_shape, launch_shape) From 2c64d1cc60d1dcc5a5c7f449f7011156958f687d Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 21 Nov 2022 12:01:09 -0800 Subject: [PATCH 067/121] Regenerate `install_info.py` on every build (#486) * regenerate install_info.py on every build * specify custom target dependencies correctly * fix typo --- CMakeLists.txt | 13 ++++---- cmake/generate_install_info_py.cmake | 31 +++++++++++++++++++ legate_core_python.cmake | 20 ++++-------- scripts/build-install.sh | 2 +- scripts/build-no-install.sh | 2 +- scripts/build-separately-no-install.sh | 2 +- scripts/build-with-legion-no-install.sh | 2 +- ...build-with-legion-separately-no-install.sh | 2 +- 8 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 cmake/generate_install_info_py.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index fa2ce2cf4..e83b9a779 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,23 +97,22 @@ endif() if(CMAKE_GENERATOR STREQUAL "Ninja") function(add_touch_legate_core_ninja_build_target) set(_suf ) - set(_depends ) if(SKBUILD) set(_suf "_python") endif() + add_custom_target("touch_legate_core${_suf}_ninja_build" ALL + COMMAND ${CMAKE_COMMAND} -E touch_nocreate "${CMAKE_CURRENT_BINARY_DIR}/build.ninja" + COMMENT "touch build.ninja so ninja doesn't re-run CMake on rebuild" + VERBATIM + ) foreach(_dep IN ITEMS legion_core legion_core_python Legion LegionRuntime Realm RealmRuntime Regent) if(TARGET ${_dep}) - list(APPEND _depends ${_dep}) + add_dependencies("touch_legate_core${_suf}_ninja_build" ${_dep}) endif() endforeach() - add_custom_target("touch_legion_core${_suf}_ninja_build" ALL - COMMAND ${CMAKE_COMMAND} -E touch_nocreate "${CMAKE_CURRENT_BINARY_DIR}/build.ninja" - COMMENT "touch build.ninja so ninja doesn't re-run CMake on rebuild" - VERBATIM DEPENDS ${_depends} - ) endfunction() add_touch_legate_core_ninja_build_target() endif() diff --git a/cmake/generate_install_info_py.cmake b/cmake/generate_install_info_py.cmake new file mode 100644 index 000000000..408500ac9 --- /dev/null +++ b/cmake/generate_install_info_py.cmake @@ -0,0 +1,31 @@ +#============================================================================= +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +execute_process( + COMMAND ${CMAKE_C_COMPILER} + -E -DLEGATE_USE_PYTHON_CFFI + -I "${CMAKE_CURRENT_LIST_DIR}/../src/core" + -P "${CMAKE_CURRENT_LIST_DIR}/../src/core/legate_c.h" + ECHO_ERROR_VARIABLE + OUTPUT_VARIABLE header + COMMAND_ERROR_IS_FATAL ANY +) + +set(libpath "") +configure_file( + "${CMAKE_CURRENT_LIST_DIR}/../legate/install_info.py.in" + "${CMAKE_CURRENT_LIST_DIR}/../legate/install_info.py" +@ONLY) diff --git a/legate_core_python.cmake b/legate_core_python.cmake index 85e57d8d2..05d92853e 100644 --- a/legate_core_python.cmake +++ b/legate_core_python.cmake @@ -43,22 +43,14 @@ if(NOT legate_core_FOUND) set(SKBUILD ON) endif() -execute_process( - COMMAND ${CMAKE_C_COMPILER} - -E -DLEGATE_USE_PYTHON_CFFI - -I "${CMAKE_CURRENT_SOURCE_DIR}/core/src" - -P "${CMAKE_CURRENT_SOURCE_DIR}/src/core/legate_c.h" - ECHO_ERROR_VARIABLE - OUTPUT_VARIABLE header - COMMAND_ERROR_IS_FATAL ANY +add_custom_target("generate_install_info_py" ALL + COMMAND ${CMAKE_COMMAND} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/generate_install_info_py.cmake" + COMMENT "Generate install_info.py" + VERBATIM ) -set(libpath "") -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/legate/install_info.py.in" - "${CMAKE_CURRENT_SOURCE_DIR}/legate/install_info.py" -@ONLY) - add_library(legate_core_python INTERFACE) add_library(legate::core_python ALIAS legate_core_python) target_link_libraries(legate_core_python INTERFACE legate::core) diff --git a/scripts/build-install.sh b/scripts/build-install.sh index f7b5a3854..a4d671cc3 100755 --- a/scripts/build-install.sh +++ b/scripts/build-install.sh @@ -16,7 +16,7 @@ rm -rf ./{build,_skbuild,dist,legate.core.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-no-install.sh b/scripts/build-no-install.sh index 8cb6665e4..50827ce7a 100755 --- a/scripts/build-no-install.sh +++ b/scripts/build-no-install.sh @@ -14,7 +14,7 @@ rm -rf ./{build,_skbuild,dist,legate.core.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-separately-no-install.sh b/scripts/build-separately-no-install.sh index 1ffacde26..e8d1e64c5 100755 --- a/scripts/build-separately-no-install.sh +++ b/scripts/build-separately-no-install.sh @@ -14,7 +14,7 @@ rm -rf ./{build,_skbuild,dist,legate.core.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-with-legion-no-install.sh b/scripts/build-with-legion-no-install.sh index 5cc03b624..2f3a1e397 100755 --- a/scripts/build-with-legion-no-install.sh +++ b/scripts/build-with-legion-no-install.sh @@ -29,7 +29,7 @@ fi cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git 
a/scripts/build-with-legion-separately-no-install.sh b/scripts/build-with-legion-separately-no-install.sh index a497af581..d15180dc0 100755 --- a/scripts/build-with-legion-separately-no-install.sh +++ b/scripts/build-with-legion-separately-no-install.sh @@ -29,7 +29,7 @@ fi cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" From 52047b62abc7f67da18ff6f81b43f3c75fc3d406 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 21 Nov 2022 12:08:21 -0800 Subject: [PATCH 068/121] Don't load a Realm network module if running on 1 rank (#484) * Don't load a Realm network module if running on 1 rank * Add a unit test Co-authored-by: Manolis Papadakis --- legate/driver/command.py | 8 ++++ tests/unit/legate/driver/test_command.py | 47 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/legate/driver/command.py b/legate/driver/command.py index 3ff4bbef7..7aeac6dd9 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -302,6 +302,13 @@ def cmd_regmem( return () if regmem == 0 else ("-ll:rsize", str(regmem)) +def cmd_network( + config: ConfigProtocol, system: System, launcher: Launcher +) -> CommandPart: + # Don't initialize a Realm network module if running on a single rank + return () if config.multi_node.ranks > 1 else ("-ll:networks", "none") + + def cmd_log_levels( config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: @@ -384,6 +391,7 @@ def cmd_user_opts( cmd_numamem, cmd_fbmem, cmd_regmem, + cmd_network, cmd_log_levels, cmd_log_file, cmd_eager_alloc, diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index e79310c93..1c2aa5355 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -58,6 +58,7 @@ def test_CMD_PARTS() -> None: m.cmd_numamem, m.cmd_fbmem, m.cmd_regmem, + m.cmd_network, m.cmd_log_levels, m.cmd_log_file, m.cmd_eager_alloc, @@ -946,6 +947,52 @@ def test_nonzero(self, genobjs: GenObjs, value: str) -> None: assert result == ("-ll:rsize", value) +class Test_cmd_network: + def test_no_launcher_single_rank( + self, + genobjs: GenObjs, + ) -> None: + config, system, launcher = genobjs() + result = m.cmd_network(config, system, launcher) + assert result == ("-ll:networks", "none") + + @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) + def test_no_launcher_multi_rank( + self, + genobjs: GenObjs, + rank_var: dict[str, str], + ) -> None: + config, system, launcher = genobjs( + multi_rank=(2, 2), + rank_env={rank_var: "1"}, + ) + result = m.cmd_network(config, system, launcher) + assert result == () + + @pytest.mark.parametrize("launch", ("mpirun", "jsrun", "srun")) + def test_launcher_single_rank( + self, + genobjs: GenObjs, + launch: LauncherType, + ) -> None: + config, system, launcher = genobjs(["--launcher", launch]) + result = m.cmd_network(config, system, launcher) + assert result == ("-ll:networks", "none") + + @pytest.mark.parametrize("launch", ("mpirun", "jsrun", "srun")) + def test_launcher_multi_rank( + self, + genobjs: GenObjs, + launch: LauncherType, + ) -> None: + config, system, launcher = genobjs( + ["--launcher", launch], + multi_rank=(2, 2), + ) + result = m.cmd_network(config, system, launcher) + assert result == () + + class Test_cmd_log_levels: def test_default(self, genobjs: GenObjs) -> None: config, system, launcher = 
genobjs([]) From 056723f25b8e17fe4c6dd6e68134d98dd054157d Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 21 Nov 2022 17:27:26 -0800 Subject: [PATCH 069/121] Don't filter out cycles going through __globals__ (#489) Co-authored-by: Manolis Papadakis --- legate/core/cycle_detector.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/legate/core/cycle_detector.py b/legate/core/cycle_detector.py index a90a6531a..e5dc8d855 100644 --- a/legate/core/cycle_detector.py +++ b/legate/core/cycle_detector.py @@ -16,18 +16,12 @@ import gc import inspect from collections import deque -from types import FunctionType, ModuleType +from types import ModuleType from typing import Any, Set, Union def _skip(src: Any, dst: Any) -> bool: - return ( - isinstance(src, type) - or isinstance(src, ModuleType) - or isinstance(src, FunctionType) - and hasattr(src, "__globals__") - and src.__globals__ is dst - ) + return isinstance(src, type) or isinstance(src, ModuleType) def _find_cycles(root: Any, all_ids: Set[int]) -> bool: From 18721950a154821318fb4f20a4a1250447c3a81e Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 21 Nov 2022 17:33:53 -0800 Subject: [PATCH 070/121] Fix typo in bind.sh --- bind.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bind.sh b/bind.sh index 86a9bf44b..8ccf56203 100755 --- a/bind.sh +++ b/bind.sh @@ -77,7 +77,7 @@ case "$launcher" in ;; jsrun ) local_rank="${OMPI_COMM_WORLD_LOCAL_RANK:-unknown}" - gloabl_rank="${OMPI_COMM_WORLD_RANK:-unknown}" + global_rank="${OMPI_COMM_WORLD_RANK:-unknown}" ;; srun ) local_rank="${SLURM_LOCALID:-unknown}" From 16624698a25e0d3f70c4d76e432987510b1d7027 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 21 Nov 2022 21:10:10 -0800 Subject: [PATCH 071/121] Update create_buffer to use socket memories whenever available (#487) --- src/core/data/buffer.h | 8 +++----- src/core/legate_c.h | 1 + src/core/mapping/core_mapper.cc | 4 ++++ src/core/runtime/runtime.cc | 6 ++++++ src/core/runtime/runtime.h | 1 + src/core/task/return.h | 4 ++-- src/core/utilities/machine.cc | 20 +++++++++++++++++--- src/core/utilities/machine.h | 2 +- 8 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/core/data/buffer.h b/src/core/data/buffer.h index f2b5e81e9..91550f69d 100644 --- a/src/core/data/buffer.h +++ b/src/core/data/buffer.h @@ -18,6 +18,8 @@ #include "legion.h" +#include "core/utilities/machine.h" + namespace legate { template @@ -29,11 +31,7 @@ Buffer create_buffer(const Legion::Point& extents, size_t alignment = 16) { using namespace Legion; - if (Memory::Kind::NO_MEMKIND == kind) { - auto proc = Processor::get_executing_processor(); - kind = proc.kind() == Processor::Kind::TOC_PROC ? Memory::Kind::GPU_FB_MEM - : Memory::Kind::SYSTEM_MEM; - } + if (Memory::Kind::NO_MEMKIND == kind) kind = find_memory_kind_for_executing_processor(false); auto hi = extents - Point::ONES(); // We just avoid creating empty buffers, as they cause all sorts of headaches. 
for (int32_t idx = 0; idx < DIM; ++idx) hi[idx] = std::max(hi[idx], 0); diff --git a/src/core/legate_c.h b/src/core/legate_c.h index 5b64b67fd..b29fbf056 100644 --- a/src/core/legate_c.h +++ b/src/core/legate_c.h @@ -55,6 +55,7 @@ typedef enum legate_core_tunable_t { LEGATE_CORE_TUNABLE_TOTAL_GPUS, LEGATE_CORE_TUNABLE_NUM_PIECES, LEGATE_CORE_TUNABLE_NUM_NODES, + LEGATE_CORE_TUNABLE_HAS_SOCKET_MEM, LEGATE_CORE_TUNABLE_MIN_SHARD_VOLUME, LEGATE_CORE_TUNABLE_WINDOW_SIZE, LEGATE_CORE_TUNABLE_MAX_PENDING_EXCEPTIONS, diff --git a/src/core/mapping/core_mapper.cc b/src/core/mapping/core_mapper.cc index ccc738983..7f7fbbaf3 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -409,6 +409,10 @@ void CoreMapper::select_tunable_value(const MapperContext ctx, pack_tunable(min_cpu_chunk, output); return; } + case LEGATE_CORE_TUNABLE_HAS_SOCKET_MEM: { + pack_tunable(has_socket_mem, output); + return; + } case LEGATE_CORE_TUNABLE_WINDOW_SIZE: { pack_tunable(window_size, output); return; diff --git a/src/core/runtime/runtime.cc b/src/core/runtime/runtime.cc index fb1549cf7..27e14a7da 100644 --- a/src/core/runtime/runtime.cc +++ b/src/core/runtime/runtime.cc @@ -43,6 +43,8 @@ static const char* const core_library_name = "legate.core"; /*static*/ bool Core::log_mapping_decisions = false; +/*static*/ bool Core::has_socket_mem = false; + /*static*/ void Core::parse_config(void) { #ifndef LEGATE_USE_CUDA @@ -199,6 +201,10 @@ extern void register_exception_reduction_op(Runtime* runtime, const LibraryConte register_legate_core_projection_functors(runtime, context); register_legate_core_sharding_functors(runtime, context); + + auto fut = runtime->select_tunable_value( + Runtime::get_context(), LEGATE_CORE_TUNABLE_HAS_SOCKET_MEM, context.get_mapper_id(0)); + Core::has_socket_mem = fut.get_result(); } } // namespace legate diff --git a/src/core/runtime/runtime.h b/src/core/runtime/runtime.h index c92f18091..b7b86c836 100644 --- a/src/core/runtime/runtime.h +++ b/src/core/runtime/runtime.h @@ -43,6 +43,7 @@ class Core { static bool use_empty_task; static bool synchronize_stream_view; static bool log_mapping_decisions; + static bool has_socket_mem; }; } // namespace legate diff --git a/src/core/task/return.h b/src/core/task/return.h index 9fa558e64..031bb71f2 100644 --- a/src/core/task/return.h +++ b/src/core/task/return.h @@ -34,8 +34,8 @@ struct ReturnValue { public: void* ptr(); const void* ptr() const; - const size_t size() const { return size_; } - const bool is_device_value() const { return is_device_value_; } + size_t size() const { return size_; } + bool is_device_value() const { return is_device_value_; } public: // Calls the Legion postamble with an instance diff --git a/src/core/utilities/machine.cc b/src/core/utilities/machine.cc index 843f6d7eb..9d7e31cdd 100644 --- a/src/core/utilities/machine.cc +++ b/src/core/utilities/machine.cc @@ -16,15 +16,29 @@ #include "core/utilities/machine.h" +#include "core/runtime/runtime.h" +#include "legate_defines.h" + using namespace Legion; namespace legate { -Memory::Kind find_memory_kind_for_executing_processor() +Memory::Kind find_memory_kind_for_executing_processor(bool host_accessible) { auto proc = Processor::get_executing_processor(); - return proc.kind() == Processor::Kind::TOC_PROC ? Memory::Kind::Z_COPY_MEM - : Memory::Kind::SYSTEM_MEM; + switch (proc.kind()) { + case Processor::Kind::LOC_PROC: { + return Memory::Kind::SYSTEM_MEM; + } + case Processor::Kind::TOC_PROC: { + return host_accessible ? 
Memory::Kind::Z_COPY_MEM : Memory::Kind::GPU_FB_MEM; + } + case Processor::Kind::OMP_PROC: { + return Core::has_socket_mem ? Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM; + } + } + LEGATE_ABORT; + return Memory::Kind::SYSTEM_MEM; } } // namespace legate diff --git a/src/core/utilities/machine.h b/src/core/utilities/machine.h index a1862336c..b61824542 100644 --- a/src/core/utilities/machine.h +++ b/src/core/utilities/machine.h @@ -20,6 +20,6 @@ namespace legate { -Legion::Memory::Kind find_memory_kind_for_executing_processor(); +Legion::Memory::Kind find_memory_kind_for_executing_processor(bool host_accessible = true); } // namespace legate From e52d20636593e47510a23ad6ba4a6e3fdc0e5fbf Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 22 Nov 2022 14:36:28 -0800 Subject: [PATCH 072/121] Don't use cmake 3.25.0 in build-isolation mode (#492) Co-authored-by: Manolis Papadakis --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 89a6da552..8f82a0d13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ requires = [ "ninja", "setuptools", "scikit-build>=0.13.1", - "cmake>=3.22.1,!=3.23.0", + "cmake>=3.22.1,!=3.23.0,!=3.25.0", ] [tool.black] From dc8228104ed82caf69812b3bbab8afcdcc6a50fd Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 23 Nov 2022 13:00:45 -0800 Subject: [PATCH 073/121] Throw an error if an allocation is not consumed (#490) * Throw an error if an allocation is not consumed * Adjust allocation count instead of erroring out if unconsumed * Deletion in finalizer needs to be unordered * Use the correct parameter name Co-authored-by: Manolis Papadakis --- legate/core/allocation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/legate/core/allocation.py b/legate/core/allocation.py index ebfcd97a9..4f7354e42 100644 --- a/legate/core/allocation.py +++ b/legate/core/allocation.py @@ -40,6 +40,12 @@ def __init__( self._strides = strides self._consumed = False + def __del__(self) -> None: + if not self._consumed: + self._region_field.decrement_inline_mapped_ref_count( + unordered=True + ) + def consume( self, ctor: Callable[[tuple[int, ...], int, tuple[int, ...]], Any] ) -> Any: From c555632566eeee530408015df305a171d0bd7a27 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Nov 2022 13:28:07 -0800 Subject: [PATCH 074/121] [pre-commit.ci] pre-commit autoupdate (#480) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/pre-commit/mirrors-clang-format: v14.0.6 → v15.0.4](https://github.com/pre-commit/mirrors-clang-format/compare/v14.0.6...v15.0.4) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- src/core/utilities/type_traits.h | 18 ++++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a22015b82..0b75f30a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v14.0.6' + rev: 'v15.0.4' hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ diff --git a/src/core/utilities/type_traits.h b/src/core/utilities/type_traits.h index 
4d8b324b4..84197f528 100644 --- a/src/core/utilities/type_traits.h +++ b/src/core/utilities/type_traits.h @@ -172,27 +172,21 @@ struct is_floating_point { }; template -struct is_complex : std::false_type { -}; +struct is_complex : std::false_type {}; template <> -struct is_complex : std::true_type { -}; +struct is_complex : std::true_type {}; template <> -struct is_complex : std::true_type { -}; +struct is_complex : std::true_type {}; template -struct is_complex_type : std::false_type { -}; +struct is_complex_type : std::false_type {}; template <> -struct is_complex_type> : std::true_type { -}; +struct is_complex_type> : std::true_type {}; template <> -struct is_complex_type> : std::true_type { -}; +struct is_complex_type> : std::true_type {}; } // namespace legate From 91a2ff890c65be62b6625d5be0d82c592863862c Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Wed, 23 Nov 2022 15:27:50 -0800 Subject: [PATCH 075/121] legate/core: fix FutureMap leak in communicator shutdown (#495) This commit fixes the following leak, leading to shutdown hangs. ``` found cycle! tail: 0x200092ca1e10: ^ 0x200058dd65c0: ^ ["_handles"] 0x2000588dff80: ^ .__dict__ 0x2000889b11b0: ^ ["_nccl"] 0x2000588dd540: ^ .__dict__ 0x2000887b8430: ^ ["_comm_manager"] 0x2000889f7d00: ^ .__dict__ 0x2000887b9f90: ^ ["runtime"] 0x200173d7b180: cycle: 0x200173d7b180: ^ .__dict__ 0x200173d84370: ^ ["manager"] 0x200173e30cc0: ^ .__dict__ 0x200173db2e00: ^ [85] 0x200173df0f80: ^ ["_freed_fields"] 0x2000588ddb40: ^ .__dict__ 0x2000889b0fa0: ^ ["_field_match_manager"] 0x200173d7b180: ``` Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/communicator.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/legate/core/communicator.py b/legate/core/communicator.py index 019258a51..caa40243c 100644 --- a/legate/core/communicator.py +++ b/legate/core/communicator.py @@ -61,6 +61,10 @@ def initialize(self, volume: int) -> None: def destroy(self) -> None: for volume, handle in self._handles.items(): self._finalize(volume, handle) + # Drop the references to the handles dict after + # all handles have been finalized to ensure that + # no references to FutureMaps are kept. 
+ self._handles = {} @abstractproperty def needs_barrier(self) -> bool: From fd57fdae73e5547a3394255a74342608df0d3686 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 28 Nov 2022 08:54:16 -0800 Subject: [PATCH 076/121] Add type for LegateDataInterface (#494) * add LegateDataInterface protocol and remove object base classes * specific ignore types * tighten up LegateDataInterface definition * remove LDI from Time class * fix circular import * restore type ignore --- legate/core/corelib.py | 2 +- legate/core/legate.py | 36 ++++++++++++++++++++----------- legate/core/runtime.py | 2 +- legate/jupyter/_legion_kernel.py | 6 +++--- legate/jupyter/magic.py | 2 +- legate/tester/args.py | 2 +- legate/timing/timing.py | 10 ++------- tests/unit/legate/util/test_ui.py | 2 +- 8 files changed, 34 insertions(+), 28 deletions(-) diff --git a/legate/core/corelib.py b/legate/core/corelib.py index 4b27bc512..6ccd7cff7 100644 --- a/legate/core/corelib.py +++ b/legate/core/corelib.py @@ -17,7 +17,7 @@ import os from typing import Any, Union -from ..install_info import header, libpath # type: ignore +from ..install_info import header, libpath # type: ignore [import] from .legate import Library from .resource import ResourceConfig diff --git a/legate/core/legate.py b/legate/core/legate.py index 38760f41e..044a41c7e 100644 --- a/legate/core/legate.py +++ b/legate/core/legate.py @@ -15,9 +15,10 @@ from __future__ import annotations import platform -from typing import TYPE_CHECKING, Any, Iterator, Optional, Union +from typing import TYPE_CHECKING, Any, Iterator, Optional, TypedDict, Union import pyarrow +from typing_extensions import Protocol from .resource import ResourceConfig @@ -25,6 +26,17 @@ from .store import Store +class LegateDataInterfaceItem(TypedDict): + version: int + data: dict[pyarrow.Field, Array] + + +class LegateDataInterface(Protocol): + @property + def __legate_data_interface__(self) -> LegateDataInterfaceItem: + ... + + class Array: def __init__( self, @@ -109,7 +121,7 @@ def __len__(self) -> int: raise NotImplementedError("Array.__len__") -class Table: +class Table(LegateDataInterface): def __init__(self, schema: pyarrow.Schema, columns: list[Array]) -> None: """ A Table is a collection of top-level, equal-length Array @@ -127,30 +139,30 @@ def __init__(self, schema: pyarrow.Schema, columns: list[Array]) -> None: self._columns = columns @property - def __legate_data_interface__(self) -> dict[str, Any]: + def __legate_data_interface__(self) -> LegateDataInterfaceItem: """ The Legate data interface allows for different Legate libraries to get access to the base Legion primitives that back objects from different Legate libraries. It currently requires objects that implement it to - return a dictionary that contains two integer members: + return a dictionary that contains two members: Returns ------- A dictionary with the following entries: + 'version' (required) : int An integer showing the version number of this implementation of the interface (i.e. 
1 for this version) - 'data' (required) : OrderedDict[Field,Array] - An ordered dictionary mapping 'Field' objects that represent the - names and types of the field data to 'Array' objects containing + + 'data' (required) : dict[Field, Array] + An dictionary mapping ``pyarrow.Field`` objects that represent the + names and types of the field data to ``Array`` objects containing Store objects + """ - result: dict[str, Any] = dict() - result["version"] = 1 - data = {} + result: LegateDataInterfaceItem = {"version": 1, "data": {}} for index, column in enumerate(self._columns): - data[self._schema.field(index)] = column - result["data"] = data + result["data"][self._schema.field(index)] = column return result def add_column( diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 8306f8b8a..cddc49813 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -1208,7 +1208,7 @@ def destroy(self) -> None: self.index_spaces = {} # Explicitly release the reference to the partition manager so that # it may be collected, releasing references to Futures and FutureMaps. - self._partition_manager = None # type: ignore + del self._partition_manager if self._finalize_tasks: # Run a gc and then end the legate task diff --git a/legate/jupyter/_legion_kernel.py b/legate/jupyter/_legion_kernel.py index b88d23f30..812f81cf6 100644 --- a/legate/jupyter/_legion_kernel.py +++ b/legate/jupyter/_legion_kernel.py @@ -20,7 +20,7 @@ from contextlib import contextmanager from typing import Any, Iterator, TextIO -from ipykernel.ipkernel import IPythonKernel # type: ignore +from ipykernel.ipkernel import IPythonKernel # type: ignore [import] __version__ = "0.1" @@ -33,7 +33,7 @@ def reset_stdout(stdout: TextIO) -> Iterator[None]: sys.stdout = _stdout -class LegionKernel(IPythonKernel): # type: ignore +class LegionKernel(IPythonKernel): # type: ignore [misc,no-any-unimported] implementation = "legion_kernel" implementation_version = __version__ banner = "Legion IPython Kernel for SM" @@ -55,6 +55,6 @@ def __init__(self, **kwargs: Any) -> None: if __name__ == "__main__": - from ipykernel.kernelapp import IPKernelApp # type: ignore + from ipykernel.kernelapp import IPKernelApp # type: ignore [import] IPKernelApp.launch_instance(kernel_class=LegionKernel) diff --git a/legate/jupyter/magic.py b/legate/jupyter/magic.py index b5b82784c..ba80f9c20 100644 --- a/legate/jupyter/magic.py +++ b/legate/jupyter/magic.py @@ -50,7 +50,7 @@ } -class LegateInfo(object): +class LegateInfo: config: LegateMetadata def __init__(self) -> None: diff --git a/legate/tester/args.py b/legate/tester/args.py index 0645fea9e..4b24077a0 100644 --- a/legate/tester/args.py +++ b/legate/tester/args.py @@ -61,7 +61,7 @@ dest="features", action=ExtendAction, choices=MultipleChoices(sorted(FEATURES)), - type=lambda s: s.split(","), # type: ignore + type=lambda s: s.split(","), # type: ignore [arg-type,return-value] help="Test Legate with features (also via USE_*)", ) diff --git a/legate/timing/timing.py b/legate/timing/timing.py index 9d40409a1..9838f7cb4 100644 --- a/legate/timing/timing.py +++ b/legate/timing/timing.py @@ -25,7 +25,7 @@ import pyarrow -class TimingRuntime(object): +class TimingRuntime: def __init__(self) -> None: self.runtime = get_legion_runtime() self.context = get_legion_context() @@ -53,18 +53,12 @@ def measure_nanoseconds(self) -> Future: ) -class Time(object): +class Time: def __init__(self, future: Future, dtype: Any) -> None: self.future = future self.dtype = dtype self.value: Union[int, float, None] = None - 
@property - def __legate_data_interface__(self) -> dict[str, Any]: - result: dict[str, Any] = {"version": 1, "data": dict()} - result["data"]["Legate Timestamp"] = self - return result - @property def type(self) -> Any: return self.dtype diff --git a/tests/unit/legate/util/test_ui.py b/tests/unit/legate/util/test_ui.py index 4603c053c..56c53dfe7 100644 --- a/tests/unit/legate/util/test_ui.py +++ b/tests/unit/legate/util/test_ui.py @@ -24,7 +24,7 @@ from legate.util import colors, ui as m try: - import colorama # type: ignore + import colorama # type: ignore [import] except ImportError: colorama = None From 25c9771909fcd52a00584cc9dc4dc4b17d3db453 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 28 Nov 2022 19:09:24 -0800 Subject: [PATCH 077/121] Check for cycles involving Futures after runtime shutdown (#496) * Check for cycles involving Futures after runtime shutdown * Separate Future cycle check into separate option --- legate/core/cycle_detector.py | 18 +++++++++++++--- legate/core/runtime.py | 39 ++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/legate/core/cycle_detector.py b/legate/core/cycle_detector.py index e5dc8d855..14fb73e5e 100644 --- a/legate/core/cycle_detector.py +++ b/legate/core/cycle_detector.py @@ -130,14 +130,26 @@ def _bfs(begin: Any, end: Any, all_ids: Set[int]) -> None: print(f" {_obj_str(begin)}") -def find_cycles() -> bool: - from .store import RegionField +def find_cycles(for_futures: bool) -> bool: + # Avoid importing RegionField when looking for cycles after Runtime + # deletion, because at that point it is impossible to import store.py. + if for_futures: + from ._legion import Future, FutureMap + + def is_interesting(obj: Any) -> bool: + return isinstance(obj, (Future, FutureMap)) + + else: + from .store import RegionField + + def is_interesting(obj: Any) -> bool: + return isinstance(obj, RegionField) found_cycles = False all_objs = gc.get_objects() all_ids = set(id(obj) for obj in all_objs) for obj in all_objs: - if isinstance(obj, RegionField): + if is_interesting(obj): print( f"looking for cycles involving {hex(id(obj))}, " f"of type {type(obj)}" diff --git a/legate/core/runtime.py b/legate/core/runtime.py index cddc49813..48f4391e9 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -97,12 +97,26 @@ dest="cycle_check", help=( "Check for reference cycles involving RegionField objects on " - "program exit (developer option). Such cycles have the effect " - "of stopping used RegionFields from being repurposed for " - "other Stores, thus increasing memory pressure. By default " - "this mode will miss any cycles already collected by the " - "garbage collector; run gc.disable() at the beginning of the " - "program to avoid this." + "script exit (developer option). When such cycles arise " + "during execution, they stop used RegionFields from getting " + "collected and reused for new Stores, thus increasing memory " + "pressure. By default this check will miss any RegionField " + "cycles the garbage collector collected during execution; " + "run gc.disable() at the beginning of the program to avoid " + "this." + ), + ), + ), + Argument( + "future-leak-check", + ArgSpec( + action="store_true", + default=False, + dest="future_leak_check", + help=( + "Check for reference cycles keeping Future/FutureMap objects " + "alive after Legate runtime exit (developer option). Such " + "leaks can result in Legion runtime shutdown hangs." 
), ), ), @@ -1675,9 +1689,16 @@ def raise_exceptions(self) -> None: def _cleanup_legate_runtime() -> None: global runtime + future_leak_check = runtime._args.future_leak_check runtime.destroy() del runtime gc.collect() + if future_leak_check: + print( + "Looking for cycles that are keeping Future/FutureMap objects " + "alive after Legate runtime exit." + ) + find_cycles(True) add_cleanup_item(_cleanup_legate_runtime) @@ -1691,7 +1712,11 @@ def __getattr__(self, attr: str) -> Any: return getattr(self._wrapped_mod, attr) def __del__(self) -> None: - find_cycles() + print( + "Looking for cycles that are stopping RegionFields from getting " + "collected and reused." + ) + find_cycles(False) if runtime._args.cycle_check: From 472abc966df5da0943c7260931695e32ad4494e8 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Tue, 29 Nov 2022 15:44:42 -0800 Subject: [PATCH 078/121] src/core/mapping: adjust indirect copy mapping for GPUs (#499) Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- src/core/mapping/base_mapper.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 2f5f5788d..33681abc0 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -1049,6 +1049,16 @@ void BaseMapper::map_copy(const MapperContext ctx, auto store_target = default_store_targets(target_proc.kind()).front(); + // If we're mapping an indirect copy and have data resident in GPU memory, + // map everything to CPU memory, as indirect copies on GPUs are currently + // extremely slow. + auto indirect = + !copy.src_indirect_requirements.empty() || !copy.dst_indirect_requirements.empty(); + if (indirect && target_proc.kind() == Processor::TOC_PROC) { + target_proc = local_cpus.front(); + store_target = StoreTarget::SYSMEM; + } + Copy legate_copy(©, runtime, ctx); std::map*> output_map; From 09b43106c87fcb26428155fbd51e9bf20a00c975 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Tue, 29 Nov 2022 15:44:51 -0800 Subject: [PATCH 079/121] legate/core: guard deletion in reset_{store,storage}_key_partition (#498) Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/runtime.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/legate/core/runtime.py b/legate/core/runtime.py index 48f4391e9..637b09f72 100644 --- a/legate/core/runtime.py +++ b/legate/core/runtime.py @@ -891,7 +891,8 @@ def record_store_key_partition( self._store_key_partitions[store_id] = key_partition def reset_store_key_partition(self, store_id: int) -> None: - del self._store_key_partitions[store_id] + if store_id in self._store_key_partitions: + del self._store_key_partitions[store_id] def find_storage_key_partition( self, storage_id: int, restrictions: tuple[Restriction, ...] @@ -909,7 +910,8 @@ def record_storage_key_partition( self._storage_key_partitions[storage_id] = key_partition def reset_storage_key_partition(self, storage_id: int) -> None: - del self._storage_key_partitions[storage_id] + if storage_id in self._storage_key_partitions: + del self._storage_key_partitions[storage_id] def find_legion_partition( self, storage_id: int, functor: PartitionBase From b85a1dff9f1d6a10cf73d2acccc71f3fe827c619 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Tue, 29 Nov 2022 15:44:57 -0800 Subject: [PATCH 080/121] src/core/mapping: temporary fix to base_mapper for collective branch (#493) This commit adjusts an assertion in the base mapper for use with the collective branch of legion. 
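With collective views in the mix, legate_select_sources can legitimately find no candidate source instances, so the hard assertion is relaxed into an early return that lets the runtime pick a source on its own.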
Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- src/core/mapping/base_mapper.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 33681abc0..c48428de4 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -925,7 +925,10 @@ void BaseMapper::legate_select_sources(const MapperContext ctx, } else band_ranking.push_back(std::pair(instance, finder->second)); } - assert(!band_ranking.empty()); + // If there aren't any sources (for example if there are some collective views + // to choose from, not yet in this branch), just return nothing and let the + // runtime pick something for us. + if (band_ranking.empty()) { return; } // Easy case of only one instance if (band_ranking.size() == 1) { ranking.push_back(band_ranking.begin()->first); From f5fac94ab3b230ca05cb25944eaf91872ffaa6c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Nov 2022 17:50:54 -0800 Subject: [PATCH 081/121] [pre-commit.ci] pre-commit autoupdate (#502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/flake8: 5.0.4 → 6.0.0](https://github.com/PyCQA/flake8/compare/5.0.4...6.0.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b75f30a1..4635440a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: hooks: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format From 3cdd7252cf485f712e8801f08f3878b7437ffc3b Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 29 Nov 2022 23:36:38 -0800 Subject: [PATCH 082/121] Don't access stream pools unless we're on GPUs (#503) * Don't access stream pools unless we're on GPUs * Fix for the dangling else --- src/core/task/return.cc | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/core/task/return.cc b/src/core/task/return.cc index c44365516..e11640616 100644 --- a/src/core/task/return.cc +++ b/src/core/task/return.cc @@ -208,17 +208,19 @@ void ReturnValues::legion_serialize(void* buffer) const // // the size of value i is computed by offsets[i] - (i == 0 ? 
0 : offsets[i-1]) -#ifdef LEGATE_USE_CUDA - auto stream = cuda::StreamPool::get_stream_pool().get_stream(); -#endif - + // Special case with a single scalar if (return_values_.size() == 1) { auto& ret = return_values_.front(); -#ifdef LEGATE_USE_CUDA - if (ret.is_device_value()) - CHECK_CUDA(cudaMemcpyAsync(buffer, ret.ptr(), ret.size(), cudaMemcpyDeviceToHost, stream)); - else + if (ret.is_device_value()) { +#ifdef DEBUG_LEGATE + assert(Processor::get_executing_processor().kind() == Processor::Kind::TOC_PROC); #endif + CHECK_CUDA(cudaMemcpyAsync(buffer, + ret.ptr(), + ret.size(), + cudaMemcpyDeviceToHost, + cuda::StreamPool::get_stream_pool().get_stream())); + } else memcpy(buffer, ret.ptr(), ret.size()); return; } @@ -233,15 +235,25 @@ void ReturnValues::legion_serialize(void* buffer) const ptr = ptr + sizeof(uint32_t); } - for (auto ret : return_values_) { - uint32_t size = ret.size(); #ifdef LEGATE_USE_CUDA - if (ret.is_device_value()) - CHECK_CUDA(cudaMemcpyAsync(ptr, ret.ptr(), size, cudaMemcpyDeviceToHost, stream)); - else + if (Processor::get_executing_processor().kind() == Processor::Kind::TOC_PROC) { + auto stream = cuda::StreamPool::get_stream_pool().get_stream(); + for (auto ret : return_values_) { + uint32_t size = ret.size(); + if (ret.is_device_value()) + CHECK_CUDA(cudaMemcpyAsync(ptr, ret.ptr(), size, cudaMemcpyDeviceToHost, stream)); + else + memcpy(ptr, ret.ptr(), size); + ptr += size; + } + } else #endif + { + for (auto ret : return_values_) { + uint32_t size = ret.size(); memcpy(ptr, ret.ptr(), size); - ptr += size; + ptr += size; + } } } From d7d2d8207178e3ec542326e64f4ee578ed12ab73 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 30 Nov 2022 14:49:53 -0800 Subject: [PATCH 083/121] Add typing-extensions run export to conda build (#504) Add typing-extensions run export to conda build Co-authored-by: Marcin Zalewski --- conda/conda-build/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index 32d765ead..51f56180d 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -119,6 +119,7 @@ requirements: - llvm-openmp - numpy {{ numpy_version }} - pyarrow {{ pyarrow_version }} + - typing_extensions {% if gpu_enabled_bool %} - cuda-cudart >={{ cuda_version }} - nccl From 4ca97f7ecb620f198a6aac1a83daeee391756fc1 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 1 Dec 2022 10:10:34 -0800 Subject: [PATCH 084/121] Add CTK 11.8 to environment generation script (#505) --- scripts/generate-conda-envs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index 494b408ba..dc92d373d 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -221,6 +221,7 @@ def filename(self) -> str: "11.5", "11.6", "11.7", + "11.8", ) OS_NAMES: Tuple[OSType, ...] 
= ("linux", "osx") From 0a556f23f7ed53cd3d54efcc386ff0bb85776eda Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 1 Dec 2022 14:39:36 -0800 Subject: [PATCH 085/121] Add machinery for legate.core base tests (#500) * Add machinery for legate.core base tests * fixes to tests * skip main plumbing tests for now --- .github/workflows/ci.yml | 66 ++++++++++++++++++++++++- tests/unit/legate/driver/test_main.py | 6 ++- tests/unit/legate/jupyter/test_main.py | 8 ++- tests/unit/legate/tester/test_config.py | 3 +- tests/unit/legate/util/test_system.py | 6 --- 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11b700eab..94373c39a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,5 +1,5 @@ name: Build legate.core -on: +on: push: branches-ignore: - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) @@ -63,4 +63,66 @@ jobs: uses: actions/upload-artifact@v3 with: name: build-log - path: ./**/${{ env.COMMIT }}-build.log.gpg \ No newline at end of file + path: ./**/${{ env.COMMIT }}-build.log.gpg + test: + if: ${{ github.repository == 'nv-legate/legate.core' }} + runs-on: self-hosted + needs: build + strategy: + fail-fast: false + matrix: + include: + - {name: mypy, options: mypy, log: mypy} + - {name: pytest unit tests, options: unit, log: pytest} + name: ${{ matrix.name }} + steps: + - name: Dump GitHub context + env: + GITHUB_CONTEXT: ${{ toJSON(github) }} + run: echo "$GITHUB_CONTEXT" + - name: Dump job context + env: + JOB_CONTEXT: ${{ toJSON(job) }} + run: echo "$JOB_CONTEXT" + - name: Dump steps context + env: + STEPS_CONTEXT: ${{ toJSON(steps) }} + run: echo "$STEPS_CONTEXT" + - name: Dump runner context + env: + RUNNER_CONTEXT: ${{ toJSON(runner) }} + run: echo "$RUNNER_CONTEXT" + - name: Dump strategy context + env: + STRATEGY_CONTEXT: ${{ toJSON(strategy) }} + run: echo "$STRATEGY_CONTEXT" + - name: Dump matrix context + env: + MATRIX_CONTEXT: ${{ toJSON(matrix) }} + run: echo "$MATRIX_CONTEXT" + - name: Prepare + run: | + /data/github-runner/legate-bin/setup.sh + cd legate-ci/github-ci/legate.core + if [[ ! -d ngc-artifacts ]] + then + mkdir ngc-artifacts + else + rm -rf ngc-artifacts/* + fi + - name: Test + run: | + cd legate-ci/github-ci/legate.core + ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 + - name: Process output + if: always() + run: | + cd legate-ci/github-ci/legate.core + /data/github-runner/legate-bin/encrypt.sh ${COMMIT}-test-${{ matrix.log }}.log + cat *artifacts/*/* + - name: Upload Log + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-${{ matrix.log }}-log + path: ./**/${{ env.COMMIT }}-test-${{ matrix.log }}.log.gpg diff --git a/tests/unit/legate/driver/test_main.py b/tests/unit/legate/driver/test_main.py index a5537afba..c0dfd07d5 100644 --- a/tests/unit/legate/driver/test_main.py +++ b/tests/unit/legate/driver/test_main.py @@ -16,6 +16,7 @@ import sys +import pytest from pytest_mock import MockerFixture import legate.driver as m @@ -27,6 +28,9 @@ # all the expected plumbing is hooked up as it is supposed to be +# TODO: this test with the fake argv path does not work for the way +# legate is installed in CI, so skip for now. 
+@pytest.mark.skip def test_main(mocker: MockerFixture) -> None: import legate.driver.config import legate.driver.driver @@ -36,7 +40,7 @@ def test_main(mocker: MockerFixture) -> None: system_spy = mocker.spy(legate.util.system.System, "__init__") driver_spy = mocker.spy(legate.driver.driver.Driver, "__init__") mocker.patch("legate.driver.driver.Driver.run", return_value=123) - mocker.patch.object(sys, "argv", ["foo", "bar"]) + mocker.patch.object(sys, "argv", ["/some/path/foo", "bar"]) result = m.main() diff --git a/tests/unit/legate/jupyter/test_main.py b/tests/unit/legate/jupyter/test_main.py index 0e0159dc9..a92b071c7 100644 --- a/tests/unit/legate/jupyter/test_main.py +++ b/tests/unit/legate/jupyter/test_main.py @@ -16,6 +16,7 @@ import sys +import pytest from pytest_mock import MockerFixture import legate.jupyter as m @@ -27,6 +28,9 @@ # all the expected plumbing is hooked up as it is supposed to be +# TODO: this test with the fake argv path does not work for the way +# legate is installed in CI, so skip for now. +@pytest.mark.skip def test_main(mocker: MockerFixture) -> None: import legate.driver.driver import legate.jupyter.config @@ -37,7 +41,9 @@ def test_main(mocker: MockerFixture) -> None: driver_spy = mocker.spy(legate.driver.driver.Driver, "__init__") generate_spy = mocker.spy(legate.jupyter.kernel, "generate_kernel_spec") install_mock = mocker.patch("legate.jupyter.kernel.install_kernel_spec") - mocker.patch.object(sys, "argv", ["legate-jupyter", "--name", "foo"]) + mocker.patch.object( + sys, "argv", ["/some/path/legate-jupyter", "--name", "foo"] + ) m.main() diff --git a/tests/unit/legate/tester/test_config.py b/tests/unit/legate/tester/test_config.py index f0e351caf..ac7b30f07 100644 --- a/tests/unit/legate/tester/test_config.py +++ b/tests/unit/legate/tester/test_config.py @@ -17,6 +17,7 @@ """ from __future__ import annotations +import os from pathlib import Path, PurePath import pytest @@ -64,7 +65,7 @@ def test_default_init(self) -> None: assert c.legate_dir is None assert c.extra_args == [] - assert c.root_dir == PurePath(m.__file__).parents[2] + assert c.root_dir == PurePath(os.getcwd()) # TODO (bv) restore when generalized # assert len(c.test_files) > 0 diff --git a/tests/unit/legate/util/test_system.py b/tests/unit/legate/util/test_system.py index 38db9cc0b..0115e417f 100644 --- a/tests/unit/legate/util/test_system.py +++ b/tests/unit/legate/util/test_system.py @@ -98,12 +98,6 @@ def test_cpus(self) -> None: assert len(cpus) > 0 assert all(len(cpu.ids) > 0 for cpu in cpus) - @pytest.mark.skipif(platform.system() != "Linux", reason="Linux test") - def test_gpus_linux(self) -> None: - s = m.System() - # can't really assume / test much here - s.gpus - @pytest.mark.skipif(platform.system() != "Darwin", reason="OSX test") def test_gpus_osx(self) -> None: s = m.System() From 0c68b4c9868cec53a3fb612269ea8282d7144b32 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 1 Dec 2022 15:28:02 -0800 Subject: [PATCH 086/121] Fix non-CUDA build (#506) --- src/core/task/return.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/task/return.cc b/src/core/task/return.cc index e11640616..98c70419a 100644 --- a/src/core/task/return.cc +++ b/src/core/task/return.cc @@ -211,6 +211,7 @@ void ReturnValues::legion_serialize(void* buffer) const // Special case with a single scalar if (return_values_.size() == 1) { auto& ret = return_values_.front(); +#ifdef LEGATE_USE_CUDA if (ret.is_device_value()) { #ifdef DEBUG_LEGATE assert(Processor::get_executing_processor().kind() 
== Processor::Kind::TOC_PROC); @@ -221,6 +222,7 @@ void ReturnValues::legion_serialize(void* buffer) const cudaMemcpyDeviceToHost, cuda::StreamPool::get_stream_pool().get_stream())); } else +#endif memcpy(buffer, ret.ptr(), ret.size()); return; } From 8a9d35b4e58dbee74bfbc38028568baf30a4e4ba Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Thu, 1 Dec 2022 16:27:21 -0800 Subject: [PATCH 087/121] legate/core: reset storage key partition in Store::reset_key_partition (#507) Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/store.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/legate/core/store.py b/legate/core/store.py index 2f4e2f9a7..0d55cbf62 100644 --- a/legate/core/store.py +++ b/legate/core/store.py @@ -1249,6 +1249,8 @@ def set_key_partition(self, partition: PartitionBase) -> None: def reset_key_partition(self) -> None: runtime.partition_manager.reset_store_key_partition(self._unique_id) + # Also reset the storage's key partition. + self._storage.reset_key_partition() def compute_key_partition( self, restrictions: tuple[Restriction, ...] From ab2f34fc0d354dfa1f627fcd857bad49303241f8 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 2 Dec 2022 12:41:40 -0800 Subject: [PATCH 088/121] Don't nocr for single node (#508) * Don't nocr for single node * handle multi-rank more generally --- legate/driver/config.py | 6 +++- tests/unit/legate/driver/test_command.py | 37 +++++++++++++++++++++++- tests/unit/legate/driver/test_config.py | 25 ++++++++++++++-- 3 files changed, 64 insertions(+), 4 deletions(-) diff --git a/legate/driver/config.py b/legate/driver/config.py index 6a526214f..0be36f959 100644 --- a/legate/driver/config.py +++ b/legate/driver/config.py @@ -206,7 +206,11 @@ def console(self) -> bool: return not any(opt.endswith(".py") for opt in self.user_opts) def _fixup_nocr(self, args: Namespace) -> None: - if self.console and not args.not_control_replicable: + # this is slightly duplicative of MultiNode.ranks property, but fixup + # checks happen before sub-configs are initialized from args + ranks = int(args.nodes) * int(args.ranks_per_node) + + if self.console and not args.not_control_replicable and ranks > 1: print(warn("Disabling control replication for interactive run")) args.not_control_replicable = True diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index 1c2aa5355..38f247ea3 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -519,11 +519,46 @@ def test_default(self, genobjs: GenObjs) -> None: assert result == () - def test_console(self, genobjs: GenObjs) -> None: + def test_console_single_node(self, genobjs: GenObjs) -> None: config, system, launcher = genobjs([], fake_module=None) result = m.cmd_nocr(config, system, launcher) + assert result == () + + @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) + @pytest.mark.parametrize("rank", ("0", "1", "2")) + def test_console_multi_node( + self, genobjs: GenObjs, rank: str, rank_var: dict[str, str] + ) -> None: + config, system, launcher = genobjs( + # passing --nodes is not usually necessary for genobjs but we + # are probing a "fixup" check that inspect args directly + ["--nodes", "2"], + multi_rank=(2, 1), + rank_env={rank_var: rank}, + fake_module=None, + ) + + result = m.cmd_nocr(config, system, launcher) + + assert result == ("--nocr",) + + @pytest.mark.parametrize("rank_var", RANK_ENV_VARS) + def test_console_multi_rank( + self, genobjs: GenObjs, rank_var: dict[str, str] + ) -> None: + 
config, system, launcher = genobjs( + # passing --ranks-per-node is not usually necessary for genobjs + # but we are probing a "fixup" check that inspect args directly + ["--ranks-per-node", "2"], + multi_rank=(1, 2), + rank_env={rank_var: "0"}, + fake_module=None, + ) + + result = m.cmd_nocr(config, system, launcher) + assert result == ("--nocr",) def test_with_option(self, genobjs: GenObjs) -> None: diff --git a/tests/unit/legate/driver/test_config.py b/tests/unit/legate/driver/test_config.py index 483719a6e..91b6f5056 100644 --- a/tests/unit/legate/driver/test_config.py +++ b/tests/unit/legate/driver/test_config.py @@ -284,7 +284,7 @@ def test_default_init(self) -> None: assert c.multi_node == m.MultiNode( nodes=defaults.LEGATE_NODES, ranks_per_node=defaults.LEGATE_RANKS_PER_NODE, - not_control_replicable=True, + not_control_replicable=False, launcher="none", launcher_extra=[], ) @@ -373,9 +373,30 @@ def test_arg_conversions(self, mocker: MockerFixture) -> None: ] ) - def test_nocr_fixup(self, capsys: Capsys) -> None: + def test_nocr_fixup_default_single_node(self, capsys: Capsys) -> None: c = m.Config(["legate"]) + assert c.console + assert not c.multi_node.not_control_replicable + + out, _ = capsys.readouterr() + assert scrub(out).strip() == "" + + def test_nocr_fixup_multi_node(self, capsys: Capsys) -> None: + c = m.Config(["legate", "--nodes", "2"]) + + assert c.console + assert c.multi_node.not_control_replicable + + out, _ = capsys.readouterr() + assert ( + scrub(out).strip() + == "WARNING: Disabling control replication for interactive run" + ) + + def test_nocr_fixup_multi_rank(self, capsys: Capsys) -> None: + c = m.Config(["legate", "--ranks-per-node", "2"]) + assert c.console assert c.multi_node.not_control_replicable From 363c478abda48aee0bcddfcf0e24beb1f7361439 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 12 Dec 2022 11:43:19 -0800 Subject: [PATCH 089/121] Fix for 509 (#510) * Retry constraint solving with fewer key partitions when the first round makes the operation sequential * Add comments to the new code --- legate/core/solver.py | 158 +++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 48 deletions(-) diff --git a/legate/core/solver.py b/legate/core/solver.py index d34f0d62b..8efe88866 100644 --- a/legate/core/solver.py +++ b/legate/core/solver.py @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Generic, List, Optional, TypeVar +from typing import TYPE_CHECKING, Generic, List, Optional, Tuple, TypeVar from . import FieldSpace, Future, Rect from .constraints import Alignment, Broadcast, Containment, PartSym @@ -350,13 +350,16 @@ def compute_launch_shape( partitions: dict[PartSym, PartitionBase], all_outputs: set[Store], unbound_ndim: Optional[int], - ) -> Optional[Shape]: + # The Boolean return value denotes whether the computed launch shape + # is "final". If it's True, there's no room for the solver to improve + # the quality of parallelization. 
+ ) -> Tuple[Optional[Shape], bool]: # We filter out the cases where any of the outputs is assigned # to replication, in which case the operation must be performed # sequentially for unknown, part in partitions.items(): if unknown.store in all_outputs and part is REPLICATE: - return None + return None, True # If we're here, this means that replicated stores are safe to access # in parallel, so we filter those out to determine the launch domain @@ -364,7 +367,7 @@ def compute_launch_shape( # If all stores are replicated, we can't parallelize the operation if len(parts) == 0: - return None + return None, True # Here we check if all partitions agree on the color shape must_be_1d_launch = False @@ -382,7 +385,7 @@ def compute_launch_shape( # we can't use a 1-D launch domain, hence falling back to # a sequential launch if unbound_ndim is not None and unbound_ndim != 1: - return None + return None, True # If all color spaces don't have the same number of colors, # it means some inputs are much smaller than the others @@ -393,15 +396,80 @@ def compute_launch_shape( assert part.color_shape is not None volumes.add(part.color_shape.volume()) if len(volumes) > 1: - return None + return None, False else: - return Shape(volumes) + return Shape(volumes), True # If there is an unbound store, the store's dimensionality must be # the same as that of the launch domain elif unbound_ndim is None or unbound_ndim == launch_shape.ndim: - return launch_shape + return launch_shape, True else: - return None + return None, True + + def _solve_store_constraints( + self, + partitions: dict[PartSym, PartitionBase], + unknowns: list[PartSym], + dependent: dict[PartSym, Expr], + all_restrictions: dict[PartSym, Restrictions], + constraints: EqClass[PartSym], + must_be_even: OrderedSet[PartSym], + ) -> Tuple[dict[PartSym, PartitionBase], set[PartSym]]: + result = partitions.copy() + key_parts: set[PartSym] = set() + + for unknown in unknowns: + if unknown in result: + continue + elif unknown in dependent: + continue + + store = unknown.store + restrictions = all_restrictions[unknown] + cls = constraints.find(unknown) + + partition = store.compute_key_partition(restrictions) + if not partition.even and len(cls) > 1: + partition, unknown = self.maybe_find_alternative_key_partition( + partition, + unknown, + cls, + restrictions, + must_be_even, + ) + key_parts.add(unknown) + + for to_align in cls: + if to_align in result: + continue + result[to_align] = partition + + for rhs, lhs in dependent.items(): + expr = lhs.subst(result).reduce() + if TYPE_CHECKING: + assert isinstance(expr, Lit) + result[rhs] = expr._part + + return result, key_parts + + @staticmethod + def _reset_less_optimal_partitions( + partitions: dict[PartSym, PartitionBase], + ) -> bool: + valid_parts = [ + (part.color_shape.volume(), unknown) + for unknown, part in partitions.items() + if part.color_shape is not None + ] + max_dop = max(dop for dop, _ in valid_parts) + reset_any = False + for dop, unknown in valid_parts: + if dop == max_dop: + continue + reset_any = True + unknown.store.reset_key_partition() + + return reset_any def partition_stores(self) -> Strategy: unknowns: OrderedSet[PartSym] = OrderedSet() @@ -476,45 +544,39 @@ def cost(unknown: PartSym) -> tuple[int, bool]: not store.has_key_partition(all_restrictions[unknown]), ) - sorted_unknowns = sorted(unknowns, key=cost) - - key_parts = set() - for unknown in sorted_unknowns: - if unknown in partitions: - continue - elif unknown in dependent: - continue - - store = unknown.store - restrictions = 
all_restrictions[unknown] - cls = constraints.find(unknown) - - partition = store.compute_key_partition(restrictions) - if not partition.even and len(cls) > 1: - partition, unknown = self.maybe_find_alternative_key_partition( - partition, - unknown, - cls, - restrictions, - must_be_even, - ) - key_parts.add(unknown) + result: dict[PartSym, PartitionBase] + key_parts: set[PartSym] + launch_shape: Optional[Shape] + + can_retry = True + while True: + result, key_parts = self._solve_store_constraints( + partitions, + sorted(unknowns, key=cost), + dependent, + all_restrictions, + constraints, + must_be_even, + ) - for to_align in cls: - if to_align in partitions: + launch_shape, done = self.compute_launch_shape( + result, all_outputs, unbound_ndim + ) + # When partitions have different numbers of chunks, the solver + # normally decides to serialize the operation, as there's no + # obvious mapping between the partitions. However, it is + # sometimes possible to recover parallelism by searching for + # an alternative solution to the given set of partitioning + # constraints, especially when some of the stores have cached key + # partitions that are not computed for themselves but copied from + # others due to alignments. + if can_retry and not done: + # We only retry once because resetting the cached key + # partitions followed recomputing key partitions is + # idempotent. + can_retry = False + if self._reset_less_optimal_partitions(result): continue - partitions[to_align] = partition - - for rhs, lhs in dependent.items(): - expr = lhs.subst(partitions).reduce() - if TYPE_CHECKING: - assert isinstance(expr, Lit) - partitions[rhs] = expr._part - - launch_shape = self.compute_launch_shape( - partitions, all_outputs, unbound_ndim - ) + break - return Strategy( - launch_shape, partitions, fspaces, key_parts, constraints - ) + return Strategy(launch_shape, result, fspaces, key_parts, constraints) From bce684e6d7b651b3fbe6be89badeea8877c56067 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 22:04:09 -0800 Subject: [PATCH 090/121] [pre-commit.ci] pre-commit autoupdate (#513) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/isort: 5.10.1 → 5.11.1](https://github.com/PyCQA/isort/compare/5.10.1...5.11.1) - [github.com/psf/black: 22.10.0 → 22.12.0](https://github.com/psf/black/compare/22.10.0...22.12.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4635440a3..bb12ddb99 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,10 +1,10 @@ repos: - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + rev: 5.11.1 hooks: - id: isort - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 22.12.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 From 4d6fae5a008238324669487212c5b9a3583130d4 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 15 Dec 2022 10:46:47 -0800 Subject: [PATCH 091/121] fix uninstall instructions for jupyter extension (#514) --- legate/jupyter/kernel.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/legate/jupyter/kernel.py b/legate/jupyter/kernel.py index e71604ed8..daadae9ff 100644 --- a/legate/jupyter/kernel.py +++ b/legate/jupyter/kernel.py @@ -90,9 +90,11 @@ def install_kernel_spec(spec: 
KernelSpec, config: Config) -> None: except NoSuchKernel: pass else: + # inexplicably, install_kernel_spec calls lower on the supplied kernel + # name before using, so we need to call lower for this advice to work msg = error( f"kernel spec {spec_name!r} already exists. Remove it by " - f"running 'jupyter kernelspec uninstall {spec_name!r}, " + f"running: 'jupyter kernelspec uninstall {spec_name.lower()}', " "or choose a new kernel name." ) print(msg) From dd8b87f22d1773509f5895696f2460c7b54102ff Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 16 Dec 2022 14:58:10 -0800 Subject: [PATCH 092/121] Change the default legion branch to collective for now (#515) --- install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.py b/install.py index fdff8e692..e3dfdef1c 100755 --- a/install.py +++ b/install.py @@ -724,7 +724,7 @@ def driver(): "--legion-branch", dest="legion_branch", required=False, - default="control_replication", + default="collective", help="Legion branch to build Legate with.", ) args, unknown = parser.parse_known_args() From 080aa750cb1d9b2d6388417b63026cca78195934 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Sat, 17 Dec 2022 02:25:54 +0200 Subject: [PATCH 093/121] Improve build documentation (#517) * Install ninja on dev envs, for better build error messages * Cosmetic changes * Add documentation for some more dependencies * Put the basic build workflow clearly on the top * Typo --- BUILD.md | 201 +++++++++++++++++++-------------- README.md | 4 - scripts/generate-conda-envs.py | 1 + 3 files changed, 118 insertions(+), 88 deletions(-) diff --git a/BUILD.md b/BUILD.md index f14dd8710..2191c2c7b 100644 --- a/BUILD.md +++ b/BUILD.md @@ -15,13 +15,12 @@ limitations under the License. --> -# TL;DR +# Basic build -1) Check if there are specialized scripts available for your cluster at [nv-legate/quickstart](https://github.com/nv-legate/quickstart). -2) [Install dependencies from conda](#getting-dependencies-through-conda) -3) [Build using install.py](#using-installpy) - -# Getting dependencies +If you are building on a cluster, first check if there are specialized scripts +available for your cluster at +[nv-legate/quickstart](https://github.com/nv-legate/quickstart). Even if your +specific cluster is not covered, you may be able to adapt an existing workflow. ## Getting dependencies through conda @@ -40,7 +39,7 @@ $ ./scripts/generate-conda-envs.py --python 3.10 --ctk 11.7 --os linux --compile Run this script with `-h` to see all available configuration options for the generated environment file (e.g. all the supported Python versions). See the -[Notable Dependencies](#notable-dependencies) section for more details. +[Dependencies](#dependency-listing) section for more details. Once you have this environment file, you can install the required packages by creating a new conda environment: @@ -55,20 +54,77 @@ or by updating an existing environment: conda env update -f .yaml ``` -## Notable dependencies +## Building through install.py + +The Legate Core repository comes with a helper `install.py` script in the +top-level directory, that will build the C++ parts of the library and install +the C++ and Python components under the currently active Python environment. 
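For a basic CPU-only build it is usually enough to run the script with no arguments (a minimal sketch; see the `--help` flag described below for the full list of options):

```shell
./install.py
```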
+ +To add GPU support, use the `--cuda` flag: + +```shell +./install.py --cuda +``` + +You can specify the CUDA toolkit directory and the CUDA architecture you want to +target using the `--with-cuda` and `--arch` flags, e.g.: + +```shell +./install.py --cuda --with-cuda /usr/local/cuda/ --arch ampere +``` + +By default the script relies on CMake's auto-detection for these settings. +CMake will first search the currently active Python/conda environment +for dependencies, then any common system-wide installation directories (e.g. +`/usr/lib`). If a dependency cannot be found but is publicly available in source +form (e.g. OpenBLAS), cmake will fetch and build it automatically. You can +override this search by providing an install location for any dependency +explicitly, using a `--with-` flag, e.g. `--with-nccl` and +`--with-openblas`. + +For multi-node execution Legate uses [GASNet](https://gasnet.lbl.gov/) which can be +requested using the `--network gasnet1` or `--network gasnetex` flag. By default +GASNet will be automatically downloaded and built, but if you have an existing +installation then you can inform the install script using the `--with-gasnet` flag. +You also need to specify the interconnect network of the target machine using the +`--conduit` flag. + +For example this would be an installation for a +[DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): + +```shell +./install.py --network gasnet1 --conduit ibv --cuda --arch ampere +``` + +Alternatively, here is an install line for the +[Piz-Daint](https://www.cscs.ch/computers/dismissed/piz-daint-piz-dora/) supercomputer: -### OS (`--os` option) +```shell +./install.py --network gasnet1 --conduit aries --cuda --arch pascal +``` + +To see all available configuration options, run with the `--help` flag: + +```shell +./install.py --help +``` + +# Advanced topics + +## Dependency listing + +### OS (`--os` flag) Legate has been tested on Linux and MacOS, although only a few flavors of Linux such as Ubuntu have been thoroughly tested. There is currently no support for Windows. -### Python >= 3.8 (`--python` option) +### Python >= 3.8 (`--python` flag) In terms of Python compatibility, Legate *roughly* follows the timeline outlined in [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html). -### C++17 compatible compiler (`--compilers` option) +### C++17 compatible compiler (`--compilers` flag) For example: g++, clang, or nvc++. When creating an environment using the `--compilers` flag, an appropriate compiler for the current system will be @@ -91,7 +147,7 @@ stubs, are not distributed through conda. These must instead be installed using [system-level packages](https://developer.nvidia.com/cuda-downloads). Independent of the system-level CUDA installation, conda will need to install an -environment-local copy of the CUDA toolkit (which is what the `--ctk` option +environment-local copy of the CUDA toolkit (which is what the `--ctk` flag controls). To avoid versioning conflicts it is safest to match the version of CUDA installed system-wide on your machine @@ -100,12 +156,51 @@ architectures. You can use Legate with Pascal GPUs as well, but there could be issues due to lack of independent thread scheduling. Please report any such issues on GitHub. -### Fortran compiler (optional) +### CUDA Libraries (optional) + +Only necessary if you wish to run with Nvidia GPUs. 
+ +The following libraries are included automatically in CUDA-enabled environment +files: + +- `cutensor` +- `nccl` + +If you wish to provide alternative installations for these, then you can remove +them from the environment file and pass the corresponding `--with-` flag +to `install.py`. + +### Build tools + +The following tools are used for building Legate, and are automatically included +in the environment file: -Only necessary if you wish to build OpenBLAS from source. +- `cmake` +- `git` +- `make` +- `ninja` (this is optional, but produces more informative build output) +- `scikit-build` -Not included by default in the generated conda environment files; install -`fortran-compiler` from `conda-forge` if you need it. +### OpenBLAS + +This library is automatically pulled from conda. If you wish to provide an +alternative installation, then you can manually remove `openblas` from the +generated environment file and pass `--with-openblas` to `install.py`. + +Note that you will need to get a Fortran compiler before you can build OpenBLAS +from source, e.g. by pulling `fortran-compiler` from `conda-forge`. + +If you wish to compile Legate with OpenMP support, then you need a build of +OpenBLAS configured with the following options: + +- `USE_THREAD=1` +- `USE_OPENMP=1` +- `NUM_PARALLEL=32` (or at least as many as the NUMA domains on the target + machine) -- The `NUM_PARALLEL` flag defines how many instances of OpenBLAS's + calculation API can run in parallel. Legate will typically instantiate a + separate OpenMP group per NUMA domain, and each group can launch independent + BLAS work. If `NUM_PARALLEL` is not high enough, some of this parallel work + will be serialized. ### Numactl (optional) @@ -114,7 +209,7 @@ Required to support CPU and memory binding in the Legate launcher. Not available on conda; typically available through the system-level package manager. -### MPI (`--openmpi` option; optional) +### MPI (`--openmpi` flag; optional) Only necessary if you wish to run on multiple nodes. @@ -139,8 +234,9 @@ If using UCX, a build of UCX configured with `--enable-mt` is required. If you do not wish to use conda for some (or all) of the dependencies, you can remove the corresponding entries from the environment file before passing it to -conda. See [the `install.py` section](#using-installpy) for instructions on how -to provide alternative locations for these dependencies to the build process. +conda. See [the `install.py` section](#building-through-installpy) for +instructions on how to provide alternative locations for these dependencies to +the build process. Note that this is likely to result in conflicts between conda-provided and system-provided libraries. @@ -176,64 +272,7 @@ This way you can make sure that the (typically more recent) conda version of any common library will be preferred over the system-wide one, no matter which component requests it first. -# Building for Users - -## Using install.py - -The Legate Core repository comes with a helper `install.py` script in the -top-level directory, that will build the C++ parts of the library and install -the C++ and Python components under the currently active Python environment. 
- -To add GPU support, use the `--cuda` flag: - -```shell -./install.py --cuda -``` - -You can specify the CUDA toolkit directory and the CUDA architecture you want to -target using the `--with-cuda` and `--arch` flags, e.g.: - -```shell -./install.py --cuda --with-cuda /usr/local/cuda/ --arch ampere -``` - -By default the script relies on CMake's auto-detection for these settings. -CMake will first search the currently active Python/conda environment -for dependencies, then any common system-wide installation directories (e.g. -`/usr/lib`). If a dependency cannot be found but is publicly available in source -form (e.g. OpenBLAS), cmake will fetch and build it automatically. You can -override this search by providing an install location for any dependency -explicitly, using a `--with-dep` flag, e.g. `--with-nccl` and -`--with-openblas`. - -For multi-node execution Legate uses [GASNet](https://gasnet.lbl.gov/) which can be -requested using the `--network gasnet1` or `--network gasnetex` flag. By default -GASNet will be automatically downloaded and built, but if you have an existing -installation then you can inform the install script using the `--with-gasnet` flag. -You also need to specify the interconnect network of the target machine using the -`--conduit` flag. - -For example this would be an installation for a -[DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): - -```shell -./install.py --network gasnet1 --conduit ibv --cuda --arch ampere -``` - -Alternatively, here is an install line for the -[Piz-Daint](https://www.cscs.ch/computers/dismissed/piz-daint-piz-dora/) supercomputer: - -```shell -./install.py --network gasnet1 --conduit aries --cuda --arch pascal -``` - -To see all available configuration options, run with the `--help` flag: - -```shell -./install.py --help -``` - -## Using pip +## Building through pip Legate Core is not yet registered in a standard pip repository. However, users can still use the pip installer to build and install Legate Core. The following @@ -250,8 +289,6 @@ or $ python3 -m pip install . ``` -## Advanced Customization - Legate relies on CMake to select its toolchain and build flags. Users can set the environment variables `CXX` or `CXXFLAGS` prior to building to override the CMake defaults. @@ -271,9 +308,7 @@ An alternative syntax using `setup.py` with `scikit-build` is $ python setup.py install -- -DLegion_USE_CUDA:BOOL=ON ``` -# Building for Developers - -## Overview +## Building through pip & cmake pip uses [scikit-build](https://scikit-build.readthedocs.io/en/latest/) in `setup.py` to drive the build and installation. A `pip install` will trigger three general actions: @@ -288,8 +323,6 @@ After building the C++ libraries, the `pip install` can be done in "editable" mo This configures the Python site packages to import the Python source tree directly. The Python source can then be edited and used directly for testing without requiring another `pip install`. -## Example - There are several examples in the `scripts` folder. We walk through the steps in `build-separately-no-install.sh` here. diff --git a/README.md b/README.md index f713191e9..05aa5b159 100644 --- a/README.md +++ b/README.md @@ -226,10 +226,6 @@ conda install -c nvidia -c conda-forge -c legate legate-core The conda package is compatible with CUDA >= 11.4 (CUDA driver version >= r470), and Volta or later GPU architectures. 
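If you prefer to keep Legate out of your base environment, the same channels can be used to create a dedicated environment instead (a sketch; the environment name here is only an example):

```shell
conda create -n legate -c nvidia -c conda-forge -c legate legate-core
conda activate legate
```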
-Docker image build scripts, as well as specialized -install scripts for supported clusters are available on the -[quickstart](https://github.com/nv-legate/quickstart) repo. - See [BUILD.md](BUILD.md) for instructions on building Legate Core from source. ## How Do I Use Legate? diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index dc92d373d..9798d7680 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -85,6 +85,7 @@ def conda(self) -> Reqs: "cmake>=3.24,!=3.25.0", "git", "make", + "ninja", "scikit-build>=0.13.1", "setuptools>=60", "zlib", From b93b57a115286b5be96da33f85e4002aadcea811 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 19 Dec 2022 15:13:11 -0800 Subject: [PATCH 094/121] Map futures to zero-copy memory on GPUs (#518) --- src/core/mapping/base_mapper.cc | 6 ++++-- src/core/mapping/core_mapper.cc | 11 +---------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index c48428de4..bc80521bf 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -476,14 +476,16 @@ void BaseMapper::map_task(const MapperContext ctx, auto default_option = options.front(); auto generate_default_mappings = [&](auto& stores, bool exact) { for (auto& store : stores) { - auto mapping = StoreMapping::default_mapping(store, default_option, exact); if (store.is_future()) { + auto option = default_option == StoreTarget::FBMEM ? StoreTarget::ZCMEM : default_option; + auto mapping = StoreMapping::default_mapping(store, option, exact); auto fut_idx = store.future_index(); if (mapped_futures.find(fut_idx) != mapped_futures.end()) continue; mapped_futures.insert(fut_idx); for_futures.push_back(std::move(mapping)); } else { - auto key = store.unique_region_field_id(); + auto mapping = StoreMapping::default_mapping(store, default_option, exact); + auto key = store.unique_region_field_id(); if (mapped_regions.find(key) != mapped_regions.end()) continue; mapped_regions.insert(key); if (store.unbound()) diff --git a/src/core/mapping/core_mapper.cc b/src/core/mapping/core_mapper.cc index 7f7fbbaf3..89a5b4147 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -350,19 +350,10 @@ void CoreMapper::map_future_map_reduction(const MapperContext ctx, { output.serdez_upper_bound = LEGATE_MAX_SIZE_SCALAR_RETURN; -#ifdef LEGATE_MAP_FUTURE_MAP_REDUCTIONS_TO_GPU - // TODO: It's been reported that blindly mapping target instances of future map reductions - // to framebuffers hurts performance. Until we find a better mapping policy, we guard - // the current policy with a macro. 
- - // If this was joining exceptions, we don't want to put instances anywhere - // other than the system memory because they need serdez - if (input.tag == LEGATE_CORE_JOIN_EXCEPTION_TAG) return; if (!local_gpus.empty()) - for (auto& pair : local_frame_buffers) output.destination_memories.push_back(pair.second); + output.destination_memories.push_back(local_zerocopy_memory); else if (has_socket_mem) for (auto& pair : local_numa_domains) output.destination_memories.push_back(pair.second); -#endif } void CoreMapper::select_tunable_value(const MapperContext ctx, From 8381c518b30aec90af9b18564e535851c8f9370a Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 20 Dec 2022 12:07:30 -0800 Subject: [PATCH 095/121] Implementin logic for reuse of reduction instances (#511) Implementing logic for reuse of reduction instances --- src/core/mapping/base_mapper.cc | 45 +++++++++++++--- src/core/mapping/base_mapper.h | 2 + src/core/mapping/instance_manager.cc | 71 ++++++++++++++++++++++++ src/core/mapping/instance_manager.h | 81 +++++++++++++++++++++++++--- 4 files changed, 185 insertions(+), 14 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index bc80521bf..aedaeceb3 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -84,7 +84,8 @@ BaseMapper::BaseMapper(Runtime* rt, Machine m, const LibraryContext& ctx) total_nodes(get_total_nodes(m)), mapper_name(std::move(create_name(local_node))), logger(create_logger_name().c_str()), - local_instances(InstanceManager::get_instance_manager()) + local_instances(InstanceManager::get_instance_manager()), + reduction_instances(ReductionInstanceManager::get_instance_manager()) { // Query to find all our local processors Machine::ProcessorQuery local_procs(machine); @@ -684,9 +685,35 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, // Generate layout constraints from the store mapping LayoutConstraintSet layout_constraints; mapping.populate_layout_constraints(layout_constraints); + auto& fields = layout_constraints.field_constraint.field_set; - // If we're making a reduction instance, we should just make it now + // If we're making a reduction instance: if (redop != 0) { + // We need to hold the instance manager lock as we're about to try + // to find an instance + AutoLock reduction_lock(ctx, reduction_instances->manager_lock()); + + // This whole process has to appear atomic + runtime->disable_reentrant(ctx); + + // reuse reductions only for GPU tasks: + if (target_proc.kind() == Processor::TOC_PROC) { + // See if we already have it in our local instances + if (fields.size() == 1 && regions.size() == 1 && + reduction_instances->find_instance( + redop, regions.front(), fields.front(), target_memory, result, policy)) { +#ifdef DEBUG_LEGATE + logger.debug() << "Operation " << mappable.get_unique_id() + << ": reused cached reduction instance " << result << " for " + << regions.front(); +#endif + runtime->enable_reentrant(ctx); + // Needs acquire to keep the runtime happy + return true; + } + } + + // if we didn't find it, create one layout_constraints.add_constraint(SpecializedConstraint(REDUCTION_FOLD_SPECIALIZE, redop)); size_t footprint = 0; if (runtime->create_physical_instance(ctx, @@ -705,6 +732,14 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, for (LogicalRegion r : regions) msg << " " << r; msg << " (size: " << footprint << " bytes, memory: " << target_memory << ")"; #endif + if (target_proc.kind() == Processor::TOC_PROC) { + // store reduction instance + 
if (fields.size() == 1 && regions.size() == 1) { + auto fid = fields.front(); + reduction_instances->record_instance(redop, regions.front(), fid, result, policy); + } + } + runtime->enable_reentrant(ctx); // We already did the acquire return false; } @@ -713,14 +748,8 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, return true; } - auto& fields = layout_constraints.field_constraint.field_set; - - // We need to hold the instance manager lock as we're about to try to find an instance AutoLock lock(ctx, local_instances->manager_lock()); - - // This whole process has to appear atomic runtime->disable_reentrant(ctx); - // See if we already have it in our local instances if (fields.size() == 1 && regions.size() == 1 && local_instances->find_instance( diff --git a/src/core/mapping/base_mapper.h b/src/core/mapping/base_mapper.h index 17fdb2045..5e5bf1f49 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -31,6 +31,7 @@ namespace legate { namespace mapping { class InstanceManager; +class ReductionInstanceManager; enum class Strictness : bool { strict = true, @@ -368,6 +369,7 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { protected: InstanceManager* local_instances; + ReductionInstanceManager* reduction_instances; protected: // Used for n-D cyclic distribution diff --git a/src/core/mapping/instance_manager.cc b/src/core/mapping/instance_manager.cc index bc0adf0f6..511487892 100644 --- a/src/core/mapping/instance_manager.cc +++ b/src/core/mapping/instance_manager.cc @@ -302,6 +302,37 @@ void InstanceSet::dump_and_sanity_check() const for (auto& entry : instances_) assert(found_groups.count(entry.first) > 0); } +bool ReductionInstanceSet::find_instance(ReductionOpID& redop, + Region& region, + Instance& result, + const InstanceMappingPolicy& policy) const +{ + auto finder = instances_.find(region); + if (finder == instances_.end()) return false; + auto& spec = finder->second; + if (spec.policy == policy && spec.redop == redop) { + result = spec.instance; + return true; + } else + return false; +} + +void ReductionInstanceSet::record_instance(ReductionOpID& redop, + Region& region, + Instance& instance, + const InstanceMappingPolicy& policy) +{ + auto finder = instances_.find(region); + if (finder != instances_.end()) { + auto& spec = finder->second; + if (spec.policy != policy || spec.redop != redop) { + instances_.insert_or_assign(region, ReductionInstanceSpec(redop, instance, policy)); + } + } else { + instances_[region] = ReductionInstanceSpec(redop, instance, policy); + } +} + bool InstanceManager::find_instance(Region region, FieldID field_id, Memory memory, @@ -384,5 +415,45 @@ std::map InstanceManager::aggregate_instance_sizes() con return manager; } +bool ReductionInstanceManager::find_instance(ReductionOpID& redop, + Region region, + FieldID field_id, + Memory memory, + Instance& result, + const InstanceMappingPolicy& policy) +{ + auto finder = instance_sets_.find(FieldMemInfo(region.get_tree_id(), field_id, memory)); + return policy.allocation != AllocPolicy::MUST_ALLOC && finder != instance_sets_.end() && + finder->second.find_instance(redop, region, result, policy); +} + +void ReductionInstanceManager::record_instance(ReductionOpID& redop, + Region region, + FieldID fid, + Instance instance, + const InstanceMappingPolicy& policy) +{ + const auto mem = instance.get_location(); + const auto tid = instance.get_tree_id(); + + FieldMemInfo key(tid, fid, mem); + auto finder = instance_sets_.find(key); + if (finder != 
instance_sets_.end()) + instance_sets_[key].record_instance(redop, region, instance, policy); + else { + ReductionInstanceSet set; + set.record_instance(redop, region, instance, policy); + instance_sets_[key] = set; + } +} + +/*static*/ ReductionInstanceManager* ReductionInstanceManager::get_instance_manager() +{ + static ReductionInstanceManager* manager{nullptr}; + + if (nullptr == manager) manager = new ReductionInstanceManager(); + return manager; +} + } // namespace mapping } // namespace legate diff --git a/src/core/mapping/instance_manager.h b/src/core/mapping/instance_manager.h index 2861532cb..0b6b2918a 100644 --- a/src/core/mapping/instance_manager.h +++ b/src/core/mapping/instance_manager.h @@ -95,7 +95,45 @@ struct InstanceSet { std::map groups_; }; -class InstanceManager { +class ReductionInstanceSet { + public: + using Region = Legion::LogicalRegion; + using Instance = Legion::Mapping::PhysicalInstance; + using Domain = Legion::Domain; + using ReductionOpID = Legion::ReductionOpID; + + public: + struct ReductionInstanceSpec { + ReductionInstanceSpec() {} + ReductionInstanceSpec(const ReductionOpID& op, + const Instance& inst, + const InstanceMappingPolicy& po) + : redop(op), instance(inst), policy(po) + { + } + + ReductionOpID redop{0}; + Instance instance{}; + InstanceMappingPolicy policy{}; + }; + + public: + bool find_instance(ReductionOpID& redop, + Region& region, + Instance& result, + const InstanceMappingPolicy& policy) const; + + public: + void record_instance(ReductionOpID& redop, + Region& region, + Instance& instance, + const InstanceMappingPolicy& policy); + + private: + std::map instances_; +}; + +class BaseInstanceManager { public: using Region = Legion::LogicalRegion; using RegionTreeID = Legion::RegionTreeID; @@ -103,7 +141,6 @@ class InstanceManager { using Domain = Legion::Domain; using FieldID = Legion::FieldID; using Memory = Legion::Memory; - using RegionGroupP = std::shared_ptr; public: struct FieldMemInfo { @@ -132,6 +169,17 @@ class InstanceManager { Memory memory; }; + public: + Legion::Mapping::LocalLock& manager_lock() { return manager_lock_; } + + private: + Legion::Mapping::LocalLock manager_lock_{}; +}; + +class InstanceManager : public BaseInstanceManager { + public: + using RegionGroupP = std::shared_ptr; + public: bool find_instance(Region region, FieldID field_id, @@ -151,9 +199,6 @@ class InstanceManager { public: void erase(Instance inst); - public: - Legion::Mapping::LocalLock& manager_lock() { return manager_lock_; } - public: static InstanceManager* get_instance_manager(); @@ -162,7 +207,31 @@ class InstanceManager { private: std::map instance_sets_{}; - Legion::Mapping::LocalLock manager_lock_{}; +}; + +class ReductionInstanceManager : public BaseInstanceManager { + public: + using ReductionOpID = Legion::ReductionOpID; + + public: + bool find_instance(ReductionOpID& redop, + Region region, + FieldID field_id, + Memory memory, + Instance& result, + const InstanceMappingPolicy& policy = {}); + + void record_instance(ReductionOpID& redop, + Region region, + FieldID field_id, + Instance instance, + const InstanceMappingPolicy& policy = {}); + + public: + static ReductionInstanceManager* get_instance_manager(); + + private: + std::map instance_sets_{}; }; } // namespace mapping From 81e85341ed9fd4dfc3f29a76a35dda8f0bba5234 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Dec 2022 14:58:24 -0800 Subject: [PATCH 096/121] [pre-commit.ci] pre-commit autoupdate (#519) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/isort: 5.11.1 → v5.11.3](https://github.com/PyCQA/isort/compare/5.11.1...v5.11.3) - [github.com/pre-commit/mirrors-clang-format: v15.0.4 → v15.0.6](https://github.com/pre-commit/mirrors-clang-format/compare/v15.0.4...v15.0.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bb12ddb99..48d53dba9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/PyCQA/isort - rev: 5.11.1 + rev: v5.11.3 hooks: - id: isort - repo: https://github.com/psf/black @@ -12,7 +12,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v15.0.4' + rev: 'v15.0.6' hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ From 6d400aed6b92e1ec511fe6028391bb7309b42a1b Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 21 Dec 2022 22:28:39 -0800 Subject: [PATCH 097/121] Sharding functor for delinearizing functor (#520) * Two changes to the projection functor code: * Delinearizing functor is now an instance of LegateProjectionFunctor so we can derive a sharding functor from it * Projection functor creations and queries are now logged. Debugging code that renders projection functors is also improved. * Remove dead code * Use the stream API to print the log message --- legate/core/projection.py | 3 + legate/core/transform.py | 6 +- src/core/runtime/projection.cc | 166 +++++++++++++++++++++------------ 3 files changed, 115 insertions(+), 60 deletions(-) diff --git a/legate/core/projection.py b/legate/core/projection.py index 68989f13f..18d21a8cf 100644 --- a/legate/core/projection.py +++ b/legate/core/projection.py @@ -47,6 +47,7 @@ def __repr__(self) -> str: if self._repr is None: s = "" if self._weight != 0: + assert self._dim != -1 if self._weight != 1: s += f"{self._weight} * " s += f"COORD{self._dim}" @@ -55,6 +56,8 @@ def __repr__(self) -> str: s += f" + {self._offset}" else: s += f" - {abs(self._offset)}" + elif self._weight == 0: + s += "0" self._repr = s return self._repr diff --git a/legate/core/transform.py b/legate/core/transform.py index 817f08e25..9a7d5baa0 100644 --- a/legate/core/transform.py +++ b/legate/core/transform.py @@ -313,7 +313,11 @@ def invert_point(self, point: Shape) -> Shape: return point.insert(self._dim, self._index) def invert_symbolic_point(self, dims: SymbolicPoint) -> SymbolicPoint: - return dims[: self._dim] + (ProjExpr(-1),) + dims[self._dim :] + return ( + dims[: self._dim] + + (ProjExpr(dim=-1, weight=0),) + + dims[self._dim :] + ) def invert_restrictions(self, restrictions: Restrictions) -> Restrictions: left = restrictions[: self._dim] diff --git a/src/core/runtime/projection.cc b/src/core/runtime/projection.cc index 5b5809866..f13ff8021 100644 --- a/src/core/runtime/projection.cc +++ b/src/core/runtime/projection.cc @@ -22,27 +22,64 @@ #include "core/runtime/projection.h" #include "core/utilities/dispatch.h" +#include "legate_defines.h" using namespace Legion; namespace legate { -class DelinearizationFunctor : public ProjectionFunctor { +extern Logger log_legate; + +// This special functor overrides the default projection implementation because it needs +// to know the the target color space for delinearization. 
Also note that this functor's +// project_point passes through input points, as we already know they are always 1D points +// and the output will be linearized back to integers. +class DelinearizationFunctor : public LegateProjectionFunctor { public: DelinearizationFunctor(Runtime* runtime); public: virtual Legion::LogicalRegion project(Legion::LogicalPartition upper_bound, const Legion::DomainPoint& point, - const Legion::Domain& launch_domain); + const Legion::Domain& launch_domain) override; public: - virtual bool is_functional(void) const { return true; } - virtual bool is_exclusive(void) const { return true; } - virtual unsigned get_depth(void) const { return 0; } + virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, + const Legion::Domain& launch_domain) const override; }; -DelinearizationFunctor::DelinearizationFunctor(Runtime* runtime) : ProjectionFunctor(runtime) {} +template +class AffineFunctor : public LegateProjectionFunctor { + public: + AffineFunctor(Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets); + + public: + DomainPoint project_point(const DomainPoint& point, const Domain& launch_domain) const override; + + public: + static Legion::Transform create_transform(int32_t* dims, int32_t* weights); + + private: + const Legion::Transform transform_; + Point offsets_; +}; + +LegateProjectionFunctor::LegateProjectionFunctor(Runtime* rt) : ProjectionFunctor(rt) {} + +LogicalRegion LegateProjectionFunctor::project(LogicalPartition upper_bound, + const DomainPoint& point, + const Domain& launch_domain) +{ + const DomainPoint dp = project_point(point, launch_domain); + if (runtime->has_logical_subregion_by_color(upper_bound, dp)) + return runtime->get_logical_subregion_by_color(upper_bound, dp); + else + return LogicalRegion::NO_REGION; +} + +DelinearizationFunctor::DelinearizationFunctor(Runtime* runtime) : LegateProjectionFunctor(runtime) +{ +} LogicalRegion DelinearizationFunctor::project(LogicalPartition upper_bound, const DomainPoint& point, @@ -74,38 +111,12 @@ LogicalRegion DelinearizationFunctor::project(LogicalPartition upper_bound, return LogicalRegion::NO_REGION; } -LegateProjectionFunctor::LegateProjectionFunctor(Runtime* rt) : ProjectionFunctor(rt) {} - -LogicalRegion LegateProjectionFunctor::project(LogicalPartition upper_bound, - const DomainPoint& point, - const Domain& launch_domain) +Legion::DomainPoint DelinearizationFunctor::project_point(const Legion::DomainPoint& point, + const Legion::Domain& launch_domain) const { - const DomainPoint dp = project_point(point, launch_domain); - if (runtime->has_logical_subregion_by_color(upper_bound, dp)) - return runtime->get_logical_subregion_by_color(upper_bound, dp); - else - return LogicalRegion::NO_REGION; + return point; } -template -class AffineFunctor : public LegateProjectionFunctor { - public: - AffineFunctor(Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets); - - public: - DomainPoint project_point(const DomainPoint& point, const Domain& launch_domain) const override - { - return DomainPoint(transform_ * Point(point) + offsets_); - } - - public: - static Transform create_transform(int32_t* dims, int32_t* weights); - - private: - const Transform transform_; - Point offsets_; -}; - template AffineFunctor::AffineFunctor(Runtime* runtime, int32_t* dims, @@ -117,10 +128,17 @@ AffineFunctor::AffineFunctor(Runtime* runtime, } template -/*static*/ Transform AffineFunctor::create_transform( +DomainPoint AffineFunctor::project_point(const DomainPoint& point, + 
const Domain& launch_domain) const +{ + return DomainPoint(transform_ * Point(point) + offsets_); +} + +template +/*static*/ Legion::Transform AffineFunctor::create_transform( int32_t* dims, int32_t* weights) { - Transform transform; + Legion::Transform transform; for (int32_t tgt_dim = 0; tgt_dim < TGT_DIM; ++tgt_dim) for (int32_t src_dim = 0; src_dim < SRC_DIM; ++src_dim) transform[tgt_dim][src_dim] = 0; @@ -146,11 +164,54 @@ static std::unordered_map functor_table{ static std::mutex functor_table_lock{}; struct create_affine_functor_fn { + static void spec_to_string(std::stringstream& ss, + int32_t src_ndim, + int32_t tgt_ndim, + int32_t* dims, + int32_t* weights, + int32_t* offsets) + { + ss << "\\("; + for (int32_t idx = 0; idx < src_ndim; ++idx) { + if (idx != 0) ss << ","; + ss << "x" << idx; + } + ss << ")->("; + for (int32_t idx = 0; idx < tgt_ndim; ++idx) { + if (idx != 0) ss << ","; + auto dim = dims[idx]; + auto weight = weights[idx]; + auto offset = offsets[idx]; + if (dim != -1) + if (weight != 0) { + assert(dim != -1); + if (weight != 1) ss << weight << "*"; + ss << "x" << dim; + } + if (offset != 0) { + if (offset > 0) + ss << "+" << offset; + else + ss << "-" << -offset; + } else if (weight == 0) + ss << "0"; + } + ss << ")"; + } + template void operator()( Runtime* runtime, int32_t* dims, int32_t* weights, int32_t* offsets, ProjectionID proj_id) { auto functor = new AffineFunctor(runtime, dims, weights, offsets); +#ifdef DEBUG_LEGATE + std::stringstream ss; + ss << "Register projection functor: functor: " << functor << ", id: " << proj_id << ", "; + spec_to_string(ss, SRC_DIM, TGT_DIM, dims, weights, offsets); + log_legate.debug() << ss.str(); +#else + log_legate.debug("Register projection functor: functor: %p, id: %d", functor, proj_id); +#endif runtime->register_projection_functor(proj_id, functor, true /*silence warnings*/); const std::lock_guard lock(functor_table_lock); @@ -163,8 +224,12 @@ void register_legate_core_projection_functors(Legion::Runtime* runtime, { auto proj_id = context.get_projection_id(LEGATE_CORE_DELINEARIZE_PROJ_ID); auto functor = new DelinearizationFunctor(runtime); + log_legate.debug("Register delinearizing functor: functor: %p, id: %d", functor, proj_id); runtime->register_projection_functor(proj_id, functor, true /*silence warnings*/); - + { + const std::lock_guard lock(functor_table_lock); + functor_table[proj_id] = functor; + } identity_functor = new IdentityFunctor(runtime); } @@ -172,28 +237,11 @@ LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id) { if (0 == proj_id) return identity_functor; const std::lock_guard lock(functor_table_lock); - return functor_table[proj_id]; -} - -DomainPoint delinearize_future_map_domain(const DomainPoint& point, - const Domain& domain, - const Domain& range) -{ - int32_t ndim = range.dim; - - DomainPoint result; - result.dim = ndim; - - auto lo = range.lo(); - auto hi = range.hi(); - - int64_t idx = point[0]; - for (int32_t dim = ndim - 1; dim >= 0; --dim) { - int64_t extent = hi[dim] - lo[dim] + 1; - result[dim] = idx % extent; - idx = idx / extent; + auto result = functor_table[proj_id]; + if (nullptr == result) { + log_legate.debug("Failed to find projection functor of id %d", proj_id); + LEGATE_ABORT; } - return result; } From dc882cbab1b97bb0e2ec658e97357c2a70e464c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Dec 2022 14:25:44 +0000 Subject: [PATCH 098/121] [pre-commit.ci] pre-commit autoupdate 
(#522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/isort: v5.11.3 → 5.11.4](https://github.com/PyCQA/isort/compare/v5.11.3...5.11.4) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 48d53dba9..6ebe994d9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/PyCQA/isort - rev: v5.11.3 + rev: 5.11.4 hooks: - id: isort - repo: https://github.com/psf/black From 69e208912bbc030fd736570667ac87be43634fc8 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 28 Dec 2022 15:07:01 -0800 Subject: [PATCH 099/121] Fix a link in the README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 05aa5b159..7f13cc33e 100644 --- a/README.md +++ b/README.md @@ -310,7 +310,8 @@ line options, and their default values are as follows. ### Distributed Launch -If Legate is compiled with networking support ([see the installation section](#Installation)), +If Legate is compiled with networking support (see the +[installation section](#how-do-i-install-legate)), it can be run in parallel by using the `--nodes` option followed by the number of nodes to be used. Whenever the `--nodes` option is used, Legate will be launched using `mpirun`, even with `--nodes 1`. Without the `--nodes` option, no launcher will From 81d0e309338180f0dfb1ccd060b96e3a62f07f11 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Thu, 29 Dec 2022 14:42:00 -0800 Subject: [PATCH 100/121] New collective (#466) * adding logic for collective instances * making collective logic be optional * making use of collective instances be ON by default * making collective branch be default for Legion --- cmake/Modules/legate_core_options.cmake | 2 ++ cmake/thirdparty/get_legion.cmake | 2 +- legate_core_cpp.cmake | 4 ++++ src/core/mapping/base_mapper.cc | 10 ++++++++++ src/core/runtime/projection.cc | 18 ++++++++++++++++++ src/core/runtime/projection.h | 5 +++++ 6 files changed, 40 insertions(+), 1 deletion(-) diff --git a/cmake/Modules/legate_core_options.cmake b/cmake/Modules/legate_core_options.cmake index 0bf870fe6..158eae120 100644 --- a/cmake/Modules/legate_core_options.cmake +++ b/cmake/Modules/legate_core_options.cmake @@ -78,6 +78,8 @@ endif() option(legate_core_STATIC_CUDA_RUNTIME "Statically link the cuda runtime library" OFF) option(legate_core_EXCLUDE_LEGION_FROM_ALL "Exclude Legion targets from legate.core's 'all' target" OFF) +option(legate_core_COLLECTIVE "Use of collective instances" ON) + set_or_default(NCCL_DIR NCCL_PATH) set_or_default(Thrust_DIR THRUST_PATH) diff --git a/cmake/thirdparty/get_legion.cmake b/cmake/thirdparty/get_legion.cmake index 5faf54023..92ec30247 100644 --- a/cmake/thirdparty/get_legion.cmake +++ b/cmake/thirdparty/get_legion.cmake @@ -175,7 +175,7 @@ function(find_or_configure_legion) endfunction() if(NOT DEFINED legate_core_LEGION_BRANCH) - set(legate_core_LEGION_BRANCH control_replication) + set(legate_core_LEGION_BRANCH collective) endif() if(NOT DEFINED legate_core_LEGION_REPOSITORY) diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index 7502e501d..642c9ceb7 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -142,6 +142,10 @@ set(legate_core_CUDA_OPTIONS "") include(cmake/Modules/set_cpu_arch_flags.cmake) 
set_cpu_arch_flags(legate_core_CXX_OPTIONS) +if (legate_core_COLLECTIVE) + list(APPEND legate_core_CXX_DEFS LEGATE_USE_COLLECTIVE) +endif() + if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") list(APPEND legate_core_CXX_DEFS DEBUG_LEGATE) list(APPEND legate_core_CUDA_DEFS DEBUG_LEGATE) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index aedaeceb3..670db0329 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -208,6 +208,16 @@ void BaseMapper::select_task_options(const MapperContext ctx, const LegionTask& task, TaskOptions& output) { +#ifdef LEGATE_USE_COLLECTIVE + for (uint32_t idx = 0; idx < task.regions.size(); ++idx) { + auto& req = task.regions[idx]; + if ((req.handle_type == LEGION_SINGULAR_PROJECTION) || + (find_legate_projection_functor(req.projection)->is_collective())) { + output.check_collective_regions.insert(idx); + } + } +#endif + std::vector options; if (!local_gpus.empty() && has_variant(ctx, task, Processor::TOC_PROC)) options.push_back(TaskTarget::GPU); diff --git a/src/core/runtime/projection.cc b/src/core/runtime/projection.cc index f13ff8021..9b6947277 100644 --- a/src/core/runtime/projection.cc +++ b/src/core/runtime/projection.cc @@ -125,6 +125,24 @@ AffineFunctor::AffineFunctor(Runtime* runtime, : LegateProjectionFunctor(runtime), transform_(create_transform(dims, weights)) { for (int32_t dim = 0; dim < TGT_DIM; ++dim) offsets_[dim] = offsets[dim]; + + // mapping to a different dimension + if (SRC_DIM > TGT_DIM) { + set_collective(); + return; + } + + // find if there is `-1` in the dimensions + std::set unique; + for (int32_t dim = 0; dim < SRC_DIM; ++dim) { + if (dims[dim] == -1) { + set_collective(); + return; + } + unique.insert(dims[dim]); + } + // if there are repeated dimensions + if (unique.size() != SRC_DIM) set_collective(); } template diff --git a/src/core/runtime/projection.h b/src/core/runtime/projection.h index 740c4bef5..b363b52c2 100644 --- a/src/core/runtime/projection.h +++ b/src/core/runtime/projection.h @@ -38,10 +38,15 @@ class LegateProjectionFunctor : public Legion::ProjectionFunctor { virtual bool is_functional(void) const { return true; } virtual bool is_exclusive(void) const { return true; } virtual unsigned get_depth(void) const { return 0; } + bool is_collective() const { return is_collective_; } + void set_collective() { is_collective_ = true; } public: virtual Legion::DomainPoint project_point(const Legion::DomainPoint& point, const Legion::Domain& launch_domain) const = 0; + + private: + bool is_collective_ = false; }; void register_legate_core_projection_functors(Legion::Runtime* runtime, From c7039056732a05a91b2197be6e761c5d3b2fb4ac Mon Sep 17 00:00:00 2001 From: Mark Vaz Date: Wed, 4 Jan 2023 06:12:08 +1100 Subject: [PATCH 101/121] Update env gen script so OS type works for mac (#523) * Update environment generation scripts so OS type works for osx --- scripts/generate-conda-envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index 9798d7680..c4bc9059c 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -21,7 +21,7 @@ Req = str Reqs = Tuple[Req, ...] 
-OSType = Literal["linux", "darwin"] +OSType = Literal["linux", "osx"] class SectionConfig(Protocol): @@ -258,7 +258,7 @@ def filename(self) -> str: for compilers in (True, False) for openmpi in (True, False) ] + [ - EnvConfig("test", python, "darwin", "none", compilers, openmpi) + EnvConfig("test", python, "osx", "none", compilers, openmpi) for python in PYTHON_VERSIONS for compilers in (True, False) for openmpi in (True, False) From 2a3dc8336b285e450276e7c6376d44e3824a84f8 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 4 Jan 2023 13:19:58 -0800 Subject: [PATCH 102/121] Switch docs from recommonmark to myst-parser (#524) --- BUILD.md | 2 +- docs/legate/core/source/BUILD.md | 1 + docs/legate/core/source/conf.py | 5 +++-- docs/legate/core/source/index.rst | 1 + scripts/generate-conda-envs.py | 3 +-- 5 files changed, 7 insertions(+), 5 deletions(-) create mode 120000 docs/legate/core/source/BUILD.md diff --git a/BUILD.md b/BUILD.md index 2191c2c7b..8f749b0be 100644 --- a/BUILD.md +++ b/BUILD.md @@ -257,7 +257,7 @@ after to trip GLIBC's internal version checks, since the conda library expects to find symbols with more recent version numbers than what is available on the system-wide GLIBC: -```shell +``` /lib/x86_64-linux-gnu/libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /opt/conda/envs/legate/lib/libarrow.so) ``` diff --git a/docs/legate/core/source/BUILD.md b/docs/legate/core/source/BUILD.md new file mode 120000 index 000000000..ad5438194 --- /dev/null +++ b/docs/legate/core/source/BUILD.md @@ -0,0 +1 @@ +../../../../BUILD.md \ No newline at end of file diff --git a/docs/legate/core/source/conf.py b/docs/legate/core/source/conf.py index 8cfdf75d8..6ebb74e89 100644 --- a/docs/legate/core/source/conf.py +++ b/docs/legate/core/source/conf.py @@ -38,10 +38,11 @@ "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx_copybutton", - "sphinx_markdown_tables", - "recommonmark", + "myst_parser", ] +suppress_warnings = ["ref.myst"] + # The master toctree document. master_doc = "index" diff --git a/docs/legate/core/source/index.rst b/docs/legate/core/source/index.rst index 4c6f1afd1..5475b6fa6 100644 --- a/docs/legate/core/source/index.rst +++ b/docs/legate/core/source/index.rst @@ -5,6 +5,7 @@ Welcome to Legate Core's documentation! 
:maxdepth: 1 Overview + Build instructions API Reference Contributing Versions diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index c4bc9059c..677b42f48 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -156,9 +156,8 @@ def pip(self) -> Reqs: "jinja2", "markdown<3.4.0", "pydata-sphinx-theme", - "recommonmark", + "myst-parser", "sphinx-copybutton", - "sphinx-markdown-tables", "sphinx>=4.4.0", ) From 026fb6e700ed4cad9b918bbd582b4316502e0beb Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 5 Jan 2023 13:58:20 -0800 Subject: [PATCH 103/121] remove realm pylib check (#525) --- legate/driver/launcher.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/legate/driver/launcher.py b/legate/driver/launcher.py index 1f67046b5..78f34f307 100644 --- a/legate/driver/launcher.py +++ b/legate/driver/launcher.py @@ -15,12 +15,10 @@ from __future__ import annotations import os -import sys from pathlib import Path from typing import TYPE_CHECKING from ..util.fs import read_c_define -from ..util.ui import warn if TYPE_CHECKING: from ..util.system import System @@ -92,8 +90,6 @@ def __init__(self, config: ConfigProtocol, system: System) -> None: self.detected_rank_id = system.env[var] break - self._check_realm_python() - def __eq__(self, other: object) -> bool: return ( isinstance(other, type(self)) @@ -154,27 +150,6 @@ def is_launcher_var(name: str) -> bool: name.startswith(prefix) for prefix in LAUNCHER_VAR_PREFIXES ) - def _check_realm_python(self) -> None: - - # Make sure the version of Python used by Realm is the same as what the - # user is using currently. - realm_pylib = read_c_define( - self._system.legion_paths.realm_defines_h, "REALM_PYTHON_LIB" - ) - - if realm_pylib is None: - raise RuntimeError("Cannot determine Realm Python Lib") - - realm_home = Path(realm_pylib[1:-1]).parents[1] - if (current_home := Path(sys.executable).parents[1]) != realm_home: - print( - warn( - "Legate was compiled against the Python installation at " - f"{realm_home}, but you are currently using the Python " - f"installation at {current_home}" - ) - ) - def _compute_env(self) -> tuple[EnvDict, set[str]]: config = self._config system = self._system From 12d3693eefd925bb26a4522d34cbcb1e0eb13e7a Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 5 Jan 2023 15:05:27 -0800 Subject: [PATCH 104/121] Typo --- legate/driver/args.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legate/driver/args.py b/legate/driver/args.py index 9d6758a07..92bce362b 100644 --- a/legate/driver/args.py +++ b/legate/driver/args.py @@ -262,7 +262,7 @@ default=False, required=False, help="enable GASNet tracing (assumes GASNet was configured with " - "--enable--trace)", + "--enable-trace)", ) debugging.add_argument( From d0b89184288c529d7b3c1d700a0604050ce142f8 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 9 Jan 2023 14:40:33 +0900 Subject: [PATCH 105/121] More changes to map futures to zero-copy memory (#521) * More sensible placement of futures on zero-copy memory * Remove dead code --- src/core/data/store.cc | 8 ++++---- src/core/mapping/base_mapper.cc | 6 ++---- src/core/mapping/core_mapper.cc | 16 ++++++++++++++-- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core/data/store.cc b/src/core/data/store.cc index a4ca73f4b..c185ab602 100644 --- a/src/core/data/store.cc +++ b/src/core/data/store.cc @@ -136,13 +136,13 @@ FutureWrapper::FutureWrapper( #ifdef DEBUG_LEGATE assert(!initialize || 
future_.get_untyped_size() == field_size); #endif - auto proc = Processor::get_executing_processor(); + auto mem_kind = find_memory_kind_for_executing_processor( #ifdef LEGATE_NO_FUTURES_ON_FB - auto mem_kind = find_memory_kind_for_executing_processor(); + true #else - auto mem_kind = proc.kind() == Processor::Kind::TOC_PROC ? Memory::Kind::GPU_FB_MEM - : Memory::Kind::SYSTEM_MEM; + false #endif + ); if (initialize) { auto p_init_value = future_.get_buffer(mem_kind); #ifdef LEGATE_USE_CUDA diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 670db0329..bd0b72b6a 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -487,16 +487,14 @@ void BaseMapper::map_task(const MapperContext ctx, auto default_option = options.front(); auto generate_default_mappings = [&](auto& stores, bool exact) { for (auto& store : stores) { + auto mapping = StoreMapping::default_mapping(store, default_option, exact); if (store.is_future()) { - auto option = default_option == StoreTarget::FBMEM ? StoreTarget::ZCMEM : default_option; - auto mapping = StoreMapping::default_mapping(store, option, exact); auto fut_idx = store.future_index(); if (mapped_futures.find(fut_idx) != mapped_futures.end()) continue; mapped_futures.insert(fut_idx); for_futures.push_back(std::move(mapping)); } else { - auto mapping = StoreMapping::default_mapping(store, default_option, exact); - auto key = store.unique_region_field_id(); + auto key = store.unique_region_field_id(); if (mapped_regions.find(key) != mapped_regions.end()) continue; mapped_regions.insert(key); if (store.unbound()) diff --git a/src/core/mapping/core_mapper.cc b/src/core/mapping/core_mapper.cc index 89a5b4147..bb9389108 100644 --- a/src/core/mapping/core_mapper.cc +++ b/src/core/mapping/core_mapper.cc @@ -350,9 +350,21 @@ void CoreMapper::map_future_map_reduction(const MapperContext ctx, { output.serdez_upper_bound = LEGATE_MAX_SIZE_SCALAR_RETURN; - if (!local_gpus.empty()) + if (!local_gpus.empty()) { + // TODO: It's been reported that blindly mapping target instances of future map reductions + // to framebuffers hurts performance. Until we find a better mapping policy, we guard + // the current policy with a macro. 
+#ifdef LEGATE_MAP_FUTURE_MAP_REDUCTIONS_TO_GPU + // If this was joining exceptions, we should put instances on a host-visible memory + // because they need serdez + if (input.tag == LEGATE_CORE_JOIN_EXCEPTION_TAG) + output.destination_memories.push_back(local_zerocopy_memory); + else + for (auto& pair : local_frame_buffers) output.destination_memories.push_back(pair.second); +#else output.destination_memories.push_back(local_zerocopy_memory); - else if (has_socket_mem) +#endif + } else if (has_socket_mem) for (auto& pair : local_numa_domains) output.destination_memories.push_back(pair.second); } From 340e15684052d4cb6d37362b0182180a8f893efc Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 10 Jan 2023 14:30:58 -0800 Subject: [PATCH 106/121] Don't check for collective behavior when we have WRITE reqs (#526) --- src/core/mapping/base_mapper.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index bd0b72b6a..1fff1f157 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -211,6 +211,7 @@ void BaseMapper::select_task_options(const MapperContext ctx, #ifdef LEGATE_USE_COLLECTIVE for (uint32_t idx = 0; idx < task.regions.size(); ++idx) { auto& req = task.regions[idx]; + if (req.privilege & LEGION_WRITE_PRIV) continue; if ((req.handle_type == LEGION_SINGULAR_PROJECTION) || (find_legate_projection_functor(req.projection)->is_collective())) { output.check_collective_regions.insert(idx); From fdaac615f60147b4c3138f16597e8a5bd7244304 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 12 Jan 2023 13:27:54 -0800 Subject: [PATCH 107/121] All NCCL ranks on the same node must get the same NCCL_IB_HCA (#528) * Fix info message; --foo=ARG isn't actually accepted * Fix double printing in debug message * All NCCL ranks on the same node must get the same NCCL_IB_HCA --- bind.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/bind.sh b/bind.sh index 8ccf56203..a371be953 100755 --- a/bind.sh +++ b/bind.sh @@ -22,14 +22,14 @@ help() { Usage: bind.sh [OPTIONS]... -- APP... Options: - --launcher={mpirun|srun|jrun|auto|local} + --launcher {mpirun|srun|jrun|auto|local} Launcher type, used to set LEGATE_RANK If 'auto', attempt to find the launcher rank automatically If 'local', rank is set to "0". - --cpus=SPEC CPU binding specification, passed to numactl - --gpus=SPEC GPU binding specification, used to set CUDA_VISIBLE_DEVICES - --mems=SPEC Memory binding specification, passed to numactl - --nics=SPEC Network interface binding specification, used to set + --cpus SPEC CPU binding specification, passed to numactl + --gpus SPEC GPU binding specification, used to set CUDA_VISIBLE_DEVICES + --mems SPEC Memory binding specification, passed to numactl + --nics SPEC Network interface binding specification, used to set all of: UCX_NET_DEVICES, NCCL_IB_HCA, GASNET_NUM_QPS, and GASNET_IBV_PORTS --debug print out the final computed invocation before exectuting @@ -147,9 +147,15 @@ if [ -n "${nics+x}" ]; then nic="${nics[$local_rank]}" nic_array=(${nic//,/ }) export UCX_NET_DEVICES="${nic//,/:1,}":1 - export NCCL_IB_HCA="$nic" export GASNET_NUM_QPS="${#nic_array[@]}" export GASNET_IBV_PORTS="${nic//,/+}" + + # NCCL is supposed to detect the topology and use the right NIC automatically. + # NCCL env vars must be set the same way for all ranks on the same node, so + # the best we can do here is to constrain NCCL to the full set of NICs that + # the user specified. 
+ # Note the added "=", to do exact instead of prefix match. + export NCCL_IB_HCA="=$(IFS=, ; echo "${nics[*]}")" fi # numactl is only needed if cpu or memory pinning was requested @@ -177,7 +183,7 @@ done set -- "${updated[@]}" if [ "$debug" == "1" ]; then - echo -n "bind.sh: $@" + echo -n "bind.sh:" for TOK in "$@"; do printf " %q" "$TOK"; done echo fi From 02315a0c46dee81d15cc584d44deff5099a921b2 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Fri, 13 Jan 2023 09:31:10 -0800 Subject: [PATCH 108/121] legate/core/_legion: add default new argument to dep part functions (#527) Recent Legion changes added an argument to these functions, so we need to pass something through. Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- legate/core/_legion/partition_functor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/legate/core/_legion/partition_functor.py b/legate/core/_legion/partition_functor.py index 469a12c2c..47eec302d 100644 --- a/legate/core/_legion/partition_functor.py +++ b/legate/core/_legion/partition_functor.py @@ -140,6 +140,7 @@ def partition( part_id, self.mapper, self.tag, + (ffi.NULL, 0), ) @@ -185,6 +186,7 @@ def partition( part_id, self.mapper, self.tag, + (ffi.NULL, 0), ) @@ -231,6 +233,7 @@ def partition( part_id, self.mapper, self.tag, + (ffi.NULL, 0), ) @@ -277,6 +280,7 @@ def partition( part_id, self.mapper, self.tag, + (ffi.NULL, 0), ) From 367e26f39202b1faef992fe3298d00e19f12d4ee Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 18 Jan 2023 15:48:55 -0800 Subject: [PATCH 109/121] Don't turn on Legate debug checks on debug-rel builds (#533) --- legate_core_cpp.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake index 642c9ceb7..ef714345f 100644 --- a/legate_core_cpp.cmake +++ b/legate_core_cpp.cmake @@ -146,7 +146,7 @@ if (legate_core_COLLECTIVE) list(APPEND legate_core_CXX_DEFS LEGATE_USE_COLLECTIVE) endif() -if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") +if(CMAKE_BUILD_TYPE STREQUAL "Debug") list(APPEND legate_core_CXX_DEFS DEBUG_LEGATE) list(APPEND legate_core_CUDA_DEFS DEBUG_LEGATE) endif() From 0e3d9cdc984557617d9391f1abcc542e6df7a5b3 Mon Sep 17 00:00:00 2001 From: Rohan Yadav Date: Wed, 18 Jan 2023 22:59:27 -0800 Subject: [PATCH 110/121] src/core: guard against missing projection functors in collective check (#534) Signed-off-by: Rohan Yadav Signed-off-by: Rohan Yadav --- src/core/mapping/base_mapper.cc | 7 ++++++- src/core/runtime/projection.cc | 5 +++-- src/core/runtime/projection.h | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 1fff1f157..eaf37c83e 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -212,8 +212,13 @@ void BaseMapper::select_task_options(const MapperContext ctx, for (uint32_t idx = 0; idx < task.regions.size(); ++idx) { auto& req = task.regions[idx]; if (req.privilege & LEGION_WRITE_PRIV) continue; + // Look up the projection for the input region. There are cases where + // Legate libraries register their own projection functors that are + // not recorded by Legate Core. So, handle the case when these functors + // are not present and allow for them to be missing.
+ auto projection = find_legate_projection_functor(req.projection, true /* allow_missing */); if ((req.handle_type == LEGION_SINGULAR_PROJECTION) || - (find_legate_projection_functor(req.projection)->is_collective())) { + (projection != nullptr && projection->is_collective())) { output.check_collective_regions.insert(idx); } } diff --git a/src/core/runtime/projection.cc b/src/core/runtime/projection.cc index 9b6947277..9392cd051 100644 --- a/src/core/runtime/projection.cc +++ b/src/core/runtime/projection.cc @@ -251,12 +251,13 @@ void register_legate_core_projection_functors(Legion::Runtime* runtime, identity_functor = new IdentityFunctor(runtime); } -LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id) +LegateProjectionFunctor* find_legate_projection_functor(ProjectionID proj_id, bool allow_missing) { if (0 == proj_id) return identity_functor; const std::lock_guard lock(functor_table_lock); auto result = functor_table[proj_id]; - if (nullptr == result) { + // If we're not OK with a missing projection functor, then throw an error. + if (nullptr == result && !allow_missing) { log_legate.debug("Failed to find projection functor of id %d", proj_id); LEGATE_ABORT; } diff --git a/src/core/runtime/projection.h b/src/core/runtime/projection.h index b363b52c2..cf74d1689 100644 --- a/src/core/runtime/projection.h +++ b/src/core/runtime/projection.h @@ -52,6 +52,7 @@ class LegateProjectionFunctor : public Legion::ProjectionFunctor { void register_legate_core_projection_functors(Legion::Runtime* runtime, const LibraryContext& context); -LegateProjectionFunctor* find_legate_projection_functor(Legion::ProjectionID proj_id); +LegateProjectionFunctor* find_legate_projection_functor(Legion::ProjectionID proj_id, + bool allow_missing = false); } // namespace legate From 03372857480ad8567957d3157e35c007de12e6c3 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 19 Jan 2023 15:21:39 -0800 Subject: [PATCH 111/121] Erase cached reduction instances that cannot be acquired (#536) --- src/core/mapping/base_mapper.cc | 28 +++++++++++++++-------------- src/core/mapping/instance_manager.cc | 30 ++++++++++++++++++++++++++++++ src/core/mapping/instance_manager.h | 6 ++++++ 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index eaf37c83e..5c6fc5dd1 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -609,8 +609,13 @@ void BaseMapper::map_legate_stores(const MapperContext ctx, logger.debug() << log_mappable(mappable) << ": failed to acquire instance " << result << " for reqs:" << reqs_ss.str(); #endif - AutoLock lock(ctx, local_instances->manager_lock()); - local_instances->erase(result); + if ((*reqs.begin())->redop != 0) { + AutoLock lock(ctx, reduction_instances->manager_lock()); + reduction_instances->erase(result); + } else { + AutoLock lock(ctx, local_instances->manager_lock()); + local_instances->erase(result); + } result = NO_INST; } instances.push_back(result); @@ -681,20 +686,17 @@ bool BaseMapper::map_legate_store(const MapperContext ctx, for (auto* req : reqs) regions.push_back(req->region); auto target_memory = get_target_memory(target_proc, policy.target); - ReductionOpID redop = 0; - bool first = true; + ReductionOpID redop = (*reqs.begin())->redop; +#ifdef DEBUG_LEGATE for (auto* req : reqs) { - if (first) - redop = req->redop; - else { - if (redop != req->redop) { - logger.error( - "Colocated stores should be either non-reduction arguments " - "or reductions with the
same reduction operator."); - LEGATE_ABORT; - } + if (redop != req->redop) { + logger.error( + "Colocated stores should be either non-reduction arguments " + "or reductions with the same reduction operator."); + LEGATE_ABORT; } } +#endif // Generate layout constraints from the store mapping LayoutConstraintSet layout_constraints; diff --git a/src/core/mapping/instance_manager.cc b/src/core/mapping/instance_manager.cc index 511487892..f732b0b79 100644 --- a/src/core/mapping/instance_manager.cc +++ b/src/core/mapping/instance_manager.cc @@ -333,6 +333,18 @@ void ReductionInstanceSet::record_instance(ReductionOpID& redop, } } +bool ReductionInstanceSet::erase(PhysicalInstance inst) +{ + for (auto it = instances_.begin(); it != instances_.end(); /*nothing*/) { + if (it->second.instance == inst) { + auto to_erase = it++; + instances_.erase(to_erase); + } else + it++; + } + return instances_.empty(); +} + bool InstanceManager::find_instance(Region region, FieldID field_id, Memory memory, @@ -447,6 +459,24 @@ void ReductionInstanceManager::record_instance(ReductionOpID& redop, } } +void ReductionInstanceManager::erase(PhysicalInstance inst) +{ + const auto mem = inst.get_location(); + const auto tid = inst.get_tree_id(); + + for (auto fit = instance_sets_.begin(); fit != instance_sets_.end(); /*nothing*/) { + if ((fit->first.memory != mem) || (fit->first.tid != tid)) { + fit++; + continue; + } + if (fit->second.erase(inst)) { + auto to_erase = fit++; + instance_sets_.erase(to_erase); + } else + fit++; + } +} + /*static*/ ReductionInstanceManager* ReductionInstanceManager::get_instance_manager() { static ReductionInstanceManager* manager{nullptr}; diff --git a/src/core/mapping/instance_manager.h b/src/core/mapping/instance_manager.h index 0b6b2918a..c42df3119 100644 --- a/src/core/mapping/instance_manager.h +++ b/src/core/mapping/instance_manager.h @@ -129,6 +129,9 @@ class ReductionInstanceSet { Instance& instance, const InstanceMappingPolicy& policy); + public: + bool erase(Instance inst); + private: std::map instances_; }; @@ -227,6 +230,9 @@ class ReductionInstanceManager : public BaseInstanceManager { Instance instance, const InstanceMappingPolicy& policy = {}); + public: + void erase(Instance inst); + public: static ReductionInstanceManager* get_instance_manager(); From 025a66ba7a2134c5f1652b99ef28969d6b7107aa Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 20 Jan 2023 09:47:58 -0800 Subject: [PATCH 112/121] Pass `CMAKE_GENERATOR` to scikit-build (#529) * pass cmake_generator to skbuild as envvar so it overrides skbuild's generator detection * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change variable name Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- install.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/install.py b/install.py index e3dfdef1c..0e9816916 100755 --- a/install.py +++ b/install.py @@ -408,12 +408,6 @@ def validate_path(path): # Also use preexisting CMAKE_ARGS from conda if set cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ") - if cmake_generator: - if " " not in cmake_generator: - cmake_flags += [f"-G{cmake_generator}"] - else: - cmake_flags += [f"-G'{cmake_generator}'"] - if debug or verbose: cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")] @@ -464,10 +458,18 @@ def validate_path(path): cmake_flags += ["-Dlegate_core_LEGION_BRANCH=%s" % legion_branch] cmake_flags += extra_flags + build_flags = 
[f"-j{str(thread_count)}"] + if verbose: + if cmake_generator == "Unix Makefiles": + build_flags += ["VERBOSE=1"] + else: + build_flags += ["--verbose"] + cmd_env.update( { - "SKBUILD_BUILD_OPTIONS": f"-j{str(thread_count)}", "CMAKE_ARGS": " ".join(cmake_flags), + "CMAKE_GENERATOR": cmake_generator, + "SKBUILD_BUILD_OPTIONS": " ".join(build_flags), } ) @@ -634,7 +636,10 @@ def driver(): "--cmake-generator", dest="cmake_generator", required=False, - default=(None if shutil.which("ninja") is None else "Ninja"), + default=os.environ.get( + "CMAKE_GENERATOR", + "Unix Makefiles" if shutil.which("ninja") is None else "Ninja", + ), choices=["Ninja", "Unix Makefiles", None], help="The CMake makefiles generator", ) From e537b10ab1f112d1949b457b3967842bbd3049ad Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 23 Jan 2023 14:11:29 -0800 Subject: [PATCH 113/121] Change the default CPU architecture to haswell. (#538) When on an x86 platform, change the CPU architecture default to haswell --- install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.py b/install.py index 0e9816916..3f599b301 100755 --- a/install.py +++ b/install.py @@ -576,7 +576,7 @@ def driver(): "--march", dest="march", required=False, - default="native", + default=("haswell" if platform.machine() == "x86_64" else "native"), help="Specify the target CPU architecture.", ) parser.add_argument( From 50084ce29ea33ad46e74749e7aa1f6598a34aedf Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 23 Jan 2023 14:25:44 -0800 Subject: [PATCH 114/121] Build rust `legion_prof` (#535) * build rust legion prof * add rust to generate-conda-envs.py and meta.yaml --- cmake/thirdparty/get_legion.cmake | 1 + conda/conda-build/meta.yaml | 1 + scripts/generate-conda-envs.py | 1 + 3 files changed, 3 insertions(+) diff --git a/cmake/thirdparty/get_legion.cmake b/cmake/thirdparty/get_legion.cmake index 92ec30247..e158391cc 100644 --- a/cmake/thirdparty/get_legion.cmake +++ b/cmake/thirdparty/get_legion.cmake @@ -159,6 +159,7 @@ function(find_or_configure_legion) "Legion_REDOP_HALF ON" "Legion_REDOP_COMPLEX ON" "Legion_GPU_REDUCTIONS OFF" + "Legion_BUILD_RUST_PROFILER ON" ) endif() diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index 51f56180d..77722cab1 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -94,6 +94,7 @@ build: requirements: build: - make + - rust - ninja - cmake {{ cmake_version }} - {{ compiler('c') }} =11.2 diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index 677b42f48..17c5fb2a7 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -85,6 +85,7 @@ def conda(self) -> Reqs: "cmake>=3.24,!=3.25.0", "git", "make", + "rust", "ninja", "scikit-build>=0.13.1", "setuptools>=60", From 3545b3ab38c6b6f1af5e59e53e9484c24f1e81b5 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 24 Jan 2023 08:37:02 -0800 Subject: [PATCH 115/121] adding logic for collective instances to the legate_select_sources (#532) * adding logic for collective instances to the legate_select_sources * fixing logic for legate_select_sources * removing all_local logic --- src/core/mapping/base_mapper.cc | 91 +++++++++++++++++++-------------- src/core/mapping/base_mapper.h | 1 + 2 files changed, 55 insertions(+), 37 deletions(-) diff --git a/src/core/mapping/base_mapper.cc b/src/core/mapping/base_mapper.cc index 5c6fc5dd1..0fe6075e9 100644 --- a/src/core/mapping/base_mapper.cc +++ b/src/core/mapping/base_mapper.cc @@ -928,54 +928,66 @@ 
void BaseMapper::select_task_sources(const MapperContext ctx, const SelectTaskSrcInput& input, SelectTaskSrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); +} + +void add_instance_to_band_ranking(const PhysicalInstance& instance, + const Legion::AddressSpace& local_node, + std::map& source_memories, + std::vector>& band_ranking, + const Memory& destination_memory, + const Legion::Machine& machine) +{ + Memory location = instance.get_location(); + auto finder = source_memories.find(location); + if (finder == source_memories.end()) { + std::vector affinity; + machine.get_mem_mem_affinity( + affinity, location, destination_memory, false /*not just local affinities*/); + uint32_t memory_bandwidth = 0; + if (!affinity.empty()) { +#ifdef DEBUG_LEGATE + assert(affinity.size() == 1); +#endif + memory_bandwidth = affinity[0].bandwidth; + } + source_memories[location] = memory_bandwidth; + band_ranking.push_back(std::pair(instance, memory_bandwidth)); + } else + band_ranking.push_back(std::pair(instance, finder->second)); } void BaseMapper::legate_select_sources(const MapperContext ctx, const PhysicalInstance& target, const std::vector& sources, + const std::vector& collective_sources, std::deque& ranking) { std::map source_memories; // For right now we'll rank instances by the bandwidth of the memory - // they are in to the destination, we'll only rank sources from the - // local node if there are any - bool all_local = false; + // they are in to the destination. // TODO: consider layouts when ranking source to help out the DMA system Memory destination_memory = target.get_location(); - std::vector affinity(1); // fill in a vector of the sources with their bandwidths and sort them std::vector> band_ranking; for (uint32_t idx = 0; idx < sources.size(); idx++) { const PhysicalInstance& instance = sources[idx]; - Memory location = instance.get_location(); - if (location.address_space() == local_node) { - if (!all_local) { - source_memories.clear(); - band_ranking.clear(); - all_local = true; - } - } else if (all_local) // Skip any remote instances once we're local - continue; - auto finder = source_memories.find(location); - if (finder == source_memories.end()) { - affinity.clear(); - machine.get_mem_mem_affinity( - affinity, location, destination_memory, false /*not just local affinities*/); - uint32_t memory_bandwidth = 0; - if (!affinity.empty()) { - assert(affinity.size() == 1); - memory_bandwidth = affinity[0].bandwidth; - } - source_memories[location] = memory_bandwidth; - band_ranking.push_back(std::pair(instance, memory_bandwidth)); - } else - band_ranking.push_back(std::pair(instance, finder->second)); + add_instance_to_band_ranking( + instance, local_node, source_memories, band_ranking, destination_memory, machine); } - // If there aren't any sources (for example if there are some collective views - // to choose from, not yet in this branch), just return nothing and let the - // runtime pick something for us. 
- if (band_ranking.empty()) { return; } + + for (uint32_t idx = 0; idx < collective_sources.size(); idx++) { + std::vector col_instances; + collective_sources[idx].find_instances_nearest_memory(destination_memory, col_instances); + // we need only first instance if there are several + const PhysicalInstance& instance = col_instances[0]; + add_instance_to_band_ranking( + instance, local_node, source_memories, band_ranking, destination_memory, machine); + } +#ifdef DEBUG_LEGATE + assert(!band_ranking.empty()); +#endif // Easy case of only one instance if (band_ranking.size() == 1) { ranking.push_back(band_ranking.begin()->first); @@ -1057,7 +1069,8 @@ void BaseMapper::select_inline_sources(const MapperContext ctx, const SelectInlineSrcInput& input, SelectInlineSrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } void BaseMapper::report_profiling(const MapperContext ctx, @@ -1153,7 +1166,8 @@ void BaseMapper::select_copy_sources(const MapperContext ctx, const SelectCopySrcInput& input, SelectCopySrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } void BaseMapper::speculate(const MapperContext ctx, @@ -1185,7 +1199,8 @@ void BaseMapper::select_close_sources(const MapperContext ctx, const SelectCloseSrcInput& input, SelectCloseSrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } void BaseMapper::report_profiling(const MapperContext ctx, @@ -1248,7 +1263,8 @@ void BaseMapper::select_release_sources(const MapperContext ctx, const SelectReleaseSrcInput& input, SelectReleaseSrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } void BaseMapper::speculate(const MapperContext ctx, @@ -1318,7 +1334,8 @@ void BaseMapper::select_partition_sources(const MapperContext ctx, const SelectPartitionSrcInput& input, SelectPartitionSrcOutput& output) { - legate_select_sources(ctx, input.target, input.source_instances, output.chosen_ranking); + legate_select_sources( + ctx, input.target, input.source_instances, input.collective_views, output.chosen_ranking); } void BaseMapper::report_profiling(const MapperContext ctx, diff --git a/src/core/mapping/base_mapper.h b/src/core/mapping/base_mapper.h index 5e5bf1f49..86e558e0b 100644 --- a/src/core/mapping/base_mapper.h +++ b/src/core/mapping/base_mapper.h @@ -281,6 +281,7 @@ class BaseMapper : public Legion::Mapping::Mapper, public LegateMapper { void legate_select_sources(const Legion::Mapping::MapperContext ctx, const Legion::Mapping::PhysicalInstance& target, const std::vector& sources, + const std::vector& collective_sources, std::deque& ranking); protected: From 6e04a53e3e7b810371ebb74a7975c2fad9f116d9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jan 2023 09:43:56 -0800 Subject: [PATCH 116/121] [pre-commit.ci] pre-commit autoupdate (#542) MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-clang-format: v15.0.6 → v15.0.7](https://github.com/pre-commit/mirrors-clang-format/compare/v15.0.6...v15.0.7) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6ebe994d9..c6003f5fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,7 +12,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v15.0.6' + rev: 'v15.0.7' hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ From f5152c1d4478b6c2dc1e66a3f1ff8a1485a8971c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 24 Jan 2023 10:39:35 -0800 Subject: [PATCH 117/121] Temporarily disable collectives, to work around CI failures (#544) Co-authored-by: Manolis Papadakis --- cmake/Modules/legate_core_options.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/legate_core_options.cmake b/cmake/Modules/legate_core_options.cmake index 158eae120..7e4b80261 100644 --- a/cmake/Modules/legate_core_options.cmake +++ b/cmake/Modules/legate_core_options.cmake @@ -78,7 +78,7 @@ endif() option(legate_core_STATIC_CUDA_RUNTIME "Statically link the cuda runtime library" OFF) option(legate_core_EXCLUDE_LEGION_FROM_ALL "Exclude Legion targets from legate.core's 'all' target" OFF) -option(legate_core_COLLECTIVE "Use of collective instances" ON) +option(legate_core_COLLECTIVE "Use of collective instances" OFF) set_or_default(NCCL_DIR NCCL_PATH) From 1a29d33660c8c0aeb972e7a5cdea3cedb8544bb4 Mon Sep 17 00:00:00 2001 From: Seyed Mirsadeghi Date: Wed, 25 Jan 2023 16:21:16 -0500 Subject: [PATCH 118/121] Add support for building Legion with the UCX backend (#516) * Add support for building Legion with the UCX backend * Add UCX-specific command-line options and env vars * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Set -ucx:tls_host to ^dc,ud The latest UCX now fixes a bug which allows us to list disble dc and ud in the correct way, that is, by ^dc,ud instead of explicily naming all other transports. * Fix typo * Enable passing of UCX_ROOT * Include UCX in our environment generation script * More documentation on UCX and CUDA prereqs * Typo Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Manolis Papadakis Co-authored-by: Manolis Papadakis --- BUILD.md | 57 +++++++++++++++++------- CMakeLists.txt | 7 +++ README.md | 2 +- install.py | 16 ++++++- legate/driver/command.py | 7 +++ legate/driver/launcher.py | 11 +++++ scripts/generate-conda-envs.py | 21 +++++++-- src/legate_defines.h | 3 +- tests/unit/legate/driver/test_command.py | 1 + 9 files changed, 102 insertions(+), 23 deletions(-) diff --git a/BUILD.md b/BUILD.md index 8f749b0be..406953fa7 100644 --- a/BUILD.md +++ b/BUILD.md @@ -82,12 +82,18 @@ override this search by providing an install location for any dependency explicitly, using a `--with-` flag, e.g. `--with-nccl` and `--with-openblas`. -For multi-node execution Legate uses [GASNet](https://gasnet.lbl.gov/) which can be -requested using the `--network gasnet1` or `--network gasnetex` flag. By default -GASNet will be automatically downloaded and built, but if you have an existing -installation then you can inform the install script using the `--with-gasnet` flag. 
-You also need to specify the interconnect network of the target machine using the -`--conduit` flag. +For multi-node execution Legate can use [GASNet](https://gasnet.lbl.gov/) (use +`--network gasnet1` or `--network gasnetex`) or [UCX](https://openucx.org) (use +`--network ucx`). +With gasnet1 or gasnetex, GASNet will be automatically downloaded and built, +but if you have an existing installation then you can inform the install script +using the `--with-gasnet` flag. You also need to specify the interconnect network +of the target machine using the `--conduit` flag. +With UCX, the library must be already installed and `--with-ucx` can be used +to point to the installation path if UCX is not installed under common system paths. +At least version 1.14 is required, configured with `--enable-mt`. + +Compiling with networking support requires MPI. For example this would be an installation for a [DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/): @@ -113,7 +119,7 @@ To see all available configuration options, run with the `--help` flag: ## Dependency listing -### OS (`--os` flag) +### OS (`--os` flag on `generate-conda-envs.py`) Legate has been tested on Linux and MacOS, although only a few flavors of Linux such as Ubuntu have been thoroughly tested. There is currently no support for @@ -144,7 +150,9 @@ Only necessary if you wish to run with Nvidia GPUs. Some CUDA components necessary for building, e.g. the `nvcc` compiler and driver stubs, are not distributed through conda. These must instead be installed using -[system-level packages](https://developer.nvidia.com/cuda-downloads). +[system-level packages](https://developer.nvidia.com/cuda-downloads). If these +are not installed under a standard system location, you will need to inform +`install.py` of their location using `--with-cuda`. Independent of the system-level CUDA installation, conda will need to install an environment-local copy of the CUDA toolkit (which is what the `--ctk` flag @@ -160,15 +168,17 @@ issues on GitHub. Only necessary if you wish to run with Nvidia GPUs. -The following libraries are included automatically in CUDA-enabled environment -files: +The following additional CUDA libraries are required: -- `cutensor` -- `nccl` +- `curand` (only necessary to provide this if building without CUDA support; + CUDA-enabled installations will use the version bundled with CUDA) +- `cutensor` >= 1.3.3 (included in conda environment file) +- `nccl` (included in conda environment file) +- `thrust` >= 1.15 (pulled from github) If you wish to provide alternative installations for these, then you can remove -them from the environment file and pass the corresponding `--with-` flag -to `install.py`. +them from the environment file (if necessary) and pass the corresponding +`--with-` flag to `install.py`. ### Build tools @@ -221,14 +231,27 @@ file generated with `--no-openmpi`. Legate requires a build of MPI that supports `MPI_THREAD_MULTIPLE`. -### Networking libraries (e.g. Infiniband, RoCE, UCX; optional) +### Infiniband/RoCE networking libraries (optional) -Only necessary if you wish to run on multiple nodes. +Only necessary if you wish to run on multiple nodes, using the corresponding +networking hardware. Not available on conda; typically available through MOFED or the system-level package manager. -If using UCX, a build of UCX configured with `--enable-mt` is required. +### UCX >= 1.14 (`--ucx` flag; optional) + +Only necessary if you wish to run on multiple nodes, using the UCX Realm +networking backend. 
+ +A build of UCX configured with `--enable-mt` is required. + +The build of UCX available on conda might not include support for the particular +networking hardware on your machine (or may not be optimally tuned for such). In +that case you may want to use an environment file generated with `--no-ucx`, +get UCX from another source (e.g. MOFED, the system-level package manager, or +compiled manually from source), and pass the location of your installation to +`install.py` (if necessary) using `--with-ucx`. ## Alternative sources for dependencies diff --git a/CMakeLists.txt b/CMakeLists.txt index e83b9a779..c83e6b7c1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,13 @@ cmake_minimum_required(VERSION 3.22.1 FATAL_ERROR) +if(POLICY CMP0074) + # find_package() uses _ROOT variables + # https://cmake.org/cmake/help/latest/policy/CMP0074.html#policy:CMP0074 + cmake_policy(SET CMP0074 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0074 NEW) +endif() + if(POLICY CMP0060) # Link libraries by full path even in implicit directories # https://cmake.org/cmake/help/latest/policy/CMP0060.html#policy:CMP0060 diff --git a/README.md b/README.md index 7f13cc33e..040142e6f 100644 --- a/README.md +++ b/README.md @@ -417,7 +417,7 @@ Memory: * *Does Legate only work on NVIDIA hardware?* No, Legate will run on any processor supported by Legion (e.g. x86, ARM, and - PowerPC CPUs), and any network supported by GASNet (e.g. Infiniband, + PowerPC CPUs), and any network supported by GASNet or UCX (e.g. Infiniband, Cray, Omnipath, and (ROC-)Ethernet based interconnects). * *What languages does the Legate Core API have bindings for?* diff --git a/install.py b/install.py index 3f599b301..e2506a68d 100755 --- a/install.py +++ b/install.py @@ -246,6 +246,7 @@ def install( cmake_exe, cmake_generator, gasnet_dir, + ucx_dir, cuda_dir, maxdim, maxfields, @@ -292,6 +293,7 @@ def install( print("cmake_exe:", cmake_exe) print("cmake_generator:", cmake_generator) print("gasnet_dir:", gasnet_dir) + print("ucx_dir:", ucx_dir) print("cuda_dir:", cuda_dir) print("maxdim:", maxdim) print("maxfields:", maxfields) @@ -334,6 +336,7 @@ def validate_path(path): legion_dir = validate_path(legion_dir) legion_src_dir = validate_path(legion_src_dir) gasnet_dir = validate_path(gasnet_dir) + ucx_dir = validate_path(ucx_dir) thrust_dir = validate_path(thrust_dir) if verbose: @@ -343,6 +346,7 @@ def validate_path(path): print("legion_dir: ", legion_dir) print("legion_src_dir: ", legion_src_dir) print("gasnet_dir: ", gasnet_dir) + print("ucx_dir: ", ucx_dir) print("thrust_dir: ", thrust_dir) if thread_count is None: @@ -440,6 +444,8 @@ def validate_path(path): cmake_flags += ["-DNCCL_DIR=%s" % nccl_dir] if gasnet_dir: cmake_flags += ["-DGASNet_ROOT_DIR=%s" % gasnet_dir] + if ucx_dir: + cmake_flags += ["-DUCX_ROOT=%s" % ucx_dir] if conduit: cmake_flags += ["-DGASNet_CONDUIT=%s" % conduit] if cuda_dir: @@ -532,7 +538,7 @@ def driver(): dest="networks", action="append", required=False, - choices=["gasnet1", "gasnetex", "mpi"], + choices=["gasnet1", "gasnetex", "ucx", "mpi"], default=[], help="Realm networking backend to use for multi-node execution.", ) @@ -544,6 +550,14 @@ def driver(): default=os.environ.get("GASNET"), help="Path to GASNet installation directory.", ) + parser.add_argument( + "--with-ucx", + dest="ucx_dir", + metavar="DIR", + required=False, + default=os.environ.get("UCX_ROOT"), + help="Path to UCX installation directory.", + ) parser.add_argument( "--cuda", action=BooleanFlag, diff --git a/legate/driver/command.py 
b/legate/driver/command.py index 7aeac6dd9..2fc8cc1ed 100644 --- a/legate/driver/command.py +++ b/legate/driver/command.py @@ -360,6 +360,12 @@ def cmd_eager_alloc( return ("-lg:eager_alloc_percentage", str(eager_alloc)) +def cmd_ucx( + config: ConfigProtocol, system: System, launcher: Launcher +) -> CommandPart: + return ("-ucx:tls_host", "^dc,ud") + + def cmd_user_opts( config: ConfigProtocol, system: System, launcher: Launcher ) -> CommandPart: @@ -395,6 +401,7 @@ def cmd_user_opts( cmd_log_levels, cmd_log_file, cmd_eager_alloc, + cmd_ucx, # Append user flags so they can override whatever we provided cmd_user_opts, ) diff --git a/legate/driver/launcher.py b/legate/driver/launcher.py index 78f34f307..c5e55cfc2 100644 --- a/legate/driver/launcher.py +++ b/legate/driver/launcher.py @@ -186,6 +186,17 @@ def _compute_env(self) -> tuple[EnvDict, set[str]]: # threading support env["GASNET_MPI_THREAD"] = "MPI_THREAD_MULTIPLE" + # UCX-related environment variables + env["UCX_CUDA_COPY_MAX_REG_RATIO"] = "1.0" + env["UCX_MULTI_LANE_MAX_RATIO"] = "1.0" + env["UCX_IB_RCACHE_PURGE_ON_FORK"] = "n" + env["UCX_RC_TX_POLL_ALWAYS"] = "y" + + # Link to the UCX bootstrap plugin, in case Realm is using UCX + env["REALM_UCP_BOOTSTRAP_PLUGIN"] = str( + system.legion_paths.legion_lib_path / "realm_ucp_bootstrap_mpi.so" + ) + # Set some environment variables depending on our configuration that # we will check in the Legate binary to ensure that it is properly. # configured. Always make sure we include the Legion library diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py index 17c5fb2a7..361f35149 100755 --- a/scripts/generate-conda-envs.py +++ b/scripts/generate-conda-envs.py @@ -75,6 +75,7 @@ def __str__(self) -> str: class BuildConfig(SectionConfig): compilers: bool = True openmpi: bool = True + ucx: bool = True header = "build" @@ -95,11 +96,14 @@ def conda(self) -> Reqs: pkgs += ("c-compiler", "cxx-compiler") if self.openmpi: pkgs += ("openmpi",) + if self.ucx: + pkgs += ("ucx>=1.14",) return sorted(pkgs) def __str__(self) -> str: val = "-compilers" if self.compilers else "" val += "-openmpi" if self.openmpi else "" + val += "-ucx" if self.ucx else "" return val @@ -171,6 +175,7 @@ class EnvConfig: ctk: str compilers: bool openmpi: bool + ucx: bool @property def sections(self) -> Tuple[SectionConfig, ...]: @@ -188,7 +193,7 @@ def cuda(self) -> CUDAConfig: @property def build(self) -> BuildConfig: - return BuildConfig(self.compilers, self.openmpi) + return BuildConfig(self.compilers, self.openmpi, self.ucx) @property def runtime(self) -> RuntimeConfig: @@ -252,13 +257,14 @@ def filename(self) -> str: """ ALL_CONFIGS = [ - EnvConfig("test", python, "linux", ctk, compilers, openmpi) + EnvConfig("test", python, "linux", ctk, compilers, openmpi, ucx) for python in PYTHON_VERSIONS for ctk in CTK_VERSIONS for compilers in (True, False) for openmpi in (True, False) + for ucx in (True, False) ] + [ - EnvConfig("test", python, "osx", "none", compilers, openmpi) + EnvConfig("test", python, "osx", "none", compilers, openmpi, False) for python in PYTHON_VERSIONS for compilers in (True, False) for openmpi in (True, False) @@ -345,6 +351,13 @@ def __call__(self, parser, namespace, values, option_string): default=None, help="Whether to include openmpi or not (default: both)", ) + parser.add_argument( + "--ucx", + action=BooleanFlag, + dest="ucx", + default=None, + help="Whether to include UCX or not (default: both)", + ) args = parser.parse_args(sys.argv[1:]) @@ -362,6 +375,8 @@ def __call__(self, 
parser, namespace, values, option_string): configs = (x for x in configs if x.os == args.os) if args.openmpi is not None: configs = (x for x in configs if x.build.openmpi == args.openmpi) + if args.ucx is not None: + configs = (x for x in configs if x.build.ucx == args.ucx) for config in configs: conda_sections = indent( diff --git a/src/legate_defines.h b/src/legate_defines.h index fa215e8e7..de272dde0 100644 --- a/src/legate_defines.h +++ b/src/legate_defines.h @@ -46,7 +46,8 @@ #endif #ifndef LEGATE_USE_NETWORK -#if defined(REALM_USE_GASNET1) || defined(REALM_USE_GASNETEX) || defined(REALM_USE_MPI) +#if defined(REALM_USE_GASNET1) || defined(REALM_USE_GASNETEX) || defined(REALM_USE_MPI) || \ + defined(REALM_USE_UCX) #define LEGATE_USE_NETWORK #endif #endif diff --git a/tests/unit/legate/driver/test_command.py b/tests/unit/legate/driver/test_command.py index 38f247ea3..436d53bbf 100644 --- a/tests/unit/legate/driver/test_command.py +++ b/tests/unit/legate/driver/test_command.py @@ -62,6 +62,7 @@ def test_CMD_PARTS() -> None: m.cmd_log_levels, m.cmd_log_file, m.cmd_eager_alloc, + m.cmd_ucx, m.cmd_user_opts, ) From 6c718de3c7a986a8a5003d0964dfcc810756bc6b Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 27 Jan 2023 10:31:48 -0800 Subject: [PATCH 119/121] Update the architectures built in conda package (#545) (#546) Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index 27b5aead1..317947dc4 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -16,7 +16,7 @@ CMAKE_ARGS+=" if [ -z "$CPU_ONLY" ]; then CMAKE_ARGS+=" -DLegion_USE_CUDA=ON --DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;86 +-DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 " fi From 90153d289519437f22393af03ed077b6fad3e0a7 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 30 Jan 2023 13:55:09 -0800 Subject: [PATCH 120/121] Revert "Update the architectures built in conda package (#545) (#546)" (#550) This reverts commit 6c718de3c7a986a8a5003d0964dfcc810756bc6b. Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index 317947dc4..27b5aead1 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -16,7 +16,7 @@ CMAKE_ARGS+=" if [ -z "$CPU_ONLY" ]; then CMAKE_ARGS+=" -DLegion_USE_CUDA=ON --DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 +-DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;86 " fi From 3b5f69a372e23d26e574475663212ea8977bd17d Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 30 Jan 2023 14:14:04 -0800 Subject: [PATCH 121/121] Fix the default Legion version (#547) Co-authored-by: Marcin Zalewski --- install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.py b/install.py index e2506a68d..56c508ad9 100755 --- a/install.py +++ b/install.py @@ -743,7 +743,7 @@ def driver(): "--legion-branch", dest="legion_branch", required=False, - default="collective", + default="04cf06a2", help="Legion branch to build Legate with.", ) args, unknown = parser.parse_known_args()
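
To make the multi-node options added in this series concrete, here is a minimal
sketch of how `install.py` might be invoked once these patches are applied. The
flags themselves (`--network`, `--with-ucx`, `--with-gasnet`, `--conduit`) are
the ones introduced or documented above; the installation path and the conduit
name are illustrative placeholders rather than values taken from the patches.

```shell
# UCX backend: UCX >= 1.14, configured with --enable-mt, must already be
# installed; point the build at it if it is not in a standard location.
./install.py --network ucx --with-ucx /path/to/ucx

# GASNet backend: GASNet is downloaded and built automatically unless
# --with-gasnet points at an existing installation; the interconnect is
# selected with --conduit (the conduit name here is only an example).
./install.py --network gasnetex --conduit ibv
```

In both cases an MPI installation is required to compile with networking
support, as the documentation change above notes.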
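The new `--ucx` selector on `scripts/generate-conda-envs.py` can likewise be
sketched as follows; this assumes the script's other selectors can be left at
their defaults, which this series does not show.

```shell
# Generate an environment file that pulls ucx>=1.14 from conda
./scripts/generate-conda-envs.py --os linux --ucx

# Omit UCX from the environment file, e.g. when UCX comes from MOFED or the
# system-level package manager and is passed to install.py via --with-ucx
./scripts/generate-conda-envs.py --os linux --no-ucx
```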