diff --git a/.github/workflows/require-labels.yml b/.github/workflows/require-labels.yml
index d6634be4f..6f1f751d2 100644
--- a/.github/workflows/require-labels.yml
+++ b/.github/workflows/require-labels.yml
@@ -6,13 +6,9 @@ jobs:
label:
runs-on: ubuntu-latest
steps:
- - name: Delay checking labels if PR is just created
- if: ${{ github.event.action == 'opened' }}
- run: sleep 300s
- shell: bash
- name: Check Labels
- uses: mheap/github-action-required-labels@v2
+ uses: mheap/github-action-required-labels@v3
with:
mode: exactly
count: 1
- labels: "category:new-feature, category:improvement, category:bug-fix, category:task"
\ No newline at end of file
+ labels: "category:new-feature, category:improvement, category:bug-fix, category:task, category:documentation"
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c6003f5fc..a8ac54762 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,10 +1,10 @@
repos:
- repo: https://github.com/PyCQA/isort
- rev: 5.11.4
+ rev: 5.12.0
hooks:
- id: isort
- repo: https://github.com/psf/black
- rev: 22.12.0
+ rev: 23.1.0
hooks:
- id: black
- repo: https://github.com/PyCQA/flake8
@@ -18,7 +18,7 @@ repos:
files: \.(cu|cuh|h|cc|inl)$
types_or: []
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: 'v0.991'
+ rev: 'v1.0.1'
hooks:
- id: mypy
pass_filenames: false
diff --git a/BUILD.md b/BUILD.md
index 406953fa7..43c054fe9 100644
--- a/BUILD.md
+++ b/BUILD.md
@@ -125,7 +125,7 @@ Legate has been tested on Linux and MacOS, although only a few flavors of Linux
such as Ubuntu have been thoroughly tested. There is currently no support for
Windows.
-### Python >= 3.8 (`--python` flag)
+### Python >= 3.9 (`--python` flag)
In terms of Python compatibility, Legate *roughly* follows the timeline outlined
in [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html).
@@ -189,6 +189,7 @@ in the environment file:
- `git`
- `make`
- `ninja` (this is optional, but produces more informative build output)
+- `rust`
- `scikit-build`
### OpenBLAS
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c83e6b7c1..62f4f3cba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,7 +62,7 @@ endif()
# - Download and initialize RAPIDS CMake helpers -----------------------------
if(NOT EXISTS ${CMAKE_BINARY_DIR}/RAPIDS.cmake)
- file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.08/RAPIDS.cmake
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.02/RAPIDS.cmake
${CMAKE_BINARY_DIR}/RAPIDS.cmake)
endif()
include(${CMAKE_BINARY_DIR}/RAPIDS.cmake)
@@ -72,7 +72,7 @@ include(rapids-cuda)
include(rapids-export)
include(rapids-find)
-set(legate_core_version 22.12.00)
+set(legate_core_version 23.03.00)
# For now we want the optimization flags to match on both normal make and cmake
# builds so we override the cmake defaults here for release, this changes
@@ -123,3 +123,8 @@ if(CMAKE_GENERATOR STREQUAL "Ninja")
endfunction()
add_touch_legate_core_ninja_build_target()
endif()
+
+# Opt-in switch for building the projects under examples/ as part of this build.
+# option() takes (<variable> "<help text>" [value]); the help string is required
+# so that the default is not mistaken for the description.
+option(legate_core_EXAMPLE_BUILD_TESTS "Build the example projects in examples/" OFF)
+if (legate_core_EXAMPLE_BUILD_TESTS)
+  add_subdirectory(examples)
+endif()
diff --git a/README.md b/README.md
index 040142e6f..576222033 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,7 @@ by running the same code that runs on a desktop or a laptop at scale.
Using this technology, computational and data scientists can develop and test
programs on moderately sized data sets on local machines and then immediately
scale up to larger data sets deployed on many nodes in the cloud or on a
-supercomputer without any code modifications. In visual terms:
-
-
+supercomputer without any code modifications.
The Legate project is built upon two foundational principles:
@@ -223,8 +221,12 @@ Legate Core is available [on conda](https://anaconda.org/legate/legate-core):
conda install -c nvidia -c conda-forge -c legate legate-core
```
-The conda package is compatible with CUDA >= 11.4 (CUDA driver version >= r470),
-and Volta or later GPU architectures.
+Only linux-64 packages are available at the moment.
+
+The default package contains GPU support, and is compatible with CUDA >= 11.4
+(CUDA driver version >= r470), and Volta or later GPU architectures. CPU-only
+packages are also available, and will be automatically selected by `conda`
+when installing on a machine without GPUs.
See [BUILD.md](BUILD.md) for instructions on building Legate Core from source.
@@ -461,15 +463,14 @@ See the discussion of contributing in [CONTRIBUTING.md](CONTRIBUTING.md).
## Documentation
-A complete list of available features can is provided in the [API
-reference](https://nv-legate.github.io/legate.core/api.html).
+A complete list of available features can be found in the [Legate Core
+documentation](https://nv-legate.github.io/legate.core).
## Next Steps
We recommend starting by experimenting with at least one Legate application
library to test out performance and see how Legate works. If you are interested
in building your own Legate application library, we recommend that you
-investigate our [Legate Hello World application
-library](https://github.com/nv-legate/legate.hello) that provides a small
-example of how to get started developing your own drop-in replacement library
-on top of Legion using the Legate Core library.
+investigate our [Legate Hello World application library](examples/hello) that
+provides a small example of how to get started developing your own drop-in
+replacement library on top of Legion using the Legate Core library.
diff --git a/cmake/Modules/cuda_arch_helpers.cmake b/cmake/Modules/cuda_arch_helpers.cmake
index c70235f68..9a2206f69 100644
--- a/cmake/Modules/cuda_arch_helpers.cmake
+++ b/cmake/Modules/cuda_arch_helpers.cmake
@@ -44,6 +44,9 @@ function(set_cuda_arch_from_names)
if(CMAKE_CUDA_ARCHITECTURES MATCHES "ampere")
list(APPEND cuda_archs 80)
endif()
+ if(CMAKE_CUDA_ARCHITECTURES MATCHES "hopper")
+ list(APPEND cuda_archs 90)
+ endif()
if(cuda_archs)
list(LENGTH cuda_archs num_archs)
@@ -83,6 +86,7 @@ function(add_cuda_architecture_defines defs)
add_def_if_arch_enabled("70" "VOLTA_ARCH")
add_def_if_arch_enabled("75" "TURING_ARCH")
add_def_if_arch_enabled("80" "AMPERE_ARCH")
+ add_def_if_arch_enabled("90" "HOPPER_ARCH")
set(${defs} ${_defs} PARENT_SCOPE)
endfunction()
diff --git a/cmake/Modules/legate_core_options.cmake b/cmake/Modules/legate_core_options.cmake
index 7e4b80261..62fd1e759 100644
--- a/cmake/Modules/legate_core_options.cmake
+++ b/cmake/Modules/legate_core_options.cmake
@@ -78,7 +78,8 @@ endif()
option(legate_core_STATIC_CUDA_RUNTIME "Statically link the cuda runtime library" OFF)
option(legate_core_EXCLUDE_LEGION_FROM_ALL "Exclude Legion targets from legate.core's 'all' target" OFF)
-option(legate_core_COLLECTIVE "Use of collective instances" OFF)
+option(legate_core_COLLECTIVE "Use of collective instances" ON)
+option(legate_core_BUILD_DOCS "Build doxygen docs" OFF)
set_or_default(NCCL_DIR NCCL_PATH)
diff --git a/cmake/legate_helper_functions.cmake b/cmake/legate_helper_functions.cmake
new file mode 100644
index 000000000..db269fa78
--- /dev/null
+++ b/cmake/legate_helper_functions.cmake
@@ -0,0 +1,442 @@
+# Make the rapids-cmake helper modules available to the caller.
+#
+# Downloads the rapids-cmake bootstrap script into the build tree (only if it
+# is not already there), includes it, and then includes the rapids-cmake
+# modules listed below. The _LEGATE_HAS_RAPIDS variable guards against
+# repeating the work. Because this is a macro, the includes and the guard
+# variable are evaluated in the caller's scope.
+macro(legate_include_rapids)
+ if (NOT _LEGATE_HAS_RAPIDS)
+ if(NOT EXISTS ${CMAKE_BINARY_DIR}/LEGATE_RAPIDS.cmake)
+ file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.02/RAPIDS.cmake
+ ${CMAKE_BINARY_DIR}/LEGATE_RAPIDS.cmake)
+ endif()
+ include(${CMAKE_BINARY_DIR}/LEGATE_RAPIDS.cmake)
+ include(rapids-cmake)
+ include(rapids-cpm)
+ include(rapids-cuda)
+ include(rapids-export)
+ include(rapids-find)
+ set(_LEGATE_HAS_RAPIDS ON)
+ endif()
+endmacro()
+
+# Install a C++ library target and export it under the `legate::` namespace.
+#
+# Installs `target` into the rapids-cmake library directory and generates both
+# an install-tree and a build-tree export for the given export set. The
+# build-tree export additionally records `<target>_BUILD_LIBDIR`, which
+# legate_add_cffi() reads when it is defined.
+#
+# Arguments:
+#   target         - name of the library target to install
+#   EXPORT <name>  - (required) name of the export set
+function(legate_default_cpp_install target)
+  set(options)
+  set(one_value_args EXPORT)
+  set(multi_value_args)
+  cmake_parse_arguments(
+    LEGATE_OPT
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
+
+  if (NOT LEGATE_OPT_EXPORT)
+    # Name the function the caller actually invoked.
+    message(FATAL_ERROR "Need EXPORT name for legate_default_cpp_install")
+  endif()
+
+  legate_include_rapids()
+
+  rapids_cmake_install_lib_dir(lib_dir)
+
+  install(TARGETS ${target}
+          DESTINATION ${lib_dir}
+          EXPORT ${LEGATE_OPT_EXPORT})
+
+  # Appended to the build-tree config file only (see FINAL_CODE_BLOCK below).
+  set(final_code_block
+    "set(${target}_BUILD_LIBDIR ${CMAKE_BINARY_DIR}/legate_${target})"
+  )
+
+  # install export targets
+  rapids_export(
+    INSTALL ${target}
+    EXPORT_SET ${LEGATE_OPT_EXPORT}
+    GLOBAL_TARGETS ${target}
+    NAMESPACE legate::
+    LANGUAGES ${ENABLED_LANGUAGES}
+  )
+
+  # build export targets
+  rapids_export(
+    BUILD ${target}
+    EXPORT_SET ${LEGATE_OPT_EXPORT}
+    GLOBAL_TARGETS ${target}
+    NAMESPACE legate::
+    FINAL_CODE_BLOCK final_code_block
+    LANGUAGES ${ENABLED_LANGUAGES}
+  )
+endfunction()
+
+# Generate `<source dir>/<target>/install_info.py` for a Legate Python package.
+#
+# The generated module exposes `libpath` (the directory in which the shared
+# library `lib<target>` was found) and `header` (the preprocessed contents of
+# `header` plus the declaration of `<target>_perform_registration`).
+#
+# Generation happens at build time through the `generate_install_info_py`
+# custom target, which runs a small CMake script that preprocesses the header
+# with the C compiler and configures the Python template.
+#
+# Arguments:
+#   header          - C header to preprocess and embed
+#   TARGET <name>   - (required) name of the Legate library / Python package
+function(legate_add_cffi header)
+  if (NOT DEFINED CMAKE_C_COMPILER)
+    message(FATAL_ERROR "Must enable C language to build Legate projects")
+  endif()
+
+  set(options)
+  set(one_value_args TARGET)
+  set(multi_value_args)
+  cmake_parse_arguments(
+    LEGATE_OPT
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
+
+  # Every path and symbol below is derived from the target name; fail early
+  # instead of silently generating files for an empty name.
+  if (NOT LEGATE_OPT_TARGET)
+    message(FATAL_ERROR "Need TARGET name for legate_add_cffi")
+  endif()
+
+  # abbreviate for the function below
+  set(target ${LEGATE_OPT_TARGET})
+  set(install_info_in
+[=[
+from pathlib import Path
+
+def get_libpath():
+    import os, sys, platform
+    join = os.path.join
+    exists = os.path.exists
+    dirname = os.path.dirname
+    cn_path = dirname(dirname(__file__))
+    so_ext = {
+        "": "",
+        "Java": ".jar",
+        "Linux": ".so",
+        "Darwin": ".dylib",
+        "Windows": ".dll"
+    }[platform.system()]
+
+    def find_lib(libdir):
+        target = f"lib@target@{so_ext}*"
+        search_path = Path(libdir)
+        matches = [m for m in search_path.rglob(target)]
+        if matches:
+            return matches[0].parent
+        return None
+
+    return (
+        find_lib("@libdir@") or
+        find_lib(join(dirname(dirname(dirname(cn_path))), "lib")) or
+        find_lib(join(dirname(dirname(sys.executable)), "lib")) or
+        ""
+    )
+
+libpath: str = get_libpath()
+
+header: str = """
+  @header@
+  void @target@_perform_registration();
+"""
+]=])
+  set(install_info_py_in ${CMAKE_BINARY_DIR}/legate_${target}/install_info.py.in)
+  set(install_info_py ${CMAKE_SOURCE_DIR}/${target}/install_info.py)
+  file(WRITE ${install_info_py_in} "${install_info_in}")
+
+  # Script executed at build time. @header@, @install_info_py_in@ and
+  # @install_info_py@ are substituted now (file(CONFIGURE ... @ONLY) below);
+  # ${CMAKE_C_COMPILER} is left for the script run, where it is passed via -D.
+  set(generate_script_content
+  [=[
+    execute_process(
+      COMMAND ${CMAKE_C_COMPILER}
+        -E
+        -P @header@
+      ECHO_ERROR_VARIABLE
+      OUTPUT_VARIABLE header
+      COMMAND_ERROR_IS_FATAL ANY
+    )
+    configure_file(
+        @install_info_py_in@
+        @install_info_py@
+        @ONLY)
+  ]=])
+
+  set(generate_script ${CMAKE_BINARY_DIR}/gen_install_info.cmake)
+  file(CONFIGURE
+       OUTPUT ${generate_script}
+       CONTENT "${generate_script_content}"
+       @ONLY
+  )
+
+  if (DEFINED ${target}_BUILD_LIBDIR)
+    # this must have been imported from an existing editable build
+    set(libdir ${${target}_BUILD_LIBDIR})
+  else()
+    # libraries are built in a common spot
+    set(libdir ${CMAKE_BINARY_DIR}/legate_${target})
+    message("libdir to binary dir")
+  endif()
+  # add_custom_target() has no OUTPUT keyword (that belongs to
+  # add_custom_command); the generated file is declared with BYPRODUCTS so it
+  # is not passed to the command line as stray arguments.
+  add_custom_target("generate_install_info_py" ALL
+    COMMAND ${CMAKE_COMMAND}
+      -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+      -Dtarget=${target}
+      -Dlibdir=${libdir}
+      -P ${generate_script}
+    BYPRODUCTS ${install_info_py}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "Generating install_info.py"
+    DEPENDS ${header}
+  )
+endfunction()
+
+# Create, install and export the `<target>_python` interface target.
+#
+# Only acts when SKBUILD is set. In that case an INTERFACE library
+# `<target>_python` (alias `legate::<target>_python`) linking `legate::core`
+# and `legate::<target>` is created, installed and exported into the given
+# export set.
+#
+# Arguments:
+#   target         - base name of the Legate library
+#   EXPORT <name>  - (required) name of the export set
+function(legate_default_python_install target)
+  set(options)
+  set(one_value_args EXPORT)
+  set(multi_value_args)
+  cmake_parse_arguments(
+    LEGATE_OPT
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
+
+  if (NOT LEGATE_OPT_EXPORT)
+    message(FATAL_ERROR "Need EXPORT name for legate_default_python_install")
+  endif()
+
+  if (SKBUILD)
+    add_library(${target}_python INTERFACE)
+    add_library(legate::${target}_python ALIAS ${target}_python)
+    target_link_libraries(${target}_python INTERFACE legate::core legate::${target})
+
+    legate_include_rapids()
+
+    # `lib_dir` set inside legate_default_cpp_install() is local to that
+    # function. Unless the caller already provides one, compute it here so
+    # that DESTINATION below never receives an empty value.
+    if (NOT lib_dir)
+      rapids_cmake_install_lib_dir(lib_dir)
+    endif()
+
+    install(TARGETS ${target}_python
+            DESTINATION ${lib_dir}
+            EXPORT ${LEGATE_OPT_EXPORT})
+
+    rapids_export(
+      INSTALL ${target}_python
+      EXPORT_SET ${LEGATE_OPT_EXPORT}
+      GLOBAL_TARGETS ${target}_python
+      NAMESPACE legate::
+    )
+  endif()
+endfunction()
+
+# Add the C++ part of a Legate library to the build.
+#
+# Always locates `legate_core`. Outside of scikit-build the directory `dir` is
+# added and the target installed with legate_default_cpp_install(). Under
+# scikit-build (SKBUILD set) an already-built package `<target>` is looked up
+# first (defaulting `<target>_ROOT` to `<source dir>/build`); the subdirectory
+# is only built when that lookup fails. When the package is found, its
+# `<target>_BUILD_LIBDIR` is forwarded to the caller's scope.
+#
+# Arguments:
+#   dir            - source directory containing the C++ library
+#   EXPORT <name>  - (required) name of the export set
+#   TARGET <name>  - (required) name of the library target / package
+function(legate_add_cpp_subdirectory dir)
+  set(options)
+  set(one_value_args EXPORT TARGET)
+  set(multi_value_args)
+  cmake_parse_arguments(
+    LEGATE_OPT
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
+
+  if (NOT LEGATE_OPT_EXPORT)
+    # Name the function the caller actually invoked.
+    message(FATAL_ERROR "Need EXPORT name for legate_add_cpp_subdirectory")
+  endif()
+
+  if (NOT LEGATE_OPT_TARGET)
+    message(FATAL_ERROR "Need TARGET name for Legate package")
+  endif()
+  # abbreviate for the function
+  set(target ${LEGATE_OPT_TARGET})
+
+  legate_include_rapids()
+
+  rapids_find_package(legate_core CONFIG
+    GLOBAL_TARGETS legate::core
+    BUILD_EXPORT_SET ${LEGATE_OPT_EXPORT}
+    INSTALL_EXPORT_SET ${LEGATE_OPT_EXPORT})
+
+  if (SKBUILD)
+    if (NOT DEFINED ${target}_ROOT)
+      set(${target}_ROOT ${CMAKE_SOURCE_DIR}/build)
+    endif()
+    rapids_find_package(${target} CONFIG
+      GLOBAL_TARGETS legate::${target}
+      BUILD_EXPORT_SET ${LEGATE_OPT_EXPORT}
+      INSTALL_EXPORT_SET ${LEGATE_OPT_EXPORT})
+    if (NOT ${target}_FOUND)
+      add_subdirectory(${dir} ${CMAKE_BINARY_DIR}/legate_${target})
+      legate_default_cpp_install(${target} EXPORT ${LEGATE_OPT_EXPORT})
+    else()
+      # Make sure the libdir is visible to other functions
+      set(${target}_BUILD_LIBDIR "${${target}_BUILD_LIBDIR}" PARENT_SCOPE)
+    endif()
+  else()
+    add_subdirectory(${dir} ${CMAKE_BINARY_DIR}/legate_${target})
+    legate_default_cpp_install(${target} EXPORT ${LEGATE_OPT_EXPORT})
+  endif()
+
+endfunction()
+
+# Generate boilerplate C++ sources for a Legate library.
+#
+# Writes `legate_library.h` and `legate_library.cc` into the current source
+# directory, with every `@target@` replaced by `target`. The header declares
+# the task `Registry` and the `Task` base template; the source defines a
+# default mapper, the registration callback and the C entry point
+# `<target>_perform_registration`.
+#
+# Arguments:
+#   target                   - library name, also used as the C++ namespace
+#   output_sources_variable  - variable set in the caller's scope to the list
+#                              of generated file names
+function(legate_cpp_library_template target output_sources_variable)
+  set(file_template
+[=[
+#pragma once
+
+#include "legate.h"
+
+namespace @target@ {
+
+struct Registry {
+ public:
+  template <typename... Args>
+  static void record_variant(Args&&... args)
+  {
+    get_registrar().record_variant(std::forward<Args>(args)...);
+  }
+  static legate::TaskRegistrar& get_registrar();
+};
+
+template <typename T, int ID>
+struct Task : public legate::LegateTask<T> {
+  using Registrar = Registry;
+  static constexpr int TASK_ID = ID;
+};
+
+}
+]=])
+  string(CONFIGURE "${file_template}" file_content @ONLY)
+  file(WRITE ${CMAKE_CURRENT_SOURCE_DIR}/legate_library.h "${file_content}")
+
+  set(file_template
+[=[
+#include "legate_library.h"
+#include "core/mapping/mapping.h"
+
+namespace @target@ {
+
+class Mapper : public legate::mapping::LegateMapper {
+ public:
+  Mapper(){}
+
+ private:
+  Mapper(const Mapper& rhs)            = delete;
+  Mapper& operator=(const Mapper& rhs) = delete;
+
+  // Legate mapping functions
+ public:
+  void set_machine(const legate::mapping::MachineQueryInterface* machine) override {
+    machine_ = machine;
+  }
+
+  legate::mapping::TaskTarget task_target(
+    const legate::mapping::Task& task,
+    const std::vector<legate::mapping::TaskTarget>& options) override {
+    return *options.begin();
+  }
+
+  std::vector<legate::mapping::StoreMapping> store_mappings(
+    const legate::mapping::Task& task,
+    const std::vector<legate::mapping::StoreTarget>& options) override {
+    using legate::mapping::StoreMapping;
+    std::vector<StoreMapping> mappings;
+    auto& inputs  = task.inputs();
+    auto& outputs = task.outputs();
+    for (auto& input : inputs) {
+      mappings.push_back(StoreMapping::default_mapping(input, options.front()));
+      mappings.back().policy.exact = true;
+    }
+    for (auto& output : outputs) {
+      mappings.push_back(StoreMapping::default_mapping(output, options.front()));
+      mappings.back().policy.exact = true;
+    }
+    // Return the local by value; wrapping it in std::move would inhibit NRVO.
+    return mappings;
+  }
+
+  legate::Scalar tunable_value(legate::TunableID tunable_id) override {
+    return 0;
+  }
+
+ private:
+  const legate::mapping::MachineQueryInterface* machine_;
+};
+
+static const char* const library_name = "@target@";
+
+Legion::Logger log_@target@(library_name);
+
+/*static*/ legate::TaskRegistrar& Registry::get_registrar()
+{
+  static legate::TaskRegistrar registrar;
+  return registrar;
+}
+
+void registration_callback()
+{
+  legate::ResourceConfig config;
+  config.max_mappers       = 1;
+  config.max_tasks         = 1024;
+  config.max_reduction_ops = 8;
+  legate::LibraryContext context(library_name, config);
+
+  Registry::get_registrar().register_all_tasks(context);
+
+  // Now we can register our mapper with the runtime
+  context.register_mapper(std::make_unique<Mapper>(), 0);
+}
+
+}  // namespace @target@
+
+extern "C" {
+
+void @target@_perform_registration(void)
+{
+  // Tell the runtime about our registration callback so we hook it
+  // in before the runtime starts and make it global so that we know
+  // that this call back is invoked everywhere across all nodes
+  legate::Core::perform_registration<@target@::registration_callback>();
+}
+
+}
+]=])
+  string(CONFIGURE "${file_template}" file_content @ONLY)
+  file(WRITE ${CMAKE_CURRENT_SOURCE_DIR}/legate_library.cc "${file_content}")
+
+  set(${output_sources_variable}
+    legate_library.h
+    legate_library.cc
+    PARENT_SCOPE
+  )
+endfunction()
+
+# Generate the boilerplate Python module for a Legate library.
+#
+# Writes `<source dir>/<target>/library.py`, with every `@target@` replaced by
+# `target`. The module defines `UserLibrary` (a `legate.core.Library`
+# subclass that reads `libpath` and `header` from `<target>.install_info`) and
+# registers one instance with the Legate runtime as `user_context`.
+#
+# Arguments:
+#   target - library name; also the Python package the file is written into
+function(legate_python_library_template target)
+set(file_template
+[=[
+from legate.core import (
+    Library,
+    ResourceConfig,
+    get_legate_runtime,
+)
+import os
+from typing import Any
+
+class UserLibrary(Library):
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.shared_object: Any = None
+
+    @property
+    def cffi(self) -> Any:
+        return self.shared_object
+
+    def get_name(self) -> str:
+        return self.name
+
+    def get_shared_library(self) -> str:
+        from @target@.install_info import libpath
+        return os.path.join(libpath, f"lib@target@{self.get_library_extension()}")
+
+    def get_c_header(self) -> str:
+        from @target@.install_info import header
+
+        return header
+
+    def get_registration_callback(self) -> str:
+        return "@target@_perform_registration"
+
+    def get_resource_configuration(self) -> ResourceConfig:
+        assert self.shared_object is not None
+        config = ResourceConfig()
+        config.max_tasks = 1024
+        config.max_mappers = 1
+        config.max_reduction_ops = 8
+        config.max_projections = 0
+        config.max_shardings = 0
+        return config
+
+    def initialize(self, shared_object: Any) -> None:
+        self.shared_object = shared_object
+
+    def destroy(self) -> None:
+        pass
+
+user_lib = UserLibrary("@target@")
+user_context = get_legate_runtime().register_library(user_lib)
+]=])
+  string(CONFIGURE "${file_template}" file_content @ONLY)
+  file(WRITE ${CMAKE_SOURCE_DIR}/${target}/library.py "${file_content}")
+endfunction()
diff --git a/cmake/thirdparty/get_legion.cmake b/cmake/thirdparty/get_legion.cmake
index e158391cc..71ce33fb1 100644
--- a/cmake/thirdparty/get_legion.cmake
+++ b/cmake/thirdparty/get_legion.cmake
@@ -1,5 +1,5 @@
#=============================================================================
-# Copyright 2022 NVIDIA Corporation
+# Copyright 2022-2023 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
# limitations under the License.
#=============================================================================
+include_guard(GLOBAL)
+
function(find_or_configure_legion)
set(oneValueArgs VERSION REPOSITORY BRANCH EXCLUDE_FROM_ALL)
cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
@@ -21,6 +23,22 @@ function(find_or_configure_legion)
include("${rapids-cmake-dir}/export/detail/parse_version.cmake")
rapids_export_parse_version(${PKG_VERSION} Legion PKG_VERSION)
+ string(REGEX REPLACE "^0([0-9]+)?$" "\\1" Legion_major_version "${Legion_major_version}")
+ string(REGEX REPLACE "^0([0-9]+)?$" "\\1" Legion_minor_version "${Legion_minor_version}")
+ string(REGEX REPLACE "^0([0-9]+)?$" "\\1" Legion_patch_version "${Legion_patch_version}")
+
+ include("${rapids-cmake-dir}/cpm/detail/package_details.cmake")
+ rapids_cpm_package_details(Legion version git_repo git_branch shallow exclude_from_all)
+
+ set(version "${Legion_major_version}.${Legion_minor_version}.${Legion_patch_version}")
+ set(exclude_from_all ${PKG_EXCLUDE_FROM_ALL})
+ if(PKG_BRANCH)
+ set(git_branch "${PKG_BRANCH}")
+ endif()
+ if(PKG_REPOSITORY)
+ set(git_repo "${PKG_REPOSITORY}")
+ endif()
+
set(Legion_CUDA_ARCH "")
if(Legion_USE_CUDA)
set(Legion_CUDA_ARCH ${CMAKE_CUDA_ARCHITECTURES})
@@ -47,14 +65,15 @@ function(find_or_configure_legion)
if(Legion_DIR OR Legion_ROOT)
set(_find_mode REQUIRED)
endif()
- rapids_find_package(Legion ${PKG_VERSION} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS})
+ rapids_find_package(Legion ${version} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS})
endif()
if(Legion_FOUND)
- message(STATUS "CPM: using local package Legion@${PKG_VERSION}")
+ message(STATUS "CPM: using local package Legion@${version}")
else()
+
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cpm_helpers.cmake)
- get_cpm_git_args(legion_cpm_git_args REPOSITORY ${PKG_REPOSITORY} BRANCH ${PKG_BRANCH})
+ get_cpm_git_args(legion_cpm_git_args REPOSITORY ${git_repo} BRANCH ${git_branch})
if(NOT DEFINED Legion_PYTHON_EXTRA_INSTALL_ARGS)
set(Legion_PYTHON_EXTRA_INSTALL_ARGS "--single-version-externally-managed --root=/")
endif()
@@ -86,38 +105,41 @@ function(find_or_configure_legion)
endif()
# Get the `stubs/libcuda.so` path so we can set CMAKE_LIBRARY_PATH for FindCUDA.cmake
+ set(_libdir "lib64")
+ if(CMAKE_SIZEOF_VOID_P LESS 8)
+ set(_libdir "lib")
+ endif()
- # Prefer users' CUDA_PATH envvar (if set)
- set(_cuda_stubs "$ENV{CUDA_PATH}")
- if(NOT _cuda_stubs)
- if(DEFINED ENV{CUDA_LIB_PATH})
- # Prefer users' CUDA_LIB_PATH envvar (if set)
- list(APPEND _cuda_stubs "$ENV{CUDA_LIB_PATH}")
- message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
- elseif(EXISTS "${CUDAToolkit_LIBRARY_DIR}/stubs/libcuda.so")
- # This might be the path to the `$CONDA_PREFIX/lib`
- # If it is (and it has the libcuda.so driver stub),
- # then we know we're using the cuda-toolkit package
- # and should link to that driver stub instead of the
- # one potentially in `/usr/local/cuda/lib[64]/stubs`
- list(APPEND _cuda_stubs "${CUDAToolkit_LIBRARY_DIR}/stubs")
- message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
- elseif(DEFINED ENV{LIBRARY_PATH})
- # LIBRARY_PATH is set automatically in the `nvidia/cuda` containers.
- # Only use it if the conda env doesn't have the `stubs/libcuda.so` lib.
- list(APPEND _cuda_stubs "$ENV{LIBRARY_PATH}")
- message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
- elseif(CMAKE_SIZEOF_VOID_P LESS 8)
- # Otherwise assume stubs are relative to the CUDA toolkit root dir
- list(APPEND _cuda_stubs "${CUDAToolkit_LIBRARY_ROOT}/lib/stubs")
- message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
- else()
- # Otherwise assume stubs are relative to the CUDA toolkit root dir
- list(APPEND _cuda_stubs "${CUDAToolkit_LIBRARY_ROOT}/lib64/stubs")
- message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
- endif()
+      # Pick the directory that contains the libcuda.so driver stub. Every
+      # branch appends a *directory*: the result is used as a library search /
+      # link directory below, so it must never be the path of libcuda.so itself.
+      if(EXISTS "${CUDAToolkit_LIBRARY_DIR}/stubs/libcuda.so")
+        # This might be the path to the `$CONDA_PREFIX/lib`
+        # If it is (and it has the libcuda.so driver stub),
+        # then we know we're using the cuda-toolkit package
+        # and should link to that driver stub instead of the
+        # one potentially in `/usr/local/cuda/lib[64]/stubs`
+        list(APPEND _cuda_stubs "${CUDAToolkit_LIBRARY_DIR}/stubs")
+      elseif(EXISTS "${CUDAToolkit_TARGET_DIR}/${_libdir}/stubs/libcuda.so")
+        # Otherwise assume stubs are relative to the CUDA toolkit root dir
+        list(APPEND _cuda_stubs "${CUDAToolkit_TARGET_DIR}/${_libdir}/stubs")
+      elseif(EXISTS "${CUDAToolkit_LIBRARY_ROOT}/${_libdir}/stubs/libcuda.so")
+        list(APPEND _cuda_stubs "${CUDAToolkit_LIBRARY_ROOT}/${_libdir}/stubs")
+      elseif(DEFINED ENV{CUDA_PATH} AND EXISTS "$ENV{CUDA_PATH}/${_libdir}/stubs/libcuda.so")
+        # Use CUDA_PATH envvar (if set)
+        list(APPEND _cuda_stubs "$ENV{CUDA_PATH}/${_libdir}/stubs")
+      elseif(DEFINED ENV{CUDA_LIB_PATH} AND EXISTS "$ENV{CUDA_LIB_PATH}/stubs/libcuda.so")
+        # Use CUDA_LIB_PATH envvar (if set)
+        list(APPEND _cuda_stubs "$ENV{CUDA_LIB_PATH}/stubs")
+      elseif(DEFINED ENV{LIBRARY_PATH} AND
+             ("$ENV{LIBRARY_PATH}" STREQUAL "/usr/local/cuda/${_libdir}/stubs"))
+        # LIBRARY_PATH is set in the `nvidia/cuda` containers to /usr/local/cuda/lib64/stubs
+        list(APPEND _cuda_stubs "$ENV{LIBRARY_PATH}")
+      else()
+        message(FATAL_ERROR "Could not find the libcuda.so driver stub. "
+                            "Please reconfigure with -DCUDAToolkit_ROOT= "
+                            "set to a valid CUDA Toolkit installation.")
       endif()
+ message(VERBOSE "legate.core: Path(s) to CUDA stubs: ${_cuda_stubs}")
+
list(APPEND _legion_cuda_options "CUDA_NVCC_FLAGS ${_nvcc_flags}")
list(APPEND _legion_cuda_options "CMAKE_CUDA_STANDARD ${_cuda_std}")
# Set this so Legion correctly finds the CUDA toolkit.
@@ -131,6 +153,8 @@ function(find_or_configure_legion)
list(APPEND CMAKE_C_IMPLICIT_LINK_DIRECTORIES "${_cuda_stubs}")
list(APPEND CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "${_cuda_stubs}")
list(APPEND CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${_cuda_stubs}")
+ set(legate_core_cuda_stubs_path "${_cuda_stubs}" PARENT_SCOPE)
+ set(legate_core_cuda_stubs_path "${_cuda_stubs}" CACHE STRING "" FORCE)
endif()
# Because legion sets these as cache variables, we need to force set this as a cache variable here
@@ -144,14 +168,19 @@ function(find_or_configure_legion)
set(Legion_CUDA_ARCH ${Legion_CUDA_ARCH} CACHE STRING
"Comma-separated list of CUDA architectures to build for (e.g. 60,70)" FORCE)
- rapids_cpm_find(Legion ${PKG_VERSION} ${FIND_PKG_ARGS}
+ message(VERBOSE "legate.core: Legion version: ${version}")
+ message(VERBOSE "legate.core: Legion git_repo: ${git_repo}")
+ message(VERBOSE "legate.core: Legion git_branch: ${git_branch}")
+ message(VERBOSE "legate.core: Legion exclude_from_all: ${exclude_from_all}")
+
+ rapids_cpm_find(Legion ${version} ${FIND_PKG_ARGS}
CPM_ARGS
${legion_cpm_git_args}
FIND_PACKAGE_ARGUMENTS EXACT
- EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL}
+ EXCLUDE_FROM_ALL ${exclude_from_all}
OPTIONS ${_legion_cuda_options}
"CMAKE_CXX_STANDARD ${_cxx_std}"
- "Legion_VERSION ${PKG_VERSION}"
+ "Legion_VERSION ${version}"
"Legion_BUILD_BINDINGS ON"
"Legion_BUILD_APPS OFF"
"Legion_BUILD_TESTS OFF"
@@ -160,6 +189,14 @@ function(find_or_configure_legion)
"Legion_REDOP_COMPLEX ON"
"Legion_GPU_REDUCTIONS OFF"
"Legion_BUILD_RUST_PROFILER ON"
+ "Legion_SPY ${Legion_SPY}"
+ "Legion_USE_LLVM ${Legion_USE_LLVM}"
+ "Legion_USE_HDF5 ${Legion_USE_HDF5}"
+ "Legion_USE_CUDA ${Legion_USE_CUDA}"
+ "Legion_NETWORKS ${Legion_NETWORKS}"
+ "Legion_USE_OpenMP ${Legion_USE_OpenMP}"
+ "Legion_USE_Python ${Legion_USE_Python}"
+ "Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS}"
)
endif()
@@ -175,16 +212,23 @@ function(find_or_configure_legion)
endfunction()
-if(NOT DEFINED legate_core_LEGION_BRANCH)
- set(legate_core_LEGION_BRANCH collective)
-endif()
-
-if(NOT DEFINED legate_core_LEGION_REPOSITORY)
- set(legate_core_LEGION_REPOSITORY https://gitlab.com/StanfordLegion/legion.git)
-endif()
+foreach(_var IN ITEMS "legate_core_LEGION_VERSION"
+ "legate_core_LEGION_BRANCH"
+ "legate_core_LEGION_REPOSITORY"
+ "legate_core_EXCLUDE_LEGION_FROM_ALL")
+ if(DEFINED ${_var})
+ # Create a legate_core_LEGION_BRANCH variable in the current scope either from the existing
+ # current-scope variable, or the cache variable.
+ set(${_var} "${${_var}}")
+ # Remove legate_core_LEGION_BRANCH from the CMakeCache.txt. This ensures reconfiguring the same
+ # build dir without passing `-Dlegate_core_LEGION_BRANCH=` reverts to the value in versions.json
+ # instead of reusing the previous `-Dlegate_core_LEGION_BRANCH=` value.
+ unset(${_var} CACHE)
+ endif()
+endforeach()
if(NOT DEFINED legate_core_LEGION_VERSION)
- set(legate_core_LEGION_VERSION "${legate_core_VERSION_MAJOR}.${legate_core_VERSION_MINOR}.0")
+ set(legate_core_LEGION_VERSION "${legate_core_VERSION}")
endif()
find_or_configure_legion(VERSION ${legate_core_LEGION_VERSION}
diff --git a/cmake/versions.json b/cmake/versions.json
index 02d16cf16..c5bf2d31a 100644
--- a/cmake/versions.json
+++ b/cmake/versions.json
@@ -1,9 +1,13 @@
{
"packages" : {
- "Thrust" : {
- "version" : "1.15.0.0",
- "git_url" : "https://github.com/NVIDIA/thrust.git",
- "git_tag" : "1.15.0"
+ "Thrust" : {
+ "version" : "1.17.0.0",
+ "git_url" : "https://github.com/NVIDIA/thrust.git",
+ "git_tag" : "1.17.0"
+ },
+ "Legion": {
+ "git_url" : "https://gitlab.com/StanfordLegion/legion.git",
+ "git_tag" : "e1f1ef61e29c3160419d0cd528950b2d565c2a0d"
}
}
}
diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh
index 27b5aead1..317947dc4 100644
--- a/conda/conda-build/build.sh
+++ b/conda/conda-build/build.sh
@@ -16,7 +16,7 @@ CMAKE_ARGS+="
if [ -z "$CPU_ONLY" ]; then
CMAKE_ARGS+="
-DLegion_USE_CUDA=ON
--DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;86
+-DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90
"
fi
diff --git a/conda/conda-build/conda_build_config.yaml b/conda/conda-build/conda_build_config.yaml
index e970f469e..81326c39a 100644
--- a/conda/conda-build/conda_build_config.yaml
+++ b/conda/conda-build/conda_build_config.yaml
@@ -3,9 +3,9 @@ gpu_enabled:
- false
python:
- - 3.8
- - 3.9
+ - "3.9,!=3.9.7"
- 3.10
+ - 3.11
numpy_version:
- ">=1.22"
diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml
index 77722cab1..ace526e84 100644
--- a/conda/conda-build/meta.yaml
+++ b/conda/conda-build/meta.yaml
@@ -112,6 +112,7 @@ requirements:
- cuda-nvtx ={{ cuda_version }}
- cuda-cccl ={{ cuda_version }}
- cuda-cudart ={{ cuda_version }}
+ - cuda-nvml-dev ={{ cuda_version }}
- cuda-driver-dev ={{ cuda_version }}
- cuda-cudart-dev ={{ cuda_version }}
{% endif %}
diff --git a/docs/figures/vision.png b/docs/figures/vision.png
deleted file mode 100644
index 6cd072bac..000000000
Binary files a/docs/figures/vision.png and /dev/null differ
diff --git a/docs/legate/core/Makefile b/docs/legate/core/Makefile
index bff64057f..72ea3d7ea 100644
--- a/docs/legate/core/Makefile
+++ b/docs/legate/core/Makefile
@@ -19,6 +19,7 @@
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?= -v -W
+PARALLEL_BUILD ?= 1
SPHINXBUILD ?= legate $(shell which sphinx-build)
SOURCEDIR = source
BUILDDIR = build
@@ -32,7 +33,7 @@ help:
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -j $(PARALLEL_BUILD)
rm -rf build/html/docs
mkdir -p build/html/docs
cp -r ../../figures build/html/docs/
diff --git a/docs/legate/core/source/api/allocation.rst b/docs/legate/core/source/api/allocation.rst
new file mode 100644
index 000000000..5f71df89c
--- /dev/null
+++ b/docs/legate/core/source/api/allocation.rst
@@ -0,0 +1,20 @@
+.. _label_allocation:
+
+.. currentmodule:: legate.core.allocation
+
+Inline mapping
+==============
+
+When a client requests an immediate allocation of a store with
+:meth:`legate.core.store.Store.get_inline_allocation`, the runtime gives you
+back an ``InlineMappedAllocation`` object, which is a thin wrapper around the
+allocation. Since the runtime needs to keep track of lifetimes of Python
+objects using the allocation, the wrapper reveals the allocation to a callback
+and not directly. Doing it this way allows the runtime to capture the object
+constructed from the allocation and tie their lifetimes.
+
+
+.. autosummary::
+ :toctree: generated/
+
+ InlineMappedAllocation.consume
diff --git a/docs/legate/core/source/api/classes.rst b/docs/legate/core/source/api/classes.rst
new file mode 100644
index 000000000..f46811a82
--- /dev/null
+++ b/docs/legate/core/source/api/classes.rst
@@ -0,0 +1,14 @@
+-------
+Classes
+-------
+
+.. toctree::
+ :maxdepth: 2
+
+ runtime
+ operation
+ store
+ allocation
+ shape
+.. partition
+.. legion
diff --git a/docs/legate/core/source/api/index.rst b/docs/legate/core/source/api/index.rst
index d5516f6dc..abe6a5ab9 100644
--- a/docs/legate/core/source/api/index.rst
+++ b/docs/legate/core/source/api/index.rst
@@ -5,43 +5,7 @@ API Reference
.. currentmodule:: legate.core
.. toctree::
- :maxdepth: 1
+ :maxdepth: 2
-.. autosummary::
- :toctree: generated/
-
- Point
- Rect
- Domain
- Transform
- AffineTransform
- IndexSpace
- PartitionFunctor
- PartitionByRestriction
- PartitionByImage
- PartitionByImageRange
- EqualPartition
- PartitionByWeights
- IndexPartition
- FieldSpace
- FieldID
- Region
- Partition
- Fill
- IndexFill
- Copy
- IndexCopy
- Attach
- Detach
- Acquire
- Release
- Future
- OutputRegion
- PhysicalRegion
- InlineMapping
- Task
- FutureMap
- IndexTask
- Fence
- ArgumentMap
- BufferBuilder
+ routines
+ classes
diff --git a/docs/legate/core/source/api/operation.rst b/docs/legate/core/source/api/operation.rst
new file mode 100644
index 000000000..f84a1bced
--- /dev/null
+++ b/docs/legate/core/source/api/operation.rst
@@ -0,0 +1,120 @@
+.. _label_operation:
+
+.. currentmodule:: legate.core.operation
+
+Operations
+==========
+
+Operations in Legate are by default automatically parallelized. Legate extracts
+parallelism from an operation by partitioning its store arguments. Operations
+usually require the partitions to be aligned in some way; e.g., partitioning
+vectors across multiple addition tasks requires the vectors to be partitioned
+in the same way. Legate provides APIs for developers to control how stores are
+partitioned via `partitioning constraints`.
+
+When an operation needs a store to be partitioned more than one way, the
+operation can create `partition symbols` and use them in partitioning
+constraints. In that case, a partition symbol must be passed along with the
+store when the store is added. Stores can be partitioned in multiple ways when
+they are used only for read accesses or reductions.
+
+AutoTask
+--------
+
+``AutoTask`` is a type of task that is automatically parallelized. Each
+Legate task is associated with a task id that uniquely names a task to invoke.
+The actual task implementation resides on the C++ side.
+
+.. autosummary::
+ :toctree: generated/
+
+ AutoTask.add_input
+ AutoTask.add_output
+ AutoTask.add_reduction
+ AutoTask.add_scalar_arg
+ AutoTask.declare_partition
+ AutoTask.add_constraint
+ AutoTask.add_alignment
+ AutoTask.add_broadcast
+ AutoTask.throws_exception
+ AutoTask.can_raise_exception
+ AutoTask.add_nccl_communicator
+ AutoTask.add_cpu_communicator
+ AutoTask.side_effect
+ AutoTask.set_concurrent
+ AutoTask.set_side_effect
+ AutoTask.execute
+
+
+Copy
+----
+
+``Copy`` is a special kind of operation for copying data from one store to
+another. Unlike tasks that are mapped to and run on application processors,
+copies are performed by the DMA engine in the runtime. Also, unlike tasks that
+are user-defined, copies have well-defined semantics and come with predefined
+partitioning assumptions on stores. Hence, copies need not take partitioning
+constraints from developers.
+
+A copy can optionally take a store for indices that need to be used in
+accessing the source or target. With an `indirection` store on the source, the
+copy performs a gather operation, and with an indirection on the target, the
+copy does a scatter; when indirections exist for both the source and target,
+the copy turns into a full gather-scatter copy. Out-of-bounds indices are not
+checked and can produce undefined behavior. The caller therefore is responsible
+for making sure the indices are within bounds.
+
+.. autosummary::
+ :toctree: generated/
+
+ Copy.add_input
+ Copy.add_output
+ Copy.add_reduction
+ Copy.add_source_indirect
+ Copy.add_target_indirect
+ Copy.execute
+
+Fill
+----
+
+``Fill`` is a special kind of operation for filling a store with constant values.
+Like copies, fills are performed by the DMA engine and their partitioning
+constraints are predefined.
+
+.. autosummary::
+ :toctree: generated/
+
+ Fill.execute
+
+
+Manually Parallelized Tasks
+---------------------------
+
+On some occasions, tasks are unnatural or even impossible to write in the
+auto-parallelized style. For those occasions, Legate provides explicit control
+over how tasks are parallelized via ``ManualTask``. Each manual task requires the
+caller to provide a `launch domain` that determines the degree of parallelism
+and also names task instances initiated by the task. Direct store arguments to a
+manual task are assumed to be replicated across task instances, and it's the
+developer's responsibility to partition stores. Mapping between points in the
+launch domain and colors in the color space of a store partition is assumed to
+be an identity mapping by default, but it can be configured with a `projection
+function`, a Python function on tuples of coordinates. (See
+:ref:`StorePartition <label_store_partition>` for definitions of color,
+color space, and store partition.)
+
+.. autosummary::
+ :toctree: generated/
+
+ ManualTask.side_effect
+ ManualTask.set_concurrent
+ ManualTask.set_side_effect
+ ManualTask.add_input
+ ManualTask.add_output
+ ManualTask.add_reduction
+ ManualTask.add_scalar_arg
+ ManualTask.throws_exception
+ ManualTask.can_raise_exception
+ ManualTask.add_nccl_communicator
+ ManualTask.add_cpu_communicator
+ ManualTask.execute
diff --git a/docs/legate/core/source/api/routines.rst b/docs/legate/core/source/api/routines.rst
new file mode 100644
index 000000000..78dd1609a
--- /dev/null
+++ b/docs/legate/core/source/api/routines.rst
@@ -0,0 +1,11 @@
+.. currentmodule:: legate.core
+
+--------
+Routines
+--------
+
+.. autosummary::
+ :toctree: generated/
+
+ get_legate_runtime
+ track_provenance
diff --git a/docs/legate/core/source/api/runtime.rst b/docs/legate/core/source/api/runtime.rst
new file mode 100644
index 000000000..8a4033e43
--- /dev/null
+++ b/docs/legate/core/source/api/runtime.rst
@@ -0,0 +1,101 @@
+.. _label_runtime:
+
+.. currentmodule:: legate.core
+
+Runtime and Library Contexts
+============================
+
+Library
+-------
+
+A ``Library`` class is an interface that every library descriptor needs to
+implement. Each library should tell the Legate runtime how to initialize and
+configure the library, and this class provides a common way to reveal that
+information to the runtime. Each library should register to the runtime a
+library descriptor object that implements ``Library`` directly or via duck
+typing. (See :meth:`legate.core.runtime.Runtime.register_library`.)
+
+.. autosummary::
+ :toctree: generated/
+
+ Library.get_name
+ Library.get_shared_library
+ Library.get_c_header
+ Library.get_registration_callback
+ Library.get_resource_configuration
+
+
+Resource configuration
+----------------------
+
+A ``ResourceConfig`` object describes the maximum number of handles that a
+library uses.
+
+.. autosummary::
+ :toctree: generated/
+
+ ResourceConfig.max_tasks
+ ResourceConfig.max_reduction_ops
+ ResourceConfig.max_mappers
+
+
+Context
+-------
+
+A ``Context`` object provides APIs for creating stores and issuing tasks and
+other kinds of operations. When a library registers itself to the Legate
+runtime, the runtime gives back a context object unique to the library.
+
+.. autosummary::
+ :toctree: generated/
+
+ context.Context.create_store
+ context.Context.create_task
+ context.Context.create_manual_task
+ context.Context.create_auto_task
+ context.Context.create_copy
+ context.Context.create_fill
+ context.Context.issue_execution_fence
+ context.Context.tree_reduce
+ context.Context.get_tunable
+ context.Context.provenance
+ context.Context.annotation
+ context.Context.set_provenance
+ context.Context.reset_provenance
+ context.Context.push_provenance
+ context.Context.pop_provenance
+ context.Context.track_provenance
+
+
+Legate Runtime
+--------------
+
+.. autosummary::
+ :toctree: generated/
+
+ runtime.Runtime.num_cpus
+ runtime.Runtime.num_omps
+ runtime.Runtime.num_gpus
+ runtime.Runtime.register_library
+ runtime.Runtime.create_future
+
+
+Annotation
+----------
+
+An ``Annotation`` is a context manager to set library specific annotations that
+are to be attached to operations issued within a scope. A typical usage of
+``Annotation`` would look like this:
+
+::
+
+    with Annotation(lib_context, { "key1" : "value1", "key2" : "value2", ... }):
+ ...
+
+Then each operation in the scope is annotated with the key-value pairs,
+which are later rendered in execution profiles.
+
+.. autosummary::
+ :toctree: generated/
+
+ context.Annotation.__init__
diff --git a/docs/legate/core/source/api/shape.rst b/docs/legate/core/source/api/shape.rst
new file mode 100644
index 000000000..46b4aeba1
--- /dev/null
+++ b/docs/legate/core/source/api/shape.rst
@@ -0,0 +1,62 @@
+.. currentmodule:: legate.core.shape
+
+Shape
+=====
+
+A ``Shape`` is used in expressing the shape of a certain entity in Legate. The
+reason Legate introduces this indirection to the shape metadata is that stores
+in Legate can have unknown shapes at creation time; the shape of an unbound
+store is determined only when the producer task finishes. The shape object can
+help the runtime query the store's metadata or construct another store
+isomorphic to the store without getting blocked.
+
+Shape objects should behave just like an array of integers, but operations that
+introspect the values implicitly block on completion of the producer task.
+
+
+.. autosummary::
+ :toctree: generated/
+
+ Shape.__init__
+
+
+Properties
+----------
+.. autosummary::
+ :toctree: generated/
+
+ Shape.extents
+ Shape.fixed
+ Shape.ndim
+ Shape.volume
+ Shape.sum
+ Shape.strides
+
+
+Manipulation Methods
+--------------------
+.. autosummary::
+ :toctree: generated/
+
+ Shape.drop
+ Shape.update
+ Shape.replace
+ Shape.insert
+ Shape.map
+
+
+Arithmetic and comparison
+-------------------------
+.. autosummary::
+ :toctree: generated/
+
+ Shape.__eq__
+ Shape.__le__
+ Shape.__lt__
+ Shape.__ge__
+ Shape.__gt__
+ Shape.__add__
+ Shape.__sub__
+ Shape.__mul__
+ Shape.__mod__
+ Shape.__floordiv__
diff --git a/docs/legate/core/source/api/store.rst b/docs/legate/core/source/api/store.rst
new file mode 100644
index 000000000..0b95ca443
--- /dev/null
+++ b/docs/legate/core/source/api/store.rst
@@ -0,0 +1,125 @@
+.. currentmodule:: legate.core.store
+
+Store
+=====
+
+`Store` is a multi-dimensional data container for fixed-size elements. Stores
+are internally partitioned and distributed across the system. By default,
+Legate clients need not create nor maintain the partitions explicitly, and the
+Legate runtime is responsible for managing them. Legate clients can control how
+stores should be partitioned for a given task by attaching partitioning
+constraints to the task (see section :ref:`label_operation` for partitioning
+constraint APIs).
+
+Each Store object is a logical handle to the data and is not immediately
+associated with a physical allocation. To access the data, a client must
+`map` the store to a physical instance. A client can map a store by passing
+it to a task, in which case the task body can see the allocation, or calling
+``get_inline_allocation``, which gives the client a linear handle to the
+physical allocation (see section :ref:`label_allocation` for details about
+inline allocations).
+
+Normally, a store gets a fixed shape upon creation. However, there is a special
+type of stores called `unbound` stores whose shapes are unknown at creation
+time. (see section :ref:`label_runtime` for the store creation API.) The shape
+of an unbound store is determined by a task that first updates the store; upon
+the submission of the task, the store becomes a normal store. Passing an
+unbound store as a read-only argument or requesting an inline allocation of an
+unbound store are invalid.
+
+One consequence due to the nature of unbound stores is that querying the shape
+of a previously unbound store can block the client's control flow for an
+obvious reason; to know the shape of the store whose shape was unknown at
+creation time, the client must wait for the updater task to finish. However,
+passing a previously unbound store to a downstream operation can be
+non-blocking, as long as the operation requires no changes in the partitioning
+and mapping for the store.
+
+
+Basic Properties
+----------------
+
+.. autosummary::
+ :toctree: generated/
+
+ Store.shape
+ Store.ndim
+ Store.size
+ Store.type
+ Store.kind
+ Store.unbound
+ Store.scalar
+.. Store.extents
+
+
+Transformation
+--------------
+
+Legate provides several API calls to transform stores. A store after a
+transformation is a view to the original store; i.e., any changes made to the
+transformed store are visible via the original one and vice versa.
+
+.. autosummary::
+ :toctree: generated/
+
+ Store.transform
+ Store.transformed
+ Store.promote
+ Store.project
+ Store.slice
+ Store.transpose
+ Store.delinearize
+
+
+Storage management
+------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ Store.get_inline_allocation
+.. Store.storage
+.. Store.has_storage
+
+
+Partition management
+--------------------
+
+In most cases, Legate clients need not create nor manage partitions manually by
+themselves. However, there are occasions where the clients need to parallelize
+tasks manually, for which stores need to be partitioned manually as well. For
+those occasions, clients may want to query and update the `key` partition of
+each store, i.e., the partition used for updating the store for the last time.
+The following are the API calls for manual partition management.
+
+.. autosummary::
+ :toctree: generated/
+
+ Store.get_key_partition
+ Store.set_key_partition
+ Store.reset_key_partition
+ Store.partition_by_tiling
+
+
+.. _label_store_partition:
+
+StorePartition
+==============
+
+A ``StorePartition`` is an object that represents a partitioned state of a
+store. A store partition is a name of a collection of `sub-stores`, each of
+which corresponds to a subset of elements in the store. Sub-stores in a store
+partition are uniquely identified by their `colors`, and a set of all colors
+of a given store partition is called a `color space`.
+
+It is recommended that store partitions and their sub-stores be used as
+arguments to ``ManualTask`` (see section :ref:`label_operation` for APIs for
+manual parallelization).
+
+
+.. autosummary::
+ :toctree: generated/
+
+ StorePartition.store
+ StorePartition.partition
+ StorePartition.get_child_store
diff --git a/docs/legate/core/source/index.rst b/docs/legate/core/source/index.rst
index 5475b6fa6..b968d3738 100644
--- a/docs/legate/core/source/index.rst
+++ b/docs/legate/core/source/index.rst
@@ -6,7 +6,8 @@ Welcome to Legate Core's documentation!
Overview
Build instructions
- API Reference
+ Python API Reference
+ C++ API Reference
Contributing
Versions
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
new file mode 100644
index 000000000..fdee93f24
--- /dev/null
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,5 @@
+# We abuse find package for testing purposes here to
+# 'find' the current build tree to test package builds
+set(legate_core_ROOT ${CMAKE_BINARY_DIR})
+
+add_subdirectory(hello)
diff --git a/examples/hello/CMakeLists.txt b/examples/hello/CMakeLists.txt
new file mode 100644
index 000000000..f163cfc7c
--- /dev/null
+++ b/examples/hello/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.22.1 FATAL_ERROR)
+
+project(hello VERSION 1.0 LANGUAGES C CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+set(BUILD_SHARED_LIBS ON)
+
+find_package(legate_core REQUIRED)
+
+legate_add_cpp_subdirectory(src TARGET hello EXPORT hello-export)
+
+legate_add_cffi(${CMAKE_CURRENT_SOURCE_DIR}/src/hello_cffi.h TARGET hello)
+legate_python_library_template(hello)
+legate_default_python_install(hello EXPORT hello-export)
diff --git a/examples/hello/README.md b/examples/hello/README.md
new file mode 100644
index 000000000..d2a26a40e
--- /dev/null
+++ b/examples/hello/README.md
@@ -0,0 +1,218 @@
+# Legate Hello World Application
+
+Here we illustrate a minimal example to get a Legate library up and running.
+The example here shows how to get started with the minimum amount of boilerplate.
+For advanced use cases, the boilerplate generated can be customized as needed.
+In general, a Legate application will need to implement three pieces.
+
+1. Build system
+1. C++ tasks
+1. Python library
+
+Please refer to the README in the [Legate repo](https://github.com/nv-legate/legate.core/blob/HEAD/README.md)
+for first installing `legate.core`. We strongly recommend creating a Conda environment for development and testing.
+
+# Build System
+
+## Build Steps
+
+To build the project, the user can do the following:
+
+```
+$ cmake -S . -B build
+$ cmake --build build
+$ python -m pip install -e .
+```
+
+This performs an editable install of the project, which we recommend for development.
+If `cmake` fails to find Legate, the path to the installed Legate can be manually
+specified as `-Dlegate_core_ROOT=<...>` to the `cmake` configuration.
+Alternatively, the user can just do a regular pip installation:
+
+```
+$ python -m pip install .
+```
+
+These approaches are illustrated in the `editable-install.sh` and `install.sh` scripts.
+In particular, `editable-install.sh` shows how to use Legate install info to
+point CMake to the correct installation root.
+
+## CMake
+CMake is the officially supported mechanism for building Legate libraries.
+Legate exports a CMake target and helper functions for building libraries and provides by far the easiest onboarding.
+There are only a few main steps in setting up a build system.
+First, the user should initialize a CMake project.
+
+```cmake
+cmake_minimum_required(VERSION 3.24.0 FATAL_ERROR)
+
+project(hello VERSION 1.0 LANGUAGES C CXX)
+```
+
+Next the user needs to find an existing Legate core:
+
+```cmake
+find_package(legate_core REQUIRED)
+```
+
+Once the `legate_core` package is located, a number of helper functions will be available.
+In a source folder, the user can define a library that will implement the C++ tasks:
+
+```cmake
+legate_cpp_library_template(hello TEMPLATE_SOURCES)
+
+add_library(
+ hello
+ hello_world.cc
+ hello_world.h
+ ${TEMPLATE_SOURCES}
+)
+target_link_libraries(hello PRIVATE legate::core)
+```
+
+First, a helper function is invoked to generate the Legate C++ boilerplate files.
+The list of generated files is returned in the `TEMPLATE_SOURCES` variable.
+Second, the CMake library is linked against the imported `legate::core` target.
+
+Two helper functions are provided to generate the Python boilerplate.
+In the top-level CMakeLists.txt, the Python-C bindings can be generated using CFFI:
+
+```cmake
+legate_add_cffi(${CMAKE_SOURCE_DIR}/src/hello_world.h TARGET hello)
+```
+
+The header file is implemented by the user and contains all the enums required
+to implement a Legate library. The necessary Python file is generated in the `hello`
+subdirectory. Additionally, the user may want to generate a standard `library.py`
+in the Python `hello` folder:
+
+```cmake
+legate_python_library_template(hello)
+```
+
+Finally, default pip installation hooks (via scikit-build) can be added:
+
+```cmake
+legate_default_python_install(hello EXPORT hello-export)
+```
+
+## Editable Builds
+
+Although the final user Python library will likely be installed with `pip`,
+the user will usually need to iterate on the C++ implementation of tasks
+for debugging and optimization. The user will therefore want to be able
+to first build the C++ pieces of the project and then install the Python.
+To support this workflow, legate provides a helper function:
+
+```cmake
+legate_add_cpp_subdirectory(src TARGET hello EXPORT hello-export)
+```
+This encapsulates the build target `hello` so that the C++ library can
+be first built with CMake and then pip-installed in a separate step.
+This is optional, though, and the entire build can always be executed by
+doing a regular pip install:
+
+```
+$ python -m pip install .
+```
+
+# C++ tasks
+
+First, a `hello_world.h` header is needed to define all enums. In this case,
+we have enums identifying the different task types:
+
+```cpp
+enum HelloOpCode {
+ _OP_CODE_BASE = 0,
+ HELLO_WORLD_TASK = 1,
+};
+```
+
+We implement this CPU-only task in a `hello_world.cc`.
+
+```cpp
+#include "legate_library.h"
+#include "hello_world.h"
+
+namespace hello {
+```
+
+The source file should include the library header and the generated file `legate_library.h`.
+Because the target was named `hello` in the build files, all generated files create types
+in the `hello` namespace.
+
+The task implementation is simple:
+
+```cpp
+class HelloWorldTask : public Task<HelloWorldTask, HELLO_WORLD_TASK> {
+ public:
+  static void cpu_variant(legate::TaskContext& context){
+    std::string message = context.scalars()[0].value<std::string>();
+ std::cout << message << std::endl;
+ }
+};
+```
+Here we define a CPU variant. The task is given the unique enum ID from `hello_world.h`.
+The task unpacks a string from the input context and prints it.
+Task types need to be statically registered, which requires a bit of extra boilerplate:
+
+```cpp
+namespace
+{
+
+static void __attribute__((constructor)) register_tasks(void)
+{
+ hello::HelloWorldTask::register_variants();
+}
+
+}
+```
+
+Any tasks instantiated in the Python library will ultimately invoke this C++ task.
+
+# Python library
+
+The example uses two generated files `library.py` and `install_info.py`.
+The implementation of tasks is provided in the `hello.py` file.
+First, we have to import a few types and a context object for creating tasks.
+The context object is automatically created in the generated boilerplate.
+
+```python
+from .library import user_context, user_lib
+from enum import IntEnum
+from legate.core import Rect
+import legate.core.types as types
+```
+
+The C++ enums can be mapped into Python:
+
+```python
+class HelloOpCode(IntEnum):
+ HELLO_WORLD = user_lib.cffi.HELLO_WORLD_TASK
+```
+
+The example here provides two library functions. The first prints a single message.
+The second prints a fixed number of messages. For `print_hello`,
+a new task is created in `user_context`. The message string is added as a scalar argument.
+In the second example, a launch domain for a fixed `n` is provided.
+
+These library functions can now be imported and used in python.
+This is shown in `examples/hello.py`:
+
+```
+from hello import print_hello
+
+print_hello("Hello, world")
+```
+
+
+# Examples
+
+The tutorial contains a few examples that illustrate key Legate concepts:
+
+1. [Hello World](examples/hello-world.md): Shows the basics of creating tasks and adding task arguments.
+1. [Variance](examples/variance.md): Shows how to create input arrays and tasks operating on partitioned data.
+Also shows how to perform reduction tasks like summation.
+
+
+
diff --git a/examples/hello/editable-install.sh b/examples/hello/editable-install.sh
new file mode 100755
index 000000000..5c5774ece
--- /dev/null
+++ b/examples/hello/editable-install.sh
@@ -0,0 +1,5 @@
+legate_root=$(python -c 'import legate.install_info as i; from pathlib import Path; print(Path(i.libpath).parent.resolve())')
+echo "Using Legate at $legate_root"  # root of the Legate install that Python imports
+cmake -S . -B build -D legate_core_ROOT="$legate_root" &&  # quoted: path may contain spaces
+cmake --build build &&  # only build after a successful configure
+python -m pip install -e .  # only install after a successful build
diff --git a/examples/hello/examples/cunumeric-variance.py b/examples/hello/examples/cunumeric-variance.py
new file mode 100644
index 000000000..cfaed064d
--- /dev/null
+++ b/examples/hello/examples/cunumeric-variance.py
@@ -0,0 +1,26 @@
+from typing import Any
+
+import cunumeric
+import numpy as np
+from hello import square, sum, to_scalar
+
+from legate.core import Store
+
+
+def mean_and_variance(a: Any, n: int) -> tuple[float, float]:
+    """Return (mean, variance) of the n elements of a, via E[x^2] - E[x]^2."""
+    a_sq: Store = square(a)  # elementwise squares, same shape as a
+    sum_sq: Store = sum(a_sq)  # scalar store holding sum(x^2)
+    sum_a: Store = sum(a)  # scalar store holding sum(x)
+    # Extract scalar values from the Legate stores
+    mean_a: float = to_scalar(sum_a) / n
+    mean_sum_sq: float = to_scalar(sum_sq) / n
+    variance = mean_sum_sq - mean_a * mean_a
+    return mean_a, variance
+
+
+# Example: Use a random array from cunumeric
+n = 4
+a = cunumeric.random.randn(n).astype(np.float32)
+print(a)
+print(mean_and_variance(a, n))
diff --git a/examples/hello/examples/hello-world.md b/examples/hello/examples/hello-world.md
new file mode 100644
index 000000000..5df75a854
--- /dev/null
+++ b/examples/hello/examples/hello-world.md
@@ -0,0 +1,37 @@
+# Basic Hello, World Application
+
+The code for this example can be found in the [library file](../hello/hello.py) and [example](hello-world.py).
+
+## Single, auto task
+
+Generally, auto tasks should be preferred, since they automatically
+partition and parallelize task launches.
+In the hello world example, only a single scalar argument
+is added and the task is enqueued with `execute`:
+
+```
+task = user_context.create_auto_task(HelloOpCode.HELLO_WORLD)
+task.add_scalar_arg(message, types.string)
+task.execute()
+```
+
+In this case, the cost heuristic in the runtime will notice
+that the task is inexpensive and launch a single instance.
+
+## Manual task with explicit launch domain
+
+It is possible to manually specify the launch domain for a task,
+overriding the internal heuristics.
+
+```
+launch_domain = Rect(lo=[0], hi=[n], exclusive=True)
+task = user_context.create_manual_task(
+ HelloOpCode.HELLO_WORLD, launch_domain=launch_domain
+)
+task.add_scalar_arg(message, types.string)
+task.execute()
+```
+
+Now `n` replica tasks will be launched. In this case,
+the `Rect` launch domain is linear, but multi-dimensional domains
+are also possible.
diff --git a/examples/hello/examples/hello-world.py b/examples/hello/examples/hello-world.py
new file mode 100644
index 000000000..15728fb48
--- /dev/null
+++ b/examples/hello/examples/hello-world.py
@@ -0,0 +1,5 @@
+from hello import print_hello, print_hellos
+
+print_hello("Hello, world")
+
+print_hellos(message="Romanes eunt domus", n=2)
diff --git a/examples/hello/examples/variance.md b/examples/hello/examples/variance.md
new file mode 100644
index 000000000..c11042823
--- /dev/null
+++ b/examples/hello/examples/variance.md
@@ -0,0 +1,67 @@
+# Variance Example
+
+The code for this example can be found in the [library file](../hello/hello.py) and [example](variance.py).
+
+## Creating a store
+
+As seen in the `iota` task, a store can be created from a context as, e.g.
+
+```
+output = user_context.create_store(
+ types.float32,
+ shape=(size,),
+ optimize_scalar=True,
+)
+```
+
+At this point, the store may not be allocated or contain data,
+but can still be passed to tasks as a valid output handle.
+
+## Elementwise task with aligned partitions
+
+Tasks are also created on a context:
+
+```
+task = user_context.create_auto_task(HelloOpCode.SQUARE)
+
+task.add_input(input)
+task.add_output(output)
+task.add_alignment(input, output)
+task.execute()
+```
+
+An auto task indicates Legate should auto-partition based
+on cost heuristics and partitioning constraints.
+An input and output array are added.
+The most critical step here, though, is the alignment of
+the input and output. Since we want to do elementwise operations,
+we need the input and output partitions to be aligned.
+This expresses an auto-partitioning constraint.
+Finally, the task is enqueued by calling its `execute` method.
+
+## Reduction (Summation)
+
+We similarly set up a task, but now add the output
+as a reduction.
+
+```
+task = user_context.create_auto_task(HelloOpCode.SUM)
+
+task.add_input(input)
+task.add_reduction(output, types.ReductionOp.ADD)
+task.execute()
+```
+
+The output is a scalar, which means there is no partitioning
+alignment constraint with input and output.
+
+## Using data from other Legate libraries
+
+Data structures from other libraries (e.g. cunumeric)
+can be passed into functions from other Legate libraries,
+even if the libraries are unaware of each other.
+Legate provides a common interface for data structures
+to provide a schema and access to its underlying stores.
+This is shown in the `_get_legate_store` function via
+the `__legate_data_interface__`.
+
diff --git a/examples/hello/examples/variance.py b/examples/hello/examples/variance.py
new file mode 100644
index 000000000..a8272eec9
--- /dev/null
+++ b/examples/hello/examples/variance.py
@@ -0,0 +1,23 @@
+from typing import Any
+
+from hello import iota, square, sum, to_scalar
+
+from legate.core import Store
+
+
+def mean_and_variance(a: Any, n: int) -> tuple[float, float]:
+    """Return (mean, variance) of the n elements of a, via E[x^2] - E[x]^2."""
+    a_sq: Store = square(a)  # elementwise squares, same shape as a
+    sum_sq: Store = sum(a_sq)  # scalar store holding sum(x^2)
+    sum_a: Store = sum(a)  # scalar store holding sum(x)
+    # Extract scalar values from the Legate stores
+    mean_a: float = to_scalar(sum_a) / n
+    mean_sum_sq: float = to_scalar(sum_sq) / n
+    variance = mean_sum_sq - mean_a * mean_a
+    return mean_a, variance
+
+
+# Example: Use a basic 1,2,3,4 array
+n = 4
+a = iota(n)
+print(mean_and_variance(a, n))
diff --git a/examples/hello/hello/__init__.py b/examples/hello/hello/__init__.py
new file mode 100644
index 000000000..6e38a3cde
--- /dev/null
+++ b/examples/hello/hello/__init__.py
@@ -0,0 +1,2 @@
+from .library import user_lib
+from .hello import iota, print_hello, print_hellos, square, sum, to_scalar
diff --git a/examples/hello/hello/hello.py b/examples/hello/hello/hello.py
new file mode 100644
index 000000000..6548806b2
--- /dev/null
+++ b/examples/hello/hello/hello.py
@@ -0,0 +1,169 @@
+import struct
+from enum import IntEnum
+from typing import Any
+
+import numpy as np
+
+import legate.core.types as types
+from legate.core import Rect, Store, get_legate_runtime
+
+from .library import user_context, user_lib
+
+
+class HelloOpCode(IntEnum):  # task IDs, read from the library's CFFI handle
+    HELLO_WORLD = user_lib.cffi.HELLO_WORLD
+    SUM = user_lib.cffi.SUM
+    SQUARE = user_lib.cffi.SQUARE
+    IOTA = user_lib.cffi.IOTA
+
+
+def print_hello(message: str) -> None:
+    """Launch one auto-parallelized HELLO_WORLD task that prints a message
+
+    Args:
+        message (str): Text handed to the task as a string scalar argument
+    """
+    launcher = user_context.create_auto_task(HelloOpCode.HELLO_WORLD)
+    launcher.add_scalar_arg(message, types.string)
+    launcher.execute()
+
+
+def print_hellos(message: str, n: int) -> None:
+    """Print a message n times by manually launching n replicas of the task
+
+    Args:
+        message (str): The message to print
+        n (int): The number of times to print (one replica per domain point)
+    """
+    # hi is excluded explicitly so that the 1-D domain [0, n) has n points
+    launch_domain = Rect(lo=[0], hi=[n], exclusive=True)
+    task = user_context.create_manual_task(
+        HelloOpCode.HELLO_WORLD, launch_domain=launch_domain
+    )
+    task.add_scalar_arg(message, types.string)
+    task.execute()
+
+
+def _get_legate_store(input: Any) -> Store:
+    """Returns the Legate store backing the given object
+
+    Args:
+        input (Any): A Store, or an object exposing
+            ``__legate_data_interface__``
+
+    Returns:
+        Store: ``input`` itself, or the store of its first data field
+    """
+    if isinstance(input, Store):
+        return input
+    fields = input.__legate_data_interface__["data"]
+    # Only the first field is consulted; its stores() must yield exactly
+    # two entries, of which the second is returned
+    _, backing_store = fields[next(iter(fields))].stores()
+    return backing_store
+
+
+def to_scalar(input: Store) -> float:
+    """Reads the single float32 held by a Legate store as a Python float
+
+    Args:
+        input (Store): A Legate store wrapping one float32 scalar
+
+    Returns:
+        float: The value of that scalar
+    """
+    scalar_dtype = np.dtype(np.float32)
+    # Fetching the buffer blocks until the data in the Store
+    # is available and correct
+    raw = input.storage.get_buffer(scalar_dtype.itemsize)
+    (value,) = np.frombuffer(raw, dtype=scalar_dtype, count=1)
+    return float(value)
+
+
+def zero() -> Store:
+    """Builds a one-element float32 Legate store whose value is zero
+
+    Returns:
+        Store: A future-backed scalar store initialized to zero
+    """
+    # The four zero bytes of a float32 0.0 seed the backing future
+    zero_bytes = struct.pack("f", 0.0)
+    backing = get_legate_runtime().create_future(zero_bytes, len(zero_bytes))
+    return user_context.create_store(
+        types.float32,
+        shape=(1,),
+        storage=backing,
+        optimize_scalar=True,
+    )
+
+
+def iota(size: int) -> Store:
+    """Enqueues the IOTA task, which generates the 1-D array
+    1,2,...size.
+
+    Args:
+        size (int): How many elements to generate
+
+    Returns:
+        Store: The float32 Legate store the task writes the values to
+    """
+    values = user_context.create_store(
+        types.float32,
+        shape=(size,),
+        optimize_scalar=True,
+    )
+
+    # The new store is registered as the task's only output
+    launcher = user_context.create_auto_task(HelloOpCode.IOTA)
+    launcher.add_output(values)
+    launcher.execute()
+    return values
+
+
+def sum(input: Any) -> Store:
+    """Reduces a 1-D array to its total with an ADD reduction task
+
+    Args:
+        input (Any): Either a Legate store or an object exposing
+            the Legate data interface.
+
+    Returns:
+        Store: A scalar Legate store that accumulates the total
+    """
+    source = _get_legate_store(input)
+
+    launcher = user_context.create_auto_task(HelloOpCode.SUM)
+
+    # The reduction adds into this store, so it has to start from zero
+    total = zero()
+
+    launcher.add_input(source)
+    launcher.add_reduction(total, types.ReductionOp.ADD)
+    launcher.execute()
+    return total
+
+
+def square(input: Any) -> Store:
+    """Squares every element of a 1-D array with the SQUARE task
+
+    Args:
+        input (Any): Either a Legate store or an object exposing
+            the Legate data interface.
+
+    Returns:
+        Store: A float32 Legate store, shaped like the input,
+            that receives the elementwise squares
+    """
+    source = _get_legate_store(input)
+
+    squares = user_context.create_store(
+        types.float32, shape=source.shape, optimize_scalar=True
+    )
+    launcher = user_context.create_auto_task(HelloOpCode.SQUARE)
+
+    # Aligning the two stores makes them get partitioned the same way
+    launcher.add_input(source)
+    launcher.add_output(squares)
+    launcher.add_alignment(source, squares)
+    launcher.execute()
+    return squares
diff --git a/examples/hello/install.sh b/examples/hello/install.sh
new file mode 100755
index 000000000..b4d1f47d4
--- /dev/null
+++ b/examples/hello/install.sh
@@ -0,0 +1 @@
+python -m pip install .
diff --git a/examples/hello/setup.py b/examples/hello/setup.py
new file mode 100644
index 000000000..f919989b5
--- /dev/null
+++ b/examples/hello/setup.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+
+# Copyright 2021-2022 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import os
+from pathlib import Path
+
+from setuptools import find_packages
+from skbuild import setup
+
+import legate.install_info as lg_install_info
+
+legate_dir = Path(lg_install_info.libpath).parent.as_posix()
+
+cmake_flags = [
+ f"-Dlegate_core_ROOT:STRING={legate_dir}",
+]
+
+env_cmake_args = os.environ.get("CMAKE_ARGS")
+if env_cmake_args is not None:
+ cmake_flags.append(env_cmake_args)
+os.environ["CMAKE_ARGS"] = " ".join(cmake_flags)
+
+
+setup(
+ name="Legate Hello",
+ version="0.1",
+ description="A Hello World for Legate",
+ author="NVIDIA Corporation",
+ license="Apache 2.0",
+ classifiers=[
+ "Intended Audience :: Developers",
+ "Topic :: Database",
+ "Topic :: Scientific/Engineering",
+ "License :: OSI Approved :: Apache Software License",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ ],
+ packages=find_packages(
+ where=".",
+ include=["hello", "hello.*"],
+ ),
+ include_package_data=True,
+ zip_safe=False,
+)
diff --git a/examples/hello/src/CMakeLists.txt b/examples/hello/src/CMakeLists.txt
new file mode 100644
index 000000000..dfe343722
--- /dev/null
+++ b/examples/hello/src/CMakeLists.txt
@@ -0,0 +1,22 @@
+
+legate_cpp_library_template(hello TEMPLATE_SOURCES)
+
+add_library(
+ hello
+ hello_world.cc
+ hello_world.h
+ iota.cc
+ sum.cc
+ square.cc
+ ${TEMPLATE_SOURCES}
+)
+
+target_include_directories(hello
+ PRIVATE
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+ INTERFACE
+ $<INSTALL_INTERFACE:include>
+)
+
+target_link_libraries(hello PRIVATE legate::core)
+
diff --git a/examples/hello/src/hello_cffi.h b/examples/hello/src/hello_cffi.h
new file mode 100644
index 000000000..c9b8a2c33
--- /dev/null
+++ b/examples/hello/src/hello_cffi.h
@@ -0,0 +1,7 @@
+enum HelloOpCode {
+ _OP_CODE_BASE = 0,
+ HELLO_WORLD = 1,
+ SUM = 2,
+ SQUARE = 3,
+ IOTA = 4,
+};
diff --git a/examples/hello/src/hello_world.cc b/examples/hello/src/hello_world.cc
new file mode 100644
index 000000000..9cea64c42
--- /dev/null
+++ b/examples/hello/src/hello_world.cc
@@ -0,0 +1,25 @@
+#include "hello_world.h"
+#include "legate_library.h"
+
+namespace hello {
+
+Legion::Logger logger("legate.hello");
+
+class HelloWorldTask : public Task<HelloWorldTask, HELLO_WORLD> {
+public:
+ static void cpu_variant(legate::TaskContext &context) {
+ std::string message = context.scalars()[0].value<std::string>();
+ std::cout << message << std::endl;
+ }
+};
+
+} // namespace hello
+
+namespace // unnamed
+{
+
+static void __attribute__((constructor)) register_tasks(void) {
+ hello::HelloWorldTask::register_variants();
+}
+
+} // namespace
diff --git a/examples/hello/src/hello_world.h b/examples/hello/src/hello_world.h
new file mode 100644
index 000000000..8b1c7fb8e
--- /dev/null
+++ b/examples/hello/src/hello_world.h
@@ -0,0 +1,10 @@
+#pragma once
+
+#include "hello_cffi.h"
+#include "legate_library.h"
+
+namespace hello {
+
+extern Legion::Logger logger;
+
+}
\ No newline at end of file
diff --git a/examples/hello/src/iota.cc b/examples/hello/src/iota.cc
new file mode 100644
index 000000000..4f408008b
--- /dev/null
+++ b/examples/hello/src/iota.cc
@@ -0,0 +1,33 @@
+#include "hello_world.h"
+#include "legate_library.h"
+
+namespace hello {
+
+class IotaTask : public Task<IotaTask, IOTA> {
+public:
+ static void cpu_variant(legate::TaskContext &context) {
+
+ legate::Store &output = context.outputs()[0];
+ legate::Rect<1> output_shape = output.shape<1>();
+ auto out = output.write_accessor<float, 1>();
+
+ logger.info() << "Iota task [" << output_shape.lo << "," << output_shape.hi
+ << "]";
+
+ // i is a global index for the complete array
+ for (size_t i = output_shape.lo; i <= output_shape.hi; ++i) {
+ out[i] = i + 1;
+ }
+ }
+};
+
+} // namespace hello
+
+namespace // unnamed
+{
+
+static void __attribute__((constructor)) register_tasks(void) {
+ hello::IotaTask::register_variants();
+}
+
+} // namespace
diff --git a/examples/hello/src/square.cc b/examples/hello/src/square.cc
new file mode 100644
index 000000000..eb44365dc
--- /dev/null
+++ b/examples/hello/src/square.cc
@@ -0,0 +1,44 @@
+#include "hello_world.h"
+#include "legate_library.h"
+
+namespace hello {
+
+class SquareTask : public Task<SquareTask, SQUARE> {
+public:
+ static void cpu_variant(legate::TaskContext &context) {
+ legate::Store &output = context.outputs()[0];
+ // Best-practice to validate the store types
+ assert(output.code() == FLOAT_LT);
+ assert(output.dim() == 1);
+ legate::Rect<1> output_shape = output.shape<1>();
+ auto out = output.write_accessor<float, 1>();
+
+ legate::Store &input = context.inputs()[0];
+ // Best-practice to validate the store types
+ assert(input.code() == FLOAT_LT);
+ assert(input.dim() == 1);
+ legate::Rect<1> input_shape = input.shape<1>(); // should be a 1-Dim array
+ auto in = input.read_accessor<float, 1>();
+
+ assert(input_shape == output_shape);
+
+ logger.info() << "Elementwise square [" << output_shape.lo << ","
+ << output_shape.hi << "]";
+
+ // i is a global index for the complete array
+ for (size_t i = input_shape.lo; i <= input_shape.hi; ++i) {
+ out[i] = in[i] * in[i];
+ }
+ }
+};
+
+} // namespace hello
+
+namespace // unnamed
+{
+
+static void __attribute__((constructor)) register_tasks(void) {
+ hello::SquareTask::register_variants();
+}
+
+} // namespace
diff --git a/examples/hello/src/sum.cc b/examples/hello/src/sum.cc
new file mode 100644
index 000000000..e736600f2
--- /dev/null
+++ b/examples/hello/src/sum.cc
@@ -0,0 +1,49 @@
+#include "hello_world.h"
+#include "legate_library.h"
+
+namespace hello {
+
+class SumTask : public Task<SumTask, SUM> {
+public:
+ static void cpu_variant(legate::TaskContext &context) {
+ legate::Store &input = context.inputs()[0];
+ legate::Rect<1> input_shape = input.shape<1>(); // should be a 1-Dim array
+ auto in = input.read_accessor<float, 1>();
+
+ logger.info() << "Sum [" << input_shape.lo << "," << input_shape.hi << "]";
+
+ float total = 0;
+ // i is a global index for the complete array
+ for (size_t i = input_shape.lo; i <= input_shape.hi; ++i) {
+ total += in[i];
+ }
+
+ /**
+ The task launch as a whole will return a single value (Store of size 1)
+ to the caller. However, each point task gets a separate Store of the
+ same size as the result, to reduce into. This "local accumulator" will
+ be initialized by the runtime, and all we need to do is call .reduce()
+ to add our local contribution. After all point tasks return, the runtime
+ will make sure to combine all their buffers into the single final result.
+ */
+ using Reduce = Legion::SumReduction<float>;
+ legate::Store &output = context.reductions()[0];
+ auto sum = output.reduce_accessor<Reduce, true, 1>();
+ // Best-practice is to validate types
+ assert(output.code() == FLOAT_LT);
+ assert(output.dim() == 1);
+ assert(output.shape<1>() == legate::Rect<1>(0, 0));
+ sum.reduce(0, total);
+ }
+};
+
+} // namespace hello
+
+namespace // unnamed
+{
+
+static void __attribute__((constructor)) register_tasks(void) {
+ hello::SumTask::register_variants();
+}
+
+} // namespace
diff --git a/install.py b/install.py
index 56c508ad9..4c94b3b32 100755
--- a/install.py
+++ b/install.py
@@ -241,6 +241,7 @@ def install(
hdf,
llvm,
spy,
+ build_docs,
conduit,
nccl_dir,
cmake_exe,
@@ -288,6 +289,7 @@ def install(
print("hdf:", hdf)
print("llvm:", llvm)
print("spy:", spy)
+ print("build_docs:", build_docs)
print("conduit:", conduit)
print("nccl_dir:", nccl_dir)
print("cmake_exe:", cmake_exe)
@@ -324,12 +326,14 @@ def install(
print("Using python lib and version: {}, {}".format(pylib_name, pyversion))
def validate_path(path):
- if path is not None and (path := str(path)) != "":
- if not os.path.isabs(path):
- path = join(legate_core_dir, path)
- if exists(path := realpath(path)):
- return path
- return None
+ if path is None or (path := str(path)) == "":
+ return None
+ if not os.path.isabs(path):
+ path = join(legate_core_dir, path)
+ if not exists(path := realpath(path)):
+ print(f"Error: path does not exist: {path}")
+ sys.exit(1)
+ return path
cuda_dir = validate_path(cuda_dir)
nccl_dir = validate_path(nccl_dir)
@@ -413,7 +417,7 @@ def validate_path(path):
cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ")
if debug or verbose:
- cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")]
+ cmake_flags += [f"--log-level={'DEBUG' if debug else 'VERBOSE'}"]
cmake_flags += f"""\
-DCMAKE_BUILD_TYPE={(
@@ -441,27 +445,29 @@ def validate_path(path):
""".splitlines()
if nccl_dir:
- cmake_flags += ["-DNCCL_DIR=%s" % nccl_dir]
+ cmake_flags += [f"-DNCCL_DIR={nccl_dir}"]
if gasnet_dir:
- cmake_flags += ["-DGASNet_ROOT_DIR=%s" % gasnet_dir]
+ cmake_flags += [f"-DGASNet_ROOT_DIR={gasnet_dir}"]
if ucx_dir:
- cmake_flags += ["-DUCX_ROOT=%s" % ucx_dir]
+ cmake_flags += [f"-DUCX_ROOT={ucx_dir}"]
if conduit:
- cmake_flags += ["-DGASNet_CONDUIT=%s" % conduit]
+ cmake_flags += [f"-DGASNet_CONDUIT={conduit}"]
if cuda_dir:
- cmake_flags += ["-DCUDA_TOOLKIT_ROOT_DIR=%s" % cuda_dir]
+ cmake_flags += [f"-DCUDAToolkit_ROOT={cuda_dir}"]
if thrust_dir:
- cmake_flags += ["-DThrust_ROOT=%s" % thrust_dir]
+ cmake_flags += [f"-DThrust_ROOT={thrust_dir}"]
if legion_dir:
- cmake_flags += ["-DLegion_ROOT=%s" % legion_dir]
+ cmake_flags += [f"-DLegion_ROOT={legion_dir}"]
elif legion_src_dir:
- cmake_flags += ["-DCPM_Legion_SOURCE=%s" % legion_src_dir]
+ cmake_flags += [f"-DCPM_Legion_SOURCE={legion_src_dir}"]
else:
cmake_flags += ["-DCPM_DOWNLOAD_Legion=ON"]
if legion_url:
- cmake_flags += ["-Dlegate_core_LEGION_REPOSITORY=%s" % legion_url]
+ cmake_flags += [f"-Dlegate_core_LEGION_REPOSITORY={legion_url}"]
if legion_branch:
- cmake_flags += ["-Dlegate_core_LEGION_BRANCH=%s" % legion_branch]
+ cmake_flags += [f"-Dlegate_core_LEGION_BRANCH={legion_branch}"]
+ if build_docs:
+ cmake_flags += ["-Dlegate_core_BUILD_DOCS=ON"]
cmake_flags += extra_flags
build_flags = [f"-j{str(thread_count)}"]
@@ -618,6 +624,14 @@ def driver():
default=os.environ.get("USE_SPY", "0") == "1",
help="Build Legate with detailed Legion Spy enabled.",
)
+ parser.add_argument(
+ "--docs",
+ dest="build_docs",
+ action="store_true",
+ required=False,
+ default=False,
+ help="Build Doxygen docs.",
+ )
parser.add_argument(
"--conduit",
dest="conduit",
@@ -736,14 +750,14 @@ def driver():
"--legion-url",
dest="legion_url",
required=False,
- default="https://gitlab.com/StanfordLegion/legion.git",
+ default=None,
help="Legion git URL to build Legate with.",
)
parser.add_argument(
"--legion-branch",
dest="legion_branch",
required=False,
- default="04cf06a2",
+ default=None,
help="Legion branch to build Legate with.",
)
args, unknown = parser.parse_known_args()
@@ -761,7 +775,7 @@ def driver():
)
print("to specify the CMake executable if it is not on PATH.")
print()
- print("Attempted to execute: %s" % args.cmake_exe)
+ print(f"Attempted to execute: {args.cmake_exe}")
sys.exit(1)
install(unknown=unknown, **vars(args))
diff --git a/legate/_sphinxext/__init__.py b/legate/_sphinxext/__init__.py
new file mode 100644
index 000000000..dca0f1315
--- /dev/null
+++ b/legate/_sphinxext/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/legate/_sphinxext/settings.py b/legate/_sphinxext/settings.py
new file mode 100644
index 000000000..0cba5d1b1
--- /dev/null
+++ b/legate/_sphinxext/settings.py
@@ -0,0 +1,108 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+import importlib
+import textwrap
+
+from docutils import nodes
+from docutils.parsers.rst.directives import unchanged
+from docutils.statemachine import ViewList
+from jinja2 import Template
+from sphinx.errors import SphinxError
+from sphinx.util.docutils import SphinxDirective
+from sphinx.util.nodes import nested_parse_with_titles
+
+from legate.util.settings import PrioritizedSetting, _Unset
+
+SETTINGS_DETAIL = Template(
+ """
+{% for setting in settings %}
+
+``{{ setting['name'] }}``
+{{ "''''" + "'" * setting['name']|length }}
+
+:**Type**: {{ setting['type'] }}
+:**Env var**: ``{{ setting['env_var'] }}``
+:**Default**: {{ setting['default'] }}
+
+{{ setting['help'] }}
+
+{% endfor %}
+"""
+)
+
+
+class SettingsDirective(SphinxDirective):
+ has_content = True
+ required_arguments = 1
+ optional_arguments = 1
+ option_spec = {"module": unchanged}
+
+ def run(self):
+ obj_name = " ".join(self.arguments)
+ module_name = self.options["module"]
+
+ try:
+ module = importlib.import_module(module_name)
+ except ImportError:
+ raise SphinxError(
+ f"Unable to generate reference docs for {obj_name}: "
+ f"couldn't import module {module_name}"
+ )
+
+ obj = getattr(module, obj_name, None)
+ if obj is None:
+ raise SphinxError(
+ f"Unable to generate reference docs for {obj_name}: "
+ f"no model {obj_name} in module {module_name}"
+ )
+
+ settings = []
+ for x in obj.__class__.__dict__.values():
+ if not isinstance(x, PrioritizedSetting):
+ continue
+ # help = [line.strip() for line in x.help.strip().split("\n")]
+ setting = {
+ "name": x.name,
+ "env_var": x.env_var,
+ "type": x.convert_type,
+ "help": textwrap.dedent(x.help),
+ "default": "(Unset)"
+ if x.default is _Unset
+ else repr(x.default),
+ }
+ settings.append(setting)
+
+ rst_text = SETTINGS_DETAIL.render(
+ name=obj_name, module_name=module_name, settings=settings
+ )
+ return self.parse(rst_text, "")
+
+ def parse(self, rst_text, annotation):
+ result = ViewList()
+ for line in rst_text.split("\n"):
+ result.append(line, annotation)
+ node = nodes.paragraph()
+ node.document = self.state.document
+ nested_parse_with_titles(self.state, result, node)
+ return node.children
+
+
+def setup(app):
+ """Required Sphinx extension setup function."""
+ app.add_directive_to_domain("py", "settings", SettingsDirective)
+
+ return dict(parallel_read_safe=True, parallel_write_safe=True)
diff --git a/legate/core/__init__.py b/legate/core/__init__.py
index 0c5eda106..8a278ed92 100644
--- a/legate/core/__init__.py
+++ b/legate/core/__init__.py
@@ -14,12 +14,27 @@
#
from __future__ import annotations
-from ..rc import check_legion
-from ..util.args import parse_library_command_args
+from legion_cffi import is_legion_python, ffi, lib as legion
-check_legion()
+if is_legion_python == False:
+ from legion_top import (
+ legion_canonical_python_main,
+ legion_canonical_python_cleanup,
+ )
+ from ..driver.main import prepare_driver, CanonicalDriver
+ import atexit, os, shlex, sys
-from legion_cffi import ffi, lib as legion
+ argv = ["legate"] + shlex.split(os.environ.get("LEGATE_CONFIG", ""))
+
+ driver = prepare_driver(argv, CanonicalDriver)
+
+ if driver.dry_run:
+ sys.exit(0)
+
+ os.environ.update(driver.env)
+
+ legion_canonical_python_main(driver.cmd)
+ atexit.register(legion_canonical_python_cleanup)
from ._legion import (
LEGATE_MAX_DIM,
diff --git a/legate/core/_legion/future.py b/legate/core/_legion/future.py
index f4d98c882..470af4dd0 100644
--- a/legate/core/_legion/future.py
+++ b/legate/core/_legion/future.py
@@ -386,7 +386,7 @@ def from_dict(
num_futures = len(futures)
points = ffi.new("legion_domain_point_t[%d]" % num_futures)
futures_ = ffi.new("legion_future_t[%d]" % num_futures)
- for (i, (point, future)) in enumerate(futures.items()):
+ for i, (point, future) in enumerate(futures.items()):
points[i] = point.raw()
futures_[i] = future.handle
handle = legion.legion_future_map_construct_from_futures(
diff --git a/legate/core/_legion/operation.py b/legate/core/_legion/operation.py
index cf13a7bff..95cf96fbb 100644
--- a/legate/core/_legion/operation.py
+++ b/legate/core/_legion/operation.py
@@ -1309,7 +1309,7 @@ def __init__(
mem = legion.legion_memory_query_next(query, mem)
legion.legion_memory_query_destroy(query)
legion.legion_machine_destroy(machine)
- for (sub_region, buf) in shard_local_data.items():
+ for sub_region, buf in shard_local_data.items():
if sub_region.parent is not None:
assert sub_region.parent.parent is parent
legion.legion_index_attach_launcher_attach_array_soa(
diff --git a/legate/core/_legion/partition_functor.py b/legate/core/_legion/partition_functor.py
index 47eec302d..2f9822441 100644
--- a/legate/core/_legion/partition_functor.py
+++ b/legate/core/_legion/partition_functor.py
@@ -379,7 +379,7 @@ def partition(
assert num_domains <= color_space.get_volume()
colors = ffi.new("legion_domain_point_t[%d]" % num_domains)
domains = ffi.new("legion_domain_t[%d]" % num_domains)
- for (i, (point, rect)) in enumerate(self.domains.items()):
+ for i, (point, rect) in enumerate(self.domains.items()):
colors[i] = point.raw()
domains[i] = rect.raw()
return legion.legion_index_partition_create_by_domain(
diff --git a/legate/core/_legion/region.py b/legate/core/_legion/region.py
index a1eaf3cfb..60577ad1d 100644
--- a/legate/core/_legion/region.py
+++ b/legate/core/_legion/region.py
@@ -28,7 +28,6 @@
class Region:
-
handle: Any
def __init__(
diff --git a/legate/core/_legion/task.py b/legate/core/_legion/task.py
index 670b2796c..b086523fd 100644
--- a/legate/core/_legion/task.py
+++ b/legate/core/_legion/task.py
@@ -471,7 +471,6 @@ def launch(
class IndexTask(Dispatchable[Union[Future, FutureMap]]):
-
point_args: Union[list[Any], None]
def __init__(
diff --git a/legate/core/allocation.py b/legate/core/allocation.py
index 4f7354e42..b2b86f6c2 100644
--- a/legate/core/allocation.py
+++ b/legate/core/allocation.py
@@ -49,6 +49,23 @@ def __del__(self) -> None:
def consume(
self, ctor: Callable[[tuple[int, ...], int, tuple[int, ...]], Any]
) -> Any:
+ """
+ Consumes the allocation. Each allocation can be consumed only once.
+
+ Parameters
+ ----------
+ ctor : Callback
+ Callback that constructs a Python object from the allocation.
+ Each callback gets the shape, the physical address, and the strides
+ of the allocation, and is supposed to return a Python object
+ using the allocation. Leaking the three arguments in some other way
+ will lead to undefined behavior.
+
+ Returns
+ -------
+ Any
+ Python object the callback constructs from the allocation
+ """
if self._consumed:
raise RuntimeError("Each inline mapping can be consumed only once")
self._consumed = True
diff --git a/legate/core/context.py b/legate/core/context.py
index e1aac4536..e044271ae 100644
--- a/legate/core/context.py
+++ b/legate/core/context.py
@@ -54,7 +54,7 @@ def __call__(self, *args: Any, **kwargs: Any) -> Any:
def find_last_user_frame(libname: str) -> str:
- for (frame, _) in traceback.walk_stack(None):
+ for frame, _ in traceback.walk_stack(None):
if "__name__" not in frame.f_globals:
continue
if not any(
@@ -160,6 +160,14 @@ def destroy(self) -> None:
@property
def runtime(self) -> Runtime:
+ """
+ Returns the runtime
+
+ Returns
+ -------
+ Runtime
+ The runtime object
+ """
return self._runtime
@property
@@ -192,6 +200,15 @@ def type_system(self) -> TypeSystem:
@property
def annotation(self) -> LibraryAnnotations:
+ """
+ Returns the current set of annotations. Provenance string is one
+ entry in the set.
+
+ Returns
+ -------
+ LibraryAnnotations
+ Library annotations
+ """
return self._annotations[-1]
def get_all_annotations(self) -> str:
@@ -199,6 +216,15 @@ def get_all_annotations(self) -> str:
@property
def provenance(self) -> Optional[str]:
+ """
+ Returns the current provenance string. Attached to every operation
+ issued with the context.
+
+ Returns
+ -------
+ str or None
+ Provenance string
+ """
return self.annotation.provenance
def get_task_id(self, task_id: int) -> int:
@@ -226,6 +252,25 @@ def get_sharding_id(self, shard_id: int) -> int:
def get_tunable(
self, tunable_id: int, dtype: DataType, mapper_id: int = 0
) -> npt.NDArray[Any]:
+ """
+ Queries the mapper for a tunable parameter.
+
+ Parameters
+ ----------
+ tunable_id : int
+ Tunable id. Local to each mapper.
+
+ dtype : DataType
+ Value type
+
+ mapper_id : int
+ Id of the mapper that should handle the tunable query
+
+ Returns
+ -------
+ np.ndarray
+ A NumPy array holding the value of the tunable parameter
+ """
dt = np.dtype(dtype.to_pandas_dtype())
mapper_id = self.get_mapper_id(mapper_id)
fut = Future(
@@ -244,16 +289,38 @@ def get_unique_op_id(self) -> int:
return self._runtime.get_unique_op_id()
def set_provenance(self, provenance: str) -> None:
+ """
+ Sets a new provenance string
+
+ Parameters
+ ----------
+ provenance : str
+ Provenance string
+ """
self._annotations[-1].set_provenance(provenance)
def reset_provenance(self) -> None:
+ """
+ Clears the provenance string that is currently set
+ """
self._annotations[-1].reset_provenance()
def push_provenance(self, provenance: str) -> None:
+ """
+ Pushes a provenance string to the stack
+
+ Parameters
+ ----------
+ provenance : str
+ Provenance string
+ """
self._annotations.append(LibraryAnnotations())
self.set_provenance(provenance)
def pop_provenance(self) -> None:
+ """
+ Pops the provenance string on top of the stack
+ """
if len(self._annotations) == 1:
raise ValueError("Provenance stack underflow")
self._annotations.pop(-1)
@@ -261,6 +328,25 @@ def pop_provenance(self) -> None:
def track_provenance(
self, func: AnyCallable, nested: bool = False
) -> AnyCallable:
+ """
+ Wraps a function with provenance tracking. Provenance of each operation
+ issued within the wrapped function will be tracked automatically.
+
+ Parameters
+ ----------
+ func : AnyCallable
+ Function to wrap
+
+ nested : bool
+ If ``True``, each invocation to a wrapped function within another
+ wrapped function updates the provenance string. Otherwise, the
+ provenance is tracked only for the outermost wrapped function.
+
+ Returns
+ -------
+ AnyCallable
+ Wrapped function
+ """
if nested:
def wrapper(*args: Any, **kwargs: Any) -> Any:
@@ -286,9 +372,37 @@ def create_task(
self,
task_id: int,
mapper_id: int = 0,
- manual: Optional[bool] = False,
+ manual: bool = False,
launch_domain: Optional[Rect] = None,
) -> Union[AutoTask, ManualTask]:
+ """
+ Creates a task. The type of the returned task is determined by the
+ value of ``manual``.
+
+ Parameters
+ ----------
+ task_id : int
+ Task id. Scoped locally within the context; i.e., different
+ libraries can use the same task id. There must be a task
+ implementation corresponding to the task id.
+
+ mapper_id : int, optional
+ Id of the mapper that should determine mapping policies for the
+ task. Used only when the library has more than one mapper.
+
+ manual : bool
+ Indicates whether the task should be manually parallelized;
+ if ``True``, the task is parallelized manually by the caller.
+
+ launch_domain : Rect, optional
+ Launch domain of the task. Ignored if the task is automatically
+ parallelized, mandatory otherwise.
+
+ Returns
+ -------
+ AutoTask or ManualTask
+ A new task
+ """
from .operation import AutoTask, ManualTask
unique_op_id = self.get_unique_op_id()
@@ -314,6 +428,30 @@ def create_manual_task(
mapper_id: int = 0,
launch_domain: Optional[Rect] = None,
) -> ManualTask:
+ """
+ Type safe version of ``Context.create_task``. Always returns a
+ `ManualTask`.
+
+ Parameters
+ ----------
+ task_id : int
+ Task id
+
+ mapper_id : int, optional
+ Mapper id
+
+ launch_domain : Rect, optional
+ Launch domain of the task.
+
+ Returns
+ -------
+ ManualTask
+ A new manually parallelized task
+
+ See Also
+ --------
+ Context.create_task
+ """
from .operation import ManualTask
return cast(
@@ -330,8 +468,32 @@ def create_auto_task(
self,
task_id: int,
mapper_id: int = 0,
- launch_domain: Optional[Rect] = None,
) -> AutoTask:
+ """
+ Type safe version of ``Context.create_task``. Always returns an
+ `AutoTask`.
+
+ Parameters
+ ----------
+ task_id : int
+ Task id
+
+ mapper_id : int, optional
+ Mapper id
+
+ launch_domain : Rect, optional
+ Launch domain of the task.
+
+ Returns
+ -------
+ AutoTask
+ A new auto-parallelized task
+
+ See Also
+ --------
+ Context.create_task
+ """
+
from .operation import AutoTask
return cast(
@@ -340,11 +502,25 @@ def create_auto_task(
task_id=task_id,
mapper_id=mapper_id,
manual=False,
- launch_domain=launch_domain,
),
)
def create_copy(self, mapper_id: int = 0) -> Copy:
+ """
+ Creates a copy operation.
+
+ Parameters
+ ----------
+ mapper_id : int, optional
+ Id of the mapper that should determine mapping policies for the
+ copy. Used only when the library has more than one mapper.
+
+ Returns
+ -------
+ Copy
+ A new copy operation
+ """
+
from .operation import Copy
return Copy(self, mapper_id, self.get_unique_op_id())
@@ -352,6 +528,32 @@ def create_copy(self, mapper_id: int = 0) -> Copy:
def create_fill(
self, lhs: Store, value: Store, mapper_id: int = 0
) -> Fill:
+ """
+ Creates a fill operation.
+
+ Parameters
+ ----------
+ lhs : Store
+ Store to fill
+
+ value : Store
+ Store holding the constant value to fill the ``lhs`` with
+
+ mapper_id : int, optional
+ Id of the mapper that should determine mapping policies for the
+ fill. Used only when the library has more than one mapper.
+
+ Returns
+ -------
+ Fill
+ A new fill operation
+
+ Raises
+ ------
+ ValueError
+ If the ``value`` is not scalar or the ``lhs`` is either unbound or
+ scalar
+ """
from .operation import Fill
return Fill(self, lhs, value, mapper_id, self.get_unique_op_id())
@@ -370,6 +572,34 @@ def create_store(
optimize_scalar: bool = False,
ndim: Optional[int] = None,
) -> Store:
+ """
+ Creates a fresh store.
+
+ Parameters
+ ----------
+ ty : Dtype
+ Type of the elements
+
+ shape : Shape or tuple[int], optional
+ Shape of the store. The store becomes unbound if no shape is
+ given.
+
+ storage : RegionField or Future, optional
+ Optional storage to initialize the store with. Used only when the
+ store is constructed from a future holding a scalar value.
+
+ optimize_scalar : bool
+ If ``True``, the runtime will use a ``Future`` when the store's
+ size is 1
+
+ ndim : int, optional
+ Dimension of the store. Must be passed if the store is unbound.
+
+ Returns
+ -------
+ Store
+ A new store
+ """
dtype = self.type_system[ty]
return self._runtime.create_store(
dtype,
@@ -386,11 +616,50 @@ def get_cpu_communicator(self) -> Communicator:
return self._runtime.get_cpu_communicator()
def issue_execution_fence(self, block: bool = False) -> None:
+ """
+ Issues an execution fence. A fence is a special operation that
+ guarantees that all upstream operations finish before any of the
+ downstream operations start. The caller can optionally block on
+ completion of all upstream operations.
+
+ Parameters
+ ----------
+ block : bool
+ If ``True``, the call blocks until all upstream operations finish.
+ """
self._runtime.issue_execution_fence(block=block)
def tree_reduce(
self, task_id: int, store: Store, mapper_id: int = 0, radix: int = 4
) -> Store:
+ """
+ Performs a user-defined reduction by building a tree of reduction
+ tasks. At each step, the reducer task gets up to ``radix`` input stores
+ and is supposed to produce outputs in a single unbound store.
+
+ Parameters
+ ----------
+ task_id : int
+ Id of the reducer task
+
+ store : Store
+ Store to perform reductions on
+
+ mapper_id : int
+ Id of the mapper that should decide mapping policies for reducer
+ tasks
+
+ radix : int
+ Fan-in of each reducer task. If the store is partitioned into
+ :math:`N` sub-stores by the runtime, then the first level of
+ reduction tree has :math:`\\lceil N / \\mathtt{radix} \\rceil`
+ reducer tasks.
+
+ Returns
+ -------
+ Store
+ Store that contains reduction results
+ """
from .operation import Reduce
result = self.create_store(store.type)
@@ -412,6 +681,31 @@ def track_provenance(
context: Context,
nested: bool = False,
) -> Callable[[AnyCallable], AnyCallable]:
+ """
+ Decorator that adds provenance tracking to functions. Provenance of each
+ operation issued within the wrapped function will be tracked automatically.
+
+ Parameters
+ ----------
+ context : Context
+ Context that the function uses to issue operations
+
+ nested : bool
+ If ``True``, each invocation to a wrapped function within another
+ wrapped function updates the provenance string. Otherwise, the
+ provenance is tracked only for the outermost wrapped function.
+
+ Returns
+ -------
+ Decorator
+ Function that takes a function and returns one with provenance
+ tracking
+
+ See Also
+ --------
+ legate.core.context.Context.track_provenance
+ """
+
def decorator(func: AnyCallable) -> AnyCallable:
return context.track_provenance(func, nested=nested)
@@ -420,6 +714,16 @@ def decorator(func: AnyCallable) -> AnyCallable:
class Annotation:
def __init__(self, context: Context, pairs: dict[str, str]) -> None:
+ """
+ Constructs a new annotation object
+
+ Parameters
+ ----------
+ context : Context
+ Context to which the annotations should be added
+ pairs : dict[str, str]
+ Annotations as key-value pairs
+ """
self._annotation = context.annotation
self._pairs = pairs
diff --git a/legate/core/launcher.py b/legate/core/launcher.py
index 69d6833c7..5dc8bb34b 100644
--- a/legate/core/launcher.py
+++ b/legate/core/launcher.py
@@ -928,7 +928,7 @@ def build_task(
if self._sharding_space is not None:
task.set_sharding_space(self._sharding_space)
- for (req, fields) in self._req_analyzer.requirements:
+ for req, fields in self._req_analyzer.requirements:
req.proj.add(task, req, fields, _index_task_calls)
for future in self._future_args:
task.add_future(future)
@@ -937,7 +937,7 @@ def build_task(
arrival, wait = runtime.get_barriers(volume)
task.add_future(arrival)
task.add_future(wait)
- for (out_req, fields) in self._out_analyzer.requirements:
+ for out_req, fields in self._out_analyzer.requirements:
out_req.add(task, fields)
for comm in self._comms:
task.add_point_future(ArgumentMap(future_map=comm))
@@ -966,11 +966,11 @@ def build_single_task(self, argbuf: BufferBuilder) -> SingleTask:
tag=self._tag,
provenance=self._provenance,
)
- for (req, fields) in self._req_analyzer.requirements:
+ for req, fields in self._req_analyzer.requirements:
req.proj.add_single(task, req, fields, _single_task_calls)
for future in self._future_args:
task.add_future(future)
- for (out_req, fields) in self._out_analyzer.requirements:
+ for out_req, fields in self._out_analyzer.requirements:
out_req.add_single(task, fields)
if (
not self._has_side_effect
@@ -1178,7 +1178,7 @@ def build_copy(self, launch_domain: Rect) -> IndexCopy:
def add_requirements(
requirements: list[tuple[RegionReq, int]]
) -> None:
- for (req, field) in requirements:
+ for req, field in requirements:
req.proj.add(copy, req, field, _index_copy_calls)
add_requirements(self._input_reqs.requirements)
@@ -1209,7 +1209,7 @@ def build_single_copy(self) -> SingleCopy:
def add_requirements(
requirements: list[tuple[RegionReq, int]]
) -> None:
- for (req, field) in requirements:
+ for req, field in requirements:
req.proj.add_single(copy, req, field, _single_copy_calls)
add_requirements(self._input_reqs.requirements)
diff --git a/legate/core/legate.py b/legate/core/legate.py
index 044a41c7e..48b5c3de7 100644
--- a/legate/core/legate.py
+++ b/legate/core/legate.py
@@ -520,31 +520,56 @@ def __init__(self) -> None:
def get_name(self) -> str:
"""
- Return a string name describing this library
+ Returns a name of the library
+
+ Returns
+ -------
+ str
+ Library name
"""
raise NotImplementedError("Implement in derived classes")
- def get_shared_library(self) -> Any:
+ def get_shared_library(self) -> Optional[str]:
"""
- Return the name of the shared library
+ Returns the path to the shared library
+
+ Returns
+ -------
+ str or ``None``
+ Path to the shared library
"""
raise NotImplementedError("Implement in derived classes")
def get_c_header(self) -> str:
"""
- Return a compiled C string header for this library
+ Returns a compiled C header string for the library
+
+ Returns
+ -------
+ str
+ C header string
"""
raise NotImplementedError("Implement in derived classes")
def get_registration_callback(self) -> str:
"""
- Return the name of a C registration callback for this library
+ Returns the name of a C registration callback for the library
+
+ Returns
+ -------
+ str
+ The name of the C registration callback
"""
raise NotImplementedError("Implement in derived classes")
def get_resource_configuration(self) -> ResourceConfig:
"""
- Return a ResourceConfig object that configures the library
+ Returns a ResourceConfig object that configures the library
+
+ Returns
+ -------
+ ResourceConfig
+ A ``ResourceConfig`` object
"""
# Return the default configuration
return ResourceConfig()
diff --git a/legate/core/operation.py b/legate/core/operation.py
index 07738aa51..bdc282809 100644
--- a/legate/core/operation.py
+++ b/legate/core/operation.py
@@ -158,6 +158,26 @@ def get_all_stores(self) -> OrderedSet[Store]:
return result
def add_alignment(self, store1: Store, store2: Store) -> None:
+ """
+ Sets an alignment between stores. Equivalent to the following code:
+
+ ::
+
+ symb1 = op.declare_partition(store1)
+ symb2 = op.declare_partition(store2)
+ op.add_constraint(symb1 == symb2)
+
+ Parameters
+ ----------
+ store1, store2 : Store
+ Stores to align
+
+ Raises
+ ------
+ ValueError
+ If the stores don't have the same shape or only one of them is
+ unbound
+ """
self._check_store(store1, allow_unbound=True)
self._check_store(store2, allow_unbound=True)
if not (
@@ -175,14 +195,44 @@ def add_alignment(self, store1: Store, store2: Store) -> None:
def add_broadcast(
self, store: Store, axes: Optional[Union[int, Iterable[int]]] = None
) -> None:
+ """
+ Sets a broadcasting constraint on the store. Equivalent to the
+ following code:
+
+ ::
+
+ symb = op.declare_partition(store)
+ op.add_constraint(symb.broadcast(axes))
+
+ Parameters
+ ----------
+ store : Store
+ Store to set a broadcasting constraint on
+ axes : int or Iterable[int], optional
+ Axes to broadcast. The entire store is replicated if no axes are
+ given.
+ """
self._check_store(store)
part = self._get_unique_partition(store)
self.add_constraint(part.broadcast(axes=axes))
def add_constraint(self, constraint: Constraint) -> None:
+ """
+ Adds a partitioning constraint to the operation
+
+ Parameters
+ ----------
+ constraint : Constraint
+ Partitioning constraint
+ """
self._constraints.append(constraint)
def execute(self) -> None:
+ """
+ Submits the operation to the runtime. There is no guarantee that the
+ operation will start the execution right upon the return of this
+ method.
+ """
self._context.runtime.submit(self)
@staticmethod
@@ -220,6 +270,23 @@ def get_name(self) -> str:
def declare_partition(
self, store: Store, disjoint: bool = True, complete: bool = True
) -> PartSym:
+ """
+ Creates a partition symbol for the store
+
+ Parameters
+ ----------
+ store : Store
+ Store to associate the partition symbol with
+ disjoint : bool, optional
+ ``True`` (by default) means the partition must be disjoint
+ complete : bool, optional
+ ``True`` (by default) means the partition must be complete
+
+ Returns
+ -------
+ PartSym
+ A partition symbol
+ """
sym = PartSym(
self._op_id,
self.get_name(),
@@ -253,16 +320,62 @@ def __init__(
@property
def side_effect(self) -> bool:
+ """
+ Indicates whether the task has side effects
+
+ Returns
+ -------
+ bool
+ ``True`` if the task has side effects
+ """
return self._side_effect
def set_side_effect(self, side_effect: bool) -> None:
+ """
+ Sets whether the task has side effects or not. A task is assumed to be
+ free of side effects by default if the task only has scalar arguments.
+
+ Parameters
+ ----------
+ side_effect : bool
+ A new boolean value indicating whether the task has side effects
+ """
self._side_effect = side_effect
@property
def concurrent(self) -> bool:
+ """
+ Indicates whether the task needs a concurrent task launch.
+
+ A concurrent task launch guarantees that all tasks will be active at
+ the same time and make progress concurrently. This means that the tasks
+ will and should be mapped to distinct processors and that no other
+ tasks will be interleaved at any given point in time during execution
+ of the concurrent tasks. This operational guarantee is useful
+ when the tasks need to perform collective operations or explicit
+ communication outside Legate, but comes with performance overhead
+ due to distributed rendezvous used in the launch.
+
+ Returns
+ -------
+ bool
+ ``True`` if the task needs a concurrent task launch
+ """
return self._concurrent
def set_concurrent(self, concurrent: bool) -> None:
+ """
+ Sets whether the task needs a concurrent task launch. Any task with at
+ least one communicator will implicitly use concurrent task launch, so
+ this method is to be used when the task needs a concurrent task launch
+ for a reason unknown to Legate.
+
+ Parameters
+ ----------
+ concurrent : bool
+ A new boolean value indicating whether the task needs a concurrent
+ task launch
+ """
self._concurrent = concurrent
def get_name(self) -> str:
@@ -272,6 +385,18 @@ def get_name(self) -> str:
def add_scalar_arg(
self, value: Any, dtype: Union[DTType, tuple[DTType]]
) -> None:
+ """
+ Adds a by-value argument to the task
+
+ Parameters
+ ----------
+ value : Any
+ Scalar value or a tuple of scalars (but no nested tuples)
+ dtype : DTType or tuple[DTType]
+ Data type descriptor for the scalar value. A descriptor ``(T,)``
+ means that the value is a tuple of elements of type ``T``.
+ """
+
self._scalar_args.append((value, dtype))
def add_dtype_arg(self, dtype: DTType) -> None:
@@ -279,17 +404,36 @@ def add_dtype_arg(self, dtype: DTType) -> None:
self._scalar_args.append((code, ty.int32))
def throws_exception(self, exn_type: type) -> None:
+ """
+ Declares that the task can raise an exception. If more than one
+ exception is added to the task, they are numbered by the order in which
+ they are added, and those numbers are used to refer to them in the C++
+ task.
+
+ Parameters
+ ----------
+ exn_type : Type
+ Type of exception
+ """
self._exn_types.append(exn_type)
@property
def can_raise_exception(self) -> bool:
+ """
+ Indicates whether the task can raise an exception
+
+ Returns
+ -------
+ bool
+ ``True`` if the task can raise an exception
+ """
return len(self._exn_types) > 0
def capture_traceback(self) -> None:
self._tb_repr = capture_traceback_repr()
def _add_scalar_args_to_launcher(self, launcher: TaskLauncher) -> None:
- for (arg, dtype) in self._scalar_args:
+ for arg, dtype in self._scalar_args:
launcher.add_scalar_arg(arg, dtype)
def _demux_scalar_stores_future(self, result: Future) -> None:
@@ -427,10 +571,16 @@ def _demux_scalar_stores(
self._demux_scalar_stores_future_map(result, launch_domain)
def add_nccl_communicator(self) -> None:
+ """
+ Adds an NCCL communicator to the task
+ """
comm = self._context.get_nccl_communicator()
self._comm_args.append(comm)
def add_cpu_communicator(self) -> None:
+ """
+ Adds a CPU communicator to the task
+ """
comm = self._context.get_cpu_communicator()
self._comm_args.append(comm)
@@ -470,9 +620,42 @@ def get_requirement(
tag = self.get_tag(strategy, part_symb)
return req, tag, store_part
+
+class AutoTask(AutoOperation, Task):
+ """
+ A type of tasks that are automatically parallelized
+ """
+
+ def __init__(
+ self,
+ context: Context,
+ task_id: int,
+ mapper_id: int,
+ op_id: int,
+ ) -> None:
+ super().__init__(
+ context=context,
+ task_id=task_id,
+ mapper_id=mapper_id,
+ op_id=op_id,
+ )
+ self._reusable_stores: list[Tuple[Store, PartSym]] = []
+ self._reuse_map: dict[int, Store] = {}
+
def add_input(
self, store: Store, partition: Optional[PartSym] = None
) -> None:
+ """
+ Adds a store as input to the task
+
+ Parameters
+ ----------
+ store : Store
+ Store to pass as input
+ partition : PartSym, optional
+ Partition to associate with the store. The default partition is
+ picked if none is given.
+ """
self._check_store(store)
if partition is None:
partition = self._get_unique_partition(store)
@@ -482,6 +665,17 @@ def add_input(
def add_output(
self, store: Store, partition: Optional[PartSym] = None
) -> None:
+ """
+ Adds a store as output to the task
+
+ Parameters
+ ----------
+ store : Store
+ Store to pass as output
+ partition : PartSym, optional
+ Partition to associate with the store. The default partition is
+ picked if none is given.
+ """
self._check_store(store, allow_unbound=True)
if store.kind is Future:
self._scalar_outputs.append(len(self._outputs))
@@ -495,6 +689,19 @@ def add_output(
def add_reduction(
self, store: Store, redop: int, partition: Optional[PartSym] = None
) -> None:
+ """
+ Adds a store to the task for reduction
+
+ Parameters
+ ----------
+ store : Store
+ Store to pass for reduction
+ redop : int
+ Reduction operator ID
+ partition : PartSym, optional
+ Partition to associate with the store. The default partition is
+ picked if none is given.
+ """
self._check_store(store)
if store.kind is Future:
self._scalar_reductions.append(len(self._reductions))
@@ -503,24 +710,6 @@ def add_reduction(
self._reductions.append((store, redop))
self._reduction_parts.append(partition)
-
-class AutoTask(AutoOperation, Task):
- def __init__(
- self,
- context: Context,
- task_id: int,
- mapper_id: int,
- op_id: int,
- ) -> None:
- super().__init__(
- context=context,
- task_id=task_id,
- mapper_id=mapper_id,
- op_id=op_id,
- )
- self._reusable_stores: list[Tuple[Store, PartSym]] = []
- self._reuse_map: dict[int, Store] = {}
-
def record_reuse(
self,
strategy: Strategy,
@@ -604,7 +793,7 @@ def launch(self, strategy: Strategy) -> None:
# We update the key partition of a store only when it gets updated
store.set_key_partition(store_part.partition)
- for ((store, redop), part_symb) in zip(
+ for (store, redop), part_symb in zip(
self._reductions, self._reduction_parts
):
req, tag, store_part = self.get_requirement(
@@ -618,7 +807,7 @@ def launch(self, strategy: Strategy) -> None:
store, req, tag=tag, read_write=can_read_write
)
- for (store, part_symb) in zip(self._outputs, self._output_parts):
+ for store, part_symb in zip(self._outputs, self._output_parts):
if not store.unbound:
continue
fspace = strategy.get_field_space(part_symb)
@@ -643,6 +832,10 @@ def launch(self, strategy: Strategy) -> None:
class ManualTask(Operation, Task):
+ """
+ A type of tasks that need explicit parallelization
+ """
+
def __init__(
self,
context: Context,
@@ -685,6 +878,16 @@ def add_input(
arg: Union[Store, StorePartition],
proj: Optional[ProjFn] = None,
) -> None:
+ """
+ Adds a store as input to the task
+
+ Parameters
+ ----------
+ arg : Store or StorePartition
+ Store or store partition to pass as input
+ proj : ProjFn, optional
+ Projection function
+ """
self._check_arg(arg)
if isinstance(arg, Store):
self._input_parts.append(arg.partition(REPLICATE))
@@ -697,10 +900,25 @@ def add_output(
arg: Union[Store, StorePartition],
proj: Optional[ProjFn] = None,
) -> None:
+ """
+ Adds a store as output to the task
+
+ Parameters
+ ----------
+ arg : Store or StorePartition
+ Store or store partition to pass as output
+ proj : ProjFn, optional
+ Projection function
+
+ Raises
+ ------
+ NotImplementedError
+ If the store is unbound
+ """
self._check_arg(arg)
if isinstance(arg, Store):
if arg.unbound:
- raise ValueError(
+ raise NotImplementedError(
"Unbound store cannot be used with "
"manually parallelized task"
)
@@ -718,6 +936,16 @@ def add_reduction(
redop: int,
proj: Optional[ProjFn] = None,
) -> None:
+ """
+ Adds a store to the task for reduction
+
+ Parameters
+ ----------
+ arg : Store or StorePartition
+ Store or store partition to pass for reduction
+ proj : ProjFn, optional
+ Projection function
+ """
self._check_arg(arg)
if isinstance(arg, Store):
if arg.kind is Future:
@@ -791,6 +1019,10 @@ def launch(self, strategy: Strategy) -> None:
class Copy(AutoOperation):
+ """
+ A special kind of operation for copying data from one store to another.
+ """
+
def __init__(
self,
context: Context,
@@ -813,49 +1045,141 @@ def get_name(self) -> str:
def inputs(self) -> list[Store]:
return super().inputs + self._source_indirects + self._target_indirects
- def add_output(
- self, store: Store, partition: Optional[PartSym] = None
- ) -> None:
+ def add_input(self, store: Store) -> None:
+ """
+ Adds a store as a source of the copy
+
+ Parameters
+ ----------
+ store : Store
+ Source store
+
+ Raises
+ ------
+ ValueError
+ If the store is scalar or unbound
+ """
+ if store.kind is Future or store.unbound:
+ raise ValueError(
+ "Copy input must be a normal, region-backed store"
+ )
+ self._check_store(store)
+ partition = self._get_unique_partition(store)
+ self._inputs.append(store)
+ self._input_parts.append(partition)
+
+ def add_output(self, store: Store) -> None:
+ """
+ Adds a store as a target of the copy. To avoid ambiguity in matching
+ sources and targets, one copy cannot have both normal targets and
+ reduction targets.
+
+ Parameters
+ ----------
+ store : Store
+ Target store
+
+ Raises
+ ------
+ RuntimeError
+ If the copy already has a reduction target
+ ValueError
+ If the store is scalar or unbound
+ """
if len(self._reductions) > 0:
raise RuntimeError(
"Copy targets must be either all normal outputs or reductions"
)
- super().add_output(store, partition)
+ if store.kind is Future or store.unbound:
+ raise ValueError(
+ "Copy target must be a normal, region-backed store"
+ )
- def add_reduction(
- self, store: Store, redop: int, partition: Optional[PartSym] = None
- ) -> None:
+ self._check_store(store)
+ partition = self._get_unique_partition(store)
+ self._outputs.append(store)
+ self._output_parts.append(partition)
+
+ def add_reduction(self, store: Store, redop: int) -> None:
+ """
+ Adds a store as a reduction target of the copy. To avoid ambiguity in
+ matching sources and targets, one copy cannot have both normal targets
+ and reduction targets.
+
+ Parameters
+ ----------
+ store : Store
+ Reduction target store
+ redop : int
+ Reduction operator ID
+
+ Raises
+ ------
+ RuntimeError
+ If the copy already has a normal target
+ ValueError
+ If the store is scalar or unbound
+ """
if len(self._outputs) > 0:
raise RuntimeError(
"Copy targets must be either all normal outputs or reductions"
)
- super().add_reduction(store, redop, partition)
+ if store.kind is Future or store.unbound:
+ raise ValueError(
+ "Copy target must be a normal, region-backed store"
+ )
+ self._check_store(store)
+ partition = self._get_unique_partition(store)
+ self._reductions.append((store, redop))
+ self._reduction_parts.append(partition)
- def add_source_indirect(
- self, store: Store, partition: Optional[PartSym] = None
- ) -> None:
+ def add_source_indirect(self, store: Store) -> None:
+ """
+ Adds an indirection for sources. A copy can have only up to one source
+ indirection.
+
+ Parameters
+ ----------
+ store : Store
+ Source indirection store
+
+ Raises
+ ------
+ RuntimeError
+ If the copy already has a source indirection
+ """
if len(self._source_indirects) != 0:
raise RuntimeError(
"There can be only up to one source indirection store for "
"a Copy operation"
)
self._check_store(store)
- if partition is None:
- partition = self._get_unique_partition(store)
+ partition = self._get_unique_partition(store)
self._source_indirects.append(store)
self._source_indirect_parts.append(partition)
- def add_target_indirect(
- self, store: Store, partition: Optional[PartSym] = None
- ) -> None:
+ def add_target_indirect(self, store: Store) -> None:
+ """
+ Adds an indirection for targets. A copy can have only up to one target
+ indirection.
+
+ Parameters
+ ----------
+ store : Store
+ Target indirection store
+
+ Raises
+ ------
+ RuntimeError
+ If the copy already has a target indirection
+ """
if len(self._target_indirects) != 0:
raise RuntimeError(
"There can be only up to one target indirection store for "
"a Copy operation"
)
self._check_store(store)
- if partition is None:
- partition = self._get_unique_partition(store)
+ partition = self._get_unique_partition(store)
self._target_indirects.append(store)
self._target_indirect_parts.append(partition)
@@ -964,7 +1288,7 @@ def launch(self, strategy: Strategy) -> None:
else:
launcher.add_output(store, req, tag=tag)
- for ((store, redop), part_symb) in zip(
+ for (store, redop), part_symb in zip(
self._reductions, self._reduction_parts
):
req, tag, store_part = self.get_requirement(
@@ -994,6 +1318,10 @@ def launch(self, strategy: Strategy) -> None:
class Fill(AutoOperation):
+ """
+ A special kind of operation for filling a store with constant values
+ """
+
def __init__(
self,
context: Context,
@@ -1009,8 +1337,18 @@ def __init__(
raise ValueError("Fill lhs must be a bound Store")
if lhs.kind is Future:
raise ValueError("Fill lhs must be a RegionField-backed Store")
- super().add_input(value)
- super().add_output(lhs)
+ self._add_value(value)
+ self._add_lhs(lhs)
+
+ def _add_value(self, value: Store) -> None:
+ partition = self._get_unique_partition(value)
+ self._inputs.append(value)
+ self._input_parts.append(partition)
+
+ def _add_lhs(self, lhs: Store) -> None:
+ partition = self._get_unique_partition(lhs)
+ self._outputs.append(lhs)
+ self._output_parts.append(partition)
def get_name(self) -> str:
libname = self.context.library.get_name()
@@ -1033,21 +1371,6 @@ def add_constraint(self, constraint: Constraint) -> None:
"User partitioning constraints are not allowed for fills"
)
- def add_input(
- self, store: Store, partition: Optional[PartSym] = None
- ) -> None:
- raise TypeError("No further inputs can be added to fills")
-
- def add_output(
- self, store: Store, partition: Optional[PartSym] = None
- ) -> None:
- raise TypeError("No further outputs can be added to fills")
-
- def add_reduction(
- self, store: Store, redop: int, partition: Optional[PartSym] = None
- ) -> None:
- raise TypeError("No reductions can be added to fills")
-
def launch(self, strategy: Strategy) -> None:
lhs = self._outputs[0]
lhs_part_sym = self._output_parts[0]
@@ -1096,6 +1419,19 @@ def __init__(
self._radix = radix
self._task_id = task_id
+ def add_input(self, store: Store) -> None:
+ self._check_store(store)
+ partition = self._get_unique_partition(store)
+ self._inputs.append(store)
+ self._input_parts.append(partition)
+
+ def add_output(self, store: Store) -> None:
+ assert store.unbound
+ partition = self._get_unique_partition(store)
+ self._unbound_outputs.append(len(self._outputs))
+ self._outputs.append(store)
+ self._output_parts.append(partition)
+
def launch(self, strategy: Strategy) -> None:
assert len(self._inputs) == 1 and len(self._outputs) == 1
diff --git a/legate/core/partition.py b/legate/core/partition.py
index d5904c319..c0eb24ad6 100644
--- a/legate/core/partition.py
+++ b/legate/core/partition.py
@@ -227,7 +227,7 @@ def is_complete_for(self, extents: Shape, offsets: Shape) -> bool:
my_lo = self._offset
my_hi = self._offset + self.tile_shape * self._color_shape
- return my_lo <= offsets and offsets + extents <= my_hi
+ return all(my_lo <= offsets) and all(offsets + extents <= my_hi)
def is_disjoint_for(self, launch_domain: Optional[Rect]) -> bool:
return (
@@ -236,7 +236,7 @@ def is_disjoint_for(self, launch_domain: Optional[Rect]) -> bool:
)
def has_color(self, color: Shape) -> bool:
- return color >= 0 and color < self._color_shape
+ return all(color >= 0) and all(color < self._color_shape)
@lru_cache
def get_subregion_size(self, extents: Shape, color: Shape) -> Shape:
@@ -396,7 +396,7 @@ def is_disjoint_for(self, launch_domain: Optional[Rect]) -> bool:
return True
def has_color(self, color: Shape) -> bool:
- return color >= 0 and color < self._color_shape
+ return all(color >= 0) and all(color < self._color_shape)
def translate(self, offset: Shape) -> None:
raise NotImplementedError("This method shouldn't be invoked")
diff --git a/legate/core/resource.py b/legate/core/resource.py
index 240c7b6b8..e3d85e43e 100644
--- a/legate/core/resource.py
+++ b/legate/core/resource.py
@@ -22,20 +22,11 @@
class ResourceConfig:
- __slots__ = (
- "max_tasks",
- "max_mappers",
- "max_reduction_ops",
- "max_projections",
- "max_shardings",
- )
-
- def __init__(self) -> None:
- self.max_tasks = 1_000_000
- self.max_mappers = 1
- self.max_reduction_ops = 0
- self.max_projections = 0
- self.max_shardings = 0
+ max_tasks = 1_000_000
+ max_reduction_ops = 0
+ max_mappers = 1
+ max_projections = 0
+ max_shardings = 0
class ResourceScope:
diff --git a/legate/core/runtime.py b/legate/core/runtime.py
index 637b09f72..00b48510f 100644
--- a/legate/core/runtime.py
+++ b/legate/core/runtime.py
@@ -26,7 +26,7 @@
from legion_top import add_cleanup_item, top_level
-from ..util.args import ArgSpec, Argument, parse_library_command_args
+from ..settings import settings
from . import ffi # Make sure we only have one ffi instance
from . import (
Fence,
@@ -79,49 +79,6 @@
_LEGATE_FIELD_ID_BASE = 1000
-ARGS = [
- Argument(
- "consensus",
- ArgSpec(
- action="store_true",
- default=False,
- dest="consensus",
- help="Turn on consensus match on single node (for testing).",
- ),
- ),
- Argument(
- "cycle-check",
- ArgSpec(
- action="store_true",
- default=False,
- dest="cycle_check",
- help=(
- "Check for reference cycles involving RegionField objects on "
- "script exit (developer option). When such cycles arise "
- "during execution, they stop used RegionFields from getting "
- "collected and reused for new Stores, thus increasing memory "
- "pressure. By default this check will miss any RegionField "
- "cycles the garbage collector collected during execution; "
- "run gc.disable() at the beginning of the program to avoid "
- "this."
- ),
- ),
- ),
- Argument(
- "future-leak-check",
- ArgSpec(
- action="store_true",
- default=False,
- dest="future_leak_check",
- help=(
- "Check for reference cycles keeping Future/FutureMap objects "
- "alive after Legate runtime exit (developer option). Such "
- "leaks can result in Legion runtime shutdown hangs."
- ),
- ),
- ),
-]
-
# A helper class for doing field management with control replication
@dataclass(frozen=True)
@@ -961,8 +918,6 @@ def __init__(self, core_library: CoreLib) -> None:
focus on implementing their domain logic.
"""
- self._args = parse_library_command_args("legate", ARGS)
-
# Record whether we need to run finalize tasks
# Key off whether we are being loaded in a context or not
try:
@@ -1047,7 +1002,7 @@ def __init__(self, core_library: CoreLib) -> None:
)
self._field_manager_class = (
ConsensusMatchingFieldManager
- if self._num_nodes > 1 or self._args.consensus
+ if self._num_nodes > 1 or settings.consensus()
else FieldManager
)
self._max_lru_length = int(
@@ -1132,14 +1087,38 @@ def empty_argmap(self) -> ArgumentMap:
@property
def num_cpus(self) -> int:
+ """
+ Returns the total number of CPUs in the system
+
+ Returns
+ -------
+ int
+ Number of CPUs
+ """
return self._num_cpus
@property
def num_omps(self) -> int:
+ """
+ Returns the total number of OpenMP processors in the system
+
+ Returns
+ -------
+ int
+ Number of OpenMP processors
+ """
return self._num_omps
@property
def num_gpus(self) -> int:
+ """
+ Returns the total number of GPUs in the system
+
+ Returns
+ -------
+ int
+ Number of GPUs
+ """
return self._num_gpus
@property
@@ -1164,6 +1143,19 @@ def field_match_manager(self) -> FieldMatchManager:
return self._field_match_manager
def register_library(self, library: Library) -> Context:
+ """
+ Registers a library to the runtime.
+
+ Parameters
+ ----------
+ library : Library
+ Library object
+
+ Returns
+ -------
+ Context
+ A new context for the library
+ """
from .context import Context
libname = library.get_name()
@@ -1356,6 +1348,23 @@ def get_transform_code(self, name: str) -> int:
)
def create_future(self, data: Any, size: int) -> Future:
+ """
+ Creates a future from a buffer holding a scalar value. The value is
+ copied to the future.
+
+ Parameters
+ ----------
+ data : buffer
+ Buffer that holds a scalar value
+
+ size : int
+ Size of the value
+
+ Returns
+ -------
+ Future
+ A new future
+ """
future = Future()
future.set_value(self.legion_runtime, data, size)
return future
@@ -1691,7 +1700,7 @@ def raise_exceptions(self) -> None:
def _cleanup_legate_runtime() -> None:
global runtime
- future_leak_check = runtime._args.future_leak_check
+ future_leak_check = settings.future_leak_check()
runtime.destroy()
del runtime
gc.collect()
@@ -1721,7 +1730,7 @@ def __del__(self) -> None:
find_cycles(False)
-if runtime._args.cycle_check:
+if settings.cycle_check():
# The first thing that legion_top does after executing the user script
# is to remove the newly created "__main__" module. We intercept this
# deletion operation to perform our check.
@@ -1741,4 +1750,11 @@ def legate_add_library(library: Library) -> None:
def get_legate_runtime() -> Runtime:
+ """
+ Returns the Legate runtime
+
+ Returns
+ -------
+ Legate runtime object
+ """
return runtime
diff --git a/legate/core/shape.py b/legate/core/shape.py
index 98207191f..af84a6132 100644
--- a/legate/core/shape.py
+++ b/legate/core/shape.py
@@ -32,6 +32,11 @@ def _cast_tuple(value: int | Iterable[int], ndim: int) -> tuple[int, ...]:
return tuple(value)
+class _ShapeComparisonResult(tuple[bool, ...]):
+ def __bool__(self) -> bool:
+ assert False, "use any() or all()"
+
+
class Shape:
_extents: Union[tuple[int, ...], None]
_ispace: Union[IndexSpace, None]
@@ -41,6 +46,19 @@ def __init__(
extents: Optional[ExtentLike] = None,
ispace: Optional[IndexSpace] = None,
) -> None:
+ """
+ Constructs a new shape object
+
+ Parameters
+ ----------
+ extents : int, Iterable[int], or Shape, optional
+ Extents to construct the shape object with. Must be passed unless an
+ ``ispace`` is given.
+ ispace : IndexSpace, optional
+ A Legion index space handle to construct the shape object with.
+ Must not be used by clients explicitly, as they don't have access
+ to index spaces.
+ """
if isinstance(extents, int):
self._extents = (extents,)
self._ispace = None
@@ -54,6 +72,18 @@ def __init__(
@property
def extents(self) -> tuple[int, ...]:
+ """
+ Returns the extents of the shape in a tuple
+
+ Returns
+ -------
+ tuple[int]
+ Extents of the shape
+
+ Notes
+ -----
+ Can block on the producer task
+ """
if self._extents is None:
assert self._ispace is not None
bounds = self._ispace.get_bounds()
@@ -95,6 +125,14 @@ def __contains__(self, value: object) -> bool:
@property
def fixed(self) -> bool:
+ """
+ Indicates whether the shape's extents are already computed
+
+ Returns
+ -------
+ bool
+ If ``True``, the shape has fixed extents
+ """
return self._extents is not None
@property
@@ -103,6 +141,15 @@ def ispace(self) -> Union[IndexSpace, None]:
@property
def ndim(self) -> int:
+ """
+ Dimension of the shape. Unlike the ``extents`` property, this is
+ non-blocking.
+
+ Returns
+ -------
+ int
+ Dimension of the shape
+ """
if self._extents is None:
assert self._ispace is not None
return self._ispace.get_dim()
@@ -122,9 +169,33 @@ def get_index_space(self, runtime: Runtime) -> IndexSpace:
return self._ispace
def volume(self) -> int:
+ """
+ Returns the shape's volume
+
+ Returns
+ -------
+ int
+ Volume of the shape
+
+ Notes
+ -----
+ Can block on the producer task
+ """
return reduce(lambda x, y: x * y, self.extents, 1)
def sum(self) -> int:
+ """
+ Returns a sum of the extents
+
+ Returns
+ -------
+ int
+ Sum of the extents
+
+ Notes
+ -----
+ Can block on the producer task
+ """
return reduce(lambda x, y: x + y, self.extents, 0)
def __hash__(self) -> int:
@@ -134,6 +205,23 @@ def __hash__(self) -> int:
return hash((self.__class__, True, self._extents))
def __eq__(self, other: object) -> bool:
+ """
+ Checks whether the shape is identical to a given shape
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ Shape to compare with
+
+ Returns
+ -------
+ bool
+ ``True`` if the shapes are identical
+
+ Notes
+ -----
+ Can block on the producer task
+ """
if isinstance(other, Shape):
if (
self._ispace is not None
@@ -154,43 +242,132 @@ def __eq__(self, other: object) -> bool:
else:
return False
- def __le__(self, other: ExtentLike) -> bool:
+ def __le__(self, other: ExtentLike) -> _ShapeComparisonResult:
+ """
+ Returns the result of element-wise ``<=``.
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ Shape to compare with
+
+ Returns
+ -------
+ tuple[bool]
+ Result of element-wise ``<=``.
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
if isinstance(other, Shape)
else _cast_tuple(other, self.ndim)
)
- return len(lh) == len(rh) and lh <= rh
-
- def __lt__(self, other: ExtentLike) -> bool:
+ assert len(lh) == len(rh)
+ return _ShapeComparisonResult(l <= r for (l, r) in zip(lh, rh))
+
+ def __lt__(self, other: ExtentLike) -> _ShapeComparisonResult:
+ """
+ Returns the result of element-wise ``<``.
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ Shape to compare with
+
+ Returns
+ -------
+ tuple[bool]
+ Result of element-wise ``<``.
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
if isinstance(other, Shape)
else _cast_tuple(other, self.ndim)
)
- return len(lh) == len(rh) and lh < rh
-
- def __ge__(self, other: ExtentLike) -> bool:
+ assert len(lh) == len(rh)
+ return _ShapeComparisonResult(l < r for (l, r) in zip(lh, rh))
+
+ def __ge__(self, other: ExtentLike) -> _ShapeComparisonResult:
+ """
+ Returns the result of element-wise ``>=``.
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ Shape to compare with
+
+ Returns
+ -------
+ tuple[bool]
+ Result of element-wise ``>=``.
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
if isinstance(other, Shape)
else _cast_tuple(other, self.ndim)
)
- return len(lh) == len(rh) and lh >= rh
-
- def __gt__(self, other: ExtentLike) -> bool:
+ assert len(lh) == len(rh)
+ return _ShapeComparisonResult(l >= r for (l, r) in zip(lh, rh))
+
+ def __gt__(self, other: ExtentLike) -> _ShapeComparisonResult:
+ """
+ Returns the result of element-wise ``>``.
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ Shape to compare with
+
+ Returns
+ -------
+ tuple[bool]
+ Result of element-wise ``>``.
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
if isinstance(other, Shape)
else _cast_tuple(other, self.ndim)
)
- return len(lh) == len(rh) and lh > rh
+ assert len(lh) == len(rh)
+ return _ShapeComparisonResult(l > r for (l, r) in zip(lh, rh))
def __add__(self, other: ExtentLike) -> Shape:
+ """
+ Returns an element-wise addition of the shapes
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ A shape to add to this shape
+
+ Returns
+ -------
+ Shape
+ Result of element-wise addition
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
@@ -200,6 +377,23 @@ def __add__(self, other: ExtentLike) -> Shape:
return Shape(tuple(a + b for (a, b) in zip(lh, rh)))
def __sub__(self, other: ExtentLike) -> Shape:
+ """
+ Returns an element-wise subtraction between the shapes
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ A shape to subtract from this shape
+
+ Returns
+ -------
+ Shape
+ Result of element-wise subtraction
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
@@ -209,6 +403,23 @@ def __sub__(self, other: ExtentLike) -> Shape:
return Shape(tuple(a - b for (a, b) in zip(lh, rh)))
def __mul__(self, other: ExtentLike) -> Shape:
+ """
+ Returns an element-wise multiplication of the shapes
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ A shape to multiply with this shape
+
+ Returns
+ -------
+ Shape
+ Result of element-wise multiplication
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
@@ -218,6 +429,23 @@ def __mul__(self, other: ExtentLike) -> Shape:
return Shape(tuple(a * b for (a, b) in zip(lh, rh)))
def __mod__(self, other: ExtentLike) -> Shape:
+ """
+ Returns the result of element-wise modulo operation
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ A shape to take the modulo by
+
+ Returns
+ -------
+ Shape
+ Result of element-wise modulo operation
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
@@ -227,6 +455,23 @@ def __mod__(self, other: ExtentLike) -> Shape:
return Shape(tuple(a % b for (a, b) in zip(lh, rh)))
def __floordiv__(self, other: ExtentLike) -> Shape:
+ """
+ Returns the result of element-wise integer division
+
+ Parameters
+ ----------
+ other : Shape or Iterable[int]
+ A shape to divide this shape by
+
+ Returns
+ -------
+ Shape
+ Result of element-wise integer division
+
+ Notes
+ -----
+ Can block on the producer task
+ """
lh = self.extents
rh = (
other.extents
@@ -236,26 +481,138 @@ def __floordiv__(self, other: ExtentLike) -> Shape:
return Shape(tuple(a // b for (a, b) in zip(lh, rh)))
def drop(self, dim: int) -> Shape:
+ """
+ Removes a dimension from the shape
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to remove
+
+ Returns
+ -------
+ Shape
+ Shape with one less dimension
+
+ Notes
+ -----
+ Can block on the producer task
+ """
extents = self.extents
return Shape(extents[:dim] + extents[dim + 1 :])
def update(self, dim: int, new_value: int) -> Shape:
+ """
+ Replaces the extent of a dimension with a new extent
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to replace
+
+ new_value : int
+ New extent
+
+ Returns
+ -------
+ Shape
+ Shape with the chosen dimension updated
+
+ Notes
+ -----
+ Can block on the producer task
+ """
return self.replace(dim, (new_value,))
def replace(self, dim: int, new_values: Iterable[int]) -> Shape:
+ """
+ Replaces a dimension with multiple dimensions
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to replace
+
+ new_values : Iterable[int]
+ Extents of the new dimensions
+
+ Returns
+ -------
+ Shape
+ Shape with the chosen dimension replaced
+
+ Notes
+ -----
+ Can block on the producer task
+ """
if not isinstance(new_values, tuple):
new_values = tuple(new_values)
extents = self.extents
return Shape(extents[:dim] + new_values + extents[dim + 1 :])
def insert(self, dim: int, new_value: int) -> Shape:
+ """
+ Inserts a new dimension
+
+ Parameters
+ ----------
+ dim : int
+ Location to insert the new dimension
+
+ new_value : int
+ Extent of the new dimension
+
+ Returns
+ -------
+ Shape
+ Shape with one more dimension
+
+ Notes
+ -----
+ Can block on the producer task
+ """
extents = self.extents
return Shape(extents[:dim] + (new_value,) + extents[dim:])
def map(self, mapping: tuple[int, ...]) -> Shape:
+ """
+ Applies a mapping to each extent in the shape
+
+ Parameters
+ ----------
+ mapping : tuple[int]
+ Source dimension for each dimension of the result
+
+ Returns
+ -------
+ Shape
+ Shape with the extents replaced
+
+ Notes
+ -----
+ Can block on the producer task
+ """
return Shape(tuple(self[mapping[dim]] for dim in range(self.ndim)))
def strides(self) -> Shape:
+ """
+ Computes strides of the shape. The last dimension is considered the
+ most rapidly changing one. For example, if the shape is ``(3, 4, 5)``,
+ the strides are
+
+ ::
+
+ (20, 5, 1)
+
+ Returns
+ -------
+ Shape
+ Strides of the shape
+
+ Notes
+ -----
+ Can block on the producer task
+ """
strides: tuple[int, ...] = ()
stride = 1
for size in reversed(self.extents):
diff --git a/legate/core/store.py b/legate/core/store.py
index 0d55cbf62..309585999 100644
--- a/legate/core/store.py
+++ b/legate/core/store.py
@@ -178,10 +178,9 @@ def record_detach(detach: Union[Detach, IndexDetach]) -> None:
mapper=context.mapper_id,
provenance=context.provenance,
)
- # If we're not sharing then there is no need to map or restrict the
- # attachment
+ attach.set_restricted(False)
+ # If we're not sharing then there is no need to map the attachment
if not share:
- attach.set_restricted(False)
attach.set_mapped(False)
else:
self.physical_region_mapped = True
@@ -209,7 +208,7 @@ def record_detach(detach: Union[Detach, IndexDetach]) -> None:
else field_type
)
shard_local_data = {}
- for (c, buf) in alloc.shard_local_buffers.items():
+ for c, buf in alloc.shard_local_buffers.items():
subregion = alloc.partition.get_child(c)
bounds = subregion.index_space.get_bounds()
if buf.shape != tuple(
@@ -231,9 +230,7 @@ def record_detach(detach: Union[Detach, IndexDetach]) -> None:
provenance=context.provenance,
)
index_attach.set_deduplicate_across_shards(True)
- # If we're not sharing there is no need to restrict the attachment
- if not share:
- index_attach.set_restricted(False)
+ index_attach.set_restricted(False)
external_resources = runtime.dispatch(index_attach)
# We don't need to flush the contents back to the attached memory
# if this is an internal temporary allocation.
@@ -786,10 +783,26 @@ def __init__(
@property
def store(self) -> Store:
+ """
+ Returns the store of the store partition
+
+ Returns
+ -------
+ Store
+ A ``Store`` object wrapped in the store partition
+ """
return self._store
@property
def partition(self) -> PartitionBase:
+ """
+ Returns the partition descriptor of the store partition
+
+ Returns
+ -------
+ PartitionBase
+ A ``PartitionBase`` object wrapped in the store partition
+ """
return self._partition
@property
@@ -797,6 +810,19 @@ def transform(self) -> TransformStackBase:
return self._store.transform
def get_child_store(self, *indices: int) -> Store:
+ """
+ Returns the sub-store of a given color
+
+ Parameters
+ ----------
+ indices : tuple[int]
+ Color of the sub-store
+
+ Returns
+ -------
+ Store
+ The sub-store of the chosen color
+ """
color = self.transform.invert_color(Shape(indices))
child_storage = self._storage_partition.get_child(color)
child_transform = self.transform
@@ -895,6 +921,15 @@ def move_data(self, other: Store) -> None:
@property
def shape(self) -> Shape:
+ """
+ Returns the shape of the store. Flushes the scheduling window if the
+ store is unbound and has no shape assigned.
+
+ Returns
+ -------
+ Shape
+ The store's shape
+ """
if self._shape is None:
# If someone wants to access the shape of an unbound
# store before it is set, that means the producer task is
@@ -915,6 +950,14 @@ def shape(self) -> Shape:
@property
def ndim(self) -> int:
+ """
+ Returns the number of dimensions of the store.
+
+ Returns
+ -------
+ int
+ The number of dimensions
+ """
if self._shape is None:
assert self._ndim is not None
return self._ndim
@@ -923,12 +966,25 @@ def ndim(self) -> int:
@property
def size(self) -> int:
+ """
+ Returns the number of elements in the store.
+
+ Returns
+ -------
+ int
+ The store's size
+ """
return prod(self.shape) if self.ndim > 0 else 1
@property
def type(self) -> _Dtype:
"""
- Return the type of the data in this storage primitive
+ Returns the element type of the store.
+
+ Returns
+ -------
+ _Dtype
+ Type of elements in the store
"""
return self._dtype
@@ -938,24 +994,42 @@ def get_dtype(self) -> _Dtype:
@property
def kind(self) -> Union[Type[RegionField], Type[Future]]:
"""
- Return the type of the Legion storage object backing the data in this
- storage object: either Future, or RegionField.
+ Returns the kind of backing storage
+
+ Returns
+ -------
+ Type
+ `RegionField` or `Future`
"""
return self._storage.kind
@property
def unbound(self) -> bool:
+ """
+ Indicates whether the store is unbound
+
+ Returns
+ -------
+ bool
+ ``True`` if the store is unbound
+ """
return self._shape is None
@property
def scalar(self) -> bool:
+ """
+ Indicates whether the store is scalar (i.e., backed by a `Future` and
+ of size 1)
+
+ Returns
+ -------
+ bool
+ ``True`` if the store is scalar
+ """
return self.kind is Future and self.shape.volume() == 1
@property
def storage(self) -> Union[RegionField, Future]:
- """
- Return the Legion container backing this Store.
- """
if self.unbound:
raise RuntimeError(
"Storage of a variable size store cannot be retrieved "
@@ -976,10 +1050,26 @@ def extents(self) -> Shape:
@property
def transform(self) -> TransformStackBase:
+ """
+ Returns a transformation attached to the store
+
+ Returns
+ -------
+ TransformStackBase
+ Transformation attached to the store
+ """
return self._transform
@property
def transformed(self) -> bool:
+ """
+ Indicates whether the store is transformed
+
+ Returns
+ -------
+ bool
+ If ``True``, the store is transformed
+ """
return not self._transform.bottom
def attach_external_allocation(
@@ -1038,6 +1128,46 @@ def __repr__(self) -> str:
# Convert a store in N-D space to that in (N+1)-D space.
# The extra_dim specifies the added dimension
def promote(self, extra_dim: int, dim_size: int = 1) -> Store:
+ """
+ Adds an extra dimension to the store. Value of ``extra_dim`` decides
+ where a new dimension should be added, and each dimension `i`, where
+ `i` >= ``extra_dim``, is mapped to dimension `i+1` in a returned store.
+ A returned store provides a view to the input store where the values
+ are broadcasted along the new dimension.
+
+ For example, for a 1D store ``A`` that contains ``[1, 2, 3]``,
+ ``A.promote(0, 2)`` yields a store equivalent to:
+
+ ::
+
+ [[1, 2, 3],
+ [1, 2, 3]]
+
+ whereas ``A.promote(1, 2)`` yields:
+
+ ::
+
+ [[1, 1],
+ [2, 2],
+ [3, 3]]
+
+ Parameters
+ ----------
+ extra_dim : int
+ Position for a new dimension
+ dim_size : int, optional
+ Extent of the new dimension
+
+ Returns
+ -------
+ Store
+ A new store with an extra dimension
+
+ Raises
+ ------
+ ValueError
+ If ``extra_dim`` is not a valid dimension name
+ """
extra_dim = extra_dim + self.ndim if extra_dim < 0 else extra_dim
if extra_dim < 0 or extra_dim > self.ndim:
raise ValueError(
@@ -1060,6 +1190,34 @@ def promote(self, extra_dim: int, dim_size: int = 1) -> Store:
# Take a hyperplane of an N-D store for a given index
# to create an (N-1)-D store
def project(self, dim: int, index: int) -> Store:
+ """
+ Projects out a dimension of the store. Each dimension `i`, where
+ `i` > ``dim``, is mapped to dimension `i-1` in a returned store.
+ A returned store provides a view to the input store where the values
+ are on hyperplane :math:`x_\\mathtt{dim} = \\mathtt{index}`.
+
+ For example, if a 2D store ``A`` contains ``[[1, 2], [3, 4]]``,
+ ``A.project(0, 1)`` yields a store equivalent to ``[3, 4]``, whereas
+ ``A.project(1, 0)`` yields ``[1, 3]``.
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to project out
+ index : int
+ Index on the chosen dimension
+
+ Returns
+ -------
+ Store
+ A new store with one fewer dimension
+
+ Raises
+ ------
+ ValueError
+ If ``dim`` is not a valid dimension name or ``index`` is
+ out of bounds
+ """
dim = dim + self.ndim if dim < 0 else dim
if dim < 0 or dim >= self.ndim:
raise ValueError(
@@ -1097,6 +1255,68 @@ def project(self, dim: int, index: int) -> Store:
)
def slice(self, dim: int, sl: slice) -> Store:
+ """
+ Slices a contiguous sub-section of the store.
+
+ For example, consider a 2D store ``A``
+
+ ::
+
+ [[1, 2, 3],
+ [4, 5, 6],
+ [7, 8, 9]]
+
+ A slicing ``A.slice(0, slice(1, None))`` yields:
+
+ ::
+
+ [[4, 5, 6],
+ [7, 8, 9]]
+
+ The result store will look like this on a different slicing call
+ ``A.slice(1, slice(None, 2))``:
+
+ ::
+
+ [[1, 2],
+ [4, 5],
+ [7, 8]]
+
+ Finally, chained slicing calls
+
+ ::
+
+ A.slice(0, slice(1, None)).slice(1, slice(None, 2))
+
+ results in:
+
+ ::
+
+ [[4, 5],
+ [7, 8]]
+
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to slice
+ sl : slice
+ Slice that expresses a sub-section
+
+ Returns
+ -------
+ Store
+ A new store that corresponds to the sliced section
+
+ Notes
+ -----
+ Slicing with a non-unit step is currently not supported.
+
+ Raises
+ ------
+ ValueError
+ If ``sl.step`` is not a unit or ``sl`` is out of bounds
+ """
dim = dim + self.ndim if dim < 0 else dim
if dim < 0 or dim >= self.ndim:
raise ValueError(
@@ -1146,6 +1366,60 @@ def slice(self, dim: int, sl: slice) -> Store:
)
def transpose(self, axes: tuple[int, ...]) -> Store:
+ """
+ Reorders dimensions of the store. Dimension ``i`` of the resulting
+ store is mapped to dimension ``axes[i]`` of the input store.
+
+ For example, for a 3D store ``A``
+
+ ::
+
+ [[[1, 2],
+ [3, 4]],
+
+ [[5, 6],
+ [7, 8]]]
+
+ transpose calls ``A.transpose([1, 2, 0])`` and ``A.transpose([2, 1,
+ 0])`` yield the following stores, respectively:
+
+ ::
+
+ [[[1, 5],
+ [2, 6]],
+
+ [[3, 7],
+ [4, 8]]]
+
+
+ ::
+
+ [[[1, 5],
+ [3, 7]],
+
+ [[2, 6],
+ [4, 8]]]
+
+
+ Parameters
+ ----------
+ axes : tuple[int]
+ Mapping from dimensions of the resulting store to those of the
+ input
+
+ Returns
+ -------
+ Store
+ A new store with the dimensions transposed
+
+ Raises
+ ------
+ ValueError
+ If any of the following happens: 1) The length of ``axes`` doesn't
+ match the store's dimension; 2) ``axes`` has duplicates; 3) Any
+ value in ``axes`` is negative, or greater than or equal to the
+ store's dimension
+ """
if len(axes) != self.ndim:
raise ValueError(
f"dimension mismatch: expected {self.ndim} axes, "
@@ -1173,6 +1447,54 @@ def transpose(self, axes: tuple[int, ...]) -> Store:
)
def delinearize(self, dim: int, shape: tuple[int, ...]) -> Store:
+ """
+ Delinearizes a dimension into multiple dimensions. Each dimension
+ `i` of the store, where `i` > ``dim``, will be mapped to dimension
+ `i+N-1` of the resulting store, where `N` is the length of ``shape``.
+ A delinearization that does not preserve the size of the store is
+ invalid.
+
+ For example, consider a 2D store ``A``
+
+ ::
+
+ [[1, 2, 3, 4],
+ [5, 6, 7, 8]]
+
+ A delinearizing call ``A.delinearize(1, [2, 2])`` yields:
+
+ ::
+
+ [[[1, 2],
+ [3, 4]],
+
+ [[5, 6],
+ [7, 8]]]
+
+ Parameters
+ ----------
+ dim : int
+ Dimension to delinearize
+ shape : tuple[int]
+ New shape for the chosen dimension
+
+ Returns
+ -------
+ Store
+ A new store with the chosen dimension delinearized
+
+ Notes
+ -----
+ Unlike other transformations, delinearization is not an affine
+ transformation. Due to this nature, delinearized stores can raise
+ `NonInvertibleError` in places where they cannot be used.
+
+ Raises
+ ------
+ ValueError
+ If ``dim`` is invalid for the store or ``shape`` does not preserve
+ the size of the chosen dimension
+ """
dim = dim + self.ndim if dim < 0 else dim
if dim < 0 or dim >= self.ndim:
raise ValueError(
@@ -1201,6 +1523,24 @@ def delinearize(self, dim: int, shape: tuple[int, ...]) -> Store:
def get_inline_allocation(
self, context: Optional[Context] = None
) -> InlineMappedAllocation:
+ """
+ Creates an inline allocation for the store.
+
+ Parameters
+ ----------
+ context : Context, optional
+ Library context within which the allocation is created
+
+ Notes
+ -------
+ This call blocks the client's control flow and fetches the data for
+ the whole store on a single node.
+
+ Returns
+ -------
+ InlineMappedAllocation
+ A helper object wrapping the allocation
+ """
assert self.kind is RegionField
return self._storage.get_inline_allocation(
self.shape,
@@ -1219,6 +1559,14 @@ def serialize(self, buf: BufferBuilder) -> None:
self._transform.serialize(buf)
def get_key_partition(self) -> Optional[PartitionBase]:
+ """
+ Returns the current key partition of the store
+
+ Returns
+ -------
+ PartitionBase
+ The store's key partition
+ """
# Flush outstanding operations to have the key partition of this store
# registered correctly
runtime.flush_scheduling_window()
@@ -1238,6 +1586,14 @@ def has_key_partition(self, restrictions: tuple[Restriction, ...]) -> bool:
return (part is not None) and (part.even or self._transform.bottom)
def set_key_partition(self, partition: PartitionBase) -> None:
+ """
+ Sets a new key partition for the store
+
+ Parameters
+ ----------
+ partition : PartitionBase
+ A new key partition
+ """
runtime.partition_manager.record_store_key_partition(
self._unique_id, partition
)
@@ -1248,6 +1604,9 @@ def set_key_partition(self, partition: PartitionBase) -> None:
)
def reset_key_partition(self) -> None:
+ """
+ Clears the store's key partition
+ """
runtime.partition_manager.reset_store_key_partition(self._unique_id)
# Also reset the storage's key partition.
self._storage.reset_key_partition()
@@ -1346,6 +1705,19 @@ def partition(self, partition: PartitionBase) -> StorePartition:
def partition_by_tiling(
self, tile_shape: Union[Shape, Sequence[int]]
) -> StorePartition:
+ """
+ Creates a tiled partition of the store
+
+ Parameters
+ ----------
+ tile_shape : Shape or Sequence[int]
+ Shape of tiles
+
+ Returns
+ -------
+ StorePartition
+ A ``StorePartition`` object
+ """
if self.unbound:
raise TypeError("Unbound store cannot be manually partitioned")
if not isinstance(tile_shape, Shape):
diff --git a/legate/driver/__init__.py b/legate/driver/__init__.py
index 67ce493b8..786b0f069 100644
--- a/legate/driver/__init__.py
+++ b/legate/driver/__init__.py
@@ -15,13 +15,23 @@
from __future__ import annotations
from .config import Config
-from .driver import Driver
+from .driver import LegateDriver, CanonicalDriver
from .launcher import Launcher
def main() -> int:
- import sys
+ import os, shlex, sys
- from .main import main as _main
+ from .main import legate_main as _main
- return _main(sys.argv)
+ # A little explanation. We want to encourage configuration options be
+ # passed via LEGATE_CONFIG, in order to be considerate to user scripts.
+ # But we still need to accept actual command line args for compatibility,
+ # and those should also take precedence. Here we splice the options from
+ # LEGATE_CONFIG in before sys.argv, and take advantage of the fact that if
+ # there are any options repeated in both places, argparse will use the
+ # latter (i.e. the actual command line provided ones).
+ env_args = shlex.split(os.environ.get("LEGATE_CONFIG", ""))
+ argv = sys.argv[:1] + env_args + sys.argv[1:]
+
+ return _main(argv)
diff --git a/legate/driver/args.py b/legate/driver/args.py
index 92bce362b..daee26ab0 100644
--- a/legate/driver/args.py
+++ b/legate/driver/args.py
@@ -16,8 +16,9 @@
#
from __future__ import annotations
-from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
+from argparse import REMAINDER, ArgumentDefaultsHelpFormatter, ArgumentParser
+from .. import __version__
from ..util.shared_args import (
CPUS,
FBMEM,
@@ -39,12 +40,20 @@
__all__ = ("parser",)
+
parser = ArgumentParser(
description="Legate Driver",
allow_abbrev=False,
formatter_class=ArgumentDefaultsHelpFormatter,
)
+parser.add_argument(
+ "command",
+ nargs=REMAINDER,
+ help="A python script to run, plus any arguments for the script. "
+ "Any arguments after the script will be passed to the script, i.e. "
+ "NOT used as arguments to legate itself.",
+)
multi_node = parser.add_argument_group("Multi-node configuration")
multi_node.add_argument(NODES.name, **NODES.kwargs)
@@ -62,7 +71,8 @@
help="CPU cores to bind each rank to. Comma-separated core IDs as "
"well as ranges are accepted, as reported by `numactl`. Binding "
"instructions for all ranks should be listed in one string, separated "
- "by `/`.",
+ "by `/`. "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -70,7 +80,8 @@
"--mem-bind",
help="NUMA memories to bind each rank to. Use comma-separated integer "
"IDs as reported by `numactl`. Binding instructions for all ranks "
- "should be listed in one string, separated by `/`.",
+ "should be listed in one string, separated by `/`. "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -78,7 +89,8 @@
"--gpu-bind",
help="GPUs to bind each rank to. Use comma-separated integer IDs as "
"reported by `nvidia-smi`. Binding instructions for all ranks "
- "should be listed in one string, separated by `/`.",
+ "should be listed in one string, separated by `/`. "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -86,11 +98,12 @@
"--nic-bind",
help="NICs to bind each rank to. Use comma-separated device names as "
"appropriate for the network in use. Binding instructions for all ranks "
- "should be listed in one string, separated by `/`.",
+ "should be listed in one string, separated by `/`. "
+ "[legate-only, not supported with standard Python invocation]",
)
-core = parser.add_argument_group("Core alloction")
+core = parser.add_argument_group("Core allocation")
core.add_argument(CPUS.name, **CPUS.kwargs)
core.add_argument(GPUS.name, **GPUS.kwargs)
core.add_argument(OMPS.name, **OMPS.kwargs)
@@ -98,7 +111,7 @@
core.add_argument(UTILITY.name, **UTILITY.kwargs)
-memory = parser.add_argument_group("Memory alloction")
+memory = parser.add_argument_group("Memory allocation")
memory.add_argument(SYSMEM.name, **SYSMEM.kwargs)
memory.add_argument(NUMAMEM.name, **NUMAMEM.kwargs)
memory.add_argument(FBMEM.name, **FBMEM.kwargs)
@@ -134,7 +147,8 @@
dest="cprofile",
action="store_true",
required=False,
- help="profile Python execution with the cprofile module",
+ help="profile Python execution with the cprofile module, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -143,7 +157,8 @@
dest="nvprof",
action="store_true",
required=False,
- help="run Legate with nvprof",
+ help="run Legate with nvprof, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -152,7 +167,8 @@
dest="nsys",
action="store_true",
required=False,
- help="run Legate with Nsight Systems",
+ help="run Legate with Nsight Systems, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -161,7 +177,8 @@
dest="nsys_targets",
default="cublas,cuda,cudnn,nvtx,ucx",
required=False,
- help="Specify profiling targets for Nsight Systems",
+ help="Specify profiling targets for Nsight Systems, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -173,7 +190,8 @@
required=False,
help="Specify extra flags for Nsight Systems (can appear more than once). "
"Multiple arguments may be provided together in a quoted string "
- "(arguments with spaces inside must be additionally quoted)",
+ "(arguments with spaces inside must be additionally quoted), "
+ "[legate-only, not supported with standard Python invocation]",
)
logging = parser.add_argument_group("Logging")
@@ -223,7 +241,8 @@
dest="gdb",
action="store_true",
required=False,
- help="run Legate inside gdb",
+ help="run Legate inside gdb, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -232,7 +251,8 @@
dest="cuda_gdb",
action="store_true",
required=False,
- help="run Legate inside cuda-gdb",
+ help="run Legate inside cuda-gdb, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -241,7 +261,8 @@
dest="memcheck",
action="store_true",
required=False,
- help="run Legate with cuda-memcheck",
+ help="run Legate with cuda-memcheck, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -318,7 +339,8 @@
dest="bind_detail",
action="store_true",
required=False,
- help="print out the final invocation run by bind.sh",
+ help="print out the final invocation run by bind.sh, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -330,7 +352,8 @@
dest="module",
default=None,
required=False,
- help="Specify a Python module to load before running",
+ help="Specify a Python module to load before running, "
+ "[legate-only, not supported with standard Python invocation]",
)
@@ -348,7 +371,8 @@
dest="rlwrap",
action="store_true",
required=False,
- help="Whether to run with rlwrap to improve readline ability",
+ help="Whether to run with rlwrap to improve readline ability, "
+ "[legate-only, not supported with standard Python invocation]",
)
other.add_argument(
@@ -358,3 +382,9 @@
required=False,
help="Whether to use color terminal output (if colorama is installed)",
)
+
+other.add_argument(
+ "--version",
+ action="version",
+ version=__version__,
+)
diff --git a/legate/driver/command.py b/legate/driver/command.py
index 2fc8cc1ed..b4ab348fe 100644
--- a/legate/driver/command.py
+++ b/legate/driver/command.py
@@ -16,6 +16,7 @@
from typing import TYPE_CHECKING
+from .. import install_info
from ..util.ui import warn
if TYPE_CHECKING:
@@ -24,7 +25,7 @@
from .config import ConfigProtocol
from .launcher import Launcher
-__all__ = ("CMD_PARTS",)
+__all__ = ("CMD_PARTS_LEGION", "CMD_PARTS_CANONICAL")
# this will be replaced by bind.sh with the actual computed rank at runtime
@@ -182,11 +183,18 @@ def cmd_legion(
return (str(system.legion_paths.legion_python),)
-def cmd_processor(
+def cmd_python_processor(
config: ConfigProtocol, system: System, launcher: Launcher
) -> CommandPart:
- # We always need one python processor per rank and no local fields
- return ("-ll:py", "1", "-lg:local", "0")
+ # We always need one python processor per rank
+ return ("-ll:py", "1")
+
+
+def cmd_local_field(
+ config: ConfigProtocol, system: System, launcher: Launcher
+) -> CommandPart:
+ # We always need no local fields
+ return ("-lg:local", "0")
def cmd_kthreads(
@@ -251,26 +259,37 @@ def cmd_openmp(
)
-def cmd_utility(
+def cmd_bgwork(
config: ConfigProtocol, system: System, launcher: Launcher
) -> CommandPart:
- utility = config.core.utility
ranks = config.multi_node.ranks
+ utility = config.core.utility
- if utility == 1:
- return ()
-
- opts: CommandPart = ("-ll:util", str(utility))
+ opts: CommandPart = ()
# If we are running multi-rank then make the number of active
# message handler threads equal to our number of utility
# processors in order to prevent head-of-line blocking
if ranks > 1:
- opts += ("-ll:bgwork", str(utility))
+ opts += ("-ll:bgwork", str(max(utility, 2)))
+
+ if ranks > 1 and "ucx" in install_info.networks:
+ opts += ("-ll:bgworkpin", "1")
return opts
+def cmd_utility(
+ config: ConfigProtocol, system: System, launcher: Launcher
+) -> CommandPart:
+ utility = config.core.utility
+
+ if utility == 1:
+ return ()
+
+ return ("-ll:util", str(utility))
+
+
def cmd_mem(
config: ConfigProtocol, system: System, launcher: Launcher
) -> CommandPart:
@@ -347,7 +366,7 @@ def cmd_log_file(
log_to_file = config.logging.log_to_file
if log_to_file:
- return ("-logfile", str(log_dir / "legate_%.log"))
+ return ("-logfile", str(log_dir / "legate_%.log"), "-errlevel", "4")
return ()
@@ -363,7 +382,13 @@ def cmd_eager_alloc(
def cmd_ucx(
config: ConfigProtocol, system: System, launcher: Launcher
) -> CommandPart:
- return ("-ucx:tls_host", "^dc,ud")
+ return ("-ucx:tls_host", "rc,tcp,cuda_copy,cuda_ipc,sm,self")
+
+
+def cmd_user_script(
+ config: ConfigProtocol, system: System, launcher: Launcher
+) -> CommandPart:
+ return () if config.user_script is None else (config.user_script,)
def cmd_user_opts(
@@ -372,27 +397,17 @@ def cmd_user_opts(
return config.user_opts
-CMD_PARTS = (
- cmd_bind,
- cmd_rlwrap,
- cmd_gdb,
- cmd_cuda_gdb,
- cmd_nvprof,
- cmd_nsys,
- # Add memcheck right before the binary
- cmd_memcheck,
- # Now we're ready to build the actual command to run
- cmd_legion,
+_CMD_PARTS_SHARED = (
# This has to go before script name
cmd_nocr,
- cmd_module,
- cmd_processor,
+ cmd_local_field,
cmd_kthreads,
# Translate the requests to Realm command line parameters
cmd_cpus,
cmd_gpus,
cmd_openmp,
cmd_utility,
+ cmd_bgwork,
cmd_mem,
cmd_numamem,
cmd_fbmem,
@@ -402,6 +417,41 @@ def cmd_user_opts(
cmd_log_file,
cmd_eager_alloc,
cmd_ucx,
- # Append user flags so they can override whatever we provided
- cmd_user_opts,
+)
+
+CMD_PARTS_LEGION = (
+ (
+ cmd_bind,
+ cmd_rlwrap,
+ cmd_gdb,
+ cmd_cuda_gdb,
+ cmd_nvprof,
+ cmd_nsys,
+ # Add memcheck right before the binary
+ cmd_memcheck,
+ # Now we're ready to build the actual command to run
+ cmd_legion,
+ # This has to go before script name
+ cmd_python_processor,
+ cmd_module,
+ )
+ + _CMD_PARTS_SHARED
+ + (
+ # User script
+ cmd_user_script,
+ # Append user flags so they can override whatever we provided
+ cmd_user_opts,
+ )
+)
+
+CMD_PARTS_CANONICAL = (
+ (
+ # User script
+ cmd_user_script,
+ )
+ + _CMD_PARTS_SHARED
+ + (
+ # Append user flags so they can override whatever we provided
+ cmd_user_opts,
+ )
)
diff --git a/legate/driver/config.py b/legate/driver/config.py
index 0be36f959..de0394a7d 100644
--- a/legate/driver/config.py
+++ b/legate/driver/config.py
@@ -22,7 +22,7 @@
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
-from typing import Any, Protocol
+from typing import Any, Optional, Protocol
from ..util import colors
from ..util.types import (
@@ -146,11 +146,11 @@ class Other(DataclassMixin):
class ConfigProtocol(Protocol):
-
_args: Namespace
argv: ArgList
+ user_script: Optional[str]
user_opts: tuple[str, ...]
multi_node: MultiNode
binding: Binding
@@ -177,14 +177,15 @@ class Config:
def __init__(self, argv: ArgList) -> None:
self.argv = argv
- args, extra = parser.parse_known_args(self.argv[1:])
+ args = parser.parse_args(self.argv[1:])
colors.ENABLED = args.color
# only saving this for help with testing
self._args = args
- self.user_opts = tuple(extra)
+ self.user_script = args.command[0] if args.command else None
+ self.user_opts = tuple(args.command[1:]) if self.user_script else ()
# these may modify the args, so apply before dataclass conversions
self._fixup_nocr(args)
@@ -203,7 +204,7 @@ def __init__(self, argv: ArgList) -> None:
@cached_property
def console(self) -> bool:
"""Whether we are starting Legate as an interactive console."""
- return not any(opt.endswith(".py") for opt in self.user_opts)
+ return self.user_script is None
def _fixup_nocr(self, args: Namespace) -> None:
# this is slightly duplicative of MultiNode.ranks property, but fixup
diff --git a/legate/driver/driver.py b/legate/driver/driver.py
index 7f3e17d33..a534bf09f 100644
--- a/legate/driver/driver.py
+++ b/legate/driver/driver.py
@@ -14,22 +14,24 @@
#
from __future__ import annotations
+from dataclasses import dataclass
from shlex import quote
from subprocess import run
from textwrap import indent
from typing import TYPE_CHECKING
from ..util.system import System
+from ..util.types import DataclassMixin
from ..util.ui import kvtable, rule, section, value, warn
-from .command import CMD_PARTS
+from .command import CMD_PARTS_CANONICAL, CMD_PARTS_LEGION
from .config import ConfigProtocol
-from .launcher import Launcher
+from .launcher import Launcher, SimpleLauncher
from .logs import process_logs
if TYPE_CHECKING:
from ..util.types import Command, EnvDict
-__all__ = ("Driver", "print_verbose")
+__all__ = ("LegateDriver", "CanonicalDriver", "print_verbose")
_DARWIN_GDB_WARN = """\
You must start the debugging session with the following command,
@@ -41,7 +43,14 @@
"""
-class Driver:
+@dataclass(frozen=True)
+class LegateVersions(DataclassMixin):
+ """Collect package versions relevant to Legate."""
+
+ legate_version: str
+
+
+class LegateDriver:
"""Coordinate the system, user-configuration, and launcher to appropriately
execute the Legate process.
@@ -65,7 +74,7 @@ def cmd(self) -> Command:
launcher = self.launcher
system = self.system
- parts = (part(config, system, launcher) for part in CMD_PARTS)
+ parts = (part(config, system, launcher) for part in CMD_PARTS_LEGION)
return launcher.cmd + sum(parts, ())
@property
@@ -83,12 +92,13 @@ def custom_env_vars(self) -> set[str]:
# in case we want to augment the launcher env we could do it here
return self.launcher.custom_env_vars
- def run(self) -> int:
- """Run the Legate process.
+ @property
+ def dry_run(self) -> bool:
+ """Check verbose and dry run.
Returns
-------
- int : process return code
+ bool : whether dry run is enabled
"""
if self.config.info.verbose:
@@ -101,7 +111,17 @@ def run(self) -> int:
self._darwin_gdb_warn()
- if self.config.other.dry_run:
+ return self.config.other.dry_run
+
+ def run(self) -> int:
+ """Run the Legate process.
+
+ Returns
+ -------
+ int : process return code
+
+ """
+ if self.dry_run:
return 0
with process_logs(self.config, self.system, self.launcher):
@@ -122,9 +142,55 @@ def _darwin_gdb_warn(self) -> None:
)
+class CanonicalDriver(LegateDriver):
+ """Coordinate the system, user-configuration, and launcher to appropriately
+ execute the Legate process.
+
+ Parameters
+ ----------
+ config : Config
+
+ system : System
+
+ """
+
+ def __init__(self, config: ConfigProtocol, system: System) -> None:
+ self.config = config
+ self.system = system
+ self.launcher = SimpleLauncher(config, system)
+
+ @property
+ def cmd(self) -> Command:
+ """The full command invocation that should be used to start Legate."""
+ config = self.config
+ launcher = self.launcher
+ system = self.system
+
+ parts = (
+ part(config, system, launcher) for part in CMD_PARTS_CANONICAL
+ )
+ return sum(parts, ())
+
+ def run(self) -> int:
+ """Run the Legate process.
+
+ Returns
+ -------
+ int : process return code
+
+ """
+ assert False, "This function should not be invoked."
+
+
+def get_versions() -> LegateVersions:
+ from legate import __version__ as lg_version
+
+ return LegateVersions(legate_version=lg_version)
+
+
def print_verbose(
system: System,
- driver: Driver | None = None,
+ driver: LegateDriver | None = None,
) -> None:
"""Print system and driver configuration values.
@@ -151,6 +217,9 @@ def print_verbose(
print(section("\nLegion paths:"))
print(indent(str(system.legion_paths), prefix=" "))
+ print(section("\nVersions:"))
+ print(indent(str(get_versions()), prefix=" "))
+
if driver:
print(section("\nCommand:"))
cmd = " ".join(quote(t) for t in driver.cmd)
diff --git a/legate/driver/main.py b/legate/driver/main.py
index 2ca3f04be..bb02c7cbb 100644
--- a/legate/driver/main.py
+++ b/legate/driver/main.py
@@ -17,32 +17,26 @@
"""
from __future__ import annotations
-__all__ = ("main",)
+from typing import Type, Union
+from . import CanonicalDriver, LegateDriver
-def main(argv: list[str]) -> int:
- """A main function for the Legate driver that can be used programmatically
- or by entry-points.
+__all__ = ("legate_main",)
- Parameters
- ----------
- argv : list[str]
- Command-line arguments to start the Legate driver with
- Returns
- -------
- int, a process return code
-
- """
+def prepare_driver(
+ argv: list[str],
+ driver_type: Union[Type[CanonicalDriver], Type[LegateDriver]],
+) -> Union[CanonicalDriver, LegateDriver]:
from ..util.system import System
from ..util.ui import error
- from . import Config, Driver
+ from . import Config
from .driver import print_verbose
try:
config = Config(argv)
except Exception as e:
- print(error("Could not configure Legate driver:\n"))
+ print(error("Could not configure driver:\n"))
raise e
try:
@@ -52,11 +46,29 @@ def main(argv: list[str]) -> int:
raise e
try:
- driver = Driver(config, system)
+ driver = driver_type(config, system)
except Exception as e:
- msg = "Could not initialize Legate driver, path config and exception follow:" # noqa
+ msg = "Could not initialize driver, path config and exception follow:" # noqa
print(error(msg))
print_verbose(system)
raise e
+ return driver
+
+
+def legate_main(argv: list[str]) -> int:
+ """A main function for the Legate driver that can be used programmatically
+ or by entry-points.
+
+ Parameters
+ ----------
+ argv : list[str]
+ Command-line arguments to start the Legate driver with
+
+ Returns
+ -------
+ int, a process return code
+
+ """
+ driver = prepare_driver(argv, LegateDriver)
return driver.run()
diff --git a/legate/install_info.py.in b/legate/install_info.py.in
index 4f0416979..bc38c4083 100644
--- a/legate/install_info.py.in
+++ b/legate/install_info.py.in
@@ -42,3 +42,5 @@ def get_libpath():
libpath: str = get_libpath()
header: str = """@header@"""
+
+networks: list[str] = "@Legion_NETWORKS@".split()
diff --git a/legate/jupyter/config.py b/legate/jupyter/config.py
index 2acbc6dcb..ebec279be 100644
--- a/legate/jupyter/config.py
+++ b/legate/jupyter/config.py
@@ -19,6 +19,7 @@
from dataclasses import dataclass
from pathlib import Path
+from typing import Optional
import legate.util.colors as colors
from legate.driver.config import (
@@ -79,6 +80,7 @@ def __init__(self, argv: ArgList) -> None:
self.memory = object_to_dataclass(args, Memory)
# turn everything else off
+ self.user_script: Optional[str] = None
self.user_opts: tuple[str, ...] = ()
self.binding = Binding(None, None, None, None)
self.profiling = Profiling(False, False, False, False, "", [])
diff --git a/legate/jupyter/kernel.py b/legate/jupyter/kernel.py
index daadae9ff..ae371e28f 100644
--- a/legate/jupyter/kernel.py
+++ b/legate/jupyter/kernel.py
@@ -31,7 +31,7 @@
NoSuchKernel,
)
-from legate.driver import Driver
+from legate.driver import LegateDriver
from legate.jupyter.config import Config
from legate.util.types import ArgList
from legate.util.ui import error
@@ -48,7 +48,7 @@ class LegateMetadata(TypedDict):
LEGATE_JUPYTER_METADATA_KEY: Literal["legate"] = "legate"
-def generate_kernel_spec(driver: Driver, config: Config) -> KernelSpec:
+def generate_kernel_spec(driver: LegateDriver, config: Config) -> KernelSpec:
legion_kernel = Path(__file__).parent / "_legion_kernel.py"
argv = list(driver.cmd) + [str(legion_kernel), "-f", "{connection_file}"]
diff --git a/legate/jupyter/main.py b/legate/jupyter/main.py
index 494fdf421..d287022d3 100644
--- a/legate/jupyter/main.py
+++ b/legate/jupyter/main.py
@@ -16,7 +16,7 @@
#
from __future__ import annotations
-from legate.driver import Driver
+from legate.driver import LegateDriver
from legate.jupyter.config import Config
from legate.jupyter.kernel import generate_kernel_spec, install_kernel_spec
from legate.util.system import System
@@ -28,7 +28,7 @@ def main(argv: list[str]) -> int:
config = Config(argv)
system = System()
- driver = Driver(config, system)
+ driver = LegateDriver(config, system)
spec = generate_kernel_spec(driver, config)
diff --git a/legate/rc.py b/legate/rc.py
deleted file mode 100644
index bd4abca51..000000000
--- a/legate/rc.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright 2021-2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-from __future__ import annotations
-
-LEGION_WARNING = """
-
-All Legate programs must be run with a legion_python interperter. We
-recommend that you use the Legate driver script "bin/legate" found
-in the installation directory to launch Legate programs as it
-provides easy-to-use flags for invoking legion_python. You can see
-options for using the driver script with "bin/legate --help". You
-can also invoke legion_python directly.
-
-Use "bin/legate --verbose ..." to see some examples of how to call
-legion_python directly.
-"""
-
-# TODO (bv) temp transitive imports until cunumeric is updated
-from .util.args import ( # noqa
- ArgSpec,
- Argument,
- parse_library_command_args as parse_command_args,
-)
-
-
-def has_legion_context() -> bool:
- """Determine whether we are running in legion_python.
-
- Returns
- bool : True if running in legion_python, otherwise False
-
- """
- try:
- from legion_cffi import lib
-
- return lib.legion_runtime_has_context()
- except (ModuleNotFoundError, AttributeError):
- return False
-
-
-def check_legion(msg: str = LEGION_WARNING) -> None:
- """Raise an error if we are not running in legion_python."""
- if not has_legion_context():
- raise RuntimeError(msg)
diff --git a/legate/settings.py b/legate/settings.py
new file mode 100644
index 000000000..c3b89ece0
--- /dev/null
+++ b/legate/settings.py
@@ -0,0 +1,62 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from __future__ import annotations
+
+from .util.settings import PrioritizedSetting, Settings, convert_bool
+
+__all__ = ("settings",)
+
+
+class LegateRuntimeSettings(Settings):
+ consensus: PrioritizedSetting[bool] = PrioritizedSetting(
+ "consensus",
+ "LEGATE_CONSENSUS",
+ default=False,
+ convert=convert_bool,
+ help="""
+ Whether to enable consensus match on single node (for testing).
+ """,
+ )
+
+ cycle_check: PrioritizedSetting[bool] = PrioritizedSetting(
+ "cycle_check",
+ "LEGATE_CYCLE_CHECK",
+ default=False,
+ convert=convert_bool,
+ help="""
+ Whether to check for reference cycles involving RegionField objects on
+ exit (developer option). When such cycles arise during execution they
+ stop used RegionFields from being collected and reused for new Stores,
+ thus increasing memory pressure. By default this check will miss any
+ RegionField cycles the garbage collector collected during execution.
+
+ Run gc.disable() at the beginning of the program to avoid this.
+ """,
+ )
+
+ future_leak_check: PrioritizedSetting[bool] = PrioritizedSetting(
+ "future_leak_check",
+ "LEGATE_FUTURE_LEAK_CHECK",
+ default=False,
+ convert=convert_bool,
+ help="""
+ Whether to check for reference cycles keeping Future/FutureMap objects
+ alive after Legate runtime exit (developer option). Such leaks can
+ result in Legion runtime shutdown hangs.
+ """,
+ )
+
+
+settings = LegateRuntimeSettings()
diff --git a/legate/tester/__init__.py b/legate/tester/__init__.py
index 045eca19d..29f8d0b20 100644
--- a/legate/tester/__init__.py
+++ b/legate/tester/__init__.py
@@ -29,7 +29,7 @@
]
#: Value to use if --cpus is not specified.
-DEFAULT_CPUS_PER_NODE = 4
+DEFAULT_CPUS_PER_NODE = 2
#: Value to use if --gpus is not specified.
DEFAULT_GPUS_PER_NODE = 1
@@ -46,6 +46,9 @@
#: Value to use if --ompthreads is not specified.
DEFAULT_OMPTHREADS = 4
+#: Value to use if --numamem is not specified.
+DEFAULT_NUMAMEM = 0
+
#: Default values to apply to normalize the testing environment.
DEFAULT_PROCESS_ENV = {
"LEGATE_TEST": "1",
diff --git a/legate/tester/args.py b/legate/tester/args.py
index 4b24077a0..5119a1994 100644
--- a/legate/tester/args.py
+++ b/legate/tester/args.py
@@ -28,6 +28,7 @@
DEFAULT_GPU_DELAY,
DEFAULT_GPU_MEMORY_BUDGET,
DEFAULT_GPUS_PER_NODE,
+ DEFAULT_NUMAMEM,
DEFAULT_OMPS_PER_NODE,
DEFAULT_OMPTHREADS,
FEATURES,
@@ -161,6 +162,15 @@
)
+feature_opts.add_argument(
+ "--numamem",
+ dest="numamem",
+ type=int,
+ default=DEFAULT_NUMAMEM,
+ help="NUMA memory for OpenMP processors (MB)",
+)
+
+
test_opts = parser.add_argument_group("Test run configuration options")
diff --git a/legate/tester/config.py b/legate/tester/config.py
index 39441e433..d943b4b14 100644
--- a/legate/tester/config.py
+++ b/legate/tester/config.py
@@ -63,6 +63,7 @@ def __init__(self, argv: ArgList) -> None:
self.fbmem = args.fbmem
self.gpu_delay = args.gpu_delay
self.ompthreads = args.ompthreads
+ self.numamem = args.numamem
# test run configuration
self.debug = args.debug
diff --git a/legate/tester/stages/_linux/cpu.py b/legate/tester/stages/_linux/cpu.py
index deb5610a6..8c17343c7 100644
--- a/legate/tester/stages/_linux/cpu.py
+++ b/legate/tester/stages/_linux/cpu.py
@@ -19,7 +19,7 @@
from ..test_stage import TestStage
from ..util import (
- CUNUMERIC_TEST_ARG,
+ CUNUMERIC_TEST_ENV,
UNPIN_ENV,
Shard,
StageSpec,
@@ -48,13 +48,15 @@ class CPU(TestStage):
kind: FeatureType = "cpus"
- args = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV)
+ env = {} if config.cpu_pin == "strict" else dict(UNPIN_ENV)
+ env.update(CUNUMERIC_TEST_ENV)
+ return env
def shard_args(self, shard: Shard, config: Config) -> ArgList:
args = [
diff --git a/legate/tester/stages/_linux/eager.py b/legate/tester/stages/_linux/eager.py
index cc9a08d5a..12a19c24b 100644
--- a/legate/tester/stages/_linux/eager.py
+++ b/legate/tester/stages/_linux/eager.py
@@ -17,7 +17,7 @@
from typing import TYPE_CHECKING
from ..test_stage import TestStage
-from ..util import Shard, StageSpec, adjust_workers
+from ..util import EAGER_ENV, Shard, StageSpec, adjust_workers
if TYPE_CHECKING:
from ....util.types import ArgList, EnvDict
@@ -47,12 +47,7 @@ def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- # Raise min chunk sizes for deferred codepaths to force eager execution
- env = {
- "CUNUMERIC_MIN_CPU_CHUNK": "2000000000",
- "CUNUMERIC_MIN_OMP_CHUNK": "2000000000",
- "CUNUMERIC_MIN_GPU_CHUNK": "2000000000",
- }
+ env = dict(EAGER_ENV)
return env
def shard_args(self, shard: Shard, config: Config) -> ArgList:
diff --git a/legate/tester/stages/_linux/gpu.py b/legate/tester/stages/_linux/gpu.py
index 64f625c00..751118dcf 100644
--- a/legate/tester/stages/_linux/gpu.py
+++ b/legate/tester/stages/_linux/gpu.py
@@ -18,7 +18,7 @@
from typing import TYPE_CHECKING
from ..test_stage import TestStage
-from ..util import CUNUMERIC_TEST_ARG, Shard, StageSpec, adjust_workers
+from ..util import CUNUMERIC_TEST_ENV, Shard, StageSpec, adjust_workers
if TYPE_CHECKING:
from ....util.types import ArgList, EnvDict
@@ -44,13 +44,13 @@ class GPU(TestStage):
kind: FeatureType = "cuda"
- args = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return {}
+ return dict(CUNUMERIC_TEST_ENV)
def delay(self, shard: Shard, config: Config, system: TestSystem) -> None:
time.sleep(config.gpu_delay / 1000)
diff --git a/legate/tester/stages/_linux/omp.py b/legate/tester/stages/_linux/omp.py
index f7af3e9d0..09101bafd 100644
--- a/legate/tester/stages/_linux/omp.py
+++ b/legate/tester/stages/_linux/omp.py
@@ -19,7 +19,7 @@
from ..test_stage import TestStage
from ..util import (
- CUNUMERIC_TEST_ARG,
+ CUNUMERIC_TEST_ENV,
UNPIN_ENV,
Shard,
StageSpec,
@@ -48,13 +48,15 @@ class OMP(TestStage):
kind: FeatureType = "openmp"
- args = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV)
+ env = {} if config.cpu_pin == "strict" else dict(UNPIN_ENV)
+ env.update(CUNUMERIC_TEST_ENV)
+ return env
def shard_args(self, shard: Shard, config: Config) -> ArgList:
args = [
@@ -62,6 +64,8 @@ def shard_args(self, shard: Shard, config: Config) -> ArgList:
str(config.omps),
"--ompthreads",
str(config.ompthreads),
+ "--numamem",
+ str(config.numamem),
]
if config.cpu_pin != "none":
args += [
diff --git a/legate/tester/stages/_osx/cpu.py b/legate/tester/stages/_osx/cpu.py
index 182a6d76b..e911892de 100644
--- a/legate/tester/stages/_osx/cpu.py
+++ b/legate/tester/stages/_osx/cpu.py
@@ -18,7 +18,7 @@
from ..test_stage import TestStage
from ..util import (
- CUNUMERIC_TEST_ARG,
+ CUNUMERIC_TEST_ENV,
UNPIN_ENV,
Shard,
StageSpec,
@@ -47,13 +47,15 @@ class CPU(TestStage):
kind: FeatureType = "cpus"
- args = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return UNPIN_ENV
+ env = dict(UNPIN_ENV)
+ env.update(CUNUMERIC_TEST_ENV)
+ return env
def shard_args(self, shard: Shard, config: Config) -> ArgList:
return ["--cpus", str(config.cpus)]
diff --git a/legate/tester/stages/_osx/eager.py b/legate/tester/stages/_osx/eager.py
index b32feb17d..4cb0be16b 100644
--- a/legate/tester/stages/_osx/eager.py
+++ b/legate/tester/stages/_osx/eager.py
@@ -17,7 +17,7 @@
from typing import TYPE_CHECKING
from ..test_stage import TestStage
-from ..util import UNPIN_ENV, Shard, StageSpec, adjust_workers
+from ..util import EAGER_ENV, UNPIN_ENV, Shard, StageSpec, adjust_workers
if TYPE_CHECKING:
from ....util.types import ArgList, EnvDict
@@ -47,12 +47,7 @@ def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- # Raise min chunk sizes for deferred codepaths to force eager execution
- env = {
- "CUNUMERIC_MIN_CPU_CHUNK": "2000000000",
- "CUNUMERIC_MIN_OMP_CHUNK": "2000000000",
- "CUNUMERIC_MIN_GPU_CHUNK": "2000000000",
- }
+ env = dict(EAGER_ENV)
env.update(UNPIN_ENV)
return env
diff --git a/legate/tester/stages/_osx/gpu.py b/legate/tester/stages/_osx/gpu.py
index 2a1597494..1e54ba737 100644
--- a/legate/tester/stages/_osx/gpu.py
+++ b/legate/tester/stages/_osx/gpu.py
@@ -18,7 +18,7 @@
from typing import TYPE_CHECKING
from ..test_stage import TestStage
-from ..util import CUNUMERIC_TEST_ARG, UNPIN_ENV, Shard
+from ..util import CUNUMERIC_TEST_ENV, UNPIN_ENV, Shard
if TYPE_CHECKING:
from ....util.types import ArgList, EnvDict
@@ -42,13 +42,15 @@ class GPU(TestStage):
kind: FeatureType = "cuda"
- args: ArgList = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
raise RuntimeError("GPU test are not supported on OSX")
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return UNPIN_ENV
+ env = dict(UNPIN_ENV)
+ env.update(CUNUMERIC_TEST_ENV)
+ return env
def delay(self, shard: Shard, config: Config, system: TestSystem) -> None:
time.sleep(config.gpu_delay / 1000)
diff --git a/legate/tester/stages/_osx/omp.py b/legate/tester/stages/_osx/omp.py
index eb279791a..1d1a8d24b 100644
--- a/legate/tester/stages/_osx/omp.py
+++ b/legate/tester/stages/_osx/omp.py
@@ -18,7 +18,7 @@
from ..test_stage import TestStage
from ..util import (
- CUNUMERIC_TEST_ARG,
+ CUNUMERIC_TEST_ENV,
UNPIN_ENV,
Shard,
StageSpec,
@@ -47,13 +47,15 @@ class OMP(TestStage):
kind: FeatureType = "openmp"
- args = [CUNUMERIC_TEST_ARG]
+ args: ArgList = []
def __init__(self, config: Config, system: TestSystem) -> None:
self._init(config, system)
def env(self, config: Config, system: TestSystem) -> EnvDict:
- return UNPIN_ENV
+ env = dict(UNPIN_ENV)
+ env.update(CUNUMERIC_TEST_ENV)
+ return env
def shard_args(self, shard: Shard, config: Config) -> ArgList:
return [
diff --git a/legate/tester/stages/test_stage.py b/legate/tester/stages/test_stage.py
index ed24ae461..e9080394c 100644
--- a/legate/tester/stages/test_stage.py
+++ b/legate/tester/stages/test_stage.py
@@ -252,12 +252,17 @@ def run(
cov_args = self.cov_args(config)
- cmd = [str(config.legate_path)] + cov_args + [str(test_path)]
-
stage_args = self.args + self.shard_args(shard, config)
file_args = self.file_args(test_file, config)
- cmd += stage_args + file_args + config.extra_args
+ cmd = (
+ [str(config.legate_path)]
+ + stage_args
+ + cov_args
+ + [str(test_path)]
+ + file_args
+ + config.extra_args
+ )
if custom_args:
cmd += custom_args
@@ -285,7 +290,6 @@ def _init(self, config: Config, system: TestSystem) -> None:
def _launch(
self, config: Config, system: TestSystem
) -> list[ProcessResult]:
-
pool = multiprocessing.pool.ThreadPool(self.spec.workers)
jobs = [
diff --git a/legate/tester/stages/util.py b/legate/tester/stages/util.py
index 27d53bbd1..fb38d77c1 100644
--- a/legate/tester/stages/util.py
+++ b/legate/tester/stages/util.py
@@ -25,10 +25,19 @@
from ..logger import LOG
from ..test_system import ProcessResult
-CUNUMERIC_TEST_ARG = "-cunumeric:test"
-
UNPIN_ENV = {"REALM_SYNTHETIC_CORE_MAP": ""}
+CUNUMERIC_TEST_ENV = {"CUNUMERIC_TEST": "1"}
+
+# Raise min chunk sizes for deferred codepaths to force eager execution
+EAGER_ENV = {
+ "CUNUMERIC_TEST": "0",
+ "CUNUMERIC_MIN_CPU_CHUNK": "2000000000",
+ "CUNUMERIC_MIN_OMP_CHUNK": "2000000000",
+ "CUNUMERIC_MIN_GPU_CHUNK": "2000000000",
+}
+
+
Shard: TypeAlias = Tuple[int, ...]
diff --git a/legate/tester/test_system.py b/legate/tester/test_system.py
index 2c4e9949f..9f416782b 100644
--- a/legate/tester/test_system.py
+++ b/legate/tester/test_system.py
@@ -33,7 +33,6 @@
@dataclass
class ProcessResult:
-
#: The command invovation, including relevant environment vars
invocation: str
diff --git a/legate/util/args.py b/legate/util/args.py
index 4485d6db2..d23dda932 100644
--- a/legate/util/args.py
+++ b/legate/util/args.py
@@ -14,9 +14,6 @@
#
from __future__ import annotations
-import re
-import sys
-import warnings
from argparse import Action, ArgumentParser, Namespace
from dataclasses import dataclass, fields
from typing import (
@@ -144,67 +141,3 @@ def __call__(
items.append(values)
# removing any duplicates before storing
setattr(namespace, self.dest, list(set(items)))
-
-
-def parse_library_command_args(
- libname: str, args: Iterable[Argument]
-) -> Namespace:
- """ """
- if not libname.isidentifier():
- raise ValueError(
- f"Invalid library {libname!r} for command line arguments"
- )
-
- parser = ArgumentParser(
- prog=f"<{libname} program>", add_help=False, allow_abbrev=False
- )
-
- # Some explanation is in order. Argparse treats arguments with a single
- # dash differently, e.g. "-xyz" is interpreted as "-x -y -z". This can
- # cause confusion and clashes when there are multiple single-dash args
- # with identical prefixes. TLDR; we want "-legate:foo" to behave just
- # as if it was "--legate:foo". In order to do this, we configure a parser
- # for "long argumens" and then munge the values in sys.argv to update
- # any "short prefix" arguments to be "long prefix" arguments first, before
- # parsing. We also take care to update any output. The alternative here
- # would be to abandon argparse entirely, and parse sys.argv manually.
- #
- # ref: https://github.com/nv-legate/legate.core/issues/415
-
- short_prefix = f"-{libname}:"
- long_prefix = f"-{short_prefix}"
-
- argnames = [arg.name for arg in args]
-
- for arg in args:
- argname = f"{long_prefix}{arg.name}"
- parser.add_argument(argname, **arg.kwargs)
-
- has_custom_help = "help" in argnames
-
- if f"{short_prefix}help" in sys.argv and not has_custom_help:
- help_string = parser.format_help()
-
- # this is a little sloppy but should suffice in practice
- print(help_string.replace(long_prefix, short_prefix))
-
- sys.exit()
-
- # convert any short-prefix args to be long-prefix
- sys.argv = [re.sub(f"^{short_prefix}", long_prefix, x) for x in sys.argv]
-
- args, extra = parser.parse_known_args()
-
- # put any unconsumed args back they way they were
- extra = [re.sub(f"^{long_prefix}", short_prefix, x) for x in extra]
-
- for item in extra:
- if item.startswith(short_prefix):
- warnings.warn(
- f"Unrecognized argument {item!r} for {libname} (passed on as-is)" # noqa: E501
- )
- break
-
- sys.argv = sys.argv[:1] + extra
-
- return args
diff --git a/legate/util/colors.py b/legate/util/colors.py
index 6c417c221..547f0e015 100644
--- a/legate/util/colors.py
+++ b/legate/util/colors.py
@@ -94,7 +94,6 @@ def yellow(text: str) -> str:
colorama.init()
except ImportError:
-
bright = dim = white = cyan = red = magenta = green = yellow = _text
# ref: https://stackoverflow.com/a/14693789
diff --git a/legate/util/fs.py b/legate/util/fs.py
index 4b7465799..ffc5f5ab8 100644
--- a/legate/util/fs.py
+++ b/legate/util/fs.py
@@ -114,7 +114,6 @@ def get_legate_build_dir(legate_dir: Path) -> Path | None:
return None
for f in skbuild_dir.iterdir():
-
# If using a local scikit-build dir at _skbuild//cmake-build,
# read Legion_BINARY_DIR and Legion_SOURCE_DIR from CMakeCache.txt
diff --git a/legate/util/settings.py b/legate/util/settings.py
new file mode 100644
index 000000000..c4ef60899
--- /dev/null
+++ b/legate/util/settings.py
@@ -0,0 +1,310 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+""" Control global configuration options with environment variables.
+
+Precedence
+~~~~~~~~~~
+
+Setting values are always looked up in the following prescribed order:
+
+immediately supplied values
+ These are values that are passed to the setting:
+
+ .. code-block:: python
+
+ settings.consensus(value)
+
+ If ``value`` is not None, then it will be returned, as-is. Otherwise, if
+ None is passed, then the setting will continue to look down the search
+ order for a value. This is useful for passing optional function parameters
+ that are None by default. If the parameter is passed to the function, then
+ it will take precedence.
+
+previously user-set values
+ If the value is set explicitly in code:
+
+ .. code-block:: python
+
+ settings.consensus = False
+
+ Then this value will take precedence over other sources. Applications
+ may use this ability to set values supplied on the command line, so that
+ they take precedence over environment variables.
+
+environment variable
+ Values found in the associated environment variables:
+
+ .. code-block:: sh
+
+ LEGATE_CONSENSUS=yes legate script.py
+
+local defaults
+ These are default values defined when accessing the setting:
+
+ .. code-block:: python
+
+ settings.consensus(default=True)
+
+ Local defaults have lower precedence than every other setting mechanism
+ except global defaults.
+
+global defaults
+ These are default values defined by the setting declarations. They have
+ lower precedence than every other setting mechanism.
+
+If no value is obtained after searching all of these locations, then a
+RuntimeError will be raised.
+
+"""
+from __future__ import annotations
+
+import os
+from typing import Any, Generic, Type, TypeVar, Union
+
+from typing_extensions import TypeAlias
+
+__all__ = (
+ "convert_str",
+ "convert_bool",
+ "convert_str_seq",
+ "PrioritizedSetting",
+ "Settings",
+)
+
+
+class _Unset:
+ pass
+
+
+T = TypeVar("T")
+
+
+Unset: TypeAlias = Union[T, Type[_Unset]]
+
+
+def convert_str(value: str) -> str:
+ """Return a string as-is."""
+ return value
+
+
+def convert_bool(value: bool | str) -> bool:
+ """Convert a string to True or False.
+
+ If a boolean is passed in, it is returned as-is. Otherwise the function
+ maps the following strings, ignoring case:
+
+ * "yes", "1", "on", "true" -> True
+ * "no", "0", "off", "false" -> False
+
+ Args:
+ value (str):
+ A string value to convert to bool
+
+ Returns:
+ bool
+
+ Raises:
+ ValueError
+
+ """
+ if isinstance(value, bool):
+ return value
+
+ val = value.lower()
+ if val in ("yes", "1", "on", "true"):
+ return True
+ if val in ("no", "0", "off", "false"):
+ return False
+
+ raise ValueError(f"Cannot convert {value} to boolean value")
+
+
+def convert_str_seq(
+ value: list[str] | tuple[str, ...] | str
+) -> tuple[str, ...]:
+ """Convert a comma-separated string to a tuple of strings.
+
+ If a list or tuple is passed in, it is returned as a tuple.
+
+ Args:
+ value (seq[str] or str) :
+ A comma-separated string to convert to a tuple of strings
+
+ Returns:
+ tuple[str, ...]
+
+ Raises:
+ ValueError
+
+ """
+ if isinstance(value, (list, tuple)):
+ return tuple(value)
+
+ try:
+ return tuple(value.split(","))
+ except Exception:
+ raise ValueError(f"Cannot convert {value} to list value")
+
+
+class PrioritizedSetting(Generic[T]):
+ """Return a value for a global setting according to configuration
+ precedence.
+
+ The following methods are searched in order for the setting:
+
+ 4. immediately supplied values
+ 3. previously user-set values (e.g. set from command line)
+ 2. environment variable
+ 1. local defaults
+ 0. global defaults
+
+ If a value cannot be determined, a RuntimeError is raised.
+
+ The ``env_var`` argument specifies the name of an environment variable to
+ check for setting values, e.g. ``"LEGATE_CYCLE_CHECK"``.
+
+ The optional ``default`` argument specifies an implicit default value for
+ the setting that is returned if no other methods provide a value.
+
+ A ``convert`` argument may be provided to convert values before they are
+ returned.
+ """
+
+ _parent: Settings | None
+ _user_value: Unset[str | T]
+
+ def __init__(
+ self,
+ name: str,
+ env_var: str | None = None,
+ default: Unset[T] = _Unset,
+ convert: Any | None = None,
+ help: str = "",
+ ) -> None:
+ self._convert = convert if convert else convert_str
+ self._default = default
+ self._env_var = env_var
+ self._help = help
+ self._name = name
+ self._parent = None
+ self._user_value = _Unset
+
+ def __call__(
+ self, value: T | str | None = None, default: Unset[T] = _Unset
+ ) -> T:
+ """Return the setting value according to the standard precedence.
+
+ Args:
+ value (any, optional):
+ An optional immediate value. If not None, the value will
+ be converted, then returned.
+
+ default (any, optional):
+ An optional default value that only takes precedence over
+ implicit default values specified on the property itself.
+
+ Returns:
+ The setting value, after conversion
+
+ Raises:
+ RuntimeError
+ """
+
+ # 4. immediate values
+ if value is not None:
+ return self._convert(value)
+
+ # 3. previously user-set value
+ if self._user_value is not _Unset:
+ return self._convert(self._user_value)
+
+ # 2. environment variable
+ if self._env_var and self._env_var in os.environ:
+ return self._convert(os.environ[self._env_var])
+
+ # 1. local defaults
+ if default is not _Unset:
+ return self._convert(default)
+
+ # 0. global defaults
+ if self._default is not _Unset:
+ return self._convert(self._default)
+
+ raise RuntimeError(
+ f"No configured value found for setting {self._name!r}"
+ )
+
+ def __get__(
+ self, instance: Any, owner: type[Any]
+ ) -> PrioritizedSetting[T]:
+ return self
+
+ def __set__(self, instance: Any, value: str | T) -> None:
+ self.set_value(value)
+
+ def set_value(self, value: str | T) -> None:
+ """Specify a value for this setting programmatically.
+
+ A value set this way takes precedence over all other methods except
+ immediate values.
+
+ Args:
+ value (str or int or float):
+ A user-set value for this setting
+
+ Returns:
+ None
+ """
+ # It is usually not advised to store any data directly on descriptors,
+ # since they are shared by all instances. But in our case we only ever
+ # have a single instance of a given settings object.
+ self._user_value = value
+
+ def unset_value(self) -> None:
+ """Unset the previous user value such that the priority is reset."""
+ self._user_value = _Unset
+
+ @property
+ def env_var(self) -> str | None:
+ return self._env_var
+
+ @property
+ def default(self) -> Unset[T]:
+ return self._default
+
+ @property
+ def name(self) -> str:
+ return self._name
+
+ @property
+ def help(self) -> str:
+ return self._help
+
+ @property
+ def convert_type(self) -> str:
+ if self._convert is convert_str:
+ return "str"
+ if self._convert is convert_bool:
+ return "bool"
+ if self._convert is convert_str_seq:
+ return "tuple[str, ...]"
+ raise RuntimeError("unreachable")
+
+
+class Settings:
+ def __init__(self) -> None:
+ for x in self.__class__.__dict__.values():
+ if isinstance(x, PrioritizedSetting):
+ x._parent = self
diff --git a/legate/util/shared_args.py b/legate/util/shared_args.py
index 688c0bfa3..ac3a561f6 100644
--- a/legate/util/shared_args.py
+++ b/legate/util/shared_args.py
@@ -83,7 +83,8 @@
choices=LAUNCHERS,
default="none",
help='launcher program to use (set to "none" for local runs, or if '
- "the launch has already happened by the time legate is invoked)",
+ "the launch has already happened by the time legate is invoked), "
+ "[legate-only, not supported with standard Python invocation]",
),
)
@@ -97,7 +98,8 @@
required=False,
help="additional argument to pass to the launcher (can appear more "
"than once). Multiple arguments may be provided together in a quoted "
- "string (arguments with spaces inside must be additionally quoted)",
+ "string (arguments with spaces inside must be additionally quoted), "
+ "[legate-only, not supported with standard Python invocation]",
),
)
diff --git a/legate_core_cpp.cmake b/legate_core_cpp.cmake
index ef714345f..499db234f 100644
--- a/legate_core_cpp.cmake
+++ b/legate_core_cpp.cmake
@@ -205,8 +205,10 @@ list(APPEND legate_core_SOURCES
src/core/runtime/projection.cc
src/core/runtime/runtime.cc
src/core/runtime/shard.cc
+ src/core/task/registrar.cc
src/core/task/return.cc
src/core/task/task.cc
+ src/core/task/variant.cc
src/core/utilities/debug.cc
src/core/utilities/deserializer.cc
src/core/utilities/machine.cc
@@ -231,11 +233,17 @@ endif()
add_library(legate_core ${legate_core_SOURCES})
add_library(legate::core ALIAS legate_core)
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ set(platform_rpath_origin "\$ORIGIN")
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ set(platform_rpath_origin "@loader_path")
+endif ()
+
set_target_properties(legate_core
PROPERTIES EXPORT_NAME core
LIBRARY_OUTPUT_NAME lgcore
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
+ BUILD_RPATH "${platform_rpath_origin}"
+ INSTALL_RPATH "${platform_rpath_origin}"
CXX_STANDARD 17
CXX_STANDARD_REQUIRED ON
CUDA_STANDARD 17
@@ -297,6 +305,59 @@ SECTIONS
target_link_options(legate_core PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
endif()
+##############################################################################
+# - Doxygen target------------------------------------------------------------
+
+if (legate_core_BUILD_DOCS)
+ find_package(Doxygen)
+ if(Doxygen_FOUND)
+ set(legate_core_DOC_SOURCES "")
+ list(APPEND legate_core_DOC_SOURCES
+ # task
+ src/core/task/task.h
+ src/core/task/registrar.h
+ src/core/task/variant.h
+ src/core/task/exception.h
+ src/core/cuda/stream_pool.h
+ # data
+ src/core/data/store.h
+ src/core/data/scalar.h
+ src/core/data/buffer.h
+ src/core/utilities/span.h
+ src/core/data/allocator.h
+ # runtime
+ src/core/runtime/runtime.h
+ src/core/runtime/runtime.inl
+ src/core/runtime/context.h
+ # mapping
+ src/core/mapping/mapping.h
+ src/core/mapping/operation.h
+ # aliases
+ src/core/utilities/typedefs.h
+ # utilities
+ src/core/utilities/debug.h
+ src/core/utilities/dispatch.h
+ src/core/utilities/type_traits.h
+ # main page
+ src/legate.h
+ )
+ set(DOXYGEN_PROJECT_NAME "Legate")
+ set(DOXYGEN_FULL_PATH_NAMES NO)
+ set(DOXYGEN_GENERATE_HTML YES)
+ set(DOXYGEN_GENERATE_LATEX NO)
+ set(DOXYGEN_EXTENSION_MAPPING cu=C++ cuh=C++)
+ set(DOXYGEN_HIDE_UNDOC_MEMBERS YES)
+ set(DOXYGEN_HIDE_UNDOC_CLASSES YES)
+ set(DOXYGEN_STRIP_FROM_INC_PATH ${CMAKE_SOURCE_DIR}/src)
+ doxygen_add_docs("doxygen_legate" ALL
+ ${legate_core_DOC_SOURCES}
+ COMMENT "Custom command for building Doxygen docs."
+ )
+ else()
+ message(STATUS "cannot find Doxygen. not generating docs.")
+ endif()
+endif()
+
##############################################################################
# - install targets-----------------------------------------------------------
@@ -349,13 +410,18 @@ install(
install(
FILES src/core/runtime/context.h
+ src/core/runtime/context.inl
src/core/runtime/runtime.h
+ src/core/runtime/runtime.inl
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/legate/core/runtime)
install(
FILES src/core/task/exception.h
+ src/core/task/registrar.h
src/core/task/return.h
src/core/task/task.h
+ src/core/task/task.inl
+ src/core/task/variant.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/legate/core/task)
install(
@@ -382,6 +448,8 @@ Imported Targets:
]=])
+file(READ ${CMAKE_SOURCE_DIR}/cmake/legate_helper_functions.cmake helper_functions)
+
string(JOIN "\n" code_string
[=[
if(NOT TARGET legate::Thrust)
@@ -398,15 +466,25 @@ if(Legion_NETWORKS)
find_package(MPI REQUIRED COMPONENTS CXX)
endif()
]=]
+"${helper_functions}"
)
+if(DEFINED legate_core_cuda_stubs_path)
+ string(JOIN "\n" code_string "${code_string}"
+ "list(APPEND CMAKE_C_IMPLICIT_LINK_DIRECTORIES ${legate_core_cuda_stubs_path})"
+ "list(APPEND CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES ${legate_core_cuda_stubs_path})"
+ "list(APPEND CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES ${legate_core_cuda_stubs_path})")
+endif()
+
rapids_export(
INSTALL legate_core
EXPORT_SET legate-core-exports
GLOBAL_TARGETS core
NAMESPACE legate::
DOCUMENTATION doc_string
- FINAL_CODE_BLOCK code_string)
+ FINAL_CODE_BLOCK code_string
+ LANGUAGES ${ENABLED_LANGUAGES}
+)
# build export targets
rapids_export(
@@ -415,4 +493,6 @@ rapids_export(
GLOBAL_TARGETS core
NAMESPACE legate::
DOCUMENTATION doc_string
- FINAL_CODE_BLOCK code_string)
+ FINAL_CODE_BLOCK code_string
+ LANGUAGES ${ENABLED_LANGUAGES}
+)
diff --git a/legate_core_python.cmake b/legate_core_python.cmake
index 05d92853e..c3db1b60e 100644
--- a/legate_core_python.cmake
+++ b/legate_core_python.cmake
@@ -45,10 +45,10 @@ endif()
add_custom_target("generate_install_info_py" ALL
COMMAND ${CMAKE_COMMAND}
+ -DLegion_NETWORKS="${Legion_NETWORKS}"
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/generate_install_info_py.cmake"
COMMENT "Generate install_info.py"
- VERBATIM
)
add_library(legate_core_python INTERFACE)
diff --git a/pyproject.toml b/pyproject.toml
index 8f82a0d13..c186fc785 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,11 +88,14 @@ strict_equality = true
warn_unused_configs = true
+exclude = ['tests/examples']
+
[[tool.mypy.overrides]]
# ignore certain auto-generated and utility files
module = [
"legate._version",
"legate.__main__",
"legate.install_info",
+ "legate._sphinxext.*",
]
ignore_errors = true
diff --git a/scripts/generate-conda-envs.py b/scripts/generate-conda-envs.py
index 361f35149..18e35d2b3 100755
--- a/scripts/generate-conda-envs.py
+++ b/scripts/generate-conda-envs.py
@@ -155,13 +155,19 @@ def pip(self) -> Reqs:
class DocsConfig(SectionConfig):
header = "docs"
+ @property
+ def conda(self) -> Reqs:
+ return ("pandoc", "doxygen")
+
@property
def pip(self) -> Reqs:
return (
+ "ipython",
"jinja2",
"markdown<3.4.0",
- "pydata-sphinx-theme",
+ "pydata-sphinx-theme>=0.13",
"myst-parser",
+ "nbsphinx",
"sphinx-copybutton",
"sphinx>=4.4.0",
)
@@ -214,7 +220,7 @@ def filename(self) -> str:
# --- Setup -------------------------------------------------------------------
-PYTHON_VERSIONS = ("3.8", "3.9", "3.10")
+PYTHON_VERSIONS = ("3.9", "3.10", "3.11")
CTK_VERSIONS = (
"none",
@@ -239,7 +245,7 @@ def filename(self) -> str:
- conda-forge
dependencies:
- - python={python}
+ - python={python},!=3.9.7 # avoid https://bugs.python.org/issue45121
{conda_sections}{pip}
"""
@@ -314,7 +320,6 @@ def __call__(self, parser, namespace, values, option_string):
if __name__ == "__main__":
-
import sys
parser = ArgumentParser()
diff --git a/setup.cfg b/setup.cfg
index ce00187b8..65a47048c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,4 +61,4 @@ packages = find:
install_requires =
numpy>=1.22
# TODO: Add rest of install dependencies
-python_requires = >=3.8
+python_requires = >=3.9,!=3.9.7
diff --git a/setup.py b/setup.py
index 89583411a..83912f31f 100755
--- a/setup.py
+++ b/setup.py
@@ -33,9 +33,9 @@
"Topic :: Scientific/Engineering",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python",
- "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
],
extras_require={
"test": [
diff --git a/src/core/comm/coll.cc b/src/core/comm/coll.cc
index 5f1a1f4e9..4bd76a758 100644
--- a/src/core/comm/coll.cc
+++ b/src/core/comm/coll.cc
@@ -35,7 +35,6 @@ namespace legate {
namespace comm {
namespace coll {
-using namespace Legion;
Logger log_coll("coll");
BackendNetwork* backend_network = nullptr;
diff --git a/src/core/comm/comm_cpu.cc b/src/core/comm/comm_cpu.cc
index 05c2f6283..7b0393f91 100644
--- a/src/core/comm/comm_cpu.cc
+++ b/src/core/comm/comm_cpu.cc
@@ -19,8 +19,6 @@
#include "core/comm/coll.h"
-using namespace Legion;
-
namespace legate {
namespace comm {
namespace cpu {
@@ -30,7 +28,7 @@ static int init_cpucoll_mapping(const Legion::Task* task,
Legion::Context context,
Legion::Runtime* runtime)
{
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
int mpi_rank = 0;
#if defined(LEGATE_USE_NETWORK)
if (coll::backend_network->comm_type == coll::CollCommType::CollMPI) {
@@ -46,7 +44,7 @@ static coll::CollComm init_cpucoll(const Legion::Task* task,
Legion::Context context,
Legion::Runtime* runtime)
{
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
const int point = task->index_point[0];
int num_ranks = task->index_domain.get_volume();
@@ -80,7 +78,7 @@ static void finalize_cpucoll(const Legion::Task* task,
Legion::Context context,
Legion::Runtime* runtime)
{
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
assert(task->futures.size() == 1);
coll::CollComm comm = task->futures[0].get_result();
@@ -95,32 +93,29 @@ void register_tasks(Legion::Machine machine,
Legion::Runtime* runtime,
const LibraryContext& context)
{
- const InputArgs& command_args = Legion::Runtime::get_input_args();
- int argc = command_args.argc;
- char** argv = command_args.argv;
- coll::collInit(argc, argv);
+ const auto& command_args = Legion::Runtime::get_input_args();
+ coll::collInit(command_args.argc, command_args.argv);
- const TaskID init_cpucoll_mapping_task_id =
- context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_MAPPING_TASK_ID);
+ auto init_cpucoll_mapping_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_MAPPING_TASK_ID);
const char* init_cpucoll_mapping_task_name = "core::comm::cpu::init_mapping";
runtime->attach_name(init_cpucoll_mapping_task_id,
init_cpucoll_mapping_task_name,
false /*mutable*/,
true /*local only*/);
- const TaskID init_cpucoll_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_TASK_ID);
+ auto init_cpucoll_task_id = context.get_task_id(LEGATE_CORE_INIT_CPUCOLL_TASK_ID);
const char* init_cpucoll_task_name = "core::comm::cpu::init";
runtime->attach_name(
init_cpucoll_task_id, init_cpucoll_task_name, false /*mutable*/, true /*local only*/);
- const TaskID finalize_cpucoll_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_CPUCOLL_TASK_ID);
+ auto finalize_cpucoll_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_CPUCOLL_TASK_ID);
const char* finalize_cpucoll_task_name = "core::comm::cpu::finalize";
runtime->attach_name(
finalize_cpucoll_task_id, finalize_cpucoll_task_name, false /*mutable*/, true /*local only*/);
auto make_registrar = [&](auto task_id, auto* task_name, auto proc_kind) {
- TaskVariantRegistrar registrar(task_id, task_name);
- registrar.add_constraint(ProcessorConstraint(proc_kind));
+ Legion::TaskVariantRegistrar registrar(task_id, task_name);
+ registrar.add_constraint(Legion::ProcessorConstraint(proc_kind));
registrar.set_leaf(true);
registrar.global_registration = false;
return registrar;
diff --git a/src/core/comm/comm_nccl.cu b/src/core/comm/comm_nccl.cu
index 2e22b6e92..0f95fe74e 100644
--- a/src/core/comm/comm_nccl.cu
+++ b/src/core/comm/comm_nccl.cu
@@ -17,14 +17,14 @@
#include "core/comm/comm_nccl.h"
#include "core/cuda/cuda_help.h"
#include "core/cuda/stream_pool.h"
+#include "core/data/buffer.h"
#include "core/utilities/nvtx_help.h"
+#include "core/utilities/typedefs.h"
#include "legate.h"
#include
#include
-using namespace Legion;
-
namespace legate {
namespace comm {
namespace nccl {
@@ -59,7 +59,7 @@ static ncclUniqueId init_nccl_id(const Legion::Task* task,
{
legate::nvtx::Range auto_range("core::comm::nccl::init_id");
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
ncclUniqueId id;
CHECK_NCCL(ncclGetUniqueId(&id));
@@ -74,7 +74,7 @@ static ncclComm_t* init_nccl(const Legion::Task* task,
{
legate::nvtx::Range auto_range("core::comm::nccl::init");
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
assert(task->futures.size() == 1);
@@ -92,13 +92,8 @@ static ncclComm_t* init_nccl(const Legion::Task* task,
// Perform a warm-up all-to-all
- using namespace Legion;
-
- DeferredBuffer<_Payload, 1> src_buffer(Memory::GPU_FB_MEM,
- Domain(Rect<1>{Point<1>{0}, Point<1>{num_ranks - 1}}));
-
- DeferredBuffer<_Payload, 1> tgt_buffer(Memory::GPU_FB_MEM,
- Domain(Rect<1>{Point<1>{0}, Point<1>{num_ranks - 1}}));
+ auto src_buffer = create_buffer<_Payload>(num_ranks, Memory::Kind::GPU_FB_MEM);
+ auto tgt_buffer = create_buffer<_Payload>(num_ranks, Memory::Kind::GPU_FB_MEM);
CHECK_NCCL(ncclGroupStart());
for (auto idx = 0; idx < num_ranks; ++idx) {
@@ -119,7 +114,7 @@ static void finalize_nccl(const Legion::Task* task,
{
legate::nvtx::Range auto_range("core::comm::nccl::finalize");
- Core::show_progress(task, context, runtime, task->get_task_name());
+ Core::show_progress(task, context, runtime);
assert(task->futures.size() == 1);
auto comm = task->futures[0].get_result();
@@ -131,24 +126,24 @@ void register_tasks(Legion::Machine machine,
Legion::Runtime* runtime,
const LibraryContext& context)
{
- const TaskID init_nccl_id_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_ID_TASK_ID);
+ auto init_nccl_id_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_ID_TASK_ID);
const char* init_nccl_id_task_name = "core::comm::nccl::init_id";
runtime->attach_name(
init_nccl_id_task_id, init_nccl_id_task_name, false /*mutable*/, true /*local only*/);
- const TaskID init_nccl_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_TASK_ID);
+ auto init_nccl_task_id = context.get_task_id(LEGATE_CORE_INIT_NCCL_TASK_ID);
const char* init_nccl_task_name = "core::comm::nccl::init";
runtime->attach_name(
init_nccl_task_id, init_nccl_task_name, false /*mutable*/, true /*local only*/);
- const TaskID finalize_nccl_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_NCCL_TASK_ID);
+ auto finalize_nccl_task_id = context.get_task_id(LEGATE_CORE_FINALIZE_NCCL_TASK_ID);
const char* finalize_nccl_task_name = "core::comm::nccl::finalize";
runtime->attach_name(
finalize_nccl_task_id, finalize_nccl_task_name, false /*mutable*/, true /*local only*/);
auto make_registrar = [&](auto task_id, auto* task_name, auto proc_kind) {
- TaskVariantRegistrar registrar(task_id, task_name);
- registrar.add_constraint(ProcessorConstraint(proc_kind));
+ Legion::TaskVariantRegistrar registrar(task_id, task_name);
+ registrar.add_constraint(Legion::ProcessorConstraint(proc_kind));
registrar.set_leaf(true);
registrar.global_registration = false;
return registrar;
diff --git a/src/core/comm/communicator.h b/src/core/comm/communicator.h
index ed75aeb66..4da3cf7b4 100644
--- a/src/core/comm/communicator.h
+++ b/src/core/comm/communicator.h
@@ -18,17 +18,25 @@
#include "legion.h"
+/**
+ * @file
+ * @brief Class definition for legate::comm::Communicator
+ */
+
namespace legate {
namespace comm {
-// This is a thin class wrapping a future that contains a communicator.
-// This class only provides a template member function for retrieving the handle
-// and the client is expected to use a correct type for the communicators that it uses.
-//
-// The following is the list of handle types for supported communicators:
-//
-// - NCCL: ncclComm_t*
-//
+/**
+ * @ingroup task
+ * @brief A thin wrapper class for communicators stored in futures. This class only provides
+ * a template method to retrieve the communicator handle and the client is expected to pass
+ * the right handle type.
+ *
+ * The following is the list of handle types for communicators supported in Legate:
+ *
+ * - NCCL: ncclComm_t*
+ * - CPU communicator in Legate: legate::comm::coll::CollComm*
+ */
class Communicator {
public:
Communicator() {}
@@ -39,6 +47,13 @@ class Communicator {
Communicator& operator=(const Communicator&) = default;
public:
+ /**
+ * @brief Returns the communicator stored in the wrapper
+ *
+ * @tparam T The type of communicator handle to get (see valid types above)
+ *
+ * @return A communicator
+ */
template
T get() const
{
diff --git a/src/core/comm/local_comm.cc b/src/core/comm/local_comm.cc
index 8adc4a2f3..29d317c38 100644
--- a/src/core/comm/local_comm.cc
+++ b/src/core/comm/local_comm.cc
@@ -27,7 +27,6 @@ namespace legate {
namespace comm {
namespace coll {
-using namespace Legion;
extern Logger log_coll;
// public functions start from here
@@ -348,4 +347,4 @@ void LocalNetwork::barrierLocal(CollComm global_comm)
} // namespace coll
} // namespace comm
-} // namespace legate
\ No newline at end of file
+} // namespace legate
diff --git a/src/core/comm/mpi_comm.cc b/src/core/comm/mpi_comm.cc
index 1761701ff..114c82171 100644
--- a/src/core/comm/mpi_comm.cc
+++ b/src/core/comm/mpi_comm.cc
@@ -27,7 +27,6 @@ namespace legate {
namespace comm {
namespace coll {
-using namespace Legion;
extern Logger log_coll;
enum CollTag : int {
@@ -572,4 +571,4 @@ int MPINetwork::generateGatherTag(int rank, CollComm global_comm)
} // namespace coll
} // namespace comm
-} // namespace legate
\ No newline at end of file
+} // namespace legate
diff --git a/src/core/cuda/stream_pool.h b/src/core/cuda/stream_pool.h
index e2eeb86ee..27c55fa90 100644
--- a/src/core/cuda/stream_pool.h
+++ b/src/core/cuda/stream_pool.h
@@ -21,11 +21,28 @@
#include
#include "legion.h"
+/**
+ * @file
+ * @brief Class definition for legate::cuda::StreamPool
+ */
+
namespace legate {
namespace cuda {
+/**
+ * @ingroup task
+ * @brief A simple wrapper around CUDA streams to inject auxiliary features
+ *
+ * When `LEGATE_SYNC_STREAM_VIEW` is set to 1, every `StreamView` synchronizes the CUDA stream
+ * that it wraps when it is destroyed.
+ */
struct StreamView {
public:
+ /**
+ * @brief Creates a `StreamView` with a raw CUDA stream
+ *
+ * @param stream Raw CUDA stream to wrap
+ */
StreamView(cudaStream_t stream) : valid_(true), stream_(stream) {}
~StreamView();
@@ -38,6 +55,11 @@ struct StreamView {
StreamView& operator=(StreamView&&);
public:
+ /**
+ * @brief Unwraps the raw CUDA stream
+ *
+ * @return Raw CUDA stream wrapped by the `StreamView`
+ */
operator cudaStream_t() const { return stream_; }
private:
@@ -45,19 +67,37 @@ struct StreamView {
cudaStream_t stream_;
};
+/**
+ * @brief A stream pool
+ */
struct StreamPool {
public:
StreamPool() {}
~StreamPool();
public:
+ /**
+ * @brief Returns a `StreamView` in the pool
+ *
+ * @return A `StreamView` object. Currently, all stream views returned from this pool are backed
+ * by the same CUDA stream.
+ */
StreamView get_stream();
public:
+ /**
+ * @brief Returns a singleton stream pool
+ *
+ * The stream pool is alive throughout the program execution.
+ *
+ * @return A `StreamPool` object
+ */
static StreamPool& get_stream_pool();
private:
// For now we keep only one stream in the pool
+ // TODO: If this ever changes, the use of non-stream-ordered `DeferredBuffer`s
+ // in `core/data/buffer.h` will no longer be safe.
std::unique_ptr cached_stream_{nullptr};
};
diff --git a/src/core/data/allocator.cc b/src/core/data/allocator.cc
index 62051d05e..7f4512064 100644
--- a/src/core/data/allocator.cc
+++ b/src/core/data/allocator.cc
@@ -19,7 +19,7 @@
namespace legate {
-ScopedAllocator::ScopedAllocator(Legion::Memory::Kind kind, bool scoped, size_t alignment)
+ScopedAllocator::ScopedAllocator(Memory::Kind kind, bool scoped, size_t alignment)
: target_kind_(kind), scoped_(scoped), alignment_(alignment)
{
}
@@ -59,4 +59,4 @@ void ScopedAllocator::deallocate(void* ptr)
buffer.destroy();
}
-} // namespace legate
\ No newline at end of file
+} // namespace legate
diff --git a/src/core/data/allocator.h b/src/core/data/allocator.h
index 47d3c1a32..f9c80a64f 100644
--- a/src/core/data/allocator.h
+++ b/src/core/data/allocator.h
@@ -20,8 +20,22 @@
#include
+/**
+ * @file
+ * @brief Class definition for legate::ScopedAllocator
+ */
+
namespace legate {
+/**
+ * @ingroup data
+ * @brief A simple allocator backed by `Buffer` objects
+ *
+ * For each allocation request, this allocator creates a 1D `Buffer` of `int8_t` and returns
+ * the raw pointer to it. By default, all allocations are deallocated when the allocator is
+ * destroyed, and can optionally be made alive until the task finishes by making the allocator
+ * unscoped.
+ */
class ScopedAllocator {
public:
using ByteBuffer = Buffer;
@@ -31,18 +45,43 @@ class ScopedAllocator {
// Iff 'scoped', all allocations will be released upon destruction.
// Otherwise this is up to the runtime after the task has finished.
- ScopedAllocator(Legion::Memory::Kind kind, bool scoped = true, size_t alignment = 16);
+ /**
+ * @brief Create a `ScopedAllocator` for a specific memory kind
+ *
+ * @param kind Kind of the memory on which the `Buffer`s should be created
+ * @param scoped If true, the allocator is scoped; i.e., lifetimes of allocations are tied to
+ * the allocator's lifetime. Otherwise, the allocations are alive until the task finishes
+ * (unless explicitly deallocated).
+ * @param alignment Alignment for the allocations
+ */
+ ScopedAllocator(Memory::Kind kind, bool scoped = true, size_t alignment = 16);
~ScopedAllocator();
public:
+ /**
+ * @brief Allocates a contiguous buffer of the given Memory::Kind
+ *
+ * When the allocator runs out of memory, the runtime will fail with an error message.
+ * Otherwise, the function returns a valid pointer.
+ *
+ * @param bytes Size of the allocation in bytes
+ *
+ * @return A raw pointer to the allocation
+ */
void* allocate(size_t bytes);
+ /**
+ * @brief Deallocates an allocation. The input pointer must be one that was previously
+ * returned by an `allocate` call, otherwise the code will fail with an error message.
+ *
+ * @param ptr Pointer to the allocation to deallocate
+ */
void deallocate(void* ptr);
private:
- Legion::Memory::Kind target_kind_{Legion::Memory::Kind::SYSTEM_MEM};
+ Memory::Kind target_kind_{Memory::Kind::SYSTEM_MEM};
bool scoped_;
size_t alignment_;
std::unordered_map buffers_{};
};
-} // namespace legate
\ No newline at end of file
+} // namespace legate
diff --git a/src/core/data/buffer.h b/src/core/data/buffer.h
index 91550f69d..f706c232c 100644
--- a/src/core/data/buffer.h
+++ b/src/core/data/buffer.h
@@ -19,18 +19,62 @@
#include "legion.h"
#include "core/utilities/machine.h"
+#include "core/utilities/typedefs.h"
+
+/**
+ * @file
+ * @brief Type alias definition for legate::Buffer and utility functions for it
+ */
namespace legate {
+/**
+ * @ingroup data
+ * @brief A typed buffer class for intra-task temporary allocations
+ *
+ * Values in a buffer can be accessed by index expressions with legate::Point objects,
+ * or via a raw pointer to the underlying allocation, which can be queried with the `ptr` method.
+ *
+ * `legate::Buffer` is an alias to
+ * [`Legion::DeferredBuffer`](https://github.com/StanfordLegion/legion/blob/9ed6f4d6b579c4f17e0298462e89548a4f0ed6e5/runtime/legion.h#L3509-L3609).
+ *
+ * Note on using temporary buffers in CUDA tasks:
+ *
+ * We use Legion `DeferredBuffer`, whose lifetime is not connected with the CUDA stream(s) used to
+ * launch kernels. The buffer is allocated immediately at the point when `create_buffer` is called,
+ * whereas the kernel that uses it is placed on a stream, and may run at a later point. Normally
+ * a `DeferredBuffer` is deallocated automatically by Legion once all the kernels launched in the
+ * task are complete. However, a `DeferredBuffer` can also be deallocated immediately using
+ * `destroy()`, which is useful for operations that want to deallocate intermediate memory as soon
+ * as possible. This deallocation is not synchronized with the task stream, i.e. it may happen
+ * before a kernel which uses the buffer has actually completed. This is safe as long as we use the
+ * same stream on all GPU tasks running on the same device (which is guaranteed by the current
+ * implementation of `get_cached_stream`), because then all the actual uses of the buffer are done
+ * in order on the one stream. It is important that all library CUDA code uses
+ * `get_cached_stream()`, and all CUDA operations (including library calls) are enqueued on that
+ * stream exclusively. This analysis additionally assumes that no code outside of Legate is
+ * concurrently allocating from the eager pool, and that it's OK for kernels to access a buffer even
+ * after it's technically been deallocated.
+ */
template
using Buffer = Legion::DeferredBuffer;
+/**
+ * @ingroup data
+ * @brief Creates a `Buffer` of specific extents
+ *
+ * @param extents Extents of the buffer
+ * @param kind Kind of the target memory (optional). If not given, the runtime will pick
+ * automatically based on the executing processor
+ * @param alignment Alignment for the memory allocation (optional)
+ *
+ * @return A `Buffer` object
+ */
template
-Buffer create_buffer(const Legion::Point& extents,
- Legion::Memory::Kind kind = Legion::Memory::Kind::NO_MEMKIND,
- size_t alignment = 16)
+Buffer create_buffer(const Point& extents,
+ Memory::Kind kind = Memory::Kind::NO_MEMKIND,
+ size_t alignment = 16)
{
- using namespace Legion;
if (Memory::Kind::NO_MEMKIND == kind) kind = find_memory_kind_for_executing_processor(false);
auto hi = extents - Point::ONES();
// We just avoid creating empty buffers, as they cause all sorts of headaches.
@@ -39,12 +83,23 @@ Buffer create_buffer(const Legion::Point& extents,
return Buffer(bounds, kind, nullptr, alignment);
}
+/**
+ * @ingroup data
+ * @brief Creates a `Buffer` of a specific size. Always returns a 1D buffer.
+ *
+ * @param size Size of the buffer
+ * @param kind Kind of the target memory (optional). If not given, the runtime will pick
+ * automatically based on the executing processor
+ * @param alignment Alignment for the memory allocation (optional)
+ *
+ * @return A 1D `Buffer` object
+ */
template
Buffer create_buffer(size_t size,
- Legion::Memory::Kind kind = Legion::Memory::Kind::NO_MEMKIND,
- size_t alignment = 16)
+ Memory::Kind kind = Memory::Kind::NO_MEMKIND,
+ size_t alignment = 16)
{
- return create_buffer(Legion::Point<1>(size), kind, alignment);
+ return create_buffer(Point<1>(size), kind, alignment);
}
} // namespace legate
diff --git a/src/core/data/scalar.cc b/src/core/data/scalar.cc
index 2282c2f8a..13300e60e 100644
--- a/src/core/data/scalar.cc
+++ b/src/core/data/scalar.cc
@@ -71,7 +71,7 @@ size_t Scalar::size() const
auto elem_size = type_dispatch(code_, elem_size_fn{});
if (tuple_) {
auto num_elements = *static_cast(data_);
- return sizeof(int32_t) + num_elements * elem_size;
+ return sizeof(uint32_t) + num_elements * elem_size;
} else
return elem_size;
}
diff --git a/src/core/data/scalar.h b/src/core/data/scalar.h
index 5fc782271..63db09fbd 100644
--- a/src/core/data/scalar.h
+++ b/src/core/data/scalar.h
@@ -20,18 +20,58 @@
#include "core/utilities/type_traits.h"
#include "core/utilities/typedefs.h"
+/**
+ * @file
+ * @brief Class definition for legate::Scalar
+ */
+
namespace legate {
+/**
+ * @ingroup data
+ * @brief A type-erased container for scalars and tuples of scalars.
+ *
+ * A Scalar can be owned or shared, depending on whether it owns the backing allocation:
+ * If a `Scalar` is shared, it does not own the allocation and any of its copies are also
+ * shared. If a `Scalar` is owned, it owns the backing allocation and releases it upon
+ * destruction. Any copy of an owned `Scalar` is owned as well.
+ *
+ * A `Scalar` that stores a tuple of scalars has an allocation big enough to contain both
+ * the number of elements and the elements themselves. The number of elements should be
+ * stored in the first four bytes of the allocation.
+ *
+ */
class Scalar {
public:
Scalar() = default;
Scalar(const Scalar& other);
+ /**
+ * @brief Creates a shared `Scalar` with an existing allocation. The caller is responsible
+ * for passing in a sufficiently big allocation.
+ *
+ * @param tuple If true, the allocation contains a tuple of scalars.
+ * @param code Type code of the scalar(s)
+ * @param data Allocation containing the data.
+ */
Scalar(bool tuple, LegateTypeCode code, const void* data);
~Scalar();
public:
+ /**
+ * @brief Creates an owned scalar from a scalar value
+ *
+ * @tparam T The scalar type to wrap
+ *
+ * @param value A scalar value to create a `Scalar` with
+ */
template
Scalar(T value);
+ /**
+ * @brief Creates an owned scalar from a tuple of scalars. The values in the input vector
+ * will be copied.
+ *
+ * @param values A vector that contains elements of a tuple
+ */
template
Scalar(const std::vector& values);
@@ -42,14 +82,44 @@ class Scalar {
void copy(const Scalar& other);
public:
+ /**
+ * @brief Indicates if the `Scalar` object represents a tuple
+ *
+ * @return true The `Scalar` is a tuple
+ * @return false The `Scalar` is a scalar
+ */
bool is_tuple() const { return tuple_; }
+ /**
+ * @brief Returns the size of allocation for the `Scalar`.
+ *
+ * @return The size of allocation
+ */
size_t size() const;
public:
+ /**
+ * @brief Returns the value stored in the `Scalar`. The call does no type checking;
+ * i.e., passing a wrong type parameter will not be caught by the call.
+ *
+ * @tparam VAL Type of the value to unwrap
+ *
+ * @return The value stored in the `Scalar`
+ */
template
VAL value() const;
+ /**
+ * @brief Returns values stored in the `Scalar`. If the `Scalar` contains a scalar,
+ * a unit span will be returned.
+ *
+ * @return Values stored in the `Scalar`
+ */
template
Span values() const;
+ /**
+ * @brief Returns a raw pointer to the backing allocation
+ *
+ * @return A raw pointer to the `Scalar`'s data
+ */
const void* ptr() const { return data_; }
private:
diff --git a/src/core/data/store.cc b/src/core/data/store.cc
index c185ab602..bc7592c33 100644
--- a/src/core/data/store.cc
+++ b/src/core/data/store.cc
@@ -28,9 +28,7 @@
namespace legate {
-using namespace Legion;
-
-RegionField::RegionField(int32_t dim, const PhysicalRegion& pr, FieldID fid)
+RegionField::RegionField(int32_t dim, const Legion::PhysicalRegion& pr, Legion::FieldID fid)
: dim_(dim), pr_(pr), fid_(fid)
{
auto priv = pr.get_privilege();
@@ -60,14 +58,18 @@ RegionField& RegionField::operator=(RegionField&& other) noexcept
return *this;
}
-bool RegionField::valid() const { return pr_.get_logical_region() != LogicalRegion::NO_REGION; }
+bool RegionField::valid() const
+{
+ return pr_.get_logical_region() != Legion::LogicalRegion::NO_REGION;
+}
Domain RegionField::domain() const { return dim_dispatch(dim_, get_domain_fn{}, pr_); }
-OutputRegionField::OutputRegionField(const OutputRegion& out, FieldID fid)
+OutputRegionField::OutputRegionField(const Legion::OutputRegion& out, Legion::FieldID fid)
: out_(out),
fid_(fid),
- num_elements_(UntypedDeferredValue(sizeof(size_t), find_memory_kind_for_executing_processor()))
+ num_elements_(
+ Legion::UntypedDeferredValue(sizeof(size_t), find_memory_kind_for_executing_processor()))
{
}
@@ -75,9 +77,9 @@ OutputRegionField::OutputRegionField(OutputRegionField&& other) noexcept
: bound_(other.bound_), out_(other.out_), fid_(other.fid_), num_elements_(other.num_elements_)
{
other.bound_ = false;
- other.out_ = OutputRegion();
+ other.out_ = Legion::OutputRegion();
other.fid_ = -1;
- other.num_elements_ = UntypedDeferredValue();
+ other.num_elements_ = Legion::UntypedDeferredValue();
}
OutputRegionField& OutputRegionField::operator=(OutputRegionField&& other) noexcept
@@ -88,9 +90,9 @@ OutputRegionField& OutputRegionField::operator=(OutputRegionField&& other) noexc
num_elements_ = other.num_elements_;
other.bound_ = false;
- other.out_ = OutputRegion();
+ other.out_ = Legion::OutputRegion();
other.fid_ = -1;
- other.num_elements_ = UntypedDeferredValue();
+ other.num_elements_ = Legion::UntypedDeferredValue();
return *this;
}
@@ -125,8 +127,11 @@ void OutputRegionField::update_num_elements(size_t num_elements)
acc[0] = num_elements;
}
-FutureWrapper::FutureWrapper(
- bool read_only, int32_t field_size, Domain domain, Future future, bool initialize /*= false*/)
+FutureWrapper::FutureWrapper(bool read_only,
+ int32_t field_size,
+ Domain domain,
+ Legion::Future future,
+ bool initialize /*= false*/)
: read_only_(read_only), field_size_(field_size), domain_(domain), future_(future)
{
#ifdef DEBUG_LEGATE
@@ -148,16 +153,16 @@ FutureWrapper::FutureWrapper(
#ifdef LEGATE_USE_CUDA
if (mem_kind == Memory::Kind::GPU_FB_MEM) {
// TODO: This should be done by Legion
- buffer_ = UntypedDeferredValue(field_size, mem_kind);
+ buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind);
AccessorWO acc(buffer_, field_size, false);
auto stream = cuda::StreamPool::get_stream_pool().get_stream();
CHECK_CUDA(
cudaMemcpyAsync(acc.ptr(0), p_init_value, field_size, cudaMemcpyDeviceToDevice, stream));
} else
#endif
- buffer_ = UntypedDeferredValue(field_size, mem_kind, p_init_value);
+ buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind, p_init_value);
} else
- buffer_ = UntypedDeferredValue(field_size, mem_kind);
+ buffer_ = Legion::UntypedDeferredValue(field_size, mem_kind);
}
}
@@ -187,7 +192,7 @@ void FutureWrapper::initialize_with_identity(int32_t redop_id)
auto untyped_acc = AccessorWO(buffer_, field_size_);
auto ptr = untyped_acc.ptr(0);
- auto redop = Runtime::get_reduction_op(redop_id);
+ auto redop = Legion::Runtime::get_reduction_op(redop_id);
#ifdef DEBUG_LEGATE
assert(redop->sizeof_lhs == field_size_);
#endif
diff --git a/src/core/data/store.h b/src/core/data/store.h
index f21c820fc..a66617b71 100644
--- a/src/core/data/store.h
+++ b/src/core/data/store.h
@@ -24,6 +24,14 @@
#include "legate_defines.h"
#include "legion.h"
+/** @defgroup data Data abstractions and allocators
+ */
+
+/**
+ * @file
+ * @brief Class definition for legate::Store
+ */
+
namespace legate {
class RegionField {
@@ -59,7 +67,7 @@ class RegionField {
ACC operator()(const Legion::PhysicalRegion& pr,
Legion::FieldID fid,
const Legion::AffineTransform& transform,
- const Legion::Rect& bounds)
+ const Rect& bounds)
{
return ACC(pr, fid, transform, bounds);
}
@@ -76,7 +84,7 @@ class RegionField {
Legion::FieldID fid,
int32_t redop_id,
const Legion::AffineTransform& transform,
- const Legion::Rect& bounds)
+ const Rect& bounds)
{
return ACC(pr, fid, redop_id, transform, bounds);
}
@@ -84,9 +92,9 @@ class RegionField {
struct get_domain_fn {
template
- Legion::Domain operator()(const Legion::PhysicalRegion& pr)
+ Domain operator()(const Legion::PhysicalRegion& pr)
{
- return Legion::Domain(pr.get_bounds());
+ return Domain(pr.get_bounds());
}
};
@@ -113,35 +121,34 @@ class RegionField {
public:
template
- AccessorRO read_accessor(const Legion::Rect& bounds) const;
+ AccessorRO read_accessor(const Rect& bounds) const;
template
- AccessorWO write_accessor(const Legion::Rect& bounds) const;
+ AccessorWO write_accessor(const Rect& bounds) const;
template
- AccessorRW read_write_accessor(const Legion::Rect& bounds) const;
+ AccessorRW read_write_accessor(const Rect& bounds) const;
template
- AccessorRD reduce_accessor(int32_t redop_id,
- const Legion::Rect& bounds) const;
+ AccessorRD reduce_accessor(int32_t redop_id, const Rect& bounds) const;
public:
template
- AccessorRO read_accessor(const Legion::Rect& bounds,
+ AccessorRO read_accessor(const Rect& bounds,
const Legion::DomainAffineTransform& transform) const;
template
- AccessorWO write_accessor(const Legion::Rect& bounds,
+ AccessorWO write_accessor(const Rect& bounds,
const Legion::DomainAffineTransform& transform) const;
template
- AccessorRW read_write_accessor(const Legion::Rect& bounds,
+ AccessorRW read_write_accessor(const Rect& bounds,
const Legion::DomainAffineTransform& transform) const;
template
AccessorRD reduce_accessor(
int32_t redop_id,
- const Legion::Rect& bounds,
+ const Rect& bounds,
const Legion::DomainAffineTransform& transform) const;
public:
template
- Legion::Rect shape() const;
- Legion::Domain domain() const;
+ Rect shape() const;
+ Domain domain() const;
public:
bool is_readable() const { return readable_; }
@@ -177,11 +184,11 @@ class OutputRegionField {
public:
template
- Buffer create_output_buffer(const Legion::Point& extents, bool return_buffer);
+ Buffer create_output_buffer(const Point& extents, bool return_buffer);
public:
template
- void return_data(Buffer& buffer, const Legion::Point& extents);
+ void return_data(Buffer& buffer, const Point& extents);
void make_empty(int32_t dim);
public:
@@ -202,7 +209,7 @@ class FutureWrapper {
FutureWrapper() {}
FutureWrapper(bool read_only,
int32_t field_size,
- Legion::Domain domain,
+ Domain domain,
Legion::Future future,
bool initialize = false);
@@ -225,14 +232,13 @@ class FutureWrapper {
public:
template
- AccessorRO read_accessor(const Legion::Rect& bounds) const;
+ AccessorRO read_accessor(const Rect& bounds) const;
template
- AccessorWO write_accessor(const Legion::Rect& bounds) const;
+ AccessorWO write_accessor(const Rect& bounds) const;
template
- AccessorRW read_write_accessor(const Legion::Rect& bounds) const;
+ AccessorRW read_write_accessor(const Rect& bounds) const;
template
- AccessorRD reduce_accessor(int32_t redop_id,
- const Legion::Rect& bounds) const;
+ AccessorRD reduce_accessor(int32_t redop_id, const Rect& bounds) const;
public:
template
@@ -240,8 +246,8 @@ class FutureWrapper {
public:
template
- Legion::Rect shape() const;
- Legion::Domain domain() const;
+ Rect shape() const;
+ Domain domain() const;
public:
void initialize_with_identity(int32_t redop_id);
@@ -252,11 +258,15 @@ class FutureWrapper {
private:
bool read_only_{true};
size_t field_size_{0};
- Legion::Domain domain_{};
+ Domain domain_{};
Legion::Future future_{};
Legion::UntypedDeferredValue buffer_{};
};
+/**
+ * @ingroup data
+ * @brief A multi-dimensional data container storing task data
+ */
class Store {
public:
Store() {}
@@ -284,11 +294,34 @@ class Store {
Store& operator=(const Store& other) = delete;
public:
+ /**
+ * @brief Indicates whether the store is valid. A store passed to a task can be invalid
+ * only for reducer tasks for tree reduction.
+ *
+ * @return true The store is valid
+ * @return false The store is invalid and cannot be used in any data access
+ */
bool valid() const;
+ /**
+ * @brief Indicates whether the store is transformed in any way.
+ *
+ * @return true The store is transformed
+ * @return false The store is not transformed
+ */
bool transformed() const { return !transform_->identity(); }
public:
+ /**
+ * @brief Returns the dimension of the store
+ *
+ * @return The store's dimension
+ */
int32_t dim() const { return dim_; }
+ /**
+ * @brief Returns the type code of the store
+ *
+ * @return The store's type code
+ */
template
TYPE_CODE code() const
{
@@ -296,51 +329,200 @@ class Store {
}
public:
+ /**
+ * @brief Returns a read-only accessor to the store for the entire domain
+ *
+ * @return A read-only accessor to the store
+ */
template
AccessorRO read_accessor() const;
+ /**
+ * @brief Returns a write-only accessor to the store for the entire domain
+ *
+ * @return A write-only accessor to the store
+ */
template
AccessorWO write_accessor() const;
+ /**
+ * @brief Returns a read-write accessor to the store for the entire domain
+ *
+ * @return A read-write accessor to the store
+ */
template
AccessorRW read_write_accessor() const;
+ /**
+ * @brief Returns a reduction accessor to the store for the entire domain
+ *
+ * @tparam OP Reduction operator class. For details about reduction operators, see
+ * LibraryContext::register_reduction_operator.
+ *
+ * @tparam EXCLUSIVE Indicates whether reductions can be performed in exclusive mode. If
+ * `EXCLUSIVE` is `false`, every reduction via the accessor is performed atomically.
+ *
+ * @return A reduction accessor to the store
+ */
template
AccessorRD reduce_accessor() const;
public:
+ /**
+ * @brief Returns a read-only accessor to the store for specific bounds.
+ *
+ * @param bounds Domain within which accesses should be allowed.
+ * The actual bounds for valid access are determined by an intersection between
+ * the store's domain and the bounds.
+ *
+ * @return A read-only accessor to the store
+ */
template
- AccessorRO read_accessor(const Legion::Rect& bounds) const;
+ AccessorRO read_accessor(const Rect& bounds) const;
+ /**
+ * @brief Returns a write-only accessor to the store for specific bounds.
+ *
+ * @param bounds Domain within which accesses should be allowed.
+ * The actual bounds for valid access are determined by an intersection between
+ * the store's domain and the bounds.
+ *
+ * @return A write-only accessor to the store
+ */
template
- AccessorWO write_accessor(const Legion::Rect& bounds) const;
+ AccessorWO write_accessor(const Rect& bounds) const;
+ /**
+ * @brief Returns a read-write accessor to the store for specific bounds.
+ *
+ * @param bounds Domain within which accesses should be allowed.
+ * The actual bounds for valid access are determined by an intersection between
+ * the store's domain and the bounds.
+ *
+ * @return A read-write accessor to the store
+ */
template
- AccessorRW read_write_accessor(const Legion::Rect& bounds) const;
+ AccessorRW read_write_accessor(const Rect& bounds) const;
+ /**
+ * @brief Returns a reduction accessor to the store for specific bounds.
+ *
+ * @param bounds Domain within which accesses should be allowed.
+ * The actual bounds for valid access are determined by an intersection between
+ * the store's domain and the bounds.
+ *
+ * @tparam OP Reduction operator class. For details about reduction operators, see
+ * LibraryContext::register_reduction_operator.
+ *
+ * @tparam EXCLUSIVE Indicates whether reductions can be performed in exclusive mode. If
+ * `EXCLUSIVE` is `false`, every reduction via the accessor is performed atomically.
+ *
+ * @return A reduction accessor to the store
+ */
template
- AccessorRD reduce_accessor(const Legion::Rect& bounds) const;
-
- public:
+ AccessorRD reduce_accessor(const Rect& bounds) const;
+
+ public:
+ /**
+ * @brief Creates a buffer of specified extents for the unbound store. The returned
+ * buffer is always consistent with the mapping policy for the store. Can be invoked
+ * multiple times unless `return_buffer` is true.
+ *
+ * @param extents Extents of the buffer
+ *
+ * @param return_buffer If the value is true, the created buffer will be bound
+ * to the store upon return
+ *
+ * @return A buffer of the specified extents
+ */
template
- Buffer create_output_buffer(const Legion::Point& extents,
- bool return_buffer = false);
+ Buffer create_output_buffer(const Point& extents, bool return_buffer = false);
public:
+ /**
+ * @brief Returns the store's domain
+ *
+ * @return Store's domain
+ */
template
- Legion::Rect shape() const;
- Legion::Domain domain() const;
-
- public:
+ Rect shape() const;
+ /**
+ * @brief Returns the store's domain in a dimension-erased domain type
+ *
+ * @return Store's domain in a dimension-erased domain type
+ */
+ Domain domain() const;
+
+ public:
+ /**
+ * @brief Indicates whether the store can have a read accessor
+ *
+ * @return true The store can have a read accessor
+ * @return false The store cannot have a read accessor
+ */
bool is_readable() const { return readable_; }
+ /**
+ * @brief Indicates whether the store can have a write accessor
+ *
+ * @return true The store can have a write accessor
+ * @return false The store cannot have a write accessor
+ */
bool is_writable() const { return writable_; }
+ /**
+ * @brief Indicates whether the store can have a reduction accessor
+ *
+ * @return true The store can have a reduction accessor
+ * @return false The store cannot have a reduction accessor
+ */
bool is_reducible() const { return reducible_; }
public:
+ /**
+ * @brief Returns the scalar value stored in the store.
+ *
+ * The requested type must match the store's data type. If the store is not
+ * backed by a future, the runtime will fail with an error message.
+ *
+ * @tparam VAL Type of the scalar value
+ *
+ * @return The scalar value stored in the store
+ */
template
VAL scalar() const;
public:
+ /**
+ * @brief Binds a buffer to the store. Valid only when the store is unbound and
+ * has not yet been bound to another buffer. The buffer must be consistent with
+ * the mapping policy for the store. It is recommended that the buffer be created by
+ * a `create_output_buffer` call.
+ *
+ * @param buffer Buffer to bind to the store
+ *
+ * @param extents Extents of the buffer. Passing extents smaller than the actual
+ * extents of the buffer is legal; the runtime uses the passed extents as the
+ * extents of this store.
+ *
+ */
template
- void return_data(Buffer& buffer, const Legion::Point& extents);
+ void return_data(Buffer& buffer, const Point& extents);
+ /**
+ * @brief Makes the unbound store empty. Valid only when the store is unbound and
+ * has not yet been bound to another buffer.
+ */
void make_empty();
public:
+ /**
+ * @brief Indicates whether the store is backed by a future
+ * (i.e., a container for scalar value)
+ *
+ * @return true The store is backed by a future
+ * @return false The store is backed by a region field
+ */
bool is_future() const { return is_future_; }
+ /**
+ * @brief Indicates whether the store is an unbound store. The value DOES NOT indicate
+ * that the store has already been assigned to a buffer; i.e., the store may have been assigned
+ * to a buffer even when this function returns `true`.
+ *
+ * @return true The store is an unbound store
+ * @return false The store is a normal store
+ */
bool is_output_store() const { return is_output_store_; }
ReturnValue pack() const { return future_.pack(); }
ReturnValue pack_weight() const { return output_field_.pack_weight(); }
diff --git a/src/core/data/store.inl b/src/core/data/store.inl
index 7dc1d38db..d3cd6e594 100644
--- a/src/core/data/store.inl
+++ b/src/core/data/store.inl
@@ -72,32 +72,32 @@ AccessorRD RegionField::reduce_accessor(
}
template
-AccessorRO RegionField::read_accessor(const Legion::Rect& bounds) const
+AccessorRO RegionField::read_accessor(const Rect& bounds) const
{
return AccessorRO(pr_, fid_, bounds);
}
template
-AccessorWO RegionField::write_accessor(const Legion::Rect& bounds) const
+AccessorWO RegionField::write_accessor(const Rect& bounds) const
{
return AccessorWO(pr_, fid_, bounds);
}
template
-AccessorRW RegionField::read_write_accessor(const Legion::Rect& bounds) const
+AccessorRW RegionField::read_write_accessor(const Rect& bounds) const
{
return AccessorRW(pr_, fid_, bounds);
}
template
AccessorRD RegionField::reduce_accessor(int32_t redop_id,
- const Legion::Rect& bounds) const
+ const Rect& bounds) const
{
return AccessorRD(pr_, fid_, redop_id, bounds);
}
template
-AccessorRO RegionField::read_accessor(const Legion::Rect& bounds,
+AccessorRO RegionField::read_accessor(const Rect& bounds,
const Legion::DomainAffineTransform& transform) const
{
using ACC = AccessorRO;
@@ -106,7 +106,7 @@ AccessorRO RegionField::read_accessor(const Legion::Rect& bounds,
}
template
-AccessorWO RegionField::write_accessor(const Legion::Rect& bounds,
+AccessorWO RegionField::write_accessor(const Rect& bounds,
const Legion::DomainAffineTransform& transform) const
{
using ACC = AccessorWO;
@@ -116,7 +116,7 @@ AccessorWO RegionField::write_accessor(const Legion::Rect& bounds,
template
AccessorRW RegionField::read_write_accessor(
- const Legion::Rect& bounds, const Legion::DomainAffineTransform& transform) const
+ const Rect& bounds, const Legion::DomainAffineTransform& transform) const
{
using ACC = AccessorRW;
return dim_dispatch(
@@ -125,9 +125,7 @@ AccessorRW RegionField::read_write_accessor(
template
AccessorRD RegionField::reduce_accessor(
- int32_t redop_id,
- const Legion::Rect& bounds,
- const Legion::DomainAffineTransform& transform) const
+ int32_t redop_id, const Rect& bounds, const Legion::DomainAffineTransform& transform) const
{
using ACC = AccessorRD;
return dim_dispatch(
@@ -135,9 +133,9 @@ AccessorRD RegionField::reduce_accessor(
}
template
-Legion::Rect RegionField::shape() const
+Rect RegionField::shape() const
{
- return Legion::Rect(pr_);
+ return Rect(pr_);
}
template
@@ -147,7 +145,7 @@ AccessorRO