From aa9bf9486aecc324b1197e4a4751e3bed961b152 Mon Sep 17 00:00:00 2001 From: Rajeev Rao Date: Wed, 22 Sep 2021 10:54:42 -0700 Subject: [PATCH] Upgrade Polygraphy to v0.33.0. Prominent updates include (see [CHANGELOG](tools/Polygraphy/CHANGELOG.md) for details) - Added various examples, a CLI User Guide and how-to guides. - Added experimental support for DLA. - Added a `data to-input` tool that can combine inputs/outputs created by `--save-inputs`/`--save-outputs`. - Added a `PluginRefRunner` which provides CPU reference implementations for TensorRT plugins. - Made several performance improvements in the Polygraphy CUDA wrapper. - Removed the `to-json` tool which was used to convert Pickled data generated by Polygraphy 0.26.1 and older to JSON. Signed-off-by: Rajeev Rao --- tools/Polygraphy/CHANGELOG.md | 63 ++++ tools/Polygraphy/Makefile | 8 +- tools/Polygraphy/README.md | 41 +-- tools/Polygraphy/docs/conf.py | 2 +- tools/Polygraphy/examples/README.md | 2 - .../build_and_run.py | 2 +- .../load_and_run.py | 2 +- .../api/01_comparing_frameworks/README.md | 2 +- .../api/01_comparing_frameworks/example.py | 8 +- .../example.py | 2 +- .../api/06_immediate_eval_api/README.md | 36 +- .../{example.py => build_and_run.py} | 19 +- .../api/06_immediate_eval_api/load_and_run.py | 44 +++ .../07_tensorrt_and_dynamic_shapes/README.md | 2 + .../07_tensorrt_and_dynamic_shapes/example.py | 8 +- tools/Polygraphy/examples/api/README.md | 4 +- tools/Polygraphy/examples/cli/README.md | 62 +--- .../01_int8_calibration_in_tensorrt/README.md | 2 +- .../01_debugging_flaky_trt_tactics/README.md | 44 ++- .../02_reducing_failing_onnx_models/README.md | 136 ++++++++ .../model.onnx | Bin 0 -> 1204476 bytes .../02_reducing_failing_onnx_models/model.png | Bin 0 -> 48963 bytes .../05_inspecting_inference_outputs/README.md | 2 +- .../06_inspecting_input_data/README.md | 2 +- .../cli/run/01_comparing_frameworks/README.md | 25 ++ .../dynamic_identity.onnx | 14 +- .../README.md | 45 ++- 
.../create_config.py | 0 .../define_network.py | 0 .../identity.onnx | 0 .../05_comparing_with_custom_data/README.md | 4 +- .../03_modifying_input_shapes/README.md | 31 ++ .../03_modifying_input_shapes/identity.onnx | 15 + tools/Polygraphy/how-to/README.md | 5 + tools/Polygraphy/how-to/debug_accuracy.md | 123 +++++++ tools/Polygraphy/polygraphy/__init__.py | 2 +- .../polygraphy/backend/base/runner.py | 125 ++++--- .../polygraphy/backend/common/loader.py | 3 +- .../polygraphy/backend/onnx/loader.py | 61 +++- .../polygraphy/backend/onnx/util.py | 37 ++- .../polygraphy/backend/onnxrt/loader.py | 5 +- .../polygraphy/backend/onnxrt/runner.py | 38 +-- .../polygraphy/backend/pluginref/__init__.py | 17 + .../backend/pluginref/references.py | 85 +++++ .../backend/pluginref/requirements.txt | 2 + .../polygraphy/backend/pluginref/runner.py | 74 +++++ .../polygraphy/backend/pyt/runner.py | 13 +- .../polygraphy/backend/tf/loader.py | 27 +- .../polygraphy/backend/tf/runner.py | 18 +- .../backend/trt/algorithm_selector.py | 2 +- .../polygraphy/backend/trt/calibrator.py | 6 +- .../polygraphy/backend/trt/loader.py | 77 +++-- .../polygraphy/backend/trt/profile.py | 2 +- .../polygraphy/backend/trt/runner.py | 67 ++-- .../Polygraphy/polygraphy/backend/trt/util.py | 5 +- .../polygraphy/backend/trt_legacy.py | 59 +++- .../Polygraphy/polygraphy/common/constants.py | 2 +- tools/Polygraphy/polygraphy/common/cuda.py | 2 +- .../Polygraphy/polygraphy/common/exception.py | 2 +- tools/Polygraphy/polygraphy/common/func.py | 2 +- .../polygraphy/comparator/comparator.py | 18 +- .../polygraphy/comparator/compare.py | 311 +++++++++--------- .../polygraphy/comparator/data_loader.py | 15 +- .../polygraphy/comparator/postprocess.py | 13 +- .../Polygraphy/polygraphy/comparator/util.py | 26 +- tools/Polygraphy/polygraphy/cuda/cuda.py | 40 ++- tools/Polygraphy/polygraphy/json/serde.py | 20 +- tools/Polygraphy/polygraphy/logger/logger.py | 4 +- tools/Polygraphy/polygraphy/mod/importer.py | 30 +- 
tools/Polygraphy/polygraphy/tools/README.md | 173 +++++++++- .../polygraphy/tools/args/__init__.py | 1 + .../polygraphy/tools/args/comparator.py | 8 +- .../polygraphy/tools/args/data_loader.py | 37 +++ .../polygraphy/tools/args/logger.py | 2 +- .../Polygraphy/polygraphy/tools/args/model.py | 14 +- .../polygraphy/tools/args/onnx/loader.py | 5 +- .../tools/args/pluginref/__init__.py | 16 + .../polygraphy/tools/args/pluginref/runner.py | 43 +++ .../polygraphy/tools/args/trt/config.py | 63 +++- .../polygraphy/tools/args/trt_legacy.py | 45 ++- .../polygraphy/tools/args/util/util.py | 72 +++- .../polygraphy/tools/data/README.md | 24 ++ .../polygraphy/tools/data/__init__.py | 1 + .../Polygraphy/polygraphy/tools/data/data.py | 37 +++ .../polygraphy/tools/data/subtool/__init__.py | 1 + .../polygraphy/tools/data/subtool/to_input.py | 71 ++++ .../polygraphy/tools/debug/README.md | 23 +- .../tools/debug/subtool/artifact_sorter.py | 15 +- .../polygraphy/tools/debug/subtool/base.py | 2 +- .../tools/debug/subtool/diff_tactics.py | 21 +- .../polygraphy/tools/debug/subtool/reduce.py | 16 +- .../tools/inspect/subtool/capability.py | 10 +- tools/Polygraphy/polygraphy/tools/registry.py | 5 +- tools/Polygraphy/polygraphy/tools/run/run.py | 12 +- .../tools/surgeon/subtool/extract.py | 19 +- .../tools/surgeon/subtool/insert.py | 9 +- .../tools/surgeon/subtool/sanitize.py | 16 +- .../tools/template/subtool/__init__.py | 1 + .../tools/template/subtool/trt_config.py | 60 ++++ .../tools/template/subtool/trt_network.py | 6 +- .../polygraphy/tools/template/template.py | 3 +- .../polygraphy/tools/to_json/README.md | 11 - .../polygraphy/tools/to_json/__init__.py | 1 - .../polygraphy/tools/to_json/to_json.py | 53 --- tools/Polygraphy/polygraphy/tools/util.py | 23 +- tools/Polygraphy/polygraphy/util/util.py | 48 ++- tools/Polygraphy/tests/README.md | 21 ++ .../tests/backend/onnx/test_loader.py | 13 +- .../tests/backend/onnxrt/test_runner.py | 1 + .../tests/backend/pluginref/__init__.py | 0 
.../tests/backend/pluginref/test_runner.py | 80 +++++ .../tests/backend/test_tensorrt_legacy.py | 1 - .../tests/backend/tf/test_runner.py | 1 + .../tests/backend/trt/test_loader.py | 14 + .../tests/backend/trt/test_runner.py | 58 +++- .../tests/comparator/test_comparator.py | 12 +- .../tests/comparator/test_compare.py | 44 +-- .../tests/comparator/test_data_loader.py | 16 + tools/Polygraphy/tests/cuda/test_cuda.py | 40 ++- tools/Polygraphy/tests/helper.py | 14 + .../Polygraphy/tests/models/instancenorm.onnx | 27 ++ .../Polygraphy/tests/models/make_reducable.py | 84 ++++- tools/Polygraphy/tests/models/meta.py | 14 +- .../tests/models/reducable_with_const.onnx | Bin 0 -> 358 bytes .../tests/test_deprecated_aliases.py | 63 +--- tools/Polygraphy/tests/test_deps.py | 4 +- tools/Polygraphy/tests/test_examples.py | 55 ++-- .../tests/tools/args/onnx/test_loader.py | 19 ++ .../tests/tools/args/test_data_loader.py | 16 +- .../Polygraphy/tests/tools/args/test_util.py | 31 +- .../tests/tools/args/trt/test_config.py | 32 +- tools/Polygraphy/tests/tools/common.py | 4 + tools/Polygraphy/tests/tools/test_data.py | 35 ++ tools/Polygraphy/tests/tools/test_debug.py | 48 ++- tools/Polygraphy/tests/tools/test_run.py | 10 + tools/Polygraphy/tests/tools/test_template.py | 25 +- tools/Polygraphy/tests/util/test_util.py | 14 + 137 files changed, 2790 insertions(+), 938 deletions(-) rename tools/Polygraphy/examples/api/06_immediate_eval_api/{example.py => build_and_run.py} (75%) create mode 100644 tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py create mode 100644 tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/README.md create mode 100644 tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/model.onnx create mode 100644 tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/model.png rename tools/Polygraphy/examples/cli/run/{04_defining_a_tensorrt_network_manually => 
04_defining_a_tensorrt_network_or_config_manually}/README.md (50%) rename tools/Polygraphy/examples/cli/run/{04_defining_a_tensorrt_network_manually => 04_defining_a_tensorrt_network_or_config_manually}/create_config.py (100%) rename tools/Polygraphy/examples/cli/run/{04_defining_a_tensorrt_network_manually => 04_defining_a_tensorrt_network_or_config_manually}/define_network.py (100%) rename tools/Polygraphy/examples/cli/run/{04_defining_a_tensorrt_network_manually => 04_defining_a_tensorrt_network_or_config_manually}/identity.onnx (100%) create mode 100644 tools/Polygraphy/examples/cli/surgeon/03_modifying_input_shapes/README.md create mode 100644 tools/Polygraphy/examples/cli/surgeon/03_modifying_input_shapes/identity.onnx create mode 100644 tools/Polygraphy/how-to/README.md create mode 100644 tools/Polygraphy/how-to/debug_accuracy.md create mode 100644 tools/Polygraphy/polygraphy/backend/pluginref/__init__.py create mode 100644 tools/Polygraphy/polygraphy/backend/pluginref/references.py create mode 100644 tools/Polygraphy/polygraphy/backend/pluginref/requirements.txt create mode 100644 tools/Polygraphy/polygraphy/backend/pluginref/runner.py create mode 100644 tools/Polygraphy/polygraphy/tools/args/pluginref/__init__.py create mode 100644 tools/Polygraphy/polygraphy/tools/args/pluginref/runner.py create mode 100644 tools/Polygraphy/polygraphy/tools/data/README.md create mode 100644 tools/Polygraphy/polygraphy/tools/data/__init__.py create mode 100644 tools/Polygraphy/polygraphy/tools/data/data.py create mode 100644 tools/Polygraphy/polygraphy/tools/data/subtool/__init__.py create mode 100644 tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py create mode 100644 tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py delete mode 100644 tools/Polygraphy/polygraphy/tools/to_json/README.md delete mode 100644 tools/Polygraphy/polygraphy/tools/to_json/__init__.py delete mode 100644 tools/Polygraphy/polygraphy/tools/to_json/to_json.py create mode 100644 
tools/Polygraphy/tests/README.md create mode 100644 tools/Polygraphy/tests/backend/pluginref/__init__.py create mode 100644 tools/Polygraphy/tests/backend/pluginref/test_runner.py create mode 100644 tools/Polygraphy/tests/models/instancenorm.onnx create mode 100644 tools/Polygraphy/tests/models/reducable_with_const.onnx create mode 100644 tools/Polygraphy/tests/tools/test_data.py diff --git a/tools/Polygraphy/CHANGELOG.md b/tools/Polygraphy/CHANGELOG.md index 43c297f6..da865f3b 100644 --- a/tools/Polygraphy/CHANGELOG.md +++ b/tools/Polygraphy/CHANGELOG.md @@ -3,6 +3,69 @@ Dates are in YYYY-MM-DD format. +## v0.33.0 (2021-09-16) +### Added +- Added various examples, a [CLI User Guide](polygraphy/tools/) and [directory for how-to guides](./how-to). +- Added an experimental `template trt-config` tool to generate template scripts that create TensorRT builder configurations. +- Added `--hide-fail-output` to make `debug` subtools suppress output from failed iterations. +- Added experimental support for DLA. +- Added a `data to-input` tool that can combine inputs/outputs created by `--save-inputs`/`--save-outputs`. + The resulting file is compatible with `--load-inputs`. + +### Changed +- Updated `debug` subtools to show captured output on failed iterations. +- The logger will now emit all `CRITICAL` messages to `stderr` instead of `stdout`. +- Renamed `CompareFunc.basic_compare_func` to `CompareFunc.simple`. The old name is preserved as an alias. +- The `--good` and `--bad` arguments in `diff-tactics` can now also accept single files instead of directories. + +### Fixed +- Fixed a bug where `debug reduce` would crash when ONNX models included `Constant` nodes whose outputs + needed to be marked as model outputs. + + +## v0.32.0 (2021-08-10) +### Added +- Added support for `K`, `M`, and `G` suffixes to CLI arguments that expect a number of bytes (e.g. `--workspace`). + These correspond to `KiB`, `MiB`, and `GiB` respectively. 
+ For example, `--workspace=16M` is equivalent to `--workspace=16777216`. +- Added a `copy_outputs_to_host` parameter in `TrtRunner.infer()`, which, when set to `False`, will cause the runner + to return `DeviceView`s instead of NumPy arrays for inference outputs. This allows us to avoid a + device-to-host and host-to-device copy if we want outputs to remain on the device. +- Added a `view()` method to `DeviceArray`s to create read-only `DeviceView`s over their data. +- Added a `PluginRefRunner` which provides CPU reference implementations for TensorRT plugins + and a corresponding `--pluginref` runner option in `polygraphy run`. + +### Changed +- Marked old shape syntax (`,dim0xdim1x...xdimN,`) as deprecated since it leads to ambiguity when + parsing shapes including named dynamic dimensions. + + For example, compare: + ``` + --input-shapes input0,xxyxz + ``` + + and: + ``` + --input-shapes input0:[x,y,z] + ``` + + For now, the old syntax continues to work for shapes without named dimensions, + but it will be removed in a future version of Polygraphy. + + The newer syntax, which was originally introduced in Polygraphy 0.25.0, + uses the list syntax already present in other parts of Polygraphy. + For example, `--val-range [0,1]` in `run` and `--attrs axes=[0,1]` in `surgeon insert` use the same syntax. +- Made several performance improvements in the Polygraphy CUDA wrapper. +- Added a loud warning when the deprecated `--int-min`/`--int-max` or `--float-min`/`--float-max` options are used. + These are superseded by `--val-range` which allows you to specify data ranges on a per-input basis. + +### Removed +- Removed various deprecated aliases: `ModifyOnnx`, `SessionFromOnnxBytes`, `ModifyNetwork`, `ModifyGraph` +- Removed the `to-json` tool which was used to convert Pickled data generated by Polygraphy 0.26.1 and older to JSON. + Polygraphy 0.27.0 and later only support reading and writing data in JSON format. 
+- Removed deprecated legacy submodule `polygraphy.util.misc` which was just an alias for `polygraphy.util`. + + ## v0.31.1 (2021-07-16) ### Changed - Improved the quality of several examples and added information on how to load serialized TensorRT engines diff --git a/tools/Polygraphy/Makefile b/tools/Polygraphy/Makefile index 06e5c919..9898c6e1 100644 --- a/tools/Polygraphy/Makefile +++ b/tools/Polygraphy/Makefile @@ -4,10 +4,16 @@ NPROC ?= 8 # Tests also check that docs can build test: docs + # Some tests need to be run serially - we annotate those with a `serial` marker. export PYTHONPATH=$(CURDIR):$${PYTHONPATH} && \ + export PATH=$(CURDIR)/bin:$${PATH} && \ + export POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS=1 && \ + python3 -m pytest tests/ -v -x --durations=5 -m "serial" && \ + \ + export PYTHONPATH=$(CURDIR):$${PYTHONPATH} && \ export PATH=$(CURDIR)/bin:$${PATH} && \ export POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS=1 && \ - python3 -m pytest tests/ -v -x -n $(NPROC) --dist=loadscope --durations=5 + python3 -m pytest tests/ -v -x -n $(NPROC) --dist=loadscope --durations=5 -m "not serial" leak_check: export PYTHONPATH=$(CURDIR):$${PYTHONPATH} && \ diff --git a/tools/Polygraphy/README.md b/tools/Polygraphy/README.md index 2786744c..79278e18 100644 --- a/tools/Polygraphy/README.md +++ b/tools/Polygraphy/README.md @@ -5,11 +5,10 @@ - [Introduction](#introduction) - [Installation](#installation) -- [Usage](#usage) +- [Command-line Toolkit](#command-line-toolkit) +- [Python API](#python-api) - [Examples](#examples) -- [Advanced](#advanced) - - [Using The Python API](#using-the-python-api) - - [Enabling Internal Correctness Checks](#enabling-internal-correctness-checks) +- [How-To Guides](#how-to-guides) - [Contributing](#contributing) @@ -43,7 +42,7 @@ Among other things, Polygraphy lets you: ### Installing Prebuilt Wheels ```bash -python -m pip install colored polygraphy --index-url https://pypi.ngc.nvidia.com +python -m pip install colored polygraphy --extra-index-url 
https://pypi.ngc.nvidia.com ``` **NOTE:** *When using this method, the command-line toolkit will be installed into `${HOME}/.local/bin` by default.* @@ -137,41 +136,27 @@ You can install the additional packages manually with: python -m pip install ``` -## Usage -Polygraphy includes a command-line interface, [`polygraphy`](./bin/polygraphy), which provides various tools. -For usage information, run `polygraphy --help` +## Command-line Toolkit -For details on the various tools included in the Polygraphy toolkit, see the -[tools directory](./polygraphy/tools). +For details on the various tools included in the Polygraphy toolkit, +see the [CLI User Guide](./polygraphy/tools). -## Examples - -For examples of both the CLI and Python API, see the [examples directory](./examples). - - -## Advanced - -### Using The Python API +### Python API For more information on the Polygraphy Python API, including a high-level overview and the Python API reference documentation, see the [API directory](./polygraphy). -### Enabling Internal Correctness Checks +## Examples + +For examples of both the CLI and Python API, see the [examples directory](./examples). -Polygraphy includes various runtime checks for internal correctness, which are -disabled by default. These checks can be enabled by setting the `POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS` -environment variable to `1` or `polygraphy.config.INTERNAL_CORRECTNESS_CHECKS = True` in the Python API. -A failure in this type of check indicates a bug in Polygraphy. -When the checks are enabled, Polygraphy will ensure, for example, that loaders do not -modify their state when they are called, and that runners will reset their state correctly in -`deactivate()`. +## How-To Guides -**NOTE:** *`POLYGRAPHY_INTERNAL_CORRECTNESS_CHECKS` only relates to checks that validate Polygraphy's* - *internal APIs. 
User input validation and public API checks are always enabled and cannot be disabled.* +For how-to guides, see the [how-to guides directory](./how-to). ## Contributing diff --git a/tools/Polygraphy/docs/conf.py b/tools/Polygraphy/docs/conf.py index b3cbe4f7..01777e53 100644 --- a/tools/Polygraphy/docs/conf.py +++ b/tools/Polygraphy/docs/conf.py @@ -36,7 +36,7 @@ autodoc_default_options = { "members": True, "show-inheritance": True, - "exclude-members": "activate_impl, deactivate_impl, get_input_metadata_impl, infer_impl, BaseNetworkFromOnnx, Encoder, Decoder, add_json_methods, constantmethod", + "exclude-members": "activate_impl, deactivate_impl, get_input_metadata_impl, BaseNetworkFromOnnx, Encoder, Decoder, add_json_methods, constantmethod", "special-members": "__call__, __getitem__, __bool__, __enter__, __exit__", } diff --git a/tools/Polygraphy/examples/README.md b/tools/Polygraphy/examples/README.md index dafe8571..2dbd905a 100644 --- a/tools/Polygraphy/examples/README.md +++ b/tools/Polygraphy/examples/README.md @@ -3,5 +3,3 @@ This directory includes various examples covering the Polygraphy [CLI](./cli), [Python API](./api), and [development practices](./dev). The paths used in each example assume that the example is being run from within that example's directory. - -All the models used by these examples are provided in the [models directory](./models). diff --git a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py index 166c3505..41f8547a 100644 --- a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py +++ b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/build_and_run.py @@ -29,7 +29,7 @@ def main(): # # NOTE: `build_engine` is a *callable* that returns an engine, not the engine itself. # To get the engine directly, you can use the immediately evaluated functional API. - # See eexamples/api/06_immediate_eval_api for details. 
+ # See examples/api/06_immediate_eval_api for details. build_engine = EngineFromNetwork( NetworkFromOnnxPath("identity.onnx"), config=CreateConfig(fp16=True) ) # Note that config is an optional argument. diff --git a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py index b51b623f..a9eb4752 100644 --- a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py +++ b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/load_and_run.py @@ -16,7 +16,7 @@ # """ -This script loads the TensorRT engine built by `build_and_run.py` and then runs it. +This script loads the TensorRT engine built by `build_and_run.py` and runs inference. """ import numpy as np from polygraphy.backend.common import BytesFromPath diff --git a/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md b/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md index 443b7a4d..e63016a8 100644 --- a/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md +++ b/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md @@ -8,7 +8,7 @@ different backends. This makes it possible to check the accuracy of one backend respect to another. In this example, we'll look at how you can use the Polygraphy API to run inference -on a model using ONNX Runtime and TensorRT, and then compare the results. +with synthetic input data using ONNX Runtime and TensorRT, and then compare the results. 
## Running The Example diff --git a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py index fff7eb6c..8a837dba 100644 --- a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py +++ b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py @@ -21,7 +21,7 @@ """ from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner -from polygraphy.comparator import Comparator +from polygraphy.comparator import Comparator, CompareFunc def main(): @@ -46,7 +46,11 @@ def main(): run_results = Comparator.run(runners) # `Comparator.compare_accuracy()` checks that outputs match between runners. - assert bool(Comparator.compare_accuracy(run_results)) + # + # TIP: The `compare_func` parameter can be used to control how outputs are compared (see API reference for details). + # The default comparison function is created by `CompareFunc.simple()`, but we can construct it + # explicitly if we want to change the default parameters, such as tolerance. + assert bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.simple(atol=1e-8))) # We can use `RunResults.save()` method to save the inference results to a JSON file. # This can be useful if you want to generate and compare results separately. diff --git a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py index 008e6e01..a21ff342 100644 --- a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py +++ b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/example.py @@ -29,7 +29,7 @@ def calib_data(): for _ in range(4): # TIP: If your calibration data is already on the GPU, you can instead provide GPU pointers - # (as `int`s) or Polygraphy `DeviceView`s instead of NumPy arrays. 
+ # (as `int`s) or Polygraphy `DeviceView`s instead of NumPy arrays. # # For details on `DeviceView`, see `polygraphy/cuda/cuda.py`. yield {"x": np.ones(shape=(1, 1, 2, 2), dtype=np.float32)} # Totally real data diff --git a/tools/Polygraphy/examples/api/06_immediate_eval_api/README.md b/tools/Polygraphy/examples/api/06_immediate_eval_api/README.md index 59473059..8946d162 100644 --- a/tools/Polygraphy/examples/api/06_immediate_eval_api/README.md +++ b/tools/Polygraphy/examples/api/06_immediate_eval_api/README.md @@ -2,6 +2,7 @@ ## Introduction + Most of the time, the lazy loaders included with Polygraphy have several advantages: - They allow us to defer the work until we actually need to do it, which can potentially save @@ -16,6 +17,7 @@ Most of the time, the lazy loaders included with Polygraphy have several advanta ```python build_engine = EngineBytesFromNetwork(NetworkFromOnnxPath("/path/to/model.onnx")) ``` + - They allow for special semantics where if a callable is provided to a loader, it takes ownership of the return value, whereas otherwise it does not. These special semantics are useful for sharing objects between multiple loaders. @@ -46,12 +48,18 @@ engine = build_engine() becomes: ```python -builder, network = network_from_onnx_path("/path/to/model.onnx") +builder, network, parser = network_from_onnx_path("/path/to/model.onnx") config = create_config(builder, network, fp16=True, tf32=True) -engine = engine_from_network((builder, network), config) +engine = engine_from_network((builder, network, parser), config) ``` + + + +In this example, we'll look at how you can leverage the functional API to convert an ONNX +model to a TensorRT network, modify the network, build a TensorRT engine with FP16 precision +enabled, and run inference. +We'll also save the engine to a file to see how you can load it again and run inference. -`example.py` showcases basic usage of the immediately evaluated functional API. 
## Running The Example @@ -59,8 +67,26 @@ engine = engine_from_network((builder, network), config) * Ensure that TensorRT is installed * Install other dependencies with `python3 -m pip install -r requirements.txt` -2. Run the example: +2. **[Optional]** Inspect the model before running the example: + + ```bash + polygraphy inspect model identity.onnx + ``` + +3. Run the script that builds and runs the engine: + + ```bash + python3 build_and_run.py + ``` + +4. **[Optional]** Inspect the TensorRT engine built by the example: + + ```bash + polygraphy inspect model identity.engine + ``` + +5. Run the script that loads the previously built engine, then runs it: ```bash - python3 example.py + python3 load_and_run.py ``` diff --git a/tools/Polygraphy/examples/api/06_immediate_eval_api/example.py b/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py similarity index 75% rename from tools/Polygraphy/examples/api/06_immediate_eval_api/example.py rename to tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py index be1ced43..1b87f072 100644 --- a/tools/Polygraphy/examples/api/06_immediate_eval_api/example.py +++ b/tools/Polygraphy/examples/api/06_immediate_eval_api/build_and_run.py @@ -18,12 +18,11 @@ """ This script uses Polygraphy's immediately evaluated functional APIs to load an ONNX model, convert it into a TensorRT network, add an identity -layer to the end of it, build an engine with FP16 mode enabled, and finally -run inference. +layer to the end of it, build an engine with FP16 mode enabled, +save the engine, and finally run inference. """ import numpy as np - -from polygraphy.backend.trt import TrtRunner, create_config, engine_from_network, network_from_onnx_path +from polygraphy.backend.trt import TrtRunner, create_config, engine_from_network, network_from_onnx_path, save_engine def main(): @@ -34,7 +33,10 @@ def main(): # Since we are immediately evaluating, we take ownership of objects, and are responsible for freeing them. 
builder, network, parser = network_from_onnx_path("identity.onnx") - # Extend the network with an identity layer. + # Extend the network with an identity layer (purely for the sake of example). + # Note that unlike with lazy loaders, we don't need to do anything special to modify the network. + # If we were using lazy loaders, we would need to use `func.extend()` as described + # in example 03 and example 05. prev_output = network.get_output(0) network.unmark_output(prev_output) output = network.add_identity(prev_output).get_output(0) @@ -45,11 +47,14 @@ def main(): config = create_config(builder, network, fp16=True) # We can free everything we constructed above once we're done building the engine. - # NOTE: In TensorRT 8.0, we do *not* need to use a context manager here. + # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager here. with builder, network, parser, config: engine = engine_from_network((builder, network), config) - # NOTE: In TensorRT 8.0, we do *not* need to use a context manager to free `engine`. + # To reuse the engine elsewhere, we can serialize it and save it to a file. + save_engine(engine, path="identity.engine") + + # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager to free `engine`. with engine, TrtRunner(engine) as runner: inp_data = np.ones((1, 1, 2, 2), dtype=np.float32) diff --git a/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py b/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py new file mode 100644 index 00000000..f5c8eddf --- /dev/null +++ b/tools/Polygraphy/examples/api/06_immediate_eval_api/load_and_run.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +""" +This script uses Polygraphy's immediately evaluated functional APIs +to load the TensorRT engine built by `build_and_run.py` and run inference. +""" +import numpy as np +from polygraphy.backend.common import bytes_from_path +from polygraphy.backend.trt import TrtRunner, engine_from_bytes + + +def main(): + engine = engine_from_bytes(bytes_from_path("identity.engine")) + + # NOTE: In TensorRT 8.0 and newer, we do *not* need to use a context manager to free `engine`. + with engine, TrtRunner(engine) as runner: + inp_data = np.ones((1, 1, 2, 2), dtype=np.float32) + + # NOTE: The runner owns the output buffers and is free to reuse them between `infer()` calls. + # Thus, if you want to store results from multiple inferences, you should use `copy.deepcopy()`. + outputs = runner.infer(feed_dict={"x": inp_data}) + + assert np.array_equal(outputs["output"], inp_data) # It's an identity model! 
+ + print("Inference succeeded!") + + +if __name__ == "__main__": + main() diff --git a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/README.md b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/README.md index c9d9ab25..e1e9d7c1 100644 --- a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/README.md +++ b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/README.md @@ -25,6 +25,7 @@ Using the TensorRT API, the process involves two steps: For a single-input, single-output model, this would look roughly as follows: + ```python context.set_binding_shape(0, inp.shape) @@ -33,6 +34,7 @@ Using the TensorRT API, the process involves two steps: # Rest of inference code... ``` + Polygraphy can simplify both steps and help you avoid common pitfalls: diff --git a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py index 8cd5ce9b..c021dcb3 100644 --- a/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py +++ b/tools/Polygraphy/examples/api/07_tensorrt_and_dynamic_shapes/example.py @@ -23,10 +23,10 @@ import numpy as np from polygraphy.backend.trt import ( CreateConfig, - NetworkFromOnnxPath, Profile, TrtRunner, engine_from_network, + network_from_onnx_path, save_engine, ) from polygraphy.logger import G_LOGGER @@ -34,6 +34,7 @@ def main(): # A Profile maps each input tensor to a range of shapes. + # The `add()` method can be used to add shapes for a single input. # # TIP: To save lines, calls to `add` can be chained: # profile.add("input0", ...).add("input1", ...) @@ -52,7 +53,10 @@ def main(): ] # See examples/api/06_immediate_eval_api for details on immediately evaluated functional loaders like `engine_from_network`. - engine = engine_from_network(NetworkFromOnnxPath("dynamic_identity.onnx"), config=CreateConfig(profiles=profiles)) + # Note that we can freely inter-mix lazy and immediately-evaluated loaders. 
+ engine = engine_from_network( + network_from_onnx_path("dynamic_identity.onnx"), config=CreateConfig(profiles=profiles) + ) # We'll save the engine so that we can inspect it with `inspect model`. # This should make it easy to see how the engine bindings are laid out. diff --git a/tools/Polygraphy/examples/api/README.md b/tools/Polygraphy/examples/api/README.md index f97c9a3f..b952e02e 100644 --- a/tools/Polygraphy/examples/api/README.md +++ b/tools/Polygraphy/examples/api/README.md @@ -3,6 +3,8 @@ This directory includes examples that use the Polygraphy Python API. For examples of the command-line tools, see the [cli](../cli/) directory instead. +You may find it useful to read the [Python API Overview](../../polygraphy/) prior +to looking at the API examples. ## Generating Your Own Examples @@ -26,7 +28,7 @@ will display something like this on `stdout`: ```py #!/usr/bin/env python3 # Template auto-generated by polygraphy [v0.31.0] on 01/01/20 at 10:10:10 -# Generation Command: /infrastructure/Polygraphy/bin/polygraphy run --gen - model.onnx --trt --onnxrt +# Generation Command: polygraphy run --gen - model.onnx --trt --onnxrt # This script compares model.onnx between TensorRT and ONNX Runtime from polygraphy.logger import G_LOGGER diff --git a/tools/Polygraphy/examples/cli/README.md b/tools/Polygraphy/examples/cli/README.md index 204bc0f9..a9780a36 100644 --- a/tools/Polygraphy/examples/cli/README.md +++ b/tools/Polygraphy/examples/cli/README.md @@ -3,64 +3,4 @@ This directory includes examples that use the Polygraphy CLI. For examples of the Python API, see the [api](../api/) directory instead. -## Common Topics - -This section covers some concepts that are common to multiple tools. - -### Using Custom Input Data - -For tools that require input data, such as `run`, Polygraphy currently -provides 2 ways to use custom input data: - -1. `--load-input-data`, which takes a path to a JSON file containing a `List[Dict[str, np.ndarray]]`. 
- This will cause Polygraphy to load the entire object into memory. - *NOTE: This may be impractical or even impossible if the data is very large.* - -2. `--data-loader-script`, which takes a path to a Python script that defines a `load_data` function - that returns a data loader. The data loader can be any iterable or generator that yields - `Dict[str, np.ndarray]`. By using a generator, we can avoid loading all the data - at once, and instead limit it to just a single input at a time. - - *TIP: If you have an existing script that already defines such a function, you do **not** need to create* - *a separate script just for the sake of `--data-loader-script`. You can simply use the existing script* - *and optionally use the `--data-loader-func-name` argument to specify the name of the function (if it's not `load_data`)* - -See [`run` example 05](run/05_comparing_with_custom_data) for details. - -### Defining A Custom TensorRT Network - -Many of the command-line tools involve creating TensorRT networks. In most cases, these -are created by parsing a model from a framework (generally in ONNX format). However, it -is also possible to define the TensorRT network manually using a Python script. - -1. To get started, generate a template script with: - - ```bash - polygraphy template trt-network -o define_network.py - ``` - - If you want to start from a model and modify the resulting TensorRT network instead - of creating one from scratch, simply provide the model as an argument to `template trt-network`. - For example, for an ONNX model: - - ```bash - polygraphy template trt-network -o define_network.py - ``` - -2. Once you've filled out the body of the `load_network` function in `define_network.py`, - you can use it in the tools by providing the script in place of a model argument. - For example: - - ```bash - polygraphy run define_network.py --model-type=trt-network-script --trt - ``` - -See [`run` example 04](run/04_defining_a_tensorrt_network_manually) for details. 
- - -### Defining a Custom TensorRT Builder Configuration - -Similar to defining custom TensorRT networks, it is possible to provide custom -TensorRT builder configurations on the command-line using a Python script. - -See [`run` example 04](run/04_defining_a_tensorrt_network_manually) for details. +You may find the [CLI User Guide](../../polygraphy/tools/) useful to navigate the CLI examples. diff --git a/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/README.md b/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/README.md index 7d6ac06a..a447d612 100644 --- a/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/README.md +++ b/tools/Polygraphy/examples/cli/convert/01_int8_calibration_in_tensorrt/README.md @@ -9,7 +9,7 @@ Polygraphy's included calibrator to easily run int8 calibration with TensorRT. But what if we wanted to do the same thing on the command-line? To do this, we need a way to supply custom input data to our command-line tools. -Polygraphy provides multiple ways to do so, which are detailed [here](../../). +Polygraphy provides multiple ways to do so, which are detailed [here](../../../cli#using-custom-input-data). In this example, we'll use a data loader script by defining a `load_data` function in a Python script called `data_loader.py` and then use `polygraphy convert` to build the TensorRT engine. diff --git a/tools/Polygraphy/examples/cli/debug/01_debugging_flaky_trt_tactics/README.md b/tools/Polygraphy/examples/cli/debug/01_debugging_flaky_trt_tactics/README.md index 19f6bbad..66455bb5 100644 --- a/tools/Polygraphy/examples/cli/debug/01_debugging_flaky_trt_tactics/README.md +++ b/tools/Polygraphy/examples/cli/debug/01_debugging_flaky_trt_tactics/README.md @@ -10,11 +10,12 @@ manifest as flaky failures. One approach to tackling the problem is to run the builder several times, saving tactic replay files from each run. 
Once we have a set of known-good and -known-bad tactics, we can compare them to determine which tactic(s) +known-bad tactics, we can compare them to determine which tactic is likely to be the source of error. -The `debug` subtool allows us to automate this process. +The `debug build` subtool allows us to automate this process. +For more details on how the `debug` tools work, see [here](../../../../polygraphy/tools/debug/). ## Running The Example @@ -27,29 +28,38 @@ For this example, we'll break the process down into 3 steps: --save-outputs golden.json ``` -2. Use `debug build` to repeatedly build TensorRT engines (in this case, for 2 iterations) +2. Use `debug build` to repeatedly build TensorRT engines (in this case, for 2 iterations, specified in `--until`) and compare results against the golden outputs, saving a tactic replay file each time: ```bash - polygraphy debug build identity.onnx --save-tactics replay.json \ + polygraphy debug build identity.onnx --fp16 --save-tactics replay.json \ --artifacts-dir replays --artifacts replay.json --until=2 \ --check polygraphy run polygraphy_debug.engine --trt --load-outputs golden.json ``` - `debug build` will build the engine and write it to a file called `polygraphy_debug.engine` in the - current directory. During each iteration, the engine saved during the previous iteration will be overwritten. + `debug build` will build the engine, in this case with FP16 mode enabled, + and write it to a file called `polygraphy_debug.engine` in the current directory. + During each iteration, the engine saved during the previous iteration will be overwritten. + + *TIP: `debug build` supports all the TensorRT builder configuration options supported by other tools,* + *like `convert` or `run`. See `polygraphy debug build -h` for details.* The `--save-tactics replay.json` option will write out a tactic replay file to `replay.json` for each iteration.
+ Since we want to sort these into `good` and `bad` replays, we let `debug build` manage - them by specifying them as `--artifacts`. If the `--check` command returns an exit status of 0, + them by specifying them as `--artifacts`. If the `--check` command succeeds, the run is considered `good` and the tactic replay will be moved to `replays/good`. Otherwise, it will be considered `bad` and the tactic replay will be moved to `replays/bad`. In our `--check` command, we compare our TensorRT results to the previously generated - golden outputs. + golden outputs. If the outputs don't match, the command will fail. + + *TIP: For finer control over what qualifies as a `--check` success/failure, you can use the* + *`--fail-regex`, `--fail-code`, and `--ignore-fail-code` options. See `polygraphy debug build -h` for details.* + *By default, only the status code is taken into consideration.* - NOTE: In this case, all the replay files should be copied into the `good` directory - it's - very unlikely that a simple identity model will fail. + *NOTE: In this case, all the replay files should be copied into the `good` directory - it's* + *very unlikely that a simple identity model will fail.* 3. Use `debug diff-tactics` to determine which tactics could be bad: @@ -57,9 +67,13 @@ For this example, we'll break the process down into 3 steps: polygraphy debug diff-tactics --dir replays ``` - NOTE: This last step should report that it could not determine potentially bad tactics since - our `bad` directory is empty at this point: + *NOTE: This last step should report that it could not determine potentially bad tactics since* + *our `bad` directory is empty at this point:* - [I] Loaded 2 good tactic replays. - [I] Loaded 0 bad tactic replays. - [I] Could not determine potentially bad tactics. Try generating more tactic replay files? + + ``` + [I] Loaded 2 good tactic replays. + [I] Loaded 0 bad tactic replays. + [I] Could not determine potentially bad tactics. 
Try generating more tactic replay files? + ``` + diff --git a/tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/README.md b/tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/README.md new file mode 100644 index 00000000..6a4c760a --- /dev/null +++ b/tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/README.md @@ -0,0 +1,136 @@ +# Reducing Failing ONNX Models + +## Introduction + +When a model fails for any reason (for example, an accuracy issue in TensorRT) it is often +useful to reduce it to the smallest possible subgraph that triggers the failure. That makes +it easier to pinpoint the cause of the failure. + +One approach to doing so is to generate successively smaller subgraphs of the original ONNX model. +At each iteration, we can check whether the subgraph works or is still failing; once we have a working +subgraph, we know that the subgraph generated by the previous iteration is the smallest failing +subgraph. + +The `debug reduce` subtool allows us to automate this process. The tool offers multiple strategies to +reduce the model: `bisect` operates in O(log(N)) time, while `linear` operates in O(N) time but may +lead to smaller models. A good compromise is to use `bisect` on the original model, then further reduce +the result using `linear`. This is the approach that will be outlined in this example. + +For more details on how the `debug` tools work, see [here](../../../../polygraphy/tools/debug/). + + +## Running The Example + +For the sake of this example, we'll assume our model (`./model.onnx`) has accuracy issues +in TensorRT. Since the model actually does work in TensorRT (please report a bug if not!), +we'll outline the commands that you would normally run followed by commands you can run to +simulate a failure to get a feel for how the tool looks in practice.
+ +Our simulated failures will trigger whenever there's a `Mul` node in the model: + +![./model.png](./model.png) + +Hence, the final reduced model should contain just the `Mul` node (since the other nodes don't cause a failure). + +1. To start with, let's assume ONNX-Runtime gives us correct outputs. We'll start by generating golden + values for every tensor in the network. We'll also save the inputs we use: + + ```bash + polygraphy run model.onnx --onnxrt \ + --save-inputs inputs.json \ + --onnx-outputs mark all --save-outputs layerwise_golden.json + ``` + + Then we'll combine the inputs and layerwise outputs into a single layerwise inputs file + using the `data to-input` subtool (we'll see why this is necessary in the next step): + + ```bash + polygraphy data to-input inputs.json layerwise_golden.json -o layerwise_inputs.json + ``` + + +2. Next, we'll use `debug reduce` in `bisect` mode: + + ```bash + polygraphy debug reduce model.onnx -o initial_reduced.onnx --mode=bisect \ + --check polygraphy run polygraphy_debug.onnx --trt \ + --load-inputs layerwise_inputs.json --load-outputs layerwise_golden.json + ``` + + Similar to the other `debug` subtools, `debug reduce` generates an intermediate artifact each iteration + (`./polygraphy_debug.onnx` by default). In order for `debug reduce` to determine whether the model + fails or passes, we need to provide a `--check` command. Since we're looking into an accuracy issue, + we can use `polygraphy run` to compare against our golden outputs from before. + + *NOTE: We must provide the layerwise inputs file, since otherwise, `polygraphy run`* + *would generate new inputs for the subgraph tensors, which may not match the values those tensors* + *had when we generated our golden data. 
An alternative approach is to run the reference implementation* + *(ONNX-Runtime here) during each iteration of `debug reduce` rather than ahead of time.* + + Sometimes, the reduced model may fail in a different way than the original; generally, we're more interested + in the original failure, and so we would like to ignore other types of failures. The `debug` subtools provide + multiple ways of doing so. For example, with `--fail-regex` set, `debug reduce` counts a failure only + when some part of the output (on either `stdout` or `stderr`) from the `--check` command matches the specified regular expression(s). + + Finally, the reduced model will be written to `initial_reduced.onnx`, as specified by the `-o` option. + + *TIP: It can also be useful to write out the first passing subgraph to compare it to the reduced failing model.* + *To do so, you can use `--min-good <path>`.* + + **To Simulate A Failure:** We can use `polygraphy inspect model` in conjunction with `--fail-regex` to trigger + a failure whenever the model contains a `Mul` node: + + ```bash + polygraphy debug reduce model.onnx -o initial_reduced.onnx --mode=bisect \ + --fail-regex "Op: Mul" \ + --check polygraphy inspect model polygraphy_debug.onnx --mode=basic + ``` + +3. **[Optional]** As a sanity check, we can inspect our reduced model to ensure that it does contain the `Mul` node: + + ```bash + polygraphy inspect model initial_reduced.onnx --mode=basic + ``` + +4. Since we used `bisect` mode in the previous step, the model may not be as minimal as it could be.
+ To further refine it, we'll run `debug reduce` again in `linear` mode: + + ```bash + polygraphy debug reduce initial_reduced.onnx -o final_reduced.onnx --mode=linear \ + --check polygraphy run polygraphy_debug.onnx --trt \ + --load-inputs layerwise_inputs.json --load-outputs layerwise_golden.json + ``` + + **To Simulate A Failure:** We'll use the same technique as before: + + ```bash + polygraphy debug reduce initial_reduced.onnx -o final_reduced.onnx --mode=linear \ + --fail-regex "Op: Mul" \ + --check polygraphy inspect model polygraphy_debug.onnx --mode=basic + ``` + +5. **[Optional]** At this stage, `final_reduced.onnx` should contain just the failing node - the `Mul`. + We can verify this with `inspect model`: + + ```bash + polygraphy inspect model final_reduced.onnx --mode=basic + ``` + + +## A Note On Models With Dynamic Input Shapes + +For models with dynamic input shapes, we may not always know the shapes of intermediate +tensors in the model. Thus, when we check subgraphs, our `--check` command may end up using +incorrect tensor shapes. + +To get around this, you can specify fixed shapes to use for the inputs of the original model +with `--model-input-shapes`. `debug reduce` will use ONNX shape inference to infer the shapes +of the intermediate tensors based on these. The `polygraphy_debug.onnx` subgraph generated during +each iteration will then have fixed shapes. + +In case there's a problem with ONNX shape inference, you can set `--force-fallback-shape-inference` +to infer shapes using ONNX-Runtime instead. + +Alternatively, you can use `--no-reduce-inputs` so that the model inputs are not modified. +The `polygraphy_debug.onnx` subgraph generated during each iteration will always use the inputs +of the original model; only layers from the end will be removed. 
diff --git a/tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/model.onnx b/tools/Polygraphy/examples/cli/debug/02_reducing_failing_onnx_models/model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..27cc8f3b4b91b880a4ff2bdf7a42eabd71e9c1fc GIT binary patch literal 1204476 zcmeIwKWYL&6bE3(4LD31S;!v(Ng-BN+NKGyi;x2(jVK0F7!4*1TY3jil$-1ka)fLY zOAjD?FmK?!$29Z%V(b3*=P6yJFiFjFkheaH#duYGce3YVk#5qVPhpz3CbQXo_c)u? zPnu;v9cFoW>16ddzgjJq<#@iHe7=7b>-pPixo>*gsvAShVioGs4#O}RZK`ggv3XGb zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= 
zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ 
z1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;= zV1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~ z0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz z7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|Xg zfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_1{h#~0R|XgfB^;=V1NMz7+`<_ z1{gSf1HEn4jUi^S3iWA+VVI6KRadm9AFT{!$N&QjFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# 
zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< 
z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|1OEnI?^3hu=hx{nhCENr80yMW7OSwk VcKgBc{q#14m*eZ(8?K}viHbyk1Oo$uDlH|Z0s{jFf`Ne*d4mAGk~ROS z7W($u>67&5H_+z&#v}~-9p718(^=Kd%-PM*(G|Udhqv=eN`~gP zr`ZUiQ{|{(VPU;>{|z{xMZ0vTDJ~9gYHG@zrJ$eUsDKdoDzZ0V=4-FTbV~HgtU6y3 zRlEyIN+LEkHav4@P1p?C3ErZhTo{@beO6OjcLdvmb=+I^hK7e}nVD%A7*I-Q_7DDu z{cQzFtazyI^sT6*>;x>IJ$Z&F&Q%&Ozm=5GAt56>fOTMkLPCt{znq;nJEsV1sH>~! z=*U4=Lq$dPSzUb+>}0N=LiKXLY9Pu&T?7H-dw#m4azeJ$^p!2j zudXNMRc$^m8(6WWy+KDWGVYH_ajpv9uLrHKuRHHgzZDV^f-aE9{q%d~FeV1H#jAJb zpk;lsJRz0I2@<(@3z#8ceeJfpC{nG1@mo*c%uixsuVJ>gw-b|+ZX3z}j)nog0N>D>!|9(0NhHxG>dm3%=}TgTpea+LMCH~B zHAaAH&kjg=Jo6mPf7NZ_d{$OgZtiPv+8s-IusB*s9X0Ff>iS4a+qrK4j)-XKYo)kY z&-DnUz%_~LCfW6gBFkfb-s0{iiC6FfXmRj9XOO?O;OEu*A?W4=#vz-K94_;Z~ z_^6y~B-&*~K|$vj*(wy{#fM^)0~3UfBj+k1H-X_*-GP#2+m5l{__g>#9Whkze%ht4c%c_nUqv3J;0WQuaz3qQmq*>C7_AQXSRuf30BHe*!UTatB`fa=mtHgL3C7SCco?^BAS;@ zyQbds;x% zI)^x?i0x#Vi0P=DQe2EF0E-6hI9x=BaN%3_TdsF{+p+C$b;Kd_safL_eN2tw>kZ;F zQH~)YAq`C!kSVQl^-KjCrj$qu$(VPTm>UNPVq#xt`DW2(lo>pyE%H)+Bpkr=q8J~d z&U8*qE}(r56?R&p+!5qUmI832Zvz$a;vJ;rB4VhmX8{E}jhzP6KLg*_c1l0F39Yec zf!XN36-Z)*MW%O8G28vbEz1HC`u82>2eI!FX@R2Zz3~3WuMSEwpRL`XYYXN6PSZ{g`;VmEPQJWVbHh<<0Z^YP?)~o(jx6Bc}ba-QrQ#+fTY@tXP-tr-o>-6rtc+2A` z>hYXju8A%t$O3?`tuRyTJ?5ND!iM$un|`E~G2!Osqe}ZQ#od@RN^PL=6s*~pu0k6j z16(1C+m1z^A4j$!y?J%X!$|t*nX1P*m@*H6(!h_a()IIVT2|jkO6{1+&kW*7b2h7T z!r?f}7G7h#3Al|aCM|_NdcdInqVFhVGW9kEb-IObr_H?mDkVpjs&XYPFzx4xa;gLH zvJZ=9)fliN%q4Vvf(VsrIBgl9a@M02>AhC7Z=LWkDSET2ci6A7x8wB<4F^WMtJ3qOn^=*~V?DOKI*Euv-gXd3>LlV5 zX)(x*^Wn!WZ80gBi}Iv;QNNqZU7u?7)9K7ZMPm`e#MN-4t4X7`yB8QA^G-H57aWdq zm>9K}H{eVi_ncqKrPo*tfI0ol-T6-XQyK99&F1620rJc%mpx7{%{vU-DSN-d#q;Ud z$mID%)wzp|2{8`rS-5gBiG^wyTSY85^L}^g(Vv2=wSdCUvBH~X&nu6OtiaP>)`c;I zpFSgU**@0NlBck$_WPnVRp90p(#5>DrtnhaQS%iML&N)Ijr)A=W{+k=VOkHXrl!_W 
zEIf~sI$GpA@w234vPl~wzK;do5EaH(XMFOcOio^oe6P?u#Ki@8-p8AXE{1_`LWW}R zT7~&~?PK20<2SjWFml9S9B)eD3G_zx`WyycX;y18T*hd6(Znf1=->e0Xk^a@!-t)5 zhg4oysv4}d0i(w&UIF~Pw8T4R>Y2;~VFXfdz1Ka4dob?A*zf0#phuX@++~zlE)%KB z>~5JO1MH?5e)zRNAfu&>5nJTwi*~I*dQdvbiG39kH;3hdTo*w0Mq{))fC3;v6H{WynNB~wkcEl+f6vU3)Bt+qsnj)Oy*V*3dX zp8Zx_&b-L|ZiE9Tc#4_Ce*`lnzq(?9+!osti;A)Z8S(9RfoDU`Y%AeL^QmW1iV6k; zqU~nW$$Ja}zv@&Y!s?igzH9aQrtV&#d*u`(kGR#zYzF)wvHJZMohV6q8glj+4=#SD~369x2mE z(vFI)CXK|Qt*rn>6>hZ@rnrI=$<$AG;%j>+?DGr%t`KNvzSN54es>FBHpIus;&rZd zeJA(yAi`vONGVSwUXXitTV^9}5N0wjAU&^$*8T+{%G|mPpUyQ7s10J~(qfL;yxMY* zbNA*w7yP+Gd)o!RU9a^R@ic_L)dMISQzuLpTc-G`!msX=+*`#L zFrXP9obi#L5J2lS^TkYAc&I9b$yLh`=x2~*Ke+tm;$iDy?% zQMf}fSCBDlBoHv5EoLjpnsMdN{v;+BN;Ps(6?FAaLLr7fR0iN10ET+2 zUq@5tYEZ^=)7~)!ZlBD6$IIuUyVpC8>W$jz(7b|yc`XdH<#MHN(*ClY%XKoL#C$2ya^=J6L9uS1qt(+5Vxw@8N!#!zTBNeRZQUg?G(!p1!E zn0RMO?9&MO`ILZFu^n6_jI(~sBJ8`?tUo$CV$om-@CO(4@-PU^J$jE*{Zjb69NmPr zW!#A(4&LI~2#bj5iz1ZN!;xLhSiLz93med?ClvH{$2y^=#O+$QS5i^=q^XIgURp0c zhL7n)qe>l{tN+IS_eKz;tLqaT9UU~-o=expusokF*Os)lE~^}V2aT-}EVddLk@kA! 
z2i*WWcX~nWJA%;7_qUZtb=hcRIl_%USdu#&eXAk%PFI$AI11c5JZO4J8N`^#gCVkC z-UO2RWz)_D+cS8P`MK{Q4}FbI`|UFBcb3zRQt4NANa*P3YF~EaB=}^F9DLOr-n?v% z`!}_Mrc30`^xtK_;On>E3u+I2fAr_`FtCt1!k9{OV>|1!=T7KGJefvqHT7;YU_zb* z{TlEhjeN;->UfbrDNQw%?z@kSi5Q=}hXi^M;R^MlJbqJm$@dRvB@W?IwK1u@tg;7$ zZL9sQrgCNdk|&*{*kEaaPU7yth^N zm&PS?tN+rrur3=b3_(Y7w=1jhYT}$f4r8ou2uI+qd$Am!QvP+R^FRWXi_EdgwY1_E z8uQ(p9-%{DZu5?>KOqQpzy-dy6MvZn+m_iQ3%uRNdVzFWwV5C6ttWPQ2NCBXq6oV2 zI-a;N`M1nN+9kb>PZgx&slL?wUV__!ZIoq4lbsBHop6!nR zVY|)DZL??m_YR|BVfi(;9TukZ`Czu3f|GN0&J;S86db$b|D6rnAhQxg!+iey`KB>p zN6ks#>FG(Z;y*je90|l&K)uwoY)h&!>AkNPnUm9&_)m{+iBeyrMm;}Q&dHjZ{)jEL zBwOp9s za4EyVIXR@Aot$ih9O4QHK?pC3Mv7ZfWSa2$^3uX@|Iq22)@0&#ULTkBeX|G zM&|LpIi(Vc(ALtDHZ&v)MI-JyxWptPiW)ULchUMMVHi9g-Hz>_)N44nxSP(rw2X{D z>guq8gFzD(Ehes{6(uEI+m7q5&1asi_2Z$z!G`r;d^FQuR4L_y1QOiX-R*y`p(FC& zC;>kSI!WwqLT1U~?rvzW5g7wR*;RJyzsXXNeqfU#qo7biZ+hjUrE1Yfh&3nl4rrN} zb_U`pq*yScrcL0B$~h>4*lCl9cJG$<=6{X&@ue_4UfVqI?H z%crpa1t7xlf|Bg6?e0nn3&R{O)}Oo3l9Q)oVbLhqKir%lFFI?y_%-s~?*{bb8+_0n z6dyjU1uW9NfGU}R0Z_ojPWSh?VX*c;&01b!+x`=G!OT>M3UsNHHN2>XLnpqYr%Y=r z^z z-;-o1Vs@C-^>Hx?642R@-l{>Q%cm0*BacBT$ezW++}^8g*V-rR=N*!G^J}r26{xpS zzZy;1Wxr1)aP09dg}OiUxv?0L)=AHJ=cD%M;BSXWLx-y%b#s~hDYg(uLd z&C~6bvxLrkVLsH{#z_3VmjYA#5BnA179T{S(ae4$h{;E znkZ#{(Q|fUBg+Vp>7y)j-1G2i!V=$`+P5*sL*#Q<;yNcRBgfwAt%9M`@@T)=(#@ag zgX!T$l(NMak$MD#36YtY6D>r`3HJ9Jh!g6W( zZ|CJ0n8ys(UD08Vj8Ascny*)gC@G z7TtLN@VKt|EktUF0Yhy>lHM&gMaM~O7cz{40e)?{r4Bu9UvyUP6#<4_ue04L5D%D( z{v5ozWpMW<+AL)HAmmG@Dya!Wuj?h;_uQJ|b&@smQ2AIL+N^b{f9-hUGo`gH6g1G{ zAxGlJ9P9U+dheL>5C$t zA*>uxz#kZJiL6zHfN9rEGCXL82uQBEZlaZo0Lr9ebrz5Z>2%pUvWsvf^S`(!OAT8=__#6-ScF zeJV9X{uGfv@X_vi`)3d)7vI;c@G_t80BeW84O;i1ayM+NOn1gC!|WVO%Vj+x*|psw zO-VTSPUfx)FA&MaV+4^tWFP&w%we7f<0^`wdrr%72-YM8+nG+8KZqe9aQ&L#iS?R) zdTO>likGY500s{uDexVe95l3w>A1+dq&Db_%H1EC?9Ql5pJ+=METoR56wT0*PbPnS zIZG#MGq>~hv+9?P%eEsSi--m=0Gtp3_V5AXb9bXZCfOb|h+mEW1YbYa>KNpV9g$WU zIH4c=eZ)8z=eLcB%8dmlPOjl0xN>EkElx&jFL-UkAZT{r2>D;F+U!8mZ$}GDZ%qS2 
zN5%=WF&W7x)&EV34j#PqM2x5COE7>hdmGidG8A*DLjWqO#;<+5B$;L2@?-(V@|ycN z;?MJI=~tv5r=stxU_+m%pdBD2^|#H^AHHO22R;yLBl|L#NY zp|b^Vy|EaRp;B4RKl|9^wfg2y`+Y07vz<-i!-ms3(9;QNx$Q{eCX#%`rLE4;&w9hN za=$C&ahBV-Lo4V!KTI!l1== z&Ao>a3x8d&uZyD>yG(Q7OZ!CB^Y)$>RZ7C`$;lqT42gmvL#z2{8qY?|nRLkmLVF6e zWQCTXe}>4G^I=<$bJ}aHGo*doqkE_(#t&t#$@gyxl3L+fxeVWr`Q zmS>q8KX-hhG7jgX%l4psQ_2N8=UG=GHp0`roi(rHzROtUZ1v@FQMl$MdGpu6vhc7| zB^kG@HU$l_f+I^VsAt1!WBQb?VQtL-HJ#RwhUfs;M`LR8NuB?WHREv1s&!gWi^asY zy127;R}XZj68_6{t2VxWvFGvXiZ6=!Ya;~y-!^`1z31&wD=qnkEbg6?=1adVA|0j_ zSZFL0o5}3}n=>xLdHPp*`mn>tusd^ zbGvtI*On4sg>JgbL|`mNeocwqWwz9x4lg@UVPQS-;`39Og0(H*hv?dvnZ`x zJbkKA*qF^-Ut`EE_!%by435caa=1iY4y!8!Hhe`dl59erq|J74L(SLz@R@`*x9Vqt zKbqmZC)!%yfIUOxD$RMv;DtE~%~`esN)3LenuNoI;hV#-a#_2LX6JmTwR?sI$E(J# zdmIVX?Y~3(@>jk|%ti?b0Q&&coUe_OB5MMJiEPbtjf?E_LaDJ9Ea5GO9UZcdbaFKS zI}IZ9Bf_dr{x6w@W4LN#lIO#b2H@sSO+`mZosaix z9jcLz4Eg#a!ms;b0dGhuM(?EeAHT)!P8+L%C7&9*_IG)b%iTW(IT3W`BBP&&%6;4Z zu-w822l=#zbLgLRYrS=mG;Y=uIm8}M)y}IXqAM%|M#o9CB1iwk$L+mE z{K#%&=p(@yOru&>ElYm=@bz7TULgK2+LRGd)XaXOu_}mNL6JUd?>48kh&=5Vv4Z%K zw{kwFEi9@nADku^9sIE<5rs4?^GBJ zMyNu&{t?DmfKOVR`&LyI@+7gXIF13rBvn_qDxzO-*s>On4AU_=CT8&Sc~yE^VC~|5@FUiWuhT2b99zTH2`5m!4oAot`U+I= zE^rk>%eIY?p`=8TEk{e0T(04TZg?{kX+K=hgMi$}h9oj1V(!aTc%B@_#~B?0(yf*s zRpw5X{3p=ud3b!=8hj;Ce1(5RZe{~`$@v{9&kPmXbXyaaYFlX-bpaAl5)^A)+q_N_ z<$jeJcnjm79A;(O=ACAxhsntqJET{ViT-H2x3+_2*(cUT*e|&Hla;ET;GWJV)Mdt3 zr)d~iY5AY~tz?Do_~iV&qWRNg+^uG@gD~64xpiOOuqq5EEiav22g(`O3CV zJ8C-fJVL`!`-TOJ#|TN46i+4*KB8tS?fDP6W7a#LYwN=3ri2fGMDHv9+L-ieN`=kQ zSGG&w$kZpZx3mCi);b{m61ZSAwYwxF*_l11yLo=`w=NQKel37KSAz)G)P7+TGrWNY8Vs0k=A@qzQGoAJ73Bl9kch3dq6LM=juV>*-VOK#a&iP{xruwHDA zpZPi4*=pv@R5UADDAof*9bA;&YA4&}YGz1<5B#9)kTDU)Ij0_>8BXu1^^;%!9jR!4 zf`cXMdrt?njilZ@@zCt1aA5Gw93Qh_?n-{Wj+VRSw8mHYUQS$hC2JLwT;9X@b zjJR{h4l?f=963*f9iLs_xGS{4zsxU|cJc^&cka&>+n}7jw$`omAJZY~qQctnXfKNNH>c^J` zO}6At)RdS623=yb5RH~jdQ#i|1&7y)Ph`Qb$f)HIxKmrh+FP3v0!U~_Jm~^V$6KX# z&aB*>)sg!3{F3W*X~t*0_?ZP}vieN_Jl3{pMq5029fgEN-aoYu+ 
z2-eB!XS}}0Ji-vsg)+_f(RHUFKI0UTC_=tBknm4zDL$W$Ez>5SOL9?;H%!OxP_A@- zsgL2dLpAKUS=ndr0v*aKMLm{!?)_p8p;5Q}j@jwVH)Ns!T#U0|QJyWFG*&{G%&nar zER)(C*68_as%SN@d?`GGPTmhO11)L=C(Z%QVNvh38jUx?IWw&f1&*Fw?iZ3peOIvy z1^Cms>De_JdYe-&c5^nWX-GG#;o(sl&pAEu#wKTfWi;nV6+SjC@N=HAnwdAy+@3Y> z*}yniyw3H){WDL2<-T>Bq$%-( zb5GMhYzI@);Y}4?EzU4G2?qF-3^s&CF!0$QHop4^8 z`hOVH_RcE4qIV62}Pf5Ts_}#TxwlW3;U0=wU0~a z?IEdG5Uu(`7<3XO^>Sk`dlU`G0oOUUS?%NZ=&-S()~6e$hGR#g<~+cf;oNg!&22=7GkBZMoMsTq{tx^h=L8#`b#jCk3^LMmBcP*05EQdRa%ipjY{ zA@D{U6W`g3Es2zg2f)I)b{cz|h}wWLSqvaM%y*5NYA5XW15?X{A2ac*#<|Z6*`r1k$Yc^JY|b6E1{4KP3`w@corv;l}a7V z*=LIVIUY;N13}MH1*ehi=f3I-;+6PL+~LOkEZ>1CKKPMZO+Ri%EGK>AFCuIFC%USn zpF67nynB7ov5$yc$6ABKd#l;z>Ge=-MT>W#daVPCBi&H>e(h*h0cj(qX?-s9+nrJN-+nKRXB!TlH`*G*t=TU9|E6syP0SXwZBq&9Dcy;kNM$v zhS`v5UW=CB`^lg5Gl^T)BH29N*mu5uOuO@%lSpU56&1N3tC**R{Xc)lN1ZoHX{(u= z)Tr`b!#9PsaZ5H&mv-xL!ieO?O&A+= z)NXEM;)t>O59wz`yJ4&-HKJ#Q7a@U+!fHJCZNr6=-glt-_^O{4U5pA>Hmjn8isr3p z@nHj06xOHgdaVl)3#~b2*ZO9Au`se}*XVpxzSlU$j7oKkSsr#K>$V1pCc7a&%9=a~ z{Ct@4uiLgCj#icV((5H`#Olr!9>+XEWd_}(6~_MNa6FBo)zBPSzvRrTsJ_j z2^mRHH>l4>yrBHAfSsP|VmAV09EG$8D}d`GYH`wc?FMq6s1Sz~>+9#cSd(#7)Xn38 z)uMK`M2pCNbhJw;esni#Bs;<)WM00S4s zn|`p|X)GkD2knO+UZP34$te16YNK{r3FXvNRw zu*ELfiw~8Bx-z0jvjw$##rGb2(P!>U!V6zwW>LWigm8A!W@`;cCObzPff~tYTb1U2 zW(AITuBOZ;^>6)k^?a||u99=F0gH7pVEo=h&sEdg%W;69C2_6Bba>TDYuUXUZN4P2 z$;6%PWuX{3`RG4-*-g_MOTFsqYG__sWdq?vLQ-f~92qx=G8@^<|Lwz&0ZkL9+jYms zjmVykX;EsMKhpC=O~Ku0|Hi`!XxDf7p!d*DNNYR7ujRyUOhPPfXN(`*jIQJ>5uJo> zt0nEK>_Q9&4Hcs?k`f;pLpLG-ZbUq7TtJT1EMvHj#_WTNF?qYWaw`g@ShmbxMh%*u z$okMxx?L>L@6o_xOEf>caq;!D53WRLR-c*fHERvRyUL}o%Ctc-^YfbeDHDBZ_-=TVwrt5jYza)9gzMk>_T_(|9U1n=Fj_A) zp_>~AI^Dnf6y>k~X5Hu08=-s=s*sbpINi^0eaV6}VWNUuPUJP=mXp*>y1`vGPB%Op zaLM2I^-CW3E(_d>5bo#&NBWp~o?9Kn#l8e1NEkw41SWq8TgZ1aLhyTxcu#gmFOSIq zC{}!;J1|gg3s3nE!v6pA@OJGtg4FH9!)_*_NxJ$A2*HDxGW^3Iiat}&C}ehQ^Y%0H zAN;a?0l|0IE{=Z_%WFfwfu0OO^GXbfpsshAV zPOFP;Va{>{w~vv=K1OQExwD4ethSCu9mr@3dhsIHWcg<$df2h8WOrstpASf9c9trl 
zbbduxQXoR~HF&J~jN_Xe>ecaiB|E8SqF3>}htI=5c%t@cFmaw?M}ARQW&iUAN)hfF zrSaJ79b78b?docjBh&s3*SK~ko^x|%^10=W6R1k1@}Wgh)MEVBa@oHtEsx_#m>8F% z7+mS7hCx;bVB2W+HXl(E)|MzV zmh;K`nApT*XR`s#aPi=ID=7^qwQp+a?FZcog1^WqKu~5*qYe|ajHFiyqEhf!aO5U} z@<&k+;QC{T^BWuI=cwvEB=Rw!C93vzp_hsd5Ba17c6s*rf$sBAomZiOOST99acBD< zBpBNWPR6Z3IU1dr`C6Vq&m;WoLHyx)Z6F?}J%cJ@kKyy}ce&|RXF5csd$#ROg!i1*8>Zw&WRZJe51Ihu~ z7^>D*CSZ9HpU;WFVOyxGo#+sb{1uJ3eA+$~aGdN)7qD#7VL~I~R^of)hBAEo{b3#+ z>RL1wIVL}Ua6xR8v9~B6P0m{%w*wF_&QG#M9X`^FgM|DKKVa%f$z`Y?Z@b8=CNBkC|W8j)4bjtnqyHh z)}qG@ty$Jv2?hoPu&dCg5JL;4Q+r!I;aIN`5HLwdj-fQmg0%nk0%XvAf@(2K_9Msl z?rr5<{ot zRUwCLqf7JhU;Z%^dR>MUFNjG$K`c_usChBWI&!0!wy~r_Xx}lkH-rwBK4XxnI_`r~#CtyOaw+ci?1fnxJ2!G-1crI$n^FG7d9P(nd;kaLv* z?9ytP!R0HK>((U5Jr@f(KJeHN1_2;ZK!&1~?GO=Gpw`4N}>o#SE~D4Lw}T|}GCNc$B} zsXjzRMD4H6q0@h}w?ZvJ`#DA-N&Dp-L!SuN=~)|Ge5^3=EfAlhjK0yW=1w1CpGew^ zsu@sjG4mhqd&9pp8`aiCpQ%(G96P)9jVvO=F0vf8VwO;nbslM~QsZ>cw_~pdE|qac zHT_?`Ve8M%roC045f6{!L0R_KmggzOd6idJxo#|ta0X#U(*v3#*zsqTmG-d5o02?8 z?<{zX@BZzmFdbiSB3t>_@K9#!F>oEHAJ@BPGjymgH>pA)98e; zdy7BSn_-@l$p6v;9BC-zLkXg-fcp~1IC!Dv!Rg9t^rF?0=sKDt;5`jQ@G&Saow{GQ z@f9q?YlnT88M*Hr|NiT!b!OAZk@^PdwY}3tzkd@2h$My>@lDsF2t@8bb$JDEen+cZ9~gf^P|@+ZxOerw)z^)~rzpjFb((v} zB~<|hs|JIL1?3@au?aFR1G4>+Y@u2-0CkRCIo02DMou1WdU3QDs1Txxo*i)~nMs)P zXRx6H1#M_EKgr_~-E(=y$`;j(;gwQWK$i7F3)XdgwX=Nsfaq%LfLBK3R~mXQZnXs` z>>|Q9br>^qu=;$27Joys6abXz4?Wlxa}`;YvQXc3`1gEo_Q$HKsky8)w4s6!utdx{ z4t9Q@O~ruS{v6k;dVo4{n;b|H8K?soI@b5iiLIVG+ilak8})p^o9LBJrBmTkM}5`} znK}@!9iE0md%(>XKCWlOR}(pH19hN=HQVpSP}`uB{nrV5znG!duk8=#tIr!Q&(a|u zpgBd&mk`J zbzzd_++F9kg*{P*{~PxdOLnS@^?;ZzcJ2R})&0SU$VeW?tv672adUsqRnHgCx&3=0 zAL?1HISKzmLP$_`0u{2CZDDs1H7GxcH$J>KGLq7h~nwM^0aHDFFpvwEhAEhzg&!(u zShs34C9KFd`e@S57asI_iv>4i=APH;?ONtjtMS(oaHSxHP^bn0r=3MH1|h1`6;_l*u9N$M4sTpV(&+V;fq-t z6e?i(&=WKWgIJ4u2GY=|nKLHe$<^Xde;3hvXm*m$<$m^f#BH^Rn9(r~E!Ak#HuSzT z2Cea=$}jpoM~?&wTcFUYQnTerexz1>m3GoH&^7wwqU{9!DZgJP`+iC$;uXupeH#yy zJVAaQEOgqp9?vgCAN=dmDNUa@j%3G3{b`)AU>lXP?irxtwhZ%0NL$)+s!~U6DnQ0`l92NZF!vQm7W_f)v6*G}_IL@u1xn|E 
zEs3CIdgUZZn8m@NCARo6`fssc3Nmpf(}aMnZ1P)DhYP^dyo^okXs~C3NMulDe6-f% zFhBLME7%rU-kgcjm|Ujcylc3c2cH zO!!Y?t8Cdl6)<{N-cRVWhnAj|FRY14iFy3Tc}-0=;NFV#hz~>XC17%3IaBmkw=^Zg zn5pDvOi|vgxQ15zLQ1ZVqqEQBkp~%F`P~8xo=>!FvR*8{V?b1pTgKPj2@n3vbX5&C znVo`%_i)|A+TfTR0 zjv3%NGgJ^sY$)_Ohn+)}f|+yAOYI=&DXdyz39sH!8~LIe)SWRjx$czad_ZUBn>h4P zYQ|#Pi5WomNAvQIh?ASykyWDOpsJ12a62fe^_(g_EA*u*bt#Eq`I1tx?E$Az zKuN8m208ofk;xRq?w9e2n7D$qBjlsYOC_#{7d?=OF42=+0vfwvvNr z-HbV^t9vw>b%-U9;tlQy{Wu4KZ1_Di$n(2n&mK#-+>Kq%dYhnrKF8{%?k|5-(Aby? zeF(>ZEsX`Q?z?CKQ*3|W+(EBdEd~+M;en%A!Q1~Q2>^$y_dN!+o z`tPy4U5DIuP3Z3@7GM73#_K#&6dRA&GDCCZa@%L-f~A1B!A_4sG>vv~f@yyp9=XcV zpEn?1&X85s04B*l15P(hc$PIKpW0Rtee4KCKZR9B{*7~>k#i?i-OlVllCj@#!9l<) zduz=O{gk8p8hpc=lpp2cBVmwBZT?#gXp!}vy&b6Zqe*C^=He7AX({lVjfkr6-K z_XFTa$nW3t)LD;S?l6aiHIMT61rcB-W`0%v7*|PvSC4v_ApB>GmeR#;e=r~%-&cTj zcJuI_DYCNRcngNpC}g?68+2*YqmN;&BV>p)7_Nb-`2@FW{TM^2e{}%6;(pT`{k9-S(ZN?P0Iw!LjQT z>U4iVz{^nuw7Z3{+g=|ze8TR9mD{z_1dmf__&{1oXL2N|@6a+{zT~OqFmCh)kn~`+Rhj$9HBs?UcbuNbG0C@wB{}+6(W4=w$jV^*!{MQd!Umd zYi}tC@GX#e^$wt~rk+hueQ_fDM-@H6Ud^xKVNSvIsSIP?sO{G1J<)1cipTq+3iB>i z%Ik1*`%1OL;qnLkqkW`;6Ps%XtCT>K?biwRE}IpNj@J^i z0zQ?I{Y@UkX#NzCK^?-LzLVMN+MTI)^C^kN=#xU6*{R!*!4mHB^RYRgqdBa1hPa+H ztTJkrkl%Y!+r#1ReB0cUY-iW(l~5?}rPsCh4N2=%&mVYt+S7(#Jn{l~w>LmaeL|6+ zozYH^=7)7=`aS+VDl_T@n2&mA42rF_rNKx|!i8dm%-#uBC@??D9;b|3sz>L<>(O%C zGY75Edp`@^Sn`ZNaYpMhUS3j9M(7Z%&sGwi%>`-rdE1lTFS_Mt^h$wlUUvaBl;;O* z11s^n83l;Sd>&;Z6w#B_NTR+`U)ji+UoQGwq@7|*iwQh7KkK!Q;p*!Lb0>yDWE222Z~o<4 zpE4zsw&hN-`fx75#wR4zHG{gevi7M0ToYHxw(sXkhupT?Pcb(IetaHZdF&V0EsT8j z8yq3T>%%}usTL0J#kNwROtK&gmS^y*qW9%o93Zu%I*{qF=f14oEE>BiU$POf%BtsY zDzq#N&a%Lj#$_0!ly<;awiFK($DYj1oUp`8CB1osQxX&YjQ#82*8R}frE&Gzr*#!! 
zgk$beG;|D^0Tl*q2=;i_^-@sRu$3$f2VEMvbiBJf6uJa=`w`cEZC4=TQogLibk z@~tJOc(++XqPu@hd$P+QyDM4HySyo9G8AGIzKY${Zt6H9EU}qB6gI)yJa?XI*c})A zk%td4!SuVBlT>igtV$;Wp9Khb^K)F$ApdjX&>rQ?Av`RIK64S<`Nv zsEvuf!&||7J=Y{8dAzUlrukgW6Np|NYb9*DPC z^zKTSz*4pUT_K#+rLW_vp1683@tq{A&x&ymI^g?sTJa4VkZbO}fgo;Z@=qGSVLM-f zfO-HaQ5{4dk(FKO@*by#k}=&sKaJrgmSM#I(vBH$4q#MPWHMckljSEQvo#Vh`ciXu zr7U)3_uypefOD(QRsNmm-Qdqt`jPf}32MQ2-+p|Ed;r>TR4LsYAC#@Ch}E|S1}?M) zgy(SNfNL?|+GIcO|1iJkwq$U3{6eT&H1qq8@C_Ol=5=IfBh7%wZA<`;nl?vZ5&zgj zomNj)24)#Gw=2&16e&${_r03&n@KoMg&F71g@@m>m=X#GC6>InfRFXjoF@-8Ct!G+ zci+C)Yky70gj~JGEYJ{bg&$5sHb#H4WOMkIWWAiJ=;9;k19NF=y;?&mK^5<=J$_Rr z@rJBtRQy(%e4J6gGvYa0_JVjytI3;5Sw4plQSOeh6t^euLh-|6yLy%95$|ldUS~oP zH9!n9FTrQRhMdyU8KG9Hbtc36w$q<3Xe&j%b}P3knF6ELfFmPY@dJ!qRI#j9t0#vY zKYldqn!~yyFC<#ZEY57+AOIM+MhGsktyzu-qdtt2l$CVr2Va{Vo{Ol29X;ER0pe-5|7SF5Gu^Q8@ z&xja&0>Rw6rS|+xI=gJzaD&g_bPaRVZ&NnCD-Ev3zokyGC(U{`+J6vQ>i`|G8eXT*&+8jz2)zukc}kEw{^&F{Y=xf*^=>p%;m z;F%S^yyBvL$X4)q*x+u7J`6;bdh{SK1U0D8ZfiqN`ds`$Gr#Y%^%T2u5{7ZNb(Eo! zw^%3lh;Z;=&@j!ncR5BYnavSKYHzS6anCO%o*W}|snrEQ5&obZqxmP;$dyX}kUnL) zmGQOsQ5d9G-BHt?nFf{cv1yEc10Uwh`gBTZ4EdF`BkL>ji_EF7(G{zD_bK;_-;2S! z9;bl_MWN3ERvj+DW^$O7En4KgYVM67#w{FWPN-gxQHusveJ@_CHmc`bsl;T>3YKPa z)MMC}(l%*({|+$G~_tTNxF&1RZ=3c*w;gb6HoS#u1ob_>8VOCb7@zf*c? 
zLi*JYX7osUBpvgcBSX%}QZk>>L0B4g==!~G8H2?&8f5MJMJcAW69xYJWXWx=gk>bM zpx@LaSYA&REPQ5f2lFl!1`o!#j+izqjsQ$2}#Od{fC=i>fm$~i*b=ixYoqmi1X#oAbcZD{dFK4XONb~$ zw`;N9_OE}F$}T>zezx(BCtiKVGo}%(1{5k3d1%iQd9@HO+xXBAz5Q2% z<;N9L2wrfFv=FguYetDdg5cA@a2_RqFgufC4S4h6$;Pk)d}PZQ0pPkwUYQr2{&JiZDe7Q%nN zl`z(L$Od!IS;2B5WlEM!=SN+60|~f8F{4^wXU=6^f(>dG?DVcuuX^PO15UY@1`~SK z-;#=RvmZ0~6l}7*tn?`!dOOHNb(P_BLYkGKuOGiYj}r~@)o#fmDr zA-0;IiqKQsT5>+qLvhJIey$3~8?)Yk=kfIZReLgStPH~cRZ>josLvC^9$o0hpYmsp zyXb@LpCc^E#}wM-+Q8PF~eMRRy|4p~%P*3r@6roR`7A3lV=3`o9o$c?FV@`EM< zJ(QDpNeOSWtmdqkEN+tu#Ws_Y@TDrt{6~L$#a*#Hn|ynvi+`oEi^-Zd+t;8#K~5h1 zv|}3$wm_|WNjL8$X#f8Bi>%zl)*7(lo7D?PjZR{Cg?{ADaD6}ma{5HXDQM(qU%phY zy}QXh+XV_5+K1EU@1xYF*R}v%!~*NAoPKvMi!7+9vm8kISgKmW7WHQPS&7+%73;z# zqKbuen%`7PXNx7#wc}v$^1!ldel8x#seDGe%)SgY3zt~BNEUzR4E_n( z@EPnbxm4l7$1T-|ApJ9?w`b6*-(Y;jSZ0!0L2IsS&^c7>--AOpQL#w5Way+QNv;Hnm>5p*|QW{S`1lBjLd0G9sOp5SFz; zLp7S7xqAr}hXGY&7`y(x1-UJx4n^mPVORF^hJ8Xx97!^td~;heu|uf80y{^@013X_ zn(gJ+4yH(aF)~NuQ0gD?Dc6<_%FZXaJ`scFk?$~#bGH4ijT=1B)66vVKELFLA6a_s zq+!bZQZ`zQA8WtZrR`4ZFul$U$(8>l?8OktnSwMygy@7J_eLg?;{5>=rKJuPrS+56 z+|h`kkwl4Fr*2p8bB3#^CDPC+BzbtQ<>Nyn)&7g3g`n-X9D4tEqS^}7&XwP7IcgHh zwv<{zMicbNZ~c>&%lc$xxPL!>51kReXI0VD9y8*rjNhrTxF1;zaXv2w%GK2Pbih(YO2h6*naM(w9yyb>Ou$XnjK6`1%=P1eHM1a z!`t@>H8d}FjJ(e(!5PmCn5xdBJ8-QH1_*OLhH;Kpa@AkqZ_>3|1X(2>H1cxA?w?rxjxo&1MC>EBj&TU>Qs(V>gW- zo80zwyYBPwm6D8Xs|o@@#Sk%Bb9gyYGSU>e8`U;3I7++4duGD*Jz>$UL5>dv_Q#WA z>d+F1u@8P8X|8JZ(Ox+`3y*`!3Jr=AKb%0SB*)S4gfqLVe`9!8&&J}a0enXfCJgwC;M(lLC@drDoy63oFzc{jj(Ft(K@Q-#lqfhC$&{7fZw@6Wvma`z3H1tRs86ck5y& zgr7U7v>k353d=nrDzjEz79+FzgpV{I1_K7Z6k%cY5#+Ve?ux$6)QZu9jdFqKe5uNc z&YlQmsT*-H!~`;_8vVY*)0RW<;E1fnE{r|waqC)=7Pn|&YirTcU*~zZTL$Y3>Rok6FOSY_e%_nX7-Hh zJgH;DLdEtO9!hfJu^2>L7M|qNiSO(UGz&%+uGKz7$TzgW-!2P955D6(IbC_P5+iLm z!i2A_ny_tDoOMpP)#BT!6#gDx%J6SM4liBk{ExpE)L51$$%DOFIL>7jTZBfhcmRPJ zsQGsuI%?n7IzN}yzpl@FV{}%F@6{D*s<&p{O8Hzv`o+ThXOHw3)|j_s9`IRL9sE<{ zr1+L49uDAG*vNdId7D&qw#eOK`h^C54I~$DP?sNTie8zlBrKx^_s+*IDSvO~seR$`f=*S0Q!>JI7G$?X~F#awMZm!Y{YMv*F5(#aoR 
zb_5CVF)Jc`2qAE>*P0H|NwipA&y%Wvo#_&fux!hLpKqt@c-mICq%e<;YF@Fe$@$C` z^&Ze)_IV+#{?>`bJJBCFp?@kocmzUAe8Bm*+N+!h8ddsJ-y>8sI;*cn2={(z4!OAz zrxmK{JxhZ;lB8aoiQwO7CmdpukbrJ1_*qlNfP{{xr>C=c{Tq?eo_Uuk zhsr8!w-$1vmyzsABL{jGYD~{BE)KE|rviXA=d+hr8$~0~3%T)hfiGTOdvj#YGPFZr zFm7G*IUYg5G_CDsfZ3m?aGI1Tz(YOVId?5AD?2`6d0ntX6BuDo-oAwbm;zE$0%?24 zM)#dIo%r^r3nrRQ(f_?2X>Z-s_n|T%aj3n$JypO%U2p}9yDcyfo-m?+Isg_1#+b|R z-&?%{YbMyNxsNb5WGWW{`7gi-ef6Wa`7B?6nKIV@+Ud(na-fR6MGVk|SH~+5oHtbV zId5}L%T^*!dE?x*a=~h5c6Ny4yS>Vp>!dQZ-|}InK1=5GKFh0? z)}Ns51s156C}F{okdU}-k^8^n{wQM=@GkrDYJVC*Pj5}$`kSX*%#w+&uC5seevyd| zbx$BFIR%Bk9|F3vn%eBj#;dg2k}2weL2l!FbZq>>9s8do068}MQ8fG$C-C7eAY%il z#6PXPatGAqRmxQBa(A@}040u7_l~M4pi`tW4zJEiFhPnphoi1{j08GL=VSgCD{Rq| zl#xF_QZz3d#as}8eu88FYr2utRZ4d6nllvdmRy)WxnR{jjTd9pZkIyhEAx2@w#1Vi zdzv)fJBeY(FK*XhM1NNo7Z%aC_QyI2IX?dszMRw4oC;F2#hLErz_xlLgR*)_ozD0J zW`L&>9nT&W{y0PU;gUA&N<&dqu|_Cn$96|~LzzYJ=Dnj7Lguxz^7#T?e4B0e>auQu zPeN$2Utc1sWEgC1qs2|lM&t->Av8=_ZtfTrDFalWnnYL2h-M3T`1efSC!+X~(289{ zcwqzSgZDTJ$~2k}pGw*@I&Jknldg||U0;YMZ)&PQa<^YDcOm3Q)x!{s0q z-|j@G*mgTGLE%bqRZvv$`S|l*e!VLxLaE!WK3Fk4DmR>a83<*7)E+DfA-smxx&JeK zQjji`D}f=IS)#DF*SnqEVAiq{e$jIbeqf^>rjy5-Y1*jg>=LGIP9hww&~oa<15?UE z`K=Q4Jg;XXQrVPG_JzrI8nl1q@4Q=R*PQETTzDatxZUVkcWp3+;GH8HIW9FOk*35+ zG-~3(H}3bM!Rft02I%tC&L4e(Kwb}M8PA6Y9&y`1*bAt1FtltsH@2YmXiYkAD;IMB zGp=>!6W?xwd_sx6uC_Rm z$)hu=*oUKG%3NP#xpISY_%#Q&Ya%ErR9 zTpJ^8=#rQjT%edp>|AwBkw*kT<}3wI&!>eGG*PEAAg2!#Fl1eR2t~!w<}yH!)c#wZ zTwQ`l%h}7+msUuDRY7jTrsGN#?>{-b$~+q!^Plj62`^qjJyiBTGe1~YA^M*Z|1%ru z|EGE2MUsBKxcOeYTt4H~6lI6f{&Lutl>VnuZ|!B$yo@M;tB-@V#=w35EJ@q{)x!VR zz`Wk{_nM*!`#-w_$aBbM=-+pZgZMYQ@+yO0lfK05@r4)pvu=R*Db|QYeTGAx*F8Q# z9<_aUTe~*JVWD;vboIMhq{>x<({g(+juxG=RiV_KTlpf@5L);PS4r1_;Ky}z{w@KN zxAk|`Pjc{K2RErZ#{cxDJeJ8n%gZzawm$xw)sXeiS`Y>3yYI=JYtHZLt$}%~r*g@mQfPK*~ z(?8WyuV$<8jW2kkVKu%UV8y-{fu>&TAC}meQ=<~Be7s(FrG}+PXMgxH@P^i3;J4)B z46FZs=eM?ur`?E243Bco5UUuI&yeC64GKOq^Mk|_J-WCsQo77=lO zxuTN)q;U&h3K&GuWzSd=%8TU-?9#i_Ch;}aw7ikM_p?q6Hl4X^)$Sq;pCNDlKrG=( 
zz~x3O2iOd57Hnd*;nuZcR5f-(1f`mF$=DUSG4D_c0qD@2^9wPOCGYzwWc8Z;`!_IW z{fW)7vn3jr{V`KCP{g=Q1vTMs>riCA4gSGCoUb(?qSw_L+|KWc6G6=FsP_Fqz*S{4 ztNaHuWmTmxY*eS+w<)LApMud~I$y3-Iuz!kl?DE^6g`%$XWDmy+Z+^y-Kt#DOnlJ` zOj2HCa`nA-q2R7h1H3|70I04@SOjO zcw}{OyV90}mYrH(nDk-eUKXt;8Zu{D`p_C**}IUt?Db-0B(~{Ej9S`hd?@QgbOp9S zk<$pIu3~}*Z+usUN6ufrG(WV_Q7w#s(pM4 zaKzqm3xZwTo?aI6&9mvq`vVv7dNS{VeeDx}BE=vkD8P^k_OU)ka;un{;CC%9Hxe~p<4LUGnbiNMAUyFUv1|BQ z-po!%@q@J-YyR(-Gii&4*3njlwxo;G(4?0Vd^Dk!pNm}vicuq$pg{?KBU)sV&-oug zynRfVCFxocom!5pR-m5}ry9ayTQu0Gre}jDPys#$k`@B;m0;h2(Rd-_-@kl))boZv-e1okWyA2_jxL2~k#q8E-dI78ZH81_XVPw(Om z&0FOa_seKbs-9_9eIq#42uU38roiQ`z|r@lt;d}D@wy!Z)l6eD5t(PPH9{y8Of}cy zI01M8fc4<^0eDja|Nb>|;v@37J!xCFe15n#8ck#c9H5{asYquPf52Y!%5@tVPOM>@ z44@c@1#tJXu=f#^?d=V7LY31wG@KYXEJ_EW)fzo1y<4Q#`%dAL!2LP=CzQ6TXOu*r z#B+;XGd_K)d_s)`T&-7Na28%6mE#^pDd*X&2zv|(UiTyvP37V_a-``=XjSGlERfNt z>Vs@n4x^`NZXH4HzpW7;{r;ra{mjTaLkG^Mt{KTmQaSw#HE-*$RPH%t&yaV=>in5b z%Y}@`y^~A{*p8fLJ=8U4+_Yh(OI<0JE*wJQrOg_v>#bh2m@P zjS3g<$oShKto`zVCbktqiy>%vgWHVvxQfcF|~*<~`2hisCG zrWo7*VB{vq-yUglbH%E8i(^-w{+9^gq{>A4`0x_tO2VpSKk8pJKIkuFUfT&|voqht)%(ho=#jbEtvQC&{9gd<-qt?=)?)jeXswg< zK8dKb-CJlupU+=}?cN5y?49!K&lu-J9ANqZVmkidtR@*dj-eu2ids+a2HB5Zt8aXZ zFYoUz@vnNagYk)dap1PX0%S=L`#?io+xM(W?=pC|*1{UU4sX+r{&#CAN_wcklO zUETzO{8hE-*TO|eNC9w#`x41+TIE8ctkq>-3yr2rAk=K&NpFx zts>tk?u&L2TTL7wj?k80mXNi-h%PFdpxeH9a5(CP_BXlkDm+y|;X;cm$dXR+$1LBb zEHwg%Q;~$U6&tNkUFAW{ZCdS1b_?25rN(@v643N z{c|!TQgjC6jdf35t8^-Y)Y0fYzd`NE5F;BKRk^+814wmnLQRR?+>cfa9H{mx25mti z@GwgJ>9JKdZl`~rEzAAgh;ggq_8%kdfZsv!X9t4*OM7mgENKD?@iwAQRv(c)`(2#&ldK>qqLshFz9@46Qr0S2v>cIPu1TyxO#>!uA9 z*Q2}h*ak;cCKE9{K7RisqetGa74f^~Vv(x;UFTph)XmK^hmMojO?e!5QLeAA!47y+ z!$m3u&HVl;@sc?r72upHvu1v*{~TBwIxt1kk0$L`>LJt1jWr(tBe1B$ulp8%|BGM% zKqUb3K!9Bfz)nH|D$xzQ$p%|JpxDLh;cEZn-G9M!BdwEL{#QM{iH0@r(1Gd3YTcZv z^~1x^gapDOdK)Vs6?Jj(8^Cfd^mGPm(~tsC&Z)gD;fe<9o*XUITo~j5L!MJ(%HJTT zn$UiD9G0IBHFzk{}d>Y92^|**o)S4064>HIe>gj z+m-@$->Iy|bi&y^2_QEBid%So4s3~(sZRsA!+osEa;&GPr((&hW|=x6+JM9WJK(VV 
z)k!wpef7*2noD!tcSk<}&HjBwb%x{Kanr|r{&Dj6ZOJTMi$C%|G1-Uz`08L;rRmep zSx*;NT+)o44ZgU(mQcca70j%~@rfPK381~ktRFHChfaKe)PU7nDnb||J7D)TXq=Fb z@qcb`(1~82o1KN0?o$s|PmXNAaQ)P{a$*w!}89YwV$VPOVO+e#=$N3#3$O_ISQ zlUwZy|MTddP75|e0-qu0xsZ;%M9AR$ICZgxL-_y3MZ{SHlnt=nH~fNw!(^5{j$y~( zm8pNddKDC^yShWkFMUJ9H>*&KElF3?&?`hy|NB$jOexBSRU;}^=a1`lo4qN)h?pV2 zX}kZ0Ilu=~76N61`w8eHynsAfJH*u48)2C*UB;-jg`ed=HS~WcpgqCcV!HkuMsk8` zinQ>u?<{`ilO(jd~$;NcBEO1R&|;g>f?;i9Bt|U zE@9KT1rR$AVZ;eqo*aHSeBC!jsrXmtRCGDheYB~R329A-o#L(vSubk%v7TIlng z&ejLp`Ek9`vWiK}n3wm+rzG=@l3;@_!&^xLzF+4;Hny>V6yHu^01zF3`W01FoSrE1 z^$iRlfIuK8J{^~!|1isD#R+Mfwjg#^n~!$UX&J=5h;~oN&cxoqXZ&P%Ac*iw_5K}P zg4t=xmxpKAbLaG2yJxm|Tl&5e=!XDyE;lB}0?ABk&e1q+a(}awrQ=aR}y)9;VBF3Zc`aAKmY{00Lz%z+a8 zmUIz-H;c;4`?t>5cXlKIcONK$TOm>}NEP%-24a8>J02eICmx{M!_{MC5h!#9RCcu- z!HOo2(qy@?KlidEE!JZk)OWEXkH+VtHNf^#j$Bp|R!M(!$U1Pdn`6nuVH(+VY z4Mgq(^lFas&q(BjCkpR2Qoq>#aKBph8R$N@<>wuL6=CBb0DA(>0C*uh8sR*B!L_@G z2Rq!5Y-LVnYQDELeUGkv@Q}F{H-r`W!KViTDRvqf+&!278hD*uc z%BWaa-tmBJ|23BFaZp>-9)$Zc$f9Y=W-0u)allx%7g@sif=|9w7_6`-mbU+B|E9|1y)Ry z%|RybLfB2%-A0K}o}zpYm&81i+5$PXMyg;p*jKl&gC3%9yBS_;NzU36Oy?L4m(Mw$ z9&);#45k`GT^?@<*A;5GSl0V+Wj_45Q`<|5i6wbT9c|!NbTT0f?xOF@;Qmg2hKKu?96KLA`n(m37pAY%9-O@AV!CytVBDGshxlPzsA1U){ za-?y4?j7ZwmX+)0yM2_Nv*V`Nk;eh2S8?X=a*8746d+C?%}D zN0*UsFqo6?#5Nr^Q z?~rc){W8zp~>ZfNt z(luml1tY?x<{iCc=gi>PJ*B0A8lYCidn2qLY8PboA?=;)5L@hwBgUOv9g)WzXp~9% z#_`nLr;Nw>c57}(^ZIDUKNB0BpmA$ky*is*K-QvNZWAHC9CjrJ&kjYilm>e7TRjyt zy~UvZi@;tA!bRE4lK^S)kiUQbAq#3|#0t-wZSq{s)u-XS6dm^G8Vr!-;S5MqoRD7sy^9VR>twpc58}QlW8yZt;IrtxEd3kFg!h6v}&0B=?O(|lG1!Vk?p@c_9fO9Iu6vvX5kKJdg%KeTh5*BjM*kZu0 zYck#$L-;Tk!DfdAbiP6JcgVj{hyjq+VB-Er41lj^f6G@1D&d%eY3-v8WQYR?ttC9} zOT5aRj>`azxCWohcr>K*ex5LYRlwIC1BvO9^g`WQe=Oc8TrR?U6u_`9XL8P$`AG?n z25274VCgF?@GH|+N?t-yi+N?+`J;wbo#nkRdFeIEa|?SMD&yOIoacG+R}WFqkat?R zD|c72$JL_##nQR|aORTZUMnu6nuWhh4X3D0F~#pRwt`5!GuzvV{knYQ3l2~0$`ktx zU-1W^emooO;u$F2?*B>|=u z)DpL6EmMlE_PVL3&!boir}V#da0>QyYpOBuDdhh8uvDido*Or;58_)vyZkbu!Ss}D z^M>5J)Mt<8v7OJ|K!e&vCzY4r%{)D`nFGt&cB) 
z@>>&yv3hKu5oj7>uI8WL(ANv)AA(z%QVse-Q~k2HqOz ziy0XZ>uo0U>)bJ&p7Num5_aJG!wqVDXKaHty}>dd$^oeZe0 zIZXyvkbIl{fvEG}%e>cd$NCMNHI|hX?hyKpn>Df^oZ&d%F}XYZl>Om`L-Ls-Jb<{< z*$4I1`M7-G-*URSJDWFT?d9dA;Y{t{7?n!5y~1ep;ruTV4INMOmY?HvRLbmJlO6Q0 zNZ$-3v+E|G&FR`db84YFg6#kKx{OZd4ka{5baA8DSlN^bBb2jRP--llw_B!1t924~zYna8@*Qzy6ina$x!x6953B(NuxyUuW*W(j#T9lf@2|Mg7Zbw4B6k|7vDC8lu&+$RL7j#p6l z)A+)52~~H;evlwH01K_yGkzQ%x4eE{tx-`4Znf`H3^PO*DA zQ`+mcRN4K29Q8gL9Jr(Uetr598$8{@bvC7-Yw$qC`svdTYJ}BiYNkx*)0WbEPnP{H zo2@Rm3dfEzoTn#6)y^WXR(gJhe_>cK*Xvf4{V|QpZ;tP8yk{yOK{dSYU4ORl1y@?b zEzsYraECu4aCGR7{B(JV0k19%33UjddV4Es^>7D}R2{|bV0pVd+X84Jpo3SQUYlE6 z6S~&aCN&>Y#sV`kGOTDQ#p&zn>VSjkfDo52?c@*cl_E}Pz$^BRhZn31`2Kdyu{|4^ z&CU4Xl45ptb}m8W?v+nZo`L8jt1AYt?PjAY14elHYT1Sx%A&4x=D)Eid$r8k@xZBH zCumtCylL{8=REBH&1wD>!lAMW{b+B_jNGi`lL&EbPR^{Y=|rKi;3rF6Jzd>!A*O@F z!xrA?acyvpcgiQ8J%+WdAH3%H)`P zu8ygV6NL0k-PL5>6aS0W#ZRZ(kViQ%WK&r&jD+<(>+m@&GP}ZiOtN;Da$--$Ou?kc zCnb2I0yIC(Z|@8jQ^SkKmeqUKWgHXTn2)}_P`Oc8m|NcVn(0h_^^(zxA9!0^EC+{$PtC#Io67BdKK9ylkqRM= zzU`>JW1q=+omrCEzsF)FC^}2Z{S6j38Bdos3q=zn@l36yW%YNTRs5zbE+awY)g#G% zUX|2~ZdL(8^&?4@84fO)KUe2Yo~c#c4BEFRWLHvE{v4hFz2#MuD`(C6y|L!_+o4eM z43MB-pJe$j-AjmE<})Ef5T~BhSPO*~L%d@O%e9G313_rsxnCNAHGh-4HILN@Td)>4M(^Cr0r%1c%-*`b*v zLOpAHPONoCqUM?1`w+^J1^3ch>Thj7X&@OOEy7+WYA%X zC)=94UnCm1ZY^@$C+mmX)R)LCP{{<`H>!H5YaH)FSi8Dr&2YiLn@CYqgTf;XbM+N2 zb<$SQRmo+0;cFkJ$B z9oqz}j19}a0Ca~I{&Mq!CX?cT#vbOW~TOAMJct-fw*W{i)NBmAUUx6 z*ti!DgZQT;Yt?s0RtlqjO`iQqBzmBb)hkhA>XxOshriBXu{!QgL4K5z9wNPv?dd{D zd9Hd1A|hfIaC9gDlDzs|hu3W;s3!n&bAQru>eIgx1swXf?Wsj79@N^}dUk!Cr;s7I zLi7uOh@4*2!*q0X3L5ZWp{I1qmlcRdFr3j>Iyqc!pexY1isQ|IEpxn@|jO$WV17cgl^e%CfoWCED?# z9UE%w5K_r|q+)?TS2N}$@{$wx?2~cf)Mca={U60x{J{&p6I60I);AZgf~X?q>r{y z42jXQ%V7EiHOKugy|b4$Hzt#rBEZp~glGtBUbjTR-%68Q5`efg^YOO(#?^}k5v&oBPk^Zj*cqdd0Ka}0TfBv z2@^fs|Nl}XSKPe^whN)rO*@uEaf*VXk z$&2sVz$Qu-mMaFBAo6JwAlICZ`Vr(ZWrK9|{Ua4+T;oRsN)i(t9wIFx3Rcq8=1tCR z9Ye2!?<fMo$`XpV<=I!w60Q9LcrNH{}!)PjP70LjS+KA_eb zg9k?jr`8vHH(E0oo1JmMdq2!Lz`1$!yF`S;N4hHUcgelUw`O&Rj1(TeoNF{1H 
zH!yXQLAn}2eaS^S|7hUew~f))B` z1;m2jf8xIf#fcammSFxFY;5_>`K5-|va!`eJ*bzRoijpT3o+?%u|iOserLn@jlxymo%0QO4BD8L{p0Bi6f>SY?6{XB7)j8DN7XJRfsZ9=2)R_^&$;?IH z{CKP$e3YV`MNM#mnh29Y2ki$p+RG&`c=-W#YlF=u$j1P|P6MG)| z$F^wBkG>qN`{mf9;e!3U4K8+#%}XuTc9mzLp~1ZBofWq_L&7HiNGd+q1j;woDUgw4 zw!*VV*<;E~BudCW-Y^wJYUoZ8%0eZhD_3r?L^A7jXM1(5KgVdOs8U2AMy0N`1cF~Y z6R9$b{-SzYyZJL0P;XQc=UP)#AePKZIf9?egAGnvok}_Eb1ya-zFNw{7@@|sLi!1; zO0J=UzCs@GM_*3&-BmtjB?c04X1{PgGiNLY`Zf zY+*d%Wn!s|QC?gees&VEVaqhC&y%`A>1=9tEYC_%v9pDpWbfxwz|rHx7|)}g zKywPvf!#G4RHv1mIjNm|u(e6cEhfeTyjnT{u_ZFx4rgVE!E*_?w)ZC@_ zLSIcvWZTGy5ckIAkNpZRV&kz6lHE->IoUB;>S!wY$mHSd<)Vc6S=TV@K5LP=WUiQ$ zvdJ03p;3=!R35Y$N4v|DRSn7Zc(9c;ndO6~S$S8g6aTIAl0s3F1NYMK0fP5(+^?h& z*wB9E`wixKd$})$p}~W8ySz^xbFnYcap=p#j}=eYkw0+gaRw^yg}C0d)QY?x%PvSa zQVUGT#!cVuBVxjf8!FywpCH?kPlvo*9?ej9lCg=t;a+}7woA7$^wg%9V~V>=PAL>p z87e^dGJkV!a8ThsZiV^C)jBXb5dS;FF*re}`Ina>Goo7>_8-oa_dH`8esQ_fa%2Jv zzi20Arx!jucQk~LNdz$;ChESoViug=AyGFxvi7T{9@8DnJsFtmZojc3z{Cte3{G<3 zM=o%xc*c|KYbk2_qh|r@HWr9yNCvF}!;VX#?d+s-UUg%~-?>#TjZ)KYhc08vb=w?S z1yCFj;A(l-q(4I3DBTFsvnWg{Yvfyp)}CiEkGkPyU{mI7c3(c+vhy=1^b#GOuhzw; z@}LiZ%atmzqcA9~Ho61K!0s3Y(=sY5uOxn&4DnrsmWq9&9{#7xs405x;nrR#8F7>E zlJ36~Lqt)QbnoF(Dl8iB!+&ga!vW-&XhC926WTu?Pg>A@ZdW~+2jhaZYuzqQOhywi zj@Y(;eUBwcgUsbhu`-g4aBFg=rLPsy%Xu~ z0d3!`l*9u=ep+#wKG&h?^{$t0(sdb={h93TLZ7Q)eB*1aA@=+T zOsu;L#hbaqCr$(6`fP}CkN(Tc+ej)_Pv#ESD=p9GDo~cwGLriCJQOnm=$a$4!aL0N z`59lkp+bLSo@qenhOzy62LE7s+P=&y+ScpPUbjNVw9l)^C!AMLSK^@x_kL-hy>AQh zBlW7H{Nu`7PRal*o0Im4z|icqiaqHs9FFbxEWA@pH!t?#z;SS4Jbr8pK&nrNSH1WD z<^qfidUt?Wm|TcvY91#J{dD|MkVrTw;r#zV#v)EbBVKtL5n;duo(EU@{O2FQ*1`^7 zcs)jRjKc_!g&$x`G?QXv{h_V6mV?qM* zN@8sEpyuYiErH!}dna9H+<_T5d>Z$AdtD)(Nq!IBcF0FmJ=tEh`UHz-87eZdUT#9Oq*iEr(rv-I&*nV9s6ni=7YD79G5$N#Z;-V3VXC7va$yr@5 zTd_ca-T0;iMKbj0JlPo3yfUsmWLbRFPqP@XDy;t+lQdwfv$owjcfFEomF90)LcOZN z3(=MYSTo`E1Z*K0-896y;hoa;spfn`_ z8TTzRvQf7`;=|+~65xuTv_IMaY^5aLcLtRs82h=i(Na`i!n9=XJ-MywjG zJpZI4_gy;$3N`EjtME-r>jS3+7Lc#g#Y9i>|2}QUB%_+||t7JS#5` 
z4gkxi%#N11q>UTT?mg2=$XxIQ+&R#Ka%x~{H6)ipU)ik&WWY?H@KsN_=zn;iFe zTuumqq!8hc!XMvt9ul1?|0E+Fu&8^FiRm|Mf$B_4I%Z|Fv@I&c{7M*RoLT;*@%SA! zwuGuG)=y!mhNW;oIjMKy?RW^qB_)BaYjX>Lpn|t2eaN`miVKsNxLp4DH_+^X$iL-io&j8C6%X?GLaKj$*z87~)V8yz_%DL6zz8UPCp)xgQ~C8)71Nw;z)RD5dn zmEEH9u7AM3xxrn2D9my(i59x0iDXKXbD-duiB-N^q%RdZ@@(Kh;Nfle zLhW$Pd24D)#KqR}tnjp=6T*SA?dPXk0hU4~I(;BgIGdK*d*@|{8VOlDw1+{3TpG34 zPf6}JWNd}Zc&`#fVaWDG2a*1;p^P<`t`7FczfAOSJLpSr-CxsS-HVU=bh}B%wgiHc z+Iz;V5KQ;uwZSNXtR=W!Uvs-|%_iLKyPQ(b&N+uMUVEqC?IX8d4VScDO_Y3%g~G!e zf=rb>Ik`E2`2k`*jr$dJ9E|XQD`ugAl8W|W>!nNU`LJVYdxHQD!SvVb%`c_H)lrSx1y3g~9b0))?9kG> z4>a_qNeholpsoA0c*FFl3GVn@U)H!M+p>=<9WiJe*8-@Akn z?jd({Lig@;9B5ClW4e}qzjW`pU6t^P$36${>1p;vDLHDM*J%?bCS7u`ZM$uLAVeiL$ z^XU1_*YJKAVCSTmah+(L=e=NV>e z4T{}sPZ;mY5E>fEBYx|;Z)xr3_<*Dik%Py5xiBJrob29%+_ING=@sx^JaaeMIkFKl zZ<|8C1s|78ocCUM{^IHw)IAl^bvwY$1~nlKAWU3bX$_E0Iwx$|3JfL8!6b&rf7$b0 zgDpp1cezLac|~_qQI8|DGBhJJ+Wia^1f}xRK7nn|6X4O4*1SSS=YO{CwarZ?OsBZM zJW9CA^P5+W8KA8f+ja&d_yH;FF_r{{g2$w8N6Kg1larIH42jLr)bXxvd(;xS5E@gc z*=zeYT}vo3cDFlzURB7=KrC~zSj2bz(|};@+$(kzz&2-%nphrEnYEV%`%^$foKILU z&z4nJKiQB9EDWu#acL9M<3hF0Vzsx#Jd%v*55K|PH|jThyipj00%&40oZp2fY>|}h z-F*k&Zl2fw;1STB@Tr$|luh&X!=JNaEZE?{I(2b?>?tR`N0mTjBBNq7o<(=;#DT`Z z5GR0Uo}7Yf&z)XWk|Mi@JQ31UxtYyFxFE`ksKR1$oZ|U4n?v~C2q+BQAX3uO&l$Xb-{<>#*Lweecdd68F2(u8iGB9jdtdwPs{~^z_so59M=N@` zD#WO!=~wDTM*hb_LD8RIrK_Gb1`1$!suYjG1_g1WPGqArR#f9H7K@=#|? 
zyWH1_R2x{nv@e-2vVXO|%;~mBp49$}ro>k;gqwM?}gM(io^G#me&A505_}_;8xKoAQtc;l@dv-qu zP}(Xm3UZQB_)W-8L5O)n_IDVqMjo7V2&QrCO?Fq2vuF5X3lUPazEL(tW~>W!<;RJ) zWAI}Oql{Z^Z=F+rKrSDV4f{ph*6YS*@S)MR72f-vZ|#lONhX!d887E;l=!P`1+bwCSPg3|C23 znm+m_=WQqrzYAq7SMWJ!xv~2AvAsS=F-^aP?V#oCtW#?87iAtRI@5OL(^px>7PmGp zMnx$P3ZheDRDWsnmTi^)UOvuR?kxa+tcdho7?cX3e?cl+Tur z+u+v(ie0&!d1v~h@?*dI?DnVDh?oTxs*h1~6!a+)31~3iXBQSSLC7g60`x;Z^gz-l zJB(n13kz#=^Q>h7B`3Q>Okyj!zh|1fd};#PwhXdEgah*=y%Ox7Nw*iIxvz9))HPPD zN{&_kN>kfyBjUv*N1g1AHfsDbMtYwNO!!7;n{}U+o9RnPdPA{#sSGB;HeD`>Eq8gw z?Neuh&v_}3D!1P}gqIZ@bWf)7(r80?`<)8KQEa6b{ncrEKw}dO-?ODY_Q70#!NJ7g z3(w=dBz)`>!&z%Xj|XBFHh)^9#;e_CHX85M`UevFX6fhPtyMnu*o0Vn8nC`R!%Le(_Z>Gw)=gOW*n4J##pX zuNN>MF1oEhi|V#oV$VXpE$7yIOK@lsS03y;3;j9Y;cTWe<4I}K-tR@JukcSb`Z1^+S$V7~LJmr2-835JQb$FS0I@8GT>QUWu1IS=`Io ziwn;<2g5V&mSvVhRy`rTDu^~D9&b6b(vC>wmlQdR-f`U|w64^y&}N%teIYH|?WKpW zKpgYjqG1CkQ%?d*?U2FeP-C}}=2%^YU$g5RgN^d6pO7Q9`j*qZsRIeu*T_>*$RlnDzfYZypLtg;%S zPFEcqi~h#t`s;;;hK3MqlTOujXkdHo&|6raBXf9C?N`S_jd2e;8Mv=$pd z6!R`DBn0E*r%#{IDieG-@*G5!nWq&Cxk>M27?oQMW#T*WqpQmjS>J~S7ezWyP*G6< zEB6+IfIuQ}caRyP!E@y=dC>PR_W?bu2~sYBXQ9l1j!Lw0$mJSQJ&pZQ^C0F^e$C7N zxDOvNN2PoTSf>fh$Wz%q(Fq6;`_peio)3##nu zk_FIx<r+uh%KcC1zq` z0@7G@_W@Z6XxeAcG&7JN|L4LdJ13`7n+@HxR{%4^{s@N)psjBZktx)uqw2cx4#LmR z|2{A<1|+C%Iz96+7f1_Nc3=| z3+Ft#E(YxM1B@2}R0c>*ORJo)o!r+7%I)ZQCiV21L3nwRX*v=Dm`WhZ$y|@nu@@E= zmQZji%u9l%Ks5t+HP>U_b{{I|HzaWuJ~pP+6-5&@{Q3^~CL=a3?j5{b9mp0K;~1My z4x%^SiczDJ(}72(pP)sD8nAR;8#lACu!t^3dmS;)3minHUD(+2KK1gvprfaE-L~_* z9<^&$ibtvou=k)2Lg&UI5f#;7GPjw(L1egtY)+-K{2u6`zg{@H*LU!6YiXZ7{O4nK z_6@%`qxmLWKYg1p)f5$REvr+U*N~2@11ZLTc`n-hh>*9P&i^>-bC7`dPrHn&fB~0f zs-92=mqW6=_7whY*+Sd>%^S;IIG8smg&h1gH*FczGL>Cj`KhBAQ5L$xkejiZM0bzOMKXbL}XMEo;sb*+1K6pmUnc`lxJEv&r8 z9kv`OM=fK-c_VTwVQsq$B5S#yOOmr^9`M0hT+E4doY%O_G;UOC-IH*mbuC~Ks>t~; z5_i!gEBeOh?T5x*0rKPKPMb9AvgTjAqrJVzhy2{cX=Kx7&fm4Rt=8Z~ECx@j(cx;c z=J4L$-Ye40$ID&7wFH8AqhUt%?OP#`UDC>-AR9fLBggA~>H<+zQYzmGW=#|t+3s80 zcP6@xO&+zi7`;ypr{J0C$X;j8S(ISoY>&+bt^iQr)Bfi7-$4l=VoreDda)6Zpo 
z6yd7w%*hWA%X&d>iN0|sj1la7(zV%p{|XVFdD?SyAeLw@v+%jJotVb+L5<<^=c4TV zKABEA8A=DZn_(&Q^r7I1c}IQ>f9Z$Nrm{ybtI^VJpXw}n!(EKJufP8tyfd$@t<7IQ zCLEKQnHfYIMy98gb#x{U8)8~j`%2$*c(5|L@34>UNRA)xE(q8rQd8GXRJxwwtbg7j zx5dzZK`9nJ~N0g^WhFO(%BNLXYZJx0leiJ^XfXw9WouXfb(DL$6@&1yw zjik01sjN)G%b3a?5g`pG{AkJ+zqYLpK&{A_nbE4NtLNtD-)f*FTa}HX9$fB?!}zmd zbfx(4+tfebpUfBMz8Jp~} zLG&z^MpJ-)w;|pTvlk=jN)AszSgK9SCowOdSl7NWxpx) zbJfTgyX{-N)X%zRJ(*;lj(iTbg3YN~!j`$AT!lCwZXm7|Ytr|cFE%A*LYVU8l=Csx zwsXYIjlC13hGRB?nJK{uAeJ?`kv-C*m#FmlM`v#+d3|{preiL95B<*mNr~6;#$%_sms9HJn>!jYiVb;;nF*i4_@IHg-m-S!8lOTv|XFXWK zS&yXtPEZITz(7?HY7H-4v0HLVv}AuMDM@X$Y_1_IsBttaB0#ymaMt>d%*dDZTpdvk zM}pec9;TZ~un-D@=fCB8e7Ta=-TQGQN2Eu-U<&CBoE~-xo#`s|r%FQ&i3y(4=B0%e zu2(zSJm#?Bc2lcdkf@9)oskxZ8Ab4lVqEP?!ztGp_s-Je@0WDOx1^`*C}~U76OHD+ zFr9f|t8eu6U464EJT9>zIs7#Zk}5F%0O7s+a16~pKcbp3b$1(NM?sO1IL46|OU)W)J6y z^5q(ePzM4Otf#nc42zZ=CKFT&q(kvlCQq5IZYD`&@d>8gH^Uc&Q~&&gH*-^M>KwJG zOB?y+g=x{J*WMU;-gd6$6>+dR9;X7-Yc|$gNF)dx#2s>6gKUSqt7ZWCiIk9#s>*X1-=jAdI)CBwCDY&A>a4K!nf@wK zBD>@fY{u!(W2@tUcGB?cbu`inI``pA%bW}Y;wW))yY!W-Yf z^yP4Mjt$A02o0NkuC)SF=WX;tnrTP^8!U)W_xAOfj+YsXyA9KjjRArfAc(-=*r8vd4-b!UHbda`I6pQ4RN;>&; zM5d$-e;D|?UIRB&AJB(AJtEoLy>O)JWNbJTV8>ZzGm!2gl}ykZ+k+_>&8Srra6%hk z?HIUY(A?}`;sS3c7956$;#n;!I}49f2zQ%OBI3_4`Qhodo;~-o21oLQ>7GB`%~IkT zG*BL+I$^^p$uZuUfbs{%Ik_bnH!MIRyw6c}hW~26(l7fbUYnCOVW6X?rlvhTJ;pn; zO&~0OtAP)P8>DxFLyjO(3>@uR18g>}97S&Vm{l~|iiz!ND`SWE16+4 z%By`UMW7(dNbjqwrk+5dddejMMjoqQd(%D4KY(PZnO!`pkqkEdQcwHXaIW>J{>R^S zwxA^fb}PUF);W!Eb`+unbXLy4=bvEdlX2w?_mt?M)y@3*$v!xDY4zIo0+3`>)dz*D z)oNBWh(EtINgT)H$f@wsl_a3FH( zx^F5VxuO2Up>GgF&?yXp?Frh6`tB0z`l?>n5E>O7jtvxzQ$O$K=bxJ zdRwvK_Iu2TT;9d2vWh<~n$s5?Kd8w2eBoNpg8CEe#=VlaP_h-vq@u7q zp!BE_;opW&pT1T7b3^;Dx(w7qJX2D_>+I|_x3c0jX~P7`x@X5bG?N%ILGOXZ4Q<#G z$j;6NpoGa9GUIL2tZ~v0Lcs*RarKFd1u0r=fs!Su@ou8(US9{11r^ut88)p}T-+=o ziQ2_DUJ;b27tdtI{l*>q{A;j(A`M=4)PGi@aaLBKAA@ZFrrRC}TdT6DMJ7~R=$*lY z6Yp!pwLSVKZF@SiJTr1(3?RC|_%VV5@t+=_<6>*UE|XF(Fv`>*K(A{SX$`zHwOPyrwFa*_?<; z@Fe0v{B~_j?9YcB5{V80gl_x7)_r}|7+{M4SnR6r87a(gop|!)r 
zP})tDieIa0+kH`-B&yB!x8}5YHLr|@q>^Q)Dlkx#x%t8zk`JFN3??qZ2?>Ja(YlSN zF_2xq?s07r_B~e*UEz5zdn| zL3>M36{MBpDe$j6K9>6BzM^^>Pcwa#&=ZVNueMS4I_O9EIu4Tx;rc4kYBuhFX~ zpK>32TZ7pGY>erWzL~&#jm^l2n6M>)>F{6zEyQis6^=A+0Am0Wp#uyOffRoTe~w!_ z!U-U?NGj37;^KB_EwBBoSc#TRMStu=Ts*ubP&JMI(W6hiC;@?^2HFzzJw2{D4y-o; zLWXMc6a+pM=%>H?{_0&&1Yom@?luskl#~=SAeJ>$@V9AC{o6|KXaoP|%NI0PA%I3E zCMLqEL#lm$LP_km=Z1`|*%g7Y0O6Gf`0n8JMhhk#s z=r;i;b!y8J-A_?B4wS_;Ar=I;)+qt_dZ=nELa+}qYvxDC#C%GK4+d5Ix*Fa0LDe^K zUQgokj~Oycl4i3C{i~=okcRh@ZSVy=fU{)Ady|Bm6#1|Wah)e!vkr8QR+Fe zpy*q7H;u2dl9EzI|25i#c;8m4AD5rc5E>f#+SaxMS{oM^_Y`e&^v6Eo@i~7BO!a_X z^F?shWplQPB5R1`-#2oAu^S&)+I_CCudJeizp$_{kRh4`*b$AB=*J(jv4wy6!e@z0 z4*^t5e|DGR6BCJUWBMDUqQ_l6O1YIoDkxa$;a8)p(^uf|n!J+I^#n061gS}!JbTB~ zk*v(X!^;~I7M4>}lW=i%tO=4B{(bXlt^;epXTVqqTxay!2FT%u8K@Od;(UUF*bRJS zet-$qau97HVuDbExaScFp#SqfH|Lr3^85rb2|0E3yRNRT&tzm`K!^49^+AVyzyb?8 z*{btbq3u6yA$TN$Evfs=StWJdZYqwRoPs?=*cx zOB(7F+iu%Rwyc2>zs8bZ4J$W$&x6AqT=^cndLR&6i zAodYpF!q6jfksLfBEtS%u2~$k$t4JpauO9`P*d^#SNllK%&7eV8-=R>9;H% zSQe{k_LiVJ-}~TTXYd!YDfBwVBb^XL-;X~!IoTIOD<=frjBt+@W;gcCd}6Ncu_$&bY)8 zr`02YN@(Azom17%emek&9Bw~{ z++FIn_liMJ{$oKupfPaXN%?{5gyir=b_dA+yX3q-?U z4@51^W*gn7z4ixzqEpw=>4n=(3He@nMSs2zT6Y(5c^_&a?Zc?E5g<;A?2P#M4zR(8god_(UBmMjN2((TZ z;O&cxrjeAwH~iWce(J9r18YHQYI>@pqr*6n>;(wjq}ewk_UC|YR8qnQ=rn?cUoV4w z$?ITP4&*d2d;s|%;4>|x%dQht0t7)_Pftl!_Et&$4i2MMnW0*qyH46!-01ZyI=+%4X!v<0-q<6T3ivV8xT6l7}6;n zlX!(oT~^<+-5baF*FduZ3jz0kIqlI?4a9u( z{Ah1ymseI62~@De@ojWW>@FFSj}5dINCilL25Itzg@x2nfEZdiLnPrQHo^4#FMnDN zj_oQ$7g*ka!H@@RCt&hG8vS+k3akH_kX`hIC@L#=fFuR50=2cZv$|E2xiCQ9m4yX8 zwANF*?2c`vHW;6Wyu8&zOCV?fatu1UyYD@C&`CEX*V)XDPd8A;DI%^c`1tsMwir~ZKxbF5DqQoihCjot zCE@`Q#WzrBKIpO3L%-4Wjuj_Y341kAv6B`7!$zl+@Ib;D98MJP>sRRSEzZTJewn(GC8JdKe058O~@OgVshB z8czX58V>Amq?DeXUf9qYp3}=~=|m67qZ|YR;T*XIbckFB*K#u`+5y%-*cU-a_}@q( z(F=5!0UYxV{#=kq6h7nX;((Ewni{l1EPwr+o9O2}`scO_%uX(X{K2wy5JAo`3L)Y$ zo{)2`?qtaW^>A>gSZRX;13}2LA6^{E0dKqJVhuM}#33RQ37WPR+FJo{_6wp%k8{t5 zeDKWHrCo>%L2E*o$lr^5-ZVZMx$XFtGJgzvU 
z$y^jQs&}4$a`<20CER7a#JeJFF>1*?w3q12P;V`hy;h~S0@_N$*}96@n%!84AyanC zGY@DsL$ALw|A%S1?}KSdMt8_`?2z zL~jVPwDZUwPLB$_wlsS9NAf@MI4Reqd0J}EPZLP|`F+!RDbaC=WHH5aCj$lZ@+krR;Yg<=X&XHr@lJwKi-i3b+z3M;o)BuUOYc?{2mu&w2bP<2k;oaH= zp%CoqRWjcN^|qte*R1rJA&^x2i)OcD=HBG$Pl0jGohoCczQxS=*=?u)JVv7~A*Pil z01yyBtS3$ve8eDG-W_AK{V6W}FSZe$H+)ZLtdi1A8_XUQ?_~OQYW7DONe+eWP<5?f zXEQijU9xDgXM~nc*_EwQUXM5emI`ne{}s4rvf!;hw%B@6FfrZ!vF!G@emHK^qjn}m zZ?|>4C8mnO?L-r9;7e&$iKk>-8t1jnkmK=x6i(t?@;}ahe%IXhFa6WJ!iXtIx}kc zAkDn>?2r+mXL8!zp~h*%`B5BI(`LX_=l^Q1cA7(XBm8A_6m%tr8bmI`sA#sjv9$B& zdNa5ohhEwh3A&OUZ2E(eVq!@+V;lu8Wd2^m|9Qfb3_TDWzo*URMl`?hJCf@Akz=9N z4zVI~a0OXdG>}Hq-mvq1X=2|*uGC~w3Su+DaYU%| z^@3QQ1bGkbuSRaHGGJu3d!4Hr4HLhfZ66aq^b0H!c7qS~A3FRY9NIg+`i)Bgqskw% z=n`Pa{6oq+>s&*5miwM?_AEJ6>F6Ah`tS2}(QZ!#KT77qK|ynKCfLJ(MgjW!B8$r( z>|H?V07nS0gJU*O#+cacuMUDTeY7YGuq@;Wzay^M2Jgm17oc^8xpf-#_Mu0Be($%X z$Py>Py>}kMzv6ymq*`|aEHpLrNz1MKROGo5I13>kio&hjG4Ze5@16E6CZ-k zJY+*N*FKZwyWnSg)NWk^gE0EUanU{|fqe01Vd3pIdx_QGo2a^sL#~G*~ zdXEyL{>Z-R#sMvRW8LPw*LRT0?6M zxk`2dX=H*>O5!m34++;A>kbv@SuD5^BsYSMAq60R06y|*xr+S}5KIf8BXe^)Zf?-_ zYqf4JuMYMy%NkuUN0>J-z!pXwwHCA)AnLgI47hH<=*W2bzML_vQyL&K3~|jC*LQhj zx5Qg$a{+Mk$+2wR;h5fE;|T`s{;LsMbFlKzBLl`}`ETwdn8u#JX@Ee@qbK`%*LnO% z4Wy7>f21@2?hgPK6bro05Cmy{Kt2Dxj%e?ioB02KATL$}BqoN+FE`#<@%^{#l;z3NcMt~ z4{15M%)vqlGPc&5c<(0F#0L}8-Q8wej1UYH_B?!fy-;6*mwGnzlYqCm+w3n+^~0O2bGDt^Oz zN6Ff;BP7xBq}``;_cexhZ*-Yg+Me8qEt$9ZoRdG+U0A41$xJrQ&y6uPS<56cq4(kr z`eH+Agc7z6~9srR^_Nh&q3p|)x|KA zC^IrE&2m$G+De&NXa=lIRtPdpy2 zsWFM#T;OGj!%wT3GF!9ez=1;|k+$PWKC`<8~yWITmI%Y{_Id2!4Y7e^EAZ?5_YbHH^Q(2-m2{$v-}(n+_TrX^b}+S zPd|tD2{!fGa9)M_Z>^+nIrCTvF&}fRZ5SC!%Z|(D_de0O6ntwACf@_sgM^R{I%&R$ zKmYmj*NK{aA%=iFRvMF&Y^C7QNrtt(yz)}>-tbG$Xa-F^OqgKpfhQXSEyK-unB&B7 z(0`$7Nn`H4{6sxEN4MvCh8R9=Y(!=vV>x{@g11%j<}e1me>mzGFofUwx|ZMRIT0w| z(@W1f*7=SQmsxaft`j}x%N{_#A%^uM_y}IRG72Ki_|x&!7Rs=bxWvi1I%6v7vas$& ze9tKSIOcq!6PbcoUST(x=#%@?=Z~oyX={FwAF335L-70ag@dIY{k}cXsT*bND;$Yy zDvV=WxsCN%7z6(WgZUT+M|?>sLj6{EEKJ|#^<9DjP~xqiMH()%&95b%=IR&eh3(17 
zPcKQT9g_VD`Suc5Vy#^rtFl+CY>X7BW+q0hVu=XK%q(8GcxI_AIrj{P=Q&=T*_PBD zk&U8NuG1FZnwI%(XK&q}$zx?P7b9{+|DM8Uw`iEAkn0()c0*oO@InKFsb+qhK+#1O zh9ER6M&0tGjnuv{GqZLWv#+o)FvYxF=<^kdI_g~P^u;4y8x7>wj|HkmlhuKCihuG(H{pYioVlde8;ckYIC}+s%_s0 zM2?Y%UhZ*N%;4f>31lIbE?(Hq`KMmH?N5W-N_(QMth}mnoF$6v^l>A77?Mt>IgnaO z>IZACw&Dxz%RLW0_g4DlQWnuzmiVFWVzLl7{CI&9p!$gZo;1sD*W75=orJ^6)2fiV z0eUvQcRa%ka=Fdh${*pIq43 z3QwrZ65&zn1B^`YfgbK3^G$B^ji!DSS6hKc{uTz`3g2Fz0MsHFY z&@O)qwxdRw*4D6vfy7L?**l`!pV_xu4;Nqr<$Y*;x~IV(s0ZX zhR(JWr@qe{h3i6d@lr=Om3p226iwG>`V%oqw2N@D(Q>jUSr`i1H_utr9YHpJ*1?c# zB+>T`yWjcO*zL%v>GQ{L5w_Us(SI;C^&o=P##QC;f6t`;iaUZ{<2=A_l?h@KME)YT zw#~Esbthf|xy^3ekxbpMvEob#vq?*C&sc}u{;uwtt;D#ikN<#? zRk*3#kDclX@?~q6WMddJC%%^>t@%UzD;VJ0?ui4&M+%^|TZ+TZ(&oJC?rwRQUl(G2oNa+Rl*G+r7cjZ6F% zfo$@w^70b@f{|41j!H0$WU?0gY~Smr5zLFp%6#8cl)iWzCOmb#b*+R&p4UAfYePe~ zZn*7G5?+!z~m2VG68_q#IfqF3}pCb20 zc$5Vry6>(%95ZcOgQ>e^r>h-3&~*B;`?V^J6to-bFn5H__AIAP8K1Af<OQ*3+` z=iB^;G?1HVt5uZmpCOnAEBwPt9Z6Jc^-`{a*>Nz9f_sx{+t903@4H@eUYToW5D3>% zydDGs>B@A%Q+rA=HE};EaurJFk!A z={LUJt+cVUwA|b4=d^Nm-p7msSDf|9VEZ{%0C+|ziNnIuk|E3cXyWELF){I88y;uw z43xDN0(t(=3UTE8SgD?lzWzX4oAj?k55>HuH0n&>%aehjA;t;TuMmh7_jKYNxi3bB zR+g3;#Xn+~Kcb*eXzJt*h}64yJ)vy9T4>(rI-P=QhRESpeofE`rbH*a?%#z(Hr99$ zNGE72DS?{(aSHm5U2sQ!y9La~rvE!J1S0D<8tZO-cy{5^FtfS6O}*h+s>Pb2$_P-v zKHyZsYipwpFPsqRig~fTR!GVXPYC4e5_7GXt8S^*c;FT|XbAb1_t0Dj2s+?tr3MIIf& zz_B;-h^`QG2*ghzcY5!|>)NRa6iPleUNH|12p)PgeN(>hk))uYSa%Vb0QqGg$*Fos zLhm&L1VZ|%kNsZx=sM;7Dc#aU5G0LP?99s3;z;6^l=O9A`f*v^fXe}aP#W!udi|ls z4{k3hFK1N9bYD#Yn5qw5=$1)!b+wMRwtTFuy?wz9lqCH&5Q3rCTYC$lFo2WaxfKA~ z9pjVlYHiIjXvwRv*GXbzE<{OueO;JSe+M~1Ft+A5^>ma($?)r3-BL>~(&X72kgwbk zM^KvPKtNqUuaFP06Mp`e4yidQ8Q`Rrq$mX9zy}&$3?^>Eb7rUxHB_HP1%U{WuOx>2 z*g1|fx6Oz52Iz>pff1LCJ{PJ$06 z1_fq1dLpYip|;$hzee(0;#JmJs;lJ5RekMTAM0RTKQ@r)R42S(+Y2+{@vF4ugHHXE zldvK&>*4h<#9kPQvq0}JIVD81dleZ5#IIZfx==Bzm^gPPB&uM^X*8HX9$-&5^xn!#SK(y!HR|i$%GS66 zfdn(x{(eyGel@d!5SO>)yb^iEIo-l@*0iWrvye{Odb&^_tU#RJK;W)f0|^Pnn%`JyQDX%Al*&K*VpRM 
zg-VZ;^YWeLQ;%5Z{S?W#5}yOrM0?^4Fa>Xuo$!p-`qiorl6g)40IZt^C{3wncXi9B7i*|7eoGvjWrssb?R!7-FV|zyi(QOb8=@(C)<9oNT3y zhpK|_z3}7oNV(e=Dt>|i;DW{LF&MVYaf-B6f`v(^07^Cs;edA$0_k|ku`}b^($Jvi z5Ss%kYGY?9(MzfU$zf@*Vi-F(aEU#O`{r07X)!xQ0K`552fH2#SA3x|6nhf2PctGE7 z0!g>`@DOo!c2tSVeFzrq46js5csF*bXs@%4xdyTo|n+zp;KrboP5I-RfL_imU zhJz=L0K~4cG6ozReo_1b`~-~O@}3K7H>sb2!ra_=N$(I`&H@$vnokwXBSiV}N@775 zP&OcQFj}iP0oITKuu5pH9;( bytes): A callable that returns the bytes to save, or bytes directly. + obj (Union[bytes, Callable() -> bytes]): + The bytes to save or a callable that returns them. path (str): The file path. """ self._bytes = obj diff --git a/tools/Polygraphy/polygraphy/backend/onnx/loader.py b/tools/Polygraphy/polygraphy/backend/onnx/loader.py index 70af98b4..31005758 100644 --- a/tools/Polygraphy/polygraphy/backend/onnx/loader.py +++ b/tools/Polygraphy/polygraphy/backend/onnx/loader.py @@ -46,7 +46,8 @@ class BaseLoadOnnxCopy(BaseLoader): def __init__(self, model, copy=None): """ Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. copy (bool): Whether to create a copy of the model first. Defaults to False. """ @@ -77,7 +78,7 @@ def __enter__(self): if self.USE_GS_GRAPH: self.graph = model.copy() else: - self.graph = gs.import_onnx(model) + self.graph = gs_from_onnx(model) return self def __exit__(self, exc_type, exc_value, traceback): @@ -87,6 +88,31 @@ def __exit__(self, exc_type, exc_value, traceback): self.retval = gs.export_onnx(self.graph, do_type_check=False) +@mod.export(funcify=True) +class GsFromOnnx(BaseLoader): + """ + Functor that creates an ONNX-GraphSurgeon graph from an ONNX ModelProto. + """ + + def __init__(self, model): + """ + Creates an ONNX-GraphSurgeon graph from an ONNX ModelProto. + + Args: + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. 
+ """ + self._model = model + + def call_impl(self): + """ + Returns: + onnx_graphsurgeon.Graph: The ONNX-GraphSurgeon representation of the ONNX model + """ + model, _ = util.invoke_if_callable(self._model) + return gs.import_onnx(model) + + @mod.export(funcify=True) class OnnxFromPath(BaseLoader): """ @@ -130,9 +156,8 @@ def __init__(self, graph, opset=None, optimize=None, fold_constant=None): Converts a TensorFlow model into ONNX. Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a TensorFlow - graph and output names. + graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. opset (int): The ONNX opset to use during conversion. @@ -179,7 +204,6 @@ def call_impl(self): return onnx_graph.make_model("model") -@mod.export_deprecated_alias("ModifyOnnx", remove_in="0.32.0") @mod.export(funcify=True) class ModifyOutputs(BaseLoadOnnxCopy): """ @@ -191,7 +215,8 @@ def __init__(self, model, outputs=None, exclude_outputs=None, copy=None): Modifies outputs of an ONNX model. Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. outputs (Sequence[str]): Names of tensors to mark as outputs. If provided, this will override the @@ -237,7 +262,8 @@ def __init__(self, model, copy=None): Converts all floating point tensors in the model to 16-bit precision. Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. copy (bool): Whether to create a copy of the model first. Defaults to False. """ super().__init__(model, copy) @@ -280,7 +306,8 @@ def __init__( Fold constants in an ONNX model. 
Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. num_passes (int): The number of constant folding passes to run. @@ -327,7 +354,7 @@ def call_impl(self): """ def run_const_fold_pass(model): - graph = gs.import_onnx(model) + graph = gs_from_onnx(model) del model try: @@ -404,8 +431,8 @@ def __init__(self, model, error_ok=None, external_data_dir=None, save_to_disk_th Run shape inference on an ONNX model. Args: - model (Callable() -> onnx.ModelProto): - A loader that can supply an ONNX model, or a path to a model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one, or a path to a model. Supports models larger than the 2 GiB protobuf limit. error_ok (bool): @@ -492,8 +519,8 @@ def __init__(self, model, input_metadata=None, output_metadata=None, check_meta= Extracts a subgraph from an ONNX model. Args: - model (Callable() -> Union[onnx.ModelProto, onnx_graphsurgeon.Graph]): - A loader that can supply an ONNX model or an ONNX-GraphSurgeon graph. + model (Union[Union[onnx.ModelProto, onnx_graphsurgeon.Graph], Callable() -> Union[onnx.ModelProto, onnx_graphsurgeon.Graph]]): + An ONNX model or ONNX-GraphSurgeon Graph or a callable that returns one. input_metadata (TensorMetadata): Metadata for the inputs of the subgraph. @@ -580,7 +607,8 @@ def __init__(self, model, path, external_data_path=None, size_threshold=None, al Saves an ONNX model to the specified path. Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. path (str): Path at which to write the ONNX model. external_data_path (str): Path to save external data. @@ -660,7 +688,8 @@ def __init__(self, model): Serializes an ONNX model. 
Args: - model (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + model (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. """ self._model = model diff --git a/tools/Polygraphy/polygraphy/backend/onnx/util.py b/tools/Polygraphy/polygraphy/backend/onnx/util.py index ac24be64..a256c314 100644 --- a/tools/Polygraphy/polygraphy/backend/onnx/util.py +++ b/tools/Polygraphy/polygraphy/backend/onnx/util.py @@ -20,8 +20,9 @@ from polygraphy.common import TensorMetadata from polygraphy.logger import G_LOGGER -onnx = mod.lazy_import("onnx") +gs = mod.lazy_import("onnx_graphsurgeon") numpy_helper = mod.lazy_import("onnx.numpy_helper") +onnx = mod.lazy_import("onnx") def get_num_nodes(model): @@ -288,3 +289,37 @@ def process_attr(attr_str: str): onnx_str += "\n" return util.indent_block(onnx_str, indent_level) + + +## +## ONNX-GraphSurgeon utilities +## + + +def meta_from_gs_tensors(tensors): + """Get TensorMetadata from a list of ONNX-GraphSurgeon tensors""" + meta = TensorMetadata() + for tensor in tensors: + meta.add(tensor.name, tensor.dtype, tensor.shape) + return meta + + +def set_shapes_from_layerwise_meta(graph, layerwise_meta): + for tensor in graph.tensors().values(): + if isinstance(tensor, gs.Variable) and tensor.name in layerwise_meta: + tensor.shape = layerwise_meta[tensor.name].shape + tensor.dtype = layerwise_meta[tensor.name].dtype + + +def lower_constant_nodes(graph): + """Converts the outputs of Constant nodes into constant tensors, removing the nodes""" + remove_nodes = set() + with graph.node_ids(): + for node in graph.nodes: + if node.op == "Constant" and "value" in node.attrs: + node.outputs[0].to_constant(node.attrs["value"].values) + remove_nodes.add(node.id) + # Iterate from the end so we don't shift the list under us. 
+ for node_id in sorted(remove_nodes, reverse=True): + del graph.nodes[node_id] + return graph diff --git a/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py b/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py index cd80ca59..18c64043 100644 --- a/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py +++ b/tools/Polygraphy/polygraphy/backend/onnxrt/loader.py @@ -19,7 +19,6 @@ onnxruntime = mod.lazy_import("onnxruntime") -@mod.export_deprecated_alias("SessionFromOnnxBytes", remove_in="0.32.0") @mod.export(funcify=True) class SessionFromOnnx(BaseLoader): """ @@ -31,8 +30,8 @@ def __init__(self, model_bytes): Builds an ONNX-Runtime inference session. Args: - model_bytes (Callable() -> Union[bytes, str]): - A serialized ONNX model or a path to a model, or a callable that returns the same. + model_bytes (Union[Union[bytes, str], Callable() -> Union[bytes, str]]): + A serialized ONNX model or a path to a model or a callable that returns one of those. """ self._model_bytes_or_path = model_bytes diff --git a/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py b/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py index d5f3a8ec..796a302f 100644 --- a/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/onnxrt/runner.py @@ -16,7 +16,7 @@ import time from collections import OrderedDict -from polygraphy import func, mod, util +from polygraphy import mod, util from polygraphy.backend.base import BaseRunner from polygraphy.common import TensorMetadata @@ -32,11 +32,8 @@ class OnnxrtRunner(BaseRunner): def __init__(self, sess, name=None): """ Args: - sess (Callable() -> onnxruntime.InferenceSession): - A callable that can supply an ONNX-Runtime inference session. - This callable is invoked whenever the runner is activated. - - Alternatively, the inference session may be supplied directly. 
+ sess (Union[onnxruntime.InferenceSession, Callable() -> onnxruntime.InferenceSession]): + An ONNX-Runtime inference session or a callable that returns one. """ super().__init__(name=name, prefix="onnxrt-runner") self._sess = sess @@ -44,21 +41,6 @@ def __init__(self, sess, name=None): def activate_impl(self): self.sess, _ = util.invoke_if_callable(self._sess) - def deactivate_impl(self): - del self.sess - - def infer_impl(self, feed_dict): - start = time.time() - inference_outputs = self.sess.run(None, feed_dict) - end = time.time() - - out_dict = OrderedDict() - for node, out in zip(self.sess.get_outputs(), inference_outputs): - out_dict[node.name] = out - self.inference_time = end - start - return out_dict - - @func.constantmethod def get_input_metadata_impl(self): ONNX_RT_TYPE_TO_NP = { "tensor(double)": np.float64, @@ -81,3 +63,17 @@ def get_input_metadata_impl(self): dtype = ONNX_RT_TYPE_TO_NP[node.type] if node.type in ONNX_RT_TYPE_TO_NP else None meta.add(node.name, dtype=dtype, shape=node.shape) return meta + + def infer_impl(self, feed_dict): + start = time.time() + inference_outputs = self.sess.run(None, feed_dict) + end = time.time() + + out_dict = OrderedDict() + for node, out in zip(self.sess.get_outputs(), inference_outputs): + out_dict[node.name] = out + self.inference_time = end - start + return out_dict + + def deactivate_impl(self): + del self.sess diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/__init__.py b/tools/Polygraphy/polygraphy/backend/pluginref/__init__.py new file mode 100644 index 00000000..e8904ded --- /dev/null +++ b/tools/Polygraphy/polygraphy/backend/pluginref/__init__.py @@ -0,0 +1,17 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from polygraphy.backend.pluginref.runner import * diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/references.py b/tools/Polygraphy/polygraphy/backend/pluginref/references.py new file mode 100644 index 00000000..fb71311e --- /dev/null +++ b/tools/Polygraphy/polygraphy/backend/pluginref/references.py @@ -0,0 +1,85 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from polygraphy import mod +from polygraphy.logger import G_LOGGER + +np = mod.lazy_import("numpy") +gs = mod.lazy_import("onnx_graphsurgeon") + +OP_REGISTRY = {} # Dict[str, Callable]: Maps op names to reference implementations + + +def register(op): + """ + Registers a function as the reference implementation for a given op. + + Args: + op (str): The name of the op for which to register this function. 
+ """ + + def register_impl(func): + def wrapped_func(node, intermediate_tensors): + inputs = [] + for inp in node.inputs: + if inp.is_empty(): # Optional input + inputs.append(None) + elif isinstance(inp, gs.Constant): + inputs.append(inp.values) + elif inp.name in intermediate_tensors: + inputs.append(intermediate_tensors[inp.name]) + else: + G_LOGGER.internal_error( + "Input: {:} was not found in intermediate tensors and is not a constant.\n" + "Note: Intermediate tensors include: {:}".format(inp.name, list(intermediate_tensors.keys())) + ) + + outputs = func(node.attrs, *inputs) + if len(outputs) != len(node.outputs): + G_LOGGER.internal_error( + "{:} reference implementation returned the wrong number of outputs.\n" + "Note: Expected {:} but recevied {:}".format(op, len(node.outputs), len(outputs)) + ) + + return {out_tensor.name: out for out_tensor, out in zip(node.outputs, outputs)} + + OP_REGISTRY[op] = wrapped_func + return wrapped_func + + return register_impl + + +@register("Identity") +def run_identity(attrs, x): + return [x] + + +@register("InstanceNormalization") +def run_instancenorm(attrs, x, weights, bias): + epsilon = attrs.get("epsilon", 1.0e-5) + + rank = len(x.shape) + axis = tuple(range(2, rank)) + mean = np.mean(x, axis=axis, keepdims=True) + var = np.var(x, axis=axis, keepdims=True) + + # Weights and bias needs to be broadcasted to shape of X. C dimension should be a wildcard. 
+ broadcast_shape = [-1] + [1] * (rank - 2) + weights = weights.reshape(broadcast_shape) + bias = bias.reshape(broadcast_shape) + + res = weights * (x - mean) / np.sqrt(var + epsilon) + bias + return [res] diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/requirements.txt b/tools/Polygraphy/polygraphy/backend/pluginref/requirements.txt new file mode 100644 index 00000000..80c7f8f5 --- /dev/null +++ b/tools/Polygraphy/polygraphy/backend/pluginref/requirements.txt @@ -0,0 +1,2 @@ +numpy +onnx_graphsurgeon diff --git a/tools/Polygraphy/polygraphy/backend/pluginref/runner.py b/tools/Polygraphy/polygraphy/backend/pluginref/runner.py new file mode 100644 index 00000000..fc0b0b84 --- /dev/null +++ b/tools/Polygraphy/polygraphy/backend/pluginref/runner.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import copy +import time +from collections import OrderedDict + +from polygraphy import mod, util +from polygraphy.backend.base import BaseRunner +from polygraphy.backend.pluginref.references import OP_REGISTRY +from polygraphy.logger import G_LOGGER + +np = mod.lazy_import("numpy") +onnx_util = mod.lazy_import("polygraphy.backend.onnx.util") + + +@mod.export() +class PluginRefRunner(BaseRunner): + """ + Runner for reference checking TensorRT plugins + """ + + def __init__(self, graph, name=None): + """ + Args: + graph (Union[onnx_graphsurgeon.Graph, Callable() -> onnx_graphsurgeon.Graph]): + An ONNX-GraphSurgeon graph or a callable that returns one. + name (str): + The human-readable name prefix to use for this runner. + A runner count and timestamp will be appended to this prefix. + """ + super().__init__(name=name, prefix="pluginref-runner") + self._graph = graph + + def activate_impl(self): + self.graph, _ = util.invoke_if_callable(self._graph) + + def get_input_metadata_impl(self): + return onnx_util.meta_from_gs_tensors(self.graph.inputs) + + def infer_impl(self, feed_dict): + start = time.time() + + intermediate_tensors = copy.copy(feed_dict) + for node in self.graph.nodes: + if node.op not in OP_REGISTRY: + G_LOGGER.critical("Op: {:} does not have a reference implementation registered!".format(node.op)) + + intermediate_tensors.update(OP_REGISTRY[node.op](node, intermediate_tensors)) + + outputs = OrderedDict() + for out in self.graph.outputs: + outputs[out.name] = intermediate_tensors[out.name] + + end = time.time() + + self.inference_time = end - start + return outputs + + def deactivate_impl(self): + del self.graph diff --git a/tools/Polygraphy/polygraphy/backend/pyt/runner.py b/tools/Polygraphy/polygraphy/backend/pyt/runner.py index edafee60..cc6ca332 100644 --- a/tools/Polygraphy/polygraphy/backend/pyt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/pyt/runner.py @@ -16,7 +16,7 @@ import time from collections import OrderedDict -from 
polygraphy import func, mod, util +from polygraphy import mod, util from polygraphy.backend.base import BaseRunner torch = mod.lazy_import("torch") @@ -31,8 +31,8 @@ class PytRunner(BaseRunner): def __init__(self, model, input_metadata, output_names, name=None): """ Args: - model (Callable() -> torch.nn.Module): - A model loader that returns a torch.nn.Module or subclass. + model (Union[torch.nn.Module, Callable() -> torch.nn.Module]): + A torch.nn.Module or subclass or a callable that returns one. input_metadata (TensorMetadata): Mapping of input names to their data types and shapes. output_names (List[str]): A list of output names of the model. This information is used by the @@ -52,6 +52,9 @@ def activate_impl(self): self.model, _ = util.invoke_if_callable(self._model) self.model.eval() + def get_input_metadata_impl(self): + return self.input_metadata + def infer_impl(self, feed_dict): with torch.no_grad(): inputs = [ @@ -69,7 +72,3 @@ def infer_impl(self, feed_dict): def deactivate_impl(self): del self.model - - @func.constantmethod - def get_input_metadata_impl(self): - return self.input_metadata diff --git a/tools/Polygraphy/polygraphy/backend/tf/loader.py b/tools/Polygraphy/polygraphy/backend/tf/loader.py index 172fc839..909cb5cd 100644 --- a/tools/Polygraphy/polygraphy/backend/tf/loader.py +++ b/tools/Polygraphy/polygraphy/backend/tf/loader.py @@ -35,8 +35,8 @@ def __init__(self, graph): Freezes a TensorFlow graph and folds constants. Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a TensorFlow graph and output names. + graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. """ self._graph = graph @@ -244,8 +244,8 @@ def __init__( Optimizes a TensorFlow model using TF-TRT. 
Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a TensorFlow graph and output names. + graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. max_workspace_size (int): The maximum workspace size. fp16 (bool): Whether to run in FP16 mode. max_batch_size (int): The maximum batch size. @@ -304,7 +304,6 @@ def call_impl(self): return graph, tf_util.get_graph_output_names(graph) -@mod.export_deprecated_alias("ModifyGraph", remove_in="0.32.0") @mod.export(funcify=True) class ModifyGraphOutputs(BaseLoader): """ @@ -316,9 +315,8 @@ def __init__(self, graph, outputs=None): Modifies outputs of a TensorFlow graph. Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a - TensorFlow graph and output names. + graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. outputs (List[str]): @@ -355,9 +353,8 @@ def __init__(self, graph, path=None, tensorboard_dir=None, engine_dir=None): Writes out artifacts from a TensorFlow Graph. Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a - TensorFlow graph and output names. + graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. path (str): Path at which to save the frozen graphdef. @@ -447,12 +444,12 @@ def __init__(self, graph, config=None): Creates a TensorFlow session. Args: - graph (Callable() -> Tuple[tf.Graph, Sequence[str]]): - A callable that can supply a tuple containing a - TensorFlow graph and output names. 
+ graph (Union[Tuple[tf.Graph, Sequence[str]], Callable() -> Tuple[tf.Graph, Sequence[str]]]): + A tuple containing a TensorFlow graph and output names or a callable that returns one. - config (Callable() -> tf.ConfigProto): + config (Union[tf.ConfigProto, Callable() -> tf.ConfigProto]): + A TensorFlow ConfigProto or a callable that returns one. """ self.graph = graph self.config = util.default(config, CreateConfig()) diff --git a/tools/Polygraphy/polygraphy/backend/tf/runner.py b/tools/Polygraphy/polygraphy/backend/tf/runner.py index c90eab25..2ab30b2a 100644 --- a/tools/Polygraphy/polygraphy/backend/tf/runner.py +++ b/tools/Polygraphy/polygraphy/backend/tf/runner.py @@ -18,7 +18,7 @@ import time from collections import OrderedDict -from polygraphy import func, mod, util +from polygraphy import mod, util from polygraphy.backend.base import BaseRunner from polygraphy.backend.tf import util as tf_util from polygraphy.logger import G_LOGGER @@ -35,9 +35,8 @@ class TfRunner(BaseRunner): def __init__(self, sess, timeline_dir=None, name=None): """ Args: - sess (Callable() -> Tuple[tf.Session, Sequence[str]]): - A callable that can supply a tuple containing a - TensorFlow session and output names. + sess (Union[Tuple[tf.Session, Sequence[str]], Callable() -> Tuple[tf.Session, Sequence[str]]]): + A tuple containing a TensorFlow session and output names or a callable that returns one. 
timeline_dir (str): @@ -64,15 +63,9 @@ def __init__(self, sess, timeline_dir=None, name=None): def activate_impl(self): (self.sess, self.output_names), _ = util.invoke_if_callable(self._sess) - @func.constantmethod def get_input_metadata_impl(self): return tf_util.get_input_metadata(self.sess.graph) - def deactivate_impl(self): - self.sess.close() - del (self.sess, self.output_names) - self.num_inferences = 0 - def infer_impl(self, feed_dict): G_LOGGER.extra_verbose("Received feed_dict: {:}".format(feed_dict)) start = time.time() @@ -99,3 +92,8 @@ def infer_impl(self, feed_dict): self.num_inferences += 1 return out_dict + + def deactivate_impl(self): + self.sess.close() + del (self.sess, self.output_names) + self.num_inferences = 0 diff --git a/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py b/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py index ef0a274f..2a78aebc 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py +++ b/tools/Polygraphy/polygraphy/backend/trt/algorithm_selector.py @@ -149,7 +149,7 @@ def decode_algo_io(io_list): class TacticReplayData(TypedDict(lambda: str, lambda: Algorithm)): """ Maps layer names to corresponding tactics. - More specifically, it is an `OrderedDict[str, Algorithm]` + More specifically, it is an ``OrderedDict[str, Algorithm]``. """ def add(self, name, algorithm): diff --git a/tools/Polygraphy/polygraphy/backend/trt/calibrator.py b/tools/Polygraphy/polygraphy/backend/trt/calibrator.py index da33f613..e879e44b 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/calibrator.py +++ b/tools/Polygraphy/polygraphy/backend/trt/calibrator.py @@ -36,7 +36,7 @@ def Calibrator( arrays, Polygraphy DeviceViews, or GPU pointers. In case you don't know details about the inputs ahead of time, you can access the - `input_metadata` property in your data loader, which will be set to an `TensorMetadata` instance. 
+ `input_metadata` property in your data loader, which will be set to a ``TensorMetadata`` instance. Note that this does not work for generators or lists. The number of calibration batches is controlled by the number of items supplied @@ -48,7 +48,7 @@ def Calibrator( By default, the calibration cache is not saved. BaseClass (type): The type of calibrator to inherit from. - Defaults to trt.IInt8MinMaxCalibrator. + Defaults to ``trt.IInt8MinMaxCalibrator``. batch_size (int): [DEPRECATED] The size of each batch provided by the data loader. quantile (float): @@ -62,7 +62,7 @@ def Calibrator( algo (trt.CalibrationAlgoType): Calibration algorithm to use for ``trt.IInt8Calibrator``. Has no effect for other calibrator types. - Defaults to trt.CalibrationAlgoType.MINMAX_CALIBRATION. + Defaults to ``trt.CalibrationAlgoType.MINMAX_CALIBRATION``. """ BaseClass = util.default(BaseClass, trt.IInt8MinMaxCalibrator) diff --git a/tools/Polygraphy/polygraphy/backend/trt/loader.py b/tools/Polygraphy/polygraphy/backend/trt/loader.py index 382f3bb6..bb68b5ab 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/loader.py +++ b/tools/Polygraphy/polygraphy/backend/trt/loader.py @@ -18,7 +18,7 @@ import ctypes import time -from polygraphy import config, constants, mod, util +from polygraphy import constants, mod, util from polygraphy.backend.base import BaseLoader from polygraphy.backend.trt import util as trt_util from polygraphy.backend.trt.profile import Profile @@ -49,7 +49,7 @@ def __init__(self, plugins=None, obj=None): Args: plugins (List[str]): A list of paths to plugin libraries to load before inference. - obj (BaseLoader): + obj (object): An object or callable to return or call respectively. If ``obj`` is callable, extra parameters will be forwarded to ``obj``. If ``obj`` is not callable, it will be returned. @@ -136,7 +136,8 @@ def __init__(self, model_bytes, explicit_precision=None): Parses an ONNX model.
Args: - model_bytes (Callable() -> bytes): A loader that can supply a serialized ONNX model. + model_bytes (Union[bytes, Callable() -> bytes]): + A serialized ONNX model or a callable that returns one. explicit_precision (bool): Whether to construct the TensorRT network with explicit precision enabled. """ super().__init__(explicit_precision) @@ -193,7 +194,6 @@ def call_impl(self): return network_from_onnx_bytes(bytes_from_path(path), self.explicit_precision) -@mod.export_deprecated_alias("ModifyNetwork", remove_in="0.32.0") @mod.export(funcify=True) class ModifyNetworkOutputs(BaseLoader): """ @@ -205,11 +205,9 @@ def __init__(self, network, outputs=None, exclude_outputs=None): Modifies outputs in a TensorRT ``INetworkDefinition``. Args: - network (Callable() -> trt.Builder, trt.INetworkDefinition): - A callable capable of returning a TensorRT Builder and INetworkDefinition. The callable may - have at most 3 return values if another object needs to be kept alive for the duration of the network, - e.g., in the case of a parser. The first and second return values must - always be the builder and network respectively. ModifyNetworkOutputs will never take ownership of these. + network (Union[Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]], Callable() -> Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]]]): + A tuple containing a TensorRT builder, network and optionally parser or a callable that returns one. + To omit the parser, return a tuple containing just the builder and network. outputs (Sequence[str]): Names of tensors to mark as outputs. If provided, this will override the outputs @@ -268,6 +266,8 @@ def __init__( sparse_weights=None, tactic_sources=None, restricted=None, + use_dla=None, + allow_gpu_fallback=None, ): """ Creates a TensorRT IBuilderConfig that can be used by EngineFromNetwork. @@ -320,6 +320,13 @@ def __init__( Whether to enable safety scope checking in the builder.
This will check if the network and builder configuration are compatible with safety scope. Defaults to False. + use_dla (bool): + [EXPERIMENTAL] Whether to enable DLA as the default device type. + Defaults to False. + allow_gpu_fallback (bool): + [EXPERIMENTAL] When DLA is enabled, whether to allow layers to fall back to GPU if they cannot be run on DLA. + Has no effect if DLA is not enabled. + Defaults to False. """ self.max_workspace_size = util.default(max_workspace_size, 1 << 24) self.tf32 = util.default(tf32, False) @@ -333,6 +340,8 @@ def __init__( self.algorithm_selector = algorithm_selector self.sparse_weights = util.default(sparse_weights, False) self.tactic_sources = tactic_sources + self.use_dla = util.default(use_dla, False) + self.allow_gpu_fallback = util.default(allow_gpu_fallback, False) if self.calibrator is not None and not self.int8: G_LOGGER.warning( @@ -410,6 +419,13 @@ def try_set_flag(flag_name): if self.sparse_weights: try_set_flag("SPARSE_WEIGHTS") + if self.use_dla: + config.default_device_type = trt.DeviceType.DLA + config.DLA_core = 0 + + if self.allow_gpu_fallback: + try_set_flag("GPU_FALLBACK") + if self.tactic_sources is not None: tactic_sources_flag = 0 for source in self.tactic_sources: @@ -451,19 +467,13 @@ def __init__(self, network, config=None, save_timing_cache=None): Builds and serializes TensorRT engine. Args: - network (Callable() -> trt.Builder, trt.INetworkDefinition): - A callable capable of returning a TensorRT Builder and INetworkDefinition. The returned builder - and network are owned by EngineFromNetwork and should not be freed manually. The callable may - have at most 3 return values if another object needs to be kept alive for the duration of the network, - e.g., in the case of a parser. EngineFromNetwork will take ownership of the third return value, and, - like the network, it should not be freed by the callable. The first and second return values must - always be the builder and network respectively. 
- If instead of a loader, the network, builder, and optional parser arguments are provided directly, - then EngineFromNetwork will *not* deallocate them. + network (Union[Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]], Callable() -> Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]]]): + A tuple containing a TensorRT builder, network and optionally parser or a callable that returns one. + To omit the parser, return a tuple containing just the builder and network. config (Callable(trt.Builder, trt.INetworkDefinition) -> trt.IBuilderConfig): - A callable that returns a TensorRT builder configuration. If not supplied, + A TensorRT builder configuration or a callable that returns one. If not supplied, a `CreateConfig` instance with default parameters is used. save_timing_cache (Union[str, file-like]): A path or file-like object at which to save a tactic timing cache. @@ -582,8 +592,8 @@ def __init__(self, serialized_engine): Deserializes an engine from a buffer. Args: - serialized_engine (Callable() -> Union[str, bytes]): - Either a loader that can supply a memory buffer, or a memory buffer itself. + serialized_engine (Union[Union[str, bytes], Callable() -> Union[str, bytes]]): + The serialized engine bytes or a callable that returns them. """ self._serialized_engine = serialized_engine @@ -621,8 +631,8 @@ def __init__(self, engine): Serializes an engine. Args: - engine (Callable() -> trt.ICudaEngine): - Either a loader that can supply an engine, or the engine itself. + engine (Union[trt.ICudaEngine, Callable() -> trt.ICudaEngine]): + An engine or a callable that returns one. """ self._engine = engine @@ -652,8 +662,8 @@ def __init__(self, engine, path): Saves an engine to the provided path. Args: - engine (Callable() -> trt.ICudaEngine): - A callable that can supply a TensorRT engine. + engine (Union[trt.ICudaEngine, Callable() -> trt.ICudaEngine]): + An engine or a callable that returns one. path (str): The path at which to save the engine.
@@ -686,25 +696,22 @@ def __init__(self, network) -> None: """ [HIGHLY EXPERIMENTAL] Creates an ONNX-like, but **not** valid ONNX, model from a TensorRT network. This uses the ONNX format, but generates nodes that are **not** valid ONNX operators. - Hence, the resulting model is **not** valid ONNX. - This should be used **only** for visualization or debugging purposes. + Hence, this should be used **only** for visualization or debugging purposes. - The resulting model does **not** include enough information to faithfully reconstruct the TensorRT network. + The resulting model does **not** include enough information to faithfully reconstruct the TensorRT network, + but does preserve the structure of the network and many of the layer parameters. Args: - network (Callable() -> trt.Builder, trt.INetworkDefinition): - A callable capable of returning a TensorRT Builder and INetworkDefinition. The callable may - have at most 3 return values if another object needs to be kept alive for the duration of the network, - e.g., in the case of a parser. The first and second return values must always be the builder and network respectively. - If instead of a loader, the network, builder, and optional parser arguments are provided directly, - then OnnxLikeFromNetwork will *not* deallocate them. + network (Union[Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]], Callable() -> Tuple[trt.Builder, trt.INetworkDefinition, Optional[parser]]): + A tuple containing a TensorRT builder, network and optionally parser or a callable that returns one. + To omit the parser, return a tuple containing just the builder and network. """ self._network = network def call_impl(self): """ Returns: - onnx.ModelProto: The ONNX-like, but **not** valid ONNX, model. + onnx.ModelProto: The ONNX-like, but **not** valid ONNX, representation of the TensorRT network. 
""" ret, owns_network = util.invoke_if_callable(self._network) builder, network, parser = util.unpack_args(ret, num=3) diff --git a/tools/Polygraphy/polygraphy/backend/trt/profile.py b/tools/Polygraphy/polygraphy/backend/trt/profile.py index 1118071c..3aec9af5 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/profile.py +++ b/tools/Polygraphy/polygraphy/backend/trt/profile.py @@ -52,7 +52,7 @@ class Profile(TypedDict(lambda: str, lambda: ShapeTuple)): An ordered dictionary that represents a single optimization profile that can be used to build an engine. - More specifically, this is a OrderedDict[str, ShapeTuple] which maps binding + More specifically, it is an ``OrderedDict[str, ShapeTuple]`` which maps binding names to a set of min/opt/max shapes. """ diff --git a/tools/Polygraphy/polygraphy/backend/trt/runner.py b/tools/Polygraphy/polygraphy/backend/trt/runner.py index c1df5fda..ce44506d 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/runner.py +++ b/tools/Polygraphy/polygraphy/backend/trt/runner.py @@ -18,7 +18,7 @@ import time from collections import OrderedDict -from polygraphy import cuda, func, mod, util +from polygraphy import cuda, mod, util from polygraphy.backend.base import BaseRunner from polygraphy.backend.trt import util as trt_util from polygraphy.logger import G_LOGGER @@ -39,15 +39,9 @@ class TrtRunner(BaseRunner): def __init__(self, engine, name=None): """ Args: - engine (Callable() -> Union[trt.ICudaEngine, trt.IExecutionContext]): - A callable that can supply either a TensorRT engine or execution context. + engine (Union[Union[trt.ICudaEngine, trt.IExecutionContext], Callable() -> Union[trt.ICudaEngine, trt.IExecutionContext]]): + A TensorRT engine or execution context or a callable that returns one. If an engine is provided, the runner will create a context automatically. - This callable is invoked whenever the runner is activated. 
- - Alternatively, the engine or context may be supplied directly instead of - through a callable, in which case the runner will *not* take ownership of it, - and therefore will not destroy it. - name (str): The human-readable name prefix to use for this runner. @@ -56,12 +50,6 @@ def __init__(self, engine, name=None): super().__init__(name=name, prefix="trt-runner") self._engine_or_context = engine - @func.constantmethod - def get_input_metadata_impl(self): - start_binding, end_binding = trt_util.get_active_profile_bindings(self.context) - # This function always uses binding names of the 0th profile. - return trt_util.get_input_metadata_from_engine(self.context.engine, start_binding, end_binding) - def activate_impl(self): def make_buffers(engine): """ @@ -135,6 +123,11 @@ def set_profile(self, index): else: self.context.set_optimization_profile_async(index, self.stream.ptr) + def get_input_metadata_impl(self): + start_binding, end_binding = trt_util.get_active_profile_bindings(self.context) + # This function always uses binding names of the 0th profile. + return trt_util.get_input_metadata_from_engine(self.context.engine, start_binding, end_binding) + def _set_shapes_from_feed_dict(self, feed_dict): """ Sets context shapes according to the provided feed_dict. @@ -193,7 +186,28 @@ def is_dynamic_shape_input(binding): return start_binding, end_binding - def infer_impl(self, feed_dict): + def infer_impl(self, feed_dict, copy_outputs_to_host=True): + """ + Implementation for running inference with TensorRT. + Do not call this method directly - use ``infer()`` instead, + which will forward unrecognized arguments to this method. + + In addition to accepting NumPy arrays in the feed_dict, this runner can also + accept Polygraphy DeviceViews. In that case, no host-to-device copy is necessary for the inputs. 
+ + Args: + feed_dict (OrderedDict[str, Union[numpy.ndarray, DeviceView]]): + A mapping of input tensor names to corresponding input NumPy arrays + or Polygraphy DeviceViews. + + copy_outputs_to_host (bool): + Whether to copy inference outputs back to the host. + If this is False, Polygraphy DeviceViews are returned + instead of NumPy arrays. + Defaults to True. + """ + + start = time.time() start_binding, end_binding = self._set_shapes_from_feed_dict(feed_dict) # Resize output device buffers - host buffers will be automatically resized by copy_to @@ -203,8 +217,6 @@ def infer_impl(self, feed_dict): shape = tuple(self.context.get_binding_shape(binding)) self.device_buffers[name].resize(shape) - start = time.time() - # Use a shallow copy in case we need to replace our allocated buffers with provided DeviceViews. dev_bufs = copy.copy(self.device_buffers) for name, buffer in feed_dict.items(): @@ -214,8 +226,8 @@ def infer_impl(self, feed_dict): dev_bufs[name].copy_from(buffer, self.stream) else: G_LOGGER.critical( - "Unrecognized type in feed_dict: {:} for input: {:}.\n" - "Please provide either a NumPy array or Polygraphy DeviceView. ".format(type(buffer).__name__, name) + "For input: {:}, unrecognized type in feed_dict: {:}.\n" + "Please provide either a NumPy array or Polygraphy DeviceView. ".format(name, type(buffer).__name__) ) # Need to offset bindings in case the active profile is not 0. @@ -224,15 +236,20 @@ def infer_impl(self, feed_dict): if not success: G_LOGGER.critical("Model execution failed. 
Please see the log messages above for details") + output_buffers = OrderedDict() for name, buffer in self.host_output_buffers.items(): - self.host_output_buffers[name] = dev_bufs[name].copy_to(buffer, self.stream) + if copy_outputs_to_host: + self.host_output_buffers[name] = dev_bufs[name].copy_to(buffer, self.stream) + output_buffers[name] = self.host_output_buffers[name] + else: + output_buffers[name] = dev_bufs[name].view() self.stream.synchronize() end = time.time() self.inference_time = end - start - return self.host_output_buffers + return output_buffers def deactivate_impl(self): with contextlib.ExitStack() as stack: @@ -255,8 +272,8 @@ def deactivate_impl(self): ) # Note: This can be removed once TRT 6 support is dropped. - def infer(self, feed_dict, check_inputs=None): + def infer(self, feed_dict, check_inputs=None, *args, **kwargs): # Disable checks by default on TRT 6.0 due to implicit batch semantics. if mod.version(trt.__version__) < mod.version("7.0"): - return super().infer(feed_dict, util.default(check_inputs, False)) - return super().infer(feed_dict, util.default(check_inputs, True)) + return super().infer(feed_dict, util.default(check_inputs, False), *args, **kwargs) + return super().infer(feed_dict, util.default(check_inputs, True), *args, **kwargs) diff --git a/tools/Polygraphy/polygraphy/backend/trt/util.py b/tools/Polygraphy/polygraphy/backend/trt/util.py index f7fe3e4a..1da50c14 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/util.py +++ b/tools/Polygraphy/polygraphy/backend/trt/util.py @@ -65,7 +65,7 @@ def try_add(layer_type, layer_cls): except AttributeError: if config.INTERNAL_CORRECTNESS_CHECKS: G_LOGGER.warning( - "Could not find one or more of layer type: {:} or layer class: {:}".format(layer_type, layer_cls) + "Could not find layer type: {:} or layer class: {:}".format(layer_type, layer_cls) ) else: layer_class_mapping[layer_type] = layer_cls @@ -109,6 +109,9 @@ def try_add(layer_type, layer_cls): try_add("CONDITION", 
"IConditionLayer") try_add("CONDITIONAL_INPUT", "IIfConditionalInputLayer") try_add("CONDITIONAL_OUTPUT", "IIfConditionalOutputLayer") + try_add("ASSERTION", "IAssertionLayer") + try_add("SCATTER", "IScatterLayer") + try_add("EINSUM", "IEinsumLayer") return layer_class_mapping diff --git a/tools/Polygraphy/polygraphy/backend/trt_legacy.py b/tools/Polygraphy/polygraphy/backend/trt_legacy.py index f5ac1d2a..b877dd62 100644 --- a/tools/Polygraphy/polygraphy/backend/trt_legacy.py +++ b/tools/Polygraphy/polygraphy/backend/trt_legacy.py @@ -129,7 +129,8 @@ def __init__(self, onnx_loader): implicit batch version of the parser. Args: - onnx_loader (Callable() -> onnx.ModelProto): A loader that can supply an ONNX model. + onnx_loader (Union[onnx.ModelProto, Callable() -> onnx.ModelProto]): + An ONNX model or a callable that returns one. """ super().__init__(explicit_precision=False, explicit_batch=False) self.onnx_loader = onnx_loader @@ -172,15 +173,22 @@ def __call__(self): network = builder.create_network() parser = trt.CaffeParser() - model_tensors = parser.parse(deploy=self.deploy, model=self.model, network=network, dtype=self.dtype) + parser.parse(deploy=self.deploy, model=self.model, network=network, dtype=self.dtype) if self.outputs and self.outputs != constants.MARK_ALL: - for output in self.outputs: - network.mark_output(model_tensors.find(output)) + trt_util.mark_outputs(network, self.outputs) return builder, network, parser, self.batch_size +def _input_metadata_from_network(network): + input_metadata = TensorMetadata() + for index in range(network.num_inputs): + tensor = network.get_input(index) + input_metadata.add(name=tensor.name, dtype=np.dtype(trt.nptype(tensor.dtype)), shape=tensor.shape) + return input_metadata + + # Builds and tracks a single engine for a single network. 
class TrtLegacyRunner(BaseRunner): """ @@ -208,6 +216,10 @@ def __init__( layerwise=False, plugins=[], name=None, + int8=None, + calibrator=None, + use_dla=None, + allow_gpu_fallback=None, ): """ Creates a runner that manages a single TensorRT engine. @@ -247,6 +259,10 @@ def __init__( self.layerwise = layerwise self.max_batch_size = max_batch_size + self.int8 = util.default(int8, False) + self.calibrator = calibrator + self.use_dla = use_dla + self.allow_gpu_fallback = allow_gpu_fallback def activate_impl(self): """ @@ -268,7 +284,6 @@ def activate_impl(self): def allocate_buffers(engine): input_buffers = OrderedDict() output_buffers = OrderedDict() - bindings = [] stream = cuda.Stream() G_LOGGER.verbose("Using batch size: " + str(engine.max_batch_size) + " during buffer allocation") for binding in engine: @@ -294,6 +309,10 @@ def allocate_buffers(engine): trt.init_libnvinfer_plugins(get_trt_logger(), "") builder, network, parser, model_batch_size = self.network_loader() with builder, network, parser, builder.create_builder_config() as config: + if not network: + G_LOGGER.critical("Invalid network") + G_LOGGER.super_verbose(lambda: trt_util.str_from_network(network) or "Finished logging network") + builder.max_batch_size = int(self.max_batch_size or model_batch_size or 1) config.max_workspace_size = int(self.max_workspace_size) @@ -302,24 +321,30 @@ def allocate_buffers(engine): with contextlib.suppress(AttributeError): config.clear_flag(trt.BuilderFlag.TF32) if self.fp16: - config.flags = 1 << int(trt.BuilderFlag.FP16) + config.set_flag(trt.BuilderFlag.FP16) - if not network: - G_LOGGER.critical("Invalid network") - G_LOGGER.super_verbose(lambda: trt_util.str_from_network(network) or "Finished logging network") + if self.int8: + config.set_flag(trt.BuilderFlag.INT8) + input_metadata = _input_metadata_from_network(network) + with contextlib.suppress(AttributeError): # Polygraphy calibrator has a reset method + self.calibrator.reset(input_metadata) + 
config.int8_calibrator = self.calibrator + + if self.use_dla: + config.default_device_type = trt.DeviceType.DLA + config.DLA_core = 0 + + if self.allow_gpu_fallback: + config.set_flag(trt.BuilderFlag.GPU_FALLBACK) if self.layerwise: - # In layerwise mode, every layer becomes an output. - G_LOGGER.info("Running in layerwise mode. Marking {:} layers as outputs".format(network.num_layers)) - for layer in network: - for index in range(layer.num_outputs): - out = layer.get_output(index) - if not out.is_network_output: - network.mark_output(out) + trt_util.mark_layerwise(network) G_LOGGER.info( "Building engine: max workspace size={:} bytes, max batch size={:}, fp16={:}, " - "tf32={:}".format(config.max_workspace_size, builder.max_batch_size, self.fp16, self.tf32) + "tf32={:}, int8={:}".format( + config.max_workspace_size, builder.max_batch_size, self.fp16, self.tf32, self.int8 + ) ) self.engine = builder.build_engine(network, config) diff --git a/tools/Polygraphy/polygraphy/common/constants.py b/tools/Polygraphy/polygraphy/common/constants.py index bb0ce358..56efabd2 100644 --- a/tools/Polygraphy/polygraphy/common/constants.py +++ b/tools/Polygraphy/polygraphy/common/constants.py @@ -16,4 +16,4 @@ from polygraphy import mod from polygraphy.constants import * -mod.warn_deprecated("polygraphy.common.constants", "polygraphy.constants", remove_in="0.32.0") +mod.warn_deprecated("polygraphy.common.constants", "polygraphy.constants", remove_in="0.34.0") diff --git a/tools/Polygraphy/polygraphy/common/cuda.py b/tools/Polygraphy/polygraphy/common/cuda.py index 821660b1..460d23a9 100644 --- a/tools/Polygraphy/polygraphy/common/cuda.py +++ b/tools/Polygraphy/polygraphy/common/cuda.py @@ -16,4 +16,4 @@ from polygraphy import mod from polygraphy.cuda import * -mod.warn_deprecated("polygraphy.common.cuda", "polygraphy.cuda", remove_in="0.32.0") +mod.warn_deprecated("polygraphy.common.cuda", "polygraphy.cuda", remove_in="0.34.0") diff --git 
a/tools/Polygraphy/polygraphy/common/exception.py b/tools/Polygraphy/polygraphy/common/exception.py index 0c5c03e2..f34f1100 100644 --- a/tools/Polygraphy/polygraphy/common/exception.py +++ b/tools/Polygraphy/polygraphy/common/exception.py @@ -16,4 +16,4 @@ from polygraphy import mod from polygraphy.exception import * -mod.warn_deprecated("polygraphy.common.exception", "polygraphy.exception", remove_in="0.32.0") +mod.warn_deprecated("polygraphy.common.exception", "polygraphy.exception", remove_in="0.34.0") diff --git a/tools/Polygraphy/polygraphy/common/func.py b/tools/Polygraphy/polygraphy/common/func.py index ee5a98f9..d48310f8 100644 --- a/tools/Polygraphy/polygraphy/common/func.py +++ b/tools/Polygraphy/polygraphy/common/func.py @@ -16,4 +16,4 @@ from polygraphy import mod from polygraphy.func import * -mod.warn_deprecated("polygraphy.common.func", "polygraphy.func", remove_in="0.32.0") +mod.warn_deprecated("polygraphy.common.func", "polygraphy.func", remove_in="0.34.0") diff --git a/tools/Polygraphy/polygraphy/comparator/comparator.py b/tools/Polygraphy/polygraphy/comparator/comparator.py index f5f2814a..b4dbf180 100644 --- a/tools/Polygraphy/polygraphy/comparator/comparator.py +++ b/tools/Polygraphy/polygraphy/comparator/comparator.py @@ -95,14 +95,9 @@ def run( def execute_runner(runner, loader_cache): with runner as active_runner: - input_metadata = active_runner.get_input_metadata() - G_LOGGER.info( - "{:35}\n---- Model Input(s) ----\n{:}".format(active_runner.name, input_metadata), mode=LogMode.ONCE - ) - # DataLoaderCache will ensure that the feed_dict does not contain any extra entries # based on the provided input_metadata. 
- loader_cache.set_input_metadata(input_metadata) + loader_cache.set_input_metadata(active_runner.get_input_metadata()) if warm_up: G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(active_runner.name, warm_up)) @@ -126,6 +121,13 @@ def execute_runner(runner, loader_cache): total_runtime = 0 for index, feed_dict in enumerate(loader_cache): + G_LOGGER.info( + "{:35}\n---- Inference Input(s) ----\n{:}".format( + active_runner.name, TensorMetadata().from_feed_dict(feed_dict) + ), + mode=LogMode.ONCE, + ) + G_LOGGER.extra_verbose( lambda: "{:35} | Feeding inputs:\n{:}".format(active_runner.name, util.indent_block(feed_dict)) ) @@ -139,7 +141,7 @@ def execute_runner(runner, loader_cache): ) G_LOGGER.info( - "{:35}\n---- Model Output(s) ----\n{:}".format( + "{:35}\n---- Inference Output(s) ----\n{:}".format( active_runner.name, TensorMetadata().from_feed_dict(outputs) ), mode=LogMode.ONCE, @@ -275,7 +277,7 @@ def compare_accuracy(run_results, fail_fast=False, comparisons=None, compare_fun def find_mismatched(match_dict): return [name for name, matched in match_dict.items() if not bool(matched)] - compare_func = util.default(compare_func, CompareFunc.basic_compare_func()) + compare_func = util.default(compare_func, CompareFunc.simple()) comparisons = util.default(comparisons, Comparator.default_comparisons(run_results)) accuracy_result = AccuracyResult() diff --git a/tools/Polygraphy/polygraphy/comparator/compare.py b/tools/Polygraphy/polygraphy/comparator/compare.py index f0f3ee59..de971bc2 100644 --- a/tools/Polygraphy/polygraphy/comparator/compare.py +++ b/tools/Polygraphy/polygraphy/comparator/compare.py @@ -71,6 +71,146 @@ def __str__(self): return "(atol={:}, rtol={:})".format(self.max_absdiff, self.max_reldiff) +def check_outputs_match( + out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol, per_out_err_stat, runner0_name, runner1_name +): + """ + Checks whether two outputs matched. + + Args: + out0 (np.array): The first output. 
+ out0_name (str): The name of the first output. + out1 (np.array): The second output. + out1_name (str): The name of the second output. + per_out_rtol (float): The relative tolerance to use for comparison. + per_out_atol (float): The absolute tolerance to use for comparison. + per_out_err_stat (str): The error statistic to check. See the docstring of ``simple`` for details. + runner0_name (str): The name of the runner that generated the first output. + runner1_name (str): The name of the runner that generated the second output. + + Returns: + OutputCompareResult: Details on whether the outputs matched. + """ + VALID_CHECK_ERROR_STATS = ["max", "mean", "median", "elemwise"] + if per_out_err_stat not in VALID_CHECK_ERROR_STATS: + G_LOGGER.critical( + "Invalid choice for check_error_stat: {:}.\n" + "Note: Valid choices are: {:}".format(per_out_err_stat, VALID_CHECK_ERROR_STATS) + ) + + G_LOGGER.super_verbose( + "{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format( + runner0_name, out0_name, out0.dtype, out0.shape, util.indent_block(out0) + ) + ) + G_LOGGER.super_verbose( + "{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format( + runner1_name, out1_name, out1.dtype, out1.shape, util.indent_block(out1) + ) + ) + + # Check difference vs. 
tolerances + if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_): + absdiff = np.logical_xor(out0, out1) + else: + absdiff = np.abs(out0 - out1) + + absout1 = np.abs(out1) + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning) + reldiff = absdiff / absout1 + max_reldiff = comp_util.compute_max(reldiff) + mean_reldiff = comp_util.compute_mean(reldiff) + median_reldiff = comp_util.compute_median(reldiff) + + max_absdiff = comp_util.compute_max(absdiff) + mean_absdiff = comp_util.compute_mean(absdiff) + median_absdiff = comp_util.compute_median(absdiff) + + max_elemwiseabs = "Unknown" + max_elemwiserel = "Unknown" + + if per_out_err_stat == "mean": + failed = mean_absdiff > per_out_atol and (np.isnan(mean_reldiff) or mean_reldiff > per_out_rtol) + elif per_out_err_stat == "median": + failed = median_absdiff > per_out_atol and (np.isnan(median_reldiff) or median_reldiff > per_out_rtol) + elif per_out_err_stat == "max": + failed = max_absdiff > per_out_atol and (np.isnan(max_reldiff) or max_reldiff > per_out_rtol) + else: + assert per_out_err_stat == "elemwise", "This branch should be unreachable unless per_out_err_stat is 'elemwise'" + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning) + mismatches = (absdiff > per_out_atol) & (reldiff > per_out_rtol) + + failed = np.any(mismatches) + try: + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning) + # Special because we need to account for tolerances too. 
+ max_elemwiseabs = comp_util.compute_max(absdiff[mismatches]) + max_elemwiserel = comp_util.compute_max(reldiff[mismatches]) + + with G_LOGGER.indent(): + G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches))) + G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(runner0_name, out0[mismatches])) + G_LOGGER.extra_verbose("{:35} | Mismatched values:\n{:}".format(runner1_name, out1[mismatches])) + except Exception as err: + G_LOGGER.warning("Failing to log mismatches.\nNote: Error was: {:}".format(err)) + + # Log information about the outputs + hist_bin_range = ( + min(comp_util.compute_min(out0), comp_util.compute_min(out1)), + max(comp_util.compute_max(out0), comp_util.compute_max(out1)), + ) + comp_util.log_output_stats(out0, failed, runner0_name + ": " + out0_name, hist_range=hist_bin_range) + comp_util.log_output_stats(out1, failed, runner1_name + ": " + out1_name, hist_range=hist_bin_range) + + G_LOGGER.info("Error Metrics: {:}".format(out0_name)) + with G_LOGGER.indent(): + + def req_tol(mean_diff, median_diff, max_diff, elemwise_diff): + return { + "mean": mean_diff, + "median": median_diff, + "max": max_diff, + "elemwise": elemwise_diff, + }[per_out_err_stat] + + G_LOGGER.info( + "Minimum Required Tolerance: {:} error | [abs={:.5g}] OR [rel={:.5g}]".format( + per_out_err_stat, + req_tol(mean_absdiff, median_absdiff, max_absdiff, max_elemwiseabs), + req_tol(mean_reldiff, median_reldiff, max_reldiff, max_elemwiserel), + ) + ) + comp_util.log_output_stats(absdiff, failed, "Absolute Difference") + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning) + comp_util.log_output_stats(reldiff, failed, "Relative Difference") + + # Finally show summary. 
+ if failed: + G_LOGGER.error("FAILED | Difference exceeds tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol)) + else: + G_LOGGER.finish("PASSED | Difference is within tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol)) + + G_LOGGER.extra_verbose( + "Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]".format( + out0_name, + out0.dtype, + out0.shape, + runner0_name, + out1_name, + out1.dtype, + out1.shape, + runner1_name, + ) + ) + return OutputCompareResult( + not failed, max_absdiff, max_reldiff, mean_absdiff, mean_reldiff, median_absdiff, median_reldiff + ) + + # Provides functions to compare two IterationResults @mod.export() class CompareFunc(object): @@ -79,9 +219,12 @@ class CompareFunc(object): """ @staticmethod - def basic_compare_func( - check_shapes=None, rtol=None, atol=None, fail_fast=None, find_output_func=None, check_error_stat=None - ): + def basic_compare_func(*args, **kwargs): + mod.warn_deprecated("basic_compare_func", remove_in="0.40.0", use_instead="simple") + return CompareFunc.simple(*args, **kwargs) + + @staticmethod + def simple(check_shapes=None, rtol=None, atol=None, fail_fast=None, find_output_func=None, check_error_stat=None): """ Creates a function that compares two IterationResults, and can be used as the `compare_func` argument in ``Comparator.compare_accuracy``. 
@@ -172,140 +315,6 @@ def check_dict(dct, dict_name): check_dict(atol, "the atol dictionary") check_dict(check_error_stat, "the check_error_stat dictionary") - # Returns whether the outputs match - def check_outputs_match(out0, out0_name, out1, out1_name, per_out_rtol, per_out_atol, per_out_err_stat): - VALID_CHECK_ERROR_STATS = ["max", "mean", "median", "elemwise"] - if per_out_err_stat not in VALID_CHECK_ERROR_STATS: - G_LOGGER.critical( - "Invalid choice for check_error_stat: {:}.\n" - "Note: Valid choices are: {:}".format(per_out_err_stat, VALID_CHECK_ERROR_STATS) - ) - - G_LOGGER.super_verbose( - "{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format( - iter_result0.runner_name, out0_name, out0.dtype, out0.shape, util.indent_block(out0) - ) - ) - G_LOGGER.super_verbose( - "{:35} | Output: {:} (dtype={:}, shape={:}):\n{:}".format( - iter_result1.runner_name, out1_name, out1.dtype, out1.shape, util.indent_block(out1) - ) - ) - - # Check difference vs. tolerances - if np.issubdtype(out0.dtype, np.bool_) and np.issubdtype(out1.dtype, np.bool_): - absdiff = np.logical_xor(out0, out1) - else: - absdiff = np.abs(out0 - out1) - - absout1 = np.abs(out1) - with np.testing.suppress_warnings() as sup: - sup.filter(RuntimeWarning) - reldiff = absdiff / absout1 - - max_absdiff = comp_util.compute_max(absdiff) - mean_absdiff = comp_util.compute_mean(absdiff) - median_absdiff = comp_util.compute_median(absdiff) - max_reldiff = comp_util.compute_max(reldiff) - mean_reldiff = comp_util.compute_mean(reldiff) - median_reldiff = comp_util.compute_median(reldiff) - - max_elemwiseabs = "Unknown" - max_elemwiserel = "Unknown" - - if per_out_err_stat == "mean": - failed = mean_absdiff > per_out_atol and (np.isnan(mean_reldiff) or mean_reldiff > per_out_rtol) - elif per_out_err_stat == "median": - failed = median_absdiff > per_out_atol and ( - np.isnan(median_reldiff) or median_reldiff > per_out_rtol - ) - elif per_out_err_stat == "max": - failed = max_absdiff > per_out_atol and 
(np.isnan(max_reldiff) or max_reldiff > per_out_rtol) - else: - assert ( - per_out_err_stat == "elemwise" - ), "This branch should be unreachable unless per_out_err_stat is 'elemwise'" - mismatches = (absdiff > per_out_atol) & (reldiff > per_out_rtol) - - failed = np.any(mismatches) - try: - # Special because we need to account for tolerances too. - max_elemwiseabs = comp_util.compute_max(absdiff[mismatches]) - max_elemwiserel = comp_util.compute_max(reldiff[mismatches]) - - with G_LOGGER.indent(): - G_LOGGER.super_verbose("Mismatched indices:\n{:}".format(np.argwhere(mismatches))) - G_LOGGER.extra_verbose( - "{:35} | Mismatched values:\n{:}".format(iter_result0.runner_name, out0[mismatches]) - ) - G_LOGGER.extra_verbose( - "{:35} | Mismatched values:\n{:}".format(iter_result1.runner_name, out1[mismatches]) - ) - except Exception as err: - G_LOGGER.warning("Failing to log mismatches.\nNote: Error was: {:}".format(err)) - - # Log information about the outputs - hist_bin_range = ( - min(comp_util.compute_min(out0), comp_util.compute_min(out1)), - max(comp_util.compute_max(out0), comp_util.compute_max(out1)), - ) - comp_util.log_output_stats( - out0, failed, iter_result0.runner_name + ": " + out0_name, hist_range=hist_bin_range - ) - comp_util.log_output_stats( - out1, failed, iter_result1.runner_name + ": " + out1_name, hist_range=hist_bin_range - ) - - G_LOGGER.info("Error Metrics: {:}".format(out0_name)) - with G_LOGGER.indent(): - - def req_tol(mean_diff, median_diff, max_diff, elemwise_diff): - return { - "mean": mean_diff, - "median": median_diff, - "max": max_diff, - "elemwise": elemwise_diff, - }[per_out_err_stat] - - G_LOGGER.info( - "Minimum Required Tolerance: {:} error | [abs={:.5g}] OR [rel={:.5g}]".format( - per_out_err_stat, - req_tol(mean_absdiff, median_absdiff, max_absdiff, max_elemwiseabs), - req_tol(mean_reldiff, median_reldiff, max_reldiff, max_elemwiserel), - ) - ) - comp_util.log_output_stats(absdiff, failed, "Absolute Difference") - 
comp_util.log_output_stats(reldiff, failed, "Relative Difference") - - # Finally show summary. - if failed: - G_LOGGER.error( - "FAILED | Difference exceeds tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol) - ) - else: - G_LOGGER.finish( - "PASSED | Difference is within tolerance (rel={:}, abs={:})".format(per_out_rtol, per_out_atol) - ) - - G_LOGGER.extra_verbose( - "Finished comparing: '{:}' (dtype={:}, shape={:}) [{:}] and '{:}' (dtype={:}, shape={:}) [{:}]".format( - out0_name, - out0.dtype, - out0.shape, - iter_result0.runner_name, - out1_name, - out1.dtype, - out1.shape, - iter_result1.runner_name, - ) - ) - return OutputCompareResult( - not failed, max_absdiff, max_reldiff, mean_absdiff, mean_reldiff, median_absdiff, median_reldiff - ) - # - # End: def check_outputs_match - # - output_status = OrderedDict() # OrderedDict[str, bool] Maps output names to whether they matched. if not check_shapes: @@ -353,29 +362,9 @@ def default_find_output_func(output_name, index, iter_result): ) continue - def get_tol(tol_dict, default): - if isinstance(tol_dict, numbers.Number): - return tol_dict - - if out0_name in tol_dict: - return tol_dict[out0_name] - elif "" in tol_dict: - return tol_dict[""] - return default - - def get_error_stat(): - if isinstance(check_error_stat, str): - return check_error_stat - - if out0_name in check_error_stat: - return check_error_stat[out0_name] - elif "" in check_error_stat: - return check_error_stat[""] - return default_error_stat - - per_out_atol = get_tol(atol, default_atol) - per_out_rtol = get_tol(rtol, default_rtol) - per_out_err_stat = get_error_stat() + per_out_atol = util.value_or_from_dict(atol, out0_name, default_atol) + per_out_rtol = util.value_or_from_dict(rtol, out0_name, default_rtol) + per_out_err_stat = util.value_or_from_dict(check_error_stat, out0_name, default_error_stat) output1 = iter_result1[out1_name] G_LOGGER.start( @@ -419,6 +408,8 @@ def get_error_stat(): per_out_rtol=per_out_rtol, 
per_out_atol=per_out_atol, per_out_err_stat=per_out_err_stat, + runner0_name=iter_result0.runner_name, + runner1_name=iter_result1.runner_name, ) output_status[out0_name] = outputs_match diff --git a/tools/Polygraphy/polygraphy/comparator/data_loader.py b/tools/Polygraphy/polygraphy/comparator/data_loader.py index 1c5423c5..8fa25e53 100644 --- a/tools/Polygraphy/polygraphy/comparator/data_loader.py +++ b/tools/Polygraphy/polygraphy/comparator/data_loader.py @@ -56,6 +56,7 @@ def __init__( minimum and maximum. This can be specified on a per-input basis using a dictionary. In that case, use an empty string ("") as the key to specify default range for inputs not explicitly listed. + Defaults to (0.0, 1.0). int_range (Tuple[int]): [DEPRECATED - Use val_range instead] @@ -87,12 +88,12 @@ def default_tuple(tup, default): self.int_range_set = int_range is not None if self.int_range_set: - mod.warn_deprecated("The int_range parameter in DataLoader", "val_range", remove_in="0.32.0") + mod.warn_deprecated("The int_range parameter in DataLoader", "val_range", remove_in="0.35.0") self.int_range = default_tuple(int_range, (1, 25)) self.float_range_set = float_range is not None if self.float_range_set: - mod.warn_deprecated("The float_range parameter in DataLoader", "val_range", remove_in="0.32.0") + mod.warn_deprecated("The float_range parameter in DataLoader", "val_range", remove_in="0.35.0") self.float_range = default_tuple(float_range, (-1.0, 1.0)) self.input_metadata = None @@ -123,15 +124,7 @@ def _get_range(self, name, cast_type): elif cast_type == float and self.float_range_set: return self.float_range - tup = None - if isinstance(self.val_range, tuple): - tup = self.val_range - elif name in self.val_range: - tup = self.val_range[name] - elif "" in self.val_range: - tup = self.val_range[""] - else: - tup = self.default_val_range + tup = util.value_or_from_dict(self.val_range, name, self.default_val_range) return tuple(cast_type(val) for val in tup) def __getitem__(self, 
index): diff --git a/tools/Polygraphy/polygraphy/comparator/postprocess.py b/tools/Polygraphy/polygraphy/comparator/postprocess.py index d04fd5ea..85d1783f 100644 --- a/tools/Polygraphy/polygraphy/comparator/postprocess.py +++ b/tools/Polygraphy/polygraphy/comparator/postprocess.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from polygraphy import mod +from polygraphy import mod, util np = mod.lazy_import("numpy") @@ -47,17 +47,8 @@ def topk_func(k=10, axis=-1): """ # Top-K implementation. def topk(iter_result): - def get_k(name): - if isinstance(k, int): - return k - elif name in k: - return k[name] - elif "" in k: - return k[""] - return None - for name, output in iter_result.items(): - k_val = get_k(name) + k_val = util.value_or_from_dict(k, name) if k_val: indices = np.argsort(-output, axis=axis, kind="stable") axis_len = indices.shape[axis] diff --git a/tools/Polygraphy/polygraphy/comparator/util.py b/tools/Polygraphy/polygraphy/comparator/util.py index 005bba21..7a5478d9 100644 --- a/tools/Polygraphy/polygraphy/comparator/util.py +++ b/tools/Polygraphy/polygraphy/comparator/util.py @@ -58,6 +58,11 @@ def compute_median(buffer): return np.median(buffer) +@zero_on_empty +def compute_average_magnitude(buffer): + return np.mean(np.abs(buffer)) + + def str_histogram(output, hist_range=None): if np.issubdtype(output.dtype, np.bool_): return "" @@ -108,17 +113,16 @@ def str_output_stats(output, runner_name=None): try: with np.testing.suppress_warnings() as sup: sup.filter(RuntimeWarning) - ret += ( - "mean={:.5g}, std-dev={:.5g}, var={:.5g}, median={:.5g}, min={:.5g} at {:}, max={:.5g} at {:}\n".format( - compute_mean(output), - compute_stddev(output), - compute_variance(output), - compute_median(output), - compute_min(output), - compute_argmin(output), - compute_max(output), - compute_argmax(output), - ) + ret += "mean={:.5g}, std-dev={:.5g}, var={:.5g}, median={:.5g}, min={:.5g} at 
{:}, max={:.5g} at {:}, avg-magnitude={:.5g}\n".format( + compute_mean(output), + compute_stddev(output), + compute_variance(output), + compute_median(output), + compute_min(output), + compute_argmin(output), + compute_max(output), + compute_argmax(output), + compute_average_magnitude(output), ) except Exception as err: G_LOGGER.verbose("Could not generate statistics.\nNote: Error was: {:}".format(err)) diff --git a/tools/Polygraphy/polygraphy/cuda/cuda.py b/tools/Polygraphy/polygraphy/cuda/cuda.py index 1f71597b..9e680a25 100644 --- a/tools/Polygraphy/polygraphy/cuda/cuda.py +++ b/tools/Polygraphy/polygraphy/cuda/cuda.py @@ -14,7 +14,7 @@ # limitations under the License. # import ctypes -import glob +import time import os import sys @@ -224,6 +224,13 @@ def try_get_stream_handle(stream): return stream.ptr +# Make a numpy array contiguous if it's not already. +def make_np_contiguous(arr): + if not arr.flags["C_CONTIGUOUS"]: + return np.ascontiguousarray(arr) + return arr + + @mod.export() class DeviceView(object): """ @@ -242,6 +249,7 @@ def __init__(self, ptr, shape, dtype): """int: The memory address of the underlying GPU memory""" self.shape = shape """Tuple[int]: The shape of the device buffer""" + self.itemsize = None self.dtype = dtype """np.dtype: The data type of the device buffer""" @@ -252,12 +260,21 @@ def _check_dtype_matches(self, host_buffer): "This may cause CUDA errors!".format(host_buffer.dtype, self.dtype) ) + @property + def dtype(self): + return self._dtype + + @dtype.setter + def dtype(self, new): + self._dtype = new + self.itemsize = np.dtype(new).itemsize + @property def nbytes(self): """ The number of bytes in the memory region. """ - return util.volume(self.shape) * np.dtype(self.dtype).itemsize + return util.volume(self.shape) * self.itemsize @func.constantmethod def copy_to(self, host_buffer, stream=None): @@ -266,7 +283,7 @@ def copy_to(self, host_buffer, stream=None): Args: host_buffer (numpy.ndarray): - The host buffer to copy into. 
The buffer will be resized to match the + The host buffer to copy into. The buffer will be reshaped to match the shape of this device buffer. If the provided host buffer is too small, it will be freed and reallocated. The buffer may also be reallocated if it is not contiguous in @@ -292,7 +309,7 @@ def copy_to(self, host_buffer, stream=None): if not self.nbytes: return host_buffer - host_buffer = np.ascontiguousarray(host_buffer) + host_buffer = make_np_contiguous(host_buffer) wrapper().memcpy( dst=host_buffer.ctypes.data, src=self.ptr, @@ -301,8 +318,6 @@ def copy_to(self, host_buffer, stream=None): stream_ptr=try_get_stream_handle(stream), ) - # Use resize instead of reshape since it operates in-place. - host_buffer.resize(self.shape, refcheck=False) return host_buffer @func.constantmethod @@ -377,7 +392,7 @@ def resize(self, shape): Args: shape (Tuple[int]): The new shape. """ - nbytes = util.volume(shape) * np.dtype(self.dtype).itemsize + nbytes = util.volume(shape) * self.itemsize if nbytes > self.allocated_nbytes: self.free() self.allocate(nbytes) @@ -402,7 +417,7 @@ def copy_from(self, host_buffer, stream=None): if host_buffer.nbytes: self._check_dtype_matches(host_buffer) self.resize(host_buffer.shape) - host_buffer = np.ascontiguousarray(host_buffer.ravel()) + host_buffer = make_np_contiguous(host_buffer) wrapper().memcpy( dst=self.ptr, src=host_buffer.ctypes.data, @@ -412,6 +427,15 @@ def copy_from(self, host_buffer, stream=None): ) return self + def view(self): + """ + Creates a read-only DeviceView from this DeviceArray. + + Returns: + DeviceView: A view of this array's data on the device.
+ """ + return DeviceView(self.ptr, self.shape, self.dtype) + def __str__(self): return "DeviceArray[(dtype={:}, shape={:}), ptr={:}]".format( np.dtype(self.dtype).name, self.shape, hex(self.ptr) diff --git a/tools/Polygraphy/polygraphy/json/serde.py b/tools/Polygraphy/polygraphy/json/serde.py index ee5c8145..8a62a9fa 100644 --- a/tools/Polygraphy/polygraphy/json/serde.py +++ b/tools/Polygraphy/polygraphy/json/serde.py @@ -271,13 +271,6 @@ def from_json(src): return json.loads(src, object_pairs_hook=Decoder()) -@mod.export_deprecated_alias( - "pickle_save", - remove_in="0.32.0", - use_instead="JSON serialization. " - "This function has been migrated to use JSON and will NOT pickle the input object. " - "Use save_json", -) @mod.export() @try_register_numpy_json def save_json(obj, dest, description=None): @@ -293,7 +286,6 @@ def save_json(obj, dest, description=None): util.save_file(to_json(obj), dest, mode="w", description=description) -@mod.export_deprecated_alias("pickle_load", remove_in="0.32.0", use_instead="load_json") @mod.export() @try_register_numpy_json def load_json(src, description=None): @@ -308,17 +300,7 @@ def load_json(src, description=None): Returns: object: The object, or `None` if nothing could be read. """ - try: - return from_json(util.load_file(src, mode="r", description=description)) - except UnicodeDecodeError: - # This is a pickle file from Polygraphy 0.26.1 or older. - mod.warn_deprecated("pickle", use_instead="JSON", remove_in="0.32.0") - G_LOGGER.critical( - "It looks like you're trying to load a Pickle file.\nPolygraphy migrated to using JSON " - "instead of Pickle in version 0.27.0 for security reasons.\nYou can convert your existing " - "pickled data to JSON using the command-line tool: `polygraphy to-json {:} -o new.json`.\nAll data serialized " - "from this and future versions of Polygraphy will always use JSON. 
".format(src) - ) + return from_json(util.load_file(src, mode="r", description=description)) @mod.export() diff --git a/tools/Polygraphy/polygraphy/logger/logger.py b/tools/Polygraphy/polygraphy/logger/logger.py index 6f0b1034..d2eaa999 100644 --- a/tools/Polygraphy/polygraphy/logger/logger.py +++ b/tools/Polygraphy/polygraphy/logger/logger.py @@ -308,7 +308,7 @@ def should_log(message): self._log_file.write(message + "\n") self._log_file.flush() - print(message) + print(message, file=sys.stdout if severity < Logger.CRITICAL else sys.stderr) def backtrace(self, depth=0, limit=None, severity=ERROR): limit = limit if limit is not None else (3 - self.severity // 10) * 2 # Info provides 1 stack frame @@ -373,7 +373,7 @@ def try_append(func): return ret def module_info(self, module, name=None, severity=VERBOSE): - G_LOGGER.log(self._str_from_module_info(module, name), severity=severity, mode=LogMode.ONCE) + self.log(self._str_from_module_info(module, name), severity=severity, mode=LogMode.ONCE) def log_exception(self, func): """ diff --git a/tools/Polygraphy/polygraphy/mod/importer.py b/tools/Polygraphy/polygraphy/mod/importer.py index cb7e01ba..f47a2530 100644 --- a/tools/Polygraphy/polygraphy/mod/importer.py +++ b/tools/Polygraphy/polygraphy/mod/importer.py @@ -26,12 +26,12 @@ _all_external_lazy_imports = set() # Sometimes the Python package name differs from the module name. -_MODULE_TO_PKG_NAME = { +_PKG_NAME_FROM_MODULE = { "tensorrt": "nvidia-tensorrt", } # Some packages need additional flags to install correctly. 
-_MODULE_EXTRA_FLAGS = { +_EXTRA_FLAGS_FOR_MODULE = { "tensorrt": ["--extra-index-url=https://pypi.ngc.nvidia.com"], "onnx_graphsurgeon": ["--extra-index-url=https://pypi.ngc.nvidia.com"], } @@ -93,8 +93,8 @@ def import_mod(): def install_mod(raise_error=True): modname = name.split(".")[0] - pkg = _MODULE_TO_PKG_NAME.get(modname, modname) - extra_flags = _MODULE_EXTRA_FLAGS.get(modname, []) + pkg = _PKG_NAME_FROM_MODULE.get(modname, modname) + extra_flags = _EXTRA_FLAGS_FOR_MODULE.get(modname, []) if version == LATEST_VERSION: extra_flags.append("--upgrade") @@ -102,15 +102,12 @@ def install_mod(raise_error=True): pkg += version cmd = config.INSTALL_CMD + [pkg] + extra_flags - G_LOGGER.info( - "{:} is required, but not installed. Attempting to install now.\n" - "Running: {:}".format(pkg, " ".join(cmd)) - ) + G_LOGGER.info("Running installation command: {:}".format(" ".join(cmd))) status = sp.run(cmd) if status.returncode != 0: log_func = G_LOGGER.critical if raise_error else G_LOGGER.warning log_func( - "Could not automatically install required package: {:}. Please install it manually.".format(pkg) + "Could not automatically install required module: {:}. Please install it manually.".format(pkg) ) mod = importlib.import_module(name) @@ -119,15 +116,16 @@ def install_mod(raise_error=True): mod = None try: mod = importlib.import_module(name) - except: + except ImportError: if config.AUTOINSTALL_DEPS: + G_LOGGER.info("Module: '{:}' is required, but not installed. 
Attempting to install now.".format(name)) mod = install_mod() else: G_LOGGER.critical( - "Module: {:} is required but could not be imported.\n" + "Module: '{:}' is required but could not be imported.\n" "You can try setting POLYGRAPHY_AUTOINSTALL_DEPS=1 in your environment variables " - "to allow Polygraphy to automatically install missing packages.\n" - "Note that this may cause existing packages to be overwritten - hence, it may be " + "to allow Polygraphy to automatically install missing modules.\n" + "Note that this may cause existing modules to be overwritten - hence, it may be " "desirable to use a Python virtual environment or container. ".format(name) ) @@ -135,13 +133,13 @@ def install_mod(raise_error=True): if version is not None and hasattr(mod, "__version__") and not _version_ok(mod.__version__, version): if config.AUTOINSTALL_DEPS: G_LOGGER.info( - "Note: Package: '{name}' version {cur_ver} is installed, but version {rec_ver} is recommended.\n" - "Upgrading...".format(name=name, cur_ver=mod.__version__, rec_ver=version) + "Note: Module: '{name}' version '{cur_ver}' is installed, but version '{rec_ver}' is recommended.\n" + "Attempting to upgrade now.".format(name=name, cur_ver=mod.__version__, rec_ver=version) ) mod = install_mod(raise_error=False) # We can try to use the other version if install fails. elif version != LATEST_VERSION: G_LOGGER.warning( - "Package: '{name}' version {cur_ver} is installed, but version {rec_ver} is recommended.\n" + "Module: '{name}' version '{cur_ver}' is installed, but version '{rec_ver}' is recommended.\n" "Consider installing the recommended version or setting POLYGRAPHY_AUTOINSTALL_DEPS=1 in your " "environment variables to do so automatically. 
".format( name=name, cur_ver=mod.__version__, rec_ver=version diff --git a/tools/Polygraphy/polygraphy/tools/README.md b/tools/Polygraphy/polygraphy/tools/README.md index 877322b3..98c0ede1 100644 --- a/tools/Polygraphy/polygraphy/tools/README.md +++ b/tools/Polygraphy/polygraphy/tools/README.md @@ -1,28 +1,189 @@ -# Polygraphy Tools +# Polygraphy Command-line Toolkit User Guide ## Table of Contents - [Introduction](#introduction) -- [Usage](#usage) +- [Common Use-Cases](#common-use-cases) + - [Inspecting A Model](#inspecting-a-model) + - [Converting A Model To TensorRT](#converting-a-model-to-tensorrt) + - [Sanitizing An ONNX Model](#sanitizing-an-onnx-model) + - [Comparing A Model Between Frameworks](#comparing-a-model-between-frameworks) + - [Using Custom Input Data](#using-custom-input-data) + - [Modifying Input Shapes In An ONNX Model](#modifying-input-shapes-in-an-onnx-model) +- [Advanced Topics](#advanced-topics) + - [Deterministic Engine Builds](#deterministic-engine-builds) + - [Defining A Custom TensorRT Network](#defining-a-custom-tensorrt-network) + - [Defining A Custom TensorRT Builder Configuration](#defining-a-custom-tensorrt-builder-configuration) + - [Extracting A Subgraph Of An ONNX Model](#extracting-a-subgraph-of-an-onnx-model) + - [Debugging Intermittent TensorRT Failures](#debugging-intermittent-tensorrt-failures) + - [Reducing Failing ONNX Models](#reducing-failing-onnx-models) - [Examples](#examples) - [Adding New Tools](#adding-new-tools) ## Introduction -This directory includes command-line tools from the Polygraphy debugging toolkit. +The Polygraphy command-line toolkit includes several tools covering a wide variety of prototyping +and debugging use-cases. This guide provides a broad overview of the capabilities included. For more information about a specific tool, see the README in the corresponding directory here. Note that some of the tools included are still experimental. 
Any tool labeled `[EXPERIMENTAL]` may be subject to backwards-incompatible changes, or even complete removal at any point in time. +All the tools provided by Polygraphy can be invoked using the polygraphy binary: [`bin/polygraphy`](../../bin/polygraphy). +For usage information on a specific tool, you can see the help output with: `polygraphy -h` -## Usage -All the tools provided by Polygraphy can be invoked using the polygraphy binary: [`bin/polygraphy`](../../bin/polygraphy) +## Common Use-Cases -For usage information on a specific tool, you can see the help output with: `polygraphy -h` + +### Inspecting A Model + +It is often useful to inspect various aspects of a model, such as layer names, attributes, +or overall structure. The `inspect model` subtool aims to provide this functionality in a way +that is conducive to programmatic analysis; that is, it provides a human-readable text representation +of the model (and it's `grep`-friendly!). + +For more details, refer to the examples, which, among other things, show how to inspect: +- [TensorRT Networks](../../examples/cli/inspect/01_inspecting_a_tensorrt_network/) +- [TensorRT Engines](../../examples/cli/inspect/02_inspecting_a_tensorrt_engine/) +- [ONNX models](../../examples/cli/inspect/03_inspecting_an_onnx_model/) + +You can find the complete listing of `inspect` examples [here](../../examples/cli/inspect). + + +### Converting A Model To TensorRT + +The `convert` tool can be used to convert various types of models to other formats; +for example, converting ONNX models to TensorRT. + +For more information, refer to the examples, which, among other things, show how to: +- [Convert models with dynamic shapes to TensorRT](../../examples/cli/convert/03_dynamic_shapes_in_tensorrt/) +- [Run TensorRT INT8 calibration during conversion](../../examples/cli/convert/01_int8_calibration_in_tensorrt) + +You can find the complete listing of `convert` examples [here](../../examples/cli/convert/). 
+ + +### Sanitizing An ONNX Model + +Sometimes, TensorRT may be unable to import an ONNX model. In these cases, it often helps to +sanitize the ONNX model, removing excess nodes and folding constants. The `surgeon sanitize` +subtool provides this capability using ONNX-GraphSurgeon and ONNX-Runtime. + +For more details, refer to [`surgeon` example 02](../../examples/cli/surgeon/02_folding_constants/). + +You can find the complete listing of `surgeon` examples [here](../../examples/cli/surgeon/). + + +### Comparing A Model Between Frameworks + +The `run` tool can run inference and compare outputs generated by one or more frameworks. +This can be used to check the accuracy of a framework for a particular model and debug +when the accuracy is poor. + +Moreover, it can be used to generate Python scripts that do the same. + +For more information, refer to the examples, which, among other things, show how to: +- [Compare outputs between frameworks](../../examples/cli/run/01_comparing_frameworks/) +- [Save and load outputs to compare across runs](../../examples/cli/run/02_comparing_across_runs) +- [Generate comparison scripts](../../examples/cli/run/03_generating_a_comparison_script/) + + +You can find the complete listing of `run` examples [here](../../examples/cli/run/). + + +### Using Custom Input Data + +For any tools that use inference input data, such as `run` or `convert`, Polygraphy +provides 2 ways to supply custom input data: + +1. `--load-input-data`, which takes a path to a JSON file containing a `List[Dict[str, np.ndarray]]`. + This will cause Polygraphy to load the entire object into memory. + *NOTE: This may be impractical or impossible if the data is very large.* + +2. `--data-loader-script`, which takes a path to a Python script that defines a `load_data` function + that returns a data loader. The data loader can be any iterable or generator that yields + `Dict[str, np.ndarray]`. 
By using a generator, we can avoid loading all the data + at once, and instead limit it to just a single input at a time. + + *TIP: If you have an existing script that already defines such a function, you do **not** need to create* + *a separate script just for the sake of `--data-loader-script`. You can simply use the existing script* + *and optionally use the `--data-loader-func-name` argument to specify the name of the function if it's not `load_data`* + +For more information, refer to [`run` example 05](../../examples/cli/run/05_comparing_with_custom_data/). + + +### Modifying Input Shapes In An ONNX Model + +The best way to modify input shapes in an ONNX model is to re-export the model with +the desired input shapes. When this is not possible, you can use the `surgeon sanitize` +tool to do this. + +For more information, refer to [`surgeon` example 03](../../examples/cli/surgeon/03_modifying_input_shapes/). + + +## Advanced Topics + +### Deterministic Engine Builds + +Because of the way TensorRT works, the engine building process is non-deterministic. +Even when using the same model with the same builder configuration, engines can vary slightly. + +To make engine building reproducible, all Polygraphy CLI tools that involve building TensorRT engines +accept `--save-tactics` and `--load-tactics` options, which allow you to save the tactics selected +for an engine and reload them during subsequent builds. + +For more information, refer to [`convert` example 02](../../examples/cli/convert/02_deterministic_engine_builds_in_tensorrt/). + + +### Defining A Custom TensorRT Network + +Many of the command-line tools involve creating TensorRT networks. Most of the time, networks +are created by parsing a model from a framework (generally in ONNX format). However, it +is also possible to define the TensorRT network manually using a Python script. 
+ +This is useful if you want to modify the network in some way using the TensorRT Python +API; for example, setting layer precisions, or per-layer device preferences. + +For more information, refer to [`run` example 04](../../examples/cli/run/04_defining_a_tensorrt_network_or_config_manually). + + +### Defining A Custom TensorRT Builder Configuration + +Similar to defining custom TensorRT networks, it is possible to provide custom +TensorRT builder configurations on the command-line using a Python script. + +This is useful when Polygraphy does not support certain TensorRT builder configuration options. + +For more information, refer to [`run` example 04](../../examples/cli/run/04_defining_a_tensorrt_network_or_config_manually). + + +### Extracting A Subgraph Of An ONNX Model + +When debugging ONNX models, it can be useful to extract subgraphs to look at them in isolation. +The `surgeon extract` tool allows you to do so. + +For more information, refer to [`surgeon` example 01](../../examples/cli/surgeon/01_isolating_subgraphs/). + + +### Debugging Intermittent TensorRT Failures + +Since the TensorRT optimizer is inherently non-deterministic, rebuilding an engine may +result in different tactics being selected. If a particular tactic is faulty, this may manifest +as an intermittent failure. The `debug build` tool helps find faulty tactics in such cases +and reproduce failures reliably. + +For more information, refer to [`debug` example 01](../../examples/cli/debug/01_debugging_flaky_trt_tactics/). + + +### Reducing Failing ONNX Models + +When investigating bugs involving ONNX models, it can be useful to reduce the model to a minimum +failing subgraph. This helps us pinpoint the issue and makes further debugging much easier. +The `debug reduce` tool helps us automate this process. + +For more information, refer to [`debug` example 02](../../examples/cli/debug/02_reducing_failing_onnx_models/).
## Examples diff --git a/tools/Polygraphy/polygraphy/tools/args/__init__.py b/tools/Polygraphy/polygraphy/tools/args/__init__.py index d21d6fc6..3dacc26b 100644 --- a/tools/Polygraphy/polygraphy/tools/args/__init__.py +++ b/tools/Polygraphy/polygraphy/tools/args/__init__.py @@ -19,6 +19,7 @@ from polygraphy.tools.args.model import * from polygraphy.tools.args.onnx import * from polygraphy.tools.args.onnxrt import * +from polygraphy.tools.args.pluginref import * from polygraphy.tools.args.tf2onnx import * from polygraphy.tools.args.tf import * from polygraphy.tools.args.trt import * diff --git a/tools/Polygraphy/polygraphy/tools/args/comparator.py b/tools/Polygraphy/polygraphy/tools/args/comparator.py index 79e3e8b9..152fcea0 100644 --- a/tools/Polygraphy/polygraphy/tools/args/comparator.py +++ b/tools/Polygraphy/polygraphy/tools/args/comparator.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from polygraphy import mod, util +from polygraphy import mod from polygraphy.logger import G_LOGGER from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs @@ -156,7 +156,7 @@ def add_to_parser(self, parser): comparator_args.add_argument( "--check-error-stat", help="The error statistic to check. " - "For details on possible values, see the documentation for CompareFunc.basic_compare_func(). " + "For details on possible values, see the documentation for CompareFunc.simple(). " "To specify per-output values, use the format: --check-error-stat [:]. If no output name is provided, " "the value is used for any outputs not explicitly specified. 
For example: " "--check-error-stat max out0:mean out1:median", @@ -194,7 +194,7 @@ def parse(self, args): ) # FIXME: This should be a proper dependency from a RunnerArgs - self.runners = util.default(args_util.get(args, "runners"), []) + self.runners = args_util.get(args, "runners", default=[]) def add_to_script(self, script, results_name): script.add_import(imports=["Comparator"], frm="polygraphy.comparator") @@ -228,7 +228,7 @@ def add_to_script(self, script, results_name): script.append_suffix(safe("# Accuracy Comparison")) compare_func_str = make_invocable_if_nondefault( - "CompareFunc.basic_compare_func", + "CompareFunc.simple", rtol=self.rtol, atol=self.atol, check_shapes=False if self.no_shape_check else None, diff --git a/tools/Polygraphy/polygraphy/tools/args/data_loader.py b/tools/Polygraphy/polygraphy/tools/args/data_loader.py index cb16f6dc..5592b997 100644 --- a/tools/Polygraphy/polygraphy/tools/args/data_loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/data_loader.py @@ -14,7 +14,10 @@ # limitations under the License. # +import numbers + from polygraphy import mod, util +from polygraphy.logger import G_LOGGER from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.script import Script, make_invocable, make_invocable_if_nondefault, safe @@ -115,7 +118,26 @@ def omit_none_tuple(tup): self.int_range = omit_none_tuple(tup=(args_util.get(args, "int_min"), args_util.get(args, "int_max"))) self.float_range = omit_none_tuple(tup=(args_util.get(args, "float_min"), args_util.get(args, "float_max"))) + if self.int_range or self.float_range: + G_LOGGER.warning( + "The --int-min/--int-max and --float-min/--float-max options are deprecated.\n" + "Please use `--val-range` instead, which allows you to specify per-input data ranges." 
+ ) + self.val_range = args_util.parse_dict_with_default(args_util.get(args, "val_range"), cast_to=tuple) + if self.val_range is not None: + for name, vals in self.val_range.items(): + if len(vals) != 2: + G_LOGGER.critical( + "In --val-range, for input: {:}, expected to receive exactly 2 values, but received {:}.\n" + "Note: Option was parsed as: input: {:}, range: {:}".format(name, len(vals), name, vals) + ) + + if any(not isinstance(elem, numbers.Number) for elem in vals): + G_LOGGER.critical( + "In --val-range, for input: {:}, one or more elements of the range could not be parsed as a number.\n" + "Note: Option was parsed as: input: {:}, range: {:}".format(name, name, vals) + ) self.iterations = args_util.get(args, "iterations") @@ -125,6 +147,8 @@ def omit_none_tuple(tup): def _add_to_script(self, script, user_input_metadata_str=None): needs_invoke = False + using_random_data = False + if self.data_loader_script: script.add_import(imports=["mod"], frm="polygraphy") data_loader = make_invocable( @@ -140,6 +164,7 @@ def _add_to_script(self, script, user_input_metadata_str=None): data_loader=Script.DATA_LOADER_NAME, ) else: + using_random_data = True if user_input_metadata_str is None and self.model_args is not None and self.model_args.input_shapes: user_input_metadata_str = self.model_args.input_shapes @@ -158,6 +183,9 @@ def _add_to_script(self, script, user_input_metadata_str=None): if data_loader: script.add_import(imports=["DataLoader"], frm="polygraphy.comparator") + if using_random_data != self.is_using_random_data(): + G_LOGGER.internal_error("is_using_random_data() reported a false positive!") + return script.set_data_loader(data_loader), needs_invoke def add_data_loader(self, script, *args, **kwargs): @@ -195,3 +223,12 @@ def add_to_script_wrapper(script, *args, **kwargs): if needs_invoke: data_loader = data_loader() return data_loader + + def is_using_random_data(self): + """ + Whether this data loader will randomly generate data rather than use real 
data. + + Returns: + bool + """ + return not self.data_loader_script and not self.load_inputs diff --git a/tools/Polygraphy/polygraphy/tools/args/logger.py b/tools/Polygraphy/polygraphy/tools/args/logger.py index b479cce2..8827b070 100644 --- a/tools/Polygraphy/polygraphy/tools/args/logger.py +++ b/tools/Polygraphy/polygraphy/tools/args/logger.py @@ -58,7 +58,7 @@ def add_to_parser(self, parser): def parse(self, args): self.verbosity_count = args_util.get(args, "verbose") - args_util.get(args, "quiet") self.silent = args_util.get(args, "silent") - self.log_format = util.default(args_util.get(args, "log_format"), []) + self.log_format = args_util.get(args, "log_format", default=[]) self.log_file = args_util.get(args, "log_file") # Enable logger settings immediately on parsing. diff --git a/tools/Polygraphy/polygraphy/tools/args/model.py b/tools/Polygraphy/polygraphy/tools/args/model.py index faf4e00c..00114fc9 100644 --- a/tools/Polygraphy/polygraphy/tools/args/model.py +++ b/tools/Polygraphy/polygraphy/tools/args/model.py @@ -56,12 +56,17 @@ def is_onnx(self): def is_trt(self): return self in ModelArgs.ModelType.TRT_TYPES - def __init__(self, model_required=False, inputs="--inputs", model_type=None): + def __init__(self, model_required=False, inputs="--inputs", model_type=None, inputs_doc=None): super().__init__() self._model_required = model_required self._inputs = inputs # If model type is provided, it means the tool only supports a single type of model. self._model_type = model_type + self._inputs_doc = util.default( + inputs_doc, + "Model input(s) and their shape(s). " + "Used to determine shapes to use while generating input data for inference", + ) def add_to_parser(self, parser): model_args = parser.add_argument_group("Model", "Options for the model") @@ -81,10 +86,9 @@ def add_to_parser(self, parser): model_args.add_argument( self._inputs.replace("inputs", "input") + "-shapes", self._inputs, - help="Model input(s) and their shape(s). 
Generally, this is used to determine inference-time input shapes, " - "or override dynamic shapes set in the model. Format: {arg_name}-shapes :. " + help="{:}. Format: {arg_name}-shapes :. " "For example: {arg_name}-shapes image:[1,3,224,224] other_input:[10]".format( - arg_name=self._inputs.replace("inputs", "input") + self._inputs_doc, arg_name=self._inputs.replace("inputs", "input") ), nargs="+", default=None, @@ -104,7 +108,7 @@ def use_ext(ext_mapping): if file_ext in ext_mapping: return ext_mapping[file_ext] - runners = util.default(args_util.get(args, "runners"), []) + runners = args_util.get(args, "runners", default=[]) if args_util.get(args, "ckpt") or os.path.isdir(args.model_file): return "ckpt" elif "tf" in runners or "trt_legacy" in runners: diff --git a/tools/Polygraphy/polygraphy/tools/args/onnx/loader.py b/tools/Polygraphy/polygraphy/tools/args/onnx/loader.py index f5add269..06f82075 100644 --- a/tools/Polygraphy/polygraphy/tools/args/onnx/loader.py +++ b/tools/Polygraphy/polygraphy/tools/args/onnx/loader.py @@ -91,9 +91,10 @@ def add_to_parser(self, parser): "--external-data-size-threshold", help="The size threshold, in bytes, above which tensor data will be stored in the external file. " "Tensors smaller that this threshold will remain in the ONNX file. " + "Optionally, use a `K`, `M`, or `G` suffix to indicate KiB, MiB, or GiB respectively." 
+ "For example, `--external-data-size-threshold=16M` is equivalent to `--external-data-size-threshold=16777216`" "Has no effect if `--save-external-data` is not set", default=None, - type=int, ) self.group.add_argument( "--no-save-all-tensors-to-one-file", @@ -111,7 +112,7 @@ def parse(self, args): save_external_data = save_external_data[0] or "" self.save_external_data = save_external_data - self.size_threshold = args_util.get(args, "external_data_size_threshold") + self.size_threshold = args_util.parse_num_bytes(args_util.get(args, "external_data_size_threshold")) self.all_tensors_to_one_file = args_util.get(args, "all_tensors_to_one_file") def add_save_onnx(self, script, loader_name): diff --git a/tools/Polygraphy/polygraphy/tools/args/pluginref/__init__.py b/tools/Polygraphy/polygraphy/tools/args/pluginref/__init__.py new file mode 100644 index 00000000..62ae8f8d --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/args/pluginref/__init__.py @@ -0,0 +1,16 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from polygraphy.tools.args.pluginref.runner import * diff --git a/tools/Polygraphy/polygraphy/tools/args/pluginref/runner.py b/tools/Polygraphy/polygraphy/tools/args/pluginref/runner.py new file mode 100644 index 00000000..337a6c5b --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/args/pluginref/runner.py @@ -0,0 +1,43 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from polygraphy import mod +from polygraphy.tools.args.base import BaseArgs +from polygraphy.tools.script import make_invocable + + +@mod.export() +class PluginRefArgs(BaseArgs): + def register(self, maker): + from polygraphy.tools.args.model import ModelArgs + from polygraphy.tools.args.onnx import OnnxLoaderArgs + + if isinstance(maker, OnnxLoaderArgs): + self.onnx_loader_args = maker + if isinstance(maker, ModelArgs): + self.model_args = maker + + def check_registered(self): + assert self.onnx_loader_args is not None, "OnnxLoaderArgs is required!" + assert self.model_args is not None, "ModelArgs is required!" 
+ + def add_to_script(self, script): + script.add_import(imports=["GsFromOnnx"], frm="polygraphy.backend.onnx") + script.add_import(imports=["PluginRefRunner"], frm="polygraphy.backend.pluginref") + + onnx_name = self.onnx_loader_args.add_onnx_loader(script) + loader_name = script.add_loader(make_invocable("GsFromOnnx", onnx_name), "pluginref") + script.add_runner(make_invocable("PluginRefRunner", loader_name)) diff --git a/tools/Polygraphy/polygraphy/tools/args/trt/config.py b/tools/Polygraphy/polygraphy/tools/args/trt/config.py index 53ccdf93..15cf1a48 100644 --- a/tools/Polygraphy/polygraphy/tools/args/trt/config.py +++ b/tools/Polygraphy/polygraphy/tools/args/trt/config.py @@ -85,11 +85,19 @@ def get_shapes(lst, idx): @mod.export() class TrtConfigArgs(BaseArgs): - def __init__(self, strict_types_default=None): + def __init__(self, strict_types_default=None, random_data_calib_warning=True): + """ + Args: + strict_types_default (bool): Whether strict types should be enabled by default. + random_data_calib_warning (bool): + Whether to issue a warning when randomly generated data is being used + for calibration. + """ super().__init__() self.model_args = None self.data_loader_args = None self._strict_types_default = strict_types_default + self._random_data_calib_warning = random_data_calib_warning def add_to_parser(self, parser): trt_config_args = parser.add_argument_group( @@ -132,8 +140,8 @@ def add_to_parser(self, parser): trt_config_args.add_argument( "--int8", help="Enable int8 precision in TensorRT. " - "If no calibration cache is provided, this option will cause TensorRT to run int8 calibration " - "using the Polygraphy data loader to provide calibration data. ", + "If calibration is required but no calibration cache is provided, this option will cause TensorRT to run " + "int8 calibration using the Polygraphy data loader to provide calibration data. 
", action="store_true", default=None, ) @@ -163,12 +171,12 @@ def add_to_parser(self, parser): default=None, ) - # Workspace uses float to enable scientific notation (e.g. 1e9) trt_config_args.add_argument( "--workspace", metavar="BYTES", - help="Memory in bytes to allocate for the TensorRT builder's workspace", - type=float, + help="Amount of memory, in bytes, to allocate for the TensorRT builder's workspace. " + "Optionally, use a `K`, `M`, or `G` suffix to indicate KiB, MiB, or GiB respectively." + "For example, `--workspace=16M` is equivalent to `--workspace=16777216`", default=None, ) trt_config_args.add_argument( @@ -253,9 +261,24 @@ def add_to_parser(self, parser): default="load_config", ) trt_config_args.add_argument( - "--trt-safety-restricted", help="Enable safety scope checking in TensorRT", action="store_true", default=None, + "--trt-safety-restricted", + help="Enable safety scope checking in TensorRT", + action="store_true", + default=None, dest="restricted", ) + trt_config_args.add_argument( + "--use-dla", + help="[EXPERIMENTAL] Use DLA as the default device type", + action="store_true", + default=None, + ) + trt_config_args.add_argument( + "--allow-gpu-fallback", + help="[EXPERIMENTAL] Allow layers unsupported on the DLA to fall back to GPU. 
Has no effect if --dla is not set.", + action="store_true", + default=None, + ) def register(self, maker): from polygraphy.tools.args.data_loader import DataLoaderArgs @@ -267,9 +290,9 @@ def register(self, maker): self.data_loader_args = maker def parse(self, args): - trt_min_shapes = util.default(args_util.get(args, "trt_min_shapes"), []) - trt_max_shapes = util.default(args_util.get(args, "trt_max_shapes"), []) - trt_opt_shapes = util.default(args_util.get(args, "trt_opt_shapes"), []) + trt_min_shapes = args_util.get(args, "trt_min_shapes", default=[]) + trt_max_shapes = args_util.get(args, "trt_max_shapes", default=[]) + trt_opt_shapes = args_util.get(args, "trt_opt_shapes", default=[]) default_shapes = TensorMetadata() if self.model_args is not None: @@ -278,8 +301,7 @@ def parse(self, args): self.profile_dicts = parse_profile_shapes(default_shapes, trt_min_shapes, trt_opt_shapes, trt_max_shapes) - workspace = args_util.get(args, "workspace") - self.workspace = int(workspace) if workspace is not None else workspace + self.workspace = args_util.parse_num_bytes(args_util.get(args, "workspace")) self.tf32 = args_util.get(args, "tf32") self.fp16 = args_util.get(args, "fp16") @@ -323,6 +345,9 @@ def parse(self, args): self.trt_config_script = args_util.get(args, "trt_config_script") self.trt_config_func_name = args_util.get(args, "trt_config_func_name") + self.use_dla = args_util.get(args, "use_dla") + self.allow_gpu_fallback = args_util.get(args, "allow_gpu_fallback") + def add_trt_config_loader(self, script): profiles = [] for (min_shape, opt_shape, max_shape) in self.profile_dicts: @@ -353,6 +378,18 @@ def add_trt_config_loader(self, script): if self.calibration_base_class: script.add_import(imports=["tensorrt as trt"]) + if ( + self.data_loader_args.is_using_random_data() + and (not self.calibration_cache or not os.path.exists(self.calibration_cache)) + and self._random_data_calib_warning + ): + G_LOGGER.warning( + "Int8 Calibration is using randomly generated 
input data.\n" + "This could negatively impact accuracy if the inference-time input data is dissimilar " + "to the randomly generated calibration data.\n" + "You may want to consider providing real data via the --data-loader-script option." + ) + calibrator = make_invocable( "Calibrator", data_loader=data_loader_name if data_loader_name else inline(safe("DataLoader()")), @@ -395,6 +432,8 @@ def add_trt_config_loader(self, script): algorithm_selector=algo_selector, sparse_weights=self.sparse_weights, tactic_sources=self.tactic_sources, + use_dla=self.use_dla, + allow_gpu_fallback=self.allow_gpu_fallback, ) if config_loader_str is not None: script.add_import(imports=["CreateConfig as CreateTrtConfig"], frm="polygraphy.backend.trt") diff --git a/tools/Polygraphy/polygraphy/tools/args/trt_legacy.py b/tools/Polygraphy/polygraphy/tools/args/trt_legacy.py index 0d440b82..5ff8ea16 100644 --- a/tools/Polygraphy/polygraphy/tools/args/trt_legacy.py +++ b/tools/Polygraphy/polygraphy/tools/args/trt_legacy.py @@ -13,11 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import os + from polygraphy import constants, mod, util from polygraphy.logger import G_LOGGER from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs -from polygraphy.tools.script import make_invocable +from polygraphy.tools.script import assert_identifier, inline, make_invocable, safe @mod.export() @@ -49,6 +51,7 @@ def add_to_parser(self, parser): def register(self, maker): from polygraphy.tools.args.model import ModelArgs + from polygraphy.tools.args.data_loader import DataLoaderArgs from polygraphy.tools.args.onnx.loader import OnnxLoaderArgs from polygraphy.tools.args.tf.loader import TfLoaderArgs from polygraphy.tools.args.trt.config import TrtConfigArgs @@ -69,19 +72,34 @@ def register(self, maker): self.trt_engine_save_args = maker if isinstance(maker, TrtRunnerArgs): self.trt_runner_args = maker + if isinstance(maker, DataLoaderArgs): + self.data_loader_args = maker def check_registered(self): assert self.model_args is not None, "ModelArgs is required!" assert self.trt_engine_loader_args is not None, "TrtEngineLoaderArgs is required!" 
def parse(self, args): - self.trt_outputs = args_util.get(args, "trt_outputs") + self.trt_outputs = args_util.get_outputs(args, "trt_outputs") self.caffe_model = args_util.get(args, "caffe_model") self.batch_size = args_util.get(args, "batch_size") self.save_uff = args_util.get(args, "save_uff") self.uff_order = args_util.get(args, "uff_order") self.preprocessor = args_util.get(args, "preprocessor") + self.calibration_cache = args_util.get(args, "calibration_cache") + calib_base = args_util.get(args, "calibration_base_class") + self.calibration_base_class = None + if calib_base is not None: + calib_base = safe(assert_identifier(calib_base)) + self.calibration_base_class = inline(safe("trt.{:}", inline(calib_base))) + + self.quantile = args_util.get(args, "quantile") + self.regression_cutoff = args_util.get(args, "regression_cutoff") + + self.use_dla = args_util.get(args, "use_dla") + self.allow_gpu_fallback = args_util.get(args, "allow_gpu_fallback") + def add_to_script(self, script): script.add_import(imports=["TrtLegacyRunner"], frm="polygraphy.backend.trt_legacy") G_LOGGER.warning("Legacy TensorRT runner only supports implicit batch TensorFlow/UFF, ONNX, and Caffe models") @@ -133,6 +151,25 @@ def add_to_script(self, script): make_invocable("LoadNetworkFromUff", loader_name, uff_order=self.uff_order), "uff_network_loader" ) + calibrator = None + if ( + self.trt_config_args.int8 and self.data_loader_args is not None + ): # We cannot do calibration if there is no data loader. 
+ script.add_import(imports=["Calibrator"], frm="polygraphy.backend.trt") + script.add_import(imports=["DataLoader"], frm="polygraphy.comparator") + data_loader_name = self.data_loader_args.add_data_loader(script) + if self.calibration_base_class: + script.add_import(imports=["tensorrt as trt"]) + + calibrator = make_invocable( + "Calibrator", + data_loader=data_loader_name if data_loader_name else inline(safe("DataLoader()")), + cache=self.calibration_cache, + BaseClass=self.calibration_base_class, + quantile=self.quantile, + regression_cutoff=self.regression_cutoff, + ) + runner_str = make_invocable( "TrtLegacyRunner", network_loader=loader_name, @@ -144,6 +181,10 @@ def add_to_script(self, script): save_engine=self.trt_engine_save_args.path, layerwise=self.trt_outputs == constants.MARK_ALL, plugins=self.trt_engine_loader_args.plugins, + int8=self.trt_config_args.int8, + calibrator=calibrator, + use_dla=self.use_dla, + allow_gpu_fallback=self.allow_gpu_fallback, ) script.add_runner(runner_str) diff --git a/tools/Polygraphy/polygraphy/tools/args/util/util.py b/tools/Polygraphy/polygraphy/tools/args/util/util.py index 0127c30b..861921b0 100644 --- a/tools/Polygraphy/polygraphy/tools/args/util/util.py +++ b/tools/Polygraphy/polygraphy/tools/args/util/util.py @@ -16,7 +16,7 @@ from polygraphy import constants, mod, util from polygraphy.common import TensorMetadata -from polygraphy.logger import G_LOGGER +from polygraphy.logger import G_LOGGER, LogMode from polygraphy.tools.script import Script, ensure_safe, inline, safe np = mod.lazy_import("numpy") @@ -94,17 +94,18 @@ def run_script(script_func, *args): @mod.export() -def get(args, attr): +def get(args, attr, default=None): """ - Gets a command-line argument if it exists, otherwise returns None. + Gets a command-line argument if it exists, otherwise returns a default value. Args: args: The command-line arguments. attr (str): The name of the command-line argument. 
+ default (obj): The default value to return if the argument is not found. Defaults to None. """ if hasattr(args, attr): return getattr(args, attr) - return None + return default @mod.export() @@ -268,6 +269,31 @@ def parse_shape_dim(buf): name = tensor_meta_arg meta.add(name, dtype, shape) + + new_style = [] + for m_arg in meta_args: + arg = m_arg + if includes_shape: + arg = arg.replace(",", ":[", 1) + if includes_dtype: + arg = arg.replace(",", "]:", 1) + else: + arg += "]" + + arg = arg.replace(",auto", ":auto") + arg = arg.replace(",", ":") + + if includes_shape: + arg = arg.replace("x", ",") + + new_style.append(arg) + + G_LOGGER.warning( + "The old shape syntax is deprecated and will be removed in a future version of Polygraphy\n" + "See the CHANGELOG for the motivation behind this deprecation.", + mode=LogMode.ONCE, + ) + G_LOGGER.warning("Instead of: '{:}', use: '{:}'\n".format(" ".join(meta_args), " ".join(new_style))) return meta @@ -314,3 +340,41 @@ def parse_meta(meta_args, includes_shape=True, includes_dtype=True): if all((includes_shape and "[" in arg) or (includes_dtype and "," not in arg) for arg in meta_args): return parse_meta_new_impl(meta_args, includes_shape, includes_dtype) return parse_meta_legacy(meta_args, includes_shape, includes_dtype) + + +@mod.export() +def parse_num_bytes(num_bytes_arg): + """ + Parses an argument that indicates a number of bytes. The argument may use scientific notation, + or contain a `K`, `M`, or `G` suffix (case-insensitive), indicating `KiB`, `MiB`, or `GiB` respectively. + If the number is fractional, it will be truncated to the nearest integer value. + + If the provided argument is `None`, `None` is returned. + + Args: + num_bytes_arg (str): The argument indicating the number of bytes. + + Returns: + int: The number of bytes. 
+ """ + if num_bytes_arg is None: + return None + + num_component = num_bytes_arg # Numerical component of the argument + multiplier = 1 + + suffix_mulitplier = {"K": 1 << 10, "M": 1 << 20, "G": 1 << 30} + for suffix, mult in suffix_mulitplier.items(): + if num_bytes_arg.upper().endswith(suffix): + num_component = num_bytes_arg.upper().rstrip(suffix) + multiplier = mult + break + + try: + return int(float(num_component) * multiplier) + except: + G_LOGGER.critical( + "Could not convert {:} to a number of bytes. " + "Please use either an integer (e.g. 16000000), scientific notation (e.g. 16e6), " + "or a number with a valid suffix: K, M, or G (e.g. 16M).".format(num_bytes_arg) + ) diff --git a/tools/Polygraphy/polygraphy/tools/data/README.md b/tools/Polygraphy/polygraphy/tools/data/README.md new file mode 100644 index 00000000..a2cc789c --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/data/README.md @@ -0,0 +1,24 @@ +# Data + +## Table of Contents + +- [Introduction](#introduction) +- [Subtools](#subtools) +- [Usage](#usage) + + +## Introduction + +The `data` tool can be used to manipulate input and output data created by Polygraphy. + + +## Subtools + +- `to-input` has two functions: + 1. Converts outputs written by `--save-outputs` to `feed_dict`s compatible with `--load-inputs` + 2. Merges outputs written by `--save-outputs` with inputs written by `--save-inputs` into `feed_dict`s compatible with `--load-inputs` + + +## Usage + +See `polygraphy data -h` for usage information. 
diff --git a/tools/Polygraphy/polygraphy/tools/data/__init__.py b/tools/Polygraphy/polygraphy/tools/data/__init__.py new file mode 100644 index 00000000..11187e0c --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/data/__init__.py @@ -0,0 +1 @@ +from polygraphy.tools.data.data import Data diff --git a/tools/Polygraphy/polygraphy/tools/data/data.py b/tools/Polygraphy/polygraphy/tools/data/data.py new file mode 100644 index 00000000..d9a32bfa --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/data/data.py @@ -0,0 +1,37 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from polygraphy.tools.base import Tool +from polygraphy.tools.data.subtool import ToInput + + +class Data(Tool): + """ + Manipulate input and output data generated by other Polygraphy subtools. 
+ """ + + def __init__(self): + super().__init__("data") + + def add_parser_args(self, parser): + subparsers = parser.add_subparsers(title="Data Manipulation Subtools", dest="subtool") + subparsers.required = True + + SUBTOOLS = [ + ToInput(), + ] + + for subtool in SUBTOOLS: + subtool.setup_parser(subparsers) diff --git a/tools/Polygraphy/polygraphy/tools/data/subtool/__init__.py b/tools/Polygraphy/polygraphy/tools/data/subtool/__init__.py new file mode 100644 index 00000000..172919ca --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/data/subtool/__init__.py @@ -0,0 +1 @@ +from polygraphy.tools.data.subtool.to_input import ToInput diff --git a/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py b/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py new file mode 100644 index 00000000..ff21bd0f --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/data/subtool/to_input.py @@ -0,0 +1,71 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from collections import OrderedDict + +from polygraphy import util +from polygraphy.comparator import RunResults +from polygraphy.json import load_json +from polygraphy.logger import G_LOGGER +from polygraphy.tools.base import Tool + + +class ToInput(Tool): + """ + Combines and converts one or more input/output files generated by + Polygraphy into a single file usable with --load-inputs. 
+ """ + + def __init__(self): + super().__init__("to-input") + + def add_parser_args(self, parser): + parser.add_argument( + "paths", help="Path(s) to file(s) containing input or output data from Polygraphy", nargs="+" + ) + parser.add_argument("-o", "--output", help="Path to the file to generate", required=True) + + def run(self, args): + inputs = [] + + def update_inputs(new_inputs, path): + nonlocal inputs + + if inputs and len(inputs) != len(new_inputs): + G_LOGGER.warning( + "The provided files have different numbers of iterations.\n" + "Note: Inputs currently contains {:} iterations, but the data in {:} contains {:} iterations. " + "Some iterations will contain incomplete data".format(len(inputs), path, len(new_inputs)) + ) + + # Pad to appropriate length + inputs += [OrderedDict()] * (len(new_inputs) - len(inputs)) + + for inp, new_inp in zip(inputs, new_inputs): + inp.update(new_inp) + + for path in args.paths: + # Note: It's important we have encode/decode JSON methods registered + # for the types we care about, e.g. RunResults. Importing the class should generally guarantee this. + data = load_json(path) + if isinstance(data, RunResults): + for _, iters in data.items(): + update_inputs(iters, path) + else: + if not util.is_sequence(data): + data = [data] + update_inputs(data, path) + + util.save_json(inputs, args.output, description="input file containing {:} iteration(s)".format(len(inputs))) diff --git a/tools/Polygraphy/polygraphy/tools/debug/README.md b/tools/Polygraphy/polygraphy/tools/debug/README.md index bd4978ad..4d3280c5 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/README.md +++ b/tools/Polygraphy/polygraphy/tools/debug/README.md @@ -37,6 +37,13 @@ All the `debug` tools work on the same general principles: any artifacts specified to `--artifacts` are moved into a `bad` directory. +Therefore, the general form of most `debug` subtools is: +``` +polygraphy debug [--artifacts per_iteration_files_to_sort...] 
\ + --check <checker_script_or_tool> [options to checker_script_or_tool] +``` + + ## Subtools `debug` provides subtools for different tasks: @@ -62,21 +69,7 @@ All the `debug` tools work on the same general principles: - [EXPERIMENTAL] `reduce` can reduce failing ONNX models to a minimal subgraph of failing nodes. This can make further debugging significantly easier. - You can invoke it with the model and a command that can check intermediate models. - The intermediate models will be written to `polygraphy_debug.onnx` by default. - For example, to reduce a model with accuracy errors: - - ```bash - polygraphy debug reduce model.onnx -o reduced.onnx \ - --check polygraphy run polygraphy_debug.onnx --onnxrt --trt - ``` - - When using a model with dynamic shapes, you can use `--model-input-shapes` to freeze the - shapes of the intermediate tensors. In case ONNX shape inference is not able to freeze shapes, - you can enable `--force-fallback-shape-inference`. - Alternatively, you can use `--no-reduce-inputs` so that the model inputs are not modified. - This can be useful in cases where it may not be trivial to implement a `--check` command - that can determine the shapes to use for intermediate tensors. + See the [example](../../../examples/cli/debug/02_reducing_failing_onnx_models/) for details. - [EXPERIMENTAL] `repeat` can run an arbitrary command repeatedly, sorting generated artifacts into `good` and `bad` directories. 
This is more general than the other `debug` subtools, and is diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/artifact_sorter.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/artifact_sorter.py index 7abfda7c..673b6be4 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/artifact_sorter.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/artifact_sorter.py @@ -99,9 +99,17 @@ def add_to_parser(self, parser): nargs="+", ) - artifact_sorter_args.add_argument( + output_show = artifact_sorter_args.add_mutually_exclusive_group() + output_show.add_argument( "--show-output", - help="Show output from the --check command. Defaults to capturing output instead. ", + help="Show output from the --check command even for passing iterations. " + "By default, output from passing iterations is captured. ", + action="store_true", + ) + output_show.add_argument( + "--hide-fail-output", + help="Suppress output from the --check command for failing iterations. " + "By default, output from failing iterations is displayed. 
", action="store_true", ) @@ -144,6 +152,7 @@ def parse(self, args): self.artifacts = util.default(args_util.get(args, "artifacts"), []) self.output = args_util.get(args, "artifacts_dir") self.show_output = args_util.get(args, "show_output") + self.hide_fail_output = args_util.get(args, "hide_fail_output") self.remove_intermediate = args_util.get(args, "remove_intermediate") self.fail_codes = args_util.get(args, "fail_codes") self.ignore_fail_codes = args_util.get(args, "ignore_fail_codes") @@ -271,7 +280,7 @@ def is_success(status): status = sp.run(self.check, stdout=sp.PIPE, stderr=sp.PIPE) success = is_success(status) - if self.show_output: + if self.show_output or (not success and not self.hide_fail_output): stderr_log_level = G_LOGGER.WARNING if success else G_LOGGER.ERROR G_LOGGER.info("========== CAPTURED STDOUT ==========\n{:}".format(status.stdout.decode())) G_LOGGER.log( diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py index 0d22a0fa..3011a1f2 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/base.py @@ -16,7 +16,7 @@ import contextlib import os -from polygraphy import mod, util, config +from polygraphy import mod, util from polygraphy.logger import G_LOGGER from polygraphy.tools.args import ( DataLoaderArgs, diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/diff_tactics.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/diff_tactics.py index 102658a1..e01433de 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/diff_tactics.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/diff_tactics.py @@ -41,19 +41,27 @@ def add_parser_args(self, parser): "By default, this tool will search for files in directories called 'good' and 'bad'", default="", ) - parser.add_argument("--good", help="A directory containing good tactic replay files. 
", default=None) - parser.add_argument("--bad", help="A directory containing bad tactic replay files. ", default=None) + parser.add_argument( + "--good", + help="A directory containing good tactic replay files or a single good tactic replay file. ", + default=None, + ) + parser.add_argument( + "--bad", + help="A directory containing bad tactic replay files or a single bad tactic replay file. ", + default=None, + ) def run(self, args): if args.dir is None and (args.good is None or args.bad is None): G_LOGGER.critical("Either `--dir`, or both `--good` and `--bad` must be specified.") - def load_tactics(dir): + def load_tactics(dirpath): """ Load all tactic replays from the specified directory into a single dictionary. Args: - dir (str): Directory containing zero or more tactic replay files. + dirpath (str): Directory containing zero or more tactic replay files. Returns: dict[str, Set[polygraphy.backend.trt.algorithm_selector.Algorithm]]: @@ -68,7 +76,10 @@ def try_load_replay(path): tactics = defaultdict(set) replay_paths = [] - for path in glob.iglob(os.path.join(dir, "**"), recursive=True): + search_paths = ( + glob.iglob(os.path.join(dirpath, "**"), recursive=True) if os.path.isdir(dirpath) else [dirpath] + ) + for path in search_paths: replay = try_load_replay(path) if replay is None: G_LOGGER.verbose("{:} does not look like a tactic replay file, skipping.".format(path)) diff --git a/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py b/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py index ec5c6e6a..9c870de7 100644 --- a/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py +++ b/tools/Polygraphy/polygraphy/tools/debug/subtool/reduce.py @@ -178,7 +178,10 @@ def add_parser_args(self, parser): def run(self, args): if not self.arg_groups[OnnxSaveArgs].path and not args.min_good: - G_LOGGER.critical("Either --output or --min-good must be provided!") + G_LOGGER.critical( + "--output (where to write the reduced model) and/or " + "--min-good (where 
to write a reduced model that passes) must be provided!" + ) model = self.arg_groups[OnnxLoaderArgs].load_onnx() num_orig_nodes = len(model.graph.node) @@ -187,11 +190,16 @@ def run(self, args): # shape inference to figure out the new shapes of intermediate tensors. user_input_metadata = self.arg_groups[ModelArgs].input_shapes if user_input_metadata: - model = gs.export_onnx(tools_util.override_input_shapes(gs.import_onnx(model), user_input_metadata)) + model = gs.export_onnx( + tools_util.override_input_shapes(onnx_backend.gs_from_onnx(model), user_input_metadata) + ) if self.arg_groups[OnnxShapeInferenceArgs].do_shape_inference: model = onnx_backend.infer_shapes(model) - GRAPH = gs.import_onnx(model) + # Lower Constant nodes into Constant tensors + # If we don't do this, the outputs of Constant nodes may be incorrectly marked + # as variable inputs. Further, fallback shape inference does not apply to Constant nodes. + GRAPH = onnx_util.lower_constant_nodes(onnx_backend.gs_from_onnx(model)) _layerwise_outputs = None _layerwise_meta = None @@ -209,7 +217,7 @@ def layerwise(model, include_data=False): if self.arg_groups[OnnxShapeInferenceArgs].force_fallback: G_LOGGER.info("Freezing shapes in the model according to values determined by fallback shape inference") - tools_util.set_shapes_from_layerwise_meta(GRAPH, layerwise(model)) + onnx_util.set_shapes_from_layerwise_meta(GRAPH, layerwise(model)) def fix_graph(graph, model): """ diff --git a/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py b/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py index 02d5a4bd..2a4092a9 100644 --- a/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py +++ b/tools/Polygraphy/polygraphy/tools/inspect/subtool/capability.py @@ -24,7 +24,7 @@ common_backend = mod.lazy_import("polygraphy.backend.common") gs = mod.lazy_import("onnx_graphsurgeon") onnx_backend = mod.lazy_import("polygraphy.backend.onnx") -tools_util = 
mod.lazy_import("polygraphy.tools.util") +onnx_util = mod.lazy_import("polygraphy.backend.onnx.util") trt = mod.lazy_import("tensorrt") trt_backend = mod.lazy_import("polygraphy.backend.trt") trt_util = mod.lazy_import("polygraphy.backend.trt.util") @@ -37,7 +37,7 @@ class UnsupportedNodeDict(TypedDict(lambda: str, lambda: dict)): while trying to parse them, and the range of node indices for the subgraphs where these errors were encountered. - More specifically, it is an `OrderedDict[str, Dict[str, List[Tuple[int]]]]`. + More specifically, it is an ``OrderedDict[str, Dict[str, List[Tuple[int]]]]``. """ def add(self, op, err_string, node_range): @@ -107,8 +107,8 @@ def save_subgraph(onnx_save_args, graph, start, end, prefix="", use_tmp_file=Fal in_dict = {inp.name: inp for node in subgraph_nodes for inp in node.inputs} # Guess graph inputs/outputs by checking all output tensor names against all input tensor names, and vice-versa. - subgraph_inputs = tools_util.meta_from_gs_tensors([in_dict[k] for k in in_dict if k not in out_dict]) - subgraph_outputs = tools_util.meta_from_gs_tensors([out_dict[k] for k in out_dict if k not in in_dict]) + subgraph_inputs = onnx_util.meta_from_gs_tensors([in_dict[k] for k in in_dict if k not in out_dict]) + subgraph_outputs = onnx_util.meta_from_gs_tensors([out_dict[k] for k in out_dict if k not in in_dict]) subgraph = gs.export_onnx(onnx_backend.extract_subgraph(graph, subgraph_inputs, subgraph_outputs)) @@ -176,7 +176,7 @@ def run(self, args): G_LOGGER.info("Graph is fully supported by TensorRT; Will not generate subgraphs.") return - parent_graph = gs.import_onnx(self.arg_groups[OnnxLoaderArgs].load_onnx()) + parent_graph = onnx_backend.gs_from_onnx(self.arg_groups[OnnxLoaderArgs].load_onnx()) def partition(nodelists, offset): """ diff --git a/tools/Polygraphy/polygraphy/tools/registry.py b/tools/Polygraphy/polygraphy/tools/registry.py index 8d4899cc..5813a6eb 100644 --- a/tools/Polygraphy/polygraphy/tools/registry.py +++ 
b/tools/Polygraphy/polygraphy/tools/registry.py @@ -45,6 +45,9 @@ def try_register_tool(module, tool_class): ToolClass = getattr(toolmod, tool_class) TOOL_REGISTRY.append(ToolClass()) except Exception as err: + G_LOGGER.internal_error( + "Could not load command-line tool: {:}.\nNote: Error was: {:}".format(tool_class.lower(), err) + ) TOOL_REGISTRY.append(MissingTool(tool_class.lower(), err=err)) @@ -54,7 +57,7 @@ def try_register_tool(module, tool_class): try_register_tool("polygraphy.tools.surgeon", "Surgeon") try_register_tool("polygraphy.tools.template", "Template") try_register_tool("polygraphy.tools.debug", "Debug") -try_register_tool("polygraphy.tools.to_json", "ToJSON") +try_register_tool("polygraphy.tools.data", "Data") # Check that tool names are unique tool_names = [tool.name for tool in TOOL_REGISTRY] diff --git a/tools/Polygraphy/polygraphy/tools/run/run.py b/tools/Polygraphy/polygraphy/tools/run/run.py index a15bb57c..d15f38e2 100644 --- a/tools/Polygraphy/polygraphy/tools/run/run.py +++ b/tools/Polygraphy/polygraphy/tools/run/run.py @@ -27,6 +27,7 @@ OnnxrtRunnerArgs, OnnxSaveArgs, OnnxShapeInferenceArgs, + PluginRefArgs, Tf2OnnxLoaderArgs, TfConfigArgs, TfLoaderArgs, @@ -65,6 +66,10 @@ def add_runner(option, help): ) add_runner("--tf", help="Run inference using TensorFlow") add_runner("--onnxrt", help="Run inference using ONNX Runtime") + add_runner( + "--pluginref", + help="Run inference for models containing single TensorRT plugins using a CPU reference implementation", + ) # Generate a summary line to add as a comment to the script @@ -91,6 +96,7 @@ def join_list(lst): "trt_legacy": "TensorRT Legacy", "tf": "TensorFlow", "onnxrt": "ONNX Runtime", + "pluginref": "CPU plugin references", } runners = [runner_names[runner] for runner in runners] summary += "between " if len(runners) > 1 else "using " @@ -121,7 +127,10 @@ def __init__(self): self.subscribe_args(OnnxShapeInferenceArgs()) self.subscribe_args(OnnxLoaderArgs(save=True)) 
self.subscribe_args(OnnxrtRunnerArgs()) - self.subscribe_args(TrtConfigArgs()) + self.subscribe_args(PluginRefArgs()) + self.subscribe_args( + TrtConfigArgs(random_data_calib_warning=False) + ) # We run calibration with the inference-time data self.subscribe_args(TrtPluginLoaderArgs()) self.subscribe_args(TrtNetworkLoaderArgs()) self.subscribe_args(TrtEngineSaveArgs(output="save-engine", short_opt=None)) @@ -175,6 +184,7 @@ def build_script(self, args): "onnxrt": self.arg_groups[OnnxrtRunnerArgs].add_to_script, "trt": self.arg_groups[TrtRunnerArgs].add_to_script, "trt_legacy": self.arg_groups[TrtLegacyArgs].add_to_script, + "pluginref": self.arg_groups[PluginRefArgs].add_to_script, }[runner_arg] add_runner_func(script) diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py index 3695e9ef..4ae9dbb3 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/extract.py @@ -18,23 +18,30 @@ from polygraphy import mod from polygraphy.common import TensorMetadata from polygraphy.logger import G_LOGGER -from polygraphy.tools import util as tools_util from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxLoaderArgs, OnnxSaveArgs, OnnxShapeInferenceArgs from polygraphy.tools.args import util as args_util from polygraphy.tools.surgeon.subtool.base import BaseSurgeonSubtool -gs = mod.lazy_import("onnx_graphsurgeon") onnx_backend = mod.lazy_import("polygraphy.backend.onnx") +onnx_util = mod.lazy_import("polygraphy.backend.onnx.util") class Extract(BaseSurgeonSubtool): """ - Extract a subgraph based on the specified inputs and outputs. + Extract a subgraph from an ONNX model based on the specified inputs and outputs. 
""" def __init__(self): super().__init__("extract") - self.subscribe_args(ModelArgs(model_required=True, inputs="--model-inputs", model_type="onnx")) + self.subscribe_args( + ModelArgs( + model_required=True, + inputs="--model-inputs", + model_type="onnx", + inputs_doc="Input shapes to use when generating data to run fallback shape inference. " + "Has no effect if fallback shape inference is not run", + ) + ) self.subscribe_args(DataLoaderArgs()) self.subscribe_args(OnnxShapeInferenceArgs(default=False, enable_force_fallback=True)) self.subscribe_args(OnnxLoaderArgs(output_prefix=None)) @@ -85,7 +92,7 @@ def missing_meta_tensors(input_metadata, output_metadata): # Loads an ONNX-GS graph and create new I/O metadata w/ info missing in user_input/output_metadata. def load_graph_and_io_meta(model): - graph = gs.import_onnx(model) + graph = onnx_backend.gs_from_onnx(model) TENSOR_MAP = graph.tensors() def get_tensor(name): @@ -97,7 +104,7 @@ def get_tensor(name): # or details derived from tensors. 
def make_io_meta(user_meta, tensors): if not user_meta: - return tools_util.meta_from_gs_tensors(tensors) + return onnx_util.meta_from_gs_tensors(tensors) new_meta = copy.copy(user_meta) for name, (dtype, shape) in new_meta.items(): diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py index 13b4b90d..09e46da1 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/insert.py @@ -15,12 +15,13 @@ # from polygraphy import mod from polygraphy.logger import G_LOGGER -from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxLoaderArgs, OnnxSaveArgs, OnnxShapeInferenceArgs +from polygraphy.tools.args import ModelArgs, OnnxLoaderArgs, OnnxSaveArgs, OnnxShapeInferenceArgs from polygraphy.tools.args import util as args_util from polygraphy.tools.args.base import BaseArgs from polygraphy.tools.surgeon.subtool.base import BaseSurgeonSubtool gs = mod.lazy_import("onnx_graphsurgeon") +onnx_backend = mod.lazy_import("polygraphy.backend.onnx") class OnnxNodeArgs(BaseArgs): @@ -66,20 +67,20 @@ def parse(self, args): class Insert(BaseSurgeonSubtool): """ - [EXPERIMENTAL] Insert a single node into a graph with the specified inputs and outputs. + [EXPERIMENTAL] Insert a single node into an ONNX model with the specified inputs and outputs. Any existing subgraph between the inputs and outputs is replaced. 
""" def __init__(self): super().__init__("insert") self.subscribe_args(OnnxNodeArgs()) - self.subscribe_args(ModelArgs(model_required=True, inputs="--model-inputs", model_type="onnx")) + self.subscribe_args(ModelArgs(model_required=True, inputs=None, model_type="onnx")) self.subscribe_args(OnnxShapeInferenceArgs()) self.subscribe_args(OnnxLoaderArgs(output_prefix=None)) self.subscribe_args(OnnxSaveArgs(infer_shapes=True, required=True)) def run_impl(self, args): - graph = gs.import_onnx(super().load_model()) + graph = onnx_backend.gs_from_onnx(super().load_model()) TENSOR_MAP = graph.tensors() diff --git a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py index 2bc9f1a9..807f66ad 100644 --- a/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py +++ b/tools/Polygraphy/polygraphy/tools/surgeon/subtool/sanitize.py @@ -19,17 +19,25 @@ from polygraphy.tools.surgeon.subtool.base import BaseSurgeonSubtool onnx_backend = mod.lazy_import("polygraphy.backend.onnx") +onnx_util = mod.lazy_import("polygraphy.backend.onnx.util") gs = mod.lazy_import("onnx_graphsurgeon") class Sanitize(BaseSurgeonSubtool): """ - Clean up and optimize an ONNX model. + Clean up, optimize, and/or change input shapes in an ONNX model. 
""" def __init__(self): super().__init__("sanitize") - self.subscribe_args(ModelArgs(model_required=True, inputs="--override-inputs", model_type="onnx")) + self.subscribe_args( + ModelArgs( + model_required=True, + inputs="--override-inputs", + model_type="onnx", + inputs_doc="Override input shapes in the model for the given inputs", + ) + ) self.subscribe_args(DataLoaderArgs()) self.subscribe_args(OnnxShapeInferenceArgs(default=True, enable_force_fallback=True)) self.subscribe_args(OnnxLoaderArgs(output_prefix="")) @@ -96,7 +104,7 @@ def do_graph_processing(model): def get_graph(): nonlocal graph if graph is None: - graph = gs.import_onnx(model) + graph = onnx_backend.gs_from_onnx(model) return graph user_input_metadata = self.arg_groups[ModelArgs].input_shapes @@ -108,7 +116,7 @@ def get_graph(): if self.arg_groups[OnnxShapeInferenceArgs].force_fallback: _, layerwise_meta = self.arg_groups[OnnxShapeInferenceArgs].fallback_inference(model) graph = get_graph() - tools_util.set_shapes_from_layerwise_meta(graph, layerwise_meta) + onnx_util.set_shapes_from_layerwise_meta(graph, layerwise_meta) if args.cleanup: graph = get_graph() diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/__init__.py b/tools/Polygraphy/polygraphy/tools/template/subtool/__init__.py index 0e1815b8..cae14de8 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/__init__.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/__init__.py @@ -1 +1,2 @@ from polygraphy.tools.template.subtool.trt_network import TrtNetwork +from polygraphy.tools.template.subtool.trt_config import TrtConfig diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py new file mode 100644 index 00000000..bd44d15b --- /dev/null +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_config.py @@ -0,0 +1,60 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import argparse + +from polygraphy.tools.args import ( + ModelArgs, + DataLoaderArgs, + TrtConfigArgs, +) +from polygraphy.tools.base import Tool +from polygraphy.tools.script import Script, inline, safe + + +class TrtConfig(Tool): + """ + Generate a template script to create a TensorRT builder configuration. + """ + + def __init__(self): + super().__init__("trt-config") + self.subscribe_args(ModelArgs(model_required=False)) + self.subscribe_args(DataLoaderArgs()) + self.subscribe_args(TrtConfigArgs()) + + def add_parser_args(self, parser): + parser.add_argument( + "-o", "--output", help="Path to save the generated script.", type=argparse.FileType("w"), required=True + ) + + def run(self, args): + script = Script(summary="Creates a TensorRT Builder Configuration.", always_create_runners=False) + script.add_import(imports=["func"], frm="polygraphy") + script.add_import(imports=["tensorrt as trt"]) + + loader_name = self.arg_groups[TrtConfigArgs].add_trt_config_loader(script) + if not loader_name: + script.add_import(imports=["CreateConfig"], frm="polygraphy.backend.trt") + loader_name = script.add_loader(safe("CreateConfig()"), "create_trt_config") + params = safe("config") + + script.append_suffix(safe("@func.extend({:})", inline(loader_name))) + script.append_suffix(safe("def load_config({:}):", inline(params))) + script.append_suffix( + safe("\tpass # TODO: Set up the builder configuration here. 
This function should not return anything.") + ) + + script.save(args.output) diff --git a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py index 0c32b9f8..ee34af2e 100644 --- a/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py +++ b/tools/Polygraphy/polygraphy/tools/template/subtool/trt_network.py @@ -29,8 +29,8 @@ class TrtNetwork(Tool): """ - Generate a template script that defines or modifies a TensorRT network - using the TensorRT network API. + Generate a template script to create a TensorRT network using the TensorRT network API, + optionally starting from an existing model. """ def __init__(self): @@ -49,7 +49,7 @@ def add_parser_args(self, parser): def run(self, args): script = Script( - summary="Defines or modifies a TensorRT Network using the Network API.", always_create_runners=False + summary="Creates a TensorRT Network using the Network API.", always_create_runners=False ) script.add_import(imports=["func"], frm="polygraphy") script.add_import(imports=["tensorrt as trt"]) diff --git a/tools/Polygraphy/polygraphy/tools/template/template.py b/tools/Polygraphy/polygraphy/tools/template/template.py index c2b307f4..5803951a 100644 --- a/tools/Polygraphy/polygraphy/tools/template/template.py +++ b/tools/Polygraphy/polygraphy/tools/template/template.py @@ -14,7 +14,7 @@ # limitations under the License. 
# from polygraphy.tools.base import Tool -from polygraphy.tools.template.subtool import TrtNetwork +from polygraphy.tools.template.subtool import TrtNetwork, TrtConfig class Template(Tool): @@ -31,6 +31,7 @@ def add_parser_args(self, parser): SUBTOOLS = [ TrtNetwork(), + TrtConfig(), ] for subtool in SUBTOOLS: diff --git a/tools/Polygraphy/polygraphy/tools/to_json/README.md b/tools/Polygraphy/polygraphy/tools/to_json/README.md deleted file mode 100644 index 0c3d40c5..00000000 --- a/tools/Polygraphy/polygraphy/tools/to_json/README.md +++ /dev/null @@ -1,11 +0,0 @@ -# To-JSON - -This is a temporary tool that exists to convert old pickled data from Polygraphy to JSON. -In 0.27.0, Polygraphy migrated to JSON serialization for security reasons. - -## Usage - -Simply provide the tool with a path to your old pickled data, and an output path: -```bash -polygraphy to-json old_data.pkl -o data.json -``` diff --git a/tools/Polygraphy/polygraphy/tools/to_json/__init__.py b/tools/Polygraphy/polygraphy/tools/to_json/__init__.py deleted file mode 100644 index 735f5eae..00000000 --- a/tools/Polygraphy/polygraphy/tools/to_json/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from polygraphy.tools.to_json.to_json import ToJSON diff --git a/tools/Polygraphy/polygraphy/tools/to_json/to_json.py b/tools/Polygraphy/polygraphy/tools/to_json/to_json.py deleted file mode 100644 index 02727233..00000000 --- a/tools/Polygraphy/polygraphy/tools/to_json/to_json.py +++ /dev/null @@ -1,53 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -from polygraphy import mod -from polygraphy.json import save_json -from polygraphy.tools.base import Tool - - -class ToJSON(Tool): - """ - [TEMPORARY] Converts pickled data to JSON. - This tool will be removed in 0.32.0 since all future versions of Polygraphy - will not use Pickle for serialization. - """ - - def __init__(self): - mod.warn_deprecated("to-json", use_instead="JSON serialization", remove_in="0.32.0") - super().__init__(name="to-json") - - def add_parser_args(self, parser): - parser.add_argument("pickle_data", help="Path to old pickled data") - parser.add_argument("-o", "--output", help="Path at which to write the JSON-ified data.", required=True) - - def run(self, args): - import pickle - - import polygraphy - from polygraphy.comparator.struct import RunResults - - class LegacyRunResults(list): - pass - - polygraphy.comparator.struct.RunResults = LegacyRunResults - - with open(args.pickle_data, "rb") as f: - data = pickle.load(f) - - if isinstance(data, LegacyRunResults): - data = RunResults(list(data)) - - save_json(data, args.output) diff --git a/tools/Polygraphy/polygraphy/tools/util.py b/tools/Polygraphy/polygraphy/tools/util.py index 264506d3..fae64d9d 100644 --- a/tools/Polygraphy/polygraphy/tools/util.py +++ b/tools/Polygraphy/polygraphy/tools/util.py @@ -15,22 +15,13 @@ # from polygraphy import mod -from polygraphy.common import TensorMetadata from polygraphy.logger import G_LOGGER onnx_backend = mod.lazy_import("polygraphy.backend.onnx") +onnx_util = mod.lazy_import("polygraphy.backend.onnx.util") gs = mod.lazy_import("onnx_graphsurgeon") -@mod.export() -def meta_from_gs_tensors(tensors): - """Get TensorMetadata from a list of ONNX-GraphSurgeon tensors""" - meta = TensorMetadata() - for tensor in tensors: - meta.add(tensor.name, tensor.dtype, tensor.shape) - return meta - - @mod.export() def override_input_shapes(graph, 
user_input_metadata): """ @@ -41,11 +32,11 @@ def override_input_shapes(graph, user_input_metadata): """ # We can leverage extract_subgraph if we make sure all the current graph inputs are preserved. # We need to be careful to preserve the order of graph inputs here. - input_metadata = meta_from_gs_tensors(graph.inputs) + input_metadata = onnx_util.meta_from_gs_tensors(graph.inputs) input_metadata.update(user_input_metadata) graph = onnx_backend.extract_subgraph(graph, input_metadata) - G_LOGGER.info("Overriding input shapes to:\n{:}".format(meta_from_gs_tensors(graph.inputs))) + G_LOGGER.info("Overriding input shapes to:\n{:}".format(onnx_util.meta_from_gs_tensors(graph.inputs))) # Have to unset intermediate shapes as they may cause problems. tensors = graph.tensors() @@ -54,11 +45,3 @@ def override_input_shapes(graph, user_input_metadata): tensor.shape = None return graph - - -@mod.export() -def set_shapes_from_layerwise_meta(graph, layerwise_meta): - for tensor in graph.tensors().values(): - if isinstance(tensor, gs.Variable) and tensor.name in layerwise_meta: - tensor.shape = layerwise_meta[tensor.name].shape - tensor.dtype = layerwise_meta[tensor.name].dtype diff --git a/tools/Polygraphy/polygraphy/util/util.py b/tools/Polygraphy/polygraphy/util/util.py index 7ea5956e..fe9c8952 100644 --- a/tools/Polygraphy/polygraphy/util/util.py +++ b/tools/Polygraphy/polygraphy/util/util.py @@ -26,8 +26,6 @@ np = mod.lazy_import("numpy") -mod.export_deprecated_alias("misc", remove_in="0.32.0", use_instead="polygraphy.util")(sys.modules[__name__]) - @mod.export() def check(cond, msg=None): @@ -123,6 +121,46 @@ def check_dict_contains(dct, keys, check_missing=True, dict_name=None, log_func= return not extra_in_dct and not missing_in_dct +@mod.export() +def value_or_from_dict(obj, key, default=None): + """ + Many Polygraphy APIs can accept a `Union[obj, Dict[str, obj]]` to allow + for specifying either a global value, or a per-key (e.g. input, output, etc.) value. 
+ + When a dictionary is provided, the `""` key indicates a default value to use for keys + not otherwise found. + + For example, Polygraphy allows for providing per-output tolerances. Thus, all of the + following are valid arguments: + :: + + # Value directly + atol = 1.0 + + # Per-output values + atol = {"out1": 1.0, "out2": 2.0} + + # Per-output values with default + atol = {"out1": 1.0, "": 2.0} + + Args: + obj (Union[obj, Dict[str, obj]]): The value, or per-key values. + key (str): The key to use when per-key values are provided. + default (obj): The default value to use if it is not found in the dictionary. + + Returns: + obj: The value. + """ + if not isinstance(obj, dict): + return obj + + if key in obj: + return obj[key] + elif "" in obj: + return obj[""] + return default + + @mod.export() def unique_list(sequence): """ @@ -165,7 +203,6 @@ def unique_list(sequence): # >>> y = MyClass() # >>> y.value # [] -@mod.export_deprecated_alias("default_value", remove_in="0.32.0") @mod.export() def default(value, default): """ @@ -781,6 +818,11 @@ def __exit__(self, exc_type, exc_value, traceback): stack.enter_context(obj) +## +## Attribute Helpers +## + + @mod.export() class TempAttrChange(object): """ diff --git a/tools/Polygraphy/tests/README.md b/tools/Polygraphy/tests/README.md new file mode 100644 index 00000000..17f91007 --- /dev/null +++ b/tools/Polygraphy/tests/README.md @@ -0,0 +1,21 @@ +# Tests + +The tests directory closely mirrors the structure of the main `polygraphy` directory. + +## Adding Tests + +For a given submodule, add tests into the corresponding directory under `tests/`. + +## Parallel Test Execution + +By default, the Polygraphy build system runs tests in parallel. However, some tests +may not be good candidates for parallel execution - for example, performance tests. +You can selectively disable parallel execution for these tests using the `pytest.mark.serial` +marker.
The build system will exclude these tests from parallel execution and run them serially instead. + +For example: +```python +@pytest.mark.serial +def my_not_parallel_test(): + ... +``` diff --git a/tools/Polygraphy/tests/backend/onnx/test_loader.py b/tools/Polygraphy/tests/backend/onnx/test_loader.py index 7e4cc1bf..573ffdcb 100644 --- a/tools/Polygraphy/tests/backend/onnx/test_loader.py +++ b/tools/Polygraphy/tests/backend/onnx/test_loader.py @@ -28,6 +28,7 @@ OnnxFromTfGraph, SaveOnnx, extract_subgraph, + gs_from_onnx, infer_shapes, onnx_from_path, ) @@ -45,7 +46,7 @@ def test_set_severity(self, sev): G_LOGGER.severity = sev -class TestOnnxFileLoader(object): +class TestOnnxFromPath(object): def test_basic(self): loader = OnnxFromPath(ONNX_MODELS["identity"].path) assert isinstance(loader(), onnx.ModelProto) @@ -56,6 +57,12 @@ def test_external_data(self): assert isinstance(loader(), onnx.ModelProto) +class TestGsFromOnnx(object): + def test_basic(self): + graph = gs_from_onnx(OnnxFromPath(ONNX_MODELS["identity"].path)) + assert isinstance(graph, gs.Graph) + + class TestExportOnnxFromTf(object): def test_no_optimize(self): loader = OnnxFromTfGraph(TF_MODELS["identity"].loader, optimize=False, fold_constant=False) @@ -177,7 +184,7 @@ def extract_model(): class TestExtractSubgraph(object): def check_model(self, model): - graph = gs.import_onnx(model) + graph = gs_from_onnx(model) assert len(graph.nodes) == 1 assert len(graph.inputs) == 1 @@ -208,7 +215,7 @@ def test_extract_onnx_model_no_output_meta(self, extract_model): def test_extract_onnx_gs_graph(self, extract_model): model, input_meta, output_meta = extract_model - graph = gs.import_onnx(model) + graph = gs_from_onnx(model) subgraph = extract_subgraph(graph, input_meta, output_meta) # Make sure original graph isn't modified. 
assert len(graph.nodes) == 2 diff --git a/tools/Polygraphy/tests/backend/onnxrt/test_runner.py b/tools/Polygraphy/tests/backend/onnxrt/test_runner.py index 563ba75d..e2395418 100644 --- a/tools/Polygraphy/tests/backend/onnxrt/test_runner.py +++ b/tools/Polygraphy/tests/backend/onnxrt/test_runner.py @@ -38,6 +38,7 @@ def test_basic(self): with OnnxrtRunner(SessionFromOnnx(model.loader)) as runner: assert runner.is_active model.check_runner(runner) + assert runner.last_inference_time() is not None assert not runner.is_active def test_shape_output(self): diff --git a/tools/Polygraphy/tests/backend/pluginref/__init__.py b/tools/Polygraphy/tests/backend/pluginref/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tools/Polygraphy/tests/backend/pluginref/test_runner.py b/tools/Polygraphy/tests/backend/pluginref/test_runner.py new file mode 100644 index 00000000..2e69bcdf --- /dev/null +++ b/tools/Polygraphy/tests/backend/pluginref/test_runner.py @@ -0,0 +1,80 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from polygraphy.backend.onnx import GsFromOnnx, OnnxFromPath +from polygraphy.backend.pluginref import PluginRefRunner +from polygraphy.exception import PolygraphyException +from polygraphy.logger import G_LOGGER +from tests.models.meta import ONNX_MODELS + + +class TestLoggerCallbacks(object): + @pytest.mark.parametrize("sev", G_LOGGER.SEVERITY_LETTER_MAPPING.keys()) + def test_set_severity(self, sev): + G_LOGGER.severity = sev + + +class TestPluginRefRunner(object): + def test_can_name_runner(self): + NAME = "runner" + runner = PluginRefRunner(None, name=NAME) + assert runner.name == NAME + + def test_basic(self): + model = ONNX_MODELS["identity"] + with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + assert runner.is_active + model.check_runner(runner) + assert not runner.is_active + + def test_works_on_multiple_nodes(self): + model = ONNX_MODELS["identity_identity"] + with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + model.check_runner(runner) + + def test_fail_on_unsupported_node(self): + model = ONNX_MODELS["and"] + with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + with pytest.raises(PolygraphyException, match="does not have a reference implementation registered!"): + runner.infer({"x": np.ones(shape=(3, 4), dtype=np.bool), "y": np.ones(shape=(3, 4), dtype=np.bool)}) + + @pytest.mark.parametrize( + "names, err", + [ + (["fake-input", "x"], "Extra keys in"), + (["fake-input"], "Some keys are missing"), + ([], "Some keys are missing"), + ], + ) + def test_error_on_wrong_name_feed_dict(self, names, err): + model = ONNX_MODELS["identity"] + with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + with pytest.raises(PolygraphyException, match=err): + runner.infer({name: np.ones(shape=(1, 1, 2, 2), dtype=np.float32) for name in names}) + + def test_error_on_wrong_dtype_feed_dict(self): + model = ONNX_MODELS["identity"] + with 
PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + with pytest.raises(PolygraphyException, match="unexpected dtype."): + runner.infer({"x": np.ones(shape=(1, 1, 2, 2), dtype=np.int32)}) + + def test_error_on_wrong_shape_feed_dict(self): + model = ONNX_MODELS["identity"] + with PluginRefRunner(GsFromOnnx(OnnxFromPath(model.path))) as runner: + with pytest.raises(PolygraphyException, match="incompatible shape."): + runner.infer({"x": np.ones(shape=(1, 1, 3, 2), dtype=np.float32)}) diff --git a/tools/Polygraphy/tests/backend/test_tensorrt_legacy.py b/tools/Polygraphy/tests/backend/test_tensorrt_legacy.py index 5d9bf39a..add4418c 100644 --- a/tools/Polygraphy/tests/backend/test_tensorrt_legacy.py +++ b/tools/Polygraphy/tests/backend/test_tensorrt_legacy.py @@ -18,7 +18,6 @@ from tests.models.meta import TF_MODELS, ONNX_MODELS import numpy as np -import pytest def test_uff_identity(): diff --git a/tools/Polygraphy/tests/backend/tf/test_runner.py b/tools/Polygraphy/tests/backend/tf/test_runner.py index 0f5267bc..5ec7c859 100644 --- a/tools/Polygraphy/tests/backend/tf/test_runner.py +++ b/tools/Polygraphy/tests/backend/tf/test_runner.py @@ -33,6 +33,7 @@ def test_basic(self): with TfRunner(SessionFromGraph(model.loader)) as runner: assert runner.is_active model.check_runner(runner) + assert runner.last_inference_time() is not None assert not runner.is_active @pytest.mark.skip(reason="Non-trivial to set up - requires CUPTI") diff --git a/tools/Polygraphy/tests/backend/trt/test_loader.py b/tools/Polygraphy/tests/backend/trt/test_loader.py index b928a171..74ddfb66 100644 --- a/tools/Polygraphy/tests/backend/trt/test_loader.py +++ b/tools/Polygraphy/tests/backend/trt/test_loader.py @@ -266,6 +266,13 @@ def test_int8(self, identity_builder_network, flag): with loader(builder, network) as config: assert config.get_flag(trt.BuilderFlag.INT8) == flag + @pytest.mark.parametrize("flag", [True, False]) + def test_allow_gpu_fallback(self, identity_builder_network, 
flag): + builder, network = identity_builder_network + loader = CreateConfig(allow_gpu_fallback=flag) + with loader(builder, network) as config: + assert config.get_flag(trt.BuilderFlag.GPU_FALLBACK) == flag + @pytest.mark.skipif( mod.version(trt.__version__) < mod.version("8.0"), reason="API was not available in 7.2 and older" ) @@ -276,6 +283,13 @@ def test_sparse_weights(self, identity_builder_network, flag): with loader(builder, network) as config: assert config.get_flag(trt.BuilderFlag.SPARSE_WEIGHTS) == flag + def test_use_dla(self, identity_builder_network): + builder, network = identity_builder_network + loader = CreateConfig(use_dla=True) + with loader(builder, network) as config: + assert config.default_device_type == trt.DeviceType.DLA + assert config.DLA_core == 0 + with contextlib.suppress(AttributeError): if mod.version(trt.__version__) < mod.version("8.0"): TACTIC_SOURCES_CASES = [ diff --git a/tools/Polygraphy/tests/backend/trt/test_runner.py b/tools/Polygraphy/tests/backend/trt/test_runner.py index 8dde85f7..26f252a1 100644 --- a/tools/Polygraphy/tests/backend/trt/test_runner.py +++ b/tools/Polygraphy/tests/backend/trt/test_runner.py @@ -26,10 +26,12 @@ Profile, TrtRunner, engine_from_network, + network_from_onnx_bytes, ) from polygraphy.exception import PolygraphyException from polygraphy.logger import G_LOGGER from tests.models.meta import ONNX_MODELS +from tests.helper import time_func class TestLoggerCallbacks(object): @@ -52,6 +54,7 @@ def test_basic(self): assert runner.owns_engine assert runner.owns_context model.check_runner(runner) + assert runner.last_inference_time() is not None assert not runner.is_active def test_context(self): @@ -161,13 +164,23 @@ def test_device_views(self, use_view): x.copy_from(np.ones((1,), dtype=np.float32)) outputs = runner.infer( { - "X0": cuda.DeviceView(x.ptr, x.shape, x.dtype) if use_view else x, + "X0": x.view() if use_view else x, "Y0": np.ones((1,), dtype=np.float32), } ) assert 
outputs["identity_out_6"][0] == 2 assert outputs["identity_out_8"][0] == 2 + @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6") + def test_no_output_copy(self): + model = ONNX_MODELS["identity"] + network_loader = NetworkFromOnnxBytes(model.loader) + with TrtRunner(EngineFromNetwork(network_loader)) as runner: + inp = np.ones(shape=(1, 1, 2, 2), dtype=np.float32) + outputs = runner.infer({"x": inp}, copy_outputs_to_host=False) + assert isinstance(outputs["y"], cuda.DeviceView) + assert np.array_equal(outputs["y"].numpy(), inp) + def test_subsequent_infers_with_different_input_types(self): model = ONNX_MODELS["identity"] network_loader = NetworkFromOnnxBytes(model.loader) @@ -204,3 +217,46 @@ def test_cannot_use_device_view_shape_tensor(self): ) as arr: with pytest.raises(PolygraphyException, match="it must reside in host memory"): runner.infer({"data": np.ones((2, 0, 3, 0), dtype=np.float32), "new_shape": arr}) + + @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6") + @pytest.mark.serial + @pytest.mark.parametrize("copy_outputs", [True, False], ids=["output_dtoh", "no_output_copy"]) + @pytest.mark.parametrize("copy_inputs", [True, False], ids=["input_htod", "no_input_copy"]) + def test_infer_overhead(self, copy_inputs, copy_outputs): + inp = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32) + dev_inp = cuda.DeviceArray(shape=inp.shape, dtype=inp.dtype).copy_from(inp) + + out = np.zeros(shape=(1, 2, 1024, 1024), dtype=np.float32) # Using identity model! 
+ dev_out = cuda.DeviceArray(shape=out.shape, dtype=out.dtype) + + stream = cuda.Stream() + + model = ONNX_MODELS["dynamic_identity"] + profiles = [ + Profile().add("X", (1, 2, 1024, 1024), (1, 2, 1024, 1024), (1, 2, 1024, 1024)), + ] + inp_name = list(model.input_metadata.keys())[0] + + with engine_from_network( + network_from_onnx_bytes(model.loader), CreateConfig(profiles=profiles) + ) as engine, engine.create_execution_context() as context, TrtRunner(context) as runner, dev_inp, dev_out: + # Inference outside the TrtRunner + def infer(): + if copy_inputs: + dev_inp.copy_from(inp, stream=stream) + context.execute_async_v2(bindings=[dev_inp.ptr, dev_out.ptr], stream_handle=stream.ptr) + if copy_outputs: + dev_out.copy_to(out, stream=stream) + stream.synchronize() + + native_time = time_func(infer) + + feed_dict = {inp_name: (inp if copy_inputs else dev_inp)} + runner_time = time_func( + lambda: runner.infer(feed_dict, check_inputs=False, copy_outputs_to_host=copy_outputs) + ) + + # The overhead should be less than 0.5ms, or the runtime should be within 5% + print("Absolute difference: {:.5g}".format(runner_time - native_time)) + print("Relative difference: {:.5g}".format(runner_time / native_time)) + assert (runner_time - native_time) < 0.5e-3 or runner_time <= (native_time * 1.05) diff --git a/tools/Polygraphy/tests/comparator/test_comparator.py b/tools/Polygraphy/tests/comparator/test_comparator.py index 82b5e022..f7216c4c 100644 --- a/tools/Polygraphy/tests/comparator/test_comparator.py +++ b/tools/Polygraphy/tests/comparator/test_comparator.py @@ -18,8 +18,9 @@ import numpy as np import pytest import tensorrt as trt -from polygraphy.backend.onnx import BytesFromOnnx, OnnxFromTfGraph +from polygraphy.backend.onnx import BytesFromOnnx, OnnxFromTfGraph, GsFromOnnx from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx +from polygraphy.backend.pluginref import PluginRefRunner from polygraphy.backend.tf import SessionFromGraph, TfRunner from 
polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxBytes, TrtRunner from polygraphy.exception import PolygraphyException @@ -63,18 +64,21 @@ def data(): def test_multiple_runners(self): load_tf = TF_MODELS["identity"].loader build_tf_session = SessionFromGraph(load_tf) - load_serialized_onnx = BytesFromOnnx(OnnxFromTfGraph(load_tf)) + onnx_model = OnnxFromTfGraph(load_tf) + load_serialized_onnx = BytesFromOnnx(onnx_model) build_onnxrt_session = SessionFromOnnx(load_serialized_onnx) load_engine = EngineFromNetwork(NetworkFromOnnxBytes(load_serialized_onnx)) + gs_graph = GsFromOnnx(onnx_model) runners = [ TfRunner(build_tf_session), OnnxrtRunner(build_onnxrt_session), + PluginRefRunner(gs_graph), TrtRunner(load_engine), ] run_results = Comparator.run(runners) - compare_func = CompareFunc.basic_compare_func(check_shapes=mod.version(trt.__version__) >= mod.version("7.0")) + compare_func = CompareFunc.simple(check_shapes=mod.version(trt.__version__) >= mod.version("7.0")) assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func)) assert len(list(run_results.values())[0]) == 1 # Default number of iterations @@ -140,6 +144,6 @@ def test_dim_param_trt_onnxrt(self): ] run_results = Comparator.run(runners) - compare_func = CompareFunc.basic_compare_func(check_shapes=mod.version(trt.__version__) >= mod.version("7.0")) + compare_func = CompareFunc.simple(check_shapes=mod.version(trt.__version__) >= mod.version("7.0")) assert bool(Comparator.compare_accuracy(run_results, compare_func=compare_func)) assert len(list(run_results.values())[0]) == 1 # Default number of iterations diff --git a/tools/Polygraphy/tests/comparator/test_compare.py b/tools/Polygraphy/tests/comparator/test_compare.py index c3d3f789..3555e3aa 100644 --- a/tools/Polygraphy/tests/comparator/test_compare.py +++ b/tools/Polygraphy/tests/comparator/test_compare.py @@ -26,7 +26,7 @@ def test_can_compare_bool(self): iter_result0 = IterationResult(outputs={"output": np.zeros((4, 
4), dtype=np.bool)}) iter_result1 = IterationResult(outputs={"output": np.ones((4, 4), dtype=np.bool)}) - compare_func = CompareFunc.basic_compare_func() + compare_func = CompareFunc.simple() acc = compare_func(iter_result0, iter_result1) assert not acc["output"] @@ -41,7 +41,7 @@ def test_per_output_tol(self, mode): iter_result1 = IterationResult(outputs={OUT0_NAME: OUT_VALS, OUT1_NAME: OUT_VALS + 1}) # With default tolerances, out1 is wrong for the second result. - compare_func = CompareFunc.basic_compare_func() + compare_func = CompareFunc.simple() acc = compare_func(iter_result0, iter_result1) assert acc[OUT0_NAME] assert not acc[OUT1_NAME] @@ -53,9 +53,9 @@ def test_per_output_tol(self, mode): } if mode == "abs": - compare_func = CompareFunc.basic_compare_func(atol=tols) + compare_func = CompareFunc.simple(atol=tols) else: - compare_func = CompareFunc.basic_compare_func(rtol=tols) + compare_func = CompareFunc.simple(rtol=tols) acc = compare_func(iter_result0, iter_result1) assert acc[OUT0_NAME] @@ -70,7 +70,7 @@ def test_per_output_tol_fallback(self, mode): iter_result0 = IterationResult(outputs={OUT0_NAME: OUT_VALS + 1, OUT1_NAME: OUT_VALS}) iter_result1 = IterationResult(outputs={OUT0_NAME: OUT_VALS, OUT1_NAME: OUT_VALS + 1}) - acc = CompareFunc.basic_compare_func()(iter_result0, iter_result1) + acc = CompareFunc.simple()(iter_result0, iter_result1) assert not acc[OUT0_NAME] assert not acc[OUT1_NAME] @@ -80,9 +80,9 @@ def test_per_output_tol_fallback(self, mode): } if mode == "abs": - compare_func = CompareFunc.basic_compare_func(atol=tols) + compare_func = CompareFunc.simple(atol=tols) else: - compare_func = CompareFunc.basic_compare_func(rtol=tols) + compare_func = CompareFunc.simple(rtol=tols) acc = compare_func(iter_result0, iter_result1) assert not acc[OUT0_NAME] @@ -102,9 +102,9 @@ def test_default_tol_in_map(self, mode): } if mode == "abs": - compare_func = CompareFunc.basic_compare_func(atol=tols) + compare_func = CompareFunc.simple(atol=tols) else: 
- compare_func = CompareFunc.basic_compare_func(rtol=tols) + compare_func = CompareFunc.simple(rtol=tols) acc = compare_func(iter_result0, iter_result1) assert acc[OUT0_NAME] @@ -122,7 +122,7 @@ def test_non_matching_outputs(self, shape): iter_result0 = IterationResult(outputs={"output": np.zeros(shape, dtype=np.float32)}) iter_result1 = IterationResult(outputs={"output": np.ones(shape, dtype=np.float32)}) - compare_func = CompareFunc.basic_compare_func() + compare_func = CompareFunc.simple() with G_LOGGER.verbosity(G_LOGGER.ULTRA_VERBOSE): acc = compare_func(iter_result0, iter_result1) @@ -145,7 +145,7 @@ def test_check_error_stat(self, func, check_error_stat): # Even though the max diff is 100, atol=1 should cause this to pass since we're checking # against the mean error. - compare_func = CompareFunc.basic_compare_func(check_error_stat=check_error_stat, atol=1) + compare_func = CompareFunc.simple(check_error_stat=check_error_stat, atol=1) if check_error_stat in ["max", "elemwise"]: assert not compare_func(iter_result0, iter_result1)["output"] @@ -158,10 +158,10 @@ def test_atol_rtol_either_pass(self, check_error_stat): res0 = IterationResult(outputs={"output": np.array([1, 2], dtype=np.float32)}) res1 = IterationResult(outputs={"output": np.array((1.25, 2.5), dtype=np.float32)}) - assert not CompareFunc.basic_compare_func(check_error_stat=check_error_stat)(res0, res1)["output"] + assert not CompareFunc.simple(check_error_stat=check_error_stat)(res0, res1)["output"] - assert CompareFunc.basic_compare_func(check_error_stat=check_error_stat, rtol=0.25)(res0, res1)["output"] - assert CompareFunc.basic_compare_func(check_error_stat=check_error_stat, atol=0.5)(res0, res1)["output"] + assert CompareFunc.simple(check_error_stat=check_error_stat, rtol=0.25)(res0, res1)["output"] + assert CompareFunc.simple(check_error_stat=check_error_stat, atol=0.5)(res0, res1)["output"] def test_atol_rtol_combined_pass(self): # We should also be able to mix them - i.e. 
rtol might enough for some, atol for others. @@ -169,12 +169,12 @@ def test_atol_rtol_combined_pass(self): res0 = IterationResult(outputs={"output": np.array([0, 1, 2, 3], dtype=np.float32)}) res1 = IterationResult(outputs={"output": np.array((0.15, 1.25, 2.5, 3.75), dtype=np.float32)}) - assert not CompareFunc.basic_compare_func()(res0, res1)["output"] + assert not CompareFunc.simple()(res0, res1)["output"] - assert not CompareFunc.basic_compare_func(atol=0.3)(res0, res1)["output"] - assert not CompareFunc.basic_compare_func(rtol=0.25)(res0, res1)["output"] + assert not CompareFunc.simple(atol=0.3)(res0, res1)["output"] + assert not CompareFunc.simple(rtol=0.25)(res0, res1)["output"] - assert CompareFunc.basic_compare_func(atol=0.3, rtol=0.25)(res0, res1)["output"] + assert CompareFunc.simple(atol=0.3, rtol=0.25)(res0, res1)["output"] @pytest.mark.parametrize( "check_error_stat", @@ -201,14 +201,14 @@ def test_per_output_error_stat(self, check_error_stat): ) atol = 0.4125 - assert not CompareFunc.basic_compare_func(atol=atol)(res0, res1)["output0"] + assert not CompareFunc.simple(atol=atol)(res0, res1)["output0"] - assert CompareFunc.basic_compare_func(check_error_stat=check_error_stat, atol=atol)(res0, res1)["output0"] - assert CompareFunc.basic_compare_func(check_error_stat=check_error_stat, atol=atol)(res0, res1)["output1"] + assert CompareFunc.simple(check_error_stat=check_error_stat, atol=atol)(res0, res1)["output0"] + assert CompareFunc.simple(check_error_stat=check_error_stat, atol=atol)(res0, res1)["output1"] def test_invalid_error_stat(self): res0 = IterationResult(outputs={"output": np.array([0, 1, 2, 3], dtype=np.float32)}) res1 = IterationResult(outputs={"output": np.array((0.15, 1.25, 2.5, 3.75), dtype=np.float32)}) with pytest.raises(PolygraphyException, match="Invalid choice"): - CompareFunc.basic_compare_func(check_error_stat="invalid-stat")(res0, res1) + CompareFunc.simple(check_error_stat="invalid-stat")(res0, res1) diff --git 
a/tools/Polygraphy/tests/comparator/test_data_loader.py b/tools/Polygraphy/tests/comparator/test_data_loader.py index 46fdf37e..3c756d19 100644 --- a/tools/Polygraphy/tests/comparator/test_data_loader.py +++ b/tools/Polygraphy/tests/comparator/test_data_loader.py @@ -180,3 +180,19 @@ def test_will_not_give_up_on_first_cache_miss(self): assert np.all(feed_dict["X"] == 0) # Cache can reuse Y, even though it's after X, so we'll get ones from the cache assert np.all(feed_dict["Y"] == 1) + + # The cache should ignore extra data generated by the data loader + def test_ignores_extra_data(self): + SHAPE = (32, 32) + + DATA = [OrderedDict()] + DATA[0]["X"] = np.zeros(SHAPE, dtype=np.int64) + DATA[0]["Y"] = np.zeros(SHAPE, dtype=np.int64) + + cache = DataLoaderCache(DATA) + + cache.set_input_metadata(TensorMetadata().add("X", np.int64, shape=SHAPE)) + + feed_dict = cache[0] + assert list(feed_dict.keys()) == ["X"] + assert np.all(feed_dict["X"] == 0) diff --git a/tools/Polygraphy/tests/cuda/test_cuda.py b/tools/Polygraphy/tests/cuda/test_cuda.py index 829cf790..90f2f6aa 100644 --- a/tools/Polygraphy/tests/cuda/test_cuda.py +++ b/tools/Polygraphy/tests/cuda/test_cuda.py @@ -17,7 +17,8 @@ import pytest import tensorrt as trt from polygraphy import mod, util -from polygraphy.cuda import DeviceArray, Stream, DeviceView +from polygraphy.cuda import DeviceArray, Stream, DeviceView, wrapper, MemcpyKind +from tests.helper import time_func class TestDeviceView(object): @@ -78,6 +79,7 @@ def test_device_buffer_resize(self, shapes): assert buf.allocated_nbytes == shapes.new_bytes assert buf.shape == shapes.new + @pytest.mark.serial # Sometimes the GPU may run out of memory if too many other tests are also running. 
@pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Breaks TRT 6 tests for some reason") def test_large_allocation(self): dtype = np.byte @@ -129,3 +131,39 @@ def test_empty_tensor_to_host(self): assert host_buf.shape == buf.shape assert host_buf.nbytes == 0 assert util.volume(host_buf.shape) == 0 + + @pytest.mark.serial + def test_copy_from_overhead(self): + host_buf = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32) + with DeviceArray(shape=host_buf.shape, dtype=host_buf.dtype) as dev_buf: + memcpy_time = time_func( + lambda: wrapper().memcpy( + dst=dev_buf.ptr, + src=host_buf.ctypes.data, + nbytes=host_buf.nbytes, + kind=MemcpyKind.HostToDevice, + ) + ) + + copy_from_time = time_func(lambda: dev_buf.copy_from(host_buf)) + + print("memcpy time: {:}, copy_from time: {:}".format(memcpy_time, copy_from_time)) + assert copy_from_time <= (memcpy_time * 1.02) + + @pytest.mark.serial + def test_copy_to_overhead(self): + host_buf = np.ones(shape=(1, 2, 1024, 1024), dtype=np.float32) + with DeviceArray(shape=host_buf.shape, dtype=host_buf.dtype) as dev_buf: + memcpy_time = time_func( + lambda: wrapper().memcpy( + dst=host_buf.ctypes.data, + src=dev_buf.ptr, + nbytes=host_buf.nbytes, + kind=MemcpyKind.DeviceToHost, + ) + ) + + copy_to_time = time_func(lambda: dev_buf.copy_to(host_buf)) + + print("memcpy time: {:}, copy_to time: {:}".format(memcpy_time, copy_to_time)) + assert copy_to_time <= (memcpy_time * 1.04) diff --git a/tools/Polygraphy/tests/helper.py b/tools/Polygraphy/tests/helper.py index 079b5ccb..be738dd8 100644 --- a/tools/Polygraphy/tests/helper.py +++ b/tools/Polygraphy/tests/helper.py @@ -14,6 +14,7 @@ # limitations under the License. 
# import os +import time def get_file_size(path): @@ -26,3 +27,16 @@ def is_file_empty(path): def is_file_non_empty(path): return not is_file_empty(path) + + +def time_func(func, warm_up=10, iters=100): + for _ in range(warm_up): + func() + + total = 0 + for _ in range(iters): + start = time.time() + func() + end = time.time() + total += end - start + return total / float(iters) diff --git a/tools/Polygraphy/tests/models/instancenorm.onnx b/tools/Polygraphy/tests/models/instancenorm.onnx new file mode 100644 index 00000000..f70d7041 --- /dev/null +++ b/tools/Polygraphy/tests/models/instancenorm.onnx @@ -0,0 +1,27 @@ + TensorRT-WF:é +9 +x +s +biasy"InstanceNormalization* +epsilon +×#< instancenorm2d_4dims_epsilon*" ™ë?ä×o?Ï…¿Bs*" hÖ¿81=¿6þ·¿BbiasZ +x + + + + +Z +s + + +Z +bias + + +b +y + + + + +B \ No newline at end of file diff --git a/tools/Polygraphy/tests/models/make_reducable.py b/tools/Polygraphy/tests/models/make_reducable.py index c5b00586..563930a6 100644 --- a/tools/Polygraphy/tests/models/make_reducable.py +++ b/tools/Polygraphy/tests/models/make_reducable.py @@ -14,7 +14,7 @@ # limitations under the License. # """ -Helper utility to generate a model to help test the `debug reduce` +Helper utility to generate models to help test the `debug reduce` subtool, which reduces failing ONNX models. """ import os @@ -36,7 +36,19 @@ def add(self, a, b): return self.layer(op="Add", inputs=[a, b], outputs=["add_out"])[0] -# Generates a model with multiple inputs/outputs. 
Something like: +@gs.Graph.register() +def constant(self, values: gs.Constant): + return self.layer(op="Constant", outputs=["constant_out"], attrs={"value": values})[0] + + +def save(graph, model_name): + path = os.path.join(CURDIR, model_name) + print("Writing: {:}".format(path)) + onnx.save(gs.export_onnx(graph), path) + + +# Generates a model with multiple inputs/outputs: +# # X0 Y0 # | | # X1 Y1 @@ -44,27 +56,63 @@ def add(self, a, b): # Z0 # / \ # Z1 Z2 -DTYPE = np.float32 -SHAPE = (1,) +# +def make_multi_input_output(): + DTYPE = np.float32 + SHAPE = (1,) + + X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE) + Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE) + + graph = gs.Graph(inputs=[X0, Y0]) + + X1 = graph.identity(X0) + Y1 = graph.identity(Y0) -X0 = gs.Variable("X0", dtype=DTYPE, shape=SHAPE) -Y0 = gs.Variable("Y0", dtype=DTYPE, shape=SHAPE) + Z0 = graph.add(X1, Y1) + + Z1 = graph.identity(Z0) + Z1.dtype = DTYPE + Z1.shape = SHAPE + + Z2 = graph.identity(Z0) + Z2.dtype = DTYPE + Z2.shape = SHAPE + + graph.outputs = [Z1, Z2] + + save(graph, "reducable.onnx") + + +make_multi_input_output() + + +# Generates a linear model with a Constant node and no inputs: +# +# X0 (Constant) +# | +# X1 (Identity) +# | +# X2 (Identity) +# +def make_constant_linear(): + DTYPE = np.float32 + SHAPE = (4, 4) -graph = gs.Graph(inputs=[X0, Y0]) + graph = gs.Graph() -X1 = graph.identity(X0) -Y1 = graph.identity(Y0) + X0 = graph.constant(gs.Constant("const", values=np.ones(SHAPE, dtype=DTYPE))) + # Explicitly clear shape to trigger the failure condition in reduce + X0.shape = None -Z0 = graph.add(X1, Y1) + X1 = graph.identity(X0) + X2 = graph.identity(X1) + X2.dtype = DTYPE + X2.shape = SHAPE -Z1 = graph.identity(Z0) -Z1.dtype = DTYPE -Z1.shape = SHAPE + graph.outputs = [X2] -Z2 = graph.identity(Z0) -Z2.dtype = DTYPE -Z2.shape = SHAPE + save(graph, "reducable_with_const.onnx") -graph.outputs = [Z1, Z2] -onnx.save(gs.export_onnx(graph), os.path.join(CURDIR, "reducable.onnx")) 
+make_constant_linear() diff --git a/tools/Polygraphy/tests/models/meta.py b/tools/Polygraphy/tests/models/meta.py index 6a268b37..7bb4ea5b 100644 --- a/tools/Polygraphy/tests/models/meta.py +++ b/tools/Polygraphy/tests/models/meta.py @@ -128,6 +128,11 @@ def no_check_implemented(runner): check_runner=no_check_implemented, input_metadata=TensorMetadata().add("X0", shape=(1,), dtype=np.float32).add("Y0", shape=(1,), dtype=np.float32), ), + "reducable_with_const": Model( + path=model_path("reducable_with_const.onnx"), + LoaderType=BytesFromPath, + check_runner=no_check_implemented, + ), "ext_weights": Model( path=model_path("ext_weights.onnx"), LoaderType=OnnxFromPath, @@ -141,8 +146,9 @@ def no_check_implemented(runner): ext_data=model_path("ext_weights_same_dir"), ), "capability": Model( - path=model_path("capability.onnx"), - LoaderType=BytesFromPath, - check_runner=no_check_implemented - ) + path=model_path("capability.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + ), + "instancenorm": Model( + path=model_path("instancenorm.onnx"), LoaderType=BytesFromPath, check_runner=no_check_implemented + ), } diff --git a/tools/Polygraphy/tests/models/reducable_with_const.onnx b/tools/Polygraphy/tests/models/reducable_with_const.onnx new file mode 100644 index 0000000000000000000000000000000000000000..86d9bc620db72b7a4c284cf9eb3bc53595672c87 GIT binary patch literal 358 zcmd;Jx4O;5HIY$>FF8N2xFj*JBtE~iB;G(uB0n#$B0jw+u^^+kv?x6_KQBHnKP5HZ zP>I7Cs!}VOi?u8Nx?0|P^YJtcSnBMX-c7n+lV_%c&c^GY&H zD#1=P!s$d~B@R!hN{}h2y3kB9!D)&q!W1EK%)kJLOp*ec@g_oiTs$0%LL6L794sL0 K#KOfOzzqQNt!uUb literal 0 HcmV?d00001 diff --git a/tools/Polygraphy/tests/test_deprecated_aliases.py b/tools/Polygraphy/tests/test_deprecated_aliases.py index 9a1689ca..2ca1b34f 100644 --- a/tools/Polygraphy/tests/test_deprecated_aliases.py +++ b/tools/Polygraphy/tests/test_deprecated_aliases.py @@ -15,62 +15,6 @@ # -class TestOnnxLoaders(object): - def test_modify_onnx(self): - from 
polygraphy.backend.onnx import ModifyOnnx - - ModifyOnnx(None) - - -class TestOnnxrtLoaders(object): - def test_session_from_onnx_bytes(self): - from polygraphy.backend.onnxrt import SessionFromOnnxBytes - - SessionFromOnnxBytes(None) - - -class TestTrtLoaders(object): - def test_modify_network(self): - from polygraphy.backend.trt import ModifyNetwork - - ModifyNetwork(None) - - -class TestTfLoaders(object): - def test_modify_network(self): - from polygraphy.backend.tf import ModifyGraph - - ModifyGraph(None) - - -class TestUtil(object): - def test_misc(self): - from polygraphy.util import misc - - assert misc.default(None, 1) == 1 - - def test_default_value(self): - from polygraphy import util - - assert util.default_value(None, 1) == 1 - - def test_pickle_load(self): - from polygraphy.util import pickle_load - - try: - assert pickle_load(None) is None - except: - pass - - def test_pickle_save(self): - from polygraphy.util import pickle_save - - try: - assert pickle_save(None, None) is None - except: - pass - - class TestCuda(object): def test_cuda(self): from polygraphy.common import cuda @@ -107,3 +51,10 @@ def test_config(self): class TestUtilJson(object): def test_json(self): from polygraphy.util import Decoder, Encoder, from_json, load_json, save_json, to_json + + +class TestCompareFunc(object): + def test_basic_compare_func(self): + from polygraphy.comparator import CompareFunc + + CompareFunc.basic_compare_func(atol=1, rtol=1) diff --git a/tools/Polygraphy/tests/test_deps.py b/tools/Polygraphy/tests/test_deps.py index 1de3bc0a..09700bf6 100644 --- a/tools/Polygraphy/tests/test_deps.py +++ b/tools/Polygraphy/tests/test_deps.py @@ -121,7 +121,9 @@ def test_can_automatically_install_deps(self, virtualenv_with_poly, cmd): virtualenv_with_poly.env["POLYGRAPHY_AUTOINSTALL_DEPS"] = "1" POLYGRAPHY_BIN = os.path.join(ROOT_DIR, "bin", "polygraphy") - output = virtualenv_with_poly.run(["python3", POLYGRAPHY_BIN] + cmd, capture=True) + cmd = ["python3", POLYGRAPHY_BIN] 
+ cmd + print("Running: {:}".format(" ".join(cmd))) + output = virtualenv_with_poly.run(cmd, capture=True) print(output) assert "is required, but not installed. Attempting to install now" in output diff --git a/tools/Polygraphy/tests/test_examples.py b/tools/Polygraphy/tests/test_examples.py index 2fe79516..a80d29c0 100644 --- a/tools/Polygraphy/tests/test_examples.py +++ b/tools/Polygraphy/tests/test_examples.py @@ -21,38 +21,46 @@ import pytest import tensorrt as trt -from polygraphy import mod, util +from polygraphy import mod from polygraphy.logger import G_LOGGER ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), os.path.pardir)) EXAMPLES_ROOT = os.path.join(ROOT_DIR, "examples") +IGNORE_START_MARKER = "" +IGNORE_STOP_MARKER = "" + # Extract any ``` blocks from the README # Each block is stored as a separate string in the returned list -def load_code_blocks_from_readme(readme, ignore_block): +def load_code_blocks_from_readme(readme): with open(readme, "r") as f: contents = f.read() # Check that the README has all the expected sections. assert "## Introduction" in contents, "All example READMEs should have an 'Introduction' section!" assert "## Running The Example" in contents, "All example READMEs should have a 'Running The Example' section!" 
- def ignore_command(cmd): - return "pip" in cmd - commands = [] with open(readme, "r") as f: in_command_block = False + should_ignore = False block = [] for line in f.readlines(): + if line.strip() == IGNORE_START_MARKER: + should_ignore = True + elif line.strip() == IGNORE_STOP_MARKER: + should_ignore = False + + if should_ignore: + continue + if not in_command_block and "```" in line: block = [line.rstrip()] in_command_block = True elif in_command_block: if "```" in line: in_command_block = False - if not ignore_block(block): - commands.append(copy.copy(block) + [line.rstrip()]) - elif not ignore_command(line): + commands.append(copy.copy(block) + [line.rstrip()]) + else: block.append(line.rstrip()) # commands is List[List[str]] - flatten and remove start/end markers: @@ -61,14 +69,13 @@ def ignore_command(cmd): class Example(object): - def __init__(self, path_components, artifact_names=[], ignore_block=None): + def __init__(self, path_components, artifact_names=[]): self.path = os.path.join(EXAMPLES_ROOT, *path_components) self.artifacts = [os.path.join(self.path, name) for name in artifact_names] - self.ignore_block = util.default(ignore_block, lambda block: False) def __enter__(self): readme = os.path.join(self.path, "README.md") - return load_code_blocks_from_readme(readme, self.ignore_block) + return load_code_blocks_from_readme(readme) def run(self, command): G_LOGGER.info("Running: {:} from cwd: {:}".format(command, self.path)) @@ -90,7 +97,7 @@ def __exit__(self, exc_type, exc_value, traceback): """ for artifact in self.artifacts: print("Checking for the existence of artifact: {:}".format(artifact)) - assert os.path.exists(artifact) + assert os.path.exists(artifact), "{:} does not exist!".format(artifact) if os.path.isdir(artifact): shutil.rmtree(artifact) else: @@ -107,12 +114,8 @@ def __str__(self): Example(["api", "03_interoperating_with_tensorrt"]), Example(["api", "04_int8_calibration_in_tensorrt"], artifact_names=["identity-calib.cache"]), 
Example(["api", "05_using_tensorrt_network_api"]), - Example(["api", "06_immediate_eval_api"], ignore_block=lambda block: "```python" in block[0]), - Example( - ["api", "07_tensorrt_and_dynamic_shapes"], - artifact_names=["dynamic_identity.engine"], - ignore_block=lambda block: "```python" in block[0], - ), + Example(["api", "06_immediate_eval_api"], artifact_names=["identity.engine"]), + Example(["api", "07_tensorrt_and_dynamic_shapes"], artifact_names=["dynamic_identity.engine"]), ] @@ -132,7 +135,10 @@ def test_api_examples(example): Example(["cli", "run", "01_comparing_frameworks"]), Example(["cli", "run", "02_comparing_across_runs"], artifact_names=["system_a_results.json"]), Example(["cli", "run", "03_generating_a_comparison_script"], artifact_names=["compare_trt_onnxrt.py"]), - Example(["cli", "run", "04_defining_a_tensorrt_network_manually"]), + Example( + ["cli", "run", "04_defining_a_tensorrt_network_or_config_manually"], + artifact_names=["my_define_network.py", "my_create_config.py"], + ), Example(["cli", "run", "05_comparing_with_custom_data"]), # Convert Example(["cli", "convert", "01_int8_calibration_in_tensorrt"], artifact_names=["identity.engine"]), @@ -144,8 +150,19 @@ def test_api_examples(example): # Surgeon Example(["cli", "surgeon", "01_isolating_subgraphs"], artifact_names=["subgraph.onnx"]), Example(["cli", "surgeon", "02_folding_constants"], artifact_names=["folded.onnx"]), + Example(["cli", "surgeon", "03_modifying_input_shapes"], artifact_names=["dynamic_identity.onnx"]), # Debug Example(["cli", "debug", "01_debugging_flaky_trt_tactics"], artifact_names=["replays", "golden.json"]), + Example( + ["cli", "debug", "02_reducing_failing_onnx_models"], + artifact_names=[ + "inputs.json", + "layerwise_golden.json", + "layerwise_inputs.json", + "initial_reduced.onnx", + "final_reduced.onnx", + ], + ), ] diff --git a/tools/Polygraphy/tests/tools/args/onnx/test_loader.py b/tools/Polygraphy/tests/tools/args/onnx/test_loader.py index 
ed7f0475..2397d7c5 100644 --- a/tools/Polygraphy/tests/tools/args/onnx/test_loader.py +++ b/tools/Polygraphy/tests/tools/args/onnx/test_loader.py @@ -18,6 +18,7 @@ import os import tempfile +import pytest from polygraphy import util from polygraphy.backend.onnx import onnx_from_path from polygraphy.tools.args import DataLoaderArgs, ModelArgs, OnnxLoaderArgs, OnnxSaveArgs, OnnxShapeInferenceArgs @@ -81,6 +82,11 @@ def test_shape_inference_ext_data(self): class TestOnnxSaveArgs(object): + def test_defaults(self): + arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoaderArgs()]) + arg_group.parse_args([]) + assert arg_group.size_threshold is None + def test_external_data(self): model = onnx_from_path(ONNX_MODELS["const_foldable"].path) arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoaderArgs()]) @@ -125,6 +131,19 @@ def test_no_all_tensors_to_one_file(self): outfiles = glob.glob(os.path.join(outdir, "*")) assert len(outfiles) == 4 + @pytest.mark.parametrize( + "arg, expected", + [ + ("16", 16), + ("1e9", 1e9), + ("2M", 2 << 20), + ], + ) + def test_size_threshold_parsing(self, arg, expected): + arg_group = ArgGroupTestHelper(OnnxSaveArgs(), deps=[ModelArgs(), OnnxLoaderArgs()]) + arg_group.parse_args(["--external-data-size-threshold", arg]) + assert arg_group.size_threshold == expected + class TestOnnxShapeInferenceArgs(object): def test_shape_inference_disabled_on_fallback(self): diff --git a/tools/Polygraphy/tests/tools/args/test_data_loader.py b/tools/Polygraphy/tests/tools/args/test_data_loader.py index 8333f3c8..929803e5 100644 --- a/tools/Polygraphy/tests/tools/args/test_data_loader.py +++ b/tools/Polygraphy/tests/tools/args/test_data_loader.py @@ -20,16 +20,17 @@ import pytest from polygraphy import util from polygraphy.common import TensorMetadata +from polygraphy.exception import PolygraphyException from polygraphy.tools.args import DataLoaderArgs, ModelArgs from tests.tools.args.helper import ArgGroupTestHelper - 
ARG_CASES = [ (["--seed=123"], ["seed"], [123]), (["--int-min=23", "--int-max=94"], ["int_range"], [(23, 94)]), (["--float-min=2.3", "--float-max=9.4"], ["float_range"], [(2.3, 9.4)]), ([], ["val_range"], [None], [(0.0, 1.0)]), # When not specified, this should default to None. (["--val-range", "[0.0,2.3]"], ["val_range"], [{"": (0.0, 2.3)}]), + (["--val-range", "[1,5]"], ["val_range"], [{"": (1, 5)}]), # Should work for integral quantities (["--val-range", "inp0:[0.0,2.3]", "inp1:[4.5,9.6]"], ["val_range"], [{"inp0": (0.0, 2.3), "inp1": (4.5, 9.6)}]), ( ["--val-range", "[-1,0]", "inp0:[0.0,2.3]", "inp1:[4.5,9.6]"], @@ -101,3 +102,16 @@ def my_load_data(): data = list(data_loader) assert len(data) == 5 assert all(np.all(d["inp"] == np.ones((3, 5), dtype=np.float32) * 6.4341) for d in data) + + @pytest.mark.parametrize( + "opts,expected_err", + [ + (["--val-range", "x:[y,2]"], "could not be parsed as a number"), + (["--val-range", "x:[1,2,3]"], "expected to receive exactly 2 values, but received 3"), + ], + ) + def test_val_range_errors(self, opts, expected_err): + arg_group = ArgGroupTestHelper(DataLoaderArgs()) + + with pytest.raises(PolygraphyException, match=expected_err): + arg_group.parse_args(opts) diff --git a/tools/Polygraphy/tests/tools/args/test_util.py b/tools/Polygraphy/tests/tools/args/test_util.py index a2e3640a..b43d50b1 100644 --- a/tools/Polygraphy/tests/tools/args/test_util.py +++ b/tools/Polygraphy/tests/tools/args/test_util.py @@ -16,7 +16,7 @@ import numpy as np import pytest -from polygraphy import mod +from polygraphy.exception import PolygraphyException from polygraphy.tools.args import util as args_util from polygraphy.tools.script import inline, safe @@ -86,3 +86,32 @@ def script_add(script, arg0=0, arg1=0): assert args_util.run_script(script_add) == 0 assert args_util.run_script(script_add, 1) == 1 assert args_util.run_script(script_add, 1, 2) == 3 + + +class TestParseNumBytes(object): + def test_none(self): + assert 
args_util.parse_num_bytes(None) is None + + @pytest.mark.parametrize( + "arg, expected", + [ + ("16", 16), + ("1e9", 1e9), + ("2M", 2 << 20), + ("2.3m", int(2.3 * (1 << 20))), + ("4.3K", int(4.3 * (1 << 10))), + ("7k", 7 << 10), + ("1G", 1 << 30), + ("2.5g", int(2.5 * (1 << 30))), + ], + ) + def test_num_bytes(self, arg, expected): + assert args_util.parse_num_bytes(arg) == expected + + @pytest.mark.parametrize("arg", ["hi", "4.5x", "2.3.4"]) + def test_negative(self, arg): + with pytest.raises( + PolygraphyException, + match="Could not convert {:} to a number of bytes".format(arg), + ): + args_util.parse_num_bytes(arg) diff --git a/tools/Polygraphy/tests/tools/args/trt/test_config.py b/tools/Polygraphy/tests/tools/args/trt/test_config.py index c1555776..d98736d9 100644 --- a/tools/Polygraphy/tests/tools/args/trt/test_config.py +++ b/tools/Polygraphy/tests/tools/args/trt/test_config.py @@ -31,6 +31,10 @@ def trt_config_args(): class TestTrtConfigArgs(object): + def test_defaults(self, trt_config_args): + trt_config_args.parse_args([]) + assert trt_config_args.workspace is None + def test_create_config(self, trt_config_args): trt_config_args.parse_args([]) builder, network = create_network() @@ -44,6 +48,7 @@ def test_create_config(self, trt_config_args): ("--int8", "INT8"), ("--fp16", "FP16"), ("--tf32", "TF32"), + ("--allow-gpu-fallback", "GPU_FALLBACK"), ], ) def test_precision_flags(self, trt_config_args, arg, flag): @@ -51,11 +56,36 @@ def test_precision_flags(self, trt_config_args, arg, flag): pytest.skip("TF32 support was added in 7.1") trt_config_args.parse_args([arg]) - builder, network = create_network() + builder, network = create_network() with builder, network, trt_config_args.create_config(builder, network=network) as config: assert config.get_flag(getattr(trt.BuilderFlag, flag)) + @pytest.mark.parametrize( + "workspace, expected", + [ + ("16", 16), + ("1e9", 1e9), + ("2M", 2 << 20), + ], + ) + def test_workspace(self, trt_config_args, workspace, 
expected): + trt_config_args.parse_args(["--workspace", workspace]) + assert trt_config_args.workspace == expected + + builder, network = create_network() + with builder, network, trt_config_args.create_config(builder, network=network) as config: + assert config.max_workspace_size == expected + + def test_dla(self, trt_config_args): + trt_config_args.parse_args(["--use-dla"]) + assert trt_config_args.use_dla + + builder, network = create_network() + with builder, network, trt_config_args.create_config(builder, network=network) as config: + assert config.default_device_type == trt.DeviceType.DLA + assert config.DLA_core == 0 + @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("8.0"), reason="SAFETY_SCOPE was added in TRT 8") def test_restricted_flags(self, trt_config_args): trt_config_args.parse_args(["--trt-safety-restricted"]) diff --git a/tools/Polygraphy/tests/tools/common.py b/tools/Polygraphy/tests/tools/common.py index b833295f..56a347d6 100644 --- a/tools/Polygraphy/tests/tools/common.py +++ b/tools/Polygraphy/tests/tools/common.py @@ -75,3 +75,7 @@ def run_polygraphy_template(additional_opts=[], disable_verbose=False, *args, ** def run_polygraphy_debug(additional_opts=[], disable_verbose=False, *args, **kwargs): return run_subtool("debug", additional_opts, disable_verbose, *args, **kwargs) + + +def run_polygraphy_data(additional_opts=[], disable_verbose=False, *args, **kwargs): + return run_subtool("data", additional_opts, disable_verbose, *args, **kwargs) diff --git a/tools/Polygraphy/tests/tools/test_data.py b/tools/Polygraphy/tests/tools/test_data.py new file mode 100644 index 00000000..fbb5604d --- /dev/null +++ b/tools/Polygraphy/tests/tools/test_data.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import numpy as np +from polygraphy import util +from tests.models.meta import ONNX_MODELS +from tests.tools.common import run_polygraphy_data, run_polygraphy_run + + +class TestToInput(object): + def test_merge_inputs_outputs(self): + with util.NamedTemporaryFile() as inps, util.NamedTemporaryFile() as outs, util.NamedTemporaryFile() as merged: + run_polygraphy_run( + [ONNX_MODELS["identity"].path, "--onnxrt", "--save-inputs", inps.name, "--save-outputs", outs.name], + disable_verbose=True, + ) + + run_polygraphy_data(["to-input", inps.name, outs.name, "-o", merged.name]) + + merged_data = util.load_json(merged.name) + assert len(merged_data) == 1 + assert list(merged_data[0].keys()) == ["x", "y"] + assert all(isinstance(val, np.ndarray) for val in merged_data[0].values()) diff --git a/tools/Polygraphy/tests/tools/test_debug.py b/tools/Polygraphy/tests/tools/test_debug.py index 8741d8b1..62e794f1 100644 --- a/tools/Polygraphy/tests/tools/test_debug.py +++ b/tools/Polygraphy/tests/tools/test_debug.py @@ -71,8 +71,8 @@ def make_path(prefix, *args): EXPECTED_OUTPUT = dedent( """ - [I] Loaded 2 good tactic replays. - [I] Loaded 2 bad tactic replays. + [I] Loaded {num} good tactic replays. + [I] Loaded {num} bad tactic replays. 
[I] Found potentially bad tactics: [I] Layer: layer0 Algorithms: ["(Implementation: 0, Tactic: 2) | Inputs: (('TensorFormat.LINEAR', 'DataType.FLOAT'),) | Outputs: (('TensorFormat.LINEAR', 'DataType.FLOAT'),)"] @@ -82,11 +82,11 @@ def make_path(prefix, *args): class TestDiffTactics(object): - def check_output(self, status, expected_output): + def check_output(self, status, expected_output, expected_num=2): output = "\n".join( line for line in status.stdout.strip().splitlines() if "Loading tactic replay file from " not in line ) - assert output == expected_output.strip() + assert output == expected_output.format(num=expected_num).strip() def test_dir(self, replay_dir): replay_dir, expected_output = replay_dir @@ -101,6 +101,17 @@ def test_good_bad(self, replay_dir): status = run_polygraphy_debug(["diff-tactics", "--good", good, "--bad", bad], disable_verbose=True) self.check_output(status, expected_output) + def test_good_bad_file(self, replay_dir): + replay_dir, expected_output = replay_dir + + def find_file(dirpath, filename): + return glob.glob(os.path.join(dirpath, "**", filename), recursive=True)[0] + + good = find_file(os.path.join(replay_dir, "good"), "0.json") + bad = find_file(os.path.join(replay_dir, "bad"), "1.json") + status = run_polygraphy_debug(["diff-tactics", "--good", good, "--bad", bad], disable_verbose=True) + self.check_output(status, expected_output, expected_num=1) + @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("8.0"), reason="Unsupported for TRT 7.2 and older") class TestBuild(object): @@ -416,6 +427,35 @@ def test_reduce_shape_inference(self, opts): assert tuple(graph.inputs[0].shape) == (1, 2, 5, 5) assert tuple(graph.outputs[0].shape) == (1, 2, 5, 5) + def test_reduce_with_constant(self): + # Should be no failure when models including Constant nodes use fallback + # shape inference; Constant nodes will be lowered to constant tensors. 
+ with tempfile.TemporaryDirectory() as outdir: + run_polygraphy_debug( + [ + "reduce", + ONNX_MODELS["reducable_with_const"].path, + "--no-shape-inference", + "--mode=linear", + "--output=reduced.onnx", + ] + + [ + "--check", + TestReduce.FAKE_REDUCE_CHECKER, + "polygraphy_debug.onnx", + "--fail-node", + "onnx_graphsurgeon_node_3", + ], + disable_verbose=True, + cwd=outdir, + ) + model = onnx_from_path(os.path.join(outdir, "reduced.onnx")) + graph = gs.import_onnx(model) + assert len(graph.nodes) == 1 + assert graph.nodes[0].name == "onnx_graphsurgeon_node_3" + # Outputs of Constant nodes should not become Variables; thus the model should have no inputs. + assert not graph.inputs + class TestRepeat(object): @pytest.mark.parametrize( diff --git a/tools/Polygraphy/tests/tools/test_run.py b/tools/Polygraphy/tests/tools/test_run.py index d8e727ef..c5fe7335 100644 --- a/tools/Polygraphy/tests/tools/test_run.py +++ b/tools/Polygraphy/tests/tools/test_run.py @@ -490,3 +490,13 @@ def test_save_load_inputs(self): @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6") def test_runner_coexistence(self): run_polygraphy_run([TF_MODELS["identity"].path, "--model-type=frozen", "--tf", "--onnxrt", "--trt"]) + + +class TestPluginRef(object): + def test_basic(self): + run_polygraphy_run([ONNX_MODELS["identity"].path, "--pluginref"]) + + @pytest.mark.skipif(mod.version(trt.__version__) < mod.version("7.0"), reason="Unsupported for TRT 6") + @pytest.mark.parametrize("model", ["identity", "instancenorm"]) + def test_ref_implementations(self, model): + run_polygraphy_run([ONNX_MODELS[model].path, "--pluginref", "--onnxrt", "--trt"]) diff --git a/tools/Polygraphy/tests/tools/test_template.py b/tools/Polygraphy/tests/tools/test_template.py index 37f373b2..93ff8d27 100644 --- a/tools/Polygraphy/tests/tools/test_template.py +++ b/tools/Polygraphy/tests/tools/test_template.py @@ -16,7 +16,8 @@ import tensorrt as trt from polygraphy import util 
-from polygraphy.backend.common.loader import InvokeFromScript +from polygraphy.backend.common import InvokeFromScript +from polygraphy.backend.trt import create_network from tests.models.meta import ONNX_MODELS from tests.tools.common import run_polygraphy_template @@ -42,3 +43,25 @@ def test_with_model_file(self): assert isinstance(builder, trt.Builder) assert isinstance(network, trt.INetworkDefinition) assert isinstance(parser, trt.OnnxParser) + + +class TestTrtConfig(object): + def test_no_opts(self): + with util.NamedTemporaryFile("w+", suffix=".py") as template: + run_polygraphy_template(["trt-config", "-o", template.name]) + + builder, network = create_network() + create_config = InvokeFromScript(template.name, "load_config") + with builder, network, create_config(builder, network) as config: + assert isinstance(config, trt.IBuilderConfig) + + def test_opts_basic(self): + with util.NamedTemporaryFile("w+", suffix=".py") as template: + run_polygraphy_template(["trt-config", "--fp16", "--int8", "-o", template.name]) + + builder, network = create_network() + create_config = InvokeFromScript(template.name, "load_config") + with builder, network, create_config(builder, network) as config: + assert isinstance(config, trt.IBuilderConfig) + assert config.get_flag(trt.BuilderFlag.FP16) + assert config.get_flag(trt.BuilderFlag.INT8) diff --git a/tools/Polygraphy/tests/util/test_util.py b/tools/Polygraphy/tests/util/test_util.py index 899b7e03..61d202b7 100644 --- a/tools/Polygraphy/tests/util/test_util.py +++ b/tools/Polygraphy/tests/util/test_util.py @@ -141,3 +141,17 @@ def test_find_in_dirs(): f.write("This file should be found by find_in_dirs") assert util.find_in_dirs("cudart64_*.dll", dirs) == [path] + + +@pytest.mark.parametrize( + "val,key,default,expected", + [ + (1.0, None, None, 1.0), # Basic + ({"inp": "hi"}, "inp", "", "hi"), # Per-key + ({"inp": "hi"}, "out", "default", "default"), # Per-key missing + ({"inp": 1.0, "": 2.0}, "out", 1.5, 2.0), # Per-key 
with default + ], +) +def test_value_or_from_dict(val, key, default, expected): + actual = util.value_or_from_dict(val, key, default) + assert actual == expected