Skip to content

Commit

Permalink
Staging for the 10.4-GA release (#995)
Browse files Browse the repository at this point in the history
Signed-off-by: poweiw <[email protected]>
  • Loading branch information
poweiw authored Sep 11, 2024
1 parent efd73c8 commit 3775e49
Show file tree
Hide file tree
Showing 16 changed files with 455 additions and 353 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
# Version information
#--------------------------------------------------
set(ONNX2TRT_MAJOR 10)
set(ONNX2TRT_MINOR 3)
set(ONNX2TRT_MINOR 4)
set(ONNX2TRT_PATCH 0)
set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

Expand Down
5 changes: 4 additions & 1 deletion ImporterContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,10 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const&
mConstantLayers.insert({uniqueName, static_cast<nvinfer1::IConstantLayer*>(layer)});
}
}
if (node != nullptr && layer != nullptr)
// Set metadata only if the layer is associated with an ONNX node.
// Skip constant layers because constants are represented as initializers in ONNX and should not be associated
// with any ONNX node.
if (node != nullptr && layer != nullptr && layer->getType() != nvinfer1::LayerType::kCONSTANT)
{
processMetadata(this, *node, layer);
}
Expand Down
14 changes: 13 additions & 1 deletion ModelImporter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,17 @@ Status importLocalFunctions(ImporterContext* ctx, ::ONNX_NAMESPACE::ModelProto c
return Status::success();
}

// Internal helper function used for ONNXRT-TRT EP to filter out DDS (data-dependent shape) nodes.
// Returns true when \p op_name is one of the ONNX ops handled as DDS by the support check.
bool isDDSOp(char const* op_name)
{
    static constexpr char const* kDDSOps[] = {"NonMaxSuppression", "NonZero", "RoiAlign"};
    for (char const* ddsOp : kDDSOps)
    {
        if (std::strcmp(op_name, ddsOp) == 0)
        {
            return true;
        }
    }
    return false;
}

std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupportsModel(
void const* serialized_onnx_model, size_t serialized_onnx_model_size, char const* model_path)
{
Expand Down Expand Up @@ -514,9 +525,10 @@ std::pair<bool, ModelImporter::SubGraphSupportVector_t> ModelImporter::doSupport
// 1. It is not a node that requires DDS
// 2. It is not directly connected to an unsupported input
// 3. The importer function did not throw an assertion
bool unsupportedDDS = isDDSOp(node.op_type().c_str());
bool unsupportedInput = (input_node.empty()) ? false : checkForInput(node);
bool unsuccessfulParse = node_idx == error_node;
if (!unsupportedInput && !unsuccessfulParse)
if (!unsupportedDDS && !unsupportedInput && !unsuccessfulParse)
{
if (newSubGraph)
{
Expand Down
178 changes: 69 additions & 109 deletions ModelRefitter.cpp

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions ModelRefitter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,17 @@ class ModelRefitter : public nvonnxparser::IParserRefitter
//! TConvertFunc is a functor for converting ShapedWeights to an array of type T.
//! It should return a T*.
template <typename T, typename TConvertFunc>
ValueOrStatus<size_t> batchnormWeightRefitter(
size_t batchnormWeightRefitter(
::ONNX_NAMESPACE::NodeProto const& node, std::vector<ShapedWeights>& inputs, TConvertFunc&& f);

Status refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model);
Status refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph);
Status refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
Status refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName);
Status refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
Status refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node);
Status refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node);
Status refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node);
void refitOnnxWeights(::ONNX_NAMESPACE::ModelProto const& onnx_model);
void refitOnnxGraph(::ONNX_NAMESPACE::GraphProto const& graph);
void refitOnnxNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
void refitOnnxConstantNode(::ONNX_NAMESPACE::NodeProto const& node, std::string const& graphName);
void refitOnnxBatchNormNode(::ONNX_NAMESPACE::NodeProto const& node, ::ONNX_NAMESPACE::GraphProto const& graph);
void refitOnnxIfNode(::ONNX_NAMESPACE::NodeProto const& node);
void refitOnnxLoopNode(::ONNX_NAMESPACE::NodeProto const& node);
void refitOnnxScanNode(::ONNX_NAMESPACE::NodeProto const& node);

public:
ModelRefitter(nvinfer1::IRefitter* refitter, nvinfer1::ILogger* logger)
Expand Down
4 changes: 2 additions & 2 deletions OnnxAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ onnx2trt::ShapedWeights OnnxAttrs::get<onnx2trt::ShapedWeights>(std::string cons
std::string extName = this->at(key)->ref_attr_name();
bool isExtAttr = isExternalAttribute(extName, mCtx);

::ONNX_NAMESPACE::TensorProto const& onnx_weights_tensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t();
::ONNX_NAMESPACE::TensorProto const& onnxTensor = isExtAttr ? mCtx->localFunctionStack().back().second.at(extName)->t() : this->at(key)->t();
onnx2trt::ShapedWeights weights;
bool success = mCtx->getWeightsContext().convertOnnxWeights(onnx_weights_tensor, &weights);
bool success = mCtx->getWeightsContext().convertOnnxWeights(onnxTensor, &weights, true);
if (!success)
{
throw std::runtime_error{"Unable to convert ONNX weights"};
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

## Supported TensorRT Versions

Development on the this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
Development on this branch is for the latest version of [TensorRT 10.4](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

For previous versions of TensorRT, refer to their respective branches.

Expand All @@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies

- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.2 open source libaries] (https://github.com/NVIDIA/TensorRT/)
- [TensorRT 10.4](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.4 open source libraries](https://github.com/NVIDIA/TensorRT/)

### Building

Expand Down Expand Up @@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

TensorRT 10.1 supports ONNX release 1.16.0. Install it with:
TensorRT 10.4 supports ONNX release 1.16.0. Install it with:

python3 -m pip install onnx==1.16.0

Expand Down
2 changes: 1 addition & 1 deletion ShapeTensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, c
constexpr int32_t minDim = std::numeric_limits<int32_t>::min();
constexpr int32_t maxDim = std::numeric_limits<int32_t>::max();
nvinfer1::ISliceLayer* slice = N_CHECK(ctx->network()->addSlice(data,
shapeTensorToDims(starts, "slice start", 0, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
shapeTensorToDims(starts, "slice start", minDim, maxDim), shapeTensorToDims(sizes, "slice size", 0, maxDim),
shapeTensorToDims(strides, "slide strides", minDim, maxDim)));
setShapeInputIfDynamic(ctx, slice, 1, starts);
setShapeInputIfDynamic(ctx, slice, 2, sizes);
Expand Down
1 change: 1 addition & 0 deletions Status.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
case nvinfer1::DataType::kBOOL: return stream << "bool";
case nvinfer1::DataType::kFP8: return stream << "float8";
case nvinfer1::DataType::kINT4: return stream << "int4";

default: throw std::runtime_error("Unknown dtype");
}
}
Expand Down
11 changes: 10 additions & 1 deletion docs/Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

# ONNX-TensorRT Changelog

# TensorRT 10.4 GA Release - 2024-9-5
For more details, see the 10.4 GA release notes.

- Added support for tensor `axes` for `Pad` operations
- Added support for `BlackmanWindow`, `HammingWindow`, and `HannWindow` operations
- Improved error handling in `IParserRefitter`
- Fixed kernel shape inference in multi-input convolutions

# TensorRT 10.3 GA Release - 2024-8-7
For more details, see the 10.3 GA release notes.

Expand All @@ -14,13 +22,14 @@ For more details, see the 10.2 GA release notes.
- Improved error handling with new macros and classes
- Minor changes to op importers for `GRU` and `Squeeze`

# TensorRT 10.1 GA Release - 2024-6-17
# TensorRT 10.1 GA Release - 2024-6-10
For more details, see the 10.1 GA release notes.

- Added `supportsModelV2` API
- Added support for `DeformConv` operation
- Added support for `PluginV3` TensorRT Plugins
- Marked all IParser and IParserRefitter APIs as `noexcept`
- Shape inputs can be passed to custom ops supported by `IPluginV3`-based plugins by indicating the input indices to be interpreted as shape inputs by a node attribute named `tensorrt_plugin_shape_input_indices`.

# TensorRT 10.0 GA Release - 2024-4-25
For more details, see the 10.0 GA release notes.
Expand Down
12 changes: 6 additions & 6 deletions docs/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Supported ONNX Operators

TensorRT 10.0 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
TensorRT 10.4 supports operators in the inclusive range of opset 9 to opset 20. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.

TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL

Expand Down Expand Up @@ -36,7 +36,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
| BitwiseNot | N |
| BitwiseOr | N |
| BitwiseXor | N |
| BlackmanWindow | N |
| BlackmanWindow | Y |
| Cast | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | |
| CastLike | Y | FP32, FP16, BF16, INT32, INT64, UINT8, BOOL | |
| Ceil | Y | FP32, FP16, BF16 |
Expand Down Expand Up @@ -85,8 +85,8 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
| GridSample | Y | FP32, FP16 | Input must be 4D input.
| GroupNormalization | Y | FP32, FP16, BF16 |
| GRU | Y | FP32, FP16, BF16 | For bidirectional GRUs, activation functions must be the same for both the forward and reverse pass
| HammingWindow | N |
| HannWindow | N |
| HammingWindow | Y |
| HannWindow | Y |
| HardSigmoid | Y | FP32, FP16, BF16 |
| HardSwish | Y | FP32, FP16, BF16 |
| Hardmax | Y | FP32, FP16, BF16 | `axis` dimension of input must be a build-time constant
Expand Down Expand Up @@ -132,7 +132,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
| OptionalGetElement | N |
| OptionalHasElement | N |
| Or | Y | BOOL |
| Pad | Y | FP32, FP16, BF16, INT32, INT64 | `axes` must be an initializer |
| Pad | Y | FP32, FP16, BF16, INT32, INT64 |
| ParametricSoftplus | Y | FP32, FP16, BF16 |
| Pow | Y | FP32, FP16, BF16 |
| PRelu | Y | FP32, FP16, BF16 |
Expand Down Expand Up @@ -184,7 +184,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
| Sin | Y | FP32, FP16, BF16 |
| Sinh | Y | FP32, FP16, BF16 |
| Size | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
| Slice | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
| Softmax | Y | FP32, FP16, BF16 |
| SoftmaxCrossEntropyLoss | N |
| Softplus | Y | FP32, FP16, BF16 |
Expand Down
Loading

0 comments on commit 3775e49

Please sign in to comment.