From f93a2f368d36d88eecdc58dabf89cce36b07a2e7 Mon Sep 17 00:00:00 2001 From: suchang Date: Wed, 30 Oct 2024 16:47:33 +0800 Subject: [PATCH 01/23] feat: add yolov10 node Signed-off-by: suchang --- .../autoware_tensorrt_yolov10/CMakeLists.txt | 175 ++++++ .../autoware_tensorrt_yolov10/README.md | 2 + .../autoware/tensorrt_yolov10/calibrator.hpp | 493 ++++++++++++++++ .../autoware/tensorrt_yolov10/preprocess.hpp | 202 +++++++ .../tensorrt_yolov10/tensorrt_yolov10.hpp | 135 +++++ .../tensorrt_yolov10_node.hpp | 72 +++ .../launch/yolov10.launch.xml | 13 + .../launch/yolov10_single_image.launch.xml | 14 + .../autoware_tensorrt_yolov10/package.xml | 41 ++ .../src/tensorrt_yolov10.cpp | 526 ++++++++++++++++++ .../src/tensorrt_yolov10_node.cpp | 163 ++++++ .../yolov10_single_image_inference_node.cpp | 79 +++ 12 files changed, 1915 insertions(+) create mode 100644 perception/autoware_tensorrt_yolov10/CMakeLists.txt create mode 100644 perception/autoware_tensorrt_yolov10/README.md create mode 100644 perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp create mode 100644 perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp create mode 100644 perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp create mode 100644 perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp create mode 100644 perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml create mode 100644 perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml create mode 100644 perception/autoware_tensorrt_yolov10/package.xml create mode 100644 perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp create mode 100644 perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp create mode 100644 perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp diff --git a/perception/autoware_tensorrt_yolov10/CMakeLists.txt b/perception/autoware_tensorrt_yolov10/CMakeLists.txt new file mode 100644 index 0000000000000..daaa0c5ed77ba --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/CMakeLists.txt @@ -0,0 +1,175 @@ +cmake_minimum_required(VERSION 3.17) +project(autoware_tensorrt_yolov10) + +find_package(tensorrt_common) +if(NOT ${tensorrt_common_FOUND}) + message(WARNING "The tensorrt_common package is not found. Please check its dependencies.") + return() +endif() + +find_package(autoware_cmake REQUIRED) +autoware_package() + +find_package(OpenCV REQUIRED) + +option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF) + +# set flags for CUDA availability +option(CUDA_AVAIL "CUDA available" OFF) +find_package(CUDA) +if(CUDA_FOUND) + find_library(CUBLAS_LIBRARIES cublas HINTS + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + if(CUDA_VERBOSE) + message("CUDA is available!") + message("CUDA Libs: ${CUDA_LIBRARIES}") + message("CUDA Headers: ${CUDA_INCLUDE_DIRS}") + endif() + # Note: cublas_device was depreciated in CUDA version 9.2 + # https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4 + # In LibTorch, CUDA_cublas_device_LIBRARY is used. + unset(CUDA_cublas_device_LIBRARY CACHE) + set(CUDA_AVAIL ON) +else() + message("CUDA NOT FOUND") + set(CUDA_AVAIL OFF) +endif() + +# set flags for TensorRT availability +option(TRT_AVAIL "TensorRT available" OFF) +# try to find the tensorRT modules +find_library(NVINFER nvinfer) +find_library(NVONNXPARSER nvonnxparser) +if(NVINFER AND NVONNXPARSER) + if(CUDA_VERBOSE) + message("TensorRT is available!") + message("NVINFER: ${NVINFER}") + message("NVONNXPARSER: ${NVONNXPARSER}") + endif() + set(TRT_AVAIL ON) +else() + message("TensorRT is NOT Available") + set(TRT_AVAIL OFF) +endif() + +# set flags for CUDNN availability +option(CUDNN_AVAIL "CUDNN available" OFF) +# try to find the CUDNN module +find_library(CUDNN_LIBRARY +NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} +PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} +PATH_SUFFIXES lib lib64 bin +DOC "CUDNN library." +) +if(CUDNN_LIBRARY) + if(CUDA_VERBOSE) + message(STATUS "CUDNN is available!") + message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}") + endif() + set(CUDNN_AVAIL ON) +else() + message("CUDNN is NOT Available") + set(CUDNN_AVAIL OFF) +endif() + +find_package(OpenMP) +if(OpenMP_FOUND) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + +########## +# tensorrt_yolov10 +ament_auto_add_library(${PROJECT_NAME} SHARED + src/tensorrt_yolov10.cpp + src/tensorrt_yolov10_node.cpp +) + +ament_target_dependencies(${PROJECT_NAME} + OpenCV +) + +if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL) + # Officially, add_library supports .cu file compilation. + # However, as of cmake 3.22.1, it seems to fail compilation because compiler flags for + # C++ are directly passed to nvcc (they are originally space separated + # but nvcc assume comma separated as argument of `-Xcompiler` option). + # That is why `cuda_add_library` is used here. +# cuda_add_library(${PROJECT_NAME}_gpu_preprocess +# SHARED +# src/preprocess.cu +# ) + +# target_include_directories(${PROJECT_NAME}_gpu_preprocess PUBLIC +# $ +# $ +# ) + +# target_link_libraries(${PROJECT_NAME} +# ${tensorrt_common_LIBRARIES} +# ${PROJECT_NAME}_gpu_preprocess +# ) +else() + target_link_libraries(${PROJECT_NAME} + ${tensorrt_common_LIBRARIES} + ) +endif() + +target_compile_definitions(${PROJECT_NAME} PRIVATE + TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR} +) + +ament_auto_add_library(yolov10_single_image_inference_node SHARED + src/yolov10_single_image_inference_node.cpp +) + +ament_target_dependencies(yolov10_single_image_inference_node + OpenCV +) + +target_link_libraries(yolov10_single_image_inference_node + ${PROJECT_NAME} + stdc++fs +) + +target_compile_definitions(yolov10_single_image_inference_node PRIVATE + TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR} +) + +rclcpp_components_register_node(yolov10_single_image_inference_node + PLUGIN "autoware::tensorrt_yolov10::Yolov10SingleImageInferenceNode" + EXECUTABLE yolov10_single_image_inference +) + +ament_auto_add_library(${PROJECT_NAME}_node SHARED + src/tensorrt_yolov10_node.cpp +) + +ament_target_dependencies(${PROJECT_NAME}_node + OpenCV +) + +target_link_libraries(${PROJECT_NAME}_node + ${PROJECT_NAME} +) + +target_compile_definitions(${PROJECT_NAME}_node PRIVATE + TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR} +) + +rclcpp_components_register_node(${PROJECT_NAME}_node + PLUGIN "autoware::tensorrt_yolov10::TrtYolov10Node" + EXECUTABLE ${PROJECT_NAME}_node_exe +) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() +endif() + +ament_auto_package(INSTALL_TO_SHARE + launch + config +) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md new file mode 100644 index 0000000000000..2f7006c1c86fe --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -0,0 +1,2 @@ +# autoware_tensorrt_yolov10 + diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp new file mode 100644 index 0000000000000..a3e1fd0e257fb --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/calibrator.hpp @@ -0,0 +1,493 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ + +#include "cuda_utils/cuda_check_error.hpp" +#include "cuda_utils/cuda_unique_ptr.hpp" + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace autoware::tensorrt_yolov10 +{ +class ImageStream +{ +public: + ImageStream( + int batch_size, nvinfer1::Dims input_dims, const std::vector calibration_images) + : batch_size_(batch_size), + calibration_images_(calibration_images), + current_batch_(0), + max_batches_(calibration_images.size() / batch_size_), + input_dims_(input_dims) + { + batch_.resize(batch_size_ * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3]); + } + + int getBatchSize() const { return batch_size_; } + + int getMaxBatches() const { return max_batches_; } + + float * getBatch() { return &batch_[0]; } + + nvinfer1::Dims getInputDims() { return input_dims_; } + + /** + * @brief Preprocess in calibration + * @param[in] images input images + * @param[in] input_dims input dimensions + * @param[in] norm normalization (0.0-1.0) + * @return vector preprocessed data + */ + std::vector preprocess( + const std::vector & images, nvinfer1::Dims input_dims, double norm) + { + std::vector input_h_; + const auto batch_size = images.size(); + input_dims.d[0] = batch_size; + const float input_height = static_cast(input_dims.d[2]); + const float input_width = static_cast(input_dims.d[3]); + std::vector dst_images; + std::vector scales_; + scales_.clear(); + for (const auto & image : images) { + cv::Mat dst_image; + const float scale = std::min(input_width / image.cols, input_height / image.rows); + scales_.emplace_back(scale); + const auto scale_size = cv::Size(image.cols * scale, image.rows * scale); + cv::resize(image, dst_image, scale_size, 0, 0, cv::INTER_CUBIC); + const auto bottom = input_height - dst_image.rows; + const auto right = input_width - dst_image.cols; + copyMakeBorder( + dst_image, dst_image, 0, bottom, 0, right, cv::BORDER_CONSTANT, {114, 114, 114}); + dst_images.emplace_back(dst_image); + } + const auto chw_images = + cv::dnn::blobFromImages(dst_images, norm, cv::Size(), cv::Scalar(), false, false, CV_32F); + + const auto data_length = chw_images.total(); + input_h_.reserve(data_length); + const auto flat = chw_images.reshape(1, data_length); + input_h_ = chw_images.isContinuous() ? flat : flat.clone(); + return input_h_; + } + + /** + * @brief Decode data in calibration + * @param[in] scale normalization (0.0-1.0) + * @return bool succ or fail + */ + bool next(double scale) + { + if (current_batch_ == max_batches_) { + return false; + } + + for (int i = 0; i < batch_size_; ++i) { + auto image = + cv::imread(calibration_images_[batch_size_ * current_batch_ + i].c_str(), cv::IMREAD_COLOR); + std::cout << current_batch_ << " " << i << " Preprocess " + << calibration_images_[batch_size_ * current_batch_ + i].c_str() << std::endl; + auto input = preprocess({image}, input_dims_, scale); + batch_.insert( + batch_.begin() + i * input_dims_.d[1] * input_dims_.d[2] * input_dims_.d[3], input.begin(), + input.end()); + } + + ++current_batch_; + return true; + } + + /** + * @brief Reset calibration + */ + void reset() { current_batch_ = 0; } + +private: + int batch_size_; + std::vector calibration_images_; + int current_batch_; + int max_batches_; + nvinfer1::Dims input_dims_; + std::vector batch_; +}; + +/**Percentile calibration using legacy calibrator*/ +/** + * @class Int8LegacyCalibrator + * @brief Calibrator for Percentile + * @warning We are confirming bug on Tegra like Xavier and Orin. We recommend use MinMax calibrator + */ +class Int8LegacyCalibrator : public nvinfer1::IInt8LegacyCalibrator +{ +public: + Int8LegacyCalibrator( + ImageStream & stream, const std::string calibration_cache_file, + const std::string histogram_cache_file, double scale = 1.0, bool read_cache = true, + double quantile = 0.999999, double cutoff = 0.999999) + : stream_(stream), + calibration_cache_file_(calibration_cache_file), + histogram_cache_file_(histogram_cache_file), + read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + quantile_ = quantile; + cutoff_ = cutoff; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8LegacyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? &calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + + double getQuantile() const noexcept + { + printf("Quantile %f\n", quantile_); + return quantile_; + } + + double getRegressionCutoff(void) const noexcept + { + printf("Cutoff %f\n", cutoff_); + return cutoff_; + } + + const void * readHistogramCache(std::size_t & length) noexcept + { + hist_cache_.clear(); + std::ifstream input(histogram_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(hist_cache_)); + } + + length = hist_cache_.size(); + if (length) { + std::cout << "Using cached histogram table to build the engine" << std::endl; + } else { + std::cout << "New histogram table will be created to build the engine" << std::endl; + } + return length ? &hist_cache_[0] : nullptr; + } + void writeHistogramCache(void const * ptr, std::size_t length) noexcept + { + std::ofstream output(histogram_cache_file_, std::ios::binary); + output.write(reinterpret_cast(ptr), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + const std::string histogram_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; + double quantile_; + double cutoff_; +}; + +/** + * @class Int8LegacyCalibrator + * @brief Calibrator for Percentile + * @warning This calibrator causes crucial accuracy drop for YOLOV10. + */ +class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 +{ +public: + Int8EntropyCalibrator( + ImageStream & stream, const std::string calibration_cache_file, double scale = 1.0, + bool read_cache = true) + : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8EntropyCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? &calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; +}; + +/** + * @class Int8MinMaxCalibrator + * @brief Calibrator for MinMax + * @warning We strongly recommend MinMax calibrator for YOLOV10 + */ +class Int8MinMaxCalibrator : public nvinfer1::IInt8MinMaxCalibrator +{ +public: + Int8MinMaxCalibrator( + ImageStream & stream, const std::string calibration_cache_file, double scale = 1.0, + bool read_cache = true) + : stream_(stream), calibration_cache_file_(calibration_cache_file), read_cache_(read_cache) + { + auto d = stream_.getInputDims(); + input_count_ = stream_.getBatchSize() * d.d[1] * d.d[2] * d.d[3]; + CHECK_CUDA_ERROR(cudaMalloc(&device_input_, input_count_ * sizeof(float))); + scale_ = scale; + auto algType = getAlgorithm(); + switch (algType) { + case (nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION): + std::cout << "CalibrationAlgoType : kLEGACY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2): + std::cout << "CalibrationAlgoType : kENTROPY_CALIBRATION_2" << std::endl; + break; + case (nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION): + std::cout << "CalibrationAlgoType : kMINMAX_CALIBRATION" << std::endl; + break; + default: + std::cout << "No CalibrationAlgType" << std::endl; + break; + } + } + int getBatchSize() const noexcept override { return stream_.getBatchSize(); } + + virtual ~Int8MinMaxCalibrator() { CHECK_CUDA_ERROR(cudaFree(device_input_)); } + + bool getBatch(void * bindings[], const char * names[], int nb_bindings) noexcept override + { + (void)names; + (void)nb_bindings; + + if (!stream_.next(scale_)) { + return false; + } + try { + CHECK_CUDA_ERROR(cudaMemcpy( + device_input_, stream_.getBatch(), input_count_ * sizeof(float), cudaMemcpyHostToDevice)); + } catch (const std::exception & e) { + // Do nothing + } + bindings[0] = device_input_; + return true; + } + + const void * readCalibrationCache(size_t & length) noexcept override + { + calib_cache_.clear(); + std::ifstream input(calibration_cache_file_, std::ios::binary); + input >> std::noskipws; + if (read_cache_ && input.good()) { + std::copy( + std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(calib_cache_)); + } + + length = calib_cache_.size(); + if (length) { + std::cout << "Using cached calibration table to build the engine" << std::endl; + } else { + std::cout << "New calibration table will be created to build the engine" << std::endl; + } + return length ? &calib_cache_[0] : nullptr; + } + + void writeCalibrationCache(const void * cache, size_t length) noexcept override + { + std::ofstream output(calibration_cache_file_, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + ImageStream stream_; + const std::string calibration_cache_file_; + bool read_cache_{true}; + size_t input_count_; + void * device_input_{nullptr}; + std::vector calib_cache_; + std::vector hist_cache_; + double scale_; +}; +} // namespace autoware::tensorrt_yolov10 + +#endif // AUTOWARE__TENSORRT_YOLOV10__CALIBRATOR_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp new file mode 100644 index 0000000000000..7971fbd815deb --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp @@ -0,0 +1,202 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ +#define AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ + +#include +#include +#include +#include +#include + +namespace autoware +{ +namespace tensorrt_yolox +{ +struct Roi +{ + int x; + int y; + int w; + int h; +}; + +/** + * @brief Resize a image using bilinear interpolation on gpus + * @param[out] dst Resized image + * @param[in] src image + * @param[in] d_w width for resized image + * @param[in] d_h height for resized image + * @param[in] d_c channel for resized image + * @param[in] s_w width for input image + * @param[in] s_h height for input image + * @param[in] s_c channel for input image + * @param[in] stream cuda stream + */ +extern void resize_bilinear_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief Letterbox a image on gpus + * @param[out] dst letterbox-ed image + * @param[in] src image + * @param[in] d_w width for letterbox-ing + * @param[in] d_h height for letterbox-ing + * @param[in] d_c channel for letterbox-ing + * @param[in] s_w width for input image + * @param[in] s_h height for input image + * @param[in] s_c channel for input image + * @param[in] stream cuda stream + */ +extern void letterbox_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief NHWC to NHWC conversion + * @param[out] dst converted image + * @param[in] src image + * @param[in] d_w width for a image + * @param[in] d_h height for a image + * @param[in] d_c channel for a image + * @param[in] stream cuda stream + */ +extern void nchw_to_nhwc_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, cudaStream_t stream); + +/** + * @brief Unsigned char to float32 for inference + * @param[out] dst32 converted image + * @param[in] src image + * @param[in] d_w width for a image + * @param[in] d_h height for a image + * @param[in] d_c channel for a image + * @param[in] stream cuda stream + */ +extern void to_float_gpu( + float * dst32, unsigned char * src, int d_w, int d_h, int d_c, cudaStream_t stream); + +/** + * @brief Resize and letterbox a image using bilinear interpolation on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_gpu( + unsigned char * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + cudaStream_t stream); + +/** + * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization + * for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_nhwc_to_nchw32_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, + float norm, cudaStream_t stream); + +/** + * @brief Optimized preprocessing including resize, letterbox, nhwc2nchw, toFloat and normalization + * with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, int s_w, int s_h, int s_c, int batch, + float norm, cudaStream_t stream); + +/** + * @brief Optimized preprocessing including crop, resize, letterbox, nhwc2nchw, toFloat and + * normalization with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] d_roi regions of interest for cropping + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void crop_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h, + int s_c, int batch, float norm, cudaStream_t stream); + +/** + * @brief Optimized multi-scale preprocessing including crop, resize, letterbox, nhwc2nchw, toFloat + * and normalization with batching for YOLOX on gpus + * @param[out] dst processed image + * @param[in] src image + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] d_c channel for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] d_roi regions of interest for cropping + * @param[in] batch batch size + * @param[in] norm normalization + * @param[in] stream cuda stream + */ +extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu( + float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h, + int s_c, int batch, float norm, cudaStream_t stream); + +/** + * @brief Argmax on GPU + * @param[out] dst processed image + * @param[in] src probability map + * @param[in] d_w width for output + * @param[in] d_h height for output + * @param[in] s_w width for input + * @param[in] s_h height for input + * @param[in] s_c channel for input + * @param[in] batch batch size + * @param[in] stream cuda stream + */ +extern void argmax_gpu( + unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch, + cudaStream_t stream); +} // namespace tensorrt_yolox +} // namespace autoware +#endif // AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp new file mode 100644 index 0000000000000..5d50d4e667ff0 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -0,0 +1,135 @@ +#ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ + +#include +#include +#include +#include + +#include +#include +#include + + +namespace autoware::tensorrt_yolov10 +{ +using cuda_utils::CudaUniquePtr; +using cuda_utils::CudaUniquePtrHost; +using cuda_utils::makeCudaStream; +using cuda_utils::StreamUniquePtr; + +struct Object +{ + int32_t x_offset; + int32_t y_offset; + int32_t height; + int32_t width; + float score; + int32_t type; +}; + +using ObjectArray = std::vector; +using ObjectArrays = std::vector; + +class TrtYolov10 +{ +public: + TrtYolov10( + const std::string & model_path, + const std::string & precision, + const int num_class = 8, + const float score_threshold = 0.8, + const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), + const bool use_gpu_preprocess = false, + const uint8_t gpu_id = 0, + std::string calibration_image_list_file = std::string(), + const double norm_factor = 1.0, + [[maybe_unused]] const std::string & cache_dir = "", + const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, + const size_t max_workspace_size = (1 << 30)); + + ~TrtYolov10(); + + bool setCudaDeviceId(const uint8_t gpu_id); + + /** + * @brief return a flag for gpu initialization + */ + bool isGPUInitialized() const { return is_gpu_initialized_; } + + /** + * @brief run inference including pre-process and post-process + * @param[out] objects results for object detection + * @param[in] images batched images + */ + bool doInference( + const std::vector & images, ObjectArrays & objects); + +// /** +// * @brief allocate buffer for preprocess on GPU +// * @param[in] width original image width +// * @param[in] height original image height +// * @warning if we don't allocate buffers using it, "preprocessGpu" allocates buffers at the +// * beginning +// */ +// void initPreprocessBuffer(int width, int height); + +// /** +// * @brief output TensorRT profiles for each layer +// */ +// void printProfiling(void); + + void preProcess(cv::Mat* img, int length, float* factor, std::vector& data); + + void preprocess(const std::vector & images,std::vector& data); + ObjectArray postprocess(float* result,float factor); + +// /** +// * @brief run preprocess on GPU +// * @param[in] images batching images +// */ +// void preprocessGpu(const std::vector & images); + + bool feedforward(const std::vector & images, ObjectArrays & objects); + + std::unique_ptr trt_common_; + + std::vector input_h_; + CudaUniquePtr input_d_; + + size_t out_elem_num_; + size_t out_elem_num_per_batch_; + CudaUniquePtr out_d_; + CudaUniquePtrHost out_h_; + StreamUniquePtr stream_; + + int32_t max_detections_; + std::vector scales_; + + int num_class_; + float score_threshold_; + int batch_size_; + + // GPU id for inference + const uint8_t gpu_id_; + // flag for gpu initialization + bool is_gpu_initialized_; + // host buffer for preprocessing on GPU + CudaUniquePtrHost image_buf_h_; + // device buffer for preprocessing on GPU + CudaUniquePtr image_buf_d_; + + int src_width_; + int src_height_; + + CudaUniquePtr argmax_buf_d_; + double norm_factor_; + int multitask_; + bool use_gpu_preprocess_; + + float factor_=1.0; +}; + +} + +#endif \ No newline at end of file diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp new file mode 100644 index 0000000000000..cc09896f2e1cf --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -0,0 +1,72 @@ +// Copyright 2022 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ + +#include "object_recognition_utils/object_recognition_utils.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + +namespace autoware::tensorrt_yolov10 +{ +// cspell: ignore Semseg +class TrtYolov10Node : public rclcpp::Node +{ +public: + explicit TrtYolov10Node(const rclcpp::NodeOptions & node_options); + +private: + void onConnect(); + void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); + + std::unique_ptr trt_yolov10_; + + image_transport::Subscriber image_sub_; + + rclcpp::TimerBase::SharedPtr timer_; + std::unique_ptr> stop_watch_ptr_; + std::unique_ptr debug_publisher_; + rclcpp::Publisher::SharedPtr objects_pub_; + image_transport::Publisher image_pub_; + +}; + +} // namespace autoware::tensorrt_yolov10 + +#endif // AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml new file mode 100644 index 0000000000000..ce89da449f838 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml new file mode 100644 index 0000000000000..de0baa1e5b090 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml new file mode 100644 index 0000000000000..17fdf53999f76 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -0,0 +1,41 @@ + + + autoware_tensorrt_yolov10 + 0.0.1 + tensorrt library implementation for yolov10 + + Su Chang + Su Chang + + Apache License 2.0 + + ament_cmake_auto + autoware_cmake + cudnn_cmake_module + tensorrt_cmake_module + + cudnn_cmake_module + tensorrt_cmake_module + + autoware_perception_msgs + cuda_utils + cv_bridge + image_transport + libopencv-dev + object_recognition_utils + perception_utils + rclcpp + rclcpp_components + sensor_msgs + tensorrt_common + tier4_perception_msgs + + autoware_image_transport_decompressor + + ament_lint_auto + autoware_lint_common + + + ament_cmake + + diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp new file mode 100644 index 0000000000000..5b601a75beb69 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -0,0 +1,526 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cuda_utils/cuda_check_error.hpp" +#include "cuda_utils/cuda_unique_ptr.hpp" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); + +namespace +{ +static void trimLeft(std::string & s) +{ + s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); +} + +static void trimRight(std::string & s) +{ + s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); +} + +std::string trim(std::string & s) +{ + trimLeft(s); + trimRight(s); + return s; +} + +bool fileExists(const std::string & file_name, bool verbose) +{ + if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(file_name))) { + if (verbose) { + std::cout << "File does not exist : " << file_name << std::endl; + } + return false; + } + return true; +} + +std::vector loadListFromTextFile(const std::string & filename) +{ + assert(fileExists(filename, true)); + std::vector list; + + std::ifstream f(filename); + if (!f) { + std::cout << "failed to open " << filename << std::endl; + assert(0); + } + + std::string line; + while (std::getline(f, line)) { + if (line.empty()) { + continue; + } else { + list.push_back(trim(line)); + } + } + + return list; +} + +std::vector loadImageList(const std::string & filename, const std::string & prefix) +{ + std::vector fileList = loadListFromTextFile(filename); + for (auto & file : fileList) { + if (fileExists(file, false)) { + continue; + } else { + std::string prefixed = prefix + file; + if (fileExists(prefixed, false)) + file = prefixed; + else + std::cerr << "WARNING: couldn't find: " << prefixed << " while loading: " << filename + << std::endl; + } + } + return fileList; +} + +} // anonymous namespace + +namespace autoware::tensorrt_yolov10 +{ +TrtYolov10::TrtYolov10( + const std::string & model_path, + const std::string & precision, + const int num_class, + const float score_threshold, + const tensorrt_common::BuildConfig build_config, + const bool use_gpu_preprocess, + const uint8_t gpu_id, + std::string calibration_image_list_path, + const double norm_factor, + [[maybe_unused]] const std::string & cache_dir, + const tensorrt_common::BatchConfig & batch_config, + const size_t max_workspace_size + ) +: gpu_id_(gpu_id), is_gpu_initialized_(false) +{ + if (!setCudaDeviceId(gpu_id_)) { + return; + } + is_gpu_initialized_ = true; + + num_class_ = num_class; + score_threshold_ = score_threshold; + + src_width_ = -1; + src_height_ = -1; + norm_factor_ = norm_factor; + batch_size_ = batch_config[2]; + multitask_ = 0; + stream_ = makeCudaStream(); + + if (precision == "int8") { + if (build_config.clip_value <= 0.0) { + if (calibration_image_list_path.empty()) { + throw std::runtime_error( + "calibration_image_list_path should be passed to generate int8 engine " + "or specify values larger than zero to clip_value."); + } + } else { + // if clip value is larger than zero, calibration file is not needed + calibration_image_list_path = ""; + } + + int max_batch_size = batch_size_; + nvinfer1::Dims input_dims = tensorrt_common::get_input_dims(model_path); + std::vector calibration_images; + if (calibration_image_list_path != "") { + calibration_images = loadImageList(calibration_image_list_path, ""); + } + tensorrt_yolov10::ImageStream stream(max_batch_size, input_dims, calibration_images); + fs::path calibration_table{model_path}; + std::string ext = ""; + if (build_config.calib_type_str == "Entropy") { + ext = "EntropyV2-"; + } else if ( + build_config.calib_type_str == "Legacy" || build_config.calib_type_str == "Percentile") { + ext = "Legacy-"; + } else { + ext = "MinMax-"; + } + + ext += "calibration.table"; + calibration_table.replace_extension(ext); + fs::path histogram_table{model_path}; + ext = "histogram.table"; + histogram_table.replace_extension(ext); + + std::unique_ptr calibrator; + if (build_config.calib_type_str == "Entropy") { + calibrator.reset( + new tensorrt_yolov10::Int8EntropyCalibrator(stream, calibration_table, norm_factor_)); + + } else if ( + build_config.calib_type_str == "Legacy" || build_config.calib_type_str == "Percentile") { + const double quantile = 0.999999; + const double cutoff = 0.999999; + calibrator.reset(new tensorrt_yolov10::Int8LegacyCalibrator( + stream, calibration_table, histogram_table, norm_factor_, true, quantile, cutoff)); + } else { + calibrator.reset( + new tensorrt_yolov10::Int8MinMaxCalibrator(stream, calibration_table, norm_factor_)); + } + trt_common_ = std::make_unique( + model_path, precision, std::move(calibrator), batch_config, max_workspace_size, build_config); + } else { + trt_common_ = std::make_unique( + model_path, precision, nullptr, batch_config, max_workspace_size, build_config); + } + trt_common_->setup(); + + if (!trt_common_->isInitialized()) { + return; + } + num_class_ = num_class; + score_threshold_ = score_threshold; + + // GPU memory allocation + const auto input_dims = trt_common_->getBindingDimensions(0); + const auto input_size = + std::accumulate(input_dims.d + 1, input_dims.d + input_dims.nbDims, 1, std::multiplies()); + + const auto output_dims = trt_common_->getBindingDimensions(1); + input_d_ = cuda_utils::make_unique(batch_config[2] * input_size); + max_detections_ = output_dims.d[1]; + out_elem_num_ = std::accumulate( + output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); + out_elem_num_ = out_elem_num_ * batch_config[2]; + out_elem_num_per_batch_ = static_cast(out_elem_num_ / batch_config[2]); + out_d_ = cuda_utils::make_unique(out_elem_num_); + out_h_ = cuda_utils::make_unique_host(out_elem_num_, cudaHostAllocPortable); + + if (use_gpu_preprocess) { + use_gpu_preprocess_ = true; + image_buf_h_ = nullptr; + image_buf_d_ = nullptr; + } else { + use_gpu_preprocess_ = false; + } +} + +TrtYolov10::~TrtYolov10() +{ + if (use_gpu_preprocess_) { + if (image_buf_h_) { + image_buf_h_.reset(); + } + if (image_buf_d_) { + image_buf_d_.reset(); + } + if (argmax_buf_d_) { + argmax_buf_d_.reset(); + } + } +} + +bool TrtYolov10::setCudaDeviceId(const uint8_t gpu_id) +{ + cudaError_t cuda_err = cudaSetDevice(gpu_id); + if (cuda_err != cudaSuccess) { + return false; + } else { + return true; + } +} + +// void TrtYolov10::initPreprocessBuffer(int width, int height) +// { +// // if size of source input has been changed... +// if (src_width_ != -1 || src_height_ != -1) { +// if (width != src_width_ || height != src_height_) { +// // Free cuda memory to reallocate +// if (image_buf_h_) { +// image_buf_h_.reset(); +// } +// if (image_buf_d_) { +// image_buf_d_.reset(); +// } +// } +// } +// src_width_ = width; +// src_height_ = height; +// if (use_gpu_preprocess_) { +// auto input_dims = trt_common_->getBindingDimensions(0); +// bool const hasRuntimeDim = std::any_of( +// input_dims.d, input_dims.d + input_dims.nbDims, +// [](int32_t input_dim) { return input_dim == -1; }); +// if (hasRuntimeDim) { +// input_dims.d[0] = batch_size_; +// } +// if (!image_buf_h_) { +// trt_common_->setBindingDimensions(0, input_dims); +// scales_.clear(); +// } +// const float input_height = static_cast(input_dims.d[2]); +// const float input_width = static_cast(input_dims.d[3]); +// if (!image_buf_h_) { +// const float scale = std::min(input_width / width, input_height / height); +// for (int b = 0; b < batch_size_; b++) { +// scales_.emplace_back(scale); +// } +// image_buf_h_ = cuda_utils::make_unique_host( +// width * height * 3 * batch_size_, cudaHostAllocWriteCombined); +// image_buf_d_ = cuda_utils::make_unique(width * height * 3 * batch_size_); +// } +// if (multitask_) { +// size_t argmax_out_elem_num = 0; +// for (int m = 0; m < multitask_; m++) { +// const auto output_dims = +// trt_common_->getBindingDimensions(m + 2); // 0 : input, 1 : output for detections +// const float scale = std::min( +// output_dims.d[3] / static_cast(width), +// output_dims.d[2] / static_cast(height)); +// int out_w = static_cast(width * scale); +// int out_h = static_cast(height * scale); +// // size_t out_elem_num = std::accumulate( +// // output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); +// // out_elem_num = out_elem_num * batch_size_; +// size_t out_elem_num = out_w * out_h * batch_size_; +// argmax_out_elem_num += out_elem_num; +// } +// argmax_buf_h_ = +// cuda_utils::make_unique_host(argmax_out_elem_num, +// cudaHostAllocPortable); +// argmax_buf_d_ = cuda_utils::make_unique(argmax_out_elem_num); +// } +// } +// } + +// void TrtYolov10::printProfiling(void) +// { +// trt_common_->printProfiling(); +// } + +void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vector & data) +{ + // Create a new cv::Mat object for storing the image after conversion + cv::Mat mat; + + // Get the dimensions and number of channels of the input image + int rh = img->rows; // Height of the input image + int rw = img->cols; // Width of the input image + int rc = img->channels(); // Number of channels (e.g., 3 for RGB) + + // Convert the input image from BGR to RGB color space + cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB); + + // Determine the size of the new square image (largest dimension of the input image) + int maxImageLength = rw > rh ? rw : rh; + + // Create a new square image filled with zeros (black) with dimensions maxImageLength x + // maxImageLength + cv::Mat maxImage = cv::Mat::zeros(maxImageLength, maxImageLength, CV_8UC3); + + // Set all pixels to 255 (white) + maxImage = maxImage * 255; + + // Define a Region of Interest (ROI) that covers the entire original image + cv::Rect roi(0, 0, rw, rh); + + // Copy the original image into the ROI of the new square image + mat.copyTo(cv::Mat(maxImage, roi)); + + // Create a new cv::Mat object for storing the resized image + cv::Mat resizeImg; + + // Resize the square image to the specified dimensions (length x length) + cv::resize(maxImage, resizeImg, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR); + + // Calculate the scaling factor and store it in the 'factor' variable + *factor = (float)((float)maxImageLength / (float)length); + + // Convert the resized image to floating-point format with values in range [0, 1] + resizeImg.convertTo(resizeImg, CV_32FC3, 1 / 255.0); + + // Update the height, width, and number of channels for the resized image + rh = resizeImg.rows; + rw = resizeImg.cols; + rc = resizeImg.channels(); + + // Extract each channel of the resized image and store it in the 'data' vector + for (int i = 0; i < rc; ++i) { + // Extract the i-th channel and store it in the appropriate part of the 'data' vector + cv::extractChannel(resizeImg, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i); + } +} + +void TrtYolov10::preprocess(const std::vector & images, std::vector & inputData) +{ + for (cv::Mat image : images) { + int length = 640; // 模型输入的大小 + factor_ = 1.0; + + preProcess(&image, length, &factor_, inputData); + + + // check sum + float sum = std::accumulate(inputData.begin(), inputData.end(), 0.0f); + printf("sum=%.2f\n", sum); + } +} + +// void TrtYolov10::preprocess(const std::vector & images) +// { +// const auto batch_size = images.size(); +// auto input_dims = trt_common_->getBindingDimensions(0); +// input_dims.d[0] = batch_size; +// trt_common_->setBindingDimensions(0, input_dims); +// const float input_height = static_cast(input_dims.d[2]); +// const float input_width = static_cast(input_dims.d[3]); +// std::vector dst_images; +// scales_.clear(); +// for (const auto & image : images) { +// cv::Mat dst_image; +// const float scale = std::min(input_width / image.cols, input_height / image.rows); +// scales_.emplace_back(scale); +// const auto scale_size = cv::Size(image.cols * scale, image.rows * scale); +// cv::resize(image, dst_image, scale_size, 0, 0, cv::INTER_CUBIC); +// const auto bottom = input_height - dst_image.rows; +// const auto right = input_width - dst_image.cols; +// copyMakeBorder(dst_image, dst_image, 0, bottom, 0, right, cv::BORDER_CONSTANT, {114, 114, +// 114}); dst_images.emplace_back(dst_image); +// } +// const auto chw_images = cv::dnn::blobFromImages( +// dst_images, norm_factor_, cv::Size(), cv::Scalar(), false, false, CV_32F); + +// const auto data_length = chw_images.total(); +// input_h_.reserve(data_length); +// const auto flat = chw_images.reshape(1, data_length); +// input_h_ = chw_images.isContinuous() ? flat : flat.clone(); +// CHECK_CUDA_ERROR(cudaMemcpy( +// input_d_.get(), input_h_.data(), input_h_.size() * sizeof(float), cudaMemcpyHostToDevice)); +// // No Need for Sync +// } + +bool TrtYolov10::doInference(const std::vector & images, ObjectArrays & objects) +{ + if (!setCudaDeviceId(gpu_id_)) { + return false; + } + + if (!trt_common_->isInitialized()) { + return false; + } + + std::vector input_data(640 * 640 * 3); + preprocess(images, input_data); + CHECK_CUDA_ERROR( + cudaMemcpy(input_d_.get(), input_data.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice)); + + // preprocess(images); + + return feedforward(images, objects); +} + +bool TrtYolov10::feedforward(const std::vector & images, ObjectArrays & objects) +{ +// PRINT_DEBUG_INFO + std::vector buffers = {input_d_.get(), out_d_.get()}; + + trt_common_->enqueueV2(buffers.data(), *stream_, nullptr); + +// printf("out_elem_num_:%d\n", (int)out_elem_num_); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + out_h_.get(), out_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost, *stream_)); + + cudaStreamSynchronize(*stream_); + objects.clear(); + + const auto batch_size = images.size(); + for (size_t i = 0; i < batch_size; ++i) { + // auto image_size = images[i].size(); + float * batch_prob = out_h_.get() + (i * out_elem_num_per_batch_); + + ObjectArray object_array = postprocess(batch_prob,factor_); + objects.emplace_back(object_array); + } + + return true; +} + +ObjectArray TrtYolov10::postprocess(float * result, float factor) +{ + ObjectArray object_array; + std::vector positionBoxes; // Stores bounding boxes of detected objects + std::vector classIds; // Stores class IDs for detected objects + std::vector confidences; // Stores confidence scores for detected objects + + // yolov10m out: 1 x 300 x 6 + for (int i = 0; i < max_detections_; i++) { + int s = 6 * i; + + float score = result[s + 4]; + // printf("i:%d,score:%.3f\n",i,(float)result[s + 4]); + + if (score > score_threshold_) { + printf("i:%d,score:%.3f\n", i, (float)result[s + 4]); + + // Extract the coordinates and dimensions of the bounding box (normalized values) + float cx = result[s + 0]; // Center x-coordinate + float cy = result[s + 1]; // Center y-coordinate + float dx = result[s + 2]; // Bottom-right x-coordinate + float dy = result[s + 3]; // Bottom-right y-coordinate + + // Convert normalized coordinates and dimensions to pixel values using the scaling factor + int x = (int)((cx)*factor); // Top-left x-coordinate of the bounding box + int y = (int)((cy)*factor); // Top-left y-coordinate of the bounding box + int width = (int)((dx - cx) * factor); // Width of the bounding box + int height = (int)((dy - cy) * factor); // Height of the bounding box + + // Create a cv::Rect object to represent the bounding box + cv::Rect box(x, y, width, height); + + // Store the bounding box, class ID, and confidence score in the corresponding vectors + positionBoxes.push_back(box); + classIds.push_back( + (int)result[s + 5]); // Class ID is stored at position s + 5 in the 'result' array + confidences.push_back(( + float)result[s + 4]); // Confidence score is stored at position s + 4 in the 'result' array + + Object object; + object.x_offset = x; + object.y_offset = y; + object.width = width; + object.height = height; + object.score = score; + object.type = int(result[s + 5]); + object_array.emplace_back(object); + } + } + + return object_array; +} + +} // namespace autoware::tensorrt_yolov10 diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp new file mode 100644 index 0000000000000..d60e452f57df1 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -0,0 +1,163 @@ +// Copyright 2022 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp" + +#include "object_recognition_utils/object_classification.hpp" +#include "perception_utils/run_length_encoder.hpp" + +#include + +#include +#include +#include +#include +#include + +#define PRINT_DEBUG_INFO printf("line:%d\n",__LINE__); + + +namespace autoware::tensorrt_yolov10 +{ +TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) +: Node("tensorrt_yolov10", node_options) +{ + { + stop_watch_ptr_ = + std::make_unique>(); + debug_publisher_ = + std::make_unique(this, this->get_name()); + stop_watch_ptr_->tic("cyclic_time"); + stop_watch_ptr_->tic("processing_time"); + } + using std::placeholders::_1; + using std::chrono_literals::operator""ms; + + const std::string model_path = this->declare_parameter("model_path"); +// const std::string label_path = this->declare_parameter("label_path"); + const std::string precision = this->declare_parameter("precision","fp16"); +// const float score_threshold = +// static_cast(this->declare_parameter("score_threshold")); +// const int dla_core_id = this->declare_parameter("dla_core_id"); + const uint8_t gpu_id = this->declare_parameter("gpu_id",0); + +// const double norm_factor = 1.0; + const std::string cache_dir = ""; +// const tensorrt_common::BatchConfig batch_config{1, 1, 1}; +// const size_t max_workspace_size = (1 << 30); + + trt_yolov10_ = std::make_unique(model_path, precision); + + if (!trt_yolov10_->isGPUInitialized()) { + RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); + rclcpp::shutdown(); + return; + } + RCLCPP_INFO(this->get_logger(), "GPU %d is selected for the inference!", gpu_id); + + timer_ = + rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYolov10Node::onConnect, this)); + + objects_pub_ = this->create_publisher( + "~/out/objects", 1); + image_pub_ = image_transport::create_publisher(this, "~/out/image"); +} + +void TrtYolov10Node::onConnect() +{ + using std::placeholders::_1; + if ( + objects_pub_->get_subscription_count() == 0 && + objects_pub_->get_intra_process_subscription_count() == 0 && + image_pub_.getNumSubscribers() == 0 ) + { + // printf("no suber who sub from objects_pub_ or image_pub_,shut down image_sub_\n"); + image_sub_.shutdown(); + } + else if (!image_sub_) + { + image_sub_ = image_transport::create_subscription( + this, "~/in/image", std::bind(&TrtYolov10Node::onImage, this, _1), "raw", + rmw_qos_profile_sensor_data); + } +} + +void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) +{ + stop_watch_ptr_->toc("processing_time", true); + tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects; + + cv_bridge::CvImagePtr in_image_ptr; + try { + in_image_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::BGR8); + } catch (cv_bridge::Exception & e) { + RCLCPP_ERROR(this->get_logger(), "cv_bridge exception: %s", e.what()); + return; + } + const auto width = in_image_ptr->image.cols; + const auto height = in_image_ptr->image.rows; + + tensorrt_yolov10::ObjectArrays objects; + + if (!trt_yolov10_->doInference({in_image_ptr->image}, objects)) { + RCLCPP_WARN(this->get_logger(), "Fail to inference"); + return; + } + + for (const auto & yolov10_object : objects.at(0)) { + tier4_perception_msgs::msg::DetectedObjectWithFeature object; + object.feature.roi.x_offset = yolov10_object.x_offset; + object.feature.roi.y_offset = yolov10_object.y_offset; + object.feature.roi.width = yolov10_object.width; + object.feature.roi.height = yolov10_object.height; + object.object.existence_probability = yolov10_object.score; + // object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); + out_objects.feature_objects.push_back(object); + const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); + const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); + const auto right = + std::min(static_cast(object.feature.roi.x_offset + object.feature.roi.width), width); + const auto bottom = + std::min(static_cast(object.feature.roi.y_offset + object.feature.roi.height), height); + cv::rectangle( + in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, + 8, 0); + + } + + image_pub_.publish(in_image_ptr->toImageMsg()); + out_objects.header = msg->header; + objects_pub_->publish(out_objects); + + if (debug_publisher_) { + const double processing_time_ms = stop_watch_ptr_->toc("processing_time", true); + const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic_time", true); + const double pipeline_latency_ms = + std::chrono::duration( + std::chrono::nanoseconds( + (this->get_clock()->now() - out_objects.header.stamp).nanoseconds())) + .count(); + debug_publisher_->publish( + "debug/cyclic_time_ms", cyclic_time_ms); + debug_publisher_->publish( + "debug/processing_time_ms", processing_time_ms); + debug_publisher_->publish( + "debug/pipeline_latency_ms", pipeline_latency_ms); + } +} + +} // namespace autoware::tensorrt_yolov10 + +#include "rclcpp_components/register_node_macro.hpp" +RCLCPP_COMPONENTS_REGISTER_NODE(autoware::tensorrt_yolov10::TrtYolov10Node) diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp new file mode 100644 index 0000000000000..e1ecd2c14e7a4 --- /dev/null +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -0,0 +1,79 @@ +// Copyright 2022 Tier IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#if (defined(_MSC_VER) or (defined(__GNUC__) and (7 <= __GNUC_MAJOR__))) +#include +namespace fs = ::std::filesystem; +#else +#include +namespace fs = ::std::experimental::filesystem; +#endif + +#include +#include + +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); + +namespace autoware::tensorrt_yolov10 +{ +class Yolov10SingleImageInferenceNode : public rclcpp::Node +{ +public: + explicit Yolov10SingleImageInferenceNode(const rclcpp::NodeOptions & node_options) + : Node("yolov10_single_image_inference", node_options) + { + const auto image_path = declare_parameter("image_path", ""); + const auto model_path = declare_parameter("model_path", ""); + const auto precision = declare_parameter("precision", "fp32"); + const auto save_image = declare_parameter("save_image", true); + + printf("image_path:%s\n", image_path.c_str()); + auto p = fs::path(image_path); + const auto ext = p.extension().string(); + p.replace_extension(""); + const auto output_image_path = + declare_parameter("output_image_path", p.string() + "_detect" + ext); + + printf("model_path:%s,output_image_path:%s\n", model_path.c_str(), output_image_path.c_str()); + auto trt_yolov10 = std::make_unique(model_path, precision); + auto image = cv::imread(image_path); + tensorrt_yolov10::ObjectArrays objects; + // // std::vector masks; + // // std::vector color_masks; + trt_yolov10->doInference({image}, objects); + for (const auto & object : objects[0]) { + + const auto left = object.x_offset; + const auto top = object.y_offset; + const auto right = std::clamp(left + object.width, 0, image.cols); + const auto bottom = std::clamp(top + object.height, 0, image.rows); + cv::rectangle( + image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); + } + if (!save_image) { + cv::imshow("inference image", image); + cv::waitKey(0); + rclcpp::shutdown(); + } + cv::imwrite(output_image_path, image); + rclcpp::shutdown(); + } +}; +} // namespace autoware::tensorrt_yolov10 + +#include "rclcpp_components/register_node_macro.hpp" +RCLCPP_COMPONENTS_REGISTER_NODE(autoware::tensorrt_yolov10::Yolov10SingleImageInferenceNode) From be204c76a223582685087f3482d9778a010b35f7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 09:12:12 +0000 Subject: [PATCH 02/23] style(pre-commit): autofix --- .../autoware_tensorrt_yolov10/README.md | 1 - .../autoware/tensorrt_yolov10/preprocess.hpp | 6 +- .../tensorrt_yolov10/tensorrt_yolov10.hpp | 74 +++++++++---------- .../tensorrt_yolov10_node.hpp | 1 - .../launch/yolov10_single_image.launch.xml | 1 - .../src/tensorrt_yolov10.cpp | 31 +++----- .../src/tensorrt_yolov10_node.cpp | 34 ++++----- .../yolov10_single_image_inference_node.cpp | 1 - 8 files changed, 63 insertions(+), 86 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index 2f7006c1c86fe..adda6cebea0f2 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -1,2 +1 @@ # autoware_tensorrt_yolov10 - diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp index 7971fbd815deb..e1e47edc9d996 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/preprocess.hpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ -#define AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ +#ifndef AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ +#define AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ #include #include @@ -199,4 +199,4 @@ extern void argmax_gpu( cudaStream_t stream); } // namespace tensorrt_yolox } // namespace autoware -#endif // AUTOWARE__TENSORRT_YOLOX__PREPROCESS_HPP_ +#endif // AUTOWARE__TENSORRT_YOLOV10__PREPROCESS_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp index 5d50d4e667ff0..0c75d24489c02 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -10,7 +10,6 @@ #include #include - namespace autoware::tensorrt_yolov10 { using cuda_utils::CudaUniquePtr; @@ -35,15 +34,11 @@ class TrtYolov10 { public: TrtYolov10( - const std::string & model_path, - const std::string & precision, - const int num_class = 8, - const float score_threshold = 0.8, + const std::string & model_path, const std::string & precision, const int num_class = 8, + const float score_threshold = 0.8, const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(), - const bool use_gpu_preprocess = false, - const uint8_t gpu_id = 0, - std::string calibration_image_list_file = std::string(), - const double norm_factor = 1.0, + const bool use_gpu_preprocess = false, const uint8_t gpu_id = 0, + std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "", const tensorrt_common::BatchConfig & batch_config = {1, 1, 1}, const size_t max_workspace_size = (1 << 30)); @@ -62,33 +57,32 @@ class TrtYolov10 * @param[out] objects results for object detection * @param[in] images batched images */ - bool doInference( - const std::vector & images, ObjectArrays & objects); - -// /** -// * @brief allocate buffer for preprocess on GPU -// * @param[in] width original image width -// * @param[in] height original image height -// * @warning if we don't allocate buffers using it, "preprocessGpu" allocates buffers at the -// * beginning -// */ -// void initPreprocessBuffer(int width, int height); - -// /** -// * @brief output TensorRT profiles for each layer -// */ -// void printProfiling(void); - - void preProcess(cv::Mat* img, int length, float* factor, std::vector& data); - - void preprocess(const std::vector & images,std::vector& data); - ObjectArray postprocess(float* result,float factor); - -// /** -// * @brief run preprocess on GPU -// * @param[in] images batching images -// */ -// void preprocessGpu(const std::vector & images); + bool doInference(const std::vector & images, ObjectArrays & objects); + + // /** + // * @brief allocate buffer for preprocess on GPU + // * @param[in] width original image width + // * @param[in] height original image height + // * @warning if we don't allocate buffers using it, "preprocessGpu" allocates buffers at the + // * beginning + // */ + // void initPreprocessBuffer(int width, int height); + + // /** + // * @brief output TensorRT profiles for each layer + // */ + // void printProfiling(void); + + void preProcess(cv::Mat * img, int length, float * factor, std::vector & data); + + void preprocess(const std::vector & images, std::vector & data); + ObjectArray postprocess(float * result, float factor); + + // /** + // * @brief run preprocess on GPU + // * @param[in] images batching images + // */ + // void preprocessGpu(const std::vector & images); bool feedforward(const std::vector & images, ObjectArrays & objects); @@ -109,7 +103,7 @@ class TrtYolov10 int num_class_; float score_threshold_; int batch_size_; - + // GPU id for inference const uint8_t gpu_id_; // flag for gpu initialization @@ -127,9 +121,9 @@ class TrtYolov10 int multitask_; bool use_gpu_preprocess_; - float factor_=1.0; + float factor_ = 1.0; }; -} +} // namespace autoware::tensorrt_yolov10 -#endif \ No newline at end of file +#endif // AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp index cc09896f2e1cf..1a4b2244e5719 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -64,7 +64,6 @@ class TrtYolov10Node : public rclcpp::Node std::unique_ptr debug_publisher_; rclcpp::Publisher::SharedPtr objects_pub_; image_transport::Publisher image_pub_; - }; } // namespace autoware::tensorrt_yolov10 diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml index de0baa1e5b090..fcaa192521131 100644 --- a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml @@ -6,7 +6,6 @@ - diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 5b601a75beb69..89f1fe2f7772e 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -111,19 +111,11 @@ std::vector loadImageList(const std::string & filename, const std:: namespace autoware::tensorrt_yolov10 { TrtYolov10::TrtYolov10( - const std::string & model_path, - const std::string & precision, - const int num_class, - const float score_threshold, - const tensorrt_common::BuildConfig build_config, - const bool use_gpu_preprocess, - const uint8_t gpu_id, - std::string calibration_image_list_path, - const double norm_factor, - [[maybe_unused]] const std::string & cache_dir, - const tensorrt_common::BatchConfig & batch_config, - const size_t max_workspace_size - ) + const std::string & model_path, const std::string & precision, const int num_class, + const float score_threshold, const tensorrt_common::BuildConfig build_config, + const bool use_gpu_preprocess, const uint8_t gpu_id, std::string calibration_image_list_path, + const double norm_factor, [[maybe_unused]] const std::string & cache_dir, + const tensorrt_common::BatchConfig & batch_config, const size_t max_workspace_size) : gpu_id_(gpu_id), is_gpu_initialized_(false) { if (!setCudaDeviceId(gpu_id_)) { @@ -383,7 +375,6 @@ void TrtYolov10::preprocess(const std::vector & images, std::vector & images, ObjectArrays & std::vector input_data(640 * 640 * 3); preprocess(images, input_data); - CHECK_CUDA_ERROR( - cudaMemcpy(input_d_.get(), input_data.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice)); + CHECK_CUDA_ERROR(cudaMemcpy( + input_d_.get(), input_data.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice)); - // preprocess(images); + // preprocess(images); return feedforward(images, objects); } bool TrtYolov10::feedforward(const std::vector & images, ObjectArrays & objects) { -// PRINT_DEBUG_INFO + // PRINT_DEBUG_INFO std::vector buffers = {input_d_.get(), out_d_.get()}; trt_common_->enqueueV2(buffers.data(), *stream_, nullptr); -// printf("out_elem_num_:%d\n", (int)out_elem_num_); + // printf("out_elem_num_:%d\n", (int)out_elem_num_); CHECK_CUDA_ERROR(cudaMemcpyAsync( out_h_.get(), out_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost, *stream_)); @@ -463,7 +454,7 @@ bool TrtYolov10::feedforward(const std::vector & images, ObjectArrays & // auto image_size = images[i].size(); float * batch_prob = out_h_.get() + (i * out_elem_num_per_batch_); - ObjectArray object_array = postprocess(batch_prob,factor_); + ObjectArray object_array = postprocess(batch_prob, factor_); objects.emplace_back(object_array); } diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index d60e452f57df1..5e51356a9be3c 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -25,8 +25,7 @@ #include #include -#define PRINT_DEBUG_INFO printf("line:%d\n",__LINE__); - +#define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); namespace autoware::tensorrt_yolov10 { @@ -45,17 +44,17 @@ TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) using std::chrono_literals::operator""ms; const std::string model_path = this->declare_parameter("model_path"); -// const std::string label_path = this->declare_parameter("label_path"); - const std::string precision = this->declare_parameter("precision","fp16"); -// const float score_threshold = -// static_cast(this->declare_parameter("score_threshold")); -// const int dla_core_id = this->declare_parameter("dla_core_id"); - const uint8_t gpu_id = this->declare_parameter("gpu_id",0); - -// const double norm_factor = 1.0; + // const std::string label_path = this->declare_parameter("label_path"); + const std::string precision = this->declare_parameter("precision", "fp16"); + // const float score_threshold = + // static_cast(this->declare_parameter("score_threshold")); + // const int dla_core_id = this->declare_parameter("dla_core_id"); + const uint8_t gpu_id = this->declare_parameter("gpu_id", 0); + + // const double norm_factor = 1.0; const std::string cache_dir = ""; -// const tensorrt_common::BatchConfig batch_config{1, 1, 1}; -// const size_t max_workspace_size = (1 << 30); + // const tensorrt_common::BatchConfig batch_config{1, 1, 1}; + // const size_t max_workspace_size = (1 << 30); trt_yolov10_ = std::make_unique(model_path, precision); @@ -80,13 +79,10 @@ void TrtYolov10Node::onConnect() if ( objects_pub_->get_subscription_count() == 0 && objects_pub_->get_intra_process_subscription_count() == 0 && - image_pub_.getNumSubscribers() == 0 ) - { + image_pub_.getNumSubscribers() == 0) { // printf("no suber who sub from objects_pub_ or image_pub_,shut down image_sub_\n"); image_sub_.shutdown(); - } - else if (!image_sub_) - { + } else if (!image_sub_) { image_sub_ = image_transport::create_subscription( this, "~/in/image", std::bind(&TrtYolov10Node::onImage, this, _1), "raw", rmw_qos_profile_sensor_data); @@ -122,7 +118,8 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) object.feature.roi.width = yolov10_object.width; object.feature.roi.height = yolov10_object.height; object.object.existence_probability = yolov10_object.score; - // object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); + // object.object.classification = + // object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); out_objects.feature_objects.push_back(object); const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); @@ -133,7 +130,6 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) cv::rectangle( in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); - } image_pub_.publish(in_image_ptr->toImageMsg()); diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp index e1ecd2c14e7a4..84ede1fde10f2 100644 --- a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -56,7 +56,6 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node // // std::vector color_masks; trt_yolov10->doInference({image}, objects); for (const auto & object : objects[0]) { - const auto left = object.x_offset; const auto top = object.y_offset; const auto right = std::clamp(left + object.width, 0, image.cols); From 763063eb2b744b42ebbc740fa94eec2a11d9629b Mon Sep 17 00:00:00 2001 From: suchang Date: Wed, 30 Oct 2024 17:40:57 +0800 Subject: [PATCH 03/23] feat: add yolov10 node Signed-off-by: suchang --- .../launch/yolov10_single_image.launch.xml | 6 ++---- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml index de0baa1e5b090..53d6a3bb05429 100644 --- a/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10_single_image.launch.xml @@ -1,10 +1,8 @@ - - - - + + diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 5b601a75beb69..24adc9117a5da 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -378,16 +378,16 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect void TrtYolov10::preprocess(const std::vector & images, std::vector & inputData) { + //todo: support multi images. now assume that infer only one image. for (cv::Mat image : images) { int length = 640; // 模型输入的大小 factor_ = 1.0; preProcess(&image, length, &factor_, inputData); - - // check sum - float sum = std::accumulate(inputData.begin(), inputData.end(), 0.0f); - printf("sum=%.2f\n", sum); + // // check sum + // float sum = std::accumulate(inputData.begin(), inputData.end(), 0.0f); + // printf("sum=%.2f\n", sum); } } From 032ac7f4371350a3cf4698e27b21b8f2376cbc82 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 30 Oct 2024 09:48:30 +0000 Subject: [PATCH 04/23] style(pre-commit): autofix --- perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 03697caf82a61..a9b4b2bc5b3c8 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -370,13 +370,13 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect void TrtYolov10::preprocess(const std::vector & images, std::vector & inputData) { - //todo: support multi images. now assume that infer only one image. + // todo: support multi images. now assume that infer only one image. for (cv::Mat image : images) { int length = 640; // 模型输入的大小 factor_ = 1.0; preProcess(&image, length, &factor_, inputData); - + // // check sum // float sum = std::accumulate(inputData.begin(), inputData.end(), 0.0f); // printf("sum=%.2f\n", sum); From b5bbede7c28a90de7f04afd152bf8e39012ab174 Mon Sep 17 00:00:00 2001 From: suchang Date: Thu, 31 Oct 2024 13:40:27 +0800 Subject: [PATCH 05/23] fix ci error Signed-off-by: suchang --- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 3 +-- .../src/tensorrt_yolov10_node.cpp | 9 --------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 03697caf82a61..6f8bf0bf0abf0 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -323,7 +323,6 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect // Get the dimensions and number of channels of the input image int rh = img->rows; // Height of the input image int rw = img->cols; // Width of the input image - int rc = img->channels(); // Number of channels (e.g., 3 for RGB) // Convert the input image from BGR to RGB color space cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB); @@ -359,7 +358,7 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect // Update the height, width, and number of channels for the resized image rh = resizeImg.rows; rw = resizeImg.cols; - rc = resizeImg.channels(); + int rc = resizeImg.channels(); // Extract each channel of the resized image and store it in the 'data' vector for (int i = 0; i < rc; ++i) { diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 5e51356a9be3c..98f2234dc7400 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -44,18 +44,9 @@ TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) using std::chrono_literals::operator""ms; const std::string model_path = this->declare_parameter("model_path"); - // const std::string label_path = this->declare_parameter("label_path"); const std::string precision = this->declare_parameter("precision", "fp16"); - // const float score_threshold = - // static_cast(this->declare_parameter("score_threshold")); - // const int dla_core_id = this->declare_parameter("dla_core_id"); const uint8_t gpu_id = this->declare_parameter("gpu_id", 0); - // const double norm_factor = 1.0; - const std::string cache_dir = ""; - // const tensorrt_common::BatchConfig batch_config{1, 1, 1}; - // const size_t max_workspace_size = (1 << 30); - trt_yolov10_ = std::make_unique(model_path, precision); if (!trt_yolov10_->isGPUInitialized()) { From 7261a5bed17c84587f34b1627d9494e8fca5ebe1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 05:43:20 +0000 Subject: [PATCH 06/23] style(pre-commit): autofix --- perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 93fb4e6bfd267..91fd209e07ef2 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -321,8 +321,8 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect cv::Mat mat; // Get the dimensions and number of channels of the input image - int rh = img->rows; // Height of the input image - int rw = img->cols; // Width of the input image + int rh = img->rows; // Height of the input image + int rw = img->cols; // Width of the input image // Convert the input image from BGR to RGB color space cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB); From a20535ea159eb90bb5b5a30c66d7b7f0fc21749c Mon Sep 17 00:00:00 2001 From: suchang Date: Thu, 31 Oct 2024 15:50:50 +0800 Subject: [PATCH 07/23] fix ci error Signed-off-by: suchang --- .../autoware_tensorrt_yolov10/package.xml | 2 +- .../src/tensorrt_yolov10.cpp | 141 ++---------------- .../yolov10_single_image_inference_node.cpp | 3 +- 3 files changed, 13 insertions(+), 133 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml index 17fdf53999f76..c27f6a1597428 100644 --- a/perception/autoware_tensorrt_yolov10/package.xml +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -27,7 +27,7 @@ rclcpp rclcpp_components sensor_msgs - tensorrt_common + autoware_tensorrt_common tier4_perception_msgs autoware_image_transport_decompressor diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 93fb4e6bfd267..14ee708e9c498 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -247,73 +247,6 @@ bool TrtYolov10::setCudaDeviceId(const uint8_t gpu_id) } } -// void TrtYolov10::initPreprocessBuffer(int width, int height) -// { -// // if size of source input has been changed... -// if (src_width_ != -1 || src_height_ != -1) { -// if (width != src_width_ || height != src_height_) { -// // Free cuda memory to reallocate -// if (image_buf_h_) { -// image_buf_h_.reset(); -// } -// if (image_buf_d_) { -// image_buf_d_.reset(); -// } -// } -// } -// src_width_ = width; -// src_height_ = height; -// if (use_gpu_preprocess_) { -// auto input_dims = trt_common_->getBindingDimensions(0); -// bool const hasRuntimeDim = std::any_of( -// input_dims.d, input_dims.d + input_dims.nbDims, -// [](int32_t input_dim) { return input_dim == -1; }); -// if (hasRuntimeDim) { -// input_dims.d[0] = batch_size_; -// } -// if (!image_buf_h_) { -// trt_common_->setBindingDimensions(0, input_dims); -// scales_.clear(); -// } -// const float input_height = static_cast(input_dims.d[2]); -// const float input_width = static_cast(input_dims.d[3]); -// if (!image_buf_h_) { -// const float scale = std::min(input_width / width, input_height / height); -// for (int b = 0; b < batch_size_; b++) { -// scales_.emplace_back(scale); -// } -// image_buf_h_ = cuda_utils::make_unique_host( -// width * height * 3 * batch_size_, cudaHostAllocWriteCombined); -// image_buf_d_ = cuda_utils::make_unique(width * height * 3 * batch_size_); -// } -// if (multitask_) { -// size_t argmax_out_elem_num = 0; -// for (int m = 0; m < multitask_; m++) { -// const auto output_dims = -// trt_common_->getBindingDimensions(m + 2); // 0 : input, 1 : output for detections -// const float scale = std::min( -// output_dims.d[3] / static_cast(width), -// output_dims.d[2] / static_cast(height)); -// int out_w = static_cast(width * scale); -// int out_h = static_cast(height * scale); -// // size_t out_elem_num = std::accumulate( -// // output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies()); -// // out_elem_num = out_elem_num * batch_size_; -// size_t out_elem_num = out_w * out_h * batch_size_; -// argmax_out_elem_num += out_elem_num; -// } -// argmax_buf_h_ = -// cuda_utils::make_unique_host(argmax_out_elem_num, -// cudaHostAllocPortable); -// argmax_buf_d_ = cuda_utils::make_unique(argmax_out_elem_num); -// } -// } -// } - -// void TrtYolov10::printProfiling(void) -// { -// trt_common_->printProfiling(); -// } void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vector & data) { @@ -330,8 +263,6 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect // Determine the size of the new square image (largest dimension of the input image) int maxImageLength = rw > rh ? rw : rh; - // Create a new square image filled with zeros (black) with dimensions maxImageLength x - // maxImageLength cv::Mat maxImage = cv::Mat::zeros(maxImageLength, maxImageLength, CV_8UC3); // Set all pixels to 255 (white) @@ -382,39 +313,6 @@ void TrtYolov10::preprocess(const std::vector & images, std::vector & images) -// { -// const auto batch_size = images.size(); -// auto input_dims = trt_common_->getBindingDimensions(0); -// input_dims.d[0] = batch_size; -// trt_common_->setBindingDimensions(0, input_dims); -// const float input_height = static_cast(input_dims.d[2]); -// const float input_width = static_cast(input_dims.d[3]); -// std::vector dst_images; -// scales_.clear(); -// for (const auto & image : images) { -// cv::Mat dst_image; -// const float scale = std::min(input_width / image.cols, input_height / image.rows); -// scales_.emplace_back(scale); -// const auto scale_size = cv::Size(image.cols * scale, image.rows * scale); -// cv::resize(image, dst_image, scale_size, 0, 0, cv::INTER_CUBIC); -// const auto bottom = input_height - dst_image.rows; -// const auto right = input_width - dst_image.cols; -// copyMakeBorder(dst_image, dst_image, 0, bottom, 0, right, cv::BORDER_CONSTANT, {114, 114, -// 114}); dst_images.emplace_back(dst_image); -// } -// const auto chw_images = cv::dnn::blobFromImages( -// dst_images, norm_factor_, cv::Size(), cv::Scalar(), false, false, CV_32F); - -// const auto data_length = chw_images.total(); -// input_h_.reserve(data_length); -// const auto flat = chw_images.reshape(1, data_length); -// input_h_ = chw_images.isContinuous() ? flat : flat.clone(); -// CHECK_CUDA_ERROR(cudaMemcpy( -// input_d_.get(), input_h_.data(), input_h_.size() * sizeof(float), cudaMemcpyHostToDevice)); -// // No Need for Sync -// } - bool TrtYolov10::doInference(const std::vector & images, ObjectArrays & objects) { if (!setCudaDeviceId(gpu_id_)) { @@ -430,8 +328,6 @@ bool TrtYolov10::doInference(const std::vector & images, ObjectArrays & CHECK_CUDA_ERROR(cudaMemcpy( input_d_.get(), input_data.data(), 3 * 640 * 640 * sizeof(float), cudaMemcpyHostToDevice)); - // preprocess(images); - return feedforward(images, objects); } @@ -464,9 +360,6 @@ bool TrtYolov10::feedforward(const std::vector & images, ObjectArrays & ObjectArray TrtYolov10::postprocess(float * result, float factor) { ObjectArray object_array; - std::vector positionBoxes; // Stores bounding boxes of detected objects - std::vector classIds; // Stores class IDs for detected objects - std::vector confidences; // Stores confidence scores for detected objects // yolov10m out: 1 x 300 x 6 for (int i = 0; i < max_detections_; i++) { @@ -476,29 +369,17 @@ ObjectArray TrtYolov10::postprocess(float * result, float factor) // printf("i:%d,score:%.3f\n",i,(float)result[s + 4]); if (score > score_threshold_) { - printf("i:%d,score:%.3f\n", i, (float)result[s + 4]); - - // Extract the coordinates and dimensions of the bounding box (normalized values) - float cx = result[s + 0]; // Center x-coordinate - float cy = result[s + 1]; // Center y-coordinate - float dx = result[s + 2]; // Bottom-right x-coordinate - float dy = result[s + 3]; // Bottom-right y-coordinate - - // Convert normalized coordinates and dimensions to pixel values using the scaling factor - int x = (int)((cx)*factor); // Top-left x-coordinate of the bounding box - int y = (int)((cy)*factor); // Top-left y-coordinate of the bounding box - int width = (int)((dx - cx) * factor); // Width of the bounding box - int height = (int)((dy - cy) * factor); // Height of the bounding box - - // Create a cv::Rect object to represent the bounding box - cv::Rect box(x, y, width, height); - - // Store the bounding box, class ID, and confidence score in the corresponding vectors - positionBoxes.push_back(box); - classIds.push_back( - (int)result[s + 5]); // Class ID is stored at position s + 5 in the 'result' array - confidences.push_back(( - float)result[s + 4]); // Confidence score is stored at position s + 4 in the 'result' array + // printf("i:%d,score:%.3f\n", i, (float)result[s + 4]); + + float ltx = result[s + 0]; + float lty = result[s + 1]; + float rbx = result[s + 2]; + float rby = result[s + 3]; + + int x = (int)((ltx)*factor); + int y = (int)((lty)*factor); + int width = (int)((rbx - ltx) * factor); + int height = (int)((rby - lty) * factor); Object object; object.x_offset = x; diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp index 84ede1fde10f2..028dc3f2604a0 100644 --- a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -52,8 +52,7 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node auto trt_yolov10 = std::make_unique(model_path, precision); auto image = cv::imread(image_path); tensorrt_yolov10::ObjectArrays objects; - // // std::vector masks; - // // std::vector color_masks; + trt_yolov10->doInference({image}, objects); for (const auto & object : objects[0]) { const auto left = object.x_offset; From 4a44dc47e2762933973fe1f06a1eb6bf359ee3d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 07:56:39 +0000 Subject: [PATCH 08/23] style(pre-commit): autofix --- .../autoware_tensorrt_yolov10/package.xml | 2 +- .../src/tensorrt_yolov10.cpp | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml index c27f6a1597428..a9bfb383c1f8f 100644 --- a/perception/autoware_tensorrt_yolov10/package.xml +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -18,6 +18,7 @@ tensorrt_cmake_module autoware_perception_msgs + autoware_tensorrt_common cuda_utils cv_bridge image_transport @@ -27,7 +28,6 @@ rclcpp rclcpp_components sensor_msgs - autoware_tensorrt_common tier4_perception_msgs autoware_image_transport_decompressor diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 11854afb7d2cd..1e0ce72760433 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -247,7 +247,6 @@ bool TrtYolov10::setCudaDeviceId(const uint8_t gpu_id) } } - void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vector & data) { // Create a new cv::Mat object for storing the image after conversion @@ -369,17 +368,17 @@ ObjectArray TrtYolov10::postprocess(float * result, float factor) // printf("i:%d,score:%.3f\n",i,(float)result[s + 4]); if (score > score_threshold_) { - // printf("i:%d,score:%.3f\n", i, (float)result[s + 4]); + // printf("i:%d,score:%.3f\n", i, (float)result[s + 4]); - float ltx = result[s + 0]; - float lty = result[s + 1]; - float rbx = result[s + 2]; - float rby = result[s + 3]; + float ltx = result[s + 0]; + float lty = result[s + 1]; + float rbx = result[s + 2]; + float rby = result[s + 3]; - int x = (int)((ltx)*factor); - int y = (int)((lty)*factor); - int width = (int)((rbx - ltx) * factor); - int height = (int)((rby - lty) * factor); + int x = (int)((ltx)*factor); + int y = (int)((lty)*factor); + int width = (int)((rbx - ltx) * factor); + int height = (int)((rby - lty) * factor); Object object; object.x_offset = x; From b27a55bd1c44b4c96b5477b5c49ac52e11102dca Mon Sep 17 00:00:00 2001 From: suchang Date: Thu, 31 Oct 2024 17:36:56 +0800 Subject: [PATCH 09/23] fix ci error Signed-off-by: suchang --- .../autoware/tensorrt_yolov10/tensorrt_yolov10.hpp | 2 +- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp index 0c75d24489c02..f7c8ac70467e2 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 11854afb7d2cd..fd68238b9aaae 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -281,7 +281,7 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect cv::resize(maxImage, resizeImg, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR); // Calculate the scaling factor and store it in the 'factor' variable - *factor = (float)((float)maxImageLength / (float)length); + *factor = (1.0 * maxImageLength) / ( 1.0 * length); // Convert the resized image to floating-point format with values in range [0, 1] resizeImg.convertTo(resizeImg, CV_32FC3, 1 / 255.0); @@ -376,10 +376,10 @@ ObjectArray TrtYolov10::postprocess(float * result, float factor) float rbx = result[s + 2]; float rby = result[s + 3]; - int x = (int)((ltx)*factor); - int y = (int)((lty)*factor); - int width = (int)((rbx - ltx) * factor); - int height = (int)((rby - lty) * factor); + int x = static_cast((ltx)*factor); + int y = static_cast((lty)*factor); + int width = static_cast((rbx - ltx) * factor); + int height = static_cast((rby - lty) * factor); Object object; object.x_offset = x; From bba9239e6142dc37678475e6f2120862d59e7d16 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 31 Oct 2024 09:42:25 +0000 Subject: [PATCH 10/23] style(pre-commit): autofix --- .../autoware/tensorrt_yolov10/tensorrt_yolov10.hpp | 2 +- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp index f7c8ac70467e2..552c5dedeceeb 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -1,10 +1,10 @@ #ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ #define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ +#include #include #include #include -#include #include #include diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index bbad7d88f0ef3..7447e8e8c032e 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -280,7 +280,7 @@ void TrtYolov10::preProcess(cv::Mat * img, int length, float * factor, std::vect cv::resize(maxImage, resizeImg, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR); // Calculate the scaling factor and store it in the 'factor' variable - *factor = (1.0 * maxImageLength) / ( 1.0 * length); + *factor = (1.0 * maxImageLength) / (1.0 * length); // Convert the resized image to floating-point format with values in range [0, 1] resizeImg.convertTo(resizeImg, CV_32FC3, 1 / 255.0); @@ -375,10 +375,10 @@ ObjectArray TrtYolov10::postprocess(float * result, float factor) float rbx = result[s + 2]; float rby = result[s + 3]; - int x = static_cast((ltx)*factor); - int y = static_cast((lty)*factor); - int width = static_cast((rbx - ltx) * factor); - int height = static_cast((rby - lty) * factor); + int x = static_cast((ltx)*factor); + int y = static_cast((lty)*factor); + int width = static_cast((rbx - ltx) * factor); + int height = static_cast((rby - lty) * factor); Object object; object.x_offset = x; From 80b6ac844cfd04757686ff0b1ac7e622b38db454 Mon Sep 17 00:00:00 2001 From: suchang Date: Fri, 1 Nov 2024 10:56:42 +0800 Subject: [PATCH 11/23] add cls in result Signed-off-by: suchang --- .../tensorrt_yolov10_node.hpp | 4 +++ .../launch/yolov10.launch.xml | 3 ++ .../src/tensorrt_yolov10_node.cpp | 30 +++++++++++++++++-- .../yolov10_single_image_inference_node.cpp | 7 ++++- 4 files changed, 41 insertions(+), 3 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp index 1a4b2244e5719..ec1a72483dea7 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -55,6 +55,8 @@ class TrtYolov10Node : public rclcpp::Node void onConnect(); void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); + bool readLabelFile(const std::string & label_path); + std::unique_ptr trt_yolov10_; image_transport::Subscriber image_sub_; @@ -64,6 +66,8 @@ class TrtYolov10Node : public rclcpp::Node std::unique_ptr debug_publisher_; rclcpp::Publisher::SharedPtr objects_pub_; image_transport::Publisher image_pub_; + + std::map label_map_; }; } // namespace autoware::tensorrt_yolov10 diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml index ce89da449f838..554fe3d3c6aac 100644 --- a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml @@ -4,10 +4,13 @@ + + + diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 98f2234dc7400..84411cbba39b8 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -46,9 +46,15 @@ TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) const std::string model_path = this->declare_parameter("model_path"); const std::string precision = this->declare_parameter("precision", "fp16"); const uint8_t gpu_id = this->declare_parameter("gpu_id", 0); + const std::string label_path = this->declare_parameter("label_path"); trt_yolov10_ = std::make_unique(model_path, precision); + if (!readLabelFile(label_path)) { + RCLCPP_ERROR(this->get_logger(), "Could not find label file"); + rclcpp::shutdown(); + } + if (!trt_yolov10_->isGPUInitialized()) { RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); rclcpp::shutdown(); @@ -109,8 +115,7 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) object.feature.roi.width = yolov10_object.width; object.feature.roi.height = yolov10_object.height; object.object.existence_probability = yolov10_object.score; - // object.object.classification = - // object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); + // object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); out_objects.feature_objects.push_back(object); const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); @@ -121,6 +126,8 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) cv::rectangle( in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); + + cv::putText(in_image_ptr->image, label_map_[yolov10_object.type], cv::Point(left,top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); } image_pub_.publish(in_image_ptr->toImageMsg()); @@ -144,6 +151,25 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } + +bool TrtYolov10Node::readLabelFile(const std::string & label_path) +{ + std::ifstream label_file(label_path); + if (!label_file.is_open()) { + RCLCPP_ERROR(this->get_logger(), "Could not open label file. [%s]", label_path.c_str()); + return false; + } + int label_index{}; + std::string label; + while (getline(label_file, label)) { + std::transform( + label.begin(), label.end(), label.begin(), [](auto c) { return std::toupper(c); }); + label_map_.insert({label_index, label}); + ++label_index; + } + return true; +} + } // namespace autoware::tensorrt_yolov10 #include "rclcpp_components/register_node_macro.hpp" diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp index 028dc3f2604a0..c246a6ce9a75c 100644 --- a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -26,6 +26,8 @@ namespace fs = ::std::experimental::filesystem; #include #include +using namespace std; + #define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); namespace autoware::tensorrt_yolov10 @@ -38,7 +40,7 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node { const auto image_path = declare_parameter("image_path", ""); const auto model_path = declare_parameter("model_path", ""); - const auto precision = declare_parameter("precision", "fp32"); + const auto precision = declare_parameter("precision", "fp16"); const auto save_image = declare_parameter("save_image", true); printf("image_path:%s\n", image_path.c_str()); @@ -61,6 +63,9 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node const auto bottom = std::clamp(top + object.height, 0, image.rows); cv::rectangle( image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); + + string cls_id = std::to_string(object.type); + cv::putText(image, cls_id, cv::Point(left,top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); } if (!save_image) { cv::imshow("inference image", image); From eeb76f3fcf01fc3a7eec2082e60fd4c174416858 Mon Sep 17 00:00:00 2001 From: suchang Date: Fri, 1 Nov 2024 11:29:50 +0800 Subject: [PATCH 12/23] add cls in result Signed-off-by: suchang --- .../tensorrt_yolov10_node.hpp | 1 + .../src/tensorrt_yolov10_node.cpp | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp index ec1a72483dea7..5a165d777bc23 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -56,6 +56,7 @@ class TrtYolov10Node : public rclcpp::Node void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg); bool readLabelFile(const std::string & label_path); + void replaceLabelMap(); std::unique_ptr trt_yolov10_; diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 84411cbba39b8..2297d9ea6c012 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -54,6 +54,7 @@ TrtYolov10Node::TrtYolov10Node(const rclcpp::NodeOptions & node_options) RCLCPP_ERROR(this->get_logger(), "Could not find label file"); rclcpp::shutdown(); } + replaceLabelMap(); if (!trt_yolov10_->isGPUInitialized()) { RCLCPP_ERROR(this->get_logger(), "GPU %d does not exist or is not suitable.", gpu_id); @@ -115,7 +116,7 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) object.feature.roi.width = yolov10_object.width; object.feature.roi.height = yolov10_object.height; object.object.existence_probability = yolov10_object.score; - // object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); + object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); out_objects.feature_objects.push_back(object); const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); @@ -170,6 +171,23 @@ bool TrtYolov10Node::readLabelFile(const std::string & label_path) return true; } +//we need this because autoware::object_recognition_utils::toLabel(const std::string & class_name) limit label type +void TrtYolov10Node::replaceLabelMap() +{ + for (std::size_t i = 0; i < label_map_.size(); ++i) { + auto & label = label_map_[i]; + if (label == "PERSON") { + label = "PEDESTRIAN"; + } else if (label == "MOTORBIKE") { + label = "MOTORCYCLE"; + } else if ( + label != "CAR" && label != "PEDESTRIAN" && label != "BUS" && label != "TRUCK" && + label != "BICYCLE" && label != "MOTORCYCLE") { + label = "UNKNOWN"; + } + } +} + } // namespace autoware::tensorrt_yolov10 #include "rclcpp_components/register_node_macro.hpp" From 5bb1c57e01ddadf6284bcf86acd4dea67a6c655f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 03:32:56 +0000 Subject: [PATCH 13/23] style(pre-commit): autofix --- .../tensorrt_yolov10/tensorrt_yolov10_node.hpp | 2 +- .../launch/yolov10.launch.xml | 1 - .../src/tensorrt_yolov10_node.cpp | 11 +++++++---- .../src/yolov10_single_image_inference_node.cpp | 6 ++++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp index 5a165d777bc23..4cc57e9404765 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -68,7 +68,7 @@ class TrtYolov10Node : public rclcpp::Node rclcpp::Publisher::SharedPtr objects_pub_; image_transport::Publisher image_pub_; - std::map label_map_; + std::map label_map_; }; } // namespace autoware::tensorrt_yolov10 diff --git a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml index 554fe3d3c6aac..dd1b020847437 100644 --- a/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml +++ b/perception/autoware_tensorrt_yolov10/launch/yolov10.launch.xml @@ -6,7 +6,6 @@ - diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 2297d9ea6c012..0da0d8b3a9c4d 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -116,7 +116,8 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) object.feature.roi.width = yolov10_object.width; object.feature.roi.height = yolov10_object.height; object.object.existence_probability = yolov10_object.score; - object.object.classification = object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); + object.object.classification = + object_recognition_utils::toObjectClassifications(label_map_[yolov10_object.type], 1.0f); out_objects.feature_objects.push_back(object); const auto left = std::max(0, static_cast(object.feature.roi.x_offset)); const auto top = std::max(0, static_cast(object.feature.roi.y_offset)); @@ -128,7 +129,9 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); - cv::putText(in_image_ptr->image, label_map_[yolov10_object.type], cv::Point(left,top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); + cv::putText( + in_image_ptr->image, label_map_[yolov10_object.type], cv::Point(left, top), + cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); } image_pub_.publish(in_image_ptr->toImageMsg()); @@ -152,7 +155,6 @@ void TrtYolov10Node::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg) } } - bool TrtYolov10Node::readLabelFile(const std::string & label_path) { std::ifstream label_file(label_path); @@ -171,7 +173,8 @@ bool TrtYolov10Node::readLabelFile(const std::string & label_path) return true; } -//we need this because autoware::object_recognition_utils::toLabel(const std::string & class_name) limit label type +// we need this because autoware::object_recognition_utils::toLabel(const std::string & class_name) +// limit label type void TrtYolov10Node::replaceLabelMap() { for (std::size_t i = 0; i < label_map_.size(); ++i) { diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp index c246a6ce9a75c..53f58a30d03ca 100644 --- a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -63,9 +63,11 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node const auto bottom = std::clamp(top + object.height, 0, image.rows); cv::rectangle( image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); - + string cls_id = std::to_string(object.type); - cv::putText(image, cls_id, cv::Point(left,top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); + cv::putText( + image, cls_id, cv::Point(left, top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), + 1, 8, 0); } if (!save_image) { cv::imshow("inference image", image); From fad0f0b0c6fbf192c33fe8cfaf3005f8a16a04ae Mon Sep 17 00:00:00 2001 From: suchang Date: Fri, 1 Nov 2024 13:37:31 +0800 Subject: [PATCH 14/23] fix ci error --- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 2297d9ea6c012..eb9e75193f50d 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -78,7 +78,7 @@ void TrtYolov10Node::onConnect() objects_pub_->get_subscription_count() == 0 && objects_pub_->get_intra_process_subscription_count() == 0 && image_pub_.getNumSubscribers() == 0) { - // printf("no suber who sub from objects_pub_ or image_pub_,shut down image_sub_\n"); + // printf("no subscribers who sub from objects_pub_ or image_pub_,shut down image_sub_\n"); image_sub_.shutdown(); } else if (!image_sub_) { image_sub_ = image_transport::create_subscription( From 63c58985591e31a8f32b1ce3255d8c8de8184b10 Mon Sep 17 00:00:00 2001 From: suchang Date: Fri, 1 Nov 2024 13:45:07 +0800 Subject: [PATCH 15/23] fix ci error --- .../include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp | 2 +- perception/autoware_tensorrt_yolov10/package.xml | 2 +- .../autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp index 4cc57e9404765..f869ee117db25 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp @@ -15,7 +15,7 @@ #ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ #define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_NODE_HPP_ -#include "object_recognition_utils/object_recognition_utils.hpp" +#include "autoware/object_recognition_utils/object_recognition_utils.hpp" #include #include diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml index a9bfb383c1f8f..6e2183335889a 100644 --- a/perception/autoware_tensorrt_yolov10/package.xml +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -23,7 +23,7 @@ cv_bridge image_transport libopencv-dev - object_recognition_utils + autoware_object_recognition_utils perception_utils rclcpp rclcpp_components diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp index 226e250d2f688..6d173876b9840 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10_node.cpp @@ -14,7 +14,7 @@ #include "autoware/tensorrt_yolov10/tensorrt_yolov10_node.hpp" -#include "object_recognition_utils/object_classification.hpp" +#include "autoware/object_recognition_utils/object_classification.hpp" #include "perception_utils/run_length_encoder.hpp" #include From cd2470c3d1ae468c7a62b3f70eca2a3bc91ed322 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 05:47:22 +0000 Subject: [PATCH 16/23] style(pre-commit): autofix --- perception/autoware_tensorrt_yolov10/package.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolov10/package.xml b/perception/autoware_tensorrt_yolov10/package.xml index 6e2183335889a..76b161e50cbd6 100644 --- a/perception/autoware_tensorrt_yolov10/package.xml +++ b/perception/autoware_tensorrt_yolov10/package.xml @@ -17,13 +17,13 @@ cudnn_cmake_module tensorrt_cmake_module + autoware_object_recognition_utils autoware_perception_msgs autoware_tensorrt_common cuda_utils cv_bridge image_transport libopencv-dev - autoware_object_recognition_utils perception_utils rclcpp rclcpp_components From df9a7cc5671c93376645e43e656dda12cf35a3f7 Mon Sep 17 00:00:00 2001 From: suchang Date: Fri, 1 Nov 2024 15:25:48 +0800 Subject: [PATCH 17/23] fix ci error Signed-off-by: suchang --- .../tensorrt_yolov10/tensorrt_yolov10.hpp | 15 +++++++++++++++ .../src/tensorrt_yolov10.cpp | 2 +- .../src/yolov10_single_image_inference_node.cpp | 4 ++-- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp index 552c5dedeceeb..9d638a06db2b0 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -1,3 +1,18 @@ +// Copyright 2023 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + #ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ #define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ diff --git a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp index 7447e8e8c032e..5106edc9cc6e5 100644 --- a/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp +++ b/perception/autoware_tensorrt_yolov10/src/tensorrt_yolov10.cpp @@ -386,7 +386,7 @@ ObjectArray TrtYolov10::postprocess(float * result, float factor) object.width = width; object.height = height; object.score = score; - object.type = int(result[s + 5]); + object.type = static_cast(result[s + 5]); object_array.emplace_back(object); } } diff --git a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp index 53f58a30d03ca..da896629bec94 100644 --- a/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp +++ b/perception/autoware_tensorrt_yolov10/src/yolov10_single_image_inference_node.cpp @@ -26,7 +26,7 @@ namespace fs = ::std::experimental::filesystem; #include #include -using namespace std; +// using namespace std; #define PRINT_DEBUG_INFO printf("line:%d\n", __LINE__); @@ -64,7 +64,7 @@ class Yolov10SingleImageInferenceNode : public rclcpp::Node cv::rectangle( image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3, 8, 0); - string cls_id = std::to_string(object.type); + std::string cls_id = std::to_string(object.type); cv::putText( image, cls_id, cv::Point(left, top), cv::FONT_HERSHEY_COMPLEX, 1, cv::Scalar(0, 255, 255), 1, 8, 0); From 515f864055dc47148da7d53bb8a417305e5fc911 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 07:29:52 +0000 Subject: [PATCH 18/23] style(pre-commit): autofix --- .../include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp index 9d638a06db2b0..6e6de8c6121ab 100644 --- a/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp +++ b/perception/autoware_tensorrt_yolov10/include/autoware/tensorrt_yolov10/tensorrt_yolov10.hpp @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. - #ifndef AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ #define AUTOWARE__TENSORRT_YOLOV10__TENSORRT_YOLOV10_HPP_ From 76ce9f0f802f4144392577d47def70409e0fe2f4 Mon Sep 17 00:00:00 2001 From: suchang Date: Mon, 18 Nov 2024 10:53:59 +0800 Subject: [PATCH 19/23] update readme Signed-off-by: suchang --- .../autoware_tensorrt_yolov10/README.md | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index adda6cebea0f2..15046f9c560c5 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -1 +1,129 @@ # autoware_tensorrt_yolov10 + +## Purpose + +This package detects target objects e.g., cars, trucks, bicycles, pedestrians,etc on a image based on [YOLOV10](https://github.com/THU-MIG/yolov10) model. + +## Inputs / Outputs + +### Input + +| Name | Type | Description | +| ---------- | ------------------- | --------------- | +| `in/image` | `sensor_msgs/Image` | The input image | + +### Output + +| Name | Type | Description | +| ---------------- | -------------------------------------------------- | ------------------------------------------------------------------- | +| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes | +| `out/image` | `sensor_msgs/Image` | The image with 2D bounding boxes for visualization | + +## Assumptions / Known limits + +The label contained in detected 2D bounding boxes (i.e., `out/objects`) will be either one of the followings: + +- CAR +- PEDESTRIAN ("PERSON" will also be categorized as "PEDESTRIAN") +- BUS +- TRUCK +- BICYCLE +- MOTORCYCLE + +If other labels (case insensitive) are contained in the file specified via the `label_file` parameter, +those are labeled as `UNKNOWN`, while detected rectangles are drawn in the visualization result (`out/image`). + +## Onnx model +you can download yolov10m.onnx from [releases](https://github.com/THU-MIG/yolov10/releases) + +## Label file +This file represents the correspondence between class index (integer outputted from YOLOV10 network) and +class label (strings making understanding easier). This package maps class IDs (incremented from 0) +with labels according to the order in this file. + +currently, this file is actually a coco label which contains the following labels: +``` +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +``` + +## Reference repositories + +- From 726c120c3d8f47f878a7a2d7e12be87a19dc704c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 02:56:40 +0000 Subject: [PATCH 20/23] style(pre-commit): autofix --- perception/autoware_tensorrt_yolov10/README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index 15046f9c560c5..f07e64df7b1b1 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -14,10 +14,10 @@ This package detects target objects e.g., cars, trucks, bicycles, pedestrians,et ### Output -| Name | Type | Description | -| ---------------- | -------------------------------------------------- | ------------------------------------------------------------------- | -| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes | -| `out/image` | `sensor_msgs/Image` | The image with 2D bounding boxes for visualization | +| Name | Type | Description | +| ------------- | -------------------------------------------------- | -------------------------------------------------- | +| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes | +| `out/image` | `sensor_msgs/Image` | The image with 2D bounding boxes for visualization | ## Assumptions / Known limits @@ -34,14 +34,17 @@ If other labels (case insensitive) are contained in the file specified via the ` those are labeled as `UNKNOWN`, while detected rectangles are drawn in the visualization result (`out/image`). ## Onnx model + you can download yolov10m.onnx from [releases](https://github.com/THU-MIG/yolov10/releases) ## Label file + This file represents the correspondence between class index (integer outputted from YOLOV10 network) and class label (strings making understanding easier). This package maps class IDs (incremented from 0) with labels according to the order in this file. currently, this file is actually a coco label which contains the following labels: + ``` person bicycle From 46c14a37a72b7e5e21522aad982f484b4fa038ef Mon Sep 17 00:00:00 2001 From: suchang Date: Tue, 26 Nov 2024 10:18:19 +0800 Subject: [PATCH 21/23] update readme Signed-off-by: suchang --- perception/autoware_tensorrt_yolov10/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index 15046f9c560c5..3cd7962c06449 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -127,3 +127,7 @@ hair drier ## Reference repositories - + + +## Legal Notice +The inference code is licensed under Apache 2.0. The model and training code are licensed under AGPL-3.0. you can check details from https://github.com/THU-MIG/yolov10?tab=AGPL-3.0-1-ov-file. To inquire about a commercial license when using trained model weights please contact yolov10 author. \ No newline at end of file From 1e8a265745f6d0bbb1d15670ee03dc9757b9a3d0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Nov 2024 02:20:53 +0000 Subject: [PATCH 22/23] style(pre-commit): autofix --- perception/autoware_tensorrt_yolov10/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index 3c235210a5682..aa813d5ad5a04 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -131,6 +131,6 @@ hair drier - - ## Legal Notice -The inference code is licensed under Apache 2.0. The model and training code are licensed under AGPL-3.0. you can check details from https://github.com/THU-MIG/yolov10?tab=AGPL-3.0-1-ov-file. To inquire about a commercial license when using trained model weights please contact yolov10 author. \ No newline at end of file + +The inference code is licensed under Apache 2.0. The model and training code are licensed under AGPL-3.0. you can check details from . To inquire about a commercial license when using trained model weights please contact yolov10 author. From 1309d742e08f69949987a079336693517f3b9ffc Mon Sep 17 00:00:00 2001 From: storrrrrrrrm <103425473+storrrrrrrrm@users.noreply.github.com> Date: Mon, 2 Dec 2024 10:06:05 +0800 Subject: [PATCH 23/23] Update perception/autoware_tensorrt_yolov10/README.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: M. Fatih Cırıt --- perception/autoware_tensorrt_yolov10/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perception/autoware_tensorrt_yolov10/README.md b/perception/autoware_tensorrt_yolov10/README.md index aa813d5ad5a04..85950717d66da 100644 --- a/perception/autoware_tensorrt_yolov10/README.md +++ b/perception/autoware_tensorrt_yolov10/README.md @@ -45,7 +45,7 @@ with labels according to the order in this file. currently, this file is actually a coco label which contains the following labels: -``` +```text person bicycle car