From 882b732df1510a5a73e893fcfc8497cd81108e0c Mon Sep 17 00:00:00 2001 From: Fiona Gladwin <121928245+fgladwin@users.noreply.github.com> Date: Wed, 13 Dec 2023 09:44:30 +0530 Subject: [PATCH] rocAL Tensor Retinanet training support (#78) * tensor changes * Update Doxyfile * test updates * bug fixes * bug fix * adding image augemntation app changes * Minor changes * Add ROI structure Add support to process multidimension ROI * Add support to update multi dimension ROI * Adding mask pipeline support for rocAL * Fix build issues wrt ROI struct Add necessary ROI changes Add Union for ROI Add pybind changes to copy ROI * Fix build issue * Add box IOU matcher changes Add pybind changes for IOU matcher Remove BoundingBoxCordf * Fixing build issues * Minor change * Remove redundant code * Minor change * Minor changes * Modify struct names * Remove strides in ROI * Change shape to end for ROICords * Fix crop dims in ssd random crop * Change ROI type to const in node.h * Resolving review comments * Resolving review comments * Resolving review comments * Formatting changes * Resolving review comments * Adding min_max scaling mode comment * Removing unused vector in coco reader * Improve code readability Add appropriate comments * Remove criteria argument in IOU matcher * Add IOU matcher info struct * Remove unused criteria variable * Change anchors to a ptr in IOU matcher struct * Minor change --------- Co-authored-by: LakshmiKumar23 Co-authored-by: SundarRajan28 Co-authored-by: Sundar Rajan Vaithiyanathan --- rocAL/include/api/rocal_api_meta_data.h | 21 ++- rocAL/include/meta_data/bounding_box_graph.h | 18 +-- rocAL/include/meta_data/meta_data_graph.h | 8 ++ rocAL/include/pipeline/master_graph.h | 13 +- rocAL/source/api/rocal_api_meta_data.cpp | 31 ++++- rocAL/source/meta_data/bounding_box_graph.cpp | 126 +++++++++++++----- rocAL/source/pipeline/master_graph.cpp | 44 +++++- rocAL_pybind/amd/rocal/fn.py | 7 +- rocAL_pybind/amd/rocal/readers.py | 3 +- 
rocAL_pybind/rocal_pybind.cpp | 25 ++-- 10 files changed, 222 insertions(+), 74 deletions(-) diff --git a/rocAL/include/api/rocal_api_meta_data.h b/rocAL/include/api/rocal_api_meta_data.h index 5c5a305dd..17407dbb8 100644 --- a/rocAL/include/api/rocal_api_meta_data.h +++ b/rocAL/include/api/rocal_api_meta_data.h @@ -81,9 +81,10 @@ extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContex * \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors * \param [in] avoid_class_remapping If set to True, classes are returned directly. Otherwise, classes are mapped to consecutive values * \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned + * \param [in] is_box_iou_matcher If set to True, box iou matcher which returns matched indices is enabled in the pipeline * \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors */ -extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false); +extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false); /*! \brief create coco reader key points * \ingroup group_rocal_meta_data @@ -296,4 +297,22 @@ extern "C" void ROCAL_API_CALL rocalGetImageId(RocalContext p_context, int* buf) */ extern "C" void ROCAL_API_CALL rocalGetJointsDataPtr(RocalContext p_context, RocalJointsData** joints_data); +/*! 
\brief API to enable box IOU matcher and pass required params to pipeline + * \ingroup group_rocal_meta_data + * \param [in] p_context rocAL context + * \param [in] anchors The anchors / ground truth bounding box coordinates + * \param [in] high_threshold The max threshold for IOU + * \param [in] low_threshold The min threshold for IOU + * \param [in] allow_low_quality_matches bool value when set to true allows low quality matches + */ +extern "C" void ROCAL_API_CALL rocalBoxIouMatcher(RocalContext p_context, std::vector& anchors, + float high_threshold, float low_threshold, bool allow_low_quality_matches = true); + +/*! \brief API to return the matched indices for the bounding box and anchors + * \ingroup group_rocal_meta_data + * \param [in] p_context rocAL context + * \return RocalTensorList of matched indices + */ +extern "C" RocalTensorList ROCAL_API_CALL rocalGetMatchedIndices(RocalContext p_context); + #endif // MIVISIONX_ROCAL_API_META_DATA_H diff --git a/rocAL/include/meta_data/bounding_box_graph.h b/rocAL/include/meta_data/bounding_box_graph.h index 76e2cf5fe..34710b3ae 100644 --- a/rocAL/include/meta_data/bounding_box_graph.h +++ b/rocAL/include/meta_data/bounding_box_graph.h @@ -26,22 +26,7 @@ THE SOFTWARE. 
#include "meta_data_graph.h" #include "meta_node.h" -typedef struct { - float xc; - float yc; - float w; - float h; -} BoundingBoxCord_xcycwh; -typedef struct { - float l; - float t; - float r; - float b; -} BoundingBoxCord_ltrb; -typedef union { - BoundingBoxCord_xcycwh xcycwh; - BoundingBoxCord_ltrb ltrb; -} BoundingBoxCordf; // Union comprises of float bbox cords of ltrb/xcycwh type +typedef struct { float xc; float yc; float w; float h; } BoundingBoxCord_xcycwh; class BoundingBoxGraph : public MetaDataGraph { public: @@ -49,4 +34,5 @@ class BoundingBoxGraph : public MetaDataGraph { void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decode_image_info) override; void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) override; void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) override; + void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) override; }; diff --git a/rocAL/include/meta_data/meta_data_graph.h b/rocAL/include/meta_data/meta_data_graph.h index b66c5d15a..563f7f069 100644 --- a/rocAL/include/meta_data/meta_data_graph.h +++ b/rocAL/include/meta_data/meta_data_graph.h @@ -30,6 +30,13 @@ THE SOFTWARE. 
#include "parameter_factory.h" #include "randombboxcrop_meta_data_reader.h" +typedef struct { + std::vector *anchors; + float high_threshold; + float low_threshold; + bool allow_low_quality_matches; +} BoxIouMatcherInfo; + class MetaDataGraph { public: virtual ~MetaDataGraph() = default; @@ -37,5 +44,6 @@ class MetaDataGraph { virtual void update_meta_data(pMetaDataBatch meta_data, decoded_image_info decoded_image_info) = 0; virtual void update_random_bbox_meta_data(pMetaDataBatch input_meta_data, pMetaDataBatch output_meta_data, decoded_image_info decoded_image_info, crop_image_info crop_image_info) = 0; virtual void update_box_encoder_meta_data(std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) = 0; + virtual void update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) = 0; std::list> _meta_nodes; }; diff --git a/rocAL/include/pipeline/master_graph.h b/rocAL/include/pipeline/master_graph.h index af2f12b84..6b34d0453 100644 --- a/rocAL/include/pipeline/master_graph.h +++ b/rocAL/include/pipeline/master_graph.h @@ -46,10 +46,11 @@ THE SOFTWARE. 
#include "randombboxcrop_meta_data_reader.h" #include "rocal_api_types.h" #define MAX_STRING_LENGTH 100 -#define MAX_OBJECTS 50 // Setting an arbitrary value 50.(Max number of objects/image in COCO dataset is 93) +#define MAX_OBJECTS 50 // Setting an arbitrary value 50.(Max number of objects/image in COCO dataset is 93) #define BBOX_COUNT 4 -#define MAX_NUM_ANCHORS 8732 // Num of bbox achors used in SSD training +#define MAX_SSD_ANCHORS 8732 // Num of bbox anchors used in SSD training #define MAX_MASK_BUFFER 10000 +#define MAX_RETINANET_ANCHORS 120087 // Num of bbox anchors used in Retinanet training #if ENABLE_SIMD #if _WIN32 @@ -107,18 +108,20 @@ class MasterGraph { std::vector create_label_reader(const char *source_path, MetaDataReaderType reader_type); std::vector create_video_label_reader(const char *source_path, MetaDataReaderType reader_type, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true); std::vector create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false, - bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); + bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, bool is_box_iou_matcher = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0); std::vector create_tf_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type, const std::map feature_key_map); std::vector create_caffe_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); std::vector create_caffe2_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type); std::vector create_cifar10_label_reader(const char 
*source_path, const char *file_prefix); std::vector create_mxnet_label_reader(const char *source_path, bool is_output); void box_encoder(std::vector &anchors, float criteria, const std::vector &means, const std::vector &stds, bool offset, float scale); + void box_iou_matcher(std::vector &anchors, float high_threshold, float low_threshold, bool allow_low_quality_matches); void create_randombboxcrop_reader(RandomBBoxCrop_MetaDataReaderType reader_type, RandomBBoxCrop_MetaDataType label_type, bool all_boxes_overlap, bool no_crop, FloatParam *aspect_ratio, bool has_shape, int crop_width, int crop_height, int num_attempts, FloatParam *scaling, int total_num_attempts, int64_t seed = 0); const std::pair &meta_data(); TensorList *labels_meta_data(); TensorList *bbox_meta_data(); TensorList *mask_meta_data(); + TensorList *matched_index_meta_data(); void set_loop(bool val) { _loop = val; } void set_output(Tensor *output_tensor); size_t calculate_cpu_num_threads(size_t shard_count); @@ -164,6 +167,7 @@ class MasterGraph { TensorList _labels_tensor_list; TensorList _bbox_tensor_list; TensorList _mask_tensor_list; + TensorList _matches_tensor_list; std::vector _meta_data_buffer_size; #if ENABLE_HIP DeviceManagerHip _device; //!< Keeps the device related constructs needed for running on GPU @@ -204,6 +208,9 @@ class MasterGraph { bool _offset; // Returns normalized offsets ((encoded_bboxes*scale - anchors*scale) - mean) / stds in EncodedBBoxes that use std and the mean and scale arguments if offset="True" std::vector _means, _stds; //_means: [x y w h] mean values for normalization _stds: [x y w h] standard deviations for offset normalization. 
bool _augmentation_metanode = false; + // box IoU matcher variables + bool _is_box_iou_matcher = false; // bool variable to set the box iou matcher + BoxIouMatcherInfo _iou_matcher_info; #if ENABLE_HIP BoxEncoderGpu *_box_encoder_gpu = nullptr; #endif diff --git a/rocAL/source/api/rocal_api_meta_data.cpp b/rocAL/source/api/rocal_api_meta_data.cpp index ffb68391e..313553507 100644 --- a/rocAL/source/api/rocal_api_meta_data.cpp +++ b/rocAL/source/api/rocal_api_meta_data.cpp @@ -71,14 +71,14 @@ RocalMetaData RocalMetaData ROCAL_API_CALL - rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping) { + rocalCreateCOCOReader(RocalContext p_context, const char* source_path, bool is_output, bool mask, bool ltrb, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, bool is_box_iou_matcher) { if (!p_context) THROW("Invalid rocal context passed to rocalCreateCOCOReader") auto context = static_cast(p_context); if (mask) { - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::PolygonMask, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping, is_box_iou_matcher); } - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_META_DATA_READER, MetaDataType::BoundingBox, ltrb, is_box_encoder, avoid_class_remapping, aspect_ratio_grouping, 
is_box_iou_matcher); } RocalMetaData @@ -88,7 +88,7 @@ RocalMetaData THROW("Invalid rocal context passed to rocalCreateCOCOReaderKeyPoints") auto context = static_cast(p_context); - return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_KEY_POINTS_META_DATA_READER, MetaDataType::KeyPoints, sigma, pose_output_width, pose_output_height); + return context->master_graph->create_coco_meta_data_reader(source_path, is_output, MetaDataReaderType::COCO_KEY_POINTS_META_DATA_READER, MetaDataType::KeyPoints, /*ltrb_bbox=*/false, /*is_box_encoder=*/false, /*avoid_class_remapping=*/false, /*aspect_ratio_grouping=*/false, /*is_box_iou_matcher=*/false, sigma, pose_output_width, pose_output_height); } RocalMetaData @@ -490,3 +490,26 @@ void *joints_data = (RocalJointsData*)(&(meta_data.second->get_joints_data_batch())); } + +void + ROCAL_API_CALL + rocalBoxIouMatcher(RocalContext p_context, + std::vector& anchors, + float high_threshold, float low_threshold, + bool allow_low_quality_matches) { + if (!p_context) + THROW("Invalid rocal context passed to rocalBoxIouMatcher") + auto context = static_cast(p_context); + context->master_graph->box_iou_matcher(anchors, high_threshold, + low_threshold, + allow_low_quality_matches); +} + +RocalTensorList + ROCAL_API_CALL + rocalGetMatchedIndices(RocalContext p_context) { + if (!p_context) + THROW("Invalid rocal context passed to rocalGetMatchedIndices") + auto context = static_cast(p_context); + return context->master_graph->matched_index_meta_data(); +} diff --git a/rocAL/source/meta_data/bounding_box_graph.cpp b/rocAL/source/meta_data/bounding_box_graph.cpp index 2c6b2d105..0dd882a2d 100644 --- a/rocAL/source/meta_data/bounding_box_graph.cpp +++ b/rocAL/source/meta_data/bounding_box_graph.cpp @@ -58,13 +58,13 @@ void BoundingBoxGraph::update_meta_data(pMetaDataBatch input_meta_data, decoded_ } } -inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const float &box1_area, const BoundingBoxCordf &box2) { - float xA = std::max(static_cast(box1.l), box2.ltrb.l); - float yA = 
std::max(static_cast(box1.t), box2.ltrb.t); - float xB = std::min(static_cast(box1.r), box2.ltrb.r); - float yB = std::min(static_cast(box1.b), box2.ltrb.b); +inline float ssd_BBoxIntersectionOverUnion(const BoundingBoxCord &box1, const float &box1_area, const BoundingBoxCord &box2) { + float xA = std::max(static_cast(box1.l), box2.l); + float yA = std::max(static_cast(box1.t), box2.t); + float xB = std::min(static_cast(box1.r), box2.r); + float yB = std::min(static_cast(box1.b), box2.b); float intersection_area = std::max((float)0.0, xB - xA) * std::max((float)0.0, yB - yA); - float box2_area = (box2.ltrb.b - box2.ltrb.t) * (box2.ltrb.r - box2.ltrb.l); + float box2_area = (box2.b - box2.t) * (box2.r - box2.l); return (float)(intersection_area / (box1_area + box2_area - intersection_area)); } @@ -116,7 +116,7 @@ void BoundingBoxGraph::update_random_bbox_meta_data(pMetaDataBatch input_meta_da } } -inline void calculate_ious_for_box(float *ious, BoundingBoxCord &box, BoundingBoxCordf *anchors, unsigned int num_anchors) { +inline void calculate_ious_for_box(float *ious, BoundingBoxCord &box, BoundingBoxCord *anchors, unsigned int num_anchors) { float box_area = (box.b - box.t) * (box.r - box.l); ious[0] = ssd_BBoxIntersectionOverUnion(box, box_area, anchors[0]); @@ -149,13 +149,13 @@ inline int find_best_box_for_anchor(unsigned anchor_idx, const std::vector *anchors, pMetaDataBatch full_batch_meta_data, float criteria, bool offset, float scale, std::vector &means, std::vector &stds, float *encoded_boxes_data, int *encoded_labels_data) { #pragma omp parallel for for (int i = 0; i < full_batch_meta_data->size(); i++) { - BoundingBoxCordf *bbox_anchors = reinterpret_cast(anchors->data()); + BoundingBoxCord *bbox_anchors = reinterpret_cast(anchors->data()); auto bb_count = full_batch_meta_data->get_labels_batch()[i].size(); int *bb_labels = full_batch_meta_data->get_labels_batch()[i].data(); BoundingBoxCord *bb_coords = 
reinterpret_cast(full_batch_meta_data->get_bb_cords_batch()[i].data()); unsigned anchors_size = anchors->size() / 4; // divide the anchors_size by 4 to get the total number of anchors int *encoded_labels = encoded_labels_data + (i * anchors_size); - BoundingBoxCordf *encoded_bb = reinterpret_cast(encoded_boxes_data + (i * anchors_size * 4)); + BoundingBoxCord_xcycwh *encoded_bb = reinterpret_cast(encoded_boxes_data + (i * anchors_size * 4)); // Calculate Ious // ious size - bboxes count x anchors count std::vector ious(bb_count * anchors_size); @@ -167,36 +167,36 @@ void BoundingBoxGraph::update_box_encoder_meta_data(std::vector *anchors, float half_scale = 0.5 * scale; // Depending on the matches ->place the best bbox instead of the corresponding anchor_idx in anchor for (unsigned anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { - BoundingBoxCordf box_bestidx, anchor_xcyxwh; - BoundingBoxCordf *p_anchor = &bbox_anchors[anchor_idx]; + BoundingBoxCord_xcycwh box_bestidx, anchor_xcyxwh; + BoundingBoxCord *p_anchor = &bbox_anchors[anchor_idx]; const auto best_idx = find_best_box_for_anchor(anchor_idx, ious, bb_count, anchors_size); // Filter matches by criteria if (ious[(best_idx * anchors_size) + anchor_idx] > criteria) // Its a match { // Convert the "ltrb" format to "xcycwh" if (offset) { - box_bestidx.xcycwh.xc = (bb_coords[best_idx].l + bb_coords[best_idx].r) * half_scale; // xc - box_bestidx.xcycwh.yc = (bb_coords[best_idx].t + bb_coords[best_idx].b) * half_scale; // yc - box_bestidx.xcycwh.w = (bb_coords[best_idx].r - bb_coords[best_idx].l) * scale; // w - box_bestidx.xcycwh.h = (bb_coords[best_idx].b - bb_coords[best_idx].t) * scale; // h + box_bestidx.xc = (bb_coords[best_idx].l + bb_coords[best_idx].r) * half_scale; // xc + box_bestidx.yc = (bb_coords[best_idx].t + bb_coords[best_idx].b) * half_scale; // yc + box_bestidx.w = (bb_coords[best_idx].r - bb_coords[best_idx].l) * scale; // w + box_bestidx.h = (bb_coords[best_idx].b - 
bb_coords[best_idx].t) * scale; // h // Convert the "ltrb" format to "xcycwh" - anchor_xcyxwh.xcycwh.xc = (p_anchor->ltrb.l + p_anchor->ltrb.r) * half_scale; // xc - anchor_xcyxwh.xcycwh.yc = (p_anchor->ltrb.t + p_anchor->ltrb.b) * half_scale; // yc - anchor_xcyxwh.xcycwh.w = (p_anchor->ltrb.r - p_anchor->ltrb.l) * scale; // w - anchor_xcyxwh.xcycwh.h = (p_anchor->ltrb.b - p_anchor->ltrb.t) * scale; // h + anchor_xcyxwh.xc = (p_anchor->l + p_anchor->r) * half_scale; // xc + anchor_xcyxwh.yc = (p_anchor->t + p_anchor->b) * half_scale; // yc + anchor_xcyxwh.w = (p_anchor->r - p_anchor->l) * scale; // w + anchor_xcyxwh.h = (p_anchor->b - p_anchor->t) * scale; // h // Reference for offset calculation between the Ground Truth bounding boxes & anchor boxes in format // https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection#predictions-vis-%C3%A0-vis-priors - box_bestidx.xcycwh.xc = ((box_bestidx.xcycwh.xc - anchor_xcyxwh.xcycwh.xc) / anchor_xcyxwh.xcycwh.w - means[0]) * inv_stds[0]; - box_bestidx.xcycwh.yc = ((box_bestidx.xcycwh.yc - anchor_xcyxwh.xcycwh.yc) / anchor_xcyxwh.xcycwh.h - means[1]) * inv_stds[1]; - box_bestidx.xcycwh.w = (std::log(box_bestidx.xcycwh.w / anchor_xcyxwh.xcycwh.w) - means[2]) * inv_stds[2]; - box_bestidx.xcycwh.h = (std::log(box_bestidx.xcycwh.h / anchor_xcyxwh.xcycwh.h) - means[3]) * inv_stds[3]; + box_bestidx.xc = ((box_bestidx.xc - anchor_xcyxwh.xc) / anchor_xcyxwh.w - means[0]) * inv_stds[0]; + box_bestidx.yc = ((box_bestidx.yc - anchor_xcyxwh.yc) / anchor_xcyxwh.h - means[1]) * inv_stds[1]; + box_bestidx.w = (std::log(box_bestidx.w / anchor_xcyxwh.w) - means[2]) * inv_stds[2]; + box_bestidx.h = (std::log(box_bestidx.h / anchor_xcyxwh.h) - means[3]) * inv_stds[3]; encoded_bb[anchor_idx] = box_bestidx; encoded_labels[anchor_idx] = bb_labels[best_idx]; } else { - box_bestidx.xcycwh.xc = 0.5 * (bb_coords[best_idx].l + bb_coords[best_idx].r); // xc - box_bestidx.xcycwh.yc = 0.5 * (bb_coords[best_idx].t + bb_coords[best_idx].b); // yc 
- box_bestidx.xcycwh.w = bb_coords[best_idx].r - bb_coords[best_idx].l; // w - box_bestidx.xcycwh.h = bb_coords[best_idx].b - bb_coords[best_idx].t; // h + box_bestidx.xc = 0.5 * (bb_coords[best_idx].l + bb_coords[best_idx].r); // xc + box_bestidx.yc = 0.5 * (bb_coords[best_idx].t + bb_coords[best_idx].b); // yc + box_bestidx.w = bb_coords[best_idx].r - bb_coords[best_idx].l; // w + box_bestidx.h = bb_coords[best_idx].b - bb_coords[best_idx].t; // h encoded_bb[anchor_idx] = box_bestidx; encoded_labels[anchor_idx] = bb_labels[best_idx]; } @@ -207,13 +207,77 @@ void BoundingBoxGraph::update_box_encoder_meta_data(std::vector *anchors, encoded_labels[anchor_idx] = 0; } else { // Convert the "ltrb" format to "xcycwh" - encoded_bb[anchor_idx].xcycwh.xc = 0.5 * (p_anchor->ltrb.l + p_anchor->ltrb.r); // xc - encoded_bb[anchor_idx].xcycwh.yc = 0.5 * (p_anchor->ltrb.t + p_anchor->ltrb.b); // yc - encoded_bb[anchor_idx].xcycwh.w = (-p_anchor->ltrb.l + p_anchor->ltrb.r); // w - encoded_bb[anchor_idx].xcycwh.h = (-p_anchor->ltrb.t + p_anchor->ltrb.b); // h + encoded_bb[anchor_idx].xc = 0.5 * (p_anchor->l + p_anchor->r); // xc + encoded_bb[anchor_idx].yc = 0.5 * (p_anchor->t + p_anchor->b); // yc + encoded_bb[anchor_idx].w = (-p_anchor->l + p_anchor->r); // w + encoded_bb[anchor_idx].h = (-p_anchor->t + p_anchor->b); // h encoded_labels[anchor_idx] = 0; } } } } } + +void BoundingBoxGraph::update_box_iou_matcher(BoxIouMatcherInfo &iou_matcher_info, int *matches_idx_buffer, pMetaDataBatch full_batch_meta_data) { + auto bb_coords_batch = full_batch_meta_data->get_bb_cords_batch(); + unsigned anchors_size = iou_matcher_info.anchors->size() / 4; // divide the anchors_size by 4 to get the total number of anchors + BoundingBoxCord *bbox_anchors = reinterpret_cast(iou_matcher_info.anchors->data()); + + std::vector matches(full_batch_meta_data->size()); + for (int i = 0; i < full_batch_meta_data->size(); i++) { + matches[i] = reinterpret_cast(matches_idx_buffer + i * anchors_size); + } + 
+#pragma omp parallel for + for (int i = 0; i < full_batch_meta_data->size(); i++) { + const auto &bb_coords = bb_coords_batch[i]; + auto bb_count = bb_coords.size(); + + std::vector matched_vals(anchors_size, -1.0); + std::vector low_quality_preds(anchors_size, -1); + + // Calculate IoU's, The number of IoU Values calculated will be (bb_count x anchors_size) + for (unsigned bb_idx = 0; bb_idx < bb_count; bb_idx++) { + BoundingBoxCord box = bb_coords[bb_idx]; + float box_area = (box.b - box.t) * (box.r - box.l); + float best_bbox_iou = -1.0f; + std::vector bbox_iou(anchors_size); // IoU value for bbox mapped with each anchor + for (unsigned int anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { + float iou_val = ssd_BBoxIntersectionOverUnion(box, box_area, bbox_anchors[anchor_idx]); + bbox_iou[anchor_idx] = iou_val; + + // Find col maximum in (bb_count x anchors_size) IoU values calculated + if (iou_val > matched_vals[anchor_idx]) { + matched_vals[anchor_idx] = iou_val; + matches[i][anchor_idx] = static_cast(bb_idx); + } + + // Find row maximum in (bb_count x anchors_size) IoU values calculated + if (iou_matcher_info.allow_low_quality_matches) { + if (iou_val > best_bbox_iou) best_bbox_iou = iou_val; + } + } + + if (iou_matcher_info.allow_low_quality_matches) { + for (unsigned int anchor_idx = 0; anchor_idx < anchors_size; anchor_idx++) { // Flag every anchor whose IoU ties with this box's best IoU + if (fabs(bbox_iou[anchor_idx] - best_bbox_iou) < 1e-6) // Compare the IOU values and check if they are equal with a tolerance of 1e-6 + low_quality_preds[anchor_idx] = anchor_idx; + } + } + } + + // Update matched indices based on thresholds and low quality matches + for (unsigned pred_idx = 0; pred_idx < anchors_size; pred_idx++) { + if (!(iou_matcher_info.allow_low_quality_matches && low_quality_preds[pred_idx] != -1)) { + if (matched_vals[pred_idx] < iou_matcher_info.low_threshold) { + matches[i][pred_idx] = -1; + } else if ((matched_vals[pred_idx] < iou_matcher_info.high_threshold)) { + 
matches[i][pred_idx] = -2; + } + } + } + + matched_vals.clear(); + low_quality_preds.clear(); + } +} diff --git a/rocAL/source/pipeline/master_graph.cpp b/rocAL/source/pipeline/master_graph.cpp index b63f568d6..24a2d59bf 100644 --- a/rocAL/source/pipeline/master_graph.cpp +++ b/rocAL/source/pipeline/master_graph.cpp @@ -959,6 +959,10 @@ void MasterGraph::output_routine() { #endif _meta_data_graph->update_box_encoder_meta_data(&_anchors, output_meta_data, _criteria, _offset, _scale, _means, _stds, (float *)bbox_encode_write_buffers.first, (int *)bbox_encode_write_buffers.second); } + if (_is_box_iou_matcher) { + int *matches_write_buffer = reinterpret_cast(_ring_buffer.get_meta_write_buffers()[2]); + _meta_data_graph->update_box_iou_matcher(_iou_matcher_info, matches_write_buffer, output_meta_data); + } _bencode_time.end(); #ifdef ROCAL_VIDEO _sequence_start_framenum_vec.insert(_sequence_start_framenum_vec.begin(), _loader_module->get_sequence_start_frame_number()); @@ -1001,7 +1005,7 @@ void MasterGraph::stop_processing() { _output_thread.join(); } -std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, float sigma, unsigned pose_output_width, unsigned pose_output_height) { +std::vector MasterGraph::create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType metadata_type, bool ltrb_bbox, bool is_box_encoder, bool avoid_class_remapping, bool aspect_ratio_grouping, bool is_box_iou_matcher, float sigma, unsigned pose_output_width, unsigned pose_output_height) { if (_meta_data_reader) THROW("A metadata reader has already been created") if (_augmented_meta_data) @@ -1017,7 +1021,7 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c _meta_data_reader->read_all(source_path); if (!ltrb_bbox) 
_augmented_meta_data->set_xywh_bbox(); std::vector dims; - size_t max_objects = static_cast(is_box_encoder ? MAX_NUM_ANCHORS : MAX_OBJECTS); + size_t max_objects = static_cast(is_box_encoder ? MAX_SSD_ANCHORS : MAX_OBJECTS); dims = {max_objects}; auto default_labels_info = TensorInfo(std::move(dims), _mem_type, RocalTensorDataType::INT32); // Create default labels Info default_labels_info.set_metadata(); @@ -1036,6 +1040,13 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c default_mask_info.set_metadata(); _meta_data_buffer_size.emplace_back(_user_batch_size * default_mask_info.data_size()); } + if (is_box_iou_matcher) { + _is_box_iou_matcher = true; + dims = {MAX_RETINANET_ANCHORS}; + default_matches_info = TensorInfo(std::move(dims), _mem_type, RocalTensorDataType::INT32); // Create default matches info + default_matches_info.set_metadata(); + _meta_data_buffer_size.emplace_back(_user_batch_size * default_matches_info.data_size()); + } for (unsigned i = 0; i < _user_batch_size; i++) // Create rocALTensorList for each metadata { @@ -1047,12 +1058,18 @@ std::vector MasterGraph::create_coco_meta_data_reader(const c auto mask_info = default_mask_info; _mask_tensor_list.push_back(new Tensor(mask_info)); } + if(is_box_iou_matcher) { + auto matches_info = default_matches_info; + _matches_tensor_list.push_back(new Tensor(matches_info)); + } } _ring_buffer.init_metadata(RocalMemType::HOST, _meta_data_buffer_size); _metadata_output_tensor_list.emplace_back(&_labels_tensor_list); _metadata_output_tensor_list.emplace_back(&_bbox_tensor_list); if (metadata_type == MetaDataType::PolygonMask) _metadata_output_tensor_list.emplace_back(&_mask_tensor_list); + if(is_box_iou_matcher) + _metadata_output_tensor_list.emplace_back(&_matches_tensor_list); return _metadata_output_tensor_list; } @@ -1344,6 +1361,18 @@ const std::pair &MasterGraph::meta_data() { return _ring_buffer.get_meta_data(); } +void MasterGraph::box_iou_matcher(std::vector &anchors, + float 
high_threshold, float low_threshold, + bool allow_low_quality_matches) { + if (!_is_box_iou_matcher) + THROW("Box IOU matcher variable not set cannot return matched idx") + _anchors = anchors; // Uses existing _anchors variable used for box encoder + _iou_matcher_info.anchors = &_anchors; + _iou_matcher_info.high_threshold = high_threshold; + _iou_matcher_info.low_threshold = low_threshold; + _iou_matcher_info.allow_low_quality_matches = allow_low_quality_matches; +} + size_t MasterGraph::bounding_box_batch_count(pMetaDataBatch meta_data_batch) { size_t size = 0; for (unsigned i = 0; i < _user_batch_size; i++) @@ -1393,6 +1422,17 @@ TensorList *MasterGraph::mask_meta_data() { return &_mask_tensor_list; } +TensorList *MasterGraph::matched_index_meta_data() { + if (_ring_buffer.level() == 0) + THROW("No meta data has been loaded") + auto meta_data_buffers = reinterpret_cast(_ring_buffer.get_meta_read_buffers()[2]); // Get matches buffer from ring buffer + for (unsigned i = 0; i < _matches_tensor_list.size(); i++) { + _matches_tensor_list[i]->set_mem_handle(reinterpret_cast(meta_data_buffers)); + meta_data_buffers += _matches_tensor_list[i]->info().data_size(); + } + return &_matches_tensor_list; +} + void MasterGraph::notify_user_thread() { if (_output_routine_finished_processing) return; diff --git a/rocAL_pybind/amd/rocal/fn.py b/rocAL_pybind/amd/rocal/fn.py index a5b60b62c..cd0e35c75 100644 --- a/rocAL_pybind/amd/rocal/fn.py +++ b/rocAL_pybind/amd/rocal/fn.py @@ -1033,13 +1033,12 @@ def snp_noise(*inputs, p_noise=0.0, p_salt=0.0, noise_val=0.0, salt_val=0.0, return (snp_noise_added_image) -def box_iou_matcher(*inputs, anchors, criteria=0.5, high_threshold=0.5, +def box_iou_matcher(*inputs, anchors, high_threshold=0.5, low_threshold=0.4, allow_low_quality_matches=True, device=None): """!Applies box IoU matching to the input image. @param inputs (list) The input image to which box IoU matching is applied. 
@param anchors (list of floats) Anchors to be used for encoding, in the ltrb format. - @param criteria (float, optional, default = 0.5) Criteria value used for box IoU matching. Default is 0.5. @param high_threshold (float, optional, default = 0.5) Upper threshold used for matching indices. Default is 0.5. @param low_threshold (float, optional, default = 0.4) Lower threshold used for matching indices. Default is 0.4. @param allow_low_quality_matches (bool, optional, default = True) Whether to allow low quality matches as output. Default is True. @@ -1049,9 +1048,9 @@ def box_iou_matcher(*inputs, anchors, criteria=0.5, high_threshold=0.5, """ # pybind call arguments - kwargs_pybind = {"anchors": anchors, "criteria": criteria, "high_threshold": high_threshold, + kwargs_pybind = {"anchors": anchors, "high_threshold": high_threshold, "low_threshold": low_threshold, "allow_low_quality_matches": allow_low_quality_matches} - box_iou_matcher = b.BoxIOUMatcher( + box_iou_matcher = b.boxIouMatcher( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) Pipeline._current_pipeline._box_iou_matcher = True return (box_iou_matcher, []) diff --git a/rocAL_pybind/amd/rocal/readers.py b/rocAL_pybind/amd/rocal/readers.py index 9f28a1996..70e5a25f3 100644 --- a/rocAL_pybind/amd/rocal/readers.py +++ b/rocAL_pybind/amd/rocal/readers.py @@ -57,7 +57,8 @@ def coco(annotations_file='', ltrb=True, masks=False, ratio=False, avoid_class_r "ltrb": ltrb, "is_box_encoder": is_box_encoder, "avoid_class_remapping": avoid_class_remapping, - "aspect_ratio_grouping": aspect_ratio_grouping} + "aspect_ratio_grouping": aspect_ratio_grouping, + "is_box_iou_matcher": is_box_iou_matcher} meta_data = b.cocoReader( Pipeline._current_pipeline._handle, *(kwargs_pybind.values())) return (meta_data, labels, bboxes) diff --git a/rocAL_pybind/rocal_pybind.cpp b/rocAL_pybind/rocal_pybind.cpp index 2c446f274..e4cec4cb7 100644 --- a/rocAL_pybind/rocal_pybind.cpp +++ b/rocAL_pybind/rocal_pybind.cpp @@ -384,7 
+384,7 @@ PYBIND11_MODULE(rocal_pybind, m) { // rocal_api_meta_data.h m.def("randomBBoxCrop", &rocalRandomBBoxCrop); m.def("boxEncoder", &rocalBoxEncoder); - // m.def("BoxIOUMatcher", &rocalBoxIOUMatcher); // Will be enabled when IOU matcher changes are introduced in C++ + m.def("boxIouMatcher", &rocalBoxIouMatcher); m.def("getImgSizes", [](RocalContext context, py::array_t array) { auto buf = array.request(); int *ptr = static_cast(buf.ptr); @@ -504,17 +504,18 @@ PYBIND11_MODULE(rocal_pybind, m) { } return complete_list; }); - // Will be enabled when IOU matcher changes are introduced in C++ - // m.def("getMatchedIndices", [](RocalContext context) { - // rocalTensorList *matches = rocalGetMatchedIndices(context); - // return py::array(py::buffer_info( - // (int *)(matches->at(0)->buffer()), - // sizeof(int), - // py::format_descriptor::format(), - // 1, - // {matches->size() * 120087}, - // {sizeof(int) })); - // }, py::return_value_policy::reference); + m.def( + "getMatchedIndices", [](RocalContext context) { + rocalTensorList *matches = rocalGetMatchedIndices(context); + return py::array(py::buffer_info( + static_cast(matches->at(0)->buffer()), + sizeof(int), + py::format_descriptor::format(), + 1, + {matches->size() * matches->at(0)->dims().at(0)}, + {sizeof(int)})); + }, + py::return_value_policy::reference); m.def("rocalGetEncodedBoxesAndLables", [](RocalContext context, uint batch_size, uint num_anchors) { auto vec_pair_labels_boxes = rocalGetEncodedBoxesAndLables(context, batch_size * num_anchors); auto labels_buf_ptr = static_cast(vec_pair_labels_boxes[0]->at(0)->buffer());