Skip to content

Commit

Permalink
rocAL - Tensor mask changes (#74)
Browse files Browse the repository at this point in the history
* tensor changes

* Update Doxyfile

* test updates

* bug fixes

* bug fix

* adding image augmentation app changes

* Minor changes

* Add ROI structure

Add support to process multidimension ROI

* Add support to update multi dimension ROI

* Adding mask pipeline support for rocAL

* Fix build issues wrt ROI struct

Add necessary ROI changes
Add Union for ROI
Add pybind changes to copy ROI

* Fixing build issues

* Minor change

* Remove redundant code

* Minor change

* Minor changes

* Modify struct names

* Remove strides in ROI

* Change shape to end for ROICords

* Fix crop dims in ssd random crop

* Change ROI type to const in node.h

* Resolving review comments

* Resolving review comments

* Resolving review comments

* Formatting changes

* Resolving review comments

* Adding min_max scaling mode comment

* Removing unused vector in coco reader

---------

Co-authored-by: LakshmiKumar23 <[email protected]>
Co-authored-by: fgladwin <[email protected]>
Co-authored-by: SundarRajan28 <[email protected]>
  • Loading branch information
4 people authored Nov 9, 2023
1 parent 8814353 commit a9c18ca
Show file tree
Hide file tree
Showing 20 changed files with 421 additions and 163 deletions.
3 changes: 1 addition & 2 deletions rocAL/include/api/rocal_api_data_transfer.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context,
RocalTensorLayout tensor_format, RocalTensorOutputType tensor_output_type,
float multiplier0, float multiplier1, float multiplier2, float offset0,
float offset1, float offset2,
bool reverse_channels, RocalOutputMemType output_mem_type);
bool reverse_channels, RocalOutputMemType output_mem_type, int max_roi_height = 0, int max_roi_width = 0);

/*!
* \brief Sets the output images in the RocalContext
Expand All @@ -74,7 +74,6 @@ extern "C" RocalStatus ROCAL_API_CALL rocalToTensor(RocalContext rocal_context,
*/
extern "C" void ROCAL_API_CALL rocalSetOutputs(RocalContext p_context, unsigned int num_of_outputs, std::vector<RocalTensor> &output_images);


/*!
* \brief gives the list of output tensors from rocal context
* \ingroup group_rocal_data_transfer
Expand Down
14 changes: 13 additions & 1 deletion rocAL/include/api/rocal_api_meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,14 @@ extern "C" RocalMetaData ROCAL_API_CALL rocalCreateTFReaderDetection(RocalContex
* \ingroup group_rocal_meta_data
* \param [in] rocal_context rocal context
* \param [in] source_path path to the coco json file
* \param [in] mask enable polygon masks
* \param [in] ltrb If set to True, bboxes are returned as [left, top, right, bottom]. If set to False, the bboxes are returned as [x, y, width, height]
* \param [in] is_box_encoder If set to True, bboxes are returned as encoded bboxes using the anchors
* \param [in] avoid_class_remapping If set to True, classes are returned directly. Otherwise, classes are mapped to consecutive values
* \param [in] aspect_ratio_grouping If set to True, images are sorted by their aspect ratio and returned
* \return RocalMetaData object, can be used to inquire about the rocal's output (processed) tensors
*/
extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false);
extern "C" RocalMetaData ROCAL_API_CALL rocalCreateCOCOReader(RocalContext rocal_context, const char* source_path, bool is_output, bool mask = false, bool ltrb = true, bool is_box_encoder = false, bool avoid_class_remapping = false, bool aspect_ratio_grouping = false);

/*! \brief create coco reader key points
* \ingroup group_rocal_meta_data
Expand Down Expand Up @@ -209,6 +214,13 @@ extern "C" RocalTensorList ROCAL_API_CALL rocalGetBoundingBoxCords(RocalContext
*/
extern "C" void ROCAL_API_CALL rocalGetImageSizes(RocalContext rocal_context, int* buf);

/*! \brief get ROI image sizes
* \ingroup group_rocal_meta_data
* \param [in] rocal_context rocal context
* \param [out] buf The user's buffer that will be filled with ROI image size info for the images in the output batch
*/
extern "C" void ROCAL_API_CALL rocalGetROIImageSizes(RocalContext rocal_context, int* buf);

/*! \brief create text cifar10 label reader
* \ingroup group_rocal_meta_data
* \param [in] rocal_context rocal context
Expand Down
8 changes: 6 additions & 2 deletions rocAL/include/meta_data/coco_meta_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,25 +32,29 @@ class COCOMetaDataReader : public MetaDataReader {
public:
void init(const MetaDataConfig& cfg, pMetaDataBatch meta_data_batch) override;
void lookup(const std::vector<std::string>& image_names) override;
ImgSize lookup_image_size(const std::string& image_name) override;
void read_all(const std::string& path) override;
void release(std::string image_name);
void release() override;
void print_map_contents();
bool set_timestamp_mode() override { return false; }

// Read-only view of the name -> metadata map held by this reader.
const std::map<std::string, std::shared_ptr<MetaData>>& get_map_content() override {
    return _map_content;
}

// Stores the aspect-ratio-grouping flag on this reader.
void set_aspect_ratio_grouping(bool aspect_ratio_grouping) override {
    _aspect_ratio_grouping = aspect_ratio_grouping;
}

// Reports the flag last stored through set_aspect_ratio_grouping().
bool get_aspect_ratio_grouping() const override {
    return _aspect_ratio_grouping;
}
COCOMetaDataReader();

private:
pMetaDataBatch _output;
std::string _path;
int meta_data_reader_type;
bool _avoid_class_remapping;
void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, int image_id = 0);
void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords mask_cords, std::vector<int> polygon_count, std::vector<std::vector<int>> vertices_count); // To add Mask coordinates to Metadata struct
void add(std::string image_name, BoundingBoxCords bbox, Labels labels, ImgSize image_size, MaskCords mask_cords, std::vector<int> polygon_count, std::vector<std::vector<int>> vertices_count, int image_id = 0); // To add Mask coordinates to Metadata struct
bool exists(const std::string& image_name) override;
std::map<std::string, std::shared_ptr<MetaData>> _map_content;
std::map<std::string, std::shared_ptr<MetaData>>::iterator _itr;
std::map<std::string, ImgSize> _map_img_sizes;
std::map<int, std::string> _map_image_names_to_id; // Keyed by an int and holding a std::string: looks up an image name from its image ID (the reverse of what the member name suggests)
std::map<std::string, ImgSize>::iterator itr;
std::map<int, int> _label_info;
std::map<int, int>::iterator _it_label;
Expand Down
11 changes: 10 additions & 1 deletion rocAL/include/meta_data/meta_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ typedef class MetaDataInfo {
int img_id = -1;
std::string img_name = "";
ImgSize img_size = {};
ImgSize img_roi_size = {};
} MetaDataInfo;

class MetaData {
Expand All @@ -121,9 +122,11 @@ class MetaData {
virtual JointsData& get_joints_data() = 0;
virtual void set_joints_data(JointsData* joints_data) = 0;
// --- Mutable accessors into the per-image info record (_info) ---

// Image size stored for this sample.
ImgSize& get_img_size() {
    return _info.img_size;
}

// ROI image size stored for this sample.
ImgSize& get_img_roi_size() {
    return _info.img_roi_size;
}

// Image name stored for this sample.
std::string& get_image_name() {
    return _info.img_name;
}

// Image id stored for this sample.
int& get_image_id() {
    return _info.img_id;
}

// --- Setters: by-value arguments are moved into the info record ---

void set_img_size(ImgSize img_size) {
    _info.img_size = std::move(img_size);
}

void set_img_roi_size(ImgSize img_roi_size) {
    _info.img_roi_size = std::move(img_roi_size);
}

void set_img_id(int img_id) {
    _info.img_id = img_id;
}
// Stores the image name in the info record. The argument is taken by value,
// so it is moved into place (matching set_img_size / set_metadata_info)
// instead of being copied a second time.
void set_img_name(std::string img_name) { _info.img_name = std::move(img_name); }
// Replaces the whole info record; the by-value argument is moved into _info.
void set_metadata_info(MetaDataInfo info) {
    _info = std::move(info);
}
Expand Down Expand Up @@ -167,13 +170,14 @@ class BoundingBox : public Label {

struct PolygonMask : public BoundingBox {
public:
PolygonMask(BoundingBoxCords bb_cords, Labels bb_label_ids, ImgSize img_size, MaskCords mask_cords, std::vector<int> polygon_count, std::vector<std::vector<int>> vertices_count) {
// Builds a polygon-mask metadata entry. Every by-value argument is moved into
// the corresponding member; img_id (default 0) is stored in the info record.
PolygonMask(BoundingBoxCords bb_cords,
            Labels bb_label_ids,
            ImgSize img_size,
            MaskCords mask_cords,
            std::vector<int> polygon_count,
            std::vector<std::vector<int>> vertices_count,
            int img_id = 0) {
    // Per-image info record.
    _info.img_id = img_id;
    _info.img_size = std::move(img_size);

    // Bounding boxes and their labels.
    _label_ids = std::move(bb_label_ids);
    _bb_cords = std::move(bb_cords);

    // Mask coordinates and the counts that accompany them.
    _vertices_count = std::move(vertices_count);
    _polygon_count = std::move(polygon_count);
    _mask_cords = std::move(mask_cords);
}
// Mutable access to the stored polygon counts.
std::vector<int>& get_polygon_count() override {
    return _polygon_count;
}

// Mutable access to the stored vertex counts (a vector of vectors).
std::vector<std::vector<int>>& get_vertices_count() override {
    return _vertices_count;
}
Expand Down Expand Up @@ -207,20 +211,24 @@ class MetaDataInfoBatch {
std::vector<int> img_ids = {};
std::vector<std::string> img_names = {};
std::vector<ImgSize> img_sizes = {};
std::vector<ImgSize> img_roi_sizes = {};
// Empties all four per-image vectors of this batch.
void clear() {
    img_roi_sizes.clear();
    img_sizes.clear();
    img_names.clear();
    img_ids.clear();
}
// Resizes all four per-image vectors to batch_size entries so they stay
// the same length.
void resize(int batch_size) {
    img_roi_sizes.resize(batch_size);
    img_sizes.resize(batch_size);
    img_names.resize(batch_size);
    img_ids.resize(batch_size);
}
void insert(MetaDataInfoBatch& other) {
img_sizes.insert(img_sizes.end(), other.img_sizes.begin(), other.img_sizes.end());
img_ids.insert(img_ids.end(), other.img_ids.begin(), other.img_ids.end());
img_names.insert(img_names.end(), other.img_names.begin(), other.img_names.end());
img_roi_sizes.insert(img_roi_sizes.end(), other.img_roi_sizes.begin(), other.img_roi_sizes.end());
}
};

Expand Down Expand Up @@ -249,6 +257,7 @@ class MetaDataBatch {
// --- Mutable accessors into the batch-level info record (_info_batch) ---

std::vector<int>& get_image_id_batch() {
    return _info_batch.img_ids;
}

std::vector<std::string>& get_image_names_batch() {
    return _info_batch.img_names;
}

ImgSizes& get_img_sizes_batch() {
    return _info_batch.img_sizes;
}

ImgSizes& get_img_roi_sizes_batch() {
    return _info_batch.img_roi_sizes;
}

// The whole info record rather than one of its vectors.
MetaDataInfoBatch& get_info_batch() {
    return _info_batch;
}

// --- Metadata type tag ---

void set_metadata_type(MetaDataType metadata_type) {
    _type = metadata_type;
}

MetaDataType get_metadata_type() {
    return _type;
}
Expand Down
12 changes: 12 additions & 0 deletions rocAL/include/meta_data/meta_data_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ struct MetaDataConfig {
unsigned _frame_stride;
unsigned _out_img_width;
unsigned _out_img_height;
// Both flags start as false so that class_remapping() and
// get_aspect_ratio_grouping() return a defined value even when the matching
// setter has not been called yet.
bool _avoid_class_remapping = false;
bool _aspect_ratio_grouping = false;

public:
MetaDataConfig(const MetaDataType& type, const MetaDataReaderType& reader_type, const std::string& path, const std::map<std::string, std::string>& feature_key_map = std::map<std::string, std::string>(), const std::string file_prefix = std::string(), const unsigned& sequence_length = 3, const unsigned& frame_step = 3, const unsigned& frame_stride = 1)
Expand All @@ -66,16 +68,23 @@ struct MetaDataConfig {
// --- Read accessors (each returns a copy of the stored value) ---

std::string path() const {
    return _path;
}

std::map<std::string, std::string> feature_key_map() const {
    return _feature_key_map;
}

std::string file_prefix() const {
    return _file_prefix;
}

// NOTE: despite its name, this returns the stored *avoid*-class-remapping flag.
bool class_remapping() const {
    return _avoid_class_remapping;
}

bool get_aspect_ratio_grouping() const {
    return _aspect_ratio_grouping;
}

unsigned sequence_length() const {
    return _sequence_length;
}

unsigned frame_step() const {
    return _frame_step;
}

unsigned frame_stride() const {
    return _frame_stride;
}

unsigned out_img_width() const {
    return _out_img_width;
}

unsigned out_img_height() const {
    return _out_img_height;
}

// --- Setters for values assigned after construction ---

void set_out_img_width(unsigned out_img_width) {
    _out_img_width = out_img_width;
}

void set_out_img_height(unsigned out_img_height) {
    _out_img_height = out_img_height;
}

void set_avoid_class_remapping(bool avoid_class_remapping) {
    _avoid_class_remapping = avoid_class_remapping;
}

void set_aspect_ratio_grouping(bool aspect_ratio_grouping) {
    _aspect_ratio_grouping = aspect_ratio_grouping;
}
};

class MetaDataReader {
protected:
// Defaults to false so a derived getter that returns this member yields a
// defined value even if set_aspect_ratio_grouping() was never called.
bool _aspect_ratio_grouping = false;

public:
enum class Status {
OK = 0
Expand All @@ -88,4 +97,7 @@ class MetaDataReader {
virtual const std::map<std::string, std::shared_ptr<MetaData>>& get_map_content() = 0;
virtual bool exists(const std::string& image_name) = 0;
virtual bool set_timestamp_mode() = 0;
// Default implementations for readers that do not override these hooks.

// Returns a value-initialized ImgSize; the image name is not consulted.
virtual ImgSize lookup_image_size(const std::string& image_name) {
    return ImgSize{};
}

// Base version is a no-op: the argument is ignored.
virtual void set_aspect_ratio_grouping(bool aspect_ratio_grouping) {
}

// Base version always reports false.
virtual bool get_aspect_ratio_grouping() const {
    return false;
}
};
5 changes: 3 additions & 2 deletions rocAL/include/pipeline/master_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class MasterGraph {
Status reset();
size_t remaining_count();
MasterGraph::Status to_tensor(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2,
float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type);
float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type, RocalOutputMemType output_mem_type, uint max_roi_height = 0, uint max_roi_width = 0);
Status copy_output(unsigned char *out_ptr, size_t out_size_in_bytes);
Status copy_out_tensor_planar(void *out_ptr, RocalTensorlayout format, float multiplier0, float multiplier1, float multiplier2,
float offset0, float offset1, float offset2, bool reverse_channels, RocalTensorDataType output_data_type);
Expand All @@ -106,7 +106,8 @@ class MasterGraph {
Tensor *create_loader_output_tensor(const TensorInfo &info);
std::vector<rocalTensorList *> create_label_reader(const char *source_path, MetaDataReaderType reader_type);
std::vector<rocalTensorList *> create_video_label_reader(const char *source_path, MetaDataReaderType reader_type, unsigned sequence_length, unsigned frame_step, unsigned frame_stride, bool file_list_frame_num = true);
std::vector<rocalTensorList *> create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0);
std::vector<rocalTensorList *> create_coco_meta_data_reader(const char *source_path, bool is_output, MetaDataReaderType reader_type, MetaDataType label_type, bool ltrb_bbox = true, bool is_box_encoder = false,
bool avoid_class_remapping = false, bool aspect_ratio_grouping = false, float sigma = 0.0, unsigned pose_output_width = 0, unsigned pose_output_height = 0);
std::vector<rocalTensorList *> create_tf_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type, const std::map<std::string, std::string> feature_key_map);
std::vector<rocalTensorList *> create_caffe_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type);
std::vector<rocalTensorList *> create_caffe2_lmdb_record_meta_data_reader(const char *source_path, MetaDataReaderType reader_type, MetaDataType label_type);
Expand Down
5 changes: 3 additions & 2 deletions rocAL/include/readers/image/coco_file_source_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ class COCOFileSourceReader : public Reader {
DIR *_src_dir;
DIR *_sub_dir;
struct dirent *_entity;
std::vector<std::string> _file_names;
std::vector<std::string> _files;
std::vector<std::string> _file_names, _sorted_file_names;
std::vector<float> _aspect_ratios;
unsigned _curr_file_idx;
FILE *_current_fPtr;
std::ifstream _current_ifs;
Expand All @@ -103,4 +103,5 @@ class COCOFileSourceReader : public Reader {
// Advances the reader's file id counter by one.
void incremenet_file_id() {
    ++_file_id;
}
void replicate_last_image_to_fill_last_shard();
void replicate_last_batch_to_pad_partial_shard();
void shuffle_with_aspect_ratios();
};
Loading

0 comments on commit a9c18ca

Please sign in to comment.