ROCm · kiritigowda · Jan 24, 2024 · Jan 11, 2024 · Jan 12, 2024 · Jan 12, 2024
@@ -48,12 +48,12 @@ int main(int argc, const char** argv) {
     if (argc < MIN_ARG_COUNT) {
         printf(
             "Usage: image_augmentation <image_dataset_folder/video_file> <processing_device=1/cpu=0>  \
-              decode_width decode_height video_mode gray_scale/rgb display_on_off decode_shard_count  <shuffle:0/1> <jpeg_dec_mode<0(tjpeg)/1(opencv)/2(hwdec)>\n");
+              decode_width decode_height decoder_mode gray_scale/rgb display_on_off decode_shard_count  <shuffle:0/1> <jpeg_dec_mode<0(tjpeg)/1(opencv)/2(hwdec)>\n");
         return -1;
     }
     int argIdx = 0;
     const char* folderPath1 = argv[++argIdx];
-    int video_mode = 0;  // 0 means no video decode, 1 means hardware, 2 means software decoding
+    int decoder_mode = 0;  // 0 means JPEG file decode, 1 means fused JPEG crop decode, 2 means software video decode, greater than 2 means hardware video decode
     bool display = 1;    // Display the images
     int aug_depth = 1;   // how deep is the augmentation tree
     int rgb = 1;         // process color images
@@ -62,7 +62,7 @@ int main(int argc, const char** argv) {
     bool processing_device = 1;
     size_t shard_count = 2;
     int shuffle = 0;
-    int dec_mode = 0;
+    int decoder_type = 0;
     const char *outName = "image_augmentation_app.png";
 
     if (argc >= argIdx + MIN_ARG_COUNT)
@@ -75,7 +75,7 @@ int main(int argc, const char** argv) {
         decode_height = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
-        video_mode = atoi(argv[++argIdx]);
+        decoder_mode = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
         rgb = atoi(argv[++argIdx]);
@@ -90,7 +90,7 @@ int main(int argc, const char** argv) {
         shuffle = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
-        dec_mode = atoi(argv[++argIdx]);
+        decoder_type = atoi(argv[++argIdx]);
 
     if (argc >= argIdx + MIN_ARG_COUNT)
         outName = argv[++argIdx];
@@ -108,7 +108,7 @@ int main(int argc, const char** argv) {
         return -1;
     }
 
-    RocalDecoderType dec_type = (RocalDecoderType)dec_mode;
+    RocalDecoderType dec_type = (RocalDecoderType)decoder_type;
 
     /*>>>>>>>>>>>>>>>> Creating rocAL parameters  <<<<<<<<<<<<<<<<*/
 
@@ -126,15 +126,20 @@ int main(int argc, const char** argv) {
     /*>>>>>>>>>>>>>>>>>>> Graph description <<<<<<<<<<<<<<<<<<<*/
     RocalTensor input1;
 
-    if (video_mode != 0) {
+    if (decoder_mode >= 2) {
         unsigned sequence_length = 3;
         unsigned frame_step = 3;
         unsigned frame_stride = 1;
         if (decode_height <= 0 || decode_width <= 0) {
             std::cout << "Output width and height is needed for video decode\n";
             return -1;
         }
-        input1 = rocalVideoFileSource(handle, folderPath1, color_format, ((video_mode == 1) ? RocalDecodeDevice::ROCAL_HW_DECODE : RocalDecodeDevice::ROCAL_SW_DECODE), shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false);
+        input1 = rocalVideoFileSource(handle, folderPath1, color_format, (decoder_mode == 2)? ROCAL_SW_DECODE: ROCAL_HW_DECODE, shard_count, sequence_length, frame_step, frame_stride, shuffle, true, false);
+    } else if (decoder_mode == 1) {
+            std::vector<float> area = {0.08, 1};
+            std::vector<float> aspect_ratio = {3.0f / 4, 4.0f / 3};
+            input1 = rocalFusedJpegCrop(handle, folderPath1, color_format, shard_count, false, area, aspect_ratio, 10, false, false, ROCAL_USE_USER_GIVEN_SIZE_RESTRICTED, decode_width, decode_height);
+
     } else {
         // The jpeg file loader can automatically select the best size to decode all images to that size
         // User can alternatively set the size or change the policy that is used to automatically find the size
@@ -152,7 +157,7 @@ int main(int argc, const char** argv) {
 
     RocalTensor tensor0;
     int resize_w = 112, resize_h = 112;
-    if (video_mode) {
+    if (decoder_mode >= 2) {
         resize_h = decode_height;
         resize_w = decode_width;
         tensor0 = input1;
@@ -214,7 +219,7 @@ int main(int argc, const char** argv) {
     int w = rocalGetOutputWidth(handle);
     int p = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? 3 : 1);
     std::cout << "output width " << w << " output height " << h << " color planes " << p << std::endl;
-    const unsigned number_of_cols = video_mode ? 1 : 10;
+    const unsigned number_of_cols = (decoder_mode >= 2) ? 1 : 10;
     auto cv_color_format = ((color_format == RocalImageColor::ROCAL_COLOR_RGB24) ? CV_8UC3 : CV_8UC1);
     cv::Mat mat_output(h + AMD_ROCm_Black_resize.rows, w * number_of_cols, cv_color_format);
     cv::Mat mat_input(h, w, cv_color_format);

@@ -9,7 +9,7 @@
 import cupy as cp
 
 seed = 1549361629
-image_dir = "../../../../data/images/AMD-tinyDataSet/"
+image_dir = "../../../data/images/AMD-tinyDataSet/"
 batch_size = 4
 gpu_id = 0
 
@@ -34,13 +34,13 @@ def show_pipeline_output(pipe, device):
     pipe.build()
     data_loader = ROCALClassificationIterator(pipe, device)
     images = next(iter(data_loader))
-    show_images(images[0], device)
+    show_images(images[0][0], device)
 
 @pipeline_def(seed=seed)
 def image_decoder_pipeline(device="cpu", path=image_dir):
-    jpegs, labels = fn.readers.file(file_root=path, shard_id=0, num_shards=1, random_shuffle=False)
+    jpegs, labels = fn.readers.file(file_root=path)
     images = fn.decoders.image(jpegs, file_root=path, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)
-    return fn.resize(images, device=device, resize_x=300, resize_y=300)
+    return fn.resize(images, device=device, resize_width=300, resize_height=300)
 
 def main():
     print ('Optional arguments: <cpu/gpu image_folder>')
@@ -52,9 +52,8 @@ def main():
           rocal_device = "gpu"
     if  len(sys.argv) > 2:
       img_folder = sys.argv[2]
-
-    pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC,
-                                  reverse_channels=True, mean = [0, 0, 0], std=[255, 255, 255], device=rocal_device, path=img_folder)
+    pipe = image_decoder_pipeline(batch_size=bs, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, 
+                                reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=rocal_device, path=img_folder)
     show_pipeline_output(pipe, device=rocal_device)
 
 if __name__ == '__main__':

@@ -38,7 +38,7 @@
     "%matplotlib inline\n",
     "\n",
     "seed = 1549361629\n",
-    "image_dir = \"../../../../data/images/AMD-tinyDataSet/\"\n",
+    "image_dir = \"../../../data/images/AMD-tinyDataSet/\"\n",
     "batch_size = 4\n",
     "gpu_id = 0\n",
     "\n",
@@ -61,7 +61,7 @@
     "    pipe.build()\n",
     "    data_loader = ROCALClassificationIterator(pipe, device, device_id)\n",
     "    images = next(iter(data_loader))\n",
-    "    show_images(images[0], device)\n"
+    "    show_images(images[0][0], device)\n"
    ]
   },
   {
@@ -82,9 +82,9 @@
    "source": [
     "@pipeline_def(seed=seed)\n",
     "def image_decoder_pipeline(device=\"cpu\"):\n",
-    "    jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+    "    jpegs, labels = fn.readers.file(file_root=image_dir)\n",
     "    images = fn.decoders.image(jpegs, file_root=image_dir, device=device, output_type=types.RGB, shard_id=0, num_shards=1, random_shuffle=False)\n",
-    "    return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+    "    return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
     "\n",
     "pipe = image_decoder_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
     "                            reverse_channels=True, mean = [0, 0, 0], std=[255,255,255], device=\"cpu\")\n",
@@ -109,12 +109,13 @@
    "source": [
     "@pipeline_def(seed=seed)\n",
     "def image_decoder_random_crop_pipeline(device=\"cpu\"):\n",
-    "    jpegs, labels = fn.readers.file(file_root=image_dir, shard_id=0, num_shards=1, random_shuffle=False)\n",
+    "    jpegs, labels = fn.readers.file(file_root=image_dir)\n",
     "    images = fn.decoders.image_slice(jpegs, file_root=image_dir, \n",
-    "                                     device=device,\n",
     "                                     output_type=types.RGB,\n",
+    "                                     shard_id = 0,\n",
+    "                                     num_shards = 1,\n",
     "                                     random_shuffle=True)\n",
-    "    return fn.resize(images, device=device, resize_x=300, resize_y=300)\n",
+    "    return fn.resize(images, device=device, resize_width=300, resize_height=300)\n",
     "    \n",
     "pipe = image_decoder_random_crop_pipeline(batch_size=batch_size, num_threads=1, device_id=gpu_id, rocal_cpu=True, tensor_layout=types.NHWC, \n",
     "                                          reverse_channels=True, mean=[0,0,0], std = [255,255,255], device=\"cpu\")\n",
@@ -184,7 +185,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.6"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {

@@ -31,7 +31,7 @@
 
 
 seed = 1549361629
-image_dir = "../../../../data/images/AMD-tinyDataSet/"
+image_dir = "../../../data/images/AMD-tinyDataSet/"
 batch_size = 4
 gpu_id = 0
 

@@ -333,11 +333,11 @@
         os.system('sudo '+linuxFlag+' '+linuxSystemInstall+' ' +
                   linuxSystemInstall_check+' install lmdb-devel rapidjson-devel')
 
-    # turbo-JPEG - https://github.com/rrawther/libjpeg-turbo.git -- 2.0.6.2
+    # turbo-JPEG - https://github.com/libjpeg-turbo/libjpeg-turbo.git -- 3.0.1
     os.system(
-        '(cd '+deps_dir+'; git clone -b 2.0.6.2 https://github.com/rrawther/libjpeg-turbo.git )')
+        '(cd '+deps_dir+'; git clone -b 3.0.1 https://github.com/libjpeg-turbo/libjpeg-turbo.git )')
     os.system('(cd '+deps_dir+'/libjpeg-turbo; mkdir build; cd build; '+linuxCMake +
-              ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib ..; make -j 4; sudo make install )')
+              ' -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=RELEASE -DENABLE_STATIC=FALSE -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib -DWITH_JPEG8=TRUE ..; make -j 4; sudo make install )')
     # RPP
     os.system('sudo -v')
     os.system('(cd '+deps_dir+'; git clone -b '+rppVersion+' https://github.com/GPUOpen-ProfessionalCompute-Libraries/rpp.git; cd rpp; mkdir build-'+backend+'; cd build-'+backend+'; ' +

@@ -42,6 +42,14 @@ find_package(Threads QUIET)
 find_package(LMDB QUIET)	
 find_package(RapidJSON QUIET)
 
+if(DEFINED ENV{ROCM_PATH})
+  set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Default ROCm installation path")
+elseif(ROCM_PATH)
+  message("-- INFO:ROCM_PATH Set -- ${ROCM_PATH}")
+else()
+  set(ROCM_PATH /opt/rocm CACHE PATH "Default ROCm installation path")
+endif()
+
 # HIP Backend
 if(GPU_SUPPORT AND "${BACKEND}" STREQUAL "HIP")
     if(NOT DEFINED HIP_PATH)
@@ -220,6 +228,7 @@ if(${BUILD_ROCAL})
                 include/augmentations/geometry_augmentations/
                 include/decoders/image/
                 include/decoders/video/
+                include/decoders/libjpeg/
                 include/device/
                 include/loaders/
                 include/loaders/image/

@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved.
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -64,24 +64,8 @@ class TJDecoder : public Decoder {
 
    private:
     tjhandle m_jpegDecompressor;
-    const static unsigned SCALING_FACTORS_COUNT = 16;
-    const tjscalingfactor SCALING_FACTORS[SCALING_FACTORS_COUNT] = {
-        {2, 1},
-        {15, 8},
-        {7, 4},
-        {13, 8},
-        {3, 2},
-        {11, 8},
-        {5, 4},
-        {9, 8},
-        {1, 1},
-        {7, 8},
-        {3, 4},
-        {5, 8},
-        {1, 2},
-        {3, 8},
-        {1, 4},
-        {1, 8}};
+    tjscalingfactor *_scaling_factors = nullptr;
+    int _num_scaling_factors = 0;
     bool _is_partial_decoder = false;
     std::vector<float> _bbox_coord;
     const static unsigned _max_scaling_factor = 8;

@@ -0,0 +1,75 @@
+/*
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+#pragma once
+
+#include <turbojpeg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "libjpeg_utils.h"
+
+extern "C" {
+
+//! extra apis for rocal to support partial decoding
+
+//! * Helper function to set the source
+//! * This function doesn't scale the decoded image
+
+//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image.
+//! * This function doesn't scale the decoded image
+
+/*!
+  \param handle  TJPeg handle
+  \param jpegBuf compressed jpeg image buffer
+  \param jpegSize Size of the compressed data provided in the input_buffer
+  \param dstBuf user provided output buffer
+  \param width, pitch, height  width, stride and height of the allocated buffer
+  \param flags  TJPEG flags
+  \param pixelFormat  pixel format of the image
+  \param crop_x_diff,  crop_width_diff Actual crop_x and crop_w (adjusted to MB boundary)
+  \param x1, y1, crop_width, crop_height requested crop window
+*/
+
+int tjDecompress2_partial(tjhandle handle, const unsigned char *jpegBuf,
+                                    unsigned long jpegSize, unsigned char *dstBuf,
+                                    int width, int pitch, int height, int pixelFormat,
+                                    int flags, unsigned int *crop_x_diff, unsigned int *crop_width_diff,
+                                    unsigned int x1, unsigned int y1, unsigned int crop_width, unsigned int crop_height);
+
+
+//! * Decompress a subregion of JPEG image to an RGB, grayscale, or CMYK image.
+//! * This function scales the decoded image to fit the output dims
+/*!
+  \param handle  TJPeg handle
+  \param jpegBuf compressed jpeg image buffer
+  \param jpegSize Size of the compressed data provided in the input_buffer
+  \param dstBuf user provided output buffer
+  \param width, pitch, height  width, stride and height of the allocated buffer
+  \param flags  TJPEG flags
+  \param crop_width, crop_height requested crop window
+*/
+
+int tjDecompress2_partial_scale(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, unsigned char *dstBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            int flags, unsigned int crop_width, unsigned int crop_height);
+}
@@ -0,0 +1,30 @@
+/*
+Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+#pragma once
+
+//! turbojpeg includes
+
+extern "C" {
+#include "jerror.h"  
+#include "jpeglib.h" 
+#include "jpegint.h"
+}
@@ -33,14 +33,6 @@ THE SOFTWARE.
 #include "timing_debug.h"
 #include "turbo_jpeg_decoder.h"
 
-/**
- * Compute the scaled value of <tt>dimension</tt> using the given scaling
- * factor.  This macro performs the integer equivalent of <tt>ceil(dimension *
- * scalingFactor)</tt>.
- */
-#define TJSCALED(dimension, scalingFactor)                       \
-    ((dimension * scalingFactor.num + scalingFactor.denom - 1) / \
-     scalingFactor.denom)
 
 class ImageReadAndDecode {
    public: