Skip to content

Commit

Permalink
[GPU] Global Buffer manager and optimization
Browse files Browse the repository at this point in the history
Implemented global Buffers
Optimized pipeline due to reduced buffer creation steps
Modified command queue and Buffer wrappers

Signed-off-by: Debadri Samaddar <[email protected]>
  • Loading branch information
s-debadri committed Dec 4, 2024
1 parent 0d74457 commit 3d44c5f
Show file tree
Hide file tree
Showing 11 changed files with 319 additions and 18 deletions.
41 changes: 41 additions & 0 deletions nntrainer/cl_buffer_manager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Debadri Samaddar <[email protected]>
*
* @file cl_buffer_manager.cpp
* @date 01 Dec 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Debadri Samaddar <[email protected]>
* @bug No known bugs except for NYI items
* @brief This file contains global Buffer objects and manages them
*/

#include <cl_buffer_manager.h>

namespace nntrainer {

/**
 * @brief Access the global ClBufferManager.
 *
 * The manager lives in a function-local static, so it is constructed on the
 * first call and the very same object is handed back on every later call.
 *
 * @return ClBufferManager& reference to the single manager object
 */
ClBufferManager &ClBufferManager::getInstance() {
  static ClBufferManager manager;
  return manager;
}

/**
 * @brief Create the global OpenCL buffers.
 *
 * All five buffers share the manager's context instance and the preset
 * buffer_size_bytes; they differ only in the read_only flag handed to
 * opencl::Buffer (true for the read buffers, false for the write buffers).
 *
 * to-do: Implementation to be updated with array of Buffer objects if
 * required. fp16 Buffer objects to be added in future.
 */
ClBufferManager::ClBufferManager() {
  // Single place that knows how a global buffer is created.
  const auto allocate = [this](bool read_only) {
    return new opencl::Buffer(context_inst_, buffer_size_bytes, read_only);
  };

  readBufferA = allocate(true);
  readBufferB = allocate(true);
  readBufferC = allocate(true);

  writeBufferA = allocate(false);
  writeBufferB = allocate(false);

  ml_logi("ClBufferManager: Buffers initialized");
}

/**
 * @brief Release every global buffer owned by the manager.
 *
 * Buffers are deleted in declaration order: the three read buffers first,
 * then the two write buffers.
 */
ClBufferManager::~ClBufferManager() {
  opencl::Buffer *const owned[] = {readBufferA, readBufferB, readBufferC,
                                   writeBufferA, writeBufferB};
  for (opencl::Buffer *buffer : owned) {
    delete buffer;
  }
}

} // namespace nntrainer
72 changes: 72 additions & 0 deletions nntrainer/cl_buffer_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Debadri Samaddar <[email protected]>
*
* @file cl_buffer_manager.h
* @date 01 Dec 2024
* @see https://github.com/nnstreamer/nntrainer
* @author Debadri Samaddar <[email protected]>
* @bug No known bugs except for NYI items
* @brief This file contains global Buffer objects and manages them
*/

#ifndef __CL_BUFFER_MANAGER_H__
#define __CL_BUFFER_MANAGER_H__

#include <string>

#include <opencl_buffer.h>
#include <opencl_context_manager.h>

#include <nntrainer_log.h>

namespace nntrainer {

/**
* @class ClBufferManager contains Buffer object management
* @brief Support for Buffer management
*/

class ClBufferManager {

private:
  /**
   * @brief Private constructor to prevent object creation outside of
   * getInstance(). Allocates all global buffers.
   */
  ClBufferManager();

  /**
   * @brief OpenCl context global instance
   *
   */
  opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();

  /**
   * @brief Buffer size in bytes preset (256 mebibytes).
   * Every global buffer is created with this size.
   */
  size_t buffer_size_bytes = 8192 * 8192 * sizeof(float);

public:
  /**
   * @brief Get Global ClBufferManager.
   *
   * @return ClBufferManager&
   */
  static ClBufferManager &getInstance();

  /**
   * @brief The manager owns its buffers through raw pointers and frees them
   * in the destructor, so a copy would delete every buffer twice. Copying is
   * therefore disabled; always use the reference from getInstance().
   */
  ClBufferManager(const ClBufferManager &) = delete;
  ClBufferManager &operator=(const ClBufferManager &) = delete;

  /**
   * @brief Global buffers created with the read-only flag set.
   * Owned by the manager; callers must not delete them.
   */
  opencl::Buffer *readBufferA = nullptr;
  opencl::Buffer *readBufferB = nullptr;
  opencl::Buffer *readBufferC = nullptr;

  /**
   * @brief Global buffers created with the read-only flag cleared.
   * Owned by the manager; callers must not delete them.
   */
  opencl::Buffer *writeBufferA = nullptr;
  opencl::Buffer *writeBufferB = nullptr;

  /**
   * @brief Destroy Buffer pointers.
   *
   */
  ~ClBufferManager();
};
} // namespace nntrainer

#endif /* __CL_BUFFER_MANAGER_H__ */
2 changes: 2 additions & 0 deletions nntrainer/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ nntrainer_common_sources = [

if get_option('enable-opencl')
nntrainer_headers += meson.current_source_dir() / 'cl_context.h'
nntrainer_headers += meson.current_source_dir() / 'cl_buffer_manager.h'
nntrainer_common_sources += 'cl_context.cpp'
nntrainer_common_sources += 'cl_buffer_manager.cpp'
endif

foreach s : nntrainer_common_sources
Expand Down
30 changes: 29 additions & 1 deletion nntrainer/opencl/opencl_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace nntrainer::opencl {
* @param read_only flag
* @param data data for the buffer
*/
Buffer::Buffer(ContextManager &context_manager, int size_in_bytes,
Buffer::Buffer(ContextManager &context_manager, size_t size_in_bytes,
bool read_only, void *data) {
cl_context context = context_manager.GetContext();
cl_mem_flags flags = read_only ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
Expand Down Expand Up @@ -94,6 +94,20 @@ bool Buffer::WriteData(CommandQueueManager &command_queue_inst,
return command_queue_inst.EnqueueWriteBuffer(mem_buf_, size_, data);
}

/**
 * @brief writing data to a region of the buffer
 *
 * @param command_queue_inst reference of command queue instance
 * @param size_in_bytes size of the region to write
 * @param data pointer to the host data
 * @param host_origin_offset offset in the host memory region
 * @param buffer_origin_offset offset in the buffer memory region
 * @return true if the write was enqueued successfully, false if the region
 * does not fit inside the buffer or the enqueue failed
 */
bool Buffer::WriteDataRegion(CommandQueueManager &command_queue_inst,
                             size_t size_in_bytes, const void *data,
                             size_t host_origin_offset,
                             size_t buffer_origin_offset) {
  // The whole range [buffer_origin_offset, buffer_origin_offset +
  // size_in_bytes) must lie inside the buffer. The check subtracts instead of
  // adding so that a huge offset cannot wrap around and slip through.
  if (buffer_origin_offset > size_ ||
      size_in_bytes > size_ - buffer_origin_offset) {
    ml_loge("Failed to write buffer region. Region (offset %zu bytes, size "
            "%zu bytes) exceeds buffer size(%zu bytes).",
            buffer_origin_offset, size_in_bytes, size_);
    return false;
  }
  return command_queue_inst.EnqueueWriteBufferRegion(
    mem_buf_, size_in_bytes, data, host_origin_offset, buffer_origin_offset);
}

/**
* @brief reading data from the buffer
*
Expand All @@ -105,6 +119,20 @@ bool Buffer::ReadData(CommandQueueManager &command_queue_inst, void *data) {
return command_queue_inst.EnqueueReadBuffer(mem_buf_, size_, data);
}

/**
 * @brief reading data from a region of the buffer
 *
 * @param command_queue_inst reference of command queue instance
 * @param size_in_bytes size of the region to read
 * @param data pointer to the host memory receiving the data
 * @param host_origin_offset offset in the host memory region
 * @param buffer_origin_offset offset in the buffer memory region
 * @return true if the read was enqueued successfully, false if the region
 * does not fit inside the buffer or the enqueue failed
 */
bool Buffer::ReadDataRegion(CommandQueueManager &command_queue_inst,
                            size_t size_in_bytes, void *data,
                            size_t host_origin_offset,
                            size_t buffer_origin_offset) {
  // The whole range [buffer_origin_offset, buffer_origin_offset +
  // size_in_bytes) must lie inside the buffer. The check subtracts instead of
  // adding so that a huge offset cannot wrap around and slip through.
  if (buffer_origin_offset > size_ ||
      size_in_bytes > size_ - buffer_origin_offset) {
    ml_loge("Failed to read from buffer region. Region (offset %zu bytes, "
            "size %zu bytes) exceeds buffer size(%zu bytes).",
            buffer_origin_offset, size_in_bytes, size_);
    return false;
  }
  return command_queue_inst.EnqueueReadBufferRegion(
    mem_buf_, size_in_bytes, data, host_origin_offset, buffer_origin_offset);
}

void *Buffer::MapBuffer(CommandQueueManager &command_queue_inst,
size_t offset_in_bytes, size_t size_in_bytes,
bool read_only, bool async) {
Expand Down
34 changes: 32 additions & 2 deletions nntrainer/opencl/opencl_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class Buffer {
* @param read_only flag
* @param data data for the buffer
*/
Buffer(ContextManager &context_manager, int size_in_bytes, bool read_only,
void *data);
Buffer(ContextManager &context_manager, size_t size_in_bytes, bool read_only,
void *data = nullptr);

/**
* @brief Move constructor for buffer by deleting the previous buffer
Expand Down Expand Up @@ -106,6 +106,21 @@ class Buffer {
*/
bool WriteData(CommandQueueManager &command_queue_inst, const void *data);

/**
 * @brief Writing data to a region of the buffer
 *
 * Unlike WriteData, only size_in_bytes bytes are transferred, and both the
 * host side and the buffer side may start at an offset.
 *
 * @param command_queue_inst reference of command queue instance
 * @param size_in_bytes size of the region to write
 * @param data pointer to the host data
 * @param host_origin_offset offset in the host memory region (default 0)
 * @param buffer_origin_offset offset in the buffer memory region (default 0)
 * @return true if successful write or false otherwise (e.g. size_in_bytes
 * is larger than the buffer, or the enqueue fails)
 */
bool WriteDataRegion(CommandQueueManager &command_queue_inst,
size_t size_in_bytes, const void *data,
size_t host_origin_offset = 0,
size_t buffer_origin_offset = 0);

/**
* @brief reading data from the buffer
*
Expand All @@ -115,6 +130,21 @@ class Buffer {
*/
bool ReadData(CommandQueueManager &command_queue_inst, void *data);

/**
 * @brief Reading data from a region of the buffer
 *
 * Unlike ReadData, only size_in_bytes bytes are transferred, and both the
 * host side and the buffer side may start at an offset.
 *
 * @param command_queue_inst reference of command queue instance
 * @param size_in_bytes size of the region to read
 * @param data pointer to the host memory receiving the data
 * @param host_origin_offset offset in the host memory region (default 0)
 * @param buffer_origin_offset offset in the buffer memory region (default 0)
 * @return true if successful read or false otherwise (e.g. size_in_bytes
 * is larger than the buffer, or the enqueue fails)
 */
bool ReadDataRegion(CommandQueueManager &command_queue_inst,
size_t size_in_bytes, void *data,
size_t host_origin_offset = 0,
size_t buffer_origin_offset = 0);

/**
* @brief Mapping buffer to host memory
*
Expand Down
71 changes: 71 additions & 0 deletions nntrainer/opencl/opencl_command_queue_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,41 @@ bool CommandQueueManager::EnqueueReadBuffer(cl_mem buffer, size_t size_in_bytes,
return true;
}

/**
 * @brief Reading 1D region from a buffer object. Used from Buffer class
 *
 * The transfer is issued through clEnqueueReadBufferRect with a region of
 * height 1 and depth 1, so buffer and host memory are both treated as flat
 * 1D ranges.
 *
 * @param buffer cl_mem buffer object
 * @param size_in_bytes size of data region
 * @param data pointer to the host memory receiving the region
 * @param host_origin_offset offset in the host memory region
 * @param buffer_origin_offset offset in the buffer memory region
 * @param async flag for asynchronous operation (false enqueues a blocking
 * read)
 * @return true if reading is successful or false otherwise
 */
bool CommandQueueManager::EnqueueReadBufferRegion(
  cl_mem buffer, size_t size_in_bytes, void *data, size_t host_origin_offset,
  size_t buffer_origin_offset, bool async) {

  // managing synchronization
  const cl_bool blocking = async ? CL_FALSE : CL_TRUE;

  // (x, y, z) offset in the memory region associated with buffer
  const size_t buffer_origin[] = {buffer_origin_offset, 0, 0};
  // (x, y, z) offset in the memory region associated with host
  const size_t host_origin[] = {host_origin_offset, 0, 0};
  // region defines the (width in bytes, height in rows, depth in slices)
  const size_t region[] = {size_in_bytes, 1, 1};
  // length of each row in bytes
  const size_t row_pitch = region[0];
  // length of each 2D slice in bytes
  const size_t slice_pitch = region[0] * region[1];

  // Buffer and host data are interpreted as 1D in this case
  // hence row and slice pitch are same for both
  cl_int error_code = clEnqueueReadBufferRect(
    command_queue_, buffer, blocking, buffer_origin, host_origin, region,
    row_pitch, slice_pitch, row_pitch, slice_pitch, data, 0, nullptr, nullptr);

  if (error_code != CL_SUCCESS) {
    // Message names the read call; it previously repeated the write-path text.
    ml_loge("Failed to read data region from GPU (clEnqueueReadBufferRect). "
            "OpenCL error "
            "code: %d",
            error_code);
    return false;
  }

  return true;
}

/**
* @brief Writing buffer object. Used from Buffer class
*
Expand All @@ -150,6 +185,7 @@ bool CommandQueueManager::EnqueueWriteBuffer(cl_mem buffer,
auto error_code =
clEnqueueWriteBuffer(command_queue_, buffer, blocking, 0, size_in_bytes,
data, 0, nullptr, nullptr);

if (error_code != CL_SUCCESS) {
ml_loge("Failed to upload data to GPU (clEnqueueWriteBuffer). OpenCL error "
"code: %d",
Expand All @@ -160,6 +196,41 @@ bool CommandQueueManager::EnqueueWriteBuffer(cl_mem buffer,
return true;
}

/**
 * @brief Writing 1D region of a buffer object. Used from Buffer class
 *
 * The transfer is issued through clEnqueueWriteBufferRect with a region of
 * height 1 and depth 1, so buffer and host memory are both treated as flat
 * 1D ranges.
 *
 * @param buffer cl_mem buffer object
 * @param size_in_bytes size of data region
 * @param data pointer to the host data to upload
 * @param host_origin_offset offset in the host memory region
 * @param buffer_origin_offset offset in the buffer memory region
 * @param async flag for asynchronous operation (false enqueues a blocking
 * write)
 * @return true if writing is successful or false otherwise
 */
bool CommandQueueManager::EnqueueWriteBufferRegion(
  cl_mem buffer, size_t size_in_bytes, const void *data,
  size_t host_origin_offset, size_t buffer_origin_offset, bool async) {

  // A synchronous request maps to a blocking enqueue.
  const cl_bool is_blocking = async ? CL_FALSE : CL_TRUE;

  // Origins are (x, y, z); only x is used because the data is treated as 1D.
  const size_t device_origin[3] = {buffer_origin_offset, 0, 0};
  const size_t host_side_origin[3] = {host_origin_offset, 0, 0};

  // Extent is (width in bytes, height in rows, depth in slices).
  const size_t extent[3] = {size_in_bytes, 1, 1};

  // One row spans the whole region; one slice is a single row. The same
  // pitches are used for both the buffer and the host side.
  const size_t bytes_per_row = extent[0];
  const size_t bytes_per_slice = extent[0] * extent[1];

  const cl_int status = clEnqueueWriteBufferRect(
    command_queue_, buffer, is_blocking, device_origin, host_side_origin,
    extent, bytes_per_row, bytes_per_slice, bytes_per_row, bytes_per_slice,
    data, 0, nullptr, nullptr);

  if (status == CL_SUCCESS) {
    return true;
  }

  ml_loge("Failed to write data region to GPU (clEnqueueWriteBufferRect). "
          "OpenCL error code: %d",
          status);
  return false;
}

/**
* @brief Mapping a region of a buffer object into the host address space
*
Expand Down
30 changes: 30 additions & 0 deletions nntrainer/opencl/opencl_command_queue_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ class CommandQueueManager {
bool EnqueueReadBuffer(cl_mem buffer, size_t size_in_bytes, void *data,
bool async = false);

/**
 * @brief Reading 1D region from a buffer object. Used from Buffer class
 *
 * @param buffer cl_mem buffer object
 * @param size_in_bytes size of data region
 * @param data pointer to the host memory receiving the region
 * @param host_origin_offset offset in the host memory region (default 0)
 * @param buffer_origin_offset offset in the buffer memory region (default 0)
 * @param async flag for asynchronous operation; when false (default) the
 * read is enqueued as a blocking call
 * @return true if reading is successful or false otherwise
 */
bool EnqueueReadBufferRegion(cl_mem buffer, size_t size_in_bytes, void *data,
size_t host_origin_offset = 0,
size_t buffer_origin_offset = 0,
bool async = false);

/**
* @brief Writing buffer object. Used from Buffer class
*
Expand All @@ -85,6 +101,20 @@ class CommandQueueManager {
bool EnqueueWriteBuffer(cl_mem buffer, size_t size_in_bytes, const void *data,
bool async = false);

/**
 * @brief Writing 1D region of a buffer object. Used from Buffer class
 *
 * @param buffer cl_mem buffer object
 * @param size_in_bytes size of data region
 * @param data pointer to the host data to upload
 * @param host_origin_offset offset in the host memory region (default 0)
 * @param buffer_origin_offset offset in the buffer memory region (default 0)
 * @param async flag for asynchronous operation; when false (default) the
 * write is enqueued as a blocking call
 * @return true if writing is successful or false otherwise
 */
bool EnqueueWriteBufferRegion(cl_mem buffer, size_t size_in_bytes,
const void *data, size_t host_origin_offset = 0,
size_t buffer_origin_offset = 0,
bool async = false);
/**
* @brief Mapping a region of a buffer object into the host address space
*
Expand Down
4 changes: 4 additions & 0 deletions nntrainer/opencl/opencl_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ void LoadOpenCLFunctions(void *libopencl) {
LoadFunction(clEnqueueReadBuffer);
LoadFunction(clEnqueueMapBuffer);
LoadFunction(clEnqueueUnmapMemObject);
LoadFunction(clEnqueueWriteBufferRect);
LoadFunction(clEnqueueReadBufferRect);
LoadFunction(clCreateProgramWithSource);
LoadFunction(clCreateProgramWithBinary);
LoadFunction(clBuildProgram);
Expand Down Expand Up @@ -102,6 +104,8 @@ PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
PFN_clCreateProgramWithSource clCreateProgramWithSource;
PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
PFN_clBuildProgram clBuildProgram;
Expand Down
Loading

0 comments on commit 3d44c5f

Please sign in to comment.