[GPU/OpenCL] Initial version of Transpose function for different axes with OpenCL ops

Added a naive OpenCL implementation of the Transpose function using BLAS ops.
Incorporated the kernels for the ops used.
Added unit tests for transpose over different axes.

Signed-off-by: Niket Agarwal <[email protected]>
niket-agarwal committed Aug 29, 2024
1 parent 0c9c6d7 commit 0c7523e
Showing 17 changed files with 1,226 additions and 36 deletions.
6 changes: 3 additions & 3 deletions Applications/LLaMA/jni/transpose_layer.h
@@ -58,13 +58,13 @@ class TransposeLayer final : public nntrainer::Layer {
/**
* @copydoc bool supportBackwarding() const
*/
bool supportBackwarding() const override { return true; };
bool supportBackwarding() const override { return false; };

/**
* @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
*/
void exportTo(nntrainer::Exporter &exporter,
const ml::train::ExportMethods &method) const override{};
const ml::train::ExportMethods &method) const override {};

/**
* @copydoc Layer::getType()
@@ -74,7 +74,7 @@
/**
* @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
void setProperty(const std::vector<std::string> &values) override{};
void setProperty(const std::vector<std::string> &values) override {};

inline static const std::string type = "transpose";
};
14 changes: 12 additions & 2 deletions api/ccapi/include/layer.h
@@ -102,8 +102,9 @@ enum LayerType {
LAYER_LOSS_CONSTANT_DERIVATIVE, /**< Synthetic loss layer to feed constant
derivative */
LAYER_UPSAMPLE2D, /**< Upsample 2D Layer type */
LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**<RMS NORM Layer */
LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**<RMS NORM Layer */
LAYER_TRANSPOSE = ML_TRAIN_LAYER_TYPE_TRANSPOSE, /**< Transpose Layer type */
LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */
};

/**
@@ -326,6 +327,15 @@ RMSNormCl(const std::vector<std::string> &properties = {},
return createLayer(LayerType::LAYER_RMSNORM, properties, compute_engine);
}

/**
* @brief Helper function to create Transpose layer
*/
inline std::unique_ptr<Layer>
Transpose(const std::vector<std::string> &properties = {},
const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
return createLayer(LayerType::LAYER_TRANSPOSE, properties, compute_engine);
}

/**
* @brief Helper function to create batch normalization layer
*/
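With the new Transpose helper above, an OpenCL-backed transpose layer can be requested from application code. A minimal sketch, assuming the helper sits in the ml::train::layer namespace like the neighboring factories and that GPU is a valid LayerComputeEngine value:

  // Sketch only: request the OpenCL-backed transpose layer via the ccapi helper.
  // Namespace, property string, and engine value are assumptions based on the surrounding helpers.
  #include <layer.h>
  #include <memory>

  std::unique_ptr<ml::train::Layer> make_gpu_transpose() {
    return ml::train::layer::Transpose({"name=transpose0"},
                                       ml::train::LayerComputeEngine::GPU);
  }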
7 changes: 4 additions & 3 deletions api/nntrainer-api-common.h
@@ -62,9 +62,10 @@ typedef enum {
27, /**< Layer Normalization Layer type (Since 7.0) */
ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING =
28, /**< Positional Encoding Layer type (Since 7.0) */
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_SWIGLU = 30, /**< Swiglu Layer type */
ML_TRAIN_LAYER_TYPE_WEIGHT = 31, /**< Weight Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_SWIGLU = 30, /**< Swiglu Layer type */
ML_TRAIN_LAYER_TYPE_WEIGHT = 31, /**< Weight Layer type (Since 9.0)*/
ML_TRAIN_LAYER_TYPE_TRANSPOSE = 32, /**< Transpose Layer type */
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
300, /**< Preprocess flip Layer (Since 6.5) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
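The new enum value is also reachable through the C API. A hedged sketch, assuming the usual ml_train_layer_create() entry point picks up the updated header (error handling trimmed):

  /* Sketch only: create a transpose layer through the ML trainer C API. */
  ml_train_layer_h transpose = NULL;
  int status = ml_train_layer_create(&transpose, ML_TRAIN_LAYER_TYPE_TRANSPOSE);
  if (status != ML_ERROR_NONE) {
    /* creation failed, e.g. the layer type is unsupported on this build */
  }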
5 changes: 5 additions & 0 deletions nntrainer/cl_context.cpp
@@ -21,6 +21,7 @@
#include <reshape_cl.h>
#include <rmsnorm_layer_cl.h>
#include <swiglu_cl.h>
#include <transpose_cl.h>

namespace nntrainer {

@@ -49,6 +50,10 @@ static void add_default_object(ClContext &cc) {

cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>, ConcatLayerCl::type,
ml::train::LayerType::LAYER_CONCAT);

cc.registerFactory(nntrainer::createLayer<TransposeLayerCl>,
TransposeLayerCl::type,
ml::train::LayerType::LAYER_TRANSPOSE);
}

static void registerer(ClContext &cc) noexcept {
1 change: 1 addition & 0 deletions nntrainer/layers/cl_layers/meson.build
@@ -5,6 +5,7 @@ cl_layer_sources = [
'reshape_cl.cpp',
'rmsnorm_layer_cl.cpp',
'concat_cl.cpp',
'transpose_cl.cpp',
]

foreach s : cl_layer_sources
91 changes: 91 additions & 0 deletions nntrainer/layers/cl_layers/transpose_cl.cpp
@@ -0,0 +1,91 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Niket Agarwal <[email protected]>
*
* @file transpose_cl.cpp
* @date 31 July 2024
* @brief Implementation of transpose layer
* @see https://github.com/nnstreamer/nntrainer
* @author Niket Agarwal <[email protected]>
* @bug No known bugs except for NYI items
*
*/

#include "transpose_cl.h"
#include <blas_kernel_interface.h>
#include <iostream>
#include <layer_context.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>

namespace nntrainer {

static constexpr size_t SINGLE_INOUT_IDX = 0;

void TransposeLayerCl::finalize(InitLayerContext &context) {
std::vector<TensorDim> dim = context.getInputDimensions();

for (unsigned int i = 0; i < dim.size(); ++i) {
if (dim[i].getDataLen() == 0) {
throw std::invalid_argument("Input dimension is not set");
} else {
dim[i].channel(dim[i].channel());
dim[i].height(dim[i].height());
dim[i].width(dim[i].width());
}
}

context.setOutputDimensions(dim);
}

void TransposeLayerCl::forwarding(RunLayerContext &context, bool training) {
Tensor &in = context.getInput(SINGLE_INOUT_IDX);
Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
Transpose_i_cl("1:0:2", in, out, context);
}

void TransposeLayerCl::incremental_forwarding(RunLayerContext &context,
unsigned int from,
unsigned int to, bool training) {
Tensor &in = context.getInput(SINGLE_INOUT_IDX);
Tensor &out = context.getOutput(SINGLE_INOUT_IDX);
if (from) {
NNTR_THROW_IF(to - from != 1, std::invalid_argument)
<< "incremental step size is not 1";
from = 0;
to = 1;
}
Transpose_i_cl("1:0:2", in, out, context);
}

void TransposeLayerCl::calcDerivative(RunLayerContext &context) {
std::throw_with_nested(std::runtime_error("Training is not supported yet."));
}

void TransposeLayerCl::setProperty(const std::vector<std::string> &values) {
auto remain_props = loadProperties(values, transpose_props);
if (!remain_props.empty()) {
std::string msg = "[TransposeLayerCl] Unknown Layer Properties count " +
std::to_string(values.size());
throw exception::not_supported(msg);
}
}

#ifdef PLUGGABLE

Layer *create_transpose_layer_cl() {
auto layer = new TransposeLayerCl();
return layer;
}

void destroy_transpose_layer_cl(Layer *layer) { delete layer; }

extern "C" {
LayerPluggable ml_train_layer_pluggable{create_transpose_layer_cl,
destroy_transpose_layer_cl};
}

#endif

} // namespace nntrainer
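For reference, the axis string "1:0:2" passed to Transpose_i_cl above is, per the transpose direction convention used by nntrainer tensors, a swap of the channel and height axes with width left in place. A naive CPU sketch of that semantics (illustrative only; the committed path dispatches to the OpenCL kernels behind Transpose_i_cl):

  // Reference semantics for the "1:0:2" transpose: out[b][h][c][w] = in[b][c][h][w].
  // Raw pointers and NCHW-style indexing are used for illustration; this is not the nntrainer Tensor API.
  void transpose_102_reference(const float *in, float *out, unsigned batch,
                               unsigned channel, unsigned height, unsigned width) {
    for (unsigned b = 0; b < batch; ++b)
      for (unsigned c = 0; c < channel; ++c)
        for (unsigned h = 0; h < height; ++h)
          for (unsigned w = 0; w < width; ++w)
            out[((b * height + h) * channel + c) * width + w] =
              in[((b * channel + c) * height + h) * width + w];
  }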
105 changes: 105 additions & 0 deletions nntrainer/layers/cl_layers/transpose_cl.h
@@ -0,0 +1,105 @@
// SPDX-License-Identifier: Apache-2.0
/**
* Copyright (C) 2024 Niket Agarwal <[email protected]>
*
* @file transpose_cl.h
* @date 31 July 2024
* @brief Implementation of transpose layer
* @see https://github.com/nnstreamer/nntrainer
* @author Niket Agarwal <[email protected]>
* @bug No known bugs except for NYI items
*
*/

#ifndef __TRANSPOSE_LAYER_CL_H__
#define __TRANSPOSE_LAYER_CL_H__

#include <common_properties.h>
#include <layer_devel.h>
#include <opencl_buffer.h>
#include <opencl_kernel.h>

#define CREATE_IF_EMPTY_DIMS(tensor, ...) \
do { \
if (tensor.empty()) \
tensor = Tensor(__VA_ARGS__); \
} while (0);

namespace nntrainer {

/**
 * @brief A transpose layer.
*
*/
class TransposeLayerCl final : public Layer {
public:
/**
* @brief Construct a new transpose layer object
*
*/
TransposeLayerCl() : Layer(), transpose_props(props::Print()) {}

/**
* @brief Destroy the transpose layer object
*
*/
~TransposeLayerCl() {}

/**
* @copydoc Layer::finalize(InitLayerContext &context)
*/
void finalize(InitLayerContext &context) override;

/**
* @copydoc Layer::forwarding(RunLayerContext &context, bool training)
*/
void forwarding(RunLayerContext &context, bool training) override;

/**
* @copydoc Layer::incremental_forwarding(RunLayerContext &context, unsigned
* int from, unsigned int to, bool training)
*/
void incremental_forwarding(RunLayerContext &context, unsigned int from,
unsigned int to, bool training) override;

/**
* @copydoc Layer::calcDerivative(RunLayerContext &context)
*/
void calcDerivative(RunLayerContext &context) override;

/**
* @copydoc bool supportBackwarding() const
*/
bool supportBackwarding() const override { return true; };

/**
* @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
*/
void exportTo(Exporter &exporter,
const ml::train::ExportMethods &method) const override {};

/**
* @copydoc Layer::getType()
*/
const std::string getType() const override { return TransposeLayerCl::type; };

/**
* @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
void setProperty(const std::vector<std::string> &values) override;

inline static const std::string type = "transpose";

static opencl::Kernel kernel_transpose_axis0;
static opencl::Kernel kernel_transpose_fp16_axis0;
static opencl::Kernel kernel_transpose_axis1;
static opencl::Kernel kernel_transpose_fp16_axis1;
static opencl::Kernel kernel_transpose_axis2;
static opencl::Kernel kernel_transpose_fp16_axis2;

std::tuple<props::Print> transpose_props; /**< transpose layer properties :
unit - number of output neurons */
};
} // namespace nntrainer

#endif /* __TRANSPOSE_LAYER_CL_H__ */
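The six static opencl::Kernel members declared above map to fp32/fp16 kernels for three axis permutations; the kernel sources live with the BLAS OpenCL kernels and are not shown in this hunk. Purely as an illustration of the shape of such a kernel, embedded as a source string the way nntrainer carries its OpenCL kernels (name, argument order, and axis mapping are assumptions, not the committed source):

  // Hypothetical fp32 transpose kernel sketch; one batch slice per enqueue assumed.
  static const std::string transpose_axis_sketch = R"(
  __kernel void transpose_cl_axis_sketch(__global const float *in, __global float *out,
                                         const int channel, const int height, const int width) {
    int h = get_global_id(0);
    int c = get_global_id(1);
    if (h >= height || c >= channel)
      return;
    for (int w = 0; w < width; ++w)
      out[(h * channel + c) * width + w] = in[(c * height + h) * width + w];
  }
  )";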
12 changes: 12 additions & 0 deletions nntrainer/layers/layer_context.cpp
@@ -735,6 +735,18 @@ std::string RunLayerContext::getKernelName(LayerKernel layerKernel) {
return "concat_cl_axis1";
case LayerKernel::CONCAT_AXIS1_FP16:
return "concat_cl_axis1_fp16";
case LayerKernel::TRANSPOSE_AXIS0:
return "transpose_cl_axis0";
case LayerKernel::TRANSPOSE_FP16_AXIS0:
return "transpose_cl_fp16_axis0";
case LayerKernel::TRANSPOSE_AXIS1:
return "transpose_cl_axis1";
case LayerKernel::TRANSPOSE_FP16_AXIS1:
return "transpose_cl_fp16_axis1";
case LayerKernel::TRANSPOSE_AXIS2:
return "transpose_cl_axis2";
case LayerKernel::TRANSPOSE_FP16_AXIS2:
return "transpose_cl_fp16_axis2";
default:
return "";
}
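With the new cases in place, the enum-to-name mapping can be exercised directly; for example (a sketch assuming a RunLayerContext named context):

  // Sketch: the new enum values resolve to the OpenCL kernel names added above.
  std::string name =
    context.getKernelName(RunLayerContext::LayerKernel::TRANSPOSE_AXIS0);
  // name == "transpose_cl_axis0"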
62 changes: 34 additions & 28 deletions nntrainer/layers/layer_context.h
@@ -829,34 +829,40 @@ class RunLayerContext {
* getKernelName function.
*/
enum LayerKernel {
SGEMV = 1ull << 0, /**< placeholder for kernel name */
SGEMV_FP16 = 1ull << 1, /**< placeholder for kernel name */
DOT = 1ull << 2, /**< placeholder for kernel name */
DOT_FP16 = 1ull << 3, /**< placeholder for kernel name */
SGEMM_NOTRANS = 1ull << 4, /**< placeholder for kernel name */
SGEMM_NOTRANS_FP16 = 1ull << 5, /**< placeholder for kernel name */
SGEMM_TRANSA = 1ull << 6, /**< placeholder for kernel name */
SGEMM_TRANSA_FP16 = 1ull << 7, /**< placeholder for kernel name */
SGEMM_TRANSB = 1ull << 8, /**< placeholder for kernel name */
SGEMM_TRANSB_FP16 = 1ull << 9, /**< placeholder for kernel name */
SGEMM_TRANSAB = 1ull << 10, /**< placeholder for kernel name */
SGEMM_TRANSAB_FP16 = 1ull << 11, /**< placeholder for kernel name */
ADD = 1ull << 12, /**< placeholder for kernel name */
ADD_FP16 = 1ull << 13, /**< placeholder for kernel name */
SWIGLU = 1ull << 14, /**< placeholder for kernel name */
SWIGLU_FP16 = 1ull << 15, /**< placeholder for kernel name */
SSCAL = 1ull << 16, /**< placeholder for kernel name */
SSCAL_FP16 = 1ull << 17, /**< placeholder for kernel name */
COPY = 1ull << 18, /**< placeholder for kernel name */
COPY_FP16 = 1ull << 19, /**< placeholder for kernel name */
RMSNORM = 1ull << 20, /**< placeholder for kernel name */
RMSNORM_FP16 = 1ull << 21, /**< placeholder for kernel name */
CONCAT_AXIS3 = 1ull << 22, /**< placeholder for kernel name */
CONCAT_AXIS3_FP16 = 1ull << 23, /**< placeholder for kernel name */
CONCAT_AXIS2 = 1ull << 24, /**< placeholder for kernel name */
CONCAT_AXIS2_FP16 = 1ull << 25, /**< placeholder for kernel name */
CONCAT_AXIS1 = 1ull << 26, /**< placeholder for kernel name */
CONCAT_AXIS1_FP16 = 1ull << 27, /**< placeholder for kernel name */
SGEMV = 1ull << 0, /**< placeholder for kernel name */
SGEMV_FP16 = 1ull << 1, /**< placeholder for kernel name */
DOT = 1ull << 2, /**< placeholder for kernel name */
DOT_FP16 = 1ull << 3, /**< placeholder for kernel name */
SGEMM_NOTRANS = 1ull << 4, /**< placeholder for kernel name */
SGEMM_NOTRANS_FP16 = 1ull << 5, /**< placeholder for kernel name */
SGEMM_TRANSA = 1ull << 6, /**< placeholder for kernel name */
SGEMM_TRANSA_FP16 = 1ull << 7, /**< placeholder for kernel name */
SGEMM_TRANSB = 1ull << 8, /**< placeholder for kernel name */
SGEMM_TRANSB_FP16 = 1ull << 9, /**< placeholder for kernel name */
SGEMM_TRANSAB = 1ull << 10, /**< placeholder for kernel name */
SGEMM_TRANSAB_FP16 = 1ull << 11, /**< placeholder for kernel name */
ADD = 1ull << 12, /**< placeholder for kernel name */
ADD_FP16 = 1ull << 13, /**< placeholder for kernel name */
SWIGLU = 1ull << 14, /**< placeholder for kernel name */
SWIGLU_FP16 = 1ull << 15, /**< placeholder for kernel name */
SSCAL = 1ull << 16, /**< placeholder for kernel name */
SSCAL_FP16 = 1ull << 17, /**< placeholder for kernel name */
COPY = 1ull << 18, /**< placeholder for kernel name */
COPY_FP16 = 1ull << 19, /**< placeholder for kernel name */
RMSNORM = 1ull << 20, /**< placeholder for kernel name */
RMSNORM_FP16 = 1ull << 21, /**< placeholder for kernel name */
CONCAT_AXIS3 = 1ull << 22, /**< placeholder for kernel name */
CONCAT_AXIS3_FP16 = 1ull << 23, /**< placeholder for kernel name */
CONCAT_AXIS2 = 1ull << 24, /**< placeholder for kernel name */
CONCAT_AXIS2_FP16 = 1ull << 25, /**< placeholder for kernel name */
CONCAT_AXIS1 = 1ull << 26, /**< placeholder for kernel name */
CONCAT_AXIS1_FP16 = 1ull << 27, /**< placeholder for kernel name */
TRANSPOSE_AXIS0 = 1ull << 28, /**< placeholder for kernel name */
TRANSPOSE_FP16_AXIS0 = 1ull << 29, /**< placeholder for kernel name */
TRANSPOSE_AXIS1 = 1ull << 30, /**< placeholder for kernel name */
TRANSPOSE_FP16_AXIS1 = 1ull << 31, /**< placeholder for kernel name */
TRANSPOSE_AXIS2 = 1ull << 32, /**< placeholder for kernel name */
TRANSPOSE_FP16_AXIS2 = 1ull << 33 /**< placeholder for kernel name */
};

/**
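Each LayerKernel value occupies its own bit of a 64-bit word (the 1ull shifts keep TRANSPOSE_AXIS2 and TRANSPOSE_FP16_AXIS2 well-defined above bit 31), so several kernels can be tracked in one flag word. A small sketch, assuming the values are meant to be OR-combined as a mask:

  // Sketch: combining LayerKernel bits into a single mask and testing one of them.
  unsigned long long mask = RunLayerContext::LayerKernel::TRANSPOSE_AXIS0 |
                            RunLayerContext::LayerKernel::TRANSPOSE_FP16_AXIS0;
  bool has_axis0 = (mask & RunLayerContext::LayerKernel::TRANSPOSE_AXIS0) != 0;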
