[GPU/OpenCL] Moving Addition kernel to Tensor Directory @open sesame 07/04 09:01 #2666

Merged
42 changes: 3 additions & 39 deletions nntrainer/layers/cl_layers/addition_layer_cl.cpp
@@ -12,7 +12,7 @@
*/

#include <addition_layer_cl.h>
-#include <blas_kernels.h>
+#include <blas_kernel_interface.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <node_exporter.h>
@@ -37,47 +37,11 @@ void AdditionLayerCL::forwarding(RunLayerContext &context, bool training) {
if (!idx) {
hidden_.copy(input_);
} else {
-AddProcess(input_, hidden_, context);
+add_i_cl(input_, hidden_, context);
}
}
}

-void AdditionLayerCL::AddProcess(Tensor const &input, Tensor &result,
-                                 RunLayerContext &context) {
-
-  CREATE_IF_EMPTY_DIMS(result, result.getDim());
-
-  NNTR_THROW_IF(result.getData() == nullptr, std::invalid_argument)
-    << result.getName() << " is not allocated";
-  NNTR_THROW_IF(input.getData() == nullptr, std::invalid_argument)
-    << input.getName() << " is not allocated";
-
-  if (input.getDim() != result.getDim()) {
-    throw std::invalid_argument(
-      "Error: Dimensions does not match for addition");
-  }
-
-  if (input.getDataType() == ml::train::TensorDim::DataType::FP32) {
-    unsigned int size = input.size();
-    const float *data = input.getData();
-    float *rdata = result.getData();
-
-    addition_cl(data, rdata, size, context);
-
-  } else if (input.getDataType() == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    unsigned int size = input.size();
-    const _FP16 *data = input.getData<_FP16>();
-    _FP16 *rdata = result.getData<_FP16>();
-
-    addition_cl(data, rdata, size, context);
-
-#else
-    throw std::invalid_argument("Error: enable-fp16 is not enabled");
-#endif
-  }
-}

void AdditionLayerCL::incremental_forwarding(RunLayerContext &context,
unsigned int from, unsigned int to,
bool training) {
@@ -113,7 +77,7 @@ void AdditionLayerCL::incremental_forwarding(RunLayerContext &context,
if (!idx) {
hidden_step.copy(input_step);
} else {
-AddProcess(input_step, hidden_step, context);
+add_i_cl(input_step, hidden_step, context);
}
}
}
9 changes: 0 additions & 9 deletions nntrainer/layers/cl_layers/addition_layer_cl.h
@@ -76,15 +76,6 @@ class AdditionLayerCL : public Layer {
*/
void calcDerivative(RunLayerContext &context) override;

-  /**
-   * @brief Process data and dimensions for add operation used in addition layer
-   * @param[in] input Tensor
-   * @param[in] result Tensor
-   * @param[in] RunLayerContext reference
-   */
-  void AddProcess(Tensor const &input, Tensor &result,
-                  RunLayerContext &context);
-
/**
* @copydoc bool supportBackwarding() const
*/
35 changes: 35 additions & 0 deletions nntrainer/tensor/cl_operations/blas_kernel_interface.cpp
@@ -211,4 +211,39 @@ void multiplyCl(Tensor &input, float const &value, RunLayerContext &context) {
}
}

+void add_i_cl(Tensor const &input, Tensor &result, RunLayerContext &context) {
+
+  CREATE_IF_EMPTY_DIMS(result, result.getDim());
+
+  NNTR_THROW_IF(result.getData() == nullptr, std::invalid_argument)
+    << result.getName() << " is not allocated";
+  NNTR_THROW_IF(input.getData() == nullptr, std::invalid_argument)
+    << input.getName() << " is not allocated";
+
+  if (input.getDim() != result.getDim()) {
+    throw std::invalid_argument(
+      "Error: Dimensions does not match for addition");
+  }
+
+  if (input.getDataType() == ml::train::TensorDim::DataType::FP32) {
+    unsigned int size = input.size();
+    const float *data = input.getData();
+    float *rdata = result.getData();
+
+    addition_cl(data, rdata, size, context);
+
+  } else if (input.getDataType() == ml::train::TensorDim::DataType::FP16) {
+#ifdef ENABLE_FP16
+    unsigned int size = input.size();
+    const _FP16 *data = input.getData<_FP16>();
+    _FP16 *rdata = result.getData<_FP16>();
+
+    addition_cl(data, rdata, size, context);
+
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+}

} // namespace nntrainer
8 changes: 8 additions & 0 deletions nntrainer/tensor/cl_operations/blas_kernel_interface.h
@@ -63,5 +63,13 @@ void dotBatchedCl(Tensor const &input, Tensor const &m, Tensor &result,
*/
void multiplyCl(Tensor &input, float const &value, RunLayerContext &context);

+/**
+ * @brief Process data and dimensions for add operation
+ * @param[in] input Tensor
+ * @param[in] result Tensor
+ * @param[in] RunLayerContext reference
+ */
+void add_i_cl(Tensor const &input, Tensor &result, RunLayerContext &context);

Contributor:

This is not a big deal, but how about renaming it add_cl() for clarity, since Tensor uses _i for in-place operations? (This one takes an output tensor.)

Contributor:

+) What about naming it addCl to make it consistent with the other kernel operations?

Contributor (Author):

Actually, I was following the CPU naming convention: the CPU implementation is named add_i, and the GPU kernel performs the operation in place as well. To add input[0] and input[1], I store the result as input[0] += input[1], treating input[0] as the output tensor rather than creating an additional output tensor. I followed the CPU implementation.

That said, if I should change the name to addCl, please let me know and I'll update it accordingly.


} // namespace nntrainer
#endif /* __BLAS_KERNEL_INTERFACE_H__ */
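
For illustration, a minimal sketch of the in-place accumulation pattern described in the thread above, written in plain C++ over raw float buffers; add_i_sketch is a hypothetical stand-in for the FP32 path of addition_cl and is not part of the nntrainer API:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the FP32 path of addition_cl():
// accumulates `input` into `result` element-wise (result += input).
void add_i_sketch(const float *input, float *result, std::size_t size) {
  for (std::size_t i = 0; i < size; ++i)
    result[i] += input[i];
}

int main() {
  // Mirrors the forwarding loop: the first input is copied into the
  // hidden tensor, and every later input is accumulated into it in place.
  std::vector<std::vector<float>> inputs = {{1.f, 2.f}, {3.f, 4.f}, {5.f, 6.f}};
  std::vector<float> hidden = inputs[0]; // idx == 0: copy
  for (std::size_t idx = 1; idx < inputs.size(); ++idx)
    add_i_sketch(inputs[idx].data(), hidden.data(), hidden.size());
  std::cout << hidden[0] << ", " << hidden[1] << '\n'; // prints: 9, 12
}
```

No extra output tensor is allocated at any point, which is the property the author cites when defending the _i suffix.
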
2 changes: 1 addition & 1 deletion test/unittest/layers/unittest_layers_addition_cl.cpp
@@ -51,6 +51,6 @@ auto addition_w16a16_gpu = LayerGoldenTestParamType(
"added_w16a16.nnlayergolden", LayerGoldenTestParamOptions::DEFAULT, "nchw",
"fp16", "fp16");

-GTEST_PARAMETER_TEST(Addition16, LayerGoldenTest,
+GTEST_PARAMETER_TEST(AdditionGPU16, LayerGoldenTest,
::testing::Values(addition_w16a16_gpu));
#endif