From ad6b0199cba783153747fa51028a3ff05f9c7597 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong
Date: Fri, 8 Mar 2024 11:37:22 +0900
Subject: [PATCH] [bugfix] Fix issues that occurred in Tensor class
 refactoring

This commit fixes several issues that arose from the refactoring of the
Tensor class.

**Changes proposed in this PR:**
- Implement the Tensor copy constructor to prevent the incorrect behavior
  of the defaulted copy constructor.
- Newly implement Tensor::add_i() to fix the previously incorrect
  implementation.
- Add a chain() function that returns a LazyTensor.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong
---
 nntrainer/tensor/float_tensor.cpp | 15 +++++++++
 nntrainer/tensor/float_tensor.h   | 11 +++++++
 nntrainer/tensor/half_tensor.cpp  | 16 +++++++++
 nntrainer/tensor/half_tensor.h    | 12 +++++++
 nntrainer/tensor/tensor.cpp       | 54 +++++++++++++++++++++++++------
 nntrainer/tensor/tensor.h         | 18 +++++++++--
 nntrainer/tensor/tensor_base.h    | 20 ++++++++++++
 7 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 6aecd0d6ad..2652610d15 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -507,6 +507,21 @@ Tensor &FloatTensor::add_strided(Tensor const &input, Tensor &output,
   return output;
 }
 
+int FloatTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
+  auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
+               float *out_buf) {
+    saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
+  };
+
+  try {
+    apply_broadcast(m, f, output);
+  } catch (std::exception &err) {
+    ml_loge("%s %s", typeid(err).name(), err.what());
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+  return ML_ERROR_NONE;
+}
+
 Tensor &FloatTensor::add(float const &value, Tensor &output) const {
   auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
   apply(f, output);
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index e19037ab80..5463e9f1cc 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -64,6 +64,12 @@ class FloatTensor : public TensorBase {
     std::vector<std::vector<std::vector<std::vector<float>>>> const &d,
     Tformat fm);
 
+  /**
+   * @brief Construct a new FloatTensor object
+   * @param rhs TensorBase object to copy
+   */
+  FloatTensor(TensorBase &rhs) : TensorBase(rhs) {}
+
   /**
    * @brief Basic Destructor
    */
@@ -256,6 +262,11 @@ class FloatTensor : public TensorBase {
   Tensor &add_strided(Tensor const &input, Tensor &output,
                       const float beta) const override;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  int add_i(Tensor const &m, Tensor &output, float const alpha) override;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
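FloatTensor::add_i() above (and its HalfTensor counterpart below) hands the actual accumulation to saxpy, one call per broadcast segment. The following standalone sketch shows the arithmetic each lambda invocation performs; axpy_segment is an illustrative stand-in written for this note, not the BLAS-backed saxpy from the repository:

```cpp
#include <cstddef>

// Stand-in for one saxpy call inside the add_i lambda: over a single
// broadcast segment of `buffer_size` elements, accumulate
//   out[i] += alpha * m[i]
// honoring the innermost (axis-3) stride that BroadcastInfo reports
// for each buffer.
void axpy_segment(std::size_t buffer_size, float alpha, const float *m_buf,
                  std::size_t m_stride, float *out_buf,
                  std::size_t out_stride) {
  for (std::size_t i = 0; i < buffer_size; ++i)
    out_buf[i * out_stride] += alpha * m_buf[i * m_stride];
}
```

apply_broadcast() matches the two shapes and invokes the lambda once per contiguous segment; a non-broadcastable operand surfaces as an exception, which add_i() converts to ML_ERROR_INVALID_PARAMETER. That is what allows Tensor::add_i() further down to shrink to a plain forward of the return code.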
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 760241c10c..2f66f1c074 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -479,6 +479,22 @@ Tensor &HalfTensor::add_strided(Tensor const &input, Tensor &output,
   return output;
 }
 
+int HalfTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
+  auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+               _FP16 *out_buf) {
+    saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
+    /// @todo: saxpy is not valid for _FP16
+  };
+
+  try {
+    apply_broadcast(m, f, output);
+  } catch (std::exception &err) {
+    ml_loge("%s %s", typeid(err).name(), err.what());
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+  return ML_ERROR_NONE;
+}
+
 Tensor &HalfTensor::add(float const &value, Tensor &output) const {
   auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
                      static_cast<_FP16>(value));
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index 33f991b359..6ca35e4fd2 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -63,6 +63,13 @@ class HalfTensor : public TensorBase {
   HalfTensor(std::vector<std::vector<std::vector<std::vector<_FP16>>>> const &d,
              Tformat fm);
 
+  /**
+   * @brief Construct a new HalfTensor object
+   *
+   * @param rhs TensorBase object to copy
+   */
+  HalfTensor(TensorBase &rhs) : TensorBase(rhs) {}
+
   /**
    * @brief Basic Destructor
    */
@@ -255,6 +262,11 @@ class HalfTensor : public TensorBase {
   Tensor &add_strided(Tensor const &input, Tensor &output,
                       const float beta) const override;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  int add_i(Tensor const &m, Tensor &output, float const alpha) override;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index ef73ec24d3..01400d02a7 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -10,6 +10,7 @@
  */
 
 #include <float_tensor.h>
+#include <lazy_tensor.h>
 #include <tensor.h>
 
 #ifdef ENABLE_FP16
@@ -100,6 +101,35 @@ Tensor::Tensor(
 }
 #endif
 
+Tensor::Tensor(const Tensor &rhs) {
+  if (rhs.getDataType() == Tdatatype::FP32) {
+    itensor = std::shared_ptr<FloatTensor>(new FloatTensor(*rhs.itensor),
+                                           std::default_delete<FloatTensor>());
+  } else if (rhs.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    itensor = std::shared_ptr<HalfTensor>(new HalfTensor(*rhs.itensor),
+                                          std::default_delete<HalfTensor>());
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+}
+
+Tensor &Tensor::operator=(const Tensor &rhs) {
+  if (rhs.getDataType() == Tdatatype::FP32) {
+    itensor = std::shared_ptr<FloatTensor>(new FloatTensor(*rhs.itensor),
+                                           std::default_delete<FloatTensor>());
+  } else if (rhs.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    itensor = std::shared_ptr<HalfTensor>(new HalfTensor(*rhs.itensor),
+                                          std::default_delete<HalfTensor>());
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+  return *this;
+}
+
 bool Tensor::operator==(const Tensor &rhs) const {
   /// compares tensor information
   if (*itensor == *rhs.itensor) {
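A note on why the defaulted copy operations misbehaved: Tensor's state is essentially a std::shared_ptr<TensorBase> itensor (see tensor.h below), so the compiler-generated copy duplicated the pointer and left two Tensor handles sharing one descriptor. The snippet below is a hypothetical repro, not a test from the repository; the convenience (batch, channel, height, width) constructor and setName() are assumed from the existing Tensor API:

```cpp
Tensor a(1, 1, 2, 2); // convenience (batch, channel, height, width) ctor
Tensor b = a;         // defaulted copy: b.itensor == a.itensor

// With the defaulted copy, this would have renamed a as well, because
// both handles pointed at the same TensorBase descriptor. With the
// explicit copy constructor above, b owns a fresh FloatTensor clone
// and a keeps its name.
b.setName("b");
```

Note that the TensorBase copy constructor added in tensor_base.h below copies `data` along with the other members, so the clone still shares the underlying buffer; it is the descriptor (dim, strides, name, offset), not the memory, that stops being aliased. Copy assignment gets the same treatment.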
Tensor::normalization_i() {
@@ -176,7 +206,7 @@ int Tensor::multiply_i_strided(Tensor const &m, const float beta) {
 }
 
 Tensor Tensor::multiply_strided(Tensor const &m, const float beta) const {
-  Tensor t;
+  Tensor t("", getFormat(), getDataType());
   return this->multiply_strided(m, t, beta);
 }
 
@@ -194,7 +224,7 @@ int Tensor::multiply_i(float const &value) {
 }
 
 Tensor Tensor::multiply(float const &value) const {
-  Tensor t;
+  Tensor t("", getFormat(), getDataType());
   return multiply(value, t);
 }
 
@@ -319,13 +349,7 @@ Tensor &Tensor::add(float const &value, Tensor &output) const {
 }
 
 int Tensor::add_i(Tensor const &m, float const alpha) {
-  try {
-    this->add(m, *this, alpha);
-  } catch (std::exception &err) {
-    ml_loge("%s %s", typeid(err).name(), err.what());
-    return ML_ERROR_INVALID_PARAMETER;
-  }
-  return ML_ERROR_NONE;
+  return itensor->add_i(m, *this, alpha);
 }
 
 Tensor Tensor::add(Tensor const &m, float const alpha) const {
@@ -538,6 +562,8 @@ void Tensor::cos(Tensor &out, float alpha) {
 
 void Tensor::inv_sqrt_i() { itensor->inv_sqrt(*this); }
 
+LazyTensor Tensor::chain() const { return LazyTensor(*this); }
+
 float Tensor::l2norm() const { return itensor->l2norm(); }
 
 void Tensor::normalization_i() {
@@ -847,7 +873,15 @@ void Tensor::copyData(const Tensor &from) { itensor->copyData(from); }
 void Tensor::copy_with_stride(const Tensor &from) {
   if (itensor->getDim() == from.getDim()) {
     // if the tensor dim matches, copy the data
-    copy(from);
+    for (unsigned int b = 0; b < batch(); ++b) {
+      for (unsigned int c = 0; c < channel(); ++c) {
+        for (unsigned int h = 0; h < height(); ++h) {
+          for (unsigned int w = 0; w < width(); ++w) {
+            setValue(b, c, h, w, from.getValue(b, c, h, w));
+          }
+        }
+      }
+    }
   } else {
     // replace with a new tensor that has the same data as the given tensor
     Tensor t = Tensor(from.getDim(), true);
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 2292e63664..b6a9ffb1c9 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -13,6 +13,8 @@
 #define __TENSOR_H__
 #ifdef __cplusplus
 
+#define MAKE_SHARED_TENSOR(...) std::make_shared<Tensor>(__VA_ARGS__)
+
 #define CREATE_IF_EMPTY_DIMS(tensor, ...) \
   do {                                    \
     if (tensor.empty())                   \
@@ -26,6 +28,8 @@
 
 namespace nntrainer {
 
+class LazyTensor;
+
 /**
  * @class   Tensor Class
  * @brief   Tensor Class
@@ -213,7 +217,7 @@ class Tensor {
    * @brief  Copy constructor of Tensor.
    * @param[in] Tensor &
    */
-  Tensor(const Tensor &rhs) = default;
+  Tensor(const Tensor &rhs);
 
   /**
    * @brief Move constructor of Tensor.
    * @param[in] Tensor &&
    */
@@ -225,7 +229,7 @@ class Tensor {
    * @brief  Copy assignment operator.
    * @param[in] rhs Tensor to be copied.
    */
-  Tensor &operator=(const Tensor &rhs) = default;
+  Tensor &operator=(const Tensor &rhs);
 
   /**
    * @brief Move assignment operator.
   * @param[in] rhs Tensor to be moved.
   */
@@ -269,7 +273,7 @@ class Tensor {
         "Creating shared tensor of size bigger than tensor memory.");
     }
 
-    Tensor output;
+    Tensor output("", d.getFormat(), d.getDataType());
     output.setTensorVar(d, buf, offset);
     return output;
   };
@@ -946,6 +950,12 @@ class Tensor {
    */
   void inv_sqrt_i();
 
+  /**
+   * @brief     Anchor a starting point to defer following evaluation
+   * @retval    LazyTensor class that can be used with run();
+   */
+  LazyTensor chain() const;
+
   /**
    * @brief     l2norm the Tensor elements
    * @retval    Calculated l2norm
@@ -1444,6 +1454,8 @@ class Tensor {
     std::swap(lhs.itensor, rhs.itensor);
   }
 
+  static constexpr float epsilon = 1e-5;
+
 private:
   std::shared_ptr<TensorBase> itensor;
 
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 616cf1dc3f..945b82b3c6 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -114,6 +114,21 @@ class TensorBase {
   TensorBase(const TensorDim &d, const void *buf = nullptr) :
     TensorBase(d, true) {}
 
+  /**
+   * @brief  Copy constructor of TensorBase.
+   * @param[in] rhs TensorBase to be copied.
+   */
+  TensorBase(const TensorBase &rhs) {
+    dim = rhs.dim;
+    strides = rhs.strides;
+    contiguous = rhs.contiguous;
+    initializer = rhs.initializer;
+    name = rhs.name;
+    data = rhs.data;
+    offset = rhs.offset;
+    src_tensor = rhs.src_tensor;
+  }
+
   /**
    * @brief     Comparison operator overload
    * @param[in] rhs Tensor to be compared with
@@ -263,6 +278,11 @@ class TensorBase {
   virtual Tensor &add_strided(Tensor const &input, Tensor &output,
                               const float beta) const = 0;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  virtual int add_i(Tensor const &m, Tensor &output, float const alpha) = 0;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
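Finally, a sketch of the deferred-evaluation flow that the new chain() entry point enables. The LazyTensor interface used here (chained add_i()/multiply_i() and run()) follows the pre-refactoring lazy_tensor.h and is an assumption of this note, not part of this diff:

```cpp
Tensor t(1, 1, 2, 2);
t.setValue(1.0f);

// chain() anchors a LazyTensor; the two in-place ops are queued rather
// than executed immediately, and run() evaluates them in order.
Tensor r = t.chain().add_i(0.5f).multiply_i(2.0f).run();
// Every element of r is now (1.0f + 0.5f) * 2.0f == 3.0f, assuming
// LazyTensor applies the queued operations in order.
```

Since chain() constructs the LazyTensor from *this, the queued operations apply to the tensor captured by the LazyTensor; whether that capture copies or aliases t's buffer depends on LazyTensor's constructor, which this patch does not show.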