From ad6b0199cba783153747fa51028a3ff05f9c7597 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong
Date: Fri, 8 Mar 2024 11:37:22 +0900
Subject: [PATCH] [bugfix] Fix issues that occurred in Tensor class
 refactoring

This commit fixes several issues that arose from the refactoring of the
Tensor class.

**Changes proposed in this PR:**
- Implement the Tensor copy constructor to prevent the incorrect behavior
  of the defaulted copy constructor.
- Newly implement Tensor::add_i() to fix the previously incorrect
  implementation.
- Add a chain() function that returns a LazyTensor.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong
---
 nntrainer/tensor/float_tensor.cpp | 15 +++++++++
 nntrainer/tensor/float_tensor.h   | 11 +++++++
 nntrainer/tensor/half_tensor.cpp  | 16 +++++++++
 nntrainer/tensor/half_tensor.h    | 12 +++++++
 nntrainer/tensor/tensor.cpp       | 54 +++++++++++++++++++++++++------
 nntrainer/tensor/tensor.h         | 18 +++++++++--
 nntrainer/tensor/tensor_base.h    | 20 ++++++++++++
 7 files changed, 133 insertions(+), 13 deletions(-)

diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp
index 6aecd0d6ad..2652610d15 100644
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -507,6 +507,21 @@ Tensor &FloatTensor::add_strided(Tensor const &input, Tensor &output,
   return output;
 }
 
+int FloatTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
+  auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
+               float *out_buf) {
+    saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
+  };
+
+  try {
+    apply_broadcast(m, f, output);
+  } catch (std::exception &err) {
+    ml_loge("%s %s", typeid(err).name(), err.what());
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+  return ML_ERROR_NONE;
+}
+
 Tensor &FloatTensor::add(float const &value, Tensor &output) const {
   auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
   apply(f, output);
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h
index e19037ab80..5463e9f1cc 100644
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -64,6 +64,12 @@ class FloatTensor : public TensorBase {
     std::vector<std::vector<std::vector<std::vector<float>>>> const &d,
     Tformat fm);
 
+  /**
+   * @brief Construct a new FloatTensor object
+   * @param rhs TensorBase object to copy
+   */
+  FloatTensor(TensorBase &rhs) : TensorBase(rhs) {}
+
   /**
    * @brief Basic Destructor
    */
@@ -256,6 +262,11 @@ class FloatTensor : public TensorBase {
   Tensor &add_strided(Tensor const &input, Tensor &output,
                       const float beta) const override;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  int add_i(Tensor const &m, Tensor &output, float const alpha) override;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
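FloatTensor::add_i() above (and its HalfTensor counterpart below) hands the actual accumulation to saxpy, one call per broadcast segment. The following standalone sketch shows the arithmetic each lambda invocation performs; axpy_segment is an illustrative stand-in written for this note, not the BLAS-backed saxpy from the repository:

```cpp
#include <cstddef>

// Stand-in for one saxpy call inside the add_i lambda: over a single
// broadcast segment of `buffer_size` elements, accumulate
//   out[i] += alpha * m[i]
// honoring the innermost (axis-3) stride that BroadcastInfo reports
// for each buffer.
void axpy_segment(std::size_t buffer_size, float alpha, const float *m_buf,
                  std::size_t m_stride, float *out_buf,
                  std::size_t out_stride) {
  for (std::size_t i = 0; i < buffer_size; ++i)
    out_buf[i * out_stride] += alpha * m_buf[i * m_stride];
}
```

apply_broadcast() matches the two shapes and invokes the lambda once per contiguous segment; a non-broadcastable operand surfaces as an exception, which add_i() converts to ML_ERROR_INVALID_PARAMETER. That is what allows Tensor::add_i() further down to shrink to a plain forward of the return code.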
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp
index 760241c10c..2f66f1c074 100644
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -479,6 +479,22 @@ Tensor &HalfTensor::add_strided(Tensor const &input, Tensor &output,
   return output;
 }
 
+int HalfTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
+  auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
+               _FP16 *out_buf) {
+    saxpy(e.buffer_size, alpha, m_buf, e.strides[3], out_buf, strides[3]);
+    /// @todo: saxpy is not valid for _FP16
+  };
+
+  try {
+    apply_broadcast(m, f, output);
+  } catch (std::exception &err) {
+    ml_loge("%s %s", typeid(err).name(), err.what());
+    return ML_ERROR_INVALID_PARAMETER;
+  }
+  return ML_ERROR_NONE;
+}
+
 Tensor &HalfTensor::add(float const &value, Tensor &output) const {
   auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
                      static_cast<_FP16>(value));
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h
index 33f991b359..6ca35e4fd2 100644
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -63,6 +63,13 @@ class HalfTensor : public TensorBase {
   HalfTensor(std::vector<std::vector<std::vector<std::vector<_FP16>>>> const &d,
              Tformat fm);
 
+  /**
+   * @brief Construct a new HalfTensor object
+   *
+   * @param rhs TensorBase object to copy
+   */
+  HalfTensor(TensorBase &rhs) : TensorBase(rhs) {}
+
   /**
    * @brief Basic Destructor
    */
@@ -255,6 +262,11 @@ class HalfTensor : public TensorBase {
   Tensor &add_strided(Tensor const &input, Tensor &output,
                       const float beta) const override;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  int add_i(Tensor const &m, Tensor &output, float const alpha) override;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index ef73ec24d3..01400d02a7 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -10,6 +10,7 @@
  */
 
 #include <float_tensor.h>
+#include <lazy_tensor.h>
 #include <tensor.h>
 
 #ifdef ENABLE_FP16
@@ -100,6 +101,35 @@ Tensor::Tensor(
 }
 #endif
 
+Tensor::Tensor(const Tensor &rhs) {
+  if (rhs.getDataType() == Tdatatype::FP32) {
+    itensor = std::shared_ptr<FloatTensor>(new FloatTensor(*rhs.itensor),
+                                           std::default_delete<FloatTensor>());
+  } else if (rhs.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    itensor = std::shared_ptr<HalfTensor>(new HalfTensor(*rhs.itensor),
+                                          std::default_delete<HalfTensor>());
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+}
+
+Tensor &Tensor::operator=(const Tensor &rhs) {
+  if (rhs.getDataType() == Tdatatype::FP32) {
+    itensor = std::shared_ptr<FloatTensor>(new FloatTensor(*rhs.itensor),
+                                           std::default_delete<FloatTensor>());
+  } else if (rhs.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    itensor = std::shared_ptr<HalfTensor>(new HalfTensor(*rhs.itensor),
+                                          std::default_delete<HalfTensor>());
+#else
+    throw std::invalid_argument("Error: enable-fp16 is not enabled");
+#endif
+  }
+  return *this;
+}
+
 bool Tensor::operator==(const Tensor &rhs) const {
   /// compares tensor information
   if (*itensor == *rhs.itensor) {
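A note on why the defaulted copy operations misbehaved: Tensor's state is essentially a std::shared_ptr<TensorBase> itensor (see tensor.h below), so the compiler-generated copy duplicated the pointer and left two Tensor handles sharing one descriptor. The snippet below is a hypothetical repro, not a test from the repository; the convenience (batch, channel, height, width) constructor and setName() are assumed from the existing Tensor API:

```cpp
Tensor a(1, 1, 2, 2); // convenience (batch, channel, height, width) ctor
Tensor b = a;         // defaulted copy: b.itensor == a.itensor

// With the defaulted copy, this would have renamed a as well, because
// both handles pointed at the same TensorBase descriptor. With the
// explicit copy constructor above, b owns a fresh FloatTensor clone
// and a keeps its name.
b.setName("b");
```

Note that the TensorBase copy constructor added in tensor_base.h below copies `data` along with the other members, so the clone still shares the underlying buffer; it is the descriptor (dim, strides, name, offset), not the memory, that stops being aliased. Copy assignment gets the same treatment.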
Tensor::normalization_i() {
@@ -176,7 +206,7 @@ int Tensor::multiply_i_strided(Tensor const &m, const float beta) {
 }
 
 Tensor Tensor::multiply_strided(Tensor const &m, const float beta) const {
-  Tensor t;
+  Tensor t("", getFormat(), getDataType());
   return this->multiply_strided(m, t, beta);
 }
 
@@ -194,7 +224,7 @@ int Tensor::multiply_i(float const &value) {
 }
 
 Tensor Tensor::multiply(float const &value) const {
-  Tensor t;
+  Tensor t("", getFormat(), getDataType());
   return multiply(value, t);
 }
 
@@ -319,13 +349,7 @@ Tensor &Tensor::add(float const &value, Tensor &output) const {
 }
 
 int Tensor::add_i(Tensor const &m, float const alpha) {
-  try {
-    this->add(m, *this, alpha);
-  } catch (std::exception &err) {
-    ml_loge("%s %s", typeid(err).name(), err.what());
-    return ML_ERROR_INVALID_PARAMETER;
-  }
-  return ML_ERROR_NONE;
+  return itensor->add_i(m, *this, alpha);
 }
 
 Tensor Tensor::add(Tensor const &m, float const alpha) const {
@@ -538,6 +562,8 @@ void Tensor::cos(Tensor &out, float alpha) {
 
 void Tensor::inv_sqrt_i() { itensor->inv_sqrt(*this); }
 
+LazyTensor Tensor::chain() const { return LazyTensor(*this); }
+
 float Tensor::l2norm() const { return itensor->l2norm(); }
 
 void Tensor::normalization_i() {
@@ -847,7 +873,15 @@ void Tensor::copyData(const Tensor &from) { itensor->copyData(from); }
 void Tensor::copy_with_stride(const Tensor &from) {
   if (itensor->getDim() == from.getDim()) {
     // if the tensor dim matches, copy the data
-    copy(from);
+    for (unsigned int b = 0; b < batch(); ++b) {
+      for (unsigned int c = 0; c < channel(); ++c) {
+        for (unsigned int h = 0; h < height(); ++h) {
+          for (unsigned int w = 0; w < width(); ++w) {
+            setValue(b, c, h, w, from.getValue(b, c, h, w));
+          }
+        }
+      }
+    }
   } else {
     // replace with a new tensor that has the same data as the given tensor
     Tensor t = Tensor(from.getDim(), true);
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index 2292e63664..b6a9ffb1c9 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -13,6 +13,8 @@
 #define __TENSOR_H__
 #ifdef __cplusplus
 
+#define MAKE_SHARED_TENSOR(...) std::make_shared<Tensor>(__VA_ARGS__)
+
 #define CREATE_IF_EMPTY_DIMS(tensor, ...) \
   do {                                    \
     if (tensor.empty())                   \
@@ -26,6 +28,8 @@
 
 namespace nntrainer {
 
+class LazyTensor;
+
 /**
  * @class   Tensor Class
  * @brief   Tensor Class
@@ -213,7 +217,7 @@ class Tensor {
    * @brief  Copy constructor of Tensor.
    * @param[in] Tensor &
    */
-  Tensor(const Tensor &rhs) = default;
+  Tensor(const Tensor &rhs);
 
   /**
    * @brief Move constructor of Tensor.
    * @param[in] Tensor &&
    */
@@ -225,7 +229,7 @@ class Tensor {
    * @brief  Copy assignment operator.
    * @param[in] rhs Tensor to be copied.
    */
-  Tensor &operator=(const Tensor &rhs) = default;
+  Tensor &operator=(const Tensor &rhs);
 
   /**
    * @brief Move assignment operator.
   * @param[in] rhs Tensor to be moved.
   */
@@ -269,7 +273,7 @@ class Tensor {
         "Creating shared tensor of size bigger than tensor memory.");
     }
 
-    Tensor output;
+    Tensor output("", d.getFormat(), d.getDataType());
     output.setTensorVar(d, buf, offset);
     return output;
   };
@@ -946,6 +950,12 @@ class Tensor {
    */
   void inv_sqrt_i();
 
+  /**
+   * @brief     Anchor a starting point to defer following evaluation
+   * @retval    LazyTensor class that can be used with run();
+   */
+  LazyTensor chain() const;
+
   /**
    * @brief     l2norm the Tensor elements
    * @retval    Calculated l2norm
@@ -1444,6 +1454,8 @@ class Tensor {
     std::swap(lhs.itensor, rhs.itensor);
   }
 
+  static constexpr float epsilon = 1e-5;
+
 private:
   std::shared_ptr<TensorBase> itensor;
 
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h
index 616cf1dc3f..945b82b3c6 100644
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -114,6 +114,21 @@ class TensorBase {
   TensorBase(const TensorDim &d, const void *buf = nullptr) :
     TensorBase(d, true) {}
 
+  /**
+   * @brief  Copy constructor of TensorBase.
+   * @param[in] rhs TensorBase to be copied.
+   */
+  TensorBase(const TensorBase &rhs) {
+    dim = rhs.dim;
+    strides = rhs.strides;
+    contiguous = rhs.contiguous;
+    initializer = rhs.initializer;
+    name = rhs.name;
+    data = rhs.data;
+    offset = rhs.offset;
+    src_tensor = rhs.src_tensor;
+  }
+
   /**
    * @brief     Comparison operator overload
    * @param[in] rhs Tensor to be compared with
@@ -263,6 +278,11 @@ class TensorBase {
   virtual Tensor &add_strided(Tensor const &input, Tensor &output,
                               const float beta) const = 0;
 
+  /**
+   * @copydoc Tensor::add_i(Tensor const &m, float const alpha)
+   */
+  virtual int add_i(Tensor const &m, Tensor &output, float const alpha) = 0;
+
   /**
    * @copydoc Tensor::add(float const &value, Tensor &output)
    */
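Finally, a sketch of the deferred-evaluation flow that the new chain() entry point enables. The LazyTensor interface used here (chained add_i()/multiply_i() and run()) follows the pre-refactoring lazy_tensor.h and is an assumption of this note, not part of this diff:

```cpp
Tensor t(1, 1, 2, 2);
t.setValue(1.0f);

// chain() anchors a LazyTensor; the two in-place ops are queued rather
// than executed immediately, and run() evaluates them in order.
Tensor r = t.chain().add_i(0.5f).multiply_i(2.0f).run();
// Every element of r is now (1.0f + 0.5f) * 2.0f == 3.0f, assuming
// LazyTensor applies the queued operations in order.
```

Since chain() constructs the LazyTensor from *this, the queued operations apply to the tensor captured by the LazyTensor; whether that capture copies or aliases t's buffer depends on LazyTensor's constructor, which this patch does not show.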