From 40a5f54179f268c67ec9ec4644969db02c57f410 Mon Sep 17 00:00:00 2001
From: Donghyeon Jeong
Date: Wed, 11 Dec 2024 11:13:06 +0900
Subject: [PATCH] Quantizer class to perform quantization

This pull request introduces a quantizer class that performs quantization and
dequantization with different schemes. The goal is to offer users more choices
when dealing with various types of quantization. Initial support covers affine
quantization (per-tensor and per-channel) and binary-code-based quantization.
This pull request establishes the basic structure of these classes; further
implementation details will be added in future updates.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong
---
 debian/nntrainer-dev.install   |   1 +
 nntrainer/tensor/meson.build   |   2 +
 nntrainer/tensor/quantizer.cpp |  84 ++++++++++
 nntrainer/tensor/quantizer.h   | 295 +++++++++++++++++++++++++++++++++
 packaging/nntrainer.spec       |   1 +
 5 files changed, 383 insertions(+)
 create mode 100644 nntrainer/tensor/quantizer.cpp
 create mode 100644 nntrainer/tensor/quantizer.h

diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install
index 8ed07b591e..73459d8097 100644
--- a/debian/nntrainer-dev.install
+++ b/debian/nntrainer-dev.install
@@ -17,6 +17,7 @@
 /usr/include/nntrainer/blas_interface.h
 /usr/include/nntrainer/var_grad.h
 /usr/include/nntrainer/weight.h
+/usr/include/nntrainer/quantizer.h
 /usr/include/nntrainer/blas_avx.h
 # todo: update dataset headers
 /usr/include/nntrainer/databuffer.h
diff --git a/nntrainer/tensor/meson.build b/nntrainer/tensor/meson.build
index 83f25d2e95..21bca384a9 100644
--- a/nntrainer/tensor/meson.build
+++ b/nntrainer/tensor/meson.build
@@ -12,6 +12,7 @@ tensor_sources = [
   'tensor_dim.cpp',
   'var_grad.cpp',
   'weight.cpp',
+  'quantizer.cpp',
   'basic_planner.cpp',
   'memory_pool.cpp',
   'swap_device.cpp',
@@ -31,6 +32,7 @@ tensor_headers = [
   'uint_tensor.h',
   'weight.h',
   'var_grad.h',
+  'quantizer.h',
   'tensor_wrap_specs.h',
   'blas_interface.h',
   'manager.h',
diff --git a/nntrainer/tensor/quantizer.cpp b/nntrainer/tensor/quantizer.cpp
new file mode 100644
index 0000000000..22ef10e0de
--- /dev/null
+++ b/nntrainer/tensor/quantizer.cpp
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * @file quantizer.cpp
+ * @date 10 December 2024
+ * @brief This defines quantizers for different types of quantization schemes
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Donghyeon Jeong
+ * @bug No known bugs except for NYI items
+ */
+
+#include <quantizer.h>
+
+namespace nntrainer {
+
+/**
+ * @brief PerTensorAffineQuantizer class
+ */
+std::unique_ptr<Quantizer> PerTensorAffineQuantizer::create() {
+  return std::make_unique<PerTensorAffineQuantizer>();
+}
+
+Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
+                                          Tdatatype qtype) {
+  /// @todo NYI
+  return input;
+}
+
+Tensor PerTensorAffineQuantizer::dequantize(const Tensor &input,
+                                            Tdatatype dtype) {
+  /// @todo NYI
+  return input;
+}
+
+QScheme PerTensorAffineQuantizer::qscheme() const {
+  return QScheme::PER_TENSOR_AFFINE;
+}
+
+/**
+ * @brief PerChannelAffineQuantizer class
+ */
+std::unique_ptr<Quantizer> PerChannelAffineQuantizer::create() {
+  return std::make_unique<PerChannelAffineQuantizer>();
+}
+
+Tensor PerChannelAffineQuantizer::quantize(const Tensor &input,
+                                           Tdatatype qtype) {
+  /// @todo NYI
+  return input;
+}
+
+Tensor PerChannelAffineQuantizer::dequantize(const Tensor &input,
+                                             Tdatatype dtype) {
+  /// @todo NYI
+  return input;
+}
+
+QScheme PerChannelAffineQuantizer::qscheme() const {
+  return QScheme::PER_CHANNEL_AFFINE;
+}
+
+/**
+ * @brief BinaryCodeBasedQuantizer class
+ */
+std::unique_ptr<Quantizer> BinaryCodeBasedQuantizer::create() {
+  return std::make_unique<BinaryCodeBasedQuantizer>();
+}
+
+Tensor BinaryCodeBasedQuantizer::quantize(const Tensor &input,
+                                          Tdatatype qtype) {
+  /// @todo NYI
+  return input;
+}
+
+Tensor BinaryCodeBasedQuantizer::dequantize(const Tensor &input,
+                                            Tdatatype dtype) {
+  /// @todo NYI
+  return input;
+}
+
+QScheme BinaryCodeBasedQuantizer::qscheme() const {
+  return QScheme::BINARY_CODE_BASED;
+}
+
+} // namespace nntrainer
diff --git a/nntrainer/tensor/quantizer.h b/nntrainer/tensor/quantizer.h
new file mode 100644
index 0000000000..ffc7039973
--- /dev/null
+++ b/nntrainer/tensor/quantizer.h
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * @file quantizer.h
+ * @date 10 December 2024
+ * @brief This defines quantizers for different types of quantization schemes
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Donghyeon Jeong
+ * @bug No known bugs except for NYI items
+ */
+
+#ifndef __QUANTIZER_H__
+#define __QUANTIZER_H__
+#ifdef __cplusplus
+
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @brief defines the quantization scheme
+ * @details NNTrainer provides basic quantization schemes (e.g., per-tensor
+ * affine quantization). Various quantization schemes will be continuously
+ * updated. If you would like to use a different quantization technique, please
+ * select a custom quantizer scheme.
+ */
+enum class QScheme : uint8_t {
+  /** predefined quantizer */
+  PER_TENSOR_AFFINE = 0x00,
+  PER_CHANNEL_AFFINE = 0x01,
+  BINARY_CODE_BASED = 0x02,
+  /** this is for custom use */
+  CUSTOM_QUANTIZER_01 = 0x10,
+  CUSTOM_QUANTIZER_02 = 0x11,
+  CUSTOM_QUANTIZER_03 = 0x12,
+  CUSTOM_QUANTIZER_04 = 0x13,
+  CUSTOM_QUANTIZER_05 = 0x14,
+  CUSTOM_QUANTIZER_06 = 0x15,
+};
+
+/**
+ * @class Quantizer class
+ * @brief Quantizer class is a base class for all quantizers.
+ * @note A custom quantizer must inherit this class and implement virtual
+ * functions.
+ */
+class Quantizer {
+private:
+  static std::unordered_map<QScheme, Quantizer *>
+    custom_quantizers; /** Hash table that holds empty instances of the custom
+                          quantizers */
+
+protected:
+  /**
+   * @brief Register the user-defined quantizer class
+   *
+   * @param qscheme Quantization scheme (use CUSTOM_QUANTIZER_#)
+   * @param quantizer quantizer class to register
+   *
+   * @note This function registers the custom quantizer class. A user-defined
+   * derived class must be registered with this function.
+   */
+  static void registerQuantizer(QScheme qscheme, Quantizer &quantizer) {
+    custom_quantizers.insert(std::make_pair(qscheme, &quantizer));
+  }
+
+public:
+  /**
+   * @brief Basic Constructor of a Quantizer
+   */
+  Quantizer() = default;
+
+  /**
+   * @brief Basic Destructor of a Quantizer
+   */
+  virtual ~Quantizer() = default;
+
+  /**
+   * @brief Get the Registered Quantizer object
+   *
+   * @param qscheme Quantization scheme
+   * @return Quantizer* registered quantizer object
+   */
+  static Quantizer *getRegisteredQuantizer(QScheme qscheme) {
+    if (custom_quantizers.find(qscheme) == custom_quantizers.end()) {
+      throw std::invalid_argument("requested quantizer is not registered.");
+    }
+    return custom_quantizers.at(qscheme);
+  }
+
+  /** Derived classes must implement the following functions */
+  /**
+   * @brief Create a new object of itself
+   *
+   * @return std::unique_ptr<Quantizer>
+   */
+  virtual std::unique_ptr<Quantizer> create() = 0;
+
+  /**
+   * @brief Quantize a tensor into a quantized tensor.
+   * @param[in] input Floating point tensor to quantize
+   * @return Tensor quantized tensor
+   */
+  virtual Tensor quantize(const Tensor &input, Tdatatype qtype) = 0;
+
+  /**
+   * @brief Dequantize a quantized tensor into a tensor.
+   * @param[in] input Quantized tensor to dequantize
+   * @return Tensor dequantized tensor
+   */
+  virtual Tensor dequantize(const Tensor &input, Tdatatype qtype) = 0;
+
+  /**
+   * @brief Get quantization Scheme type.
+   * @return Quantization scheme
+   */
+  virtual QScheme qscheme() const = 0;
+};
+
+/**
+ * @class UniformQuantizer class
+ * @brief UniformQuantizer class serves as the parent class for various types
+ * of uniform quantizers.
+ */
+class UniformQuantizer : public Quantizer {
+public:
+  UniformQuantizer() : Quantizer() {}
+};
+
+/**
+ * @class NonUniformQuantizer class
+ * @brief NonUniformQuantizer class serves as the parent class for various
+ * types of non-uniform quantizers.
+ */
+class NonUniformQuantizer : public Quantizer {
+public:
+  NonUniformQuantizer() : Quantizer() {}
+};
+
+/**
+ * @class PerTensorAffineQuantizer class
+ * @brief PerTensorAffineQuantizer class uses the affine quantization scheme.
+ *
+ * Quantization: x_q = clip(round(x / scale + zero_point), min, max)
+ * Dequantization: x = scale * (x_q - zero_point)
+ *
+ * @note Single scale and zero point values are used for the entire tensor.
+ */
+class PerTensorAffineQuantizer : public UniformQuantizer {
+public:
+  /**
+   * @brief Basic Constructor of a PerTensorAffineQuantizer
+   */
+  PerTensorAffineQuantizer() : UniformQuantizer() {}
+
+  /**
+   * @copydoc Quantizer::create()
+   */
+  std::unique_ptr<Quantizer> create() override;
+
+  /**
+   * @copydoc Quantizer::quantize(const Tensor &input)
+   */
+  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+
+  /**
+   * @copydoc Quantizer::dequantize(const Tensor &input)
+   */
+  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+
+  /**
+   * @copydoc Quantizer::qscheme()
+   */
+  QScheme qscheme() const override;
+
+private:
+  float scale;
+  int zero_point;
+};
+
+/**
+ * @class PerChannelAffineQuantizer class
+ * @brief PerChannelAffineQuantizer class uses the affine quantization scheme.
+ *
+ * @note PerChannelAffineQuantizer is similar to PerTensorAffineQuantizer, but
+ * it has separate scale and zero_point parameters for each channel. This
+ * allows for more precise quantization of different channels within the same
+ * tensor.
+ *
+ */
+class PerChannelAffineQuantizer : public UniformQuantizer {
+public:
+  /**
+   * @brief Basic Constructor of a PerChannelAffineQuantizer
+   */
+  PerChannelAffineQuantizer() : UniformQuantizer() {}
+
+  /**
+   * @copydoc Quantizer::create()
+   */
+  std::unique_ptr<Quantizer> create() override;
+
+  /**
+   * @copydoc Quantizer::quantize(const Tensor &input)
+   */
+  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+
+  /**
+   * @copydoc Quantizer::dequantize(const Tensor &input)
+   */
+  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+
+  /**
+   * @copydoc Quantizer::qscheme()
+   */
+  QScheme qscheme() const override;
+
+private:
+  float *scales;
+  int *zero_points;
+};
+
+/**
+ * @class BinaryCodeBasedQuantizer class
+ * @brief BinaryCodeBasedQuantizer class uses the binary-code-based
+ * quantization (BCQ) scheme.
+ *
+ */
+class BinaryCodeBasedQuantizer : public NonUniformQuantizer {
+public:
+  /**
+   * @brief Basic Constructor of a BinaryCodeBasedQuantizer
+   */
+  BinaryCodeBasedQuantizer() : NonUniformQuantizer() {}
+
+  /**
+   * @copydoc Quantizer::create()
+   */
+  std::unique_ptr<Quantizer> create() override;
+
+  /**
+   * @copydoc Quantizer::quantize(const Tensor &input)
+   */
+  Tensor quantize(const Tensor &input, Tdatatype qtype) override;
+
+  /**
+   * @copydoc Quantizer::dequantize(const Tensor &input)
+   */
+  Tensor dequantize(const Tensor &input, Tdatatype dtype) override;
+
+  /**
+   * @copydoc Quantizer::qscheme()
+   */
+  QScheme qscheme() const override;
+};
+
+/**
+ * @brief Quantization class to create a quantizer
+ *
+ * @details The Quantization class is a creator class that creates either a
+ * predefined quantizer or a user-defined quantizer. Please check QScheme to
+ * find out about the predefined quantizers.
+ *
+ * If a preferred quantization scheme is not provided, create a new class that
+ * inherits the Quantizer class, select the quantization scheme
+ * CUSTOM_QUANTIZER_#, register it using registerQuantizer(), and then use it.
+ */
+class Quantization {
+public:
+  /**
+   * @brief Create a Quantizer object
+   *
+   * @param qscheme quantization scheme
+   * @return std::unique_ptr<Quantizer> quantizer object
+   */
+  static std::unique_ptr<Quantizer> createQuantizer(QScheme qscheme) {
+    switch (qscheme) {
+    case QScheme::PER_TENSOR_AFFINE:
+      return std::make_unique<PerTensorAffineQuantizer>();
+      break;
+    case QScheme::PER_CHANNEL_AFFINE:
+      return std::make_unique<PerChannelAffineQuantizer>();
+      break;
+    case QScheme::BINARY_CODE_BASED:
+      return std::make_unique<BinaryCodeBasedQuantizer>();
+      break;
+    default:
+      return Quantizer::getRegisteredQuantizer(qscheme)->create();
+      break;
+    }
+  }
+};
+
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __QUANTIZER_H__ */
diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec
index 41ee30a6ff..0f42a44051 100644
--- a/packaging/nntrainer.spec
+++ b/packaging/nntrainer.spec
@@ -551,6 +551,7 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
 %{_includedir}/nntrainer/blas_interface.h
 %{_includedir}/nntrainer/var_grad.h
 %{_includedir}/nntrainer/weight.h
+%{_includedir}/nntrainer/quantizer.h
 # @todo: update dataset headers
 %{_includedir}/nntrainer/databuffer.h
 %{_includedir}/nntrainer/databuffer_factory.h
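
Reviewer note (not part of the patch): a minimal usage sketch of the factory introduced above. It assumes `Tdatatype::QINT8` and `Tdatatype::FP32` are available from the existing tensor headers; since `quantize()` and `dequantize()` are still NYI in this commit, both calls currently return the input tensor unchanged.

```cpp
// Sketch only, assuming a build of nntrainer with this patch applied.
#include <quantizer.h>
#include <tensor.h>

using namespace nntrainer;

void quantize_example(const Tensor &weights) {
  // Predefined schemes are handled directly by the factory; custom schemes
  // (CUSTOM_QUANTIZER_01..06) fall back to Quantizer::getRegisteredQuantizer().
  std::unique_ptr<Quantizer> quantizer =
    Quantization::createQuantizer(QScheme::PER_TENSOR_AFFINE);

  // Quantize to an 8-bit integer tensor, then restore a floating-point tensor.
  // With this patch both calls are placeholders that return `weights` as-is.
  Tensor q = quantizer->quantize(weights, Tdatatype::QINT8);
  Tensor deq = quantizer->dequantize(q, Tdatatype::FP32);
  (void)deq; // further processing would go here
}
```

A custom scheme would follow the same path: derive from Quantizer, implement create(), quantize(), dequantize(), and qscheme() returning one of the CUSTOM_QUANTIZER_# values, and register an instance through the protected registerQuantizer() so that createQuantizer() can resolve it.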