Quantizer class to perform quantization
This pull request introduces quantizer classes that perform quantization and dequantization with different schemes.
The goal is to give users more choice when working with various types of quantization.
Initial support targets affine quantization (per tensor and per channel) and binary-code-based quantization.
This pull request lays out the basic structure of these classes; the actual implementations will be added in future updates.
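For background, per-tensor affine quantization maps a float x to an integer q = clamp(round(x / scale) + zero_point, qmin, qmax) and restores it as x_hat = scale * (q - zero_point). Below is a minimal standalone sketch of that mapping for int8; it is illustrative only and not the nntrainer implementation, which is still NYI in this commit.

// Illustrative sketch of per-tensor affine quantization (not nntrainer code).
// Assumes a single scale and zero-point for the whole tensor.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

std::vector<int8_t> quantize_affine(const std::vector<float> &x, float scale,
                                    int zero_point) {
  std::vector<int8_t> q(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    // q = clamp(round(x / scale) + zero_point, -128, 127)
    int v = static_cast<int>(std::lround(x[i] / scale)) + zero_point;
    q[i] = static_cast<int8_t>(std::clamp(v, -128, 127));
  }
  return q;
}

std::vector<float> dequantize_affine(const std::vector<int8_t> &q, float scale,
                                     int zero_point) {
  std::vector<float> x(q.size());
  for (size_t i = 0; i < q.size(); ++i)
    x[i] = scale * (q[i] - zero_point); // x_hat = scale * (q - zero_point)
  return x;
}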

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 committed Dec 16, 2024
1 parent cd17a66 commit 40a5f54
Showing 5 changed files with 383 additions and 0 deletions.
1 change: 1 addition & 0 deletions debian/nntrainer-dev.install
@@ -17,6 +17,7 @@
/usr/include/nntrainer/blas_interface.h
/usr/include/nntrainer/var_grad.h
/usr/include/nntrainer/weight.h
/usr/include/nntrainer/quantizer.h
/usr/include/nntrainer/blas_avx.h
# todo: update dataset headers
/usr/include/nntrainer/databuffer.h
2 changes: 2 additions & 0 deletions nntrainer/tensor/meson.build
@@ -12,6 +12,7 @@ tensor_sources = [
'tensor_dim.cpp',
'var_grad.cpp',
'weight.cpp',
'quantizer.cpp',
'basic_planner.cpp',
'memory_pool.cpp',
'swap_device.cpp',
@@ -31,6 +32,7 @@ tensor_headers = [
'uint_tensor.h',
'weight.h',
'var_grad.h',
'quantizer.h',
'tensor_wrap_specs.h',
'blas_interface.h',
'manager.h',
84 changes: 84 additions & 0 deletions nntrainer/tensor/quantizer.cpp
@@ -0,0 +1,84 @@
// SPDX-License-Identifier: Apache-2.0
/**
 * @file quantizer.cpp
 * @date 10 December 2024
 * @brief This defines quantizers for different types of quantization schemes
 * @see https://github.com/nnstreamer/nntrainer
 * @author Donghyeon Jeong <[email protected]>
 * @bug No known bugs except for NYI items
 */

#include <quantizer.h>

namespace nntrainer {

/**
 * @brief PerTensorAffineQuantizer class
 */
std::unique_ptr<Quantizer> PerTensorAffineQuantizer::create() {
  return std::make_unique<PerTensorAffineQuantizer>();
}

Tensor PerTensorAffineQuantizer::quantize(const Tensor &input,
                                          Tdatatype qtype) {
  /// @todo NYI
  return input;
}

Tensor PerTensorAffineQuantizer::dequantize(const Tensor &input,
                                            Tdatatype dtype) {
  /// @todo NYI
  return input;
}

QScheme PerTensorAffineQuantizer::qscheme() const {
  return QScheme::PER_TENSOR_AFFINE;
}

/**
 * @brief PerChannelAffineQuantizer class
 */
std::unique_ptr<Quantizer> PerChannelAffineQuantizer::create() {
  return std::make_unique<PerChannelAffineQuantizer>();
}

Tensor PerChannelAffineQuantizer::quantize(const Tensor &input,
                                           Tdatatype qtype) {
  /// @todo NYI
  return input;
}

Tensor PerChannelAffineQuantizer::dequantize(const Tensor &input,
                                             Tdatatype dtype) {
  /// @todo NYI
  return input;
}

QScheme PerChannelAffineQuantizer::qscheme() const {
  return QScheme::PER_CHANNEL_AFFINE;
}

/**
 * @brief BinaryCodeBasedQuantizer class
 */
std::unique_ptr<Quantizer> BinaryCodeBasedQuantizer::create() {
  return std::make_unique<BinaryCodeBasedQuantizer>();
}

Tensor BinaryCodeBasedQuantizer::quantize(const Tensor &input,
                                          Tdatatype qtype) {
  /// @todo NYI
  return input;
}

Tensor BinaryCodeBasedQuantizer::dequantize(const Tensor &input,
                                            Tdatatype dtype) {
  /// @todo NYI
  return input;
}

QScheme BinaryCodeBasedQuantizer::qscheme() const {
  return QScheme::BINARY_CODE_BASED;
}

} // namespace nntrainer
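For reference, a minimal caller-side sketch of the interface defined above. It assumes quantizer.h declares quantize(), dequantize(), and qscheme() as virtual members of the Quantizer base class (the header is not shown in this excerpt), and the Tdatatype values used are assumptions; both calls are still NYI in this commit and simply return the input.

// Hypothetical usage sketch, not part of this commit.
#include <memory>
#include <quantizer.h>

using namespace nntrainer;

Tensor roundtrip(const Tensor &weights) {
  // Create a per-tensor affine quantizer through its factory method.
  std::unique_ptr<Quantizer> quantizer = PerTensorAffineQuantizer::create();

  // quantizer->qscheme() identifies the scheme, here QScheme::PER_TENSOR_AFFINE.
  // Quantize to a low-precision type, then restore the original precision.
  // Tdatatype::QINT8 and Tdatatype::FP32 are assumed enum values.
  Tensor q = quantizer->quantize(weights, Tdatatype::QINT8);
  return quantizer->dequantize(q, Tdatatype::FP32);
}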