Skip to content

Commit

Permalink
[ BCQ ] Support BCQ type as model Weight
Browse files Browse the repository at this point in the history
- Previous PR enabled BCQTensor.
- This PR enables BCQTensor as a weight.
- This PR adds BCQ-FP32 as a weight-activation type.

**Self-evaluation:**

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
  • Loading branch information
EunjuYang committed Dec 9, 2024
1 parent a7cc856 commit 2262445
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 13 deletions.
14 changes: 7 additions & 7 deletions api/ccapi/include/tensor_dim.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TensorDim {

/**
* @brief Tensor Data Type.
* Currently support QINT4, QINT8, UINT8, UINT16, UINT32, FP16 & FP32
* Currently support QINT4, QINT8, BCQ, UINT8, UINT16, UINT32, FP16 & FP32
*/
enum class DataType {
QINT4, /** quantized int 4*/
Expand Down Expand Up @@ -115,8 +115,8 @@ class TensorDim {
* @brief Creator of TensorDim with Format & DataType
*
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param eff_dim_flag_ effective dimension flag (1 means it's effective)
* @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
*/
Expand Down Expand Up @@ -219,8 +219,8 @@ class TensorDim {
* @param h height
* @param w width
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param eff_dim_flag_ dimension bit flag to calculate the dynamic
* dimension, rightmost is width
*/
Expand Down Expand Up @@ -249,8 +249,8 @@ class TensorDim {
*
* @param shape shape of format
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param order data storage order ROW_MAJOR | COL_MAJOR
*/
TensorDim(const std::string &shape, TensorDim::Format fm,
Expand Down
18 changes: 14 additions & 4 deletions nntrainer/models/model_common_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,24 @@ class MemorySwapMode : public Property<std::string> {
* @brief Enumeration of Data Type for model & layer
*/
struct ModelTensorDataTypeInfo {
enum Enum { W4A16, W4A32, W8A16, W8A32, W16A16, W16A32, W32A16, W32A32 };
enum Enum {
W3A32,
W4A16,
W4A32,
W8A16,
W8A32,
W16A16,
W16A32,
W32A16,
W32A32
};
static constexpr std::initializer_list<Enum> EnumList = {
Enum::W4A16, Enum::W4A32, Enum::W8A16, Enum::W8A32,
Enum::W3A32, Enum::W4A16, Enum::W4A32, Enum::W8A16, Enum::W8A32,
Enum::W16A16, Enum::W16A32, Enum::W32A16, Enum::W32A32};

static constexpr const char *EnumStr[] = {
"QINT4-FP16", "QINT4-FP32", "QINT8-FP16", "QINT8-FP32",
"FP16-FP16", "FP16-FP32", "FP32-FP16", "FP32-FP32"};
"BCQ-FP32", "QINT4-FP16", "QINT4-FP32", "QINT8-FP16", "QINT8-FP32",
"FP16-FP16", "FP16-FP32", "FP32-FP16", "FP32-FP32"};
};

/**
Expand Down
5 changes: 3 additions & 2 deletions nntrainer/utils/base_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,9 +661,10 @@ void from_string(const std::string &value, std::vector<T> &property) {
struct TensorDataTypeInfo {
using Enum = nntrainer::TensorDim::DataType;
static constexpr std::initializer_list<Enum> EnumList = {
Enum::QINT4, Enum::QINT8, Enum::FP16, Enum::FP32};
Enum::BCQ, Enum::QINT4, Enum::QINT8, Enum::FP16, Enum::FP32};

static constexpr const char *EnumStr[] = {"QINT4", "QINT8", "FP16", "FP32"};
static constexpr const char *EnumStr[] = {"BCQ", "QINT4", "QINT8", "FP16",
"FP32"};
};

/**
Expand Down

0 comments on commit 2262445

Please sign in to comment.