Skip to content

Commit

Permalink
[ BCQ ] Support BCQ type as model Weight
Browse files Browse the repository at this point in the history
- Previous PR enabled BCQTensor.
- This PR enables BCQTensor as a weight.
- This PR adds BCQ-FP32 as a weight-activation type.

**Self-evaluation:**

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
  • Loading branch information
EunjuYang committed Dec 9, 2024
1 parent a7cc856 commit 2262445
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 13 deletions.
14 changes: 7 additions & 7 deletions api/ccapi/include/tensor_dim.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class TensorDim {

/**
* @brief Tensor Data Type.
* Currently support QINT4, QINT8, UINT8, UINT16, UINT32, FP16 & FP32
* Currently support QINT4, QINT8, BCQ, UINT8, UINT16, UINT32, FP16 & FP32
*/
enum class DataType {
QINT4, /** quantized int 4*/
Expand Down Expand Up @@ -115,8 +115,8 @@ class TensorDim {
* @brief Creator of TensorDim with Format & DataType
*
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param eff_dim_flag_ effective dimension flag (1 means it's effective)
* @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
*/
Expand Down Expand Up @@ -219,8 +219,8 @@ class TensorDim {
* @param h height
* @param w width
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param eff_dim_flag_ dimension bit flag to calculate the dynamic
* dimension, rightmost is width
*/
Expand Down Expand Up @@ -249,8 +249,8 @@ class TensorDim {
*
* @param shape shape of format
* @param fm format NCHW | HNWC
* @param d_type DataType QINT4 | QINT8 | UINT8 | UINT16 | UINT32 | FP16 |
* FP32
* @param d_type DataType QINT4 | QINT8 | BCQ | UINT8 | UINT16 | UINT32 | FP16
* | FP32
* @param order data storage order ROW_MAJOR | COL_MAJOR
*/
TensorDim(const std::string &shape, TensorDim::Format fm,
Expand Down
18 changes: 14 additions & 4 deletions nntrainer/models/model_common_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,24 @@ class MemorySwapMode : public Property<std::string> {
* @brief Enumeration of Data Type for model & layer
*/
struct ModelTensorDataTypeInfo {
enum Enum { W4A16, W4A32, W8A16, W8A32, W16A16, W16A32, W32A16, W32A32 };
enum Enum {
W3A32,
W4A16,
W4A32,
W8A16,
W8A32,
W16A16,
W16A32,
W32A16,
W32A32
};
static constexpr std::initializer_list<Enum> EnumList = {
Enum::W4A16, Enum::W4A32, Enum::W8A16, Enum::W8A32,
Enum::W3A32, Enum::W4A16, Enum::W4A32, Enum::W8A16, Enum::W8A32,
Enum::W16A16, Enum::W16A32, Enum::W32A16, Enum::W32A32};

static constexpr const char *EnumStr[] = {
"QINT4-FP16", "QINT4-FP32", "QINT8-FP16", "QINT8-FP32",
"FP16-FP16", "FP16-FP32", "FP32-FP16", "FP32-FP32"};
"BCQ-FP32", "QINT4-FP16", "QINT4-FP32", "QINT8-FP16", "QINT8-FP32",
"FP16-FP16", "FP16-FP32", "FP32-FP16", "FP32-FP32"};
};

/**
Expand Down
5 changes: 3 additions & 2 deletions nntrainer/utils/base_properties.h
Original file line number Diff line number Diff line change
Expand Up @@ -661,9 +661,10 @@ void from_string(const std::string &value, std::vector<T> &property) {
struct TensorDataTypeInfo {
using Enum = nntrainer::TensorDim::DataType;
static constexpr std::initializer_list<Enum> EnumList = {
Enum::QINT4, Enum::QINT8, Enum::FP16, Enum::FP32};
Enum::BCQ, Enum::QINT4, Enum::QINT8, Enum::FP16, Enum::FP32};

static constexpr const char *EnumStr[] = {"QINT4", "QINT8", "FP16", "FP32"};
static constexpr const char *EnumStr[] = {"BCQ", "QINT4", "QINT8", "FP16",
"FP32"};
};

/**
Expand Down

0 comments on commit 2262445

Please sign in to comment.