[GPU/OpenCL/Update] Initial version of LM Head layer with OpenCL ops…

… and Update Addition Layer on GPU with latest Pipeline changes. Added initial version of LM head layer for GPU and removed dependencies of cl_context for addition_layer. Signed-off-by: Yash Singh <[email protected]>
nnstreamer · Oct 9, 2024 · 781e16e · 781e16e
1 parent f222ecf
commit 781e16e
Show file tree

Hide file tree

Showing 14 changed files with 739 additions and 8 deletions.
diff --git a/api/ccapi/include/layer.h b/api/ccapi/include/layer.h
@@ -8,6 +8,7 @@
  * @author Parichay Kapoor <[email protected]>
  * @author Debadri Samaddar <[email protected]>
  * @author Niket Agarwal <[email protected]>
+ * @author Yash Singh <[email protected]>
  * @bug	   No known bugs except for NYI items
  * @brief  This is layers interface for c++ API
  *
@@ -103,7 +104,8 @@ enum LayerType {
                                      derivative */
   LAYER_UPSAMPLE2D,               /**< Upsample 2D Layer type */
   LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**<RMS NORM Layer */
-  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN  /**< Unknown */
+  LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN, /**< Unknown */
+  LAYER_LM_HEAD = ML_TRAIN_LAYER_TYPE_LM_HEAD, /**< LM Head Layer */
 };
 
 /**
@@ -398,6 +400,15 @@ Addition(const std::vector<std::string> &properties = {},
   return createLayer(LayerType::LAYER_ADDITION, properties, compute_engine);
 }
 
+/**
+ * @brief Helper function to create lm_head layer
+ * @param[in] properties layer properties forwarded to createLayer
+ * @param[in] compute_engine compute engine forwarded to createLayer
+ *            (defaults to LayerComputeEngine::CPU)
+ * @return std::unique_ptr<Layer> created with LayerType::LAYER_LM_HEAD
+ */
+inline std::unique_ptr<Layer>
+LmHead(const std::vector<std::string> &properties = {},
+       const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
+  return createLayer(LayerType::LAYER_LM_HEAD, properties, compute_engine);
+}
+
 /**
  * @brief Helper function to create concat layer
  */

diff --git a/api/nntrainer-api-common.h b/api/nntrainer-api-common.h
@@ -65,6 +65,7 @@ typedef enum {
   ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
   ML_TRAIN_LAYER_TYPE_SWIGLU = 30,   /**< Swiglu Layer type */
   ML_TRAIN_LAYER_TYPE_WEIGHT = 31,   /**< Weight Layer type (Since 9.0)*/
+  ML_TRAIN_LAYER_TYPE_LM_HEAD = 32,  /**< LM Head Layer type */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
     300, /**< Preprocess flip Layer (Since 6.5) */
   ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =

diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp
@@ -7,6 +7,7 @@
  * @see     https://github.com/nnstreamer/nntrainer
  * @author  Debadri Samaddar <[email protected]>
  * @author  Niket Agarwal <[email protected]>
+ * @author  Yash Singh <[email protected]>
  * @author  Thummala Pallavi <[email protected]>
  * @bug     No known bugs except for NYI items
  * @brief   This file contains app context related functions and classes that
@@ -19,6 +20,7 @@
 #include <cl_context.h>
 #include <concat_cl.h>
 #include <fc_layer_cl.h>
+#include <lm_head_layer_cl.h>
 #include <reshape_cl.h>
 #include <rmsnorm_layer_cl.h>
 #include <swiglu_cl.h>
@@ -35,9 +37,9 @@ static void add_default_object(ClContext &cc) {
   //                    FullyConnectedLayerCl::type,
   //                    ml::train::LayerType::LAYER_FC);
 
-  // cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>,
-  //                    AdditionLayerCL::type,
-  //                    ml::train::LayerType::LAYER_ADDITION);
+  cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>,
+                     AdditionLayerCL::type,
+                     ml::train::LayerType::LAYER_ADDITION);
 
   // cc.registerFactory(nntrainer::createLayer<SwiGLULayerCl>,
   // SwiGLULayerCl::type,
@@ -54,6 +56,10 @@ static void add_default_object(ClContext &cc) {
   // cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>,
   // ConcatLayerCl::type,
   //                    ml::train::LayerType::LAYER_CONCAT);
+
+  cc.registerFactory(nntrainer::createLayer<CustomLMHeadLayerCl>,
+                     CustomLMHeadLayerCl::type,
+                     ml::train::LayerType::LAYER_LM_HEAD);
 }
 
 static void registerer(ClContext &cc) noexcept {

diff --git a/nntrainer/layers/cl_layers/addition_layer_cl.cpp b/nntrainer/layers/cl_layers/addition_layer_cl.cpp
@@ -37,7 +37,7 @@ void AdditionLayerCL::forwarding(RunLayerContext &context, bool training) {
     if (!idx) {
       hidden_.copy(input_);
     } else {
-      add_i_cl(input_, hidden_, context);
+      add_i_cl(input_, hidden_);
     }
   }
 }
@@ -77,7 +77,7 @@ void AdditionLayerCL::incremental_forwarding(RunLayerContext &context,
       if (!idx) {
         hidden_step.copy(input_step);
       } else {
-        add_i_cl(input_step, hidden_step, context);
+        add_i_cl(input_step, hidden_step);
       }
     }
   }

diff --git a/nntrainer/layers/cl_layers/addition_layer_cl.h b/nntrainer/layers/cl_layers/addition_layer_cl.h
@@ -15,6 +15,7 @@
 #define __ADDITION_LAYER_CL_H__
 #ifdef __cplusplus
 
+#include <cl_context.h>
 #include <common_properties.h>
 #include <layer_devel.h>
 
@@ -40,7 +41,7 @@ class AdditionLayerCL : public Layer {
   /**
    * @brief     Destructor of Addition Layer
    */
-  ~AdditionLayerCL(){};
+  ~AdditionLayerCL() {};
 
   /**
    *  @brief  Move constructor of AdditionLayer.

diff --git a/nntrainer/layers/cl_layers/custom_vocab_selection.cpp b/nntrainer/layers/cl_layers/custom_vocab_selection.cpp
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Yash Singh <[email protected]>
+ *
+ * @file   custom_vocab_selection.cpp
+ * @date   1 Oct 2024
+ * @brief  Implementation of custom vocab selection
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Yash Singh <[email protected]>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#include "custom_vocab_selection.h"
+#include <algorithm>
+
+/**
+ * @brief Construct the state shared by all vocab selection implementations
+ * @param[in] lshType LSH type tag stored in the object
+ * @param[in] lshChoices number of vocab ids getVocabs() reports per input row
+ * @param[in] hiddenSize number of hidden values hashed per row
+ * @param[in] vocabCnt number of vocab entries
+ * @note lshBlockNum and lshBits start at 0 and lshData starts empty; the
+ *       derived class is expected to fill them in.
+ */
+nntrainer::VocabSelection::VocabSelection(LshType lshType, int lshChoices,
+                                          int hiddenSize, int vocabCnt) :
+  // Listed in declaration order, which is the order in which members are
+  // actually initialized regardless of how this list is written.
+  hiddenSize(hiddenSize),
+  vocabCnt(vocabCnt),
+  lshBlockNum(0),
+  lshBits(0),
+  lshChoices(lshChoices),
+  lshType(lshType) {}
+
+/**
+ * @brief Destructor of VocabSelection; no explicit cleanup is performed
+ */
+nntrainer::VocabSelection::~VocabSelection() = default;
+
+/**
+ * @brief Build the per-vocab sign-bit table from the weight tensor
+ * @param[in] lshType LSH type tag, forwarded to VocabSelection
+ * @param[in] lshChoices number of vocab ids getVocabs() reports per row
+ * @param[in] hiddenSize number of hidden values hashed per vocab entry
+ * @param[in] vocabCnt number of vocab entries
+ * @param[in] weights tensor read as weights(0, 0, hidden index, vocab index);
+ *            only FP32 and FP16 data are read, any other data type leaves
+ *            every bit at 0
+ *
+ * The hidden dimension is split into lshBlockNum blocks of lshBlockSize bits.
+ * Bit k of block b for vocab entry v is 1 when
+ * weights(0, 0, b * lshBlockSize + k, v) > 0, and the block is stored at
+ * lshData[v * lshBlockNum + b]. Bits past hiddenSize in the last block are 0.
+ */
+nntrainer::VocabSelectionNNTrainer::VocabSelectionNNTrainer(
+  LshType lshType, int lshChoices, int hiddenSize, int vocabCnt,
+  nntrainer::Tensor &weights) :
+  VocabSelection(lshType, lshChoices, hiddenSize, vocabCnt) {
+  this->lshBlockNum = (hiddenSize + lshBlockSize - 1) / lshBlockSize;
+  this->lshBits = lshBlockNum * lshBlockSize;
+  this->lshData = std::vector<lshDataBlock>(this->vocabCnt * lshBlockNum);
+
+  // The data type cannot change while the table is built, so query it once
+  // instead of once per bit.
+  const bool isFp32 =
+    weights.getDataType() == nntrainer::TensorDim::DataType::FP32;
+  const bool isFp16 =
+    weights.getDataType() == nntrainer::TensorDim::DataType::FP16;
+
+  for (int block = 0; block < lshBlockNum; ++block) {
+    const int firstRow = block * lshBlockSize;
+    // The last block may cover fewer than lshBlockSize hidden values.
+    const int actualSize = std::min(lshBlockSize, hiddenSize - firstRow);
+    for (int vocab = 0; vocab < vocabCnt; ++vocab) {
+      // A default-constructed bitset is all zeros, so the padding bits past
+      // actualSize need no explicit clearing.
+      lshDataBlock d;
+      for (int k = 0; k < actualSize; ++k) {
+        if (isFp32) {
+          d[k] = weights.getValue(0, 0, firstRow + k, vocab) > 0;
+        } else if (isFp16) {
+          d[k] = weights.getValue<_FP16>(0, 0, firstRow + k, vocab) > 0;
+        }
+      }
+      this->lshData[vocab * lshBlockNum + block] = d;
+    }
+  }
+}
+
+/**
+ * @brief Pick lshChoices vocab ids per input row by sign-bit Hamming distance
+ * @param[in] input tensor read as input(0, 0, row, hidden index); only FP32
+ *            and FP16 data are read, any other data type hashes to all zeros
+ * @return one vector per row (input.height() rows), each of size lshChoices,
+ *         holding the ids of the vocab entries whose stored bits differ
+ *         least from the row's bits. Ties at the cut-off distance are taken
+ *         in ascending vocab id order. When fewer than lshChoices ids are
+ *         selected, the remaining slots keep the value 0.
+ * @note NOTE(review): input bits use ">= 0" whereas the constructor uses
+ *       "> 0" for the weights; confirm this asymmetry is intended.
+ */
+std::vector<std::vector<int>>
+nntrainer::VocabSelectionNNTrainer::getVocabs(const nntrainer::Tensor &input) {
+  const unsigned int batchSize = input.height();
+
+  // The data type is the same for every element, so query it once.
+  const bool isFp32 =
+    input.getDataType() == nntrainer::TensorDim::DataType::FP32;
+  const bool isFp16 =
+    input.getDataType() == nntrainer::TensorDim::DataType::FP16;
+
+  std::vector<std::vector<int>> res(batchSize);
+  for (unsigned int i = 0; i < batchSize; ++i) {
+    // Sign bits of row i, split into blocks. Blocks start out all zero, so
+    // the padding bits of the last block need no explicit clearing.
+    std::vector<lshDataBlock> d(lshBlockNum);
+    for (int k = 0; k < lshBlockNum; ++k) {
+      const int offset = k * lshBlockSize;
+      const int actualSize = std::min(lshBlockSize, hiddenSize - offset);
+      for (int j = 0; j < actualSize; ++j) {
+        if (isFp32) {
+          d[k][j] = input.getValue(0, 0, i, offset + j) >= 0;
+        } else if (isFp16) {
+          d[k][j] = input.getValue<_FP16>(0, 0, i, offset + j) >= 0;
+        }
+      }
+    }
+
+    // simResult[j]: number of differing bits between the row and vocab j.
+    // simCount[x]: how many vocab entries differ in exactly x bits.
+    std::vector<int> simResult(vocabCnt, 0);
+    std::vector<int> simCount(lshBits + 1, 0);
+    for (int j = 0; j < vocabCnt; ++j) {
+      for (int k = 0; k < lshBlockNum; ++k) {
+        simResult[j] += (d[k] ^ lshData[j * lshBlockNum + k]).count();
+      }
+      simCount[simResult[j]]++;
+    }
+
+    // Find the smallest distance at which the running total exceeds
+    // lshChoices. Everything strictly below it is taken, plus `leftover`
+    // entries at exactly that distance so the total is lshChoices.
+    int cut = lshBits + 1;
+    int leftover = 0;
+    int countSum = 0;
+    for (int j = 0; j <= lshBits; ++j) {
+      countSum += simCount[j];
+      if (countSum > lshChoices) {
+        cut = j;
+        leftover = simCount[j] - (countSum - lshChoices);
+        break;
+      }
+    }
+
+    // Fill the result slot in place instead of copying a temporary vector.
+    std::vector<int> &selectedVocabs = res[i];
+    selectedVocabs.resize(lshChoices);
+    int pos = 0;
+    for (int j = 0; j < vocabCnt; ++j) {
+      if (simResult[j] < cut) {
+        selectedVocabs[pos++] = j;
+      } else if (simResult[j] == cut && leftover > 0) {
+        selectedVocabs[pos++] = j;
+        --leftover;
+      }
+    }
+  }
+  return res;
+}
diff --git a/nntrainer/layers/cl_layers/custom_vocab_selection.h b/nntrainer/layers/cl_layers/custom_vocab_selection.h
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2024 Yash Singh <[email protected]>
+ *
+ * @file   custom_vocab_selection.h
+ * @date   1 Oct 2024
+ * @brief  Implementation of custom vocab selection
+ * @see    https://github.com/nnstreamer/nntrainer
+ * @author Yash Singh <[email protected]>
+ * @bug    No known bugs except for NYI items
+ *
+ */
+
+#ifndef VOCAB_SELECTION_H
+#define VOCAB_SELECTION_H
+
+#include <tensor.h>
+
+#ifndef LSH_BLOCK_SIZE
+#define LSH_BLOCK_SIZE 256
+#endif
+
+using namespace std;
+
+namespace nntrainer {
+
+/**
+ * @brief LSH type tag passed to the vocab selection classes
+ */
+enum LshType { NONE = 0, SIMHASH = 1, ORTHOSIMHASH = 2 };
+
+/**
+ * @brief One block of LSH_BLOCK_SIZE hash bits
+ */
+using lshDataBlock = std::bitset<LSH_BLOCK_SIZE>;
+
+/**
+ * @class VocabSelection
+ * @brief Abstract base holding the state shared by vocab selection
+ *        implementations; derived classes implement getVocabs().
+ */
+class VocabSelection {
+protected:
+  int hiddenSize;                          /**< hidden values hashed per row */
+  int vocabCnt;                            /**< number of vocab entries */
+  const int lshBlockSize = LSH_BLOCK_SIZE; /**< bits per lshDataBlock */
+  int lshBlockNum;                         /**< blocks per vocab entry */
+  int lshBits;                             /**< lshBlockSize * lshBlockNum */
+  int lshChoices;                    /**< vocab ids reported per input row */
+  LshType lshType;                   /**< LSH type tag */
+  std::vector<lshDataBlock> lshData; /**< hash bit blocks of the vocab */
+
+public:
+  /**
+   * @brief Store the selection parameters; lshBlockNum and lshBits start at 0
+   * @param[in] lshType LSH type tag
+   * @param[in] lshChoices number of vocab ids reported per input row
+   * @param[in] hiddenSize number of hidden values hashed per row
+   * @param[in] vocabCnt number of vocab entries
+   */
+  VocabSelection(LshType lshType, int lshChoices, int hiddenSize, int vocabCnt);
+
+  /**
+   * @brief Select vocab ids for every row of the given tensor
+   * @param[in] modelOutput tensor to select vocab ids for
+   * @return one vector of vocab ids per row
+   */
+  virtual std::vector<std::vector<int>>
+  getVocabs(const nntrainer::Tensor &modelOutput) = 0;
+
+  /**
+   * @brief Virtual destructor so a derived object can be destroyed safely
+   *        through a VocabSelection pointer
+   */
+  virtual ~VocabSelection();
+};
+
+/**
+ * @class VocabSelectionNNTrainer
+ * @brief VocabSelection implementation that works on nntrainer::Tensor data
+ */
+class VocabSelectionNNTrainer : public VocabSelection {
+protected:
+  /** NOTE(review): not assigned by the constructor in
+   *  custom_vocab_selection.cpp; confirm whether it is still needed. */
+  nntrainer::Tensor lshWeight;
+
+public:
+  /**
+   * @brief Build the hash bit table of the vocab from the weight tensor
+   * @param[in] lshType LSH type tag
+   * @param[in] lshChoices number of vocab ids reported per input row
+   * @param[in] hiddenSize number of hidden values hashed per vocab entry
+   * @param[in] vocabCnt number of vocab entries
+   * @param[in] weights weight tensor the hash bits are derived from
+   */
+  VocabSelectionNNTrainer(LshType lshType, int lshChoices, int hiddenSize,
+                          int vocabCnt, nntrainer::Tensor &weights);
+
+  /**
+   * @brief Select lshChoices vocab ids for every row of the given tensor
+   * @param[in] modelOutput tensor whose rows are hashed and compared
+   * @return one vector of vocab ids per row
+   */
+  std::vector<std::vector<int>>
+  getVocabs(const nntrainer::Tensor &modelOutput) override;
+
+  /**
+   * @brief Destructor of VocabSelectionNNTrainer
+   */
+  ~VocabSelectionNNTrainer() {}
+};
+
+} // namespace nntrainer
+
+#endif