Skip to content

Commit

Permalink
[ Layer ] Added Custom Multi Head Attention layer from FELICE
Browse files Browse the repository at this point in the history
Incorporated the CPU version of the custom MHA layer from the FELICE repo

Signed-off-by: Debadri Samaddar <[email protected]>
  • Loading branch information
s-debadri committed Jun 25, 2024
1 parent 9c8e874 commit 841c142
Show file tree
Hide file tree
Showing 8 changed files with 1,291 additions and 1 deletion.
13 changes: 13 additions & 0 deletions api/ccapi/include/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ enum LayerType {
ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING, /**< Positional Encoding Layer type
*/
LAYER_IDENTITY = ML_TRAIN_LAYER_TYPE_IDENTITY, /**< Identity Layer type */
LAYER_CUSTOM_MULTI_HEAD_ATTENTION =
ML_TRAIN_LAYER_TYPE_CUSTOM_MULTI_HEAD_ATTENTION, /**< Multi Head Attention
Layer type */
LAYER_PREPROCESS_FLIP =
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP, /**< Preprocess flip Layer type */
LAYER_PREPROCESS_TRANSLATE =
Expand Down Expand Up @@ -503,6 +506,16 @@ MultiHeadAttention(const std::vector<std::string> &properties = {}) {
return createLayer(LayerType::LAYER_MULTI_HEAD_ATTENTION, properties);
}

/**
 * @brief Helper function to create Custom Multi Head Attention Layer
 *
 * @param properties layer properties as "key=value" strings, forwarded
 *        unchanged to the created layer (empty by default)
 * @param compute_engine engine the layer should run on; defaults to
 *        LayerComputeEngine::CPU (this commit adds only the CPU path —
 *        presumably other engines are handled by createLayer; verify)
 * @return std::unique_ptr<Layer> owning the newly created
 *         Custom Multi Head Attention layer
 */
inline std::unique_ptr<Layer> CustomMultiHeadAttention(
  const std::vector<std::string> &properties = {},
  const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
  // Delegates to the generic factory with the dedicated custom-MHA type tag.
  return createLayer(LayerType::LAYER_CUSTOM_MULTI_HEAD_ATTENTION, properties,
                     compute_engine);
}

/**
* @brief Helper function to create Positional Encoding Layer
*/
Expand Down
2 changes: 2 additions & 0 deletions api/nntrainer-api-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ typedef enum {
ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING =
28, /**< Positional Encoding Layer type (Since 7.0) */
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_CUSTOM_MULTI_HEAD_ATTENTION =
34, /**< Custom Multi Head Attention Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
300, /**< Preprocess flip Layer (Since 6.5) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
Expand Down
4 changes: 4 additions & 0 deletions nntrainer/app_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include <conv2d_layer.h>
#include <cross_entropy_sigmoid_loss_layer.h>
#include <cross_entropy_softmax_loss_layer.h>
#include <custom_multi_head_attention_layer.h>
#include <dropout.h>
#include <embedding.h>
#include <fc_layer.h>
Expand Down Expand Up @@ -299,6 +300,9 @@ static void add_default_object(AppContext &ac) {
ac.registerFactory(nntrainer::createLayer<MultiHeadAttentionLayer>,
MultiHeadAttentionLayer::type,
LayerType::LAYER_MULTI_HEAD_ATTENTION);
ac.registerFactory(nntrainer::createLayer<CustomMultiHeadAttentionLayer>,
CustomMultiHeadAttentionLayer::type,
LayerType::LAYER_CUSTOM_MULTI_HEAD_ATTENTION);
ac.registerFactory(nntrainer::createLayer<ReduceMeanLayer>,
ReduceMeanLayer::type, LayerType::LAYER_REDUCE_MEAN);
ac.registerFactory(nntrainer::createLayer<PositionalEncodingLayer>,
Expand Down
Loading

0 comments on commit 841c142

Please sign in to comment.