Skip to content

Commit

Permalink
[ Layer ] Added Custom Multi Head Attention layer from FELICE
Browse files Browse the repository at this point in the history
Incorporated the CPU version of the custom MHA layer from the FELICE repo

Signed-off-by: Debadri Samaddar <[email protected]>
  • Loading branch information
s-debadri committed Jun 25, 2024
1 parent 9c8e874 commit 841c142
Show file tree
Hide file tree
Showing 8 changed files with 1,291 additions and 1 deletion.
13 changes: 13 additions & 0 deletions api/ccapi/include/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ enum LayerType {
ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING, /**< Positional Encoding Layer type
*/
LAYER_IDENTITY = ML_TRAIN_LAYER_TYPE_IDENTITY, /**< Identity Layer type */
LAYER_CUSTOM_MULTI_HEAD_ATTENTION =
ML_TRAIN_LAYER_TYPE_CUSTOM_MULTI_HEAD_ATTENTION, /**< Multi Head Attention
Layer type */
LAYER_PREPROCESS_FLIP =
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP, /**< Preprocess flip Layer type */
LAYER_PREPROCESS_TRANSLATE =
Expand Down Expand Up @@ -503,6 +506,16 @@ MultiHeadAttention(const std::vector<std::string> &properties = {}) {
return createLayer(LayerType::LAYER_MULTI_HEAD_ATTENTION, properties);
}

/**
 * @brief Helper function to create Custom Multi Head Attention Layer
 *
 * @param properties layer properties as "key=value" strings, forwarded
 *        unchanged to the created layer (empty by default)
 * @param compute_engine engine the layer should run on; defaults to
 *        LayerComputeEngine::CPU (this commit adds only the CPU path —
 *        presumably other engines are handled by createLayer; verify)
 * @return std::unique_ptr<Layer> owning the newly created
 *         Custom Multi Head Attention layer
 */
inline std::unique_ptr<Layer> CustomMultiHeadAttention(
  const std::vector<std::string> &properties = {},
  const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) {
  // Delegates to the generic factory with the dedicated custom-MHA type tag.
  return createLayer(LayerType::LAYER_CUSTOM_MULTI_HEAD_ATTENTION, properties,
                     compute_engine);
}

/**
* @brief Helper function to create Positional Encoding Layer
*/
Expand Down
2 changes: 2 additions & 0 deletions api/nntrainer-api-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ typedef enum {
ML_TRAIN_LAYER_TYPE_POSITIONAL_ENCODING =
28, /**< Positional Encoding Layer type (Since 7.0) */
ML_TRAIN_LAYER_TYPE_IDENTITY = 29, /**< Identity Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_CUSTOM_MULTI_HEAD_ATTENTION =
34, /**< Custom Multi Head Attention Layer type (Since 8.0) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_FLIP =
300, /**< Preprocess flip Layer (Since 6.5) */
ML_TRAIN_LAYER_TYPE_PREPROCESS_TRANSLATE =
Expand Down
4 changes: 4 additions & 0 deletions nntrainer/app_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include <conv2d_layer.h>
#include <cross_entropy_sigmoid_loss_layer.h>
#include <cross_entropy_softmax_loss_layer.h>
#include <custom_multi_head_attention_layer.h>
#include <dropout.h>
#include <embedding.h>
#include <fc_layer.h>
Expand Down Expand Up @@ -299,6 +300,9 @@ static void add_default_object(AppContext &ac) {
ac.registerFactory(nntrainer::createLayer<MultiHeadAttentionLayer>,
MultiHeadAttentionLayer::type,
LayerType::LAYER_MULTI_HEAD_ATTENTION);
ac.registerFactory(nntrainer::createLayer<CustomMultiHeadAttentionLayer>,
CustomMultiHeadAttentionLayer::type,
LayerType::LAYER_CUSTOM_MULTI_HEAD_ATTENTION);
ac.registerFactory(nntrainer::createLayer<ReduceMeanLayer>,
ReduceMeanLayer::type, LayerType::LAYER_REDUCE_MEAN);
ac.registerFactory(nntrainer::createLayer<PositionalEncodingLayer>,
Expand Down
Loading

0 comments on commit 841c142

Please sign in to comment.