-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GPU/OpenCL/Update] Initial version of LM Head layer with OpencCl ops…
… and Update Addition Layer on GPU with latest Pipeline changes Added initial version of LM head layer fpr GPU and removed dependencies of cl_context for addition_layer. Signed-off-by: Yash Singh <[email protected]>
- Loading branch information
1 parent
f222ecf
commit 781e16e
Showing
14 changed files
with
739 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
* @author Parichay Kapoor <[email protected]> | ||
* @author Debadri Samaddar <[email protected]> | ||
* @author Niket Agarwal <[email protected]> | ||
* @author Yash Singh <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* @brief This is layers interface for c++ API | ||
* | ||
|
@@ -103,7 +104,8 @@ enum LayerType { | |
derivative */ | ||
LAYER_UPSAMPLE2D, /**< Upsample 2D Layer type */ | ||
LAYER_RMSNORM = ML_TRAIN_LAYER_TYPE_RMSNORM, /**<RMS NORM Layer */ | ||
LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN /**< Unknown */ | ||
LAYER_UNKNOWN = ML_TRAIN_LAYER_TYPE_UNKNOWN, /**< Unknown */ | ||
LAYER_LM_HEAD = ML_TRAIN_LAYER_TYPE_LM_HEAD, /**< LM Head Layer */ | ||
}; | ||
|
||
/** | ||
|
@@ -398,6 +400,15 @@ Addition(const std::vector<std::string> &properties = {}, | |
return createLayer(LayerType::LAYER_ADDITION, properties, compute_engine); | ||
} | ||
|
||
/** | ||
* @brief Helper function to create lm_head layer | ||
*/ | ||
inline std::unique_ptr<Layer> | ||
LmHead(const std::vector<std::string> &properties = {}, | ||
const LayerComputeEngine &compute_engine = LayerComputeEngine::CPU) { | ||
return createLayer(LayerType::LAYER_LM_HEAD, properties, compute_engine); | ||
} | ||
|
||
/** | ||
* @brief Helper function to create concat layer | ||
*/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Debadri Samaddar <[email protected]> | ||
* @author Niket Agarwal <[email protected]> | ||
* @author Yash Singh <[email protected]> | ||
* @author Thummala Pallavi <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* @brief This file contains app context related functions and classes that | ||
|
@@ -19,6 +20,7 @@ | |
#include <cl_context.h> | ||
#include <concat_cl.h> | ||
#include <fc_layer_cl.h> | ||
#include <lm_head_layer_cl.h> | ||
#include <reshape_cl.h> | ||
#include <rmsnorm_layer_cl.h> | ||
#include <swiglu_cl.h> | ||
|
@@ -35,9 +37,9 @@ static void add_default_object(ClContext &cc) { | |
// FullyConnectedLayerCl::type, | ||
// ml::train::LayerType::LAYER_FC); | ||
|
||
// cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>, | ||
// AdditionLayerCL::type, | ||
// ml::train::LayerType::LAYER_ADDITION); | ||
cc.registerFactory(nntrainer::createLayer<AdditionLayerCL>, | ||
AdditionLayerCL::type, | ||
ml::train::LayerType::LAYER_ADDITION); | ||
|
||
// cc.registerFactory(nntrainer::createLayer<SwiGLULayerCl>, | ||
// SwiGLULayerCl::type, | ||
|
@@ -54,6 +56,10 @@ static void add_default_object(ClContext &cc) { | |
// cc.registerFactory(nntrainer::createLayer<ConcatLayerCl>, | ||
// ConcatLayerCl::type, | ||
// ml::train::LayerType::LAYER_CONCAT); | ||
|
||
cc.registerFactory(nntrainer::createLayer<CustomLMHeadLayerCl>, | ||
CustomLMHeadLayerCl::type, | ||
ml::train::LayerType::LAYER_LM_HEAD); | ||
} | ||
|
||
static void registerer(ClContext &cc) noexcept { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Yash Singh <[email protected]>> | ||
* | ||
* @file custom_vocab_selection.cpp | ||
* @date 1 Oct 2024 | ||
* @brief Implementation of custom vocab selection | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Yash Singh <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* | ||
*/ | ||
|
||
#include "custom_vocab_selection.h" | ||
#include <algorithm> | ||
|
||
nntrainer::VocabSelection::VocabSelection(LshType lshType, int lshChoices, | ||
int hiddenSize, int vocabCnt) : | ||
lshType(lshType), | ||
lshChoices(lshChoices), | ||
vocabCnt(vocabCnt), | ||
hiddenSize(hiddenSize), | ||
lshBlockNum(0), | ||
lshBits(0) {} | ||
|
||
nntrainer::VocabSelection::~VocabSelection() {} | ||
|
||
nntrainer::VocabSelectionNNTrainer::VocabSelectionNNTrainer( | ||
LshType lshType, int lshChoices, int hiddenSize, int vocabCnt, | ||
nntrainer::Tensor &weights) : | ||
VocabSelection(lshType, lshChoices, hiddenSize, vocabCnt) { | ||
this->lshBlockNum = (hiddenSize + lshBlockSize - 1) / lshBlockSize; | ||
this->lshBits = lshBlockNum * lshBlockSize; | ||
this->lshData = std::vector<lshDataBlock>(this->vocabCnt * lshBlockNum); | ||
|
||
// for (unsigned int i = 0; i < vocabCnt; ++i) { | ||
// for (unsigned int j = 0; j < lshBlockNum; ++j) { | ||
// unsigned int actualSize = std::min(lshBlockSize, hiddenSize - | ||
// (int)j * lshBlockSize); lshDataBlock d; for (unsigned int k = 0; k | ||
// < actualSize; ++k) { | ||
// d[k] = weights.getValue<_FP16>(0, 0, i, j * lshBlockSize + k) > | ||
// 0 ? 1 : 0; | ||
// } | ||
// for (unsigned int k = actualSize; k < lshBlockSize; ++k) { | ||
// d[k] = 0; | ||
// } | ||
// this->lshData[i * lshBlockNum + j] = d; | ||
// } | ||
// } | ||
|
||
for (unsigned int i = 0; i < lshBlockNum; ++i) { | ||
unsigned int actualSize = | ||
std::min(lshBlockSize, hiddenSize - (int)i * lshBlockSize); | ||
for (unsigned int j = 0; j < vocabCnt; ++j) { | ||
lshDataBlock d; | ||
for (unsigned int k = 0; k < actualSize; ++k) { | ||
if (weights.getDataType() == nntrainer::TensorDim::DataType::FP32) { | ||
d[k] = weights.getValue(0, 0, i * lshBlockSize + k, j) > 0 ? 1 : 0; | ||
} else if (weights.getDataType() == | ||
nntrainer::TensorDim::DataType::FP16) { | ||
d[k] = | ||
weights.getValue<_FP16>(0, 0, i * lshBlockSize + k, j) > 0 ? 1 : 0; | ||
} | ||
} | ||
for (unsigned int k = actualSize; k < lshBlockSize; ++k) { | ||
d[k] = 0; | ||
} | ||
this->lshData[j * lshBlockNum + i] = d; | ||
} | ||
} | ||
} | ||
|
||
std::vector<std::vector<int>> | ||
nntrainer::VocabSelectionNNTrainer::getVocabs(const nntrainer::Tensor &input) { | ||
unsigned int batchSize = input.height(); | ||
|
||
std::vector<std::vector<int>> res = std::vector<std::vector<int>>(batchSize); | ||
for (int i = 0; i < batchSize; i++) { | ||
std::vector<lshDataBlock> d(lshBlockNum); | ||
for (int k = 0; k < lshBlockNum; k++) { | ||
int actualSize = std::min(lshBlockSize, hiddenSize - k * lshBlockSize); | ||
for (int j = 0; j < actualSize; j++) { | ||
if (input.getDataType() == nntrainer::TensorDim::DataType::FP32) { | ||
d[k][j] = input.getValue(0, 0, i, j + k * lshBlockSize) >= 0 ? 1 : 0; | ||
} else if (input.getDataType() == | ||
nntrainer::TensorDim::DataType::FP16) { | ||
d[k][j] = | ||
input.getValue<_FP16>(0, 0, i, j + k * lshBlockSize) >= 0 ? 1 : 0; | ||
} | ||
} | ||
for (int j = actualSize; j < lshBlockSize; j++) { | ||
d[k][j] = 0; | ||
} | ||
} | ||
std::vector<int> simResult(vocabCnt, 0); | ||
std::vector<int> simCount(lshBits + 1, 0); | ||
for (int j = 0; j < vocabCnt; j++) { | ||
for (int k = 0; k < lshBlockNum; k++) { | ||
simResult[j] += (d[k] ^ lshData[j * lshBlockNum + k]).count(); | ||
} | ||
simCount[simResult[j]]++; | ||
} | ||
int cut = lshBits + 1; | ||
int leftover = 0; | ||
int countSum = 0; | ||
for (int j = 0; j <= lshBits; j++) { | ||
countSum += simCount[j]; | ||
if (countSum > lshChoices) { | ||
cut = j; | ||
leftover = simCount[j] - (countSum - lshChoices); | ||
break; | ||
} | ||
} | ||
std::vector<int> selectedVocabs(lshChoices); | ||
int pos = 0; | ||
for (int j = 0; j < vocabCnt; j++) { | ||
if (simResult[j] <= cut) { | ||
if (simResult[j] < cut) { | ||
selectedVocabs[pos] = j; | ||
pos++; | ||
} else if (leftover > 0) { | ||
selectedVocabs[pos] = j; | ||
pos++; | ||
leftover--; | ||
} | ||
} | ||
} | ||
res[i] = selectedVocabs; | ||
} | ||
return res; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// SPDX-License-Identifier: Apache-2.0 | ||
/** | ||
* Copyright (C) 2024 Yash Singh <[email protected]>> | ||
* | ||
* @file custom_vocab_selection.h | ||
* @date 1 Oct 2024 | ||
* @brief Implementation of custom vocab selection | ||
* @see https://github.com/nnstreamer/nntrainer | ||
* @author Yash Singh <[email protected]> | ||
* @bug No known bugs except for NYI items | ||
* | ||
*/ | ||
|
||
#ifndef VOCAB_SELECTION_H | ||
#define VOCAB_SELECTION_H | ||
|
||
#include <tensor.h> | ||
|
||
#ifndef LSH_BLOCK_SIZE | ||
#define LSH_BLOCK_SIZE 256 | ||
#endif | ||
|
||
using namespace std; | ||
|
||
namespace nntrainer { | ||
|
||
enum LshType { NONE = 0, SIMHASH = 1, ORTHOSIMHASH = 2 }; | ||
typedef std::bitset<LSH_BLOCK_SIZE> lshDataBlock; | ||
|
||
class VocabSelection { | ||
protected: | ||
int hiddenSize; | ||
int vocabCnt; | ||
const int lshBlockSize = LSH_BLOCK_SIZE; | ||
int lshBlockNum; | ||
int lshBits; // lshBlockSize * lshBlockNum | ||
int lshChoices; | ||
LshType lshType; | ||
std::vector<lshDataBlock> lshData; | ||
|
||
public: | ||
VocabSelection(LshType lshType, int lshChoices, int hiddenSize, int vocabCnt); | ||
virtual std::vector<std::vector<int>> | ||
getVocabs(const nntrainer::Tensor &modelOutput) = 0; | ||
~VocabSelection(); | ||
}; | ||
|
||
class VocabSelectionNNTrainer : public VocabSelection { | ||
protected: | ||
nntrainer::Tensor lshWeight; | ||
|
||
public: | ||
VocabSelectionNNTrainer(LshType lshType, int lshChoices, int hiddenSize, | ||
int vocabCnt, nntrainer::Tensor &weights); | ||
virtual std::vector<std::vector<int>> | ||
getVocabs(const nntrainer::Tensor &modelOutput); | ||
~VocabSelectionNNTrainer() {}; | ||
}; | ||
|
||
} // namespace nntrainer | ||
|
||
#endif |
Oops, something went wrong.