diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h
index 88d231b46c..198b7c7c78 100644
--- a/nntrainer/layers/common_properties.h
+++ b/nntrainer/layers/common_properties.h
@@ -1335,12 +1335,12 @@ class AverageAttentionWeight : public Property<float> {
 /**
  * @brief LoRA rank property, it is used to set rank of LoRA weight.
  * @details
-*/
-class LoraRank : public Property<int>{
- public:
-  static constexpr const char *key =
-    "lora_rank"; /**< unique key to access */
-  using prop_tag = int_prop_tag;
+ */
+class LoraRank : public PositiveIntegerProperty {
+public:
+  static constexpr const char *key = "lora_rank"; /**< unique key to access */
+  using prop_tag = uint_prop_tag; /**< property type */
+  ;
 };
 
 /**
diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index b9a7a31261..02d3127966 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -41,7 +41,7 @@ enum LORAParams { loraA, loraB, loraW };
 
 FullyConnectedLayer::FullyConnectedLayer() :
   LayerImpl(),
-  fc_props(props::Unit(), props::LoraRank()){
+  fc_props(props::Unit(), props::LoraRank()) {
   weight_idx.fill(std::numeric_limits<unsigned>::max());
 }
 
@@ -58,7 +58,9 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) {
   auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
 
   auto unit = std::get<props::Unit>(fc_props).get();
-  auto lora_rank = (std::get<props::LoraRank>(fc_props).empty())? 0 : std::get<props::LoraRank>(fc_props).get();
+  auto lora_rank = (std::get<props::LoraRank>(fc_props).empty())
+                     ? 0
+                     : std::get<props::LoraRank>(fc_props).get();
 
   NNTR_THROW_IF(context.getNumInputs() != 1, std::invalid_argument)
     << "Fully connected layer takes only one input";
@@ -106,11 +108,12 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) {
   }
 
   // create weights for LoRA
-  if(lora_rank){
+  if (lora_rank) {
 
     /** set LoRA specifications */
-    // loraA is a row vector (in_dim.width, lora_rank), loraB is a col vector of weight (lora_rank, out_dim),
-    // where shape of (loraA @ loraB) = shape of (W)
+    // loraA is a row vector (in_dim.width, lora_rank), loraB is a col vector of
+    // weight (lora_rank, out_dim), where shape of (loraA @ loraB) = shape of
+    // (W)
     TensorDim loraA_dim(
       1, is_nchw ? 1 : lora_rank, is_nchw ? in_dim.width() : 1,
       is_nchw ? lora_rank : in_dim.channel(),
@@ -133,10 +136,9 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) {
 
     // save weight_lora as a tensor to be updated by (loraA @ loraB)
     lora_idx[LORAParams::loraW] = context.requestTensor(
-      weight_dim, "weight_lora", Tensor::Initializer::NONE, true, 
+      weight_dim, "weight_lora", Tensor::Initializer::NONE, true,
       TensorLifespan::ITERATION_LIFESPAN);
   }
-
 }
 
 void FullyConnectedLayer::exportTo(
@@ -150,16 +152,13 @@ void FullyConnectedLayer::setProperty(const std::vector<std::string> &values) {
   LayerImpl::setProperty(remain_props);
 }
 
-/**
- * @brief forwarding function for lora.
- * It would be called during `forwarding` of FC layer.
-*/
-void FullyConnectedLayer::forwarding_lora(RunLayerContext &context, Tensor &weight){
+void FullyConnectedLayer::forwarding_lora(RunLayerContext &context,
+                                          Tensor &weight) {
   Tensor &loraA = context.getWeight(lora_idx[LORAParams::loraA]);
   Tensor &loraB = context.getWeight(lora_idx[LORAParams::loraB]);
   Tensor &weight_lora = context.getTensor(lora_idx[LORAParams::loraW]);
-  loraA.dot(loraB, weight_lora);   // weight_lora = loraA @ loraB
-  weight.add(weight_lora, weight); // weight += weight_lora
+  loraA.dot(loraB, weight_lora); // weight_lora = loraA @ loraB
+  weight.add_i(weight_lora);
 }
 
 void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
@@ -180,11 +179,11 @@ void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
       context.getWeightObject(weight_idx[FCParams::weight]).getOutputAxis();
     weight.dequantize(weight_, axis);
 
-    if(!std::get<props::LoraRank>(fc_props).empty())
+    if (!std::get<props::LoraRank>(fc_props).empty())
       forwarding_lora(context, weight_);
     input_.dot(weight_, hidden_, false, false);
   } else {
-    if(!std::get<props::LoraRank>(fc_props).empty())
+    if (!std::get<props::LoraRank>(fc_props).empty())
      forwarding_lora(context, weight);
     input_.dot(weight, hidden_, false, false);
   }
@@ -237,11 +236,6 @@ void FullyConnectedLayer::incremental_forwarding(RunLayerContext &context,
   }
 }
 
-/**
- * @note
- * [note for LoRA] implicit calcDerivative is implicitly applied.
- * The weight is already updated with the LoRA's (W = W + W_lora)
-*/
 void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
   Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
 
@@ -254,7 +248,7 @@ void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
 void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
 
   // (baseline) calcGradient
-  if(std::get<props::LoraRank>(fc_props).empty()){
+  if (std::get<props::LoraRank>(fc_props).empty()) {
     Tensor &djdw = context.getWeightGrad(weight_idx[FCParams::weight]);
 
     const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
@@ -278,9 +272,9 @@ void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
       !context.isGradientFirstAccess(weight_idx[FCParams::weight]));
   } else {
     // (LoRA) calcGradient
-    Tensor &djdla = context.getWeightGrad(lora_idx[LORAParams::loraA]);
-    Tensor &djdlb = context.getWeightGrad(lora_idx[LORAParams::loraB]);
-    Tensor &djdlora_w = context.getTensorGrad(lora_idx[LORAParams::loraW]);
+    Tensor &djdla = context.getWeightGrad(lora_idx[LORAParams::loraA]);
+    Tensor &djdlb = context.getWeightGrad(lora_idx[LORAParams::loraB]);
+    Tensor &djdlora_w = context.getTensorGrad(lora_idx[LORAParams::loraW]);
 
     const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
     Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
@@ -288,16 +282,16 @@ void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
     Tensor &lora_B = context.getWeight(lora_idx[LORAParams::loraB]);
 
     // (cf) forward
-    // input_.dot(lora_weight, hidden) : hidden = input @ lora_weight
-    // lora_A.dot(lora_B, lora_weight) : lora_weight = lora_A @ lora_B
+    // input_.dot(lora_weight, hidden) : hidden = input @ lora_weight
+    // lora_A.dot(lora_B, lora_weight) : lora_weight = lora_A @ lora_B
     input_.dot_deriv_wrt_2(
       djdlora_w, derivative_, false, false,
       !context.isGradientFirstAccess(lora_idx[LORAParams::loraW]));
     lora_A.dot_deriv_wrt_2(
       djdlb, djdlora_w, false, false,
       !context.isGradientFirstAccess(lora_idx[LORAParams::loraB]));
-    djdla.dot_batched_deriv_wrt_1(
-      lora_B, djdlora_w,false, false,
+    djdla.dot_deriv_wrt_1(
+      lora_B, djdlora_w, false, false,
       !context.isGradientFirstAccess(lora_idx[LORAParams::loraA]));
   }
 }
diff --git a/nntrainer/layers/fc_layer.h b/nntrainer/layers/fc_layer.h
index 5df197de2e..f8a4794828 100644
--- a/nntrainer/layers/fc_layer.h
+++ b/nntrainer/layers/fc_layer.h
@@ -72,6 +72,9 @@ class FullyConnectedLayer : public LayerImpl {
 
   /**
    * @copydoc Layer::calcGradient(RunLayerContext &context)
+   * @note
+   * [note for LoRA] calcDerivative is applied implicitly;
+   * the weight is already updated with the LoRA weight (W = W + W_lora).
    */
   void calcGradient(RunLayerContext &context) override;
 
@@ -99,16 +102,21 @@ class FullyConnectedLayer : public LayerImpl {
    * &value)
    */
   void setProperty(const std::vector<std::string> &values) override;
-  
+
+  /**
+   * @brief forwarding function for LoRA.
+   * It is called during `forwarding` of the FC layer.
+   */
   void forwarding_lora(RunLayerContext &context, Tensor &weight);
 
   inline static const std::string type = "fully_connected";
 
 private:
   std::tuple<props::Unit, props::LoraRank>
-    fc_props; /**< fc layer properties : unit - number of output neurons, lora_rank : rank of lora (optional) */
+    fc_props; /**< fc layer properties : unit - number of output neurons,
+                 lora_rank : rank of lora (optional) */
   std::array<unsigned int, 2> weight_idx; /**< indices of the weights */
-  std::array<unsigned int, 3> lora_idx;   /**< indices of the lora weights */
+  std::array<unsigned int, 3> lora_idx; /**< indices of the lora weights */
 };
 
 } // namespace nntrainer
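For reference, the LoRA path added by this patch computes hidden = input @ (W + loraA @ loraB) during forwarding, and in calcGradient derives djdlora_w = input^T @ derivative, djdlb = loraA^T @ djdlora_w, and djdla = djdlora_w @ loraB^T. Below is a minimal standalone C++ sketch of that math with made-up shapes and values; it does not use nntrainer's Tensor API, and the helper names (matmul, transpose) are illustrative only.

// Illustrative sketch of the LoRA forward/gradient math (not nntrainer code).
#include <cstddef>
#include <cstdio>
#include <vector>

using Mat = std::vector<std::vector<float>>;

// Plain (M x K) @ (K x N) matrix multiply.
static Mat matmul(const Mat &a, const Mat &b) {
  size_t m = a.size(), k = a[0].size(), n = b[0].size();
  Mat c(m, std::vector<float>(n, 0.0f));
  for (size_t i = 0; i < m; ++i)
    for (size_t p = 0; p < k; ++p)
      for (size_t j = 0; j < n; ++j)
        c[i][j] += a[i][p] * b[p][j];
  return c;
}

static Mat transpose(const Mat &a) {
  Mat t(a[0].size(), std::vector<float>(a.size(), 0.0f));
  for (size_t i = 0; i < a.size(); ++i)
    for (size_t j = 0; j < a[0].size(); ++j)
      t[j][i] = a[i][j];
  return t;
}

int main() {
  // Shapes: X (batch x in), W (in x out), loraA (in x rank), loraB (rank x out)
  Mat X = {{1.0f, 2.0f, 3.0f}};                       // batch = 1, in = 3
  Mat W = {{1.0f, 0.0f}, {0.0f, 1.0f}, {1.0f, 1.0f}}; // in = 3, out = 2
  Mat loraA = {{0.1f}, {0.2f}, {0.3f}};               // in = 3, rank = 1
  Mat loraB = {{0.5f, -0.5f}};                        // rank = 1, out = 2

  // forwarding_lora: weight_lora = loraA @ loraB; weight += weight_lora
  Mat weight_lora = matmul(loraA, loraB);
  Mat W_eff = W;
  for (size_t i = 0; i < W.size(); ++i)
    for (size_t j = 0; j < W[0].size(); ++j)
      W_eff[i][j] += weight_lora[i][j];

  // forwarding: hidden = input @ weight
  Mat hidden = matmul(X, W_eff);

  // calcGradient (LoRA branch), given the incoming derivative dL/dY
  Mat dY = {{1.0f, 1.0f}};                         // same shape as hidden
  Mat djdlora_w = matmul(transpose(X), dY);        // dL/d(weight_lora) = X^T @ dY
  Mat djdlb = matmul(transpose(loraA), djdlora_w); // dL/d(loraB) = loraA^T @ dL/d(weight_lora)
  Mat djdla = matmul(djdlora_w, transpose(loraB)); // dL/d(loraA) = dL/d(weight_lora) @ loraB^T

  std::printf("hidden: %.2f %.2f\n", hidden[0][0], hidden[0][1]);
  std::printf("djdla : %.2f %.2f %.2f\n", djdla[0][0], djdla[1][0], djdla[2][0]);
  std::printf("djdlb : %.2f %.2f\n", djdlb[0][0], djdlb[0][1]);
  return 0;
}

The three gradient matmuls map one-to-one onto the dot_deriv_wrt_2 / dot_deriv_wrt_1 calls in the patch's calcGradient LoRA branch.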