diff --git a/api/ccapi/include/model.h b/api/ccapi/include/model.h index e8d185bbdb..a9e02e7333 100644 --- a/api/ccapi/include/model.h +++ b/api/ccapi/include/model.h @@ -308,6 +308,7 @@ class Model { * @param[in] init_seq_len initial sequence length * @param[in] from current working step index * @param[in] to next working step index + * @param[in] output_hidden_state return all hidden states if true, else return only the last hidden state * @retval list of output as float * * @note The output memory must not be freed by the caller */ @@ -315,7 +316,8 @@ class Model { incremental_inference(unsigned int batch, const std::vector &input, const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) = 0; + unsigned int to, + bool output_hidden_state = false) = 0; /** * @brief Summarize the model diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp index d1a4d9bf3b..ce88593a70 100644 --- a/nntrainer/models/neuralnet.cpp +++ b/nntrainer/models/neuralnet.cpp @@ -851,7 +851,7 @@ sharedConstTensors NeuralNetwork::incremental_inference( std::vector NeuralNetwork::incremental_inference( unsigned int batch_size, const std::vector &input, const std::vector &label, unsigned int init_seq_len, - unsigned int from, unsigned int to) { + unsigned int from, unsigned int to, bool output_hidden_state) { sharedConstTensors input_tensors, output_tensors; auto in_dim = getInputDimension(); @@ -884,27 +884,33 @@ std::vector NeuralNetwork::incremental_inference( unsigned int step = from ? 
0 : to - 1; for (auto &out : output_tensors) { - const auto &out_t = *out.get(); - float *last_out_buf_data = new float[batch_size * out_t.width()]; + auto out_t = *out.get(); + float *last_out_buf_data; - for (unsigned int batch = 0; batch < batch_size; ++batch) { - if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { + if (output_hidden_state) { + last_out_buf_data = out_t.getData(); + } else { + last_out_buf_data = new float[batch_size * out_t.width()]; + + for (unsigned int batch = 0; batch < batch_size; ++batch) { + if (out->getDataType() == ml::train::TensorDim::DataType::FP16) { #ifdef ENABLE_FP16 - const _FP16 *out_t_batch_ptr = out_t.getData<_FP16>() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + const _FP16 *out_t_batch_ptr = + out_t.getData<_FP16>() + batch * out_t.getDim().getFeatureLen() + + step * out_t.width(); + scopy(out_t.width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); #else - throw std::invalid_argument("Error: enable-fp16 is not set"); + throw std::invalid_argument("Error: enable-fp16 is not set"); #endif - } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { - const float *out_t_batch_ptr = out_t.getData() + - batch * out_t.getDim().getFeatureLen() + - step * out_t.getDim().width(); - scopy(out_t.getDim().width(), out_t_batch_ptr, 1, - last_out_buf_data + batch * out_t.width(), 1); + } else if (out->getDataType() == ml::train::TensorDim::DataType::FP32) { + const float *out_t_batch_ptr = + out_t.getData() + batch * out_t.getDim().getFeatureLen() + + step * out_t.width(); + scopy(out_t.width(), out_t_batch_ptr, 1, + last_out_buf_data + batch * out_t.width(), 1); + } } } diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h index 4d2f840267..d964c60ebd 100644 --- a/nntrainer/models/neuralnet.h +++ b/nntrainer/models/neuralnet.h @@ 
-408,7 +408,8 @@ s * @retval shared_ptr const std::vector &label, unsigned int init_seq_len, unsigned int from, - unsigned int to) override; + unsigned int to, + bool output_hidden_state = false) override; /** * @brief Run NeuralNetwork train with callback function by user