diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index 2ac17f811f0055..407ed3b87fd4e8 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -11,7 +11,6 @@ #include "fully_connected_inst.h" #include "lstm_seq_inst.h" #include "intel_gpu/runtime/format.hpp" -#include "intel_gpu/primitives/mutable_data.hpp" #include "permute_inst.h" #include "crop_inst.h" #ifdef ENABLE_ONEDNN_FOR_GPU @@ -180,9 +179,11 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std: auto& crop0_node = get_crop_node(0); auto& crop1_node = get_crop_node(1); - auto& crop2_node = get_crop_node(2); - auto& crop3_node = get_crop_node(3); - std::vector con_input{input_info(crop_id_b + "1"), input_info(crop_id_b + "0"), input_info(crop_id_b + "2"), input_info(crop_id_b + "3")}; + auto crop2_id = primitive_id(crop_id_b + std::to_string(2)); + auto crop2_prim = std::make_shared(crop2_id, reorder_id, cldnn::tensor{dir_num, static_cast(2*hiddenSize), 1, size_third}, + cldnn::tensor{0, static_cast(2*hiddenSize), 0, 0}); + auto& crop2_node = p.get_or_create(crop2_prim); + std::vector con_input{input_info(crop_id_b + "1"), input_info(crop_id_b + "0"), input_info(crop_id_b + "2")}; cldnn::primitive_id concat_id{input_id + "cont"}; auto con = std::make_shared(concat_id, con_input, 1); auto& con_node = p.get_or_create(con); @@ -190,10 +191,8 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std: p.add_intermediate(crop1_node, con_node, prev, true); p.add_connection(prev, crop0_node, 0); p.add_connection(prev, crop2_node, 0); - p.add_connection(prev, crop3_node, 0); p.add_connection(crop0_node, con_node, 0); p.add_connection(crop2_node, con_node, 0); - p.add_connection(crop3_node, con_node, 0); std::string permute_id = input_id + "_perx"; std::vector ord{0, 2, 1}; auto permute = std::make_shared(permute_id, input_info{concat_id}, ord); @@ -208,7 +207,6 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std: set_implementation_and_output(crop1_node); set_implementation_and_output(crop0_node); set_implementation_and_output(crop2_node); - set_implementation_and_output(crop3_node); set_implementation_and_output(con_node); set_implementation_and_output(permute_node); } @@ -227,9 +225,11 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh }; auto& crop0_node = get_crop_node(0); auto& crop1_node = get_crop_node(1); - auto& crop2_node = get_crop_node(2); - auto& crop3_node = get_crop_node(3); - std::vector con_input{input_info(crop1_node.id()), input_info(crop0_node.id()), input_info(crop2_node.id()), input_info(crop3_node.id())}; + auto crop2_id = primitive_id(crop_id_b + std::to_string(2)); + auto crop2_prim = std::make_shared(crop2_id, input_id, cldnn::tensor{dir_num, static_cast(2*hiddenSize), 1, 1}, + cldnn::tensor{0, static_cast(2*hiddenSize), 0, 0}); + auto& crop2_node = p.get_or_create(crop2_prim); + std::vector con_input{input_info(crop1_node.id()), input_info(crop0_node.id()), input_info(crop2_node.id())}; cldnn::primitive_id concat_id{input_id + "concat"}; auto con = std::make_shared(concat_id, con_input, 1); auto& con_node = p.get_or_create(con); @@ -237,10 +237,8 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh p.add_intermediate(crop1_node, con_node, prev, true); p.add_connection(prev, crop0_node, 0); p.add_connection(prev, crop2_node, 0); - p.add_connection(prev, crop3_node, 0); p.add_connection(crop0_node, con_node, 0); p.add_connection(crop2_node, con_node, 0); - p.add_connection(crop3_node, con_node, 0); auto set_implementation_and_output = [this, &p](program_node& node) { node.get_output_layout(false); select_implementation(p, node); @@ -250,7 +248,6 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh set_implementation_and_output(crop0_node); set_implementation_and_output(crop1_node); set_implementation_and_output(crop2_node); - set_implementation_and_output(crop3_node); set_implementation_and_output(con_node); } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp index b35e3cae1f1e59..a41cd1065122de 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp @@ -28,17 +28,13 @@ struct lstm_cell_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args; - size_t op_input_size = 6; - for (size_t i = 0; i < op_input_size; i++) { + for (size_t i = 0; i < instance.inputs_memory_count(); i++) { args.inputs.push_back(instance.input_memory_ptr(i)); } for (size_t i = 0; i < instance.outputs_memory_count(); i++) { args.outputs.push_back(instance.output_memory_ptr(i)); } - for (size_t i = op_input_size; i < instance.inputs_memory_count(); i++) { - args.outputs.push_back(instance.dep_memory_ptr(i)); - } return args; } @@ -68,8 +64,6 @@ struct lstm_cell_impl : typed_primitive_impl_ocl { params.SetOffsetOrder(static_cast(primitive->offset_order)); params.clip = primitive->clip; params.direction = primitive->direction; - //Legacy multi-output - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp index ce160287d22924..3fb8ae13d3baa4 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp @@ -29,17 +29,13 @@ struct rnn_seq_impl : typed_primitive_impl_ocl { protected: kernel_arguments_data get_arguments(const typed_primitive_inst& instance) const override { kernel_arguments_data args; - size_t op_input_size = 6 + (instance.has_cell() ? 1 : 0); - for (size_t i = 0; i < op_input_size; i++) { + for (size_t i = 0; i < instance.inputs_memory_count(); i++) { args.inputs.push_back(instance.input_memory_ptr(i)); } for (size_t i = 0; i < instance.outputs_memory_count(); i++) { args.outputs.push_back(instance.output_memory_ptr(i)); } - for (size_t i = op_input_size; i < instance.inputs_memory_count(); i++) { - args.outputs.push_back(instance.dep_memory_ptr(i)); - } return args; } @@ -70,11 +66,6 @@ struct rnn_seq_impl : typed_primitive_impl_ocl { params.SetOffsetOrder(static_cast(primitive->offset_order)); params.clip = primitive->clip; params.direction = primitive->direction; - //Legacy multi-output - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); - if (!primitive->initial_cell_state.pid.empty()) { - params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1])); - } return params; } diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp index 637f597cbf9307..545ae780a7548b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/lstm_seq_onednn.hpp @@ -25,8 +25,6 @@ struct LSTMSeqImplementationManager : public ImplementationManager { if (info.arch == gpu_arch::unknown) return false; - const auto& lstm_seq_node = node.as(); - const auto& lstm_seq_prim = lstm_seq_node.get_primitive(); auto in0_dt = node.get_input_layout(0).data_type; auto in1_dt = node.get_input_layout(1).data_type; auto in2_dt = node.get_input_layout(2).data_type; diff --git a/src/plugins/intel_gpu/src/graph/lstm_seq.cpp b/src/plugins/intel_gpu/src/graph/lstm_seq.cpp index 3064b102af4801..f06f7a644ad12a 100644 --- a/src/plugins/intel_gpu/src/graph/lstm_seq.cpp +++ b/src/plugins/intel_gpu/src/graph/lstm_seq.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2024 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "lstm_seq_inst.h"