Skip to content

Commit

Permalink
one fewer primitive in post_optimize_weights; deleted legacy output
Browse files Browse the repository at this point in the history
  • Loading branch information
michal-miotk committed Nov 25, 2024
1 parent 2a068c2 commit fcdaab0
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "fully_connected_inst.h"
#include "lstm_seq_inst.h"
#include "intel_gpu/runtime/format.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
#include "permute_inst.h"
#include "crop_inst.h"
#ifdef ENABLE_ONEDNN_FOR_GPU
Expand Down Expand Up @@ -180,20 +179,20 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:

auto& crop0_node = get_crop_node(0);
auto& crop1_node = get_crop_node(1);
auto& crop2_node = get_crop_node(2);
auto& crop3_node = get_crop_node(3);
std::vector<input_info> con_input{input_info(crop_id_b + "1"), input_info(crop_id_b + "0"), input_info(crop_id_b + "2"), input_info(crop_id_b + "3")};
auto crop2_id = primitive_id(crop_id_b + std::to_string(2));
auto crop2_prim = std::make_shared<cldnn::crop>(crop2_id, reorder_id, cldnn::tensor{dir_num, static_cast<int>(2*hiddenSize), 1, size_third},
cldnn::tensor{0, static_cast<int>(2*hiddenSize), 0, 0});
auto& crop2_node = p.get_or_create(crop2_prim);
std::vector<input_info> con_input{input_info(crop_id_b + "1"), input_info(crop_id_b + "0"), input_info(crop_id_b + "2")};
cldnn::primitive_id concat_id{input_id + "cont"};
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 1);
auto& con_node = p.get_or_create(con);
p.add_intermediate(con_node, node, prev, true);
p.add_intermediate(crop1_node, con_node, prev, true);
p.add_connection(prev, crop0_node, 0);
p.add_connection(prev, crop2_node, 0);
p.add_connection(prev, crop3_node, 0);
p.add_connection(crop0_node, con_node, 0);
p.add_connection(crop2_node, con_node, 0);
p.add_connection(crop3_node, con_node, 0);
std::string permute_id = input_id + "_perx";
std::vector<uint16_t> ord{0, 2, 1};
auto permute = std::make_shared<cldnn::permute>(permute_id, input_info{concat_id}, ord);
Expand All @@ -208,7 +207,6 @@ void post_optimize_weights::add_lstm_weights_reorder(primitive_id input_id, std:
set_implementation_and_output(crop1_node);
set_implementation_and_output(crop0_node);
set_implementation_and_output(crop2_node);
set_implementation_and_output(crop3_node);
set_implementation_and_output(con_node);
set_implementation_and_output(permute_node);
}
Expand All @@ -227,20 +225,20 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh
};
auto& crop0_node = get_crop_node(0);
auto& crop1_node = get_crop_node(1);
auto& crop2_node = get_crop_node(2);
auto& crop3_node = get_crop_node(3);
std::vector<input_info> con_input{input_info(crop1_node.id()), input_info(crop0_node.id()), input_info(crop2_node.id()), input_info(crop3_node.id())};
auto crop2_id = primitive_id(crop_id_b + std::to_string(2));
auto crop2_prim = std::make_shared<cldnn::crop>(crop2_id, input_id, cldnn::tensor{dir_num, static_cast<int>(2*hiddenSize), 1, 1},
cldnn::tensor{0, static_cast<int>(2*hiddenSize), 0, 0});
auto& crop2_node = p.get_or_create(crop2_prim);
std::vector<input_info> con_input{input_info(crop1_node.id()), input_info(crop0_node.id()), input_info(crop2_node.id())};
cldnn::primitive_id concat_id{input_id + "concat"};
auto con = std::make_shared<cldnn::concatenation>(concat_id, con_input, 1);
auto& con_node = p.get_or_create(con);
p.add_intermediate(con_node, node, prev, true);
p.add_intermediate(crop1_node, con_node, prev, true);
p.add_connection(prev, crop0_node, 0);
p.add_connection(prev, crop2_node, 0);
p.add_connection(prev, crop3_node, 0);
p.add_connection(crop0_node, con_node, 0);
p.add_connection(crop2_node, con_node, 0);
p.add_connection(crop3_node, con_node, 0);
auto set_implementation_and_output = [this, &p](program_node& node) {
node.get_output_layout(false);
select_implementation(p, node);
Expand All @@ -250,7 +248,6 @@ void post_optimize_weights::add_lstm_bias_reorder(primitive_id input_id, std::sh
set_implementation_and_output(crop0_node);
set_implementation_and_output(crop1_node);
set_implementation_and_output(crop2_node);
set_implementation_and_output(crop3_node);
set_implementation_and_output(con_node);
}

Expand Down
8 changes: 1 addition & 7 deletions src/plugins/intel_gpu/src/graph/impls/ocl/lstm_cell.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,13 @@ struct lstm_cell_impl : typed_primitive_impl_ocl<lstm_cell> {
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<lstm_cell>& instance) const override {
kernel_arguments_data args;
size_t op_input_size = 6;
for (size_t i = 0; i < op_input_size; i++) {
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}

for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
args.outputs.push_back(instance.output_memory_ptr(i));
}
for (size_t i = op_input_size; i < instance.inputs_memory_count(); i++) {
args.outputs.push_back(instance.dep_memory_ptr(i));
}
return args;
}

Expand Down Expand Up @@ -68,8 +64,6 @@ struct lstm_cell_impl : typed_primitive_impl_ocl<lstm_cell> {
params.SetOffsetOrder(static_cast<int32_t>(primitive->offset_order));
params.clip = primitive->clip;
params.direction = primitive->direction;
//Legacy multi-output
params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1]));

return params;
}
Expand Down
11 changes: 1 addition & 10 deletions src/plugins/intel_gpu/src/graph/impls/ocl/rnn_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,13 @@ struct rnn_seq_impl : typed_primitive_impl_ocl<lstm_seq> {
protected:
kernel_arguments_data get_arguments(const typed_primitive_inst<lstm_seq>& instance) const override {
kernel_arguments_data args;
size_t op_input_size = 6 + (instance.has_cell() ? 1 : 0);
for (size_t i = 0; i < op_input_size; i++) {
for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
args.inputs.push_back(instance.input_memory_ptr(i));
}

for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
args.outputs.push_back(instance.output_memory_ptr(i));
}
for (size_t i = op_input_size; i < instance.inputs_memory_count(); i++) {
args.outputs.push_back(instance.dep_memory_ptr(i));
}
return args;
}

Expand Down Expand Up @@ -70,11 +66,6 @@ struct rnn_seq_impl : typed_primitive_impl_ocl<lstm_seq> {
params.SetOffsetOrder(static_cast<int32_t>(primitive->offset_order));
params.clip = primitive->clip;
params.direction = primitive->direction;
//Legacy multi-output
params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1]));
if (!primitive->initial_cell_state.pid.empty()) {
params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1]));
}
return params;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ struct LSTMSeqImplementationManager : public ImplementationManager {
if (info.arch == gpu_arch::unknown)
return false;

const auto& lstm_seq_node = node.as<lstm_seq>();
const auto& lstm_seq_prim = lstm_seq_node.get_primitive();
auto in0_dt = node.get_input_layout(0).data_type;
auto in1_dt = node.get_input_layout(1).data_type;
auto in2_dt = node.get_input_layout(2).data_type;
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/lstm_seq.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (C) 2018-2024 Intel Corporation
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lstm_seq_inst.h"
Expand Down

0 comments on commit fcdaab0

Please sign in to comment.