diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 0d0748311caa08..938219709f8cc0 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -389,7 +389,9 @@ void primitive_inst::update_shape() { if (i >= _deps.size()) continue; - if (_deps[i].first->get_node().is_in_shape_of_subgraph()) { + if (_deps[i].first->get_node().is_in_shape_of_subgraph() && /* Take this branch only when the dependency is in a shape_of subgraph AND its implementation is CPU: the selected impl's is_cpu() is used when an impl is selected, otherwise the node's preferred impl type is compared against impl_types::cpu. */ + (_deps[i].first->get_node().get_selected_impl() ? _deps[i].first->get_node().get_selected_impl()->is_cpu() + : _deps[i].first->get_node().get_preferred_impl_type() == impl_types::cpu)) { bool can_skip = true; const auto& insts = _deps[i].first->dependant_shape_of_insts; for (auto& inst : insts) { @@ -429,7 +431,8 @@ void primitive_inst::update_shape() { continue; } - if (!get_node().is_type() && !dep->get_node().is_in_shape_of_subgraph()) { + if (!get_node().is_type() && /* has_runtime_deps is now set when the dependency's implementation is not CPU (selected impl's is_cpu() if one is selected, else preferred impl type == impl_types::cpu). NOTE(review): unlike the check earlier in update_shape(), is_in_shape_of_subgraph() is no longer consulted here - confirm this asymmetry is intended. */ + !(dep->get_node().get_selected_impl() ? 
dep->get_node().get_selected_impl()->is_cpu() : dep->get_node().get_preferred_impl_type() == impl_types::cpu)) { has_runtime_deps = true; // Events may be not created for in-order queue, so take them for OOO queue only diff --git a/src/plugins/intel_gpu/tests/unit/dynamic_execution/update_shape_test.cpp b/src/plugins/intel_gpu/tests/unit/dynamic_execution/update_shape_test.cpp new file mode 100644 index 00000000000000..ec5042e853fa7a --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/dynamic_execution/update_shape_test.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" + +#include +#include +#include +#include +#include +#include + +#include "program_wrapper.h" + +#include +#include + +using namespace cldnn; +using namespace ::tests; + +namespace update_shape_tests { +TEST(update_shape_test, ocl_impl_in_shapeof_subgraph) { /* Smoke test: builds a dynamic-shape network where shape_of feeds gather -> broadcast1 -> count_nonzero / gather_nonzero -> broadcast2, binds a 1x128 i32 input, and only checks that execute() does not throw; output values are not inspected. Presumably count_nonzero / gather_nonzero are the non-CPU (OCL) impls the test name refers to - TODO confirm. */ + auto& engine = get_test_engine(); + + layout const1_gather_layout = layout{ov::PartialShape{1}, data_types::i32, format::bfyx}; /* one-element i32 constant {1}; passed as the second input of "gather" */ + auto const1_gather = engine.allocate_memory(const1_gather_layout); + set_values(const1_gather, {1}); + + layout const_broadcast_layout = layout{ov::PartialShape{}, data_types::i32, format::bfyx}; /* i32 constant {1} with an empty PartialShape; passed as the first input of "broadcast1" */ + auto const_broadcast = engine.allocate_memory(const_broadcast_layout); + set_values(const_broadcast, {1}); + + layout input_l= layout{ov::PartialShape{1, 128}, data_types::i32, format::bfyx}; /* concrete layout of the memory bound to "input" at run time */ + auto input_mem = engine.allocate_memory(input_l); + set_values(input_mem, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, /* repeating 1..10 pattern filling the 1x128 buffer */ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 1, 2, 3, 4, 5, 6, 7, 8,}); + + auto input_l_dynamic = 
layout{ov::PartialShape::dynamic(2), data_types::i32, format::bfyx}; /* the topology declares "input" with a dynamic rank-2 shape; the concrete 1x128 memory is supplied later through set_input_data() */ + topology topology(input_layout("input", input_l_dynamic), + data("const1_gather", const1_gather), + data("const_broadcast", const_broadcast), + shape_of("shape_of", input_info("input"), data_types::i32), + gather("gather", input_info("shape_of"), input_info("const1_gather"), 0, 1, ov::Shape({1})), + broadcast("broadcast1", input_info("const_broadcast"), input_info("gather"), {}, ov::op::BroadcastType::NUMPY), + count_nonzero("count_nonzero", input_info("broadcast1")), + gather_nonzero("gather_nonzero", input_info("broadcast1"), input_info("count_nonzero")), + broadcast("broadcast2", input_info("gather_nonzero"), input_info("shape_of"), {}, ov::op::BroadcastType::BIDIRECTIONAL)); /* "shape_of" is consumed twice: by "gather" and as the second input of "broadcast2" */ + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + + network network(engine, topology, config); + network.set_input_data("input", input_mem); + std::map outputs; + OV_ASSERT_NO_THROW(outputs = network.execute()); /* the only assertion in this test: execution must not throw */ +} +} // namespace update_shape_tests