From b1ff99cef534633c01cf8add32c5802ab67e7add Mon Sep 17 00:00:00 2001 From: Damian Kurek Date: Thu, 14 Nov 2024 06:14:10 +0100 Subject: [PATCH 01/10] [GPU] Fix memory leak (#27536) ### Details: - Fix memory leak and improve memory usage with continuous inference ### Tickets: - 148552 --- .../intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h index 3ddb5bf8793c29..a8c715af98f198 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.h @@ -299,6 +299,7 @@ inline void update_shapes(kernel_selector::Params& p, const kernel_impl_params& const auto& fused_prim = impl_param.fused_desc[i]; auto& fd = bp.fused_ops[i]; fd.output_tensor = convert_data_tensor(fused_prim.output_layout); + fd.tensors.clear(); for (size_t i = fd.dep_idx_start; i < fd.dep_idx_start + fd.dep_size; i++) { fd.tensors.push_back(convert_data_tensor(impl_param.get_input_layout(i))); } From 453ee5734383bfa0ed3cab5a369e64153ce7fbab Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Thu, 14 Nov 2024 13:04:36 +0400 Subject: [PATCH 02/10] [TF FE] Stabilize layer tests for Keras GRU layer on all platforms (#27543) **Details:** Stabilize layer tests for Keras GRU layer on all platforms **Ticket:** 156967 --------- Signed-off-by: Kazantsev, Roman --- .../workflows/job_tensorflow_layer_tests.yml | 2 +- .../test_tf2_keras_gru.py | 144 +++++------------- 2 files changed, 36 insertions(+), 110 deletions(-) diff --git a/.github/workflows/job_tensorflow_layer_tests.yml b/.github/workflows/job_tensorflow_layer_tests.yml index 29afb466d69a42..0de1708527739a 100644 --- a/.github/workflows/job_tensorflow_layer_tests.yml +++ b/.github/workflows/job_tensorflow_layer_tests.yml @@ -30,7 +30,7 @@ env: jobs: TensorFlow_Layer_Tests: name: TensorFlow Layer Tests - timeout-minutes: 30 + timeout-minutes: 45 runs-on: ${{ inputs.runner }} container: ${{ fromJSON(inputs.container) }} defaults: diff --git a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_gru.py b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_gru.py index 66b91e9d64daca..fad5c188d38d7f 100644 --- a/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_gru.py +++ b/tests/layer_tests/tensorflow2_keras_tests/test_tf2_keras_gru.py @@ -1,23 +1,30 @@ # Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import numpy as np import pytest import tensorflow as tf - from common.tf2_layer_test_class import CommonTF2LayerTest +rng = np.random.default_rng(233534) + class TestKerasGru(CommonTF2LayerTest): - def create_keras_gru_net(self, input_names, input_shapes, input_type, units, activation, - recurrent_activation, - use_bias, dropouts, flags, ir_version): - """ - create TensorFlow 2 model with Keras GRU operation - """ + def _prepare_input(self, inputs_info): + assert 'x' in inputs_info, "Test error: inputs_info must contain `x`" + x_shape = inputs_info['x'] + inputs_data = {} + inputs_data['x'] = rng.uniform(-2.0, 2.0, x_shape).astype(self.input_type) + return inputs_data + + def create_keras_gru_net(self, input_shapes, input_type, units, + activation, recurrent_activation, + dropouts, use_bias, flag1, flag2): + self.input_type = input_type tf.keras.backend.clear_session() # For easy reset of notebook state - x1 = tf.keras.Input(shape=input_shapes[0][1:], 
name=input_names[0]) + x1 = tf.keras.Input(shape=input_shapes[0][1:], dtype=input_type, name='x') dropout, recurrent_dropout = dropouts - go_backwards, reset_after = flags + go_backwards, reset_after = flag1, flag2 y = tf.keras.layers.GRU(units=units, activation=activation, recurrent_activation=recurrent_activation, use_bias=use_bias, dropout=dropout, @@ -25,111 +32,30 @@ def create_keras_gru_net(self, input_names, input_shapes, input_type, units, act return_sequences=False, return_state=False, go_backwards=go_backwards, reset_after=reset_after)(x1) tf2_net = tf.keras.Model(inputs=[x1], outputs=[y]) - - # TODO: add reference IR net. Now it is omitted since inference is more - # important and needs to be checked in the first ref_net = None return tf2_net, ref_net - test_data_simple = [ - dict(input_names=["x"], input_shapes=[[2, 2, 3]], input_type=tf.float32, units=1, - activation='tanh', recurrent_activation='sigmoid', dropouts=(.0, .3), use_bias=True, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[1, 2, 3]], input_type=tf.float32, units=4, - activation='relu', recurrent_activation='sigmoid', dropouts=(.2, .4), use_bias=True, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[3, 2, 3]], input_type=tf.float32, units=2, - activation='elu', recurrent_activation='tanh', dropouts=(.3, .5), use_bias=True, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[2, 3, 4]], input_type=tf.float32, units=1, - activation='elu', recurrent_activation='softmax', dropouts=(.0, .5), use_bias=True, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[1, 3, 4]], input_type=tf.float32, units=3, - activation='linear', recurrent_activation='sigmoid', dropouts=(.4, .6), - flags=(False, False), use_bias=True) - ] - - @pytest.mark.parametrize("params", test_data_simple) - @pytest.mark.nightly - @pytest.mark.precommit - def test_keras_gru_with_bias_float32(self, params, ie_device, precision, temp_dir, ir_version, - use_legacy_frontend): - self._test(*self.create_keras_gru_net(**params, ir_version=ir_version), - ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, - use_legacy_frontend=use_legacy_frontend, **params) - - test_data_without_bias = [ - dict(input_names=["x"], input_shapes=[[2, 2, 7]], input_type=tf.float32, units=1, - activation='tanh', recurrent_activation='sigmoid', dropouts=(.0, .3), use_bias=False, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[3, 8, 3]], input_type=tf.float32, units=4, - activation='relu', recurrent_activation='sigmoid', dropouts=(.7, .4), use_bias=False, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[4, 2, 2]], input_type=tf.float32, units=2, - activation='elu', recurrent_activation='tanh', dropouts=(.0, .5), use_bias=False, - flags=(False, False)) - ] - - @pytest.mark.parametrize("params", test_data_without_bias) - @pytest.mark.nightly - @pytest.mark.precommit - def test_keras_gru_without_bias_float32(self, params, ie_device, precision, temp_dir, - ir_version, use_legacy_frontend): - self._test(*self.create_keras_gru_net(**params, ir_version=ir_version), - ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, - use_legacy_frontend=use_legacy_frontend, **params) - - test_data_different_flags = [ - dict(input_names=["x"], input_shapes=[[2, 3, 2]], input_type=tf.float32, units=1, - activation='elu', recurrent_activation='sigmoid', dropouts=(.0, .3), use_bias=True, - flags=(True, False)), - dict(input_names=["x"], input_shapes=[[4, 8, 3]], input_type=tf.float32, 
dropouts=(.1, .3), - units=3, activation='relu', use_bias=False, recurrent_activation='tanh', - flags=(False, True)), - dict(input_names=["x"], input_shapes=[[4, 2, 7]], input_type=tf.float32, units=5, - activation='relu', recurrent_activation='tanh', dropouts=(.2, .6), - use_bias=True, flags=(False, False)), - dict(input_names=["x"], input_shapes=[[4, 16, 2]], input_type=tf.float32, units=5, - activation='relu', recurrent_activation='tanh', dropouts=(.2, .6), - use_bias=True, flags=(False, True)), - dict(input_names=["x"], input_shapes=[[4, 8, 7]], input_type=tf.float32, units=5, - activation='elu', recurrent_activation='sigmoid', dropouts=(.2, .6), - use_bias=True, flags=(True, True)), - ] - - @pytest.mark.parametrize("params", test_data_different_flags) - @pytest.mark.nightly - @pytest.mark.precommit - @pytest.mark.xfail(reason="sporadic inference mismatch") - def test_keras_gru_flags_float32(self, params, ie_device, precision, temp_dir, ir_version, - use_legacy_frontend): - self._test(*self.create_keras_gru_net(**params, ir_version=ir_version), - ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, - use_legacy_frontend=use_legacy_frontend, **params) - - test_data_zero_recurrent_dropout = [ - dict(input_names=["x"], input_shapes=[[8, 2, 3]], input_type=tf.float32, units=3, - activation='elu', recurrent_activation='tanh', dropouts=(.7, .0), use_bias=True, - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[4, 8, 5]], input_type=tf.float32, dropouts=(.6, .0), - units=2, activation='elu', use_bias=True, recurrent_activation='tanh', - flags=(False, False)), - dict(input_names=["x"], input_shapes=[[4, 3, 1]], input_type=tf.float32, units=8, - activation='elu', recurrent_activation='tanh', dropouts=(.5, .0), - use_bias=True, flags=(True, False)), - dict(input_names=["x"], input_shapes=[[3, 4, 2]], input_type=tf.float32, units=3, - activation='elu', recurrent_activation='tanh', dropouts=(.7, .0), use_bias=True, - flags=(True, False)), - ] - - @pytest.mark.parametrize("params", test_data_zero_recurrent_dropout) + @pytest.mark.parametrize('input_shapes', [[[2, 3, 4]]]) + @pytest.mark.parametrize('input_type', [np.float32, np.float64]) + @pytest.mark.parametrize('units', [1, 2, 3]) + @pytest.mark.parametrize('activation', ['tanh', 'relu', 'elu', 'linear']) + @pytest.mark.parametrize('recurrent_activation', ['sigmoid', 'tanh', 'softmax']) + @pytest.mark.parametrize('dropouts', [(.0, .0), (.0, .3), (.2, .4), ]) + @pytest.mark.parametrize('use_bias', [True, False]) + @pytest.mark.parametrize('flag1', [True, False]) + @pytest.mark.parametrize('flag2', [True, False]) @pytest.mark.nightly @pytest.mark.precommit - @pytest.mark.xfail(reason="50176") - def test_keras_gru_flags_zero_recurrent_dropout_float32(self, params, ie_device, precision, - temp_dir, ir_version, - use_legacy_frontend): - self._test(*self.create_keras_gru_net(**params, ir_version=ir_version), + def test_keras_gru(self, input_shapes, input_type, units, + activation, recurrent_activation, + dropouts, use_bias, flag1, flag2, + ie_device, precision, temp_dir, ir_version, + use_legacy_frontend): + params = {} + params['input_shapes'] = input_shapes + self._test(*self.create_keras_gru_net(input_shapes, input_type, units, + activation, recurrent_activation, + dropouts, use_bias, flag1, flag2), ie_device, precision, temp_dir=temp_dir, ir_version=ir_version, use_legacy_frontend=use_legacy_frontend, **params) From a661f0d63c0ea204a2f5095a25e4458ba93f5884 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 14 Nov 2024 
19:09:20 +0800 Subject: [PATCH 03/10] [CPU]fix rope mark up to skip shapeof (#27462) ### Details: - *Stop markup at ShapeOf* - *The ShapeOf in the subgraph of Rope's 2nd/3rd input should not be marked* - *The parent of ShapeOf may change when IR changes so skip it to avoid unknown precision problem* ### Tickets: - *CVS-155898* --- .../mark_rope_input_to_keep_in_mixed_precision.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.cpp index 63c7495b28112b..e40aeaa67421a8 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mark_rope_input_to_keep_in_mixed_precision.cpp @@ -31,9 +31,12 @@ ov::pass::MarkRopeInputsToKeepInMixedPrecision::MarkRopeInputsToKeepInMixedPreci auto visit_func = [](ov::Node* node) { ov::disable_fp16_compression(node->shared_from_this()); }; - // skip constant and parameter node + // skip constant, parameter and shapeof + // The inputs of cos_sin table generation are position_ids and a ShapeOf [batch, input_length] + // The parent of ShapeOf may change when IR changes so skip it to avoid unknown precision problem auto skip_node_predicate = [](ov::Node* node) -> bool { - return ov::is_type(node) || ov::is_type(node); + return ov::is_type(node) || ov::is_type(node) || + ov::is_type(node); }; if (!visited.count(cos_input_node)) { ov::op::util::visit_path(cos_input_node, visited, visit_func, skip_node_predicate); From fefe0c33e02928d85646a940fe854d39186461a3 Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Thu, 14 Nov 2024 16:38:44 +0100 Subject: [PATCH 04/10] Fixed OVC app name in telemetry. (#27544) ### Details: - Fixed the issue, that '`Model Conversion API`' telemetry category includes `OVC` and all other tools, now `OVC` has a separate category "`OVC`". - Refactored `send_params_info()` method and fixed naming of telemetry init method. 
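For context, the refactored helper now receives the already-filtered dictionary of non-default parameters instead of re-deriving them from the parser. A minimal sketch of the masking behavior (standalone illustration only - `PARAMS_WITH_PATHS` and the `emit` stub are placeholders, not the real OVC telemetry API):

```python
PARAMS_WITH_PATHS = {"input_model", "output_model"}  # assumed path-like arguments for this sketch

def emit(label: str) -> None:
    # stand-in for tm.Telemetry().send_event('ovc', 'cli_parameters', label)
    print("event:", label)

def send_params_info(params: dict) -> None:
    for key, value in params.items():
        # Path-valued arguments are replaced with "1" so that no file or
        # directory names are ever sent.
        payload = "1" if key in PARAMS_WITH_PATHS else str(value)
        emit(key + ":" + payload)

send_params_info({"input_model": "/home/user/model.onnx", "compress_to_fp16": True})
# event: input_model:1
# event: compress_to_fp16:True
```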
--------- Co-authored-by: Roman Kazantsev --- tools/ovc/openvino/tools/ovc/__init__.py | 14 +++++----- tools/ovc/openvino/tools/ovc/__main__.py | 4 +-- tools/ovc/openvino/tools/ovc/convert_impl.py | 10 +++---- tools/ovc/openvino/tools/ovc/ovc.py | 4 +-- .../ovc/openvino/tools/ovc/telemetry_utils.py | 27 +++++++++---------- 5 files changed, 28 insertions(+), 31 deletions(-) diff --git a/tools/ovc/openvino/tools/ovc/__init__.py b/tools/ovc/openvino/tools/ovc/__init__.py index a2912d28e08af7..5b750b58969d24 100644 --- a/tools/ovc/openvino/tools/ovc/__init__.py +++ b/tools/ovc/openvino/tools/ovc/__init__.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from openvino.tools.ovc.convert import convert_model -from openvino.tools.ovc.telemetry_utils import is_optimum, init_mo_telemetry +from openvino.tools.ovc.telemetry_utils import is_optimum, init_ovc_telemetry import importlib.metadata as importlib_metadata @@ -11,10 +11,10 @@ except importlib_metadata.PackageNotFoundError: optimum_version = None +from openvino.runtime import get_version as get_rt_version # pylint: disable=no-name-in-module,import-error +telemetry = init_ovc_telemetry('OpenVINO') +telemetry.send_event("ov", "import", "general_import") + if is_optimum() and optimum_version is not None: - from openvino.runtime import get_version as get_rt_version # pylint: disable=no-name-in-module,import-error - telemetry = init_mo_telemetry("Optimum Intel", optimum_version) - telemetry.send_event("ov", "import", "import_from_optimum,ov_version:{}".format(get_rt_version())) -else: - telemetry = init_mo_telemetry() - telemetry.send_event("ov", "import", "general_import") + telemetry = init_ovc_telemetry("Optimum Intel", optimum_version) + telemetry.send_event("optimum", "import", "import_from_optimum,ov_version:{}".format(get_rt_version())) diff --git a/tools/ovc/openvino/tools/ovc/__main__.py b/tools/ovc/openvino/tools/ovc/__main__.py index 5e9ef2dfba4e6f..d264010d9870d9 100644 --- a/tools/ovc/openvino/tools/ovc/__main__.py +++ b/tools/ovc/openvino/tools/ovc/__main__.py @@ -4,7 +4,7 @@ import sys from openvino.tools.ovc.main import main -from openvino.tools.ovc.telemetry_utils import init_mo_telemetry +from openvino.tools.ovc.telemetry_utils import init_ovc_telemetry -init_mo_telemetry() +init_ovc_telemetry() sys.exit(main()) diff --git a/tools/ovc/openvino/tools/ovc/convert_impl.py b/tools/ovc/openvino/tools/ovc/convert_impl.py index dc0694f0a405b5..aef054f8aafc24 100644 --- a/tools/ovc/openvino/tools/ovc/convert_impl.py +++ b/tools/ovc/openvino/tools/ovc/convert_impl.py @@ -32,7 +32,7 @@ from openvino.tools.ovc.utils import check_values_equal from openvino.tools.ovc.logger import init_logger from openvino.tools.ovc.telemetry_utils import send_params_info, send_conversion_result, \ - init_mo_telemetry + init_ovc_telemetry from openvino.tools.ovc.moc_frontend.pytorch_frontend_utils import get_pytorch_decoder, \ extract_input_info_from_example, get_pytorch_decoder_for_model_on_disk from openvino.tools.ovc.moc_frontend.paddle_frontend_utils import paddle_frontend_converter @@ -428,7 +428,7 @@ def _convert(cli_parser: argparse.ArgumentParser, args, python_api_used): tracemalloc.start() simplified_ie_version = VersionChecker().get_ie_simplified_version() - telemetry = init_mo_telemetry() + telemetry = init_ovc_telemetry() telemetry.start_session('ovc') telemetry.send_event('ovc', 'version', simplified_ie_version) # Initialize logger with 'ERROR' as default level to be able to form nice messages @@ -484,12 +484,12 @@ def _convert(cli_parser: 
argparse.ArgumentParser, args, python_api_used): argv.feManager = FrontEndManager() - # send telemetry with params info - send_params_info(argv, cli_parser) - non_default_params = get_non_default_params(argv, cli_parser) argv.is_python_api_used = python_api_used + # send telemetry with params info + send_params_info(non_default_params) + argv.framework = model_framework orig_input_model = argv.input_model diff --git a/tools/ovc/openvino/tools/ovc/ovc.py b/tools/ovc/openvino/tools/ovc/ovc.py index 20c4a675797a92..88f2d7a08619be 100755 --- a/tools/ovc/openvino/tools/ovc/ovc.py +++ b/tools/ovc/openvino/tools/ovc/ovc.py @@ -6,8 +6,8 @@ import sys if __name__ == "__main__": - from openvino.tools.ovc.telemetry_utils import init_mo_telemetry + from openvino.tools.ovc.telemetry_utils import init_ovc_telemetry from openvino.tools.ovc.main import main - init_mo_telemetry() + init_ovc_telemetry() sys.exit(main()) diff --git a/tools/ovc/openvino/tools/ovc/telemetry_utils.py b/tools/ovc/openvino/tools/ovc/telemetry_utils.py index 4a54632b8c642d..412d9b9607541e 100644 --- a/tools/ovc/openvino/tools/ovc/telemetry_utils.py +++ b/tools/ovc/openvino/tools/ovc/telemetry_utils.py @@ -25,7 +25,7 @@ def is_optimum(): return False -def init_mo_telemetry(app_name='Model Conversion API', app_version=None): +def init_ovc_telemetry(app_name='OVC', app_version=None): app_version = app_version if app_version is not None else get_rt_version() return init_telemetry_class(tid=get_tid(), app_name=app_name, @@ -97,22 +97,19 @@ def arg_to_str(arg): return str(type(arg)) -def send_params_info(argv: argparse.Namespace, cli_parser: argparse.ArgumentParser): +def send_params_info(params: dict): """ This function sends information about used command line parameters. - :param argv: command line parameters. - :param cli_parser: command line parameters parser. + :param params: command-line parameters dictionary. """ t = tm.Telemetry() params_with_paths = get_params_with_paths_list() - for arg in vars(argv): - arg_value = getattr(argv, arg) - if not check_values_equal(arg_value, cli_parser.get_default(arg)): - if arg in params_with_paths: - # If command line argument value is a directory or a path to file it is not sent - # as it may contain confidential information. "1" value is used instead. - param_str = arg + ":" + str(1) - else: - param_str = arg + ":" + arg_to_str(arg_value) - - t.send_event('ovc', 'cli_parameters', param_str) + for key, value in params.items(): + if key in params_with_paths: + # If command line argument value is a directory or a path to file it is not sent + # as it may contain confidential information. "1" value is used instead. 
+ param_str = key + ":" + str(1) + else: + param_str = key + ":" + arg_to_str(value) + + t.send_event('ovc', 'cli_parameters', param_str) From 19ae2a950d0127a60d66a6e46b55c91faf07d736 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Thu, 14 Nov 2024 17:20:56 +0100 Subject: [PATCH 05/10] [GHA] Manylinux added CI tags and manifest (#27540) ### Details: - added actions to save artifacts for Jenkins - create_manifest and store_artifacts - Added overall status workflow ### Tickets: - *ticket-id* --- .github/actions/common/constants.py | 1 + .github/workflows/manylinux_2014.yml | 47 +++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/.github/actions/common/constants.py b/.github/actions/common/constants.py index da55ece2ee4258..6a1d165fc7df13 100644 --- a/.github/actions/common/constants.py +++ b/.github/actions/common/constants.py @@ -16,6 +16,7 @@ class EventType(Enum): 'public_linux_ubuntu_24_04_x86_64_release', 'public_windows_vs2019_Release', 'public_windows_vs2019_Debug', + 'public_manylinux2014_x86_64_release', ) ProductType = Enum('ProductType', {t.upper(): t for t in productTypes}) diff --git a/.github/workflows/manylinux_2014.yml b/.github/workflows/manylinux_2014.yml index ed375fb868459f..bd5da965226a50 100644 --- a/.github/workflows/manylinux_2014.yml +++ b/.github/workflows/manylinux_2014.yml @@ -88,6 +88,7 @@ jobs: options: -e SCCACHE_AZURE_BLOB_CONTAINER -e SCCACHE_AZURE_CONNECTION_STRING -e DOCKER_CONFIG -v ${{ github.workspace }}:${{ github.workspace }} env: CMAKE_BUILD_TYPE: 'Release' + ARCH: 'x86_64' OPENVINO_REPO: ${{ github.workspace }}/src INSTALL_DIR: ${{ github.workspace }}/install/openvino INSTALL_WHEELS_DIR: ${{ github.workspace }}/install/wheels @@ -99,6 +100,9 @@ jobs: SCCACHE_SERVER_PORT: 35555 SCCACHE_CACHE_SIZE: 50G SCCACHE_AZURE_KEY_PREFIX: manylinux_2014 + ARTIFACTS_SHARE: "/mount/build-artifacts" + MANIFEST_PATH: ${{ github.workspace }}/manifest.yml + PRODUCT_TYPE: public_manylinux2014_x86_64_release steps: - name: Clone OpenVINO @@ -109,6 +113,17 @@ jobs: - name: System info uses: ./src/.github/actions/system_info + + - name: Generate product manifest and set CI_BUILD_NUMBER & CI_BUILD_DEV_TAG + id: create_manifest + uses: ./src/.github/actions/create_manifest + with: + repos: | + ${{ env.OPENVINO_REPO }} + product_type: ${{ env.PRODUCT_TYPE }} + target_arch: ${{ env.ARCH }} + build_type: ${{ env.CMAKE_BUILD_TYPE }} + save_to: ${{ env.MANIFEST_PATH }} - name: Create docker build cache run: | @@ -128,6 +143,8 @@ jobs: -e SCCACHE_AZURE_KEY_PREFIX \ -e CMAKE_CXX_COMPILER_LAUNCHER \ -e CMAKE_C_COMPILER_LAUNCHER \ + -e CI_BUILD_NUMBER \ + -e CI_BUILD_DEV_TAG \ -w /work/src \ ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " @@ -158,6 +175,8 @@ jobs: -e SCCACHE_AZURE_KEY_PREFIX \ -e CMAKE_CXX_COMPILER_LAUNCHER \ -e CMAKE_C_COMPILER_LAUNCHER \ + -e CI_BUILD_NUMBER \ + -e CI_BUILD_DEV_TAG \ -w /work/src \ ${{ fromJSON(needs.docker.outputs.images).ov_build.manylinux2014_x86_64 }} \ /bin/bash -c " @@ -188,4 +207,30 @@ jobs: with: name: openvino_wheels path: ${{ env.INSTALL_WHEELS_DIR }}/wheels/*.whl - if-no-files-found: 'error' \ No newline at end of file + if-no-files-found: 'error' + + - name: Store artifacts to a shared drive + id: store_artifacts + if: ${{ always() }} + uses: ./src/.github/actions/store_artifacts + with: + artifacts: | + ${{ env.BUILD_DIR }}/openvino_package.tar.gz + ${{ env.MANIFEST_PATH }} + ${{ env.INSTALL_WHEELS_DIR }}/wheels + storage_dir: ${{ env.PRODUCT_TYPE }} 
storage_root: ${{ env.ARTIFACTS_SHARE }}
+
+  Overall_Status:
+    name: ci/gha_overall_status_manylinux2014
+    needs: [Smart_CI, Build]
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check status of all jobs
+        if: >-
+          ${{
+            contains(needs.*.result, 'failure') ||
+            contains(needs.*.result, 'cancelled')
+          }}
+        run: exit 1
\ No newline at end of file

From 651a51cf68cc34bcd5b191a573d54f5c2a451af0 Mon Sep 17 00:00:00 2001
From: Tomasz Krupa
Date: Fri, 15 Nov 2024 06:22:32 +0000
Subject: [PATCH 06/10] [GPU] Change weights_path mutability to RW (#27553)

### Details:
It needs to be readable (e.g. to be queried by the hello_query_device sample)
and writable for the plugin to set the default value.

### Tickets:
https://jira.devtools.intel.com/browse/CVS-157364
---
 src/plugins/intel_gpu/src/plugin/plugin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp
index b1cc946559ee94..c3ba90fd66f7a8 100644
--- a/src/plugins/intel_gpu/src/plugin/plugin.cpp
+++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -597,7 +597,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
         ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::dynamic_quantization_group_size.name(), PropertyMutability::RW},
         ov::PropertyName{ov::hint::activations_scale_factor.name(), PropertyMutability::RW},
-        ov::PropertyName{ov::weights_path.name(), PropertyMutability::RO},
+        ov::PropertyName{ov::weights_path.name(), PropertyMutability::RW},
     };

     return supported_properties;

From c33855978237389f0d0366d0a2bc8af31c4a78d4 Mon Sep 17 00:00:00 2001
From: Sebastian Golebiewski
Date: Fri, 15 Nov 2024 09:29:29 +0100
Subject: [PATCH 07/10] [DOCS] Updating NPU GenAI docs (#27489)

Updating the `Run LLMs with OpenVINO GenAI Flavor on NPU` article - adding
info about new config options.

This PR addresses JIRA ticket: 156503.

---------

Signed-off-by: Sebastian Golebiewski
Co-authored-by: Karol Blaszczak
---
 .../llm_inference_guide/genai-guide-npu.rst   | 185 ++++++++++++++++--
 1 file changed, 165 insertions(+), 20 deletions(-)

diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst
index 4585ca97488023..6917d809c7e5d6 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide-npu.rst
@@ -9,7 +9,7 @@ This guide will give you extra details on how to utilize NPU with the GenAI flavor
 for information on how to start.

 Prerequisites
-#############
+#####################

 Install required dependencies:

 .. code-block:: bash

    python -m venv npu-env
    npu-env\Scripts\activate
-   pip install optimum-intel nncf==2.11 onnx==1.16.1
+   pip install nncf==2.12 onnx==1.16.1 optimum-intel==1.19.0
    pip install --pre openvino openvino-tokenizers openvino-genai --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly

 Export an LLM model via Hugging Face Optimum-Intel
 ##################################################

-A chat-tuned TinyLlama model is used in this example. The following conversion & optimization
-settings are recommended when using the NPU:
-
-.. 
code-block:: python +| You may export LLMs via Optimum-Intel, using one of two compression methods: +| **group quantization** - for both smaller and larger models, +| **channel-wise quantization** - remarkably effective but for models exceeding 1 billion parameters. - optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v1.0 --weight-format int4 --sym --group-size 128 --ratio 1.0 TinyLlama +You select one of the methods by setting the ``--group-size`` parameter to either ``128`` or ``-1``, respectively. See the following examples: -**For models exceeding 1 billion parameters**, it is recommended to use **channel-wise -quantization** that is remarkably effective. For example, you can try the approach with the -llama-2-7b-chat-hf model: +.. tab-set:: + + .. tab-item:: Group quantization + + .. code-block:: console + :name: group-quant + + optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v1.0 --weight-format int4 --sym --ratio 1.0 --group_size 128 TinyLlama-1.1B-Chat-v1.0 + + .. tab-item:: Channel-wise quantization + + .. tab-set:: + + .. tab-item:: Data-free quantization + + + .. code-block:: console + :name: channel-wise-data-free-quant + + optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --ratio 1.0 --group-size -1 Llama-2-7b-chat-hf -.. code-block:: python + .. tab-item:: Data-aware quantization - optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --group-size -1 --ratio 1.0 Llama-2-7b-chat-hf + If you want to improve accuracy, make sure you: + + 1. Update NNCF: ``pip install nncf==2.13`` + 2. Use ``--scale_estimation --dataset=`` and accuracy aware quantization ``--awq``: + + .. code-block:: console + :name: channel-wise-data-aware-quant + + optimum-cli export openvino -m meta-llama/Llama-2-7b-chat-hf --weight-format int4 --sym --group-size -1 --ratio 1.0 --awq --scale-estimation --dataset=wikitext2 Llama-2-7b-chat-hf + + + .. important:: + + Remember that the negative value of ``-1`` is required here, not ``1``. + + + +You can also try using 4-bit (INT4) +`GPTQ models `__, +which do not require specifying quantization parameters: + +.. code-block:: console + + optimum-cli export openvino -m TheBloke/Llama-2-7B-Chat-GPTQ + + +| Remember, NPU supports GenAI models quantized symmetrically to INT4. +| Below is a list of such models: + +* meta-llama/Meta-Llama-3-8B-Instruct +* microsoft/Phi-3-mini-4k-instruct +* Qwen/Qwen2-7B +* mistralai/Mistral-7B-Instruct-v0.2 +* openbmb/MiniCPM-1B-sft-bf16 +* TinyLlama/TinyLlama-1.1B-Chat-v1.0 +* TheBloke/Llama-2-7B-Chat-GPTQ +* Qwen/Qwen2-7B-Instruct-GPTQ-Int4 Run generation using OpenVINO GenAI ################################### -It is recommended to install the latest available +It is typically recommended to install the latest available `driver `__. -Use the following code snippet to perform generation with OpenVINO GenAI API: +Use the following code snippet to perform generation with OpenVINO GenAI API. +Note that **currently, the NPU pipeline supports greedy decoding only**. This means that +you need to add ``do_sample=False`` **to the** ``generate()`` **method:** .. tab-set:: @@ -53,26 +110,31 @@ Use the following code snippet to perform generation with OpenVINO GenAI API: :sync: py .. 
code-block:: python + :emphasize-lines: 4 import openvino_genai as ov_genai model_path = "TinyLlama" pipe = ov_genai.LLMPipeline(model_path, "NPU") - print(pipe.generate("The Sun is yellow because", max_new_tokens=100)) + print(pipe.generate("The Sun is yellow because", max_new_tokens=100, do_sample=False)) .. tab-item:: C++ :sync: cpp .. code-block:: cpp + :emphasize-lines: 7, 9 #include "openvino/genai/llm_pipeline.hpp" #include int main(int argc, char* argv[]) { std::string model_path = "TinyLlama"; - ov::genai::LLMPipeline pipe(model_path, "NPU"); - std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)); + ov::genai::GenerationConfig config; + config.do_sample=false; + config.max_new_tokens=100; + std::cout << pipe.generate("The Sun is yellow because", config); } + Additional configuration options ################################ @@ -88,9 +150,9 @@ user explicitly sets a lower length limit for the response. You may configure both the 'maximum input prompt length' and 'minimum response length' using the following parameters: -* ``MAX_PROMPT_LEN``: Defines the maximum number of tokens that the LLM pipeline can process - for the input prompt (default: 1024). -* ``MIN_RESPONSE_LEN``: Defines the minimum number of tokens that the LLM pipeline will generate +* ``MAX_PROMPT_LEN`` - defines the maximum number of tokens that the LLM pipeline can process + for the input prompt (default: 1024), +* ``MIN_RESPONSE_LEN`` - defines the minimum number of tokens that the LLM pipeline will generate in its response (default: 150). Use the following code snippet to change the default settings: @@ -113,10 +175,93 @@ Use the following code snippet to change the default settings: ov::AnyMap pipeline_config = { { "MAX_PROMPT_LEN", 1024 }, { "MIN_RESPONSE_LEN", 512 } }; ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); +Cache compiled models ++++++++++++++++++++++ + +Specify the ``NPUW_CACHE_DIR`` option in ``pipeline_config`` for NPU pipeline to +cache the compiled models. Using the code snippet below shortens the initialization time +of the pipeline runs coming next: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + pipeline_config = { "NPUW_CACHE_DIR": ".npucache" } + pipe = ov_genai.LLMPipeline(model_path, "NPU", pipeline_config) + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + ov::AnyMap pipeline_config = { { "NPUW_CACHE_DIR", ".npucache" } }; + ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); + + +Disable memory allocation ++++++++++++++++++++++++++ + +In case of execution failures, either silent or with errors, try to update the NPU driver to +`32.0.100.3104 or newer `__. +If the update is not possible, set the ``DISABLE_OPENVINO_GENAI_NPU_L0`` +environment variable to disable NPU memory allocation, which might be supported +only on newer drivers for Intel Core Ultra 200V processors. + +Set the environment variable in a terminal: + +.. tab-set:: + + .. tab-item:: Linux + :sync: linux + + .. code-block:: console + + export DISABLE_OPENVINO_GENAI_NPU_L0=1 + + .. tab-item:: Windows + :sync: win + + .. code-block:: console + + set DISABLE_OPENVINO_GENAI_NPU_L0=1 + + +Performance modes ++++++++++++++++++++++ + +You can configure the NPU pipeline with the ``GENERATE_HINT`` option to switch +between two different performance modes: + +* ``FAST_COMPILE`` (default) - enables fast compilation at the expense of performance, +* ``BEST_PERF`` - ensures best possible performance at lower compilation speed. 
+ +Use the following code snippet: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + pipeline_config = { "GENERATE_HINT": "BEST_PERF" } + pipe = ov_genai.LLMPipeline(model_path, "NPU", pipeline_config) + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + ov::AnyMap pipeline_config = { { "GENERATE_HINT", "BEST_PERF" } }; + ov::genai::LLMPipeline pipe(model_path, "NPU", pipeline_config); + Additional Resources #################### * :doc:`NPU Device <../../openvino-workflow/running-inference/inference-devices-and-modes/npu-device>` * `OpenVINO GenAI Repo `__ -* `Neural Network Compression Framework `__ \ No newline at end of file +* `Neural Network Compression Framework `__ From 250f0015716b254cc580d50bb6d8de204c386a54 Mon Sep 17 00:00:00 2001 From: Andrzej Kopytko Date: Fri, 15 Nov 2024 10:02:49 +0100 Subject: [PATCH 08/10] [DOCS Added new hierarchical field for Coveo (#27569) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../openvino_custom_sphinx_sitemap/__init__.py | 17 +++++++++++++---- docs/sphinx_setup/conf.py | 3 ++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py index c82e0a8d5995f7..ca93d02d75c6a9 100644 --- a/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py +++ b/docs/openvino_custom_sphinx_sitemap/openvino_custom_sphinx_sitemap/__init__.py @@ -120,15 +120,24 @@ def process_coveo_meta(meta, url, link): for namespace, values in meta: namespace_element = ET.SubElement(url, namespace) + loc_element = url.find("loc") for tag_name, tag_value in values.items(): if tag_name == 'ovdoctype': - processed_link = process_link(link) - ET.SubElement(namespace_element, tag_name).text = processed_link - else: + ET.SubElement(namespace_element, tag_name).text = process_link(link) + elif tag_name == 'ovcategory' and loc_element is not None: + ET.SubElement(namespace_element, tag_name).text = extract_link(loc_element.text) + elif tag_name == 'ovversion': ET.SubElement(namespace_element, tag_name).text = tag_value def process_link(link): if '/' in link: return link.split('/')[0].replace("-", " ") - return link.split('.html')[0].replace("-", " ") \ No newline at end of file + return link.split('.html')[0].replace("-", " ") + +def extract_link(link): + path = link.split("://")[-1] + segments = path.split('/')[1:] + if segments and segments[-1].endswith('.html'): + segments = segments[:-1] + return '|'.join(segments) \ No newline at end of file diff --git a/docs/sphinx_setup/conf.py b/docs/sphinx_setup/conf.py index 01c74de0175bcf..8bf8438fb5e2c2 100644 --- a/docs/sphinx_setup/conf.py +++ b/docs/sphinx_setup/conf.py @@ -84,7 +84,8 @@ ov_sitemap_meta = [ ('coveo:metadata', { 'ovversion': version_name, - 'ovdoctype': 'null' + 'ovdoctype': 'null', + 'ovcategory': 'null' }) ] From 6489755e9f91fbb8bb273ee781f80494570f42cb Mon Sep 17 00:00:00 2001 From: Evgeny Kotov Date: Fri, 15 Nov 2024 10:14:49 +0100 Subject: [PATCH 09/10] fix TSUnsqueezeBackward Reshape does nothing (#27467) ### Details: - fix TSUnsqueezeBackward ### Tickets: - CVS-111560 --- .../transpose_sinking/ts_unsqueeze.cpp | 62 +++++++++++- .../transpose_sinking/ts_common_test.cpp | 97 +++++++++++++++++++ .../transpose_sinking/ts_general_test.cpp | 4 +- 3 files changed, 158 insertions(+), 5 deletions(-) diff --git 
a/src/common/transformations/src/transformations/transpose_sinking/ts_unsqueeze.cpp b/src/common/transformations/src/transformations/transpose_sinking/ts_unsqueeze.cpp index cdeb9226ed236c..ce47caa10c4c0f 100644 --- a/src/common/transformations/src/transformations/transpose_sinking/ts_unsqueeze.cpp +++ b/src/common/transformations/src/transformations/transpose_sinking/ts_unsqueeze.cpp @@ -14,7 +14,6 @@ #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" -#include "transformations/rt_info/transpose_sinking_attr.hpp" #include "transformations/transpose_sinking/ts_utils.hpp" #include "transformations/utils/utils.hpp" @@ -99,6 +98,22 @@ bool unsqueeze_axes_to_shape(const Output& input_node, } return true; } + +bool AreInputOutputShapesEqual(const std::shared_ptr& reshape) { + const auto input_shape = reshape->get_input_partial_shape(0); + const auto output_shape = reshape->get_output_partial_shape(0); + + if (input_shape.is_dynamic() || output_shape.is_dynamic()) { + return false; + } + return input_shape == output_shape; +} + +bool HasSpecialOne(const std::shared_ptr& reshape_const) { + auto const_value = reshape_const->cast_vector(); + return std::find(const_value.begin(), const_value.end(), -1) != const_value.end(); +} + } // namespace TSUnsqueezeForward::TSUnsqueezeForward() { @@ -112,6 +127,28 @@ TSUnsqueezeForward::TSUnsqueezeForward() { if (!unsqueeze_axes) { return false; } + auto ts_order_values = transpose_info.transpose_const->cast_vector(); + + // if main_node does nothing, just swap them + auto reshape = as_type_ptr(main_node); + if (reshape && AreInputOutputShapesEqual(reshape) && !HasSpecialOne(unsqueeze_axes)) { + TransposeInputsInfo transpose_input_info = {transpose_info.transpose, transpose_info.transpose_const, 0}; + // remove input Transpose + auto success = sink_forward::UpdateInputTransposes(main_node, transpose_input_info, {0}); + if (!success) { + return false; + } + + const auto reshape_order = ov::pass::transpose_sinking::utils::ReverseTransposeOrder(ts_order_values); + // transpose reshape const with Gather operation + auto axis = std::make_shared(element::i32, Shape{}, 0); + auto gather = + ov::pass::transpose_sinking::utils::ChangeValuesOrder(reshape->input_value(1), reshape_order, axis); + main_node->input(1).replace_source_output(gather); + + default_outputs_update(main_node, transpose_input_info); + return true; + } std::vector non_negative_axes; if (as_type_ptr(main_node)) { @@ -124,7 +161,6 @@ TSUnsqueezeForward::TSUnsqueezeForward() { non_negative_axes = ov::util::try_get_normalized_axis_vector(unsqueeze_axes->get_tensor_view(), rank, *main_node); } - auto ts_order_values = transpose_info.transpose_const->cast_vector(); ts_order_values = GetOrderBeforeReduction(non_negative_axes, ts_order_values); auto new_transpose_order = ov::op::v0::Constant::create(transpose_info.transpose_const->get_element_type(), @@ -183,6 +219,27 @@ TSUnsqueezeBackward::TSUnsqueezeBackward() { if (!transpose_order || !unsqueeze_axes) return false; + auto transpose_order_values = transpose_order->cast_vector(); + + // if main_node does nothing, just swap them + auto reshape = as_type_ptr(main_node); + if (reshape && AreInputOutputShapesEqual(reshape) && !HasSpecialOne(unsqueeze_axes)) { + // insert Transpose before main_node on #0 input + for (auto& new_node : sink_backward::InsertTransposeBeforeNode(main_node, transpose_order, {0})) { + register_new_node(new_node); + } + // transpose reshape const with Gather 
operation + auto axis = std::make_shared(element::i32, Shape{}, 0); + auto gather = ov::pass::transpose_sinking::utils::ChangeValuesOrder(reshape->input_value(1), + transpose_order_values, + axis); + main_node->input(1).replace_source_output(gather); + + main_node->validate_and_infer_types(); + RemoveTransposeConsumers(main_node); + return true; + } + std::vector non_negative_axes; if (as_type_ptr(main_node)) { auto success = shape_to_unsqueeze_axes(main_node, unsqueeze_axes, non_negative_axes); @@ -205,7 +262,6 @@ TSUnsqueezeBackward::TSUnsqueezeBackward() { } } - auto transpose_order_values = transpose_order->cast_vector(); auto old_transpose_order_values = transpose_order_values; std::vector new_values; diff --git a/src/common/transformations/tests/transpose_sinking/ts_common_test.cpp b/src/common/transformations/tests/transpose_sinking/ts_common_test.cpp index d71c9006edd38a..fc5c315312cfaa 100644 --- a/src/common/transformations/tests/transpose_sinking/ts_common_test.cpp +++ b/src/common/transformations/tests/transpose_sinking/ts_common_test.cpp @@ -1677,6 +1677,103 @@ auto test_backward_unsqueeze_dyn_rank = []() { INSTANTIATE_TEST_SUITE_P(TransposeSinkingCommonUnsqueezeBackwardDynRank, TSTestFixture, test_backward_unsqueeze_dyn_rank()); + +TEST_F(TransformationTestsF, TransposeSinkingCommonReshapeUnsqueezeBackwardSameShape) { + auto create_transpose = [](const std::shared_ptr& parent) { + auto ts_order = std::make_shared(element::u64, Shape{3}, Shape{1, 0, 2}); + return std::make_shared(parent, ts_order); + }; + + const Shape input_shape = {4, 5, 6}; + { + auto X = std::make_shared(element::f32, input_shape); + auto reshape_const = std::make_shared(element::u64, Shape{3}, Shape{4, 5, 6}); + auto reshape = std::make_shared(X, reshape_const, false); + auto transpose = create_transpose(reshape); + model = std::make_shared(ov::OutputVector{transpose}, ov::ParameterVector{X}); + } + + { + auto X = std::make_shared(element::f32, input_shape); + auto transpose = create_transpose(X); + auto reshape_const = std::make_shared(element::u64, Shape{3}, Shape{4, 5, 6}); + auto axis = std::make_shared(element::i32, Shape{}, 0); + auto indices = std::make_shared(element::i32, Shape{3}, Shape{1, 0, 2}); + auto gather = std::make_shared(reshape_const, indices, axis); + auto reshape = std::make_shared(transpose, gather, false); + model_ref = std::make_shared(ov::OutputVector{reshape}, ov::ParameterVector{X}); + } + + manager.register_pass(); +} + +TEST_F(TransformationTestsF, TransposeSinkingCommonReshapeUnsqueezeBackwardSameShapeSpecialOne) { + auto create_transpose = [](const std::shared_ptr& parent) { + auto ts_order = std::make_shared(element::u64, Shape{3}, Shape{1, 0, 2}); + return std::make_shared(parent, ts_order); + }; + + { + auto X = std::make_shared(element::f32, Shape{4, 5, 6}); + auto reshape_const = std::make_shared(element::i64, Shape{3}, std::vector{4, 5, -1}); + auto reshape = std::make_shared(X, reshape_const, false); + auto transpose = create_transpose(reshape); + model = std::make_shared(ov::OutputVector{transpose}, ov::ParameterVector{X}); + } + + model_ref = model->clone(); + + manager.register_pass(); +} + +TEST_F(TransformationTestsF, TransposeSinkingCommonReshapeUnsqueezeForwardSameShape) { + auto create_transpose = [](const std::shared_ptr& parent) { + auto ts_order = std::make_shared(element::u64, Shape{4}, Shape{1, 3, 0, 2}); + return std::make_shared(parent, ts_order); + }; + + const Shape input_shape = {4, 5, 6, 7}; + { + auto X = std::make_shared(element::f32, 
input_shape); + auto transpose = create_transpose(X); + auto reshape_const = std::make_shared(element::u64, Shape{4}, Shape{5, 7, 4, 6}); + auto reshape = std::make_shared(transpose, reshape_const, false); + model = std::make_shared(ov::OutputVector{reshape}, ov::ParameterVector{X}); + } + + { + auto X = std::make_shared(element::f32, input_shape); + auto reshape_const = std::make_shared(element::u64, Shape{4}, Shape{5, 7, 4, 6}); + auto axis = std::make_shared(element::i32, Shape{}, 0); + auto indices = std::make_shared(element::i32, Shape{4}, Shape{2, 0, 3, 1}); + auto gather = std::make_shared(reshape_const, indices, axis); + auto reshape = std::make_shared(X, gather, false); + auto transpose = create_transpose(reshape); + model_ref = std::make_shared(ov::OutputVector{transpose}, ov::ParameterVector{X}); + } + + manager.register_pass(); +} + +TEST_F(TransformationTestsF, TransposeSinkingCommonReshapeUnsqueezeForwardSameShapeSpecialOne) { + auto create_transpose = [](const std::shared_ptr& parent) { + auto ts_order = std::make_shared(element::u64, Shape{3}, Shape{1, 0, 2}); + return std::make_shared(parent, ts_order); + }; + + { + auto X = std::make_shared(element::f32, Shape{4, 5, 6}); + auto transpose = create_transpose(X); + auto reshape_const = std::make_shared(element::i64, Shape{3}, std::vector{4, 5, -1}); + auto reshape = std::make_shared(transpose, reshape_const, false); + model = std::make_shared(ov::OutputVector{reshape}, ov::ParameterVector{X}); + } + + model_ref = model->clone(); + + manager.register_pass(); +} + } // namespace common } // namespace testing } // namespace transpose_sinking diff --git a/src/common/transformations/tests/transpose_sinking/ts_general_test.cpp b/src/common/transformations/tests/transpose_sinking/ts_general_test.cpp index f00c69d2a8d734..7dc3a2b54c7bea 100644 --- a/src/common/transformations/tests/transpose_sinking/ts_general_test.cpp +++ b/src/common/transformations/tests/transpose_sinking/ts_general_test.cpp @@ -380,7 +380,7 @@ TEST_F(TransformationTestsF, TSGeneralTestMultipleTypes) { auto ng_order0 = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{0, 2, 3, 1}); auto transpose0 = std::make_shared(node0, ng_order0); - auto reshape_const = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{1, 40, 55, 96}); + auto reshape_const = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{2, 20, 55, 96}); auto reshape = std::make_shared(transpose0, reshape_const, false); auto ng_order1 = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{0, 3, 1, 2}); @@ -399,7 +399,7 @@ TEST_F(TransformationTestsF, TSGeneralTestMultipleTypes) { auto ng_order0 = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{0, 2, 3, 1}); auto transpose0 = std::make_shared(node0, ng_order0); - auto reshape_const = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{1, 40, 55, 96}); + auto reshape_const = std::make_shared(ov::element::u64, ov::Shape{4}, ov::Shape{2, 20, 55, 96}); auto reshape = std::make_shared(transpose0, reshape_const, false); auto node1 = MakeAllNodesSubgraph(reshape, 3, 3); From 3e63de016bced2a7ad18550fa5f8ca8fb47ba394 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 15 Nov 2024 20:57:31 +0800 Subject: [PATCH 10/10] [NPU] Remove template in ext wrapper and fuse functions (#27511) ### Details: - *Remove template in zero_ext_graph_wrappers* - *Remove zero_ext_graph_wrappers_interface.hpp* - *Add more low level debug log* - *Update level-zero-ext repo commit to use 1.9 version* ### Tickets: - *156387* --------- 
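For context, the core of the change can be distilled into a short standalone sketch (the version constants and printed strings below are illustrative placeholders, not the real Level Zero values): the graph-extension version becomes a runtime member checked with ordinary branches, so a single non-template class can serve every driver instead of one template instantiation per extension version, as the removed SFINAE-selected overloads did.

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-ins for the driver's extension version constants.
constexpr uint32_t EXT_VERSION_1_2 = (1u << 16) | 2;
constexpr uint32_t EXT_VERSION_1_7 = (1u << 16) | 7;

class GraphExtWrappers {
public:
    // The version is captured once at construction (from the loaded DDI table
    // in the real code), rather than being a class template parameter fixed
    // at compile time.
    explicit GraphExtWrappers(uint32_t extVersion) : _extVersion(extVersion) {}

    void getNativeBinary() const {
        // A single implementation with a runtime branch replaces the pair of
        // enable_if-selected template overloads.
        if (_extVersion < EXT_VERSION_1_7) {
            std::puts("older ext: copy the blob out via pfnGetNativeBinary");
        } else {
            std::puts("newer ext: borrow the blob pointer via pfnGetNativeBinary2");
        }
    }

private:
    uint32_t _extVersion;
};

int main() {
    GraphExtWrappers old_driver(EXT_VERSION_1_2);
    GraphExtWrappers new_driver(EXT_VERSION_1_7);
    old_driver.getNativeBinary();  // copy path on older extensions
    new_driver.getNativeBinary();  // zero-copy path on 1.7+
    return 0;
}
```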
Signed-off-by: Xin Wang --- .../include/driver_compiler_adapter.hpp | 4 +- .../compiler_adapter/include/driver_graph.hpp | 6 +- .../include/plugin_compiler_adapter.hpp | 4 +- .../compiler_adapter/include/plugin_graph.hpp | 6 +- .../include/ze_graph_ext_wrappers.hpp | 112 +--- .../ze_graph_ext_wrappers_interface.hpp | 42 -- .../src/driver_compiler_adapter.cpp | 24 +- .../src/compiler_adapter/src/driver_graph.cpp | 2 +- .../src/plugin_compiler_adapter.cpp | 24 +- .../src/compiler_adapter/src/plugin_graph.cpp | 2 +- .../src/ze_graph_ext_wrappers.cpp | 527 ++++++++---------- .../intel_npu/thirdparty/level-zero-ext | 2 +- 12 files changed, 269 insertions(+), 486 deletions(-) delete mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index dc000b99d7446b..82ababf21c147a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -16,7 +16,7 @@ #include "intel_npu/config/config.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" -#include "ze_graph_ext_wrappers_interface.hpp" +#include "ze_graph_ext_wrappers.hpp" namespace intel_npu { @@ -54,7 +54,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t compilerVersion) const; std::shared_ptr _zeroInitStruct; - std::shared_ptr _zeGraphExt; + std::shared_ptr _zeGraphExt; ze_device_graph_properties_t _deviceGraphProperties = {}; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index f7ea940cf9a160..0f426581687f65 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -10,13 +10,13 @@ #include "intel_npu/common/igraph.hpp" #include "intel_npu/utils/zero/zero_init.hpp" -#include "ze_graph_ext_wrappers_interface.hpp" +#include "ze_graph_ext_wrappers.hpp" namespace intel_npu { class DriverGraph final : public IGraph { public: - DriverGraph(const std::shared_ptr& zeGraphExt, + DriverGraph(const std::shared_ptr& zeGraphExt, const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, @@ -37,7 +37,7 @@ class DriverGraph final : public IGraph { private: bool release_blob(const Config& config); - std::shared_ptr _zeGraphExt; + std::shared_ptr _zeGraphExt; std::shared_ptr _zeroInitStruct; Logger _logger; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index eab8a19627cd1c..8d2616884e7d5f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -11,7 +11,7 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" -#include "ze_graph_ext_wrappers_interface.hpp" +#include "ze_graph_ext_wrappers.hpp" namespace intel_npu { @@ -28,7 +28,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { private: std::shared_ptr _zeroInitStruct; 
- std::shared_ptr _zeGraphExt; + std::shared_ptr _zeGraphExt; ov::SoPtr _compiler; Logger _logger; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 1028112368e67f..2d7d9bfd429e47 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -12,13 +12,13 @@ #include "intel_npu/icompiler.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" -#include "ze_graph_ext_wrappers_interface.hpp" +#include "ze_graph_ext_wrappers.hpp" namespace intel_npu { class PluginGraph final : public IGraph { public: - PluginGraph(const std::shared_ptr& zeGraphExt, + PluginGraph(const std::shared_ptr& zeGraphExt, const ov::SoPtr& compiler, const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, @@ -38,7 +38,7 @@ class PluginGraph final : public IGraph { ~PluginGraph() override; private: - std::shared_ptr _zeGraphExt; + std::shared_ptr _zeGraphExt; std::shared_ptr _zeroInitStruct; const ov::SoPtr _compiler; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 1bc58b153a48ff..3e8c17ad13db7e 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -10,42 +10,19 @@ #include #include +#include "intel_npu/network_metadata.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_types.hpp" -#include "ze_graph_ext_wrappers_interface.hpp" namespace intel_npu { -#define NotSupportQuery(T) (T == ZE_GRAPH_EXT_VERSION_1_2) - -// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, -// pfnQueryNetworkGetSupportedLayers) -#define SupportAPIGraphQueryNetworkV1(T) (T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4) - -// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) -#define SupportAPIGraphQueryNetworkV2(T) ((!NotSupportQuery(T) && !SupportAPIGraphQueryNetworkV1(T))) - -// For ext version >= 1.5, pfnCreate2 api is avaible -#define NotSupportGraph2(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4) - -// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to -// "ze_graph_dditable_ext_1_6_t". 
-// See: E#117498 -#define NotSupportArgumentMetadata(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \ - T == ZE_GRAPH_EXT_VERSION_1_5) - -#define UseCopyForNativeBinary(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \ - T == ZE_GRAPH_EXT_VERSION_1_5 || T == ZE_GRAPH_EXT_VERSION_1_6) +using SerializedIR = std::pair>; /** * Adapter to use CiD through ZeroAPI */ -template -class ZeGraphExtWrappers final : public ZeGraphExtWrappersInterface { +class ZeGraphExtWrappers { public: ZeGraphExtWrappers(const std::shared_ptr& zeroInitStruct); ZeGraphExtWrappers(const ZeGraphExtWrappers&) = delete; @@ -53,105 +30,40 @@ class ZeGraphExtWrappers final : public ZeGraphExtWrappersInterface { ~ZeGraphExtWrappers(); std::unordered_set queryGraph(std::pair> serializedIR, - const std::string& buildFlags) const override; + const std::string& buildFlags) const; ze_graph_handle_t getGraphHandle(std::pair> serializedIR, const std::string& buildFlags, - const uint32_t& flags) const override; + const uint32_t& flags) const; - ze_graph_handle_t getGraphHandle(const std::vector& network) const override; + ze_graph_handle_t getGraphHandle(const std::vector& network) const; - NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const override; + NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; - _ze_result_t destroyGraph(ze_graph_handle_t graphHandle) override; + _ze_result_t destroyGraph(ze_graph_handle_t graphHandle); void getGraphBinary(ze_graph_handle_t graphHandle, std::vector& blob, const uint8_t*& blobPtr, - size_t& blobSize) const override; + size_t& blobSize) const; - void setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi_, const void* argv) const override; + void setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi_, const void* argv) const; - void initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const override; + void initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const; private: - template = true> std::unordered_set getQueryResultFromSupportedLayers( ze_result_t result, ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - template = true> void getMetadata(ze_graph_handle_t graphHandle, uint32_t index, std::vector& inputs, std::vector& outputs) const; - template = true> - void getMetadata(ze_graph_handle_t graphHandle, - uint32_t index, - std::vector& inputs, - std::vector& outputs) const; - - template = true> - void getNativeBinary(ze_graph_handle_t graphHandle, - std::vector& blob, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - void getNativeBinary(ze_graph_handle_t graphHandle, - std::vector& /* unusedBlob */, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - ze_result_t queryNetworkCreateV2(std::pair> serializedIR, - const std::string& buildFlags, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) - template = true> - std::unordered_set queryImpl(std::pair> serializedIR, - const std::string& buildFlags) const; - - template = true> - ze_result_t queryNetworkCreateV1(std::pair> serializedIR, - const std::string& buildFlags, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, - // 
pfnQueryNetworkGetSupportedLayers) - template = true> - std::unordered_set queryImpl(std::pair> serializedIR, - const std::string& buildFlags) const; - - // For ext version < 1.3 - template = true> - std::unordered_set queryImpl(std::pair> serializedIR, - const std::string& buildFlags) const; - - template = true> - void createGraph(std::pair> serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - - template = true> - void createGraph(std::pair> serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - void initialize_graph_through_command_list(ze_graph_handle_t graphHandle, const Config& config) const; std::shared_ptr _zeroInitStruct; + uint32_t _graphExtVersion; Logger _logger; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp deleted file mode 100644 index ac44f9853e11e3..00000000000000 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "intel_npu/network_metadata.hpp" - -namespace intel_npu { - -using SerializedIR = std::pair>; - -class ZeGraphExtWrappersInterface { -public: - virtual std::unordered_set queryGraph(SerializedIR serializedIR, - const std::string& buildFlags) const = 0; - - virtual ze_graph_handle_t getGraphHandle(SerializedIR serializedIR, - const std::string& buildFlags, - const uint32_t& flags) const = 0; - - virtual ze_graph_handle_t getGraphHandle(const std::vector& network) const = 0; - - virtual NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const = 0; - - virtual _ze_result_t destroyGraph(ze_graph_handle_t graphHandle) = 0; - - virtual void getGraphBinary(ze_graph_handle_t graphHandle, - std::vector& blob, - const uint8_t*& blobPtr, - size_t& blobSize) const = 0; - - virtual void setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi_, const void* argv) const = 0; - - virtual void initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const = 0; - - virtual ~ZeGraphExtWrappersInterface() = default; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index b4da8a2bcc316b..f819ed73711cf2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -155,29 +155,7 @@ DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr>(_zeroInitStruct); - break; - case ZE_GRAPH_EXT_VERSION_1_4: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - case ZE_GRAPH_EXT_VERSION_1_5: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - case ZE_GRAPH_EXT_VERSION_1_6: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - case ZE_GRAPH_EXT_VERSION_1_7: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - case ZE_GRAPH_EXT_VERSION_1_8: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - default: - _zeGraphExt = std::make_shared>(_zeroInitStruct); - break; - } + _zeGraphExt = std::make_shared(_zeroInitStruct); _logger.info("initialize DriverCompilerAdapter complete, using graphExtVersion: %d.%d", 
                 ZE_MAJOR_VERSION(graphExtVersion),
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
index 84759bf802f1c1..e1f3990b835e8d 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
@@ -10,7 +10,7 @@
 namespace intel_npu {
 
-DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappersInterface>& zeGraphExt,
+DriverGraph::DriverGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
                          const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
                          ze_graph_handle_t graphHandle,
                          NetworkMetadata metadata,
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
index 73dd3817e24812..06d71fd1126c17 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
@@ -70,29 +70,7 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
-    switch (graphExtVersion) {
-    case ZE_GRAPH_EXT_VERSION_1_3:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_3>>(_zeroInitStruct);
-        break;
-    case ZE_GRAPH_EXT_VERSION_1_4:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_4>>(_zeroInitStruct);
-        break;
-    case ZE_GRAPH_EXT_VERSION_1_5:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_5>>(_zeroInitStruct);
-        break;
-    case ZE_GRAPH_EXT_VERSION_1_6:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_6>>(_zeroInitStruct);
-        break;
-    case ZE_GRAPH_EXT_VERSION_1_7:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_7>>(_zeroInitStruct);
-        break;
-    case ZE_GRAPH_EXT_VERSION_1_8:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_8>>(_zeroInitStruct);
-        break;
-    default:
-        _zeGraphExt = std::make_shared<ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_2>>(_zeroInitStruct);
-        break;
-    }
+    _zeGraphExt = std::make_shared<ZeGraphExtWrappers>(_zeroInitStruct);
 
     _logger.info("initialize PluginCompilerAdapter complete, using graphExtVersion: %d.%d",
                  ZE_MAJOR_VERSION(graphExtVersion),
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
index 8f60efd50af75c..c99069a0a9760f 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp
@@ -10,7 +10,7 @@
 namespace intel_npu {
 
-PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappersInterface>& zeGraphExt,
+PluginGraph::PluginGraph(const std::shared_ptr<ZeGraphExtWrappers>& zeGraphExt,
                          const ov::SoPtr<ICompiler>& compiler,
                          const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
                          ze_graph_handle_t graphHandle,
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
index fad389ca30e0c7..f6366a2509747b 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp
@@ -14,6 +14,25 @@
 #include "intel_npu/utils/zero/zero_wrappers.hpp"
 #include "openvino/core/model.hpp"
 
+#define NotSupportQuery(T) (T <= ZE_GRAPH_EXT_VERSION_1_2)
+
+// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy,
+// pfnQueryNetworkGetSupportedLayers)
+#define SupportAPIGraphQueryNetworkV1(T) (T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4)
+
+// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory)
+#define SupportAPIGraphQueryNetworkV2(T) ((!NotSupportQuery(T) && !SupportAPIGraphQueryNetworkV1(T)))
+
+// For ext version >= 1.5, the pfnCreate2 API is available
+#define NotSupportGraph2(T) (T < ZE_GRAPH_EXT_VERSION_1_5)
+
+// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to
+// "ze_graph_dditable_ext_1_6_t".
+// See: E#117498
+#define NotSupportArgumentMetadata(T) (T < ZE_GRAPH_EXT_VERSION_1_6)
+
+#define UseCopyForNativeBinary(T) (T < ZE_GRAPH_EXT_VERSION_1_7)
+
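As a quick illustration of how these gates behave, here is a minimal standalone C++ sketch. It assumes the usual ZE_MAKE_VERSION-style packing (16-bit major, 16-bit minor) for the ZE_GRAPH_EXT_VERSION_* constants, which are mocked here rather than taken from ze_graph_ext.h:

    // Standalone sketch: how the capability gates above resolve for one version.
    // The version constants are mocked with the assumed (major << 16) | minor layout.
    #include <cstdint>
    #include <cstdio>

    enum : uint32_t {
        ZE_GRAPH_EXT_VERSION_1_2 = (1u << 16) | 2u,
        ZE_GRAPH_EXT_VERSION_1_5 = (1u << 16) | 5u,
        ZE_GRAPH_EXT_VERSION_1_6 = (1u << 16) | 6u,
        ZE_GRAPH_EXT_VERSION_1_7 = (1u << 16) | 7u,
    };

    #define NotSupportQuery(T) (T <= ZE_GRAPH_EXT_VERSION_1_2)
    #define NotSupportGraph2(T) (T < ZE_GRAPH_EXT_VERSION_1_5)
    #define NotSupportArgumentMetadata(T) (T < ZE_GRAPH_EXT_VERSION_1_6)
    #define UseCopyForNativeBinary(T) (T < ZE_GRAPH_EXT_VERSION_1_7)

    int main() {
        const uint32_t v = ZE_GRAPH_EXT_VERSION_1_5;  // pretend the driver reported 1.5
        std::printf("query:%d create2:%d argMetadata:%d copyBinary:%d\n",
                    !NotSupportQuery(v), !NotSupportGraph2(v),
                    !NotSupportArgumentMetadata(v), UseCopyForNativeBinary(v));
    }

Because the extension version is now an ordinary runtime value instead of a template parameter, a single wrapper instance can serve whatever driver is present, which is what lets the per-version template instantiations below be deleted.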
"pfnGraphGetArgumentMetadata" call not safe for use prior to +// "ze_graph_dditable_ext_1_6_t". +// See: E#117498 +#define NotSupportArgumentMetadata(T) (T < ZE_GRAPH_EXT_VERSION_1_6) + +#define UseCopyForNativeBinary(T) (T < ZE_GRAPH_EXT_VERSION_1_7) + namespace { ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElementType) { @@ -63,19 +82,28 @@ ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElemen namespace intel_npu { -template -ZeGraphExtWrappers::ZeGraphExtWrappers(const std::shared_ptr& zeroInitStruct) +ZeGraphExtWrappers::ZeGraphExtWrappers(const std::shared_ptr& zeroInitStruct) : _zeroInitStruct(zeroInitStruct), - _logger("ZeGraphExtWrappers", Logger::global().level()) {} + _graphExtVersion(zeroInitStruct->getGraphDdiTable().version()), + _logger("ZeGraphExtWrappers", Logger::global().level()) { + _logger.info("Graph ext version used by zero wrapper: %d.%d", + ZE_MAJOR_VERSION(_graphExtVersion), + ZE_MINOR_VERSION(_graphExtVersion)); + _logger.debug("capabilities:"); + _logger.debug("-SupportQuery: %d", !NotSupportQuery(_graphExtVersion)); + _logger.debug("-SupportAPIGraphQueryNetworkV1: %d", SupportAPIGraphQueryNetworkV1(_graphExtVersion)); + _logger.debug("-SupportAPIGraphQueryNetworkV2 :%d", SupportAPIGraphQueryNetworkV2(_graphExtVersion)); + _logger.debug("-SupportpfnCreate2 :%d", !NotSupportGraph2(_graphExtVersion)); + _logger.debug("-SupportArgumentMetadata :%d", !NotSupportArgumentMetadata(_graphExtVersion)); + _logger.debug("-UseCopyForNativeBinary :%d", UseCopyForNativeBinary(_graphExtVersion)); +} -template -ZeGraphExtWrappers::~ZeGraphExtWrappers() { - _logger.debug("ZeGraphExtWrappers obj destroyed"); +ZeGraphExtWrappers::~ZeGraphExtWrappers() { + _logger.debug("Obj destroyed"); } -template -_ze_result_t ZeGraphExtWrappers::destroyGraph(ze_graph_handle_t graphHandle) { - _logger.debug("destroyGraph - pfnDestroy graphHandle"); +_ze_result_t ZeGraphExtWrappers::destroyGraph(ze_graph_handle_t graphHandle) { + _logger.debug("destroyGraph - perfrom pfnDestroy"); auto result = _zeroInitStruct->getGraphDdiTable().pfnDestroy(graphHandle); if (ZE_RESULT_SUCCESS != result) { @@ -87,73 +115,62 @@ _ze_result_t ZeGraphExtWrappers::destroyGraph(ze_graph_handle_t return result; } -template -template > -void ZeGraphExtWrappers::getNativeBinary(ze_graph_handle_t graphHandle, - std::vector& blob, - const uint8_t*& blobPtr, - size_t& blobSize) const { - // Get blob size first - auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, nullptr); - blob.resize(blobSize); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.", - result, - _zeroInitStruct->getGraphDdiTable()); - - // Get blob data - result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, blob.data()); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob data, Failed to compile network.", - result, - _zeroInitStruct->getGraphDdiTable()); - - blobPtr = blob.data(); -} - -template -template > -void ZeGraphExtWrappers::getNativeBinary(ze_graph_handle_t graphHandle, - std::vector& /* unusedBlob */, - const uint8_t*& blobPtr, - size_t& blobSize) const { - // Get blob ptr and size - auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.", - result, - _zeroInitStruct->getGraphDdiTable()); -} - -template -void 
@@ -87,73 +115,62 @@ _ze_result_t ZeGraphExtWrappers<TableExtension>::destroyGraph(ze_graph_handle_t
     return result;
 }
 
-template <ze_graph_ext_version_t TableExtension>
-template <ze_graph_ext_version_t T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
-void ZeGraphExtWrappers<TableExtension>::getNativeBinary(ze_graph_handle_t graphHandle,
-                                                         std::vector<uint8_t>& blob,
-                                                         const uint8_t*& blobPtr,
-                                                         size_t& blobSize) const {
-    // Get blob size first
-    auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
-    blob.resize(blobSize);
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.",
-                                    result,
-                                    _zeroInitStruct->getGraphDdiTable());
-
-    // Get blob data
-    result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob data, Failed to compile network.",
-                                    result,
-                                    _zeroInitStruct->getGraphDdiTable());
-
-    blobPtr = blob.data();
-}
-
-template <ze_graph_ext_version_t TableExtension>
-template <ze_graph_ext_version_t T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
-void ZeGraphExtWrappers<TableExtension>::getNativeBinary(ze_graph_handle_t graphHandle,
-                                                         std::vector<uint8_t>& /* unusedBlob */,
-                                                         const uint8_t*& blobPtr,
-                                                         size_t& blobSize) const {
-    // Get blob ptr and size
-    auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.",
-                                    result,
-                                    _zeroInitStruct->getGraphDdiTable());
-}
-
-template <ze_graph_ext_version_t TableExtension>
-void ZeGraphExtWrappers<TableExtension>::getGraphBinary(ze_graph_handle_t graphHandle,
-                                                        std::vector<uint8_t>& blob,
-                                                        const uint8_t*& blobPtr,
-                                                        size_t& blobSize) const {
+void ZeGraphExtWrappers::getGraphBinary(ze_graph_handle_t graphHandle,
+                                        std::vector<uint8_t>& blob,
+                                        const uint8_t*& blobPtr,
+                                        size_t& blobSize) const {
     if (graphHandle == nullptr) {
         OPENVINO_THROW("Graph handle is null");
     }
 
-    _logger.info("ZeGraphExtWrappers getGraphBinary get blob from graphHandle");
-
-    getNativeBinary(graphHandle, blob, blobPtr, blobSize);
+    _logger.debug("getGraphBinary - get blob from graphHandle");
+
+    if (UseCopyForNativeBinary(_graphExtVersion)) {
+        // Get blob size first
+        _logger.debug("getGraphBinary - perform pfnGetNativeBinary to get size");
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
+        blob.resize(blobSize);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.",
+                                        result,
+                                        _zeroInitStruct->getGraphDdiTable());
+
+        // Get blob data
+        _logger.debug("getGraphBinary - perform pfnGetNativeBinary to get data");
+        result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary(graphHandle, &blobSize, blob.data());
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob data, Failed to compile network.",
+                                        result,
+                                        _zeroInitStruct->getGraphDdiTable());
+
+        blobPtr = blob.data();
+    } else {
+        // Get blob ptr and size
+        _logger.debug("getGraphBinary - perform pfnGetNativeBinary2 to get size and data");
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetNativeBinary get blob size, Failed to compile network.",
+                                        result,
+                                        _zeroInitStruct->getGraphDdiTable());
+    }
 }
 
-template <ze_graph_ext_version_t TableExtension>
-void ZeGraphExtWrappers<TableExtension>::setGraphArgumentValue(ze_graph_handle_t graphHandle,
-                                                               uint32_t argi,
-                                                               const void* argv) const {
+void ZeGraphExtWrappers::setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi, const void* argv) const {
+    _logger.debug("setGraphArgumentValue - perform pfnSetArgumentValue");
     auto result = _zeroInitStruct->getGraphDdiTable().pfnSetArgumentValue(graphHandle, argi, argv);
     THROW_ON_FAIL_FOR_LEVELZERO_EXT("zeGraphSetArgumentValue", result, _zeroInitStruct->getGraphDdiTable());
 }
 
-template <ze_graph_ext_version_t TableExtension>
-void ZeGraphExtWrappers<TableExtension>::initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const {
+void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const {
     if (_zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8) {
+        _logger.debug("Use initialize_graph_through_command_list for ext version smaller than 1.8");
         initialize_graph_through_command_list(graphHandle, config);
     } else {
+        _logger.debug("Initialize graph based on graph properties for ext version 1.8 and newer");
         ze_graph_properties_2_t properties = {};
         properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
+        _logger.debug("initializeGraph - perform pfnGetProperties2");
         _zeroInitStruct->getGraphDdiTable().pfnGetProperties2(graphHandle, &properties);
 
         if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
+            _logger.debug("initializeGraph - perform pfnGraphInitialize");
             _zeroInitStruct->getGraphDdiTable().pfnGraphInitialize(graphHandle);
         }
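The copy path above follows the common Level Zero two-call idiom: query the size with a null buffer, resize, then fetch the data. A generic, self-contained sketch of that idiom with a stand-in for the driver entry point (fakeGetNativeBinary is hypothetical; the real pfnGetNativeBinary takes the graph handle as well):

    // Two-call size/data idiom used by the UseCopyForNativeBinary branch.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    // Stand-in for a driver call: reports the size when data == nullptr,
    // otherwise copies the payload into the caller's buffer.
    static int fakeGetNativeBinary(size_t* size, uint8_t* data) {
        static const uint8_t payload[] = {0xde, 0xad, 0xbe, 0xef};
        if (data == nullptr) {
            *size = sizeof(payload);
            return 0;
        }
        std::memcpy(data, payload, *size < sizeof(payload) ? *size : sizeof(payload));
        return 0;
    }

    int main() {
        size_t blobSize = 0;
        fakeGetNativeBinary(&blobSize, nullptr);      // first call: size only
        std::vector<uint8_t> blob(blobSize);
        fakeGetNativeBinary(&blobSize, blob.data());  // second call: data
        std::printf("fetched %zu bytes\n", blob.size());
    }

The pfnGetNativeBinary2 branch avoids the copy entirely by handing back a pointer into driver-owned memory, which is presumably where the continuous-inference memory improvement comes from.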
@@ -163,32 +180,31 @@ void ZeGraphExtWrappers<TableExtension>::initializeGraph(ze_graph_handle_t graph
     }
 }
 
-template <ze_graph_ext_version_t TableExtension>
-void ZeGraphExtWrappers<TableExtension>::initialize_graph_through_command_list(ze_graph_handle_t graphHandle,
-                                                                               const Config& config) const {
+void ZeGraphExtWrappers::initialize_graph_through_command_list(ze_graph_handle_t graphHandle,
+                                                               const Config& config) const {
     ze_device_properties_t deviceProperties = {};
     deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
     THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties",
                                 zeDeviceGetProperties(_zeroInitStruct->getDevice(), &deviceProperties));
     auto groupOrdinal = zeroUtils::findGroupOrdinal(_zeroInitStruct->getDevice(), deviceProperties);
 
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list init start - create graph_command_list");
+    _logger.debug("initialize_graph_through_command_list init start - create graph_command_list");
     CommandList graph_command_list(_zeroInitStruct, groupOrdinal);
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - create graph_command_queue");
+    _logger.debug("initialize_graph_through_command_list - create graph_command_queue");
     CommandQueue graph_command_queue(_zeroInitStruct, ZE_COMMAND_QUEUE_PRIORITY_NORMAL, groupOrdinal, false);
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - create fence");
+    _logger.debug("initialize_graph_through_command_list - create fence");
     Fence fence(graph_command_queue);
 
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - performing appendGraphInitialize");
+    _logger.debug("initialize_graph_through_command_list - performing appendGraphInitialize");
     graph_command_list.appendGraphInitialize(graphHandle);
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - closing graph command list");
+    _logger.debug("initialize_graph_through_command_list - closing graph command list");
     graph_command_list.close();
 
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - performing executeCommandList");
+    _logger.debug("initialize_graph_through_command_list - performing executeCommandList");
     graph_command_queue.executeCommandList(graph_command_list, fence);
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - performing hostSynchronize");
+    _logger.debug("initialize_graph_through_command_list - performing hostSynchronize");
     fence.hostSynchronize();
-    _logger.debug("ZeGraphExtWrappers::initialize_graph_through_command_list - hostSynchronize completed");
+    _logger.debug("initialize_graph_through_command_list - hostSynchronize completed");
 }
 
 // Parse the result string of query from foramt to unordered_set of string
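The fallback path records graph initialization into a command list and blocks on a fence. An ordering skeleton of that sequence, with stub types standing in for the real CommandList/CommandQueue/Fence wrappers around Level Zero handles:

    // Ordering skeleton of initialize_graph_through_command_list (stub types).
    #include <cstdio>

    struct CommandList {
        void appendGraphInitialize() { std::puts("append graph init"); }
        void close()                 { std::puts("close list"); }
    };
    struct CommandQueue {
        void execute(CommandList&)   { std::puts("execute list"); }
    };
    struct Fence {
        void hostSynchronize()       { std::puts("host sync"); }
    };

    int main() {
        CommandList list;
        CommandQueue queue;
        Fence fence;
        list.appendGraphInitialize();  // record the init work
        list.close();                  // seal the list before submission
        queue.execute(list);           // submit on the device queue
        fence.hostSynchronize();       // block until the device finishes
    }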
THROW_ON_FAIL_FOR_LEVELZERO_EXT("queryNetworkCreateV1", result, _zeroInitStruct->getGraphDdiTable()); - - return result; -} - -// For ext version == 1.3 && == 1.4, query is supported, calling querynetwork api in _zeroInitStruct->getGraphDdiTable() -template -template > -std::unordered_set ZeGraphExtWrappers::queryImpl( - std::pair> serializedIR, - const std::string& buildFlags) const { - _logger.info("queryImpl - Calling queryNetwork of 1.3 version."); - - ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; - - auto result = queryNetworkCreateV1(std::move(serializedIR), buildFlags, hGraphQueryNetwork); - - return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); -} - -// For ext version >= 1.5 -template -template > -ze_result_t ZeGraphExtWrappers::queryNetworkCreateV2( - std::pair> serializedIR, - const std::string& buildFlags, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const { - ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NGRAPH_LITE, - serializedIR.first, - serializedIR.second.get(), - buildFlags.c_str(), - ZE_GRAPH_FLAG_NONE}; - - // Create querynetwork handle - _logger.debug("queryNetworkCreateV2 - performing pfnQueryNetworkCreate2"); - ze_result_t result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkCreate2(_zeroInitStruct->getContext(), - _zeroInitStruct->getDevice(), - &desc, - &hGraphQueryNetwork); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("queryNetworkCreateV2", result, _zeroInitStruct->getGraphDdiTable()); - - return result; -} - -// For ext version >= 1.5 -template -template > -std::unordered_set ZeGraphExtWrappers::queryImpl( - std::pair> serializedIR, - const std::string& buildFlags) const { - _logger.debug("queryImpl - Calling queryNetwork of 1.5 version."); - - ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; - - auto result = queryNetworkCreateV2(std::move(serializedIR), buildFlags, hGraphQueryNetwork); - - return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); -} - -template -template > -std::unordered_set ZeGraphExtWrappers::getQueryResultFromSupportedLayers( +std::unordered_set ZeGraphExtWrappers::getQueryResultFromSupportedLayers( ze_result_t result, ze_graph_query_network_handle_t& hGraphQueryNetwork) const { + if (NotSupportQuery(_graphExtVersion)) { + OPENVINO_THROW("pfnQueryNetworkGetSupportedLayers not supported for ", + ZE_MAJOR_VERSION(_graphExtVersion), + ".", + ZE_MINOR_VERSION(_graphExtVersion)); + } // Get the size of query result + _logger.debug("getQueryResultFromSupportLayers - perfrom pfnQueryNetworkGetSupportedLayers to get size"); size_t size = 0; result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkGetSupportedLayers(hGraphQueryNetwork, &size, nullptr); THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryNetworkGetSupportedLayers get size of query result", @@ -313,6 +244,7 @@ std::unordered_set ZeGraphExtWrappers::getQueryResu _zeroInitStruct->getGraphDdiTable()); // Get the result data of query + _logger.debug("getQueryResultFromSupportLayers - perfrom pfnQueryNetworkGetSupportedLayers to get data"); std::vector supportedLayers(size); result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkGetSupportedLayers(hGraphQueryNetwork, &size, @@ -321,80 +253,117 @@ std::unordered_set ZeGraphExtWrappers::getQueryResu result, _zeroInitStruct->getGraphDdiTable()); + _logger.debug("getQueryResultFromSupportLayers - perfrom pfnQueryNetworkDestroy"); result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkDestroy(hGraphQueryNetwork); 
THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryNetworkDestroy", result, _zeroInitStruct->getGraphDdiTable()); return parseQueryResult(supportedLayers); } -template -std::unordered_set ZeGraphExtWrappers::queryGraph( - std::pair> serializedIR, - const std::string& buildFlags) const { - return queryImpl(std::move(serializedIR), buildFlags); -} - -// For ext version <1.5, calling pfnCreate api in _zeroInitStruct->getGraphDdiTable() -template -template > -void ZeGraphExtWrappers::createGraph(std::pair> serializedIR, - const std::string& buildFlags, - const uint32_t& /*flags*/, - ze_graph_handle_t* graph) const { - ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NGRAPH_LITE, - serializedIR.first, - serializedIR.second.get(), - buildFlags.c_str()}; - - _logger.debug("createGraph - performing pfnCreate"); - // Create querynetwork handle - auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(), - _zeroInitStruct->getDevice(), - &desc, - graph); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable()); +std::unordered_set ZeGraphExtWrappers::queryGraph(std::pair> serializedIR, + const std::string& buildFlags) const { + // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) + // ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, + // pfnQueryNetworkGetSupportedLayers) + // For ext version < 1.3, query is not supported + ze_result_t result = ZE_RESULT_SUCCESS; + if (NotSupportQuery(_graphExtVersion)) { + // For ext version < 1.3, query is unsupported, return empty result and add debug log here + _logger.warning("queryGraph - Driver version is less than 1.3, queryNetwork is unsupported."); + return std::unordered_set(); + } else if (SupportAPIGraphQueryNetworkV1(_graphExtVersion)) { + // For ext version == 1.3 && == 1.4, query is supported, calling querynetwork api in + // _zeroInitStruct->getGraphDdiTable() + ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; + + // For ext version == 1.3 && == 1.4 + ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + nullptr, + ZE_GRAPH_FORMAT_NGRAPH_LITE, + serializedIR.first, + serializedIR.second.get(), + buildFlags.c_str()}; + + // Create querynetwork handle + _logger.debug("For ext of 1.3 and 1.4 - perform pfnQueryNetworkCreate"); + result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkCreate(_zeroInitStruct->getContext(), + _zeroInitStruct->getDevice(), + &desc, + &hGraphQueryNetwork); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryNetworkCreate", result, _zeroInitStruct->getGraphDdiTable()); + + return getQueryResultFromSupportedLayers(result, hGraphQueryNetwork); + } else if (SupportAPIGraphQueryNetworkV2(_graphExtVersion)) { + // For ext version >= 1.5 + ze_graph_query_network_handle_t hGraphQueryNetwork = nullptr; + + // For ext version >= 1.5 + ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, + nullptr, + ZE_GRAPH_FORMAT_NGRAPH_LITE, + serializedIR.first, + serializedIR.second.get(), + buildFlags.c_str(), + ZE_GRAPH_FLAG_NONE}; + + // Create querynetwork handle + _logger.debug("For ext larger than 1.4 - perform pfnQueryNetworkCreate2"); + result = _zeroInitStruct->getGraphDdiTable().pfnQueryNetworkCreate2(_zeroInitStruct->getContext(), + _zeroInitStruct->getDevice(), + &desc, + &hGraphQueryNetwork); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnQueryNetworkCreate2", result, _zeroInitStruct->getGraphDdiTable()); + + return 
-// For ext version >= 1.5, calling pfnCreate2 api in _zeroInitStruct->getGraphDdiTable()
-template <ze_graph_ext_version_t TableExtension>
-template <ze_graph_ext_version_t T, std::enable_if_t<!NotSupportGraph2(T), bool>>
-void ZeGraphExtWrappers<TableExtension>::createGraph(std::pair<size_t, std::shared_ptr<uint8_t>> serializedIR,
+ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(std::pair<size_t, std::shared_ptr<uint8_t>> serializedIR,
                                                      const std::string& buildFlags,
-                                                     const uint32_t& flags,
-                                                     ze_graph_handle_t* graph) const {
-    ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
-                              nullptr,
-                              ZE_GRAPH_FORMAT_NGRAPH_LITE,
-                              serializedIR.first,
-                              serializedIR.second.get(),
-                              buildFlags.c_str(),
-                              flags};
-
-    _logger.debug("createGraph - performing pfnCreate2");
-    // Create querynetwork handle
-    auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate2(_zeroInitStruct->getContext(),
-                                                                 _zeroInitStruct->getDevice(),
-                                                                 &desc,
-                                                                 graph);
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate2", result, _zeroInitStruct->getGraphDdiTable());
-}
-
-template <ze_graph_ext_version_t TableExtension>
-ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(
-    std::pair<size_t, std::shared_ptr<uint8_t>> serializedIR,
-    const std::string& buildFlags,
-    const uint32_t& flags) const {
+                                                     const uint32_t& flags) const {
     ze_graph_handle_t graphHandle;
-
-    createGraph(std::move(serializedIR), buildFlags, flags, &graphHandle);
-
+    if (NotSupportGraph2(_graphExtVersion)) {
+        // For ext version <1.5, calling pfnCreate api in _zeroInitStruct->getGraphDdiTable()
+        ze_graph_desc_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
+                                nullptr,
+                                ZE_GRAPH_FORMAT_NGRAPH_LITE,
+                                serializedIR.first,
+                                serializedIR.second.get(),
+                                buildFlags.c_str()};
+
+        _logger.debug("getGraphHandle - perform pfnCreate");
+        // Create querynetwork handle
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(),
+                                                                    _zeroInitStruct->getDevice(),
+                                                                    &desc,
+                                                                    &graphHandle);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _zeroInitStruct->getGraphDdiTable());
+    } else {
+        // For ext version >= 1.5, calling pfnCreate2 api in _zeroInitStruct->getGraphDdiTable()
+        ze_graph_desc_2_t desc = {ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
+                                  nullptr,
+                                  ZE_GRAPH_FORMAT_NGRAPH_LITE,
+                                  serializedIR.first,
+                                  serializedIR.second.get(),
+                                  buildFlags.c_str(),
+                                  flags};
+
+        _logger.debug("getGraphHandle - perform pfnCreate2");
+        // Create querynetwork handle
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate2(_zeroInitStruct->getContext(),
+                                                                     _zeroInitStruct->getDevice(),
+                                                                     &desc,
+                                                                     &graphHandle);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate2", result, _zeroInitStruct->getGraphDdiTable());
+    }
     return graphHandle;
 }
 
-template <ze_graph_ext_version_t TableExtension>
-ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(const std::vector<uint8_t>& network) const {
+ze_graph_handle_t ZeGraphExtWrappers::getGraphHandle(const std::vector<uint8_t>& network) const {
     ze_graph_handle_t graphHandle;
 
     if (network.empty()) {
@@ -408,6 +377,7 @@ ze_graph_handle_t ZeGraphExtWrappers<TableExtension>::getGraphHandle(const std::
                             network.data(),
                             nullptr};
 
+    _logger.debug("getGraphHandle - perform pfnCreate");
     auto result = _zeroInitStruct->getGraphDdiTable().pfnCreate(_zeroInitStruct->getContext(),
                                                                 _zeroInitStruct->getDevice(),
                                                                 &desc,
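Both creation paths consume the serialized-IR pair. A hypothetical construction of that pair, assuming it carries a byte count plus a shared buffer pointer as suggested by the SerializedIR alias near the top of this patch:

    // Hypothetical serialized-IR pair feeding getGraphHandle; the buffer uses
    // a custom array deleter because shared_ptr<uint8_t> owns new[] storage.
    #include <cstdint>
    #include <cstdio>
    #include <memory>
    #include <utility>

    int main() {
        const size_t size = 16;  // stand-in for the serialized model size
        std::shared_ptr<uint8_t> buffer(new uint8_t[size],
                                        [](uint8_t* p) { delete[] p; });
        auto serializedIR = std::make_pair(size, buffer);
        std::printf("IR payload of %zu bytes at %p\n",
                    serializedIR.first,
                    static_cast<void*>(serializedIR.second.get()));
    }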
@@ -473,87 +443,74 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
                         metadata.has_value() ? std::optional(shapeFromIRModel) : std::nullopt};
 }
 
-template <ze_graph_ext_version_t TableExtension>
-template <ze_graph_ext_version_t T, std::enable_if_t<NotSupportArgumentMetadata(T), bool>>
-void ZeGraphExtWrappers<TableExtension>::getMetadata(ze_graph_handle_t graphHandle,
-                                                     uint32_t index,
-                                                     std::vector<IODescriptor>& inputs,
-                                                     std::vector<IODescriptor>& outputs) const {
-    ze_graph_argument_properties_3_t arg;
-    auto result = _zeroInitStruct->getGraphDdiTable().pfnGetArgumentProperties3(graphHandle, index, &arg);
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _zeroInitStruct->getGraphDdiTable());
-
-    switch (arg.type) {
-    case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
-        inputs.push_back(getIODescriptor(arg, std::nullopt));
-    } break;
-    case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
-        outputs.push_back(getIODescriptor(arg, std::nullopt));
-    } break;
-    default: {
-        OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type);
-    }
-    }
-}
-
-template <ze_graph_ext_version_t TableExtension>
-template <ze_graph_ext_version_t T, std::enable_if_t<!NotSupportArgumentMetadata(T), bool>>
-void ZeGraphExtWrappers<TableExtension>::getMetadata(ze_graph_handle_t graphHandle,
-                                                     uint32_t index,
-                                                     std::vector<IODescriptor>& inputs,
-                                                     std::vector<IODescriptor>& outputs) const {
-    ze_graph_argument_properties_3_t arg;
-    auto result = _zeroInitStruct->getGraphDdiTable().pfnGetArgumentProperties3(graphHandle, index, &arg);
-    THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _zeroInitStruct->getGraphDdiTable());
+void ZeGraphExtWrappers::getMetadata(ze_graph_handle_t graphHandle,
+                                     uint32_t index,
+                                     std::vector<IODescriptor>& inputs,
+                                     std::vector<IODescriptor>& outputs) const {
+    if (NotSupportArgumentMetadata(_graphExtVersion)) {
+        ze_graph_argument_properties_3_t arg;
+        _logger.debug("getMetadata - perform pfnGetArgumentProperties3");
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnGetArgumentProperties3(graphHandle, index, &arg);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _zeroInitStruct->getGraphDdiTable());
+
+        switch (arg.type) {
+        case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
+            inputs.push_back(getIODescriptor(arg, std::nullopt));
+        } break;
+        case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
+            outputs.push_back(getIODescriptor(arg, std::nullopt));
+        } break;
+        default: {
+            OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
+                           arg.type);
+        }
+        }
+    } else {
+        ze_graph_argument_properties_3_t arg;
+        _logger.debug("getMetadata - perform pfnGetArgumentProperties3");
+        auto result = _zeroInitStruct->getGraphDdiTable().pfnGetArgumentProperties3(graphHandle, index, &arg);
+        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _zeroInitStruct->getGraphDdiTable());
 
-    std::optional<ze_graph_argument_metadata_t> optionalMetadata = std::nullopt;
+        std::optional<ze_graph_argument_metadata_t> optionalMetadata = std::nullopt;
 
-    if (!isStateInputName(arg.name) && !isStateOutputName(arg.name) && !isShapeTensorName(arg.name)) {
-        ze_graph_argument_metadata_t metadata;
-        result = _zeroInitStruct->getGraphDdiTable().pfnGraphGetArgumentMetadata(graphHandle, index, &metadata);
-        THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGraphGetArgumentMetadata", result, _zeroInitStruct->getGraphDdiTable());
+        if (!isStateInputName(arg.name) && !isStateOutputName(arg.name) && !isShapeTensorName(arg.name)) {
+            _logger.debug("getMetadata - perform pfnGraphGetArgumentMetadata");
+            ze_graph_argument_metadata_t metadata;
+            result = _zeroInitStruct->getGraphDdiTable().pfnGraphGetArgumentMetadata(graphHandle, index, &metadata);
+            THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGraphGetArgumentMetadata", result, _zeroInitStruct->getGraphDdiTable());
 
-        optionalMetadata = std::optional(metadata);
-    }
+            optionalMetadata = std::optional(metadata);
+        }
 
-    switch (arg.type) {
-    case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
-        inputs.push_back(getIODescriptor(arg, optionalMetadata));
-    } break;
-    case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
-        outputs.push_back(getIODescriptor(arg, optionalMetadata));
-    } break;
-    default: {
-        OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type);
-    }
+        switch (arg.type) {
+        case ZE_GRAPH_ARGUMENT_TYPE_INPUT: {
+            inputs.push_back(getIODescriptor(arg, optionalMetadata));
+        } break;
+        case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: {
+            outputs.push_back(getIODescriptor(arg, optionalMetadata));
+        } break;
+        default: {
+            OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
+                           arg.type);
+        }
+        }
+    }
 }
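The routing done inside getMetadata is a plain switch on the reported argument type. A mocked, self-contained sketch of that shape (enum and descriptor types are stand-ins):

    // Each graph argument lands in inputs or outputs based on its type.
    #include <cstdio>
    #include <vector>

    enum class ArgType { Input, Output };
    struct IODescriptor { const char* name; };
    struct Arg { ArgType type; const char* name; };

    int main() {
        std::vector<IODescriptor> inputs, outputs;
        for (const Arg& arg : {Arg{ArgType::Input, "data"}, Arg{ArgType::Output, "prob"}}) {
            switch (arg.type) {
            case ArgType::Input:
                inputs.push_back({arg.name});
                break;
            case ArgType::Output:
                outputs.push_back({arg.name});
                break;
            }
        }
        std::printf("%zu inputs, %zu outputs\n", inputs.size(), outputs.size());
    }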
 
-template <ze_graph_ext_version_t TableExtension>
-NetworkMetadata ZeGraphExtWrappers<TableExtension>::getNetworkMeta(ze_graph_handle_t graphHandle) const {
+NetworkMetadata ZeGraphExtWrappers::getNetworkMeta(ze_graph_handle_t graphHandle) const {
     ze_graph_properties_t graphProperties{};
 
+    _logger.debug("getNetworkMeta - perform pfnGetProperties");
     auto result = _zeroInitStruct->getGraphDdiTable().pfnGetProperties(graphHandle, &graphProperties);
     THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _zeroInitStruct->getGraphDdiTable());
-
     NetworkMetadata meta;
-
     for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) {
         getMetadata(graphHandle, index, meta.inputs, meta.outputs);
     }
     // TODO: support this information in CiD [track: E#33479]
     meta.numStreams = 1;
     meta.bindRelatedDescriptors();
-
     return meta;
 }
 
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_2>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_3>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_4>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_5>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_6>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_7>;
-template class ZeGraphExtWrappers<ZE_GRAPH_EXT_VERSION_1_8>;
-
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index a6487cc2c5da9a..a63155ae4e64fe 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit a6487cc2c5da9aa13db9e005a320a1b6a0ee5919
+Subproject commit a63155ae4e64feaaa6931f4696c2e2e699063875