From b76077f598b0cdc51ee68e83d8b1eabd76def34c Mon Sep 17 00:00:00 2001
From: csoka
Date: Thu, 28 Nov 2024 12:56:31 +0200
Subject: [PATCH] [intel-npu] Adding CID version query api; Extend optimization_capabilities property based on CID version

---
 .../src/backend/include/zero_backend.hpp      |  1 +
 .../src/backend/src/zero_backend.cpp          |  4 ++++
 .../common/include/intel_npu/common/npu.hpp   |  2 ++
 src/plugins/intel_npu/src/common/src/npu.cpp  |  4 ++++
 .../intel_npu/src/plugin/include/backends.hpp |  1 +
 .../intel_npu/src/plugin/include/metrics.hpp  |  3 ++-
 .../intel_npu/src/plugin/src/backends.cpp     |  8 ++++++++
 .../intel_npu/src/plugin/src/metrics.cpp      | 26 ++++++++++++++++++++++++++
 .../intel_npu/utils/zero/zero_init.hpp        |  5 +++++
 .../src/utils/src/zero/zero_init.cpp          |  7 +++++++
 10 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
index 038c7c1d2d9bf9..7975c2bddd7100 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -24,6 +24,7 @@ class ZeroEngineBackend final : public IEngineBackend {
     const std::vector<std::string> getDeviceNames() const override;
     uint32_t getDriverVersion() const override;
     uint32_t getGraphExtVersion() const override;
+    uint32_t getCompilerVersion() const override;
 
     bool isBatchingSupported() const override;
     bool isCommandQueueExtSupported() const override;
diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
index 55aaad102e8b8f..de0e8e1c6d55c3 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -29,6 +29,10 @@ uint32_t ZeroEngineBackend::getGraphExtVersion() const {
     return _initStruct->getGraphDdiTable().version();
 }
 
+uint32_t ZeroEngineBackend::getCompilerVersion() const {
+    return _initStruct->getCompilerVersion();
+}
+
 bool ZeroEngineBackend::isBatchingSupported() const {
     return _initStruct->isExtensionSupported("ZE_extension_graph_1_6", ZE_MAKE_VERSION(1, 6));
 }
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp
index b34f2deee6c61e..c177552978ab45 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp
@@ -33,6 +33,8 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
     virtual uint32_t getDriverVersion() const;
     /** @brief Provide driver extension version */
     virtual uint32_t getGraphExtVersion() const;
+    /** @brief Provide compiler-in-driver version */
+    virtual uint32_t getCompilerVersion() const;
     /** @brief Get name of backend */
     virtual const std::string getName() const = 0;
     /** @brief Backend has support for concurrency batching */
diff --git a/src/plugins/intel_npu/src/common/src/npu.cpp b/src/plugins/intel_npu/src/common/src/npu.cpp
index 0969b200ea09a5..53f486981b9313 100644
--- a/src/plugins/intel_npu/src/common/src/npu.cpp
+++ b/src/plugins/intel_npu/src/common/src/npu.cpp
@@ -33,6 +33,10 @@ uint32_t IEngineBackend::getGraphExtVersion() const {
     OPENVINO_THROW("Get NPU driver extension version is not supported with this backend");
 }
 
+uint32_t IEngineBackend::getCompilerVersion() const {
+    OPENVINO_THROW("Get NPU driver-compiler version is not supported with this backend");
+}
+
 void* IEngineBackend::getContext() const {
     OPENVINO_THROW("Get NPU context is not supported with this backend");
 }
diff --git a/src/plugins/intel_npu/src/plugin/include/backends.hpp b/src/plugins/intel_npu/src/plugin/include/backends.hpp
index 133be9786c26c0..a41112718604c2 100644
--- a/src/plugins/intel_npu/src/plugin/include/backends.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -32,6 +32,7 @@ class NPUBackends final {
     std::string getBackendName() const;
     uint32_t getDriverVersion() const;
     uint32_t getGraphExtVersion() const;
+    uint32_t getCompilerVersion() const;
     bool isBatchingSupported() const;
     bool isCommandQueueExtSupported() const;
     bool isLUIDExtSupported() const;
diff --git a/src/plugins/intel_npu/src/plugin/include/metrics.hpp b/src/plugins/intel_npu/src/plugin/include/metrics.hpp
index 7bce9eb0881a51..d3d6589b9a9bf3 100644
--- a/src/plugins/intel_npu/src/plugin/include/metrics.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/metrics.hpp
@@ -36,6 +36,7 @@ class Metrics final {
     uint64_t GetDeviceTotalMemSize(const std::string& specifiedDeviceName) const;
     uint32_t GetDriverVersion() const;
     uint32_t GetGraphExtVersion() const;
+    uint32_t GetCompilerVersion() const;
     uint32_t GetSteppingNumber(const std::string& specifiedDeviceName) const;
     uint32_t GetMaxTiles(const std::string& specifiedDeviceName) const;
     ov::device::PCIInfo GetPciInfo(const std::string& specifiedDeviceName) const;
@@ -51,7 +52,7 @@ class Metrics final {
     const std::shared_ptr<const NPUBackends> _backends;
     std::vector<std::string> _supportedMetrics;
     std::vector<std::string> _supportedConfigKeys;
-    const std::vector<std::string> _optimizationCapabilities = {
+    std::vector<std::string> _optimizationCapabilities = {
         ov::device::capability::FP16,
         ov::device::capability::INT8,
         ov::device::capability::EXPORT_IMPORT,
diff --git a/src/plugins/intel_npu/src/plugin/src/backends.cpp b/src/plugins/intel_npu/src/plugin/src/backends.cpp
index 8507098bc85ff1..d485b66a3b6b8d 100644
--- a/src/plugins/intel_npu/src/plugin/src/backends.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -155,6 +155,14 @@ uint32_t NPUBackends::getGraphExtVersion() const {
     OPENVINO_THROW("No available backend");
 }
 
+uint32_t NPUBackends::getCompilerVersion() const {
+    if (_backend != nullptr) {
+        return _backend->getCompilerVersion();
+    }
+
+    OPENVINO_THROW("No available backend");
+}
+
 bool NPUBackends::isBatchingSupported() const {
     if (_backend != nullptr) {
         return _backend->isBatchingSupported();
diff --git a/src/plugins/intel_npu/src/plugin/src/metrics.cpp b/src/plugins/intel_npu/src/plugin/src/metrics.cpp
index fe47c8b204908f..e944cd6249cfe5 100644
--- a/src/plugins/intel_npu/src/plugin/src/metrics.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/metrics.cpp
@@ -9,6 +9,11 @@
 #include "intel_npu/npu_private_properties.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
 
+#ifndef ONEAPI_MAKE_VERSION
+/// @brief Generates generic 'oneAPI' API versions: major in the upper 16 bits, minor in the lower 16 bits
+#    define ONEAPI_MAKE_VERSION(_major, _minor) (((_major) << 16) | ((_minor) & 0x0000ffff))
+#endif  // ONEAPI_MAKE_VERSION
+
 namespace intel_npu {
 
 Metrics::Metrics(const std::shared_ptr<const NPUBackends>& backends) : _backends(backends) {
@@ -39,6 +44,19 @@ Metrics::Metrics(const std::shared_ptr<const NPUBackends>& backends) : _backends
                             ov::hint::num_requests.name(),
                             ov::intel_npu::compilation_mode_params.name(),
                             ov::intel_npu::dynamic_shape_to_static.name()};
+
+    // optimizationCapabilities: append dynamic features based on compiler version.
+    // GetCompilerVersion() throws when no backend is available or when the active
+    // backend does not implement the query; constructing Metrics must not fail in
+    // those cases, so the version-dependent capabilities are simply left out.
+    try {
+        // Dynamic quantization is supported starting from compiler version 6.3
+        if (GetCompilerVersion() >= ONEAPI_MAKE_VERSION(6, 3)) {
+            _optimizationCapabilities.push_back(std::string("DQ"));
+        }
+    } catch (const ov::Exception&) {
+        // Compiler version unavailable: keep only the static capabilities
+    }
 }
 
 std::vector<std::string> Metrics::GetAvailableDevicesNames() const {
@@ -134,6 +152,14 @@ uint32_t Metrics::GetGraphExtVersion() const {
     return _backends->getGraphExtVersion();
 }
 
+uint32_t Metrics::GetCompilerVersion() const {
+    if (_backends == nullptr) {
+        OPENVINO_THROW("No available backends");
+    }
+
+    return _backends->getCompilerVersion();
+}
+
 uint32_t Metrics::GetSteppingNumber(const std::string& specifiedDeviceName) const {
     const auto devName = getDeviceName(specifiedDeviceName);
     auto device = _backends->getDevice(devName);
diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp
index 01b2de868e7572..3ba171b4253076 100644
--- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp
+++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp
@@ -55,6 +55,9 @@ class ZeroInitStructsHolder final {
     inline ze_api_version_t getZeDrvApiVersion() const {
         return ze_drv_api_version;
     }
+    inline uint32_t getCompilerVersion() const {
+        return compiler_version;
+    }
     // Helper function to check if extension with <ext_name> exists and its newer than <version>
     inline bool isExtensionSupported(std::string ext_name, uint32_t version) const {
         auto iter = driver_extension_properties.find(ext_name);
@@ -83,6 +86,8 @@ class ZeroInitStructsHolder final {
     uint32_t mutable_command_list_version = 0;
 
     ze_api_version_t ze_drv_api_version = {};
+
+    uint32_t compiler_version = 0;
 };
 
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp
index e87f8db788b9b8..8eb3ec04740a20 100644
--- a/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp
+++ b/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp
@@ -248,6 +248,13 @@ ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder",
     ze_context_desc_t context_desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, 0, 0};
     THROW_ON_FAIL_FOR_LEVELZERO("zeContextCreate", zeContextCreate(driver_handle, &context_desc, &context));
     log.debug("ZeroInitStructsHolder initialize complete");
+
+    // Obtain compiler-in-driver (vcl) version
+    ze_device_graph_properties_t graph_props = {};  // value-initialize so pNext is null rather than indeterminate
+    graph_props.stype = ZE_STRUCTURE_TYPE_DEVICE_GRAPH_PROPERTIES;
+    auto result = graph_dditable_ext_decorator->pfnDeviceGetGraphProperties(device_handle, &graph_props);
+    THROW_ON_FAIL_FOR_LEVELZERO("pfnDeviceGetGraphProperties", result);
+    compiler_version = ZE_MAKE_VERSION(graph_props.compilerVersion.major, graph_props.compilerVersion.minor);
 }
 
 ZeroInitStructsHolder::~ZeroInitStructsHolder() {