Update for reviewers' comments
riverlijunjie committed Oct 6, 2023
1 parent c484b58 commit 91cfdae
Showing 13 changed files with 106 additions and 128 deletions.
35 changes: 12 additions & 23 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -12,7 +12,6 @@
#include "nodes/memory.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "precision_utils.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"
#include "transformations/transformation_pipeline.h"
@@ -21,10 +20,6 @@
# include <threading/ie_tbb_streams_executor.hpp>
#endif

#include "ie_ngraph_utils.hpp"
#include "ie_system_conf.h"
#include "openvino/core/preprocess/pre_post_process.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
@@ -209,23 +204,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
return get_graph()._graph.dump();
}

ov::Any CompiledModel::get_property(const std::string& name) const {
if (m_graphs.empty())
OPENVINO_THROW("No graph was found");

if (name == ov::loaded_from_cache) {
return m_loaded_from_cache;
}

Config engConfig = get_graph()._graph.getConfig();
auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) {
return option->second;
}

return get_metric(name);
}

ov::Any CompiledModel::get_metric_legacy(const std::string& name, const GraphGuard& graph) const {
OPENVINO_SUPPRESS_DEPRECATED_START
if (name == METRIC_KEY(NETWORK_NAME)) {
@@ -255,9 +233,20 @@ ov::Any CompiledModel::get_metric_legacy(const std::string& name, const GraphGua
OPENVINO_SUPPRESS_DEPRECATED_END
}

ov::Any CompiledModel::get_metric(const std::string& name) const {
ov::Any CompiledModel::get_property(const std::string& name) const {
if (m_graphs.empty())
OPENVINO_THROW("No graph was found");

if (name == ov::loaded_from_cache) {
return m_loaded_from_cache;
}

Config engConfig = get_graph()._graph.getConfig();
auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) {
return option->second;
}

// @todo Can't we just use local copy (_cfg) instead?
auto graphLock = get_graph();
const auto& graph = graphLock._graph;
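
Aside (not part of this commit): the hunk above folds get_metric() into get_property(), so a query is now resolved in a fixed order. The standalone sketch below only illustrates that order; the type and key names are assumptions for illustration, not the plugin's actual API.

#include <map>
#include <string>

// Illustrative sketch only: mirrors the dispatch order of the merged get_property().
struct PropertyLookupSketch {
    bool loaded_from_cache = false;
    std::map<std::string, std::string> config;  // stands in for the per-graph config map

    std::string get_property(const std::string& name) const {
        if (name == "LOADED_FROM_CACHE")          // 1. special keys answered directly
            return loaded_from_cache ? "YES" : "NO";
        auto option = config.find(name);          // 2. explicit configuration entries
        if (option != config.end())
            return option->second;
        return get_metric(name);                  // 3. fall through to the metric branch
    }

    std::string get_metric(const std::string& name) const {
        return "<metric:" + name + ">";           // placeholder for the metric handling
    }
};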
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/compiled_model.h
@@ -81,7 +81,6 @@ class CompiledModel : public ov::ICompiledModel {
*/
GraphGuard::Lock get_graph() const;

ov::Any get_metric(const std::string& name) const;
ov::Any get_metric_legacy(const std::string& name, const GraphGuard& graph) const;
};

59 changes: 29 additions & 30 deletions src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -23,6 +23,16 @@ using namespace dnnl;

namespace ov {
namespace intel_cpu {
template <>
DnnlMemoryDescPtr IMemory::getDescWithType<DnnlMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr());
}

template <>
BlockedMemoryDescPtr IMemory::getDescWithType<BlockedMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr());
}

namespace {
inline void setSubnormalsToZero(float *data, size_t size) {
uint32_t *u32data = reinterpret_cast<uint32_t *>(data);
@@ -35,29 +45,28 @@ namespace {

void transferData(const IMemory& src, const IMemory& dst, bool ftz) {
node::Reorder::reorderData(src, dst);
try {
auto src_data_type = DnnlExtensionUtils::IEPrecisionToDataType(src.getDesc().getPrecision());
auto dst_data_type = DnnlExtensionUtils::IEPrecisionToDataType(dst.getDesc().getPrecision());
if (src_data_type != memory::data_type::f32 || dst_data_type == memory::data_type::bf16) {
return;
}
} catch (ov::Exception&) {

if (!ftz) {
return;
}

auto localPrim = dst.getPrimitive();
auto desc = localPrim.get_desc();
dnnl::impl::memory_desc_wrapper wrapper(desc.get());

if (ftz
&& !wrapper.is_wino_desc()
// WA: to avoid zero filling auxiliary information
&& !wrapper.is_rnn_packed_desc()) {
// Internal blobs don't have strides yet.
auto *memData = static_cast<float *>(dst.getData());
memData += wrapper.offset0();
setSubnormalsToZero(memData, dst.getSize() / sizeof(float));
if (src.getDesc().getPrecision() != Precision::FP32 || dst.getDesc().getPrecision() == Precision::BF16) {
return;
}
size_t offset = 0;
if (dst.getDesc().getType() & MemoryDescType::Dnnl) {
// here we can safely cast to DnnlMemoryDesc
auto dnnl_desc = dst.getDescWithType<DnnlMemoryDesc>();
auto desc = dnnl_desc->getDnnlDesc();
dnnl::impl::memory_desc_wrapper wrapper(desc.get());
offset = wrapper.offset0();
if (wrapper.is_wino_desc() || wrapper.is_rnn_packed_desc()) {
return;
}
}
// actual FTZ
auto* memData = static_cast<float*>(dst.getData());
memData += offset;
setSubnormalsToZero(memData, dst.getSize() / sizeof(float));
}

} // namespace
@@ -129,16 +138,6 @@ void Memory::redefineDesc(MemoryDescPtr desc) {
this->create(desc, nullptr, false);
}

template<>
DnnlMemoryDescPtr IMemory::getDescWithType<DnnlMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr());
}

template<>
BlockedMemoryDescPtr IMemory::getDescWithType<BlockedMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr());
}

void Memory::update() {
if (dnnlMemHandle.isInit()) {
auto prim = dnnlMemHandle.getPrim();
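
Aside (not part of this commit): the rewritten transferData() above ends with an "actual FTZ" step that relies on setSubnormalsToZero(). As background, flushing IEEE-754 single-precision subnormals can be done by inspecting the exponent bits; the standalone sketch below only illustrates the idea and is not the plugin's implementation.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative sketch: set every subnormal (denormal) float in the buffer to zero.
static void flush_subnormals_to_zero(float* data, std::size_t count) {
    for (std::size_t i = 0; i < count; ++i) {
        std::uint32_t bits;
        std::memcpy(&bits, &data[i], sizeof(bits));              // well-defined type punning
        const bool zero_exponent    = (bits & 0x7F800000u) == 0; // exponent field is all zeros
        const bool nonzero_mantissa = (bits & 0x007FFFFFu) != 0;
        if (zero_exponent && nonzero_mantissa) {                  // subnormal magnitude
            data[i] = 0.0f;                                       // flush to (positive) zero
        }
    }
}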
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -469,7 +469,7 @@ int get_model_prefer_threads(const int num_streams,
model_prefer = proc_type_table[0][ALL_PROC];
}
#else
bool fp_intesive = !ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);
bool fp_intesive = !ov::op::util::has_op_with_type<ov::op::v0::FakeQuantize>(model);
const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores;
const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores;
// by default the latency case uses (faster) Big cores only, depending on the compute ratio
17 changes: 9 additions & 8 deletions src/plugins/intel_cpu/src/graph.cpp
@@ -868,10 +868,10 @@ bool Graph::ProcessDynNodes() {
return result;
}

void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &input) {
void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& input) {
if (!IsReady()) OPENVINO_THROW("Wrong state. Topology not ready.");
auto _input = inputNodesMap.find(name);
if (_input != inputNodesMap.end()) {
auto input_itr = inputNodesMap.find(name);
if (input_itr != inputNodesMap.end()) {
auto create_mem_desc = [&](const ov::SoPtr<ITensor>& tensor) -> CpuBlockedMemoryDesc {
auto element_type = tensor->get_element_type();
auto shape = tensor->get_shape();
@@ -899,13 +899,14 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &inp
return byte_stride / element_type.size();
});
}
InferenceEngine::TensorDesc tensorDesc(ie::details::convertPrecision(tensor->get_element_type()),
shape,
ie::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
InferenceEngine::TensorDesc tensorDesc(
InferenceEngine::details::convertPrecision(tensor->get_element_type()),
shape,
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
return MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensorDesc);
};

auto node = _input->second;
auto node = input_itr->second;
auto childEdge = node->getChildEdgeAt(0);
const auto& outDims = node->getOutputShapeAtPort(0);

@@ -915,7 +916,7 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &inp
// Convert data if precision mismatch
auto& inter_mem_desc = childEdge->getMemory().getDesc();
auto inter_precision = inter_mem_desc.getPrecision();
auto ext_precision = ie::details::convertPrecision(input->get_element_type());
auto ext_precision = InferenceEngine::details::convertPrecision(input->get_element_type());
if (ext_precision != inter_precision) {
if ((inter_data_ptr == nullptr) || (ext_data_ptr == nullptr)) {
OPENVINO_THROW("Get tensor has no allocated memory");
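
Aside (not part of this commit): in the PushInputData() hunk above, blocked strides are produced by dividing each byte stride by the element size. A tiny standalone sketch of that conversion follows; the helper name is made up for illustration.

#include <cstddef>
#include <vector>

// Illustrative sketch: turn byte strides into element strides, assuming each
// byte stride is an exact multiple of the element size.
static std::vector<std::size_t> bytes_to_element_strides(const std::vector<std::size_t>& byte_strides,
                                                         std::size_t element_size) {
    std::vector<std::size_t> element_strides(byte_strides.size());
    for (std::size_t i = 0; i < byte_strides.size(); ++i) {
        element_strides[i] = byte_strides[i] / element_size;
    }
    return element_strides;
}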
87 changes: 41 additions & 46 deletions src/plugins/intel_cpu/src/infer_request.cpp
@@ -401,9 +401,9 @@ static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ITensor>&
});
}
OPENVINO_SUPPRESS_DEPRECATED_START
return ie::TensorDesc{ie::details::convertPrecision(element_type),
return InferenceEngine::TensorDesc{InferenceEngine::details::convertPrecision(element_type),
shape,
ie::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
OPENVINO_SUPPRESS_DEPRECATED_END
}

@@ -421,9 +421,9 @@ const ov::Output<const ov::Node>& SyncInferRequest::get_internal_port(const ov::
auto name = get_port_name(port, m_is_legacy_api);
bool is_input = ov::op::util::is_parameter(port.get_node());
if (is_input) {
return m_input_ports_map[name];
return m_input_ports_map.at(name);
} else {
return m_output_ports_map[name];
return m_output_ports_map.at(name);
}
}

@@ -448,27 +448,27 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
const auto netInPrc = port.get_element_type();
if (netInPrc != tensor->get_element_type()) {
IE_THROW(ParameterMismatch) << "Failed to set input tensor with precision: " << tensor->get_element_type()
<< ", if model input tensor precision is: " << netInPrc;
<< ", since the model input tensor precision is: " << netInPrc;
}

const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
OPENVINO_THROW("The tensor size is not equal to model, can't set input tensor with name: ",
OPENVINO_THROW("Can't set input tensor with name: ",
name,
", because model input (shape=",
", because the model input (shape=",
shape,
") and tensor (shape=",
") and the tensor (shape=",
vec2str(tensor->get_shape()),
") are incompatible");
}

if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) {
OPENVINO_THROW("Can't set input tensor with name: ",
name,
", because model input size = ",
", because the model input size = ",
ov::shape_size(shape.to_shape()),
" and tensor size = ",
" and the tensor size = ",
tensor->get_size(),
" are different.");
}
@@ -501,21 +501,21 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
const bool isDynamic = shape.is_dynamic();

if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
OPENVINO_THROW("The tensor size is not equal to model, can't set output tensor with name: ",
OPENVINO_THROW("Can't set the output tensor with name: ",
name,
", because model output (shape=",
", because the model output tensor (shape=",
shape,
") and blob (shape=",
") and the current tensor (shape=",
vec2str(tensor->get_shape()),
") are incompatible");
}

if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) {
OPENVINO_THROW("Can't set output tensor with name: ",
OPENVINO_THROW("Can't set the output tensor with name: ",
name,
", because model output size = ",
", because the model output size = ",
ov::shape_size(shape.to_shape()),
" and blob size = ",
" and the currernt tensor size = ",
tensor->get_size(),
" are different.");
}
@@ -556,33 +556,33 @@ void SyncInferRequest::init_tensor(const std::string& name) {
auto input = inMap.find(name);
if (input != inMap.end()) {
auto input_port = m_input_ports_map.find(name);
if (input_port != m_input_ports_map.end()) {
auto& port = input_port->second;
tensor = ov::ISyncInferRequest::get_tensor(port);

if (!tensor) {
const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
ov::Shape tensor_shape;
if (isDynamic) {
tensor_shape = ov::Shape(shape.rank().get_length(), 0);
} else {
tensor_shape = shape.to_shape();
}
OPENVINO_ASSERT(input_port != m_input_ports_map.end(),
"Tensor with name: ",
name,
" exists in CPU plugin graph, but absents in network inputs");
auto& port = input_port->second;
tensor = ov::ISyncInferRequest::get_tensor(port);

if (!tensor) {
const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
ov::Shape tensor_shape;
if (isDynamic) {
tensor_shape = ov::Shape(shape.rank().get_length(), 0);
} else {
tensor_shape = shape.to_shape();
}

tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
ov::ISyncInferRequest::set_tensor(port, tensor);
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
ov::ISyncInferRequest::set_tensor(port, tensor);

auto desc = create_tensor_desc(tensor);
if (!isDynamic &&
desc == MemoryDescUtils::convertToTensorDesc(
graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) {
external_ptr[name] = tensor;
}
auto desc = create_tensor_desc(tensor);
if (!isDynamic &&
desc == MemoryDescUtils::convertToTensorDesc(
graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) {
external_ptr[name] = tensor;
}
} else {
OPENVINO_THROW("Tensor with name: ", name, " exists in CPU plugin graph, but absents in network inputs");
}
}

@@ -629,11 +629,6 @@ void SyncInferRequest::init_tensor(const std::string& name) {
outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
} else {
tensor_shape = shape.to_shape();

InferenceEngine::TensorDesc desc(
InferenceEngine::details::convertPrecision(port.get_element_type()),
tensor_shape,
InferenceEngine::TensorDesc::getLayoutByRank(tensor_shape.size()));
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
}
ov::ISyncInferRequest::set_tensor(port, tensor);
@@ -659,7 +654,7 @@ void SyncInferRequest::init_tensor(const std::string& name) {
if (netOutPrc != tensor->get_element_type()) {
IE_THROW(ParameterMismatch)
<< "Network input and output use the same name: " << name
<< " but expect blobs with different precision: " << tensor->get_element_type()
<< " but expect tensor with different precision: " << tensor->get_element_type()
<< " for input and " << netOutPrc << " for output.";
}
}
7 changes: 3 additions & 4 deletions src/plugins/intel_cpu/src/infer_request.h
@@ -46,7 +46,7 @@ class SyncInferRequest : public ov::ISyncInferRequest {

void throw_if_canceled() const;

protected:
private:
class OutputControlBlock {
public:
using MemMngrPtr = std::shared_ptr<MemoryMngrWithReuse>;
@@ -92,7 +92,6 @@ };
};
std::unordered_map<std::string, OutputControlBlock> outputControlBlocks;

private:
void create_infer_request();

void pushInput(const std::string& inputName, ov::SoPtr<ov::ITensor>& inputBlob, InferenceEngine::Precision dataType);
@@ -115,8 +114,8 @@
std::vector<ov::SoPtr<ov::IVariableState>> m_memory_states;
AsyncInferRequest* m_asyncRequest = nullptr;

mutable std::unordered_map<std::string, ov::Output<const ov::Node>> m_input_ports_map;
mutable std::unordered_map<std::string, ov::Output<const ov::Node>> m_output_ports_map;
std::unordered_map<std::string, ov::Output<const ov::Node>> m_input_ports_map;
std::unordered_map<std::string, ov::Output<const ov::Node>> m_output_ports_map;
std::unordered_map<std::string, ov::SoPtr<ov::ITensor>> m_outputs;

void change_default_ptr();

0 comments on commit 91cfdae
