Update for reviewers' comments
riverlijunjie committed Oct 6, 2023
1 parent c484b58 commit 91cfdae
Showing 13 changed files with 106 additions and 128 deletions.
35 changes: 12 additions & 23 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -12,7 +12,6 @@
#include "nodes/memory.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "precision_utils.h"
#include "serialize.h"
#include "threading/ie_executor_manager.hpp"
#include "transformations/transformation_pipeline.h"
@@ -21,10 +20,6 @@
# include <threading/ie_tbb_streams_executor.hpp>
#endif

#include "ie_ngraph_utils.hpp"
#include "ie_system_conf.h"
#include "openvino/core/preprocess/pre_post_process.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/util/common_util.hpp"
#include "threading/ie_cpu_streams_executor.hpp"
@@ -209,23 +204,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
return get_graph()._graph.dump();
}

ov::Any CompiledModel::get_property(const std::string& name) const {
if (m_graphs.empty())
OPENVINO_THROW("No graph was found");

if (name == ov::loaded_from_cache) {
return m_loaded_from_cache;
}

Config engConfig = get_graph()._graph.getConfig();
auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) {
return option->second;
}

return get_metric(name);
}

ov::Any CompiledModel::get_metric_legacy(const std::string& name, const GraphGuard& graph) const {
OPENVINO_SUPPRESS_DEPRECATED_START
if (name == METRIC_KEY(NETWORK_NAME)) {
@@ -255,9 +233,20 @@ ov::Any CompiledModel::get_metric_legacy(const std::string& name, const GraphGua
OPENVINO_SUPPRESS_DEPRECATED_END
}

ov::Any CompiledModel::get_metric(const std::string& name) const {
ov::Any CompiledModel::get_property(const std::string& name) const {
if (m_graphs.empty())
OPENVINO_THROW("No graph was found");

if (name == ov::loaded_from_cache) {
return m_loaded_from_cache;
}

Config engConfig = get_graph()._graph.getConfig();
auto option = engConfig._config.find(name);
if (option != engConfig._config.end()) {
return option->second;
}

// @todo Can't we just use local copy (_cfg) instead?
auto graphLock = get_graph();
const auto& graph = graphLock._graph;
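
Aside (not part of this commit): the hunk above folds get_metric() into get_property(), so a query is now resolved in a fixed order. The standalone sketch below only illustrates that order; the type and key names are assumptions for illustration, not the plugin's actual API.

#include <map>
#include <string>

// Illustrative sketch only: mirrors the dispatch order of the merged get_property().
struct PropertyLookupSketch {
    bool loaded_from_cache = false;
    std::map<std::string, std::string> config;  // stands in for the per-graph config map

    std::string get_property(const std::string& name) const {
        if (name == "LOADED_FROM_CACHE")          // 1. special keys answered directly
            return loaded_from_cache ? "YES" : "NO";
        auto option = config.find(name);          // 2. explicit configuration entries
        if (option != config.end())
            return option->second;
        return get_metric(name);                  // 3. fall through to the metric branch
    }

    std::string get_metric(const std::string& name) const {
        return "<metric:" + name + ">";           // placeholder for the metric handling
    }
};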
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/compiled_model.h
@@ -81,7 +81,6 @@ class CompiledModel : public ov::ICompiledModel {
*/
GraphGuard::Lock get_graph() const;

ov::Any get_metric(const std::string& name) const;
ov::Any get_metric_legacy(const std::string& name, const GraphGuard& graph) const;
};

59 changes: 29 additions & 30 deletions src/plugins/intel_cpu/src/cpu_memory.cpp
@@ -23,6 +23,16 @@ using namespace dnnl;

namespace ov {
namespace intel_cpu {
template <>
DnnlMemoryDescPtr IMemory::getDescWithType<DnnlMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr());
}

template <>
BlockedMemoryDescPtr IMemory::getDescWithType<BlockedMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr());
}

namespace {
inline void setSubnormalsToZero(float *data, size_t size) {
uint32_t *u32data = reinterpret_cast<uint32_t *>(data);
@@ -35,29 +45,28 @@ namespace {

void transferData(const IMemory& src, const IMemory& dst, bool ftz) {
node::Reorder::reorderData(src, dst);
try {
auto src_data_type = DnnlExtensionUtils::IEPrecisionToDataType(src.getDesc().getPrecision());
auto dst_data_type = DnnlExtensionUtils::IEPrecisionToDataType(dst.getDesc().getPrecision());
if (src_data_type != memory::data_type::f32 || dst_data_type == memory::data_type::bf16) {
return;
}
} catch (ov::Exception&) {

if (!ftz) {
return;
}

auto localPrim = dst.getPrimitive();
auto desc = localPrim.get_desc();
dnnl::impl::memory_desc_wrapper wrapper(desc.get());

if (ftz
&& !wrapper.is_wino_desc()
// WA: to avoid zero filling auxiliary information
&& !wrapper.is_rnn_packed_desc()) {
// Internal blobs don't have strides yet.
auto *memData = static_cast<float *>(dst.getData());
memData += wrapper.offset0();
setSubnormalsToZero(memData, dst.getSize() / sizeof(float));
if (src.getDesc().getPrecision() != Precision::FP32 || dst.getDesc().getPrecision() == Precision::BF16) {
return;
}
size_t offset = 0;
if (dst.getDesc().getType() & MemoryDescType::Dnnl) {
// here we can safely cast to DnnlMemoryDesc
auto dnnl_desc = dst.getDescWithType<DnnlMemoryDesc>();
auto desc = dnnl_desc->getDnnlDesc();
dnnl::impl::memory_desc_wrapper wrapper(desc.get());
offset = wrapper.offset0();
if (wrapper.is_wino_desc() || wrapper.is_rnn_packed_desc()) {
return;
}
}
// actual FTZ
auto* memData = static_cast<float*>(dst.getData());
memData += offset;
setSubnormalsToZero(memData, dst.getSize() / sizeof(float));
}

} // namespace
@@ -129,16 +138,6 @@ void Memory::redefineDesc(MemoryDescPtr desc) {
this->create(desc, nullptr, false);
}

template<>
DnnlMemoryDescPtr IMemory::getDescWithType<DnnlMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToDnnlMemoryDesc(getDescPtr());
}

template<>
BlockedMemoryDescPtr IMemory::getDescWithType<BlockedMemoryDesc, 0, 0>() const {
return MemoryDescUtils::convertToBlockedMemoryDesc(getDescPtr());
}

void Memory::update() {
if (dnnlMemHandle.isInit()) {
auto prim = dnnlMemHandle.getPrim();
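
Aside (not part of this commit): the rewritten transferData() above ends with an "actual FTZ" step that relies on setSubnormalsToZero(). As background, flushing IEEE-754 single-precision subnormals can be done by inspecting the exponent bits; the standalone sketch below only illustrates the idea and is not the plugin's implementation.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Illustrative sketch: set every subnormal (denormal) float in the buffer to zero.
static void flush_subnormals_to_zero(float* data, std::size_t count) {
    for (std::size_t i = 0; i < count; ++i) {
        std::uint32_t bits;
        std::memcpy(&bits, &data[i], sizeof(bits));              // well-defined type punning
        const bool zero_exponent    = (bits & 0x7F800000u) == 0; // exponent field is all zeros
        const bool nonzero_mantissa = (bits & 0x007FFFFFu) != 0;
        if (zero_exponent && nonzero_mantissa) {                  // subnormal magnitude
            data[i] = 0.0f;                                       // flush to (positive) zero
        }
    }
}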
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -469,7 +469,7 @@ int get_model_prefer_threads(const int num_streams,
model_prefer = proc_type_table[0][ALL_PROC];
}
#else
bool fp_intesive = !ov::op::util::has_op_with_type<ngraph::op::FakeQuantize>(model);
bool fp_intesive = !ov::op::util::has_op_with_type<ov::op::v0::FakeQuantize>(model);
const int int8_threshold = 4; // ~relative efficiency of the VNNI-intensive code for Big vs Little cores;
const int fp32_threshold = 2; // ~relative efficiency of the AVX2 fp32 code for Big vs Little cores;
// by default the latency case uses (faster) Big cores only, depending on the compute ratio
17 changes: 9 additions & 8 deletions src/plugins/intel_cpu/src/graph.cpp
@@ -868,10 +868,10 @@ bool Graph::ProcessDynNodes() {
return result;
}

void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &input) {
void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor>& input) {
if (!IsReady()) OPENVINO_THROW("Wrong state. Topology not ready.");
auto _input = inputNodesMap.find(name);
if (_input != inputNodesMap.end()) {
auto input_itr = inputNodesMap.find(name);
if (input_itr != inputNodesMap.end()) {
auto create_mem_desc = [&](const ov::SoPtr<ITensor>& tensor) -> CpuBlockedMemoryDesc {
auto element_type = tensor->get_element_type();
auto shape = tensor->get_shape();
@@ -899,13 +899,14 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &inp
return byte_stride / element_type.size();
});
}
InferenceEngine::TensorDesc tensorDesc(ie::details::convertPrecision(tensor->get_element_type()),
shape,
ie::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
InferenceEngine::TensorDesc tensorDesc(
InferenceEngine::details::convertPrecision(tensor->get_element_type()),
shape,
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides});
return MemoryDescUtils::convertToCpuBlockedMemoryDesc(tensorDesc);
};

auto node = _input->second;
auto node = input_itr->second;
auto childEdge = node->getChildEdgeAt(0);
const auto& outDims = node->getOutputShapeAtPort(0);

@@ -915,7 +916,7 @@ void Graph::PushInputData(const std::string& name, const ov::SoPtr<ITensor> &inp
// Convert data if precision mismatch
auto& inter_mem_desc = childEdge->getMemory().getDesc();
auto inter_precision = inter_mem_desc.getPrecision();
auto ext_precision = ie::details::convertPrecision(input->get_element_type());
auto ext_precision = InferenceEngine::details::convertPrecision(input->get_element_type());
if (ext_precision != inter_precision) {
if ((inter_data_ptr == nullptr) || (ext_data_ptr == nullptr)) {
OPENVINO_THROW("Get tensor has no allocated memory");
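
Aside (not part of this commit): in the PushInputData() hunk above, blocked strides are produced by dividing each byte stride by the element size. A tiny standalone sketch of that conversion follows; the helper name is made up for illustration.

#include <cstddef>
#include <vector>

// Illustrative sketch: turn byte strides into element strides, assuming each
// byte stride is an exact multiple of the element size.
static std::vector<std::size_t> bytes_to_element_strides(const std::vector<std::size_t>& byte_strides,
                                                         std::size_t element_size) {
    std::vector<std::size_t> element_strides(byte_strides.size());
    for (std::size_t i = 0; i < byte_strides.size(); ++i) {
        element_strides[i] = byte_strides[i] / element_size;
    }
    return element_strides;
}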
87 changes: 41 additions & 46 deletions src/plugins/intel_cpu/src/infer_request.cpp
@@ -401,9 +401,9 @@ static InferenceEngine::TensorDesc create_tensor_desc(const ov::SoPtr<ITensor>&
});
}
OPENVINO_SUPPRESS_DEPRECATED_START
return ie::TensorDesc{ie::details::convertPrecision(element_type),
return InferenceEngine::TensorDesc{InferenceEngine::details::convertPrecision(element_type),
shape,
ie::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
InferenceEngine::BlockingDesc{shape, blk_order, 0, dim_offset, blk_strides}};
OPENVINO_SUPPRESS_DEPRECATED_END
}

@@ -421,9 +421,9 @@ const ov::Output<const ov::Node>& SyncInferRequest::get_internal_port(const ov::
auto name = get_port_name(port, m_is_legacy_api);
bool is_input = ov::op::util::is_parameter(port.get_node());
if (is_input) {
return m_input_ports_map[name];
return m_input_ports_map.at(name);
} else {
return m_output_ports_map[name];
return m_output_ports_map.at(name);
}
}

@@ -448,27 +448,27 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
const auto netInPrc = port.get_element_type();
if (netInPrc != tensor->get_element_type()) {
IE_THROW(ParameterMismatch) << "Failed to set input tensor with precision: " << tensor->get_element_type()
<< ", if model input tensor precision is: " << netInPrc;
<< ", since the model input tensor precision is: " << netInPrc;
}

const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
OPENVINO_THROW("The tensor size is not equal to model, can't set input tensor with name: ",
OPENVINO_THROW("Can't set input tensor with name: ",
name,
", because model input (shape=",
", because the model input (shape=",
shape,
") and tensor (shape=",
") and the tensor (shape=",
vec2str(tensor->get_shape()),
") are incompatible");
}

if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) {
OPENVINO_THROW("Can't set input tensor with name: ",
name,
", because model input size = ",
", because the model input size = ",
ov::shape_size(shape.to_shape()),
" and tensor size = ",
" and the tensor size = ",
tensor->get_size(),
" are different.");
}
@@ -501,21 +501,21 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, con
const bool isDynamic = shape.is_dynamic();

if (!shape.compatible(ov::PartialShape(tensor->get_shape()))) {
OPENVINO_THROW("The tensor size is not equal to model, can't set output tensor with name: ",
OPENVINO_THROW("Can't set the output tensor with name: ",
name,
", because model output (shape=",
", because the model output tensor (shape=",
shape,
") and blob (shape=",
") and the current tensor (shape=",
vec2str(tensor->get_shape()),
") are incompatible");
}

if (!isDynamic && ov::shape_size(shape.to_shape()) != tensor->get_size()) {
OPENVINO_THROW("Can't set output tensor with name: ",
OPENVINO_THROW("Can't set the output tensor with name: ",
name,
", because model output size = ",
", because the model output size = ",
ov::shape_size(shape.to_shape()),
" and blob size = ",
" and the currernt tensor size = ",
tensor->get_size(),
" are different.");
}
@@ -556,33 +556,33 @@ void SyncInferRequest::init_tensor(const std::string& name) {
auto input = inMap.find(name);
if (input != inMap.end()) {
auto input_port = m_input_ports_map.find(name);
if (input_port != m_input_ports_map.end()) {
auto& port = input_port->second;
tensor = ov::ISyncInferRequest::get_tensor(port);

if (!tensor) {
const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
ov::Shape tensor_shape;
if (isDynamic) {
tensor_shape = ov::Shape(shape.rank().get_length(), 0);
} else {
tensor_shape = shape.to_shape();
}
OPENVINO_ASSERT(input_port != m_input_ports_map.end(),
"Tensor with name: ",
name,
" exists in CPU plugin graph, but absents in network inputs");
auto& port = input_port->second;
tensor = ov::ISyncInferRequest::get_tensor(port);

if (!tensor) {
const auto& shape = port.get_partial_shape();
const bool isDynamic = shape.is_dynamic();
ov::Shape tensor_shape;
if (isDynamic) {
tensor_shape = ov::Shape(shape.rank().get_length(), 0);
} else {
tensor_shape = shape.to_shape();
}

tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
ov::ISyncInferRequest::set_tensor(port, tensor);
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
ov::ISyncInferRequest::set_tensor(port, tensor);

auto desc = create_tensor_desc(tensor);
if (!isDynamic &&
desc == MemoryDescUtils::convertToTensorDesc(
graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) {
external_ptr[name] = tensor;
}
auto desc = create_tensor_desc(tensor);
if (!isDynamic &&
desc == MemoryDescUtils::convertToTensorDesc(
graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) &&
graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end()) {
external_ptr[name] = tensor;
}
} else {
OPENVINO_THROW("Tensor with name: ", name, " exists in CPU plugin graph, but absents in network inputs");
}
}

@@ -629,11 +629,6 @@ void SyncInferRequest::init_tensor(const std::string& name) {
outputControlBlocks.emplace(std::make_pair(name, std::move(control_block)));
} else {
tensor_shape = shape.to_shape();

InferenceEngine::TensorDesc desc(
InferenceEngine::details::convertPrecision(port.get_element_type()),
tensor_shape,
InferenceEngine::TensorDesc::getLayoutByRank(tensor_shape.size()));
tensor = ov::make_tensor(port.get_element_type(), tensor_shape);
}
ov::ISyncInferRequest::set_tensor(port, tensor);
@@ -659,7 +654,7 @@ void SyncInferRequest::init_tensor(const std::string& name) {
if (netOutPrc != tensor->get_element_type()) {
IE_THROW(ParameterMismatch)
<< "Network input and output use the same name: " << name
<< " but expect blobs with different precision: " << tensor->get_element_type()
<< " but expect tensor with different precision: " << tensor->get_element_type()
<< " for input and " << netOutPrc << " for output.";
}
}
7 changes: 3 additions & 4 deletions src/plugins/intel_cpu/src/infer_request.h
@@ -46,7 +46,7 @@ class SyncInferRequest : public ov::ISyncInferRequest {

void throw_if_canceled() const;

protected:
private:
class OutputControlBlock {
public:
using MemMngrPtr = std::shared_ptr<MemoryMngrWithReuse>;
@@ -92,7 +92,6 @@ };
};
std::unordered_map<std::string, OutputControlBlock> outputControlBlocks;

private:
void create_infer_request();

void pushInput(const std::string& inputName, ov::SoPtr<ov::ITensor>& inputBlob, InferenceEngine::Precision dataType);
@@ -115,8 +114,8 @@
std::vector<ov::SoPtr<ov::IVariableState>> m_memory_states;
AsyncInferRequest* m_asyncRequest = nullptr;

mutable std::unordered_map<std::string, ov::Output<const ov::Node>> m_input_ports_map;
mutable std::unordered_map<std::string, ov::Output<const ov::Node>> m_output_ports_map;
std::unordered_map<std::string, ov::Output<const ov::Node>> m_input_ports_map;
std::unordered_map<std::string, ov::Output<const ov::Node>> m_output_ports_map;
std::unordered_map<std::string, ov::SoPtr<ov::ITensor>> m_outputs;

void change_default_ptr();

0 comments on commit 91cfdae
