diff --git a/src/plugins/intel_gna/backend/am_intel_dnn.cpp b/src/plugins/intel_gna/backend/am_intel_dnn.cpp index b75cc3f265f3b2..7ad4b406084474 100644 --- a/src/plugins/intel_gna/backend/am_intel_dnn.cpp +++ b/src/plugins/intel_gna/backend/am_intel_dnn.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #if defined __INTEL_COMPILER || defined _MSC_VER #include @@ -27,6 +28,7 @@ #include "gna_types.h" #include "gna_limitations.hpp" #include "layers/gna_convolution_layer.hpp" +#include "memory/gna_memory.hpp" #include #include "gna2_model_helper.hpp" @@ -50,16 +52,16 @@ using GNAPluginNS::GNAConvolutionLayer::outputFromConv; using GNAPluginNS::GNAConvolutionLayer::outputFromPooling; using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy; +using GNAPluginNS::memory::GNAMemoryInterface; + void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) { dump_write_index = index; } -void GNAPluginNS::backend::AMIntelDNN::Init(void *ptr_memory, - uint32_t num_memory_bytes, +void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface, intel_dnn_number_type_t compute_precision, float scale_factor) { - ptr_dnn_memory_ = ptr_memory; - num_bytes_dnn_memory_ = num_memory_bytes; + memory = memoryInterface; compute_precision_ = compute_precision; input_scale_factor_ = scale_factor; @@ -740,6 +742,19 @@ void PrintTensors(std::ofstream& out, T tensors) { } } +void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) { + const auto queue = memory->getQueue(ptr); + std::string typeOfRegion = "UNKNOWN_QUEUE"; + auto offset = std::numeric_limits::max(); + if (queue != nullptr) { + typeOfRegion = GNAPluginNS::memory::rRegionToStr(queue->regionType()); + offset = queue->getOffset(ptr).second; + } + out << " " << typeOfRegion << "\n"; + out << "<" << type << "_address> " + << "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n"; +} + void 
GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) { if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) { fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n"); @@ -762,7 +777,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << "\n"; out_file << " " << intel_dnn_number_type_name[logging_precision] << "\n"; out_file << " " << intel_dnn_softmax_name[softmax_type] << "\n"; - out_file << " " << std::dec << num_bytes_dnn_memory_ << "\n"; + const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap(); + for (const auto& regionPair : regionsMap) { + out_file << " " << std::dec << regionPair.second << "\n"; + out_file << " " << std::dec << memory->getRegionBytes(regionPair.first) << "\n"; + } out_file << " " << std::dec << num_group << "\n"; out_file << " " << std::dec << num_inputs << "\n"; out_file << " " << std::dec << num_outputs << "\n"; @@ -815,10 +834,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << " " << std::dec << num_bytes_per_input << "\n"; out_file << " " << std::dec << num_bytes_per_output << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].ptr_inputs, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].ptr_outputs, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "input", component[i].ptr_inputs); + PrintOffset(out_file, "output", component[i].ptr_outputs); switch (component[i].operation) { case kDnnAffineOp: case kDnnDiagonalOp: { @@ -846,10 +863,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << 
"\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_weights, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_biases, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "weight", component[i].op.affine.ptr_weights); + PrintOffset(out_file, "bias", component[i].op.affine.ptr_biases); #ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out); @@ -996,10 +1011,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_filters, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_biases, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "filter", component[i].op.conv1D.ptr_filters); + PrintOffset(out_file, "bias", component[i].op.conv1D.ptr_biases); #ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); @@ -1145,12 +1158,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_weights, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << 
GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_biases, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "weight", component[i].op.recurrent.ptr_weights); + PrintOffset(out_file, "bias", component[i].op.recurrent.ptr_biases); + PrintOffset(out_file, "feedback", component[i].op.recurrent.ptr_feedbacks); if (num_bytes_per_weight == 1) { #ifdef DUMP_WB int8_t *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); @@ -1308,14 +1318,12 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ if (logging_precision == kDnnFloat) { out_file << std::setprecision(12) << std::scientific << " " << 1.0 << "\n"; out_file << " " << std::dec << 0 << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments); } else { out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; out_file << " " << std::dec << num_segments << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments); if (compute_precision_ == kDnnInt) { out_file << " "; for (uint32_t segment = 0; segment < num_segments; segment++) { @@ -1364,8 +1372,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } } if (ptr_active_outputs() != nullptr) { - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(ptr_active_outputs(), ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "activelist", 
ptr_active_outputs()); } out_file << "\n"; out_file.close(); @@ -1410,7 +1417,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation)); gnaOperation = gnaModel->Operations; for (int i = 0; i < component.size(); i++) { - // std::cout << "Component + " << i <<"=GNA_" << std::distance(ptr_nnet->pLayers, pLayer) << "\n"; + gnalog() << "Component + " << i << "=GNA_" << std::distance(gnaModel->Operations, gnaOperation) << "\n"; + auto& comp = component[i]; switch (comp.operation) { case kDnnAffineOp: diff --git a/src/plugins/intel_gna/backend/am_intel_dnn.hpp b/src/plugins/intel_gna/backend/am_intel_dnn.hpp index c58f152bbf96f4..338f3601331391 100644 --- a/src/plugins/intel_gna/backend/am_intel_dnn.hpp +++ b/src/plugins/intel_gna/backend/am_intel_dnn.hpp @@ -13,9 +13,10 @@ #include "gna/gna_config.hpp" #include "gna_plugin_log.hpp" - +#include "memory/gna_memory.hpp" #include -#include + +using GNAPluginNS::memory::GNAMemoryInterface; namespace GNAPluginNS { namespace backend { @@ -38,15 +39,12 @@ class AMIntelDNN { ptr_sumgroup_sizes(NULL), num_sumgroup_sizes(0), ptr_priors(NULL), - ptr_dnn_memory_(NULL), - num_bytes_dnn_memory_(0), compute_precision_(kDnnNumNumberType) { } ~AMIntelDNN(); - void Init(void *ptr_memory, - uint32_t num_memory_bytes, + void Init(GNAMemoryInterface * memoryInterface, intel_dnn_number_type_t compute_precision, float scale_factor); @@ -294,6 +292,8 @@ class AMIntelDNN { void WriteGraphWizModel(const char *filename); + void PrintOffset(std::ofstream& out, const std::string& type, void* ptr); + void WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision); void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0); @@ -338,8 +338,7 @@ class AMIntelDNN { void BeginNewWrite(uint32_t index); private: - void *ptr_dnn_memory_; - uint32_t 
num_bytes_dnn_memory_; + GNAMemoryInterface* memory = nullptr; uint32_t *ptr_active_outputs_; uint32_t num_active_outputs_; intel_dnn_number_type_t compute_precision_; diff --git a/src/plugins/intel_gna/backend/gna_limitations.cpp b/src/plugins/intel_gna/backend/gna_limitations.cpp index b50b796486a20a..56f9349a863ce5 100644 --- a/src/plugins/intel_gna/backend/gna_limitations.cpp +++ b/src/plugins/intel_gna/backend/gna_limitations.cpp @@ -4,6 +4,8 @@ #include "gna_limitations.hpp" +#include "gna/gna_config.hpp" + #include #include #include @@ -11,6 +13,7 @@ #include #include #include "gna_graph_tools.hpp" +#include "gna_lib_ver_selector.hpp" namespace GNAPluginNS { namespace GNALimitations { @@ -115,10 +118,11 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, c return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what); } -bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, +bool Validator_30::ValidateCnn2D(const std::string &name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN, const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, OvGnaType inPrecision, bool exception) const { + const std::string prefix = "Layer Convolution2D: " + name + ":"; auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth); @@ -141,7 +145,8 @@ bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const u return error.empty() ? true : false; } -bool Validator::ValidatePooling2D(std::string name, + +bool Validator_30::ValidatePooling2D(const std::string& name, const uint32_t windowH, const uint32_t windowW, const uint32_t strideH, const uint32_t strideW, bool exception) const { @@ -160,7 +165,14 @@ bool Validator::ValidatePooling2D(std::string name, return error.empty() ? 
true : false; } -void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) { +std::unique_ptr AbstractValidator::Create(const std::string& target) { + if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { + return tools::make_unique(); + } + return nullptr; +} + +void AbstractValidator::ThrowIfNotEmpty(const std::string& prefix, const std::string& error) { if (!error.empty()) { THROW_GNA_EXCEPTION << prefix << error; } diff --git a/src/plugins/intel_gna/backend/gna_limitations.hpp b/src/plugins/intel_gna/backend/gna_limitations.hpp index be3f04cdd9f75c..734023edbe23da 100644 --- a/src/plugins/intel_gna/backend/gna_limitations.hpp +++ b/src/plugins/intel_gna/backend/gna_limitations.hpp @@ -101,7 +101,23 @@ struct RectLimitByChannelsAndPrecision { const OvGnaType precision, const uint32_t channels, std::string what) const; }; -class Validator { +class AbstractValidator { +protected: + static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error); +public: + virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, + OvGnaType inPrecision, bool exception = true) const = 0; + + virtual bool ValidatePooling2D(const std::string& name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW, + bool exception = true) const = 0; + static std::unique_ptr Create(const std::string&); +}; + +class Validator_30 : public AbstractValidator { RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} }; RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 }; @@ -123,20 +139,18 @@ class Validator { { convDilationWidth, convDilationWidth, "dilation width" } }; const VectorOrSquareLimit poolingWindowLimit{ 
3, 1, 1 }; - static void ThrowIfNotEmpty(const std::string prefix, const std::string error); - public: - Validator() = default; + Validator_30() = default; - bool ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, - const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN, + bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, - OvGnaType inPrecision, bool exception = true) const; + OvGnaType inPrecision, bool exception = true) const override; - bool ValidatePooling2D(std::string name, + bool ValidatePooling2D(const std::string& name, const uint32_t windowH, const uint32_t windowW, const uint32_t strideH, const uint32_t strideW, - bool exception = true) const; + bool exception = true) const override; }; } // namespace Cnn2D diff --git a/src/plugins/intel_gna/gna2_model_debug_log.cpp b/src/plugins/intel_gna/gna2_model_debug_log.cpp index d42d607808433a..f8e358314ace3a 100644 --- a/src/plugins/intel_gna/gna2_model_debug_log.cpp +++ b/src/plugins/intel_gna/gna2_model_debug_log.cpp @@ -9,6 +9,7 @@ #include "gna2_model_debug_log.hpp" #include "gna2-model-api.h" +#include "gna_device.hpp" #include #include @@ -52,6 +53,55 @@ std::string GetSimpleString(Gna2Shape shape) { return out.str(); } +template +uint32_t FindInMapOrReturnOne(MapType map, typename MapType::key_type key) { + auto value = map.find(key); + if (value != map.end()) { + return value->second; + } + return 1; +} + +uint32_t GetTypeByteSize(Gna2DataType type) { + static const std::map operandTypeMap = { + {Gna2DataTypeNone, 1}, + {Gna2DataTypeBoolean, 1}, + {Gna2DataTypeInt4, 1}, + {Gna2DataTypeInt8, 1}, + {Gna2DataTypeInt16, 2}, + {Gna2DataTypeInt32, 4}, + {Gna2DataTypeUint4, 1}, + {Gna2DataTypeUint8, 1}, + 
{Gna2DataTypeUint16, 2}, + {Gna2DataTypeUint32, 4}, + {Gna2DataTypeUint64, 8}, + {Gna2DataTypeCompoundBias, 8}, + {Gna2DataTypePwlSegment, 8}, + {Gna2DataTypeWeightScaleFactor, 8}}; + return FindInMapOrReturnOne(operandTypeMap, type); +} + +uint32_t GetGnaShapeSize(const Gna2Shape& shape, const uint32_t bytesPerElement) { + if (shape.NumberOfDimensions == 0) { + return 0; + } + // to compute aligned filters (each filter begin is aligned to 16B) + // e.g., for 3x3 2B filter, its size is 18B, but the next filter will start at 32B offset + // filters are NHWC + uint32_t nAlignement = 1; + if (shape.NumberOfDimensions == 4 && shape.Dimensions[0] != 1) { + nAlignement = 16; + } + uint32_t total = 1; + for (uint32_t i = 1; i < shape.NumberOfDimensions; i++) { + total *= shape.Dimensions[i]; + } + total *= bytesPerElement; + auto totalAligned = Gna2RoundUp(total, nAlignement); + totalAligned *= shape.Dimensions[0]; + return totalAligned; +} + template bool NextElement(T & elementIndex, const Gna2Shape& total) { if (total.NumberOfDimensions == 0) return false; @@ -335,11 +385,11 @@ void DumpPwl(std::ostream& dumpFile, const Gna2Tensor& activation) { double a = static_cast(segments[k].Slope) / factor; double b = static_cast(segments[k].yBase) - ((static_cast(B) * segments[k].Slope) / factor); - dumpFile << "\t\tBase value for input (B) : " << B << "\n"; - dumpFile << "\t\tBase value for output (b) : " << segments[k].yBase << "\n"; - dumpFile << "\t\tSegment slope (S): " << segments[k].Slope << "\n"; - dumpFile << "\t\tShift (scale) : " << scale << "\n"; - dumpFile << "\t\ty = ax + b: a = " << a << ", b = " << b; + dumpFile << "\t\tBase input (B) : " << B << ", "; + dumpFile << "Base output (b) : " << segments[k].yBase << ", "; + dumpFile << "Slope (S): " << segments[k].Slope << ", "; + dumpFile << "Shift (scale) : " << scale << ", "; + dumpFile << "y = (" << a << ")x + (" << b << ")"; if (segments[k].Slope != 0) { double x0 = static_cast(B) - 
((static_cast(segments[k].yBase) * factor) / segments[k].Slope); dumpFile << ", x0 = " << x0; @@ -366,18 +416,28 @@ void DumpCharArray(std::ostream& dumpFile, const char *carray, size_t count) { } dumpFile << "\n"; } - } // namespace -void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGNA, bool dumpData) { +void DumpGna2Model(const Gna2Model& gnaModel, + const std::string& dumpFolderNameGNA, + bool dumpData, + const GnaAllocations& allAllocations, + const std::string& modeOfOperation) { std::stringstream dumpFileName; uint32_t opsNo = gnaModel.NumberOfOperations; std::time_t currTime = std::time(nullptr); - dumpFileName << dumpFolderNameGNA << "Gna2ModelDebugDump_" << opsNo << "_layer_" << std::put_time(std::localtime(&currTime), "%Y%m%d%H%M%S"); + dumpFileName << dumpFolderNameGNA << "Gna2ModelDebugDump_" << opsNo << "_layer_" + << std::put_time(std::localtime(&currTime), "%Y%m%d%H%M%S") << modeOfOperation; std::ofstream dumpFile(dumpFileName.str() + ".txt", std::ios::out); + const auto& allAllocationsSorted = allAllocations.GetAllocationsInExportOrder(); + for (auto&& a : allAllocationsSorted) { + dumpFile << "Allocation: ptr=" << a.ptr << "\tsizeRequested=" << a.sizeRequested << "\tsizeGranted=" << a.sizeGranted << + "\t tag=" << a.GetTagName() << "\n"; + } + dumpFile << "Layers (operations) count: " << opsNo << "\n"; for (size_t i = 0; i < opsNo; i++) { @@ -395,10 +455,27 @@ void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGN continue; } const auto& operand = *operation.Operands[j]; + void * foundPtr = nullptr; + std::string foundName = "AllocationNotFound"; + size_t offset = 0; + auto found = std::find_if(allAllocationsSorted.begin(), + allAllocationsSorted.end(), + [operand](const GnaAllocation& allocation) { + return allocation.getOffset(operand.Data).first; + }); + if (found != allAllocationsSorted.end()) { + foundPtr = found->ptr; + foundName = found->GetTagName(); + offset = 
found->getOffset(operand.Data).second; + } dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")" << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape) << + " tag: " << foundName << + " offset: " << offset << + " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type))) << " data: " << operand.Data << + " baseAlloc: " << foundPtr << " layout: "; DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS); diff --git a/src/plugins/intel_gna/gna2_model_debug_log.hpp b/src/plugins/intel_gna/gna2_model_debug_log.hpp index 1380297ec854e1..d5c363f806d4c3 100644 --- a/src/plugins/intel_gna/gna2_model_debug_log.hpp +++ b/src/plugins/intel_gna/gna2_model_debug_log.hpp @@ -4,9 +4,21 @@ #pragma once +#include #include +#include #include "gna2-model-api.h" +#include "gna_device.hpp" void WriteInputAndOutputTextGNAImpl(const Gna2Model & gnaModel, const std::string dumpFolderNameGNA, const std::string refFolderName); -void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGNA, bool dumpData); + +void DumpGna2Model(const Gna2Model& gnaModel, const std::string& dumpFolderNameGNA, bool dumpData, const GnaAllocations& allAllocations, + const std::string& modeOfOperation); + +template +std::string toHexString(T t) { + std::ostringstream o; + o << std::hex << t; + return o.str(); +} diff --git a/src/plugins/intel_gna/gna2_model_export_helper.cpp b/src/plugins/intel_gna/gna2_model_export_helper.cpp index af7e0489bfe129..7241cfa9e17c5e 100644 --- a/src/plugins/intel_gna/gna2_model_export_helper.cpp +++ b/src/plugins/intel_gna/gna2_model_export_helper.cpp @@ -10,6 +10,8 @@ #include "gna_api_wrapper.hpp" #include "gna2-device-api.h" +#include "gna2-tlv-writer.h" + #include #include @@ -52,6 +54,142 @@ void * ExportSueLegacyUsingGnaApi2( return bufferDump; } +#define Gna2TlvTypeOVInputScaleFactor GNA2_TLV_IMPL_CHAR_TO_TYPE("OVIS") +#define 
Gna2TlvTypeOVOutputScaleFactor GNA2_TLV_IMPL_CHAR_TO_TYPE("OVOS") +#define Gna2ExportTlv(...) 1 + +static_assert(std::numeric_limits<float>::is_iec559, "Float is not IEC 559 compatible"); +typedef std::array<char, sizeof(Gna2TlvRecord) + sizeof(float)> TlvFloatRecord; + +namespace { +TlvFloatRecord GetFloatInTLV(Gna2TlvType type, float value) { + TlvFloatRecord r; + reinterpret_cast<Gna2TlvRecord*>(r.data())->type = type; + reinterpret_cast<Gna2TlvRecord*>(r.data())->length = sizeof(float); + *reinterpret_cast<float*>(r.data() + sizeof(Gna2TlvRecord)) = value; + return r; +} +} // namespace + +void ExportTlvModel(uint32_t modelId, + uint32_t deviceIndex, + std::ostream& outStream, + Gna2DeviceVersion deviceVersionToExport, + uint32_t input_size, + uint32_t output_size, + float inputSF, + float outputSF) { + + uint32_t exportConfig; + auto status = Gna2ModelExportConfigCreate(gnaUserAllocatorAlignedPage, &exportConfig); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigCreate"); + + status = Gna2ModelExportConfigSetSource(exportConfig, deviceIndex, modelId); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetSource"); + status = Gna2ModelExportConfigSetTarget(exportConfig, deviceVersionToExport); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget"); + + // first descriptors + void* bufferLayerDescriptors = nullptr; + uint32_t sizeOfLayerDescriptors; + + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentLayerDescriptors, + &bufferLayerDescriptors, &sizeOfLayerDescriptors); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentLayerDescriptors)"); + + // RO + void* bufferROData = nullptr; + uint32_t sizeOfROData; + + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentReadOnlyDump, + &bufferROData, &sizeOfROData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentReadOnlyDump)"); + + // RW - scratch + void* bufferScratchRWData = nullptr; + uint32_t sizeOfScratchRWData; + + status = 
Gna2ModelExport(exportConfig, + Gna2ModelExportComponentScratchDump, + &bufferScratchRWData, &sizeOfScratchRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentScratchDump)"); + + // TODO: This must first be covered by model creation code + void* bufferStateRWData = nullptr; + uint32_t sizeOfStateRWData = 0; + + + // RW - state + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentStateDump, + &bufferStateRWData, &sizeOfStateRWData); + if (!Gna2StatusIsSuccessful(status)) { + bufferStateRWData = nullptr; + sizeOfStateRWData = 0; + } + + // RW - external Input + void* bufferInputRWData = nullptr; + uint32_t sizeOfInputRWData; + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentInputDump, + &bufferInputRWData, &sizeOfInputRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentInputDump)"); + + // RW - external Output + void* bufferOutputRWData = nullptr; + uint32_t sizeOfOutputRWData; + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentOutputDump, + &bufferOutputRWData, &sizeOfOutputRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentOutputDump)"); + + char* outTlv = nullptr; + + const auto gnaLibraryVersion = GNADeviceHelper::GetGnaLibraryVersion(); + + uint32_t outTlvSize = 0; + auto tlv_status = Gna2ExportTlv( + deviceVersionToExport, + gnaUserAllocator, + &outTlv, + &outTlvSize, + (const char*)bufferLayerDescriptors, + sizeOfLayerDescriptors, + (const char*)bufferROData, + sizeOfROData, + (const char*)bufferStateRWData, + sizeOfStateRWData, + sizeOfScratchRWData, + input_size, + output_size, + gnaLibraryVersion.c_str(), + nullptr, + 0); + + if (Gna2TlvStatusSuccess == tlv_status) { + outStream.write(outTlv, outTlvSize); + auto tlvInSF = GetFloatInTLV(Gna2TlvTypeOVInputScaleFactor, inputSF); + auto tlvOutSF = GetFloatInTLV(Gna2TlvTypeOVOutputScaleFactor, outputSF); + outStream.write(tlvInSF.data(), 
tlvInSF.size()); + outStream.write(tlvOutSF.data(), tlvOutSF.size()); + } + gnaUserFree(outTlv); + + gnaUserFree(bufferLayerDescriptors); + gnaUserFree(bufferROData); + gnaUserFree(bufferScratchRWData); + gnaUserFree(bufferStateRWData); + + gnaUserFree(bufferInputRWData); + gnaUserFree(bufferOutputRWData); + + GNADeviceHelper::checkGna2Status((Gna2Status)status, "ExportTlvModel"); + status = Gna2ModelExportConfigRelease(exportConfig); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigRelease"); +} void ExportLdForDeviceVersion( uint32_t modelId, @@ -67,7 +205,7 @@ void ExportLdForDeviceVersion( status = Gna2ModelExportConfigSetTarget(exportConfig, deviceVersionToExport); GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget"); - void * ldDump; + void * ldDump = nullptr; uint32_t ldDumpSize; status = Gna2ModelExport(exportConfig, diff --git a/src/plugins/intel_gna/gna2_model_export_helper.hpp b/src/plugins/intel_gna/gna2_model_export_helper.hpp index fdff479cd5bd6b..26e3211b7bb4a8 100644 --- a/src/plugins/intel_gna/gna2_model_export_helper.hpp +++ b/src/plugins/intel_gna/gna2_model_export_helper.hpp @@ -20,4 +20,13 @@ void ExportLdForDeviceVersion( std::ostream & outStream, Gna2DeviceVersion deviceVersionToExport); +void ExportTlvModel(uint32_t modelId, + uint32_t deviceIndex, + std::ostream& outStream, + Gna2DeviceVersion deviceVersionToExport, + uint32_t input_size, + uint32_t output_size, + float inputSF, + float outputSF); + void ExportGnaDescriptorPartiallyFilled(uint32_t numberOfLayers, std::ostream & outStream); diff --git a/src/plugins/intel_gna/gna_data_types.hpp b/src/plugins/intel_gna/gna_data_types.hpp index 2b99b8744099f3..6fbe7c3ec9e668 100644 --- a/src/plugins/intel_gna/gna_data_types.hpp +++ b/src/plugins/intel_gna/gna_data_types.hpp @@ -14,7 +14,6 @@ #include "layers/gna_concat_layer.hpp" #include "layers/gna_split_layer.hpp" #include "gna_api_wrapper.hpp" -#include "memory/polymorph_allocator.hpp" #include 
"memory/gna_memory.hpp" struct TranspositionInfo { @@ -33,8 +32,11 @@ static inline bool FoundPartToTranspose(const std::vector &tr namespace GNAPluginNS { using dnn_ptr = std::shared_ptr>; - using allocator_type = GNAPluginNS::memory::PolymorphAllocator; - using gna_memory_type = GNAPluginNS::memory::GNAMemory; + + using gna_memory_type = GNAPluginNS::memory::GNAMemoryInterface; + using gna_memory_float = GNAPluginNS::memory::GNAMemory; + using gna_memory_device = GNAPluginNS::memory::GNAMemory<>; + using DnnComponentsForLayer = std::list>; using MemoryConnection = std::list>; using ConcatConnection = std::unordered_map; diff --git a/src/plugins/intel_gna/gna_device.cpp b/src/plugins/intel_gna/gna_device.cpp index 8699de95e5429c..962d0643c195e1 100644 --- a/src/plugins/intel_gna/gna_device.cpp +++ b/src/plugins/intel_gna/gna_device.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "gna_api_wrapper.hpp" #include "gna2-capability-api.h" @@ -16,13 +17,16 @@ #include "gna2-inference-api.h" #include "gna2-instrumentation-api.h" #include "gna2-memory-api.h" +#include "gna2-model-export-api.h" #include "gna2_model_export_helper.hpp" + #include "gna2_model_debug_log.hpp" #include "backend/am_intel_dnn.hpp" #include "gna/gna_config.hpp" #include "gna_plugin_log.hpp" #include "layers/gna_convolution_layer.hpp" +#include "memory/gna_mem_requests.hpp" //#define MODEL_DUMP @@ -33,18 +37,54 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) void * memPtr = nullptr; const auto status = Gna2MemoryAlloc(size_requested, size_granted, &memPtr); checkGna2Status(status, "Gna2MemoryAlloc"); + + gnalog() << "Gna2MemoryAlloc(" << size_requested << ") -> " << *size_granted << ", " << memPtr << "\n"; + allAllocations.Add(memPtr, size_requested, *size_granted); if (memPtr == nullptr) { THROW_GNA_EXCEPTION << "GNAAlloc failed to allocate memory. 
Requested: " << size_requested << " Granted: " << *(size_granted); } + dumpXNNROPtr = memPtr; dumpXNNROSize = *size_granted; return static_cast(memPtr); } -void GNADeviceHelper::free(void * ptr) { +void GNADeviceHelper::tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion tag) { std::unique_lock lockGnaCalls{ acrossPluginsSync }; - const auto status = Gna2MemoryFree(ptr); - checkGna2Status(status, "Gna2MemoryFree"); + using GNAPluginNS::memory::rRegion; + static const std::map tagMap { + {rRegion::REGION_INPUTS, Gna2MemoryTagInput}, + {rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput}, + {rRegion::REGION_SCRATCH, Gna2MemoryTagScratch}, + {rRegion::REGION_RO, Gna2MemoryTagReadOnly}, + {rRegion::REGION_STATES, Gna2MemoryTagState}, + {rRegion::REGION_AUTO, Gna2MemoryTagState}, + }; + auto memoryTag = tagMap.at(tag); + if (tag == rRegion::REGION_AUTO) { + return; + } + const auto status = Gna2MemorySetTag(memPtr, memoryTag); + checkGna2Status(status, "Gna2MemorySetTag"); + gnalog() << "Gna2MemorySetTag(" << memPtr << ", " << memoryTag << ")\n"; + const auto tagSuccess = allAllocations.SetTagFor(memPtr, memoryTag); + if (!tagSuccess) { + THROW_GNA_EXCEPTION << "Allocation not found when tagging memory\n"; + } +} + +void GNADeviceHelper::free(void* ptr) { + Gna2Status status; + bool removeSuccess; + { + std::unique_lock lockGnaCalls{acrossPluginsSync}; + status = Gna2MemoryFree(ptr); + checkGna2Status(status, "Gna2MemoryFree"); + removeSuccess = allAllocations.Remove(ptr); + } + if (!removeSuccess) { + gnawarn() << "Allocation not found when freeing memory\n"; + } } std::string GNADeviceHelper::getGnaLibraryVersionPrivate() { @@ -127,7 +167,9 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const { #else "./"; #endif - DumpGna2Model(gnaModel, path, false); + const std::string mode = useDeviceEmbeddedExport ? 
"_ee" : ""; + const auto fileSuffix = mode + "_devVersion_" + toHexString(detectedGnaDevVersion); + DumpGna2Model(gnaModel, path, false, allAllocations, fileSuffix); #endif const auto status = Gna2ModelCreate(nGnaDeviceIndex, &gnaModel, &modelId); @@ -143,7 +185,7 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) { bool GNADeviceHelper::enforceLegacyCnnNeeded() const { const auto execTargetDevice = getTargetDevice(true); - return (isGnaLibVersion3_0 || isGnaLibVersion2_1) && isUpTo20HwGnaDevice(execTargetDevice); + return isUpTo20HwGnaDevice(execTargetDevice); } Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) { @@ -166,8 +208,6 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix; }; if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { - if (!isGnaLibVersion2_1 && !isGnaLibVersion3_0) - throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y"); parsed = Gna2DeviceVersion3_0; } else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { throwUnsupportedGnaTarget(""); @@ -177,7 +217,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const { if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) - return (isGnaLibVersion3_0 || isGnaLibVersion2_1) ? 
Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0; + return Gna2DeviceVersion3_0; return detectedGnaDevVersion; } @@ -391,6 +431,7 @@ GnaWaitStatus GNADeviceHelper::wait(uint32_t reqId, int64_t millisTimeout) { return GNA_REQUEST_ABORTED; } checkGna2Status(status, "Gna2RequestWait"); + updateGnaPerfCounters(); return GNA_REQUEST_COMPLETED; } @@ -431,9 +472,15 @@ void GNADeviceHelper::dumpXnnForDeviceVersion( outStream.write(reinterpret_cast(&sueHeader), sizeof(sueHeader)); } -void GNADeviceHelper::createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose) { +void GNADeviceHelper::dumpTLVForDeviceVersion(const uint32_t modelId, std::ostream& outStream, + uint32_t input_size, uint32_t output_size, + float inSF, float outSF) { + ExportTlvModel(modelId, nGnaDeviceIndex, outStream, exportGeneration, input_size, output_size, inSF, outSF); +} + +void GNADeviceHelper::createVirtualDevice(Gna2DeviceVersion devVersion) { const auto status = Gna2DeviceCreateForExport(devVersion, &nGnaDeviceIndex); - GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport(" + std::to_string(devVersion) + ")" + purpose); + GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport(" + std::to_string(devVersion) + ")"); } void GNADeviceHelper::updateGnaDeviceVersion() { @@ -446,9 +493,10 @@ void GNADeviceHelper::open() { updateGnaDeviceVersion(); const auto gnaExecTarget = parseTarget(executionTarget); if (useDeviceEmbeddedExport) { - createVirtualDevice(exportGeneration, "export"); + createVirtualDevice(exportGeneration); + updateGnaDeviceVersion(); } else if (!executionTarget.empty() && gnaExecTarget != detectedGnaDevVersion) { - createVirtualDevice(gnaExecTarget, "execution"); + createVirtualDevice(gnaExecTarget); updateGnaDeviceVersion(); if (detectedGnaDevVersion != gnaExecTarget) { THROW_GNA_EXCEPTION << "Wrong virtual GNA device version reported: " << detectedGnaDevVersion << " instead of: " << gnaExecTarget; @@ -507,3 +555,16 @@ std::string 
GNADeviceHelper::getEffectiveGnaCompileTarget() const { } return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0; } + +std::string GNADeviceHelper::GetCompileTarget() const { + static const std::map targetMap = { + {Gna2DeviceVersion2_0, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0}, + {Gna2DeviceVersion3_0, InferenceEngine::GNAConfigParams::GNA_TARGET_3_0}, + }; + const auto target = getTargetDevice(false); + auto found = targetMap.find(target); + if (found == targetMap.end()) { + THROW_GNA_EXCEPTION << "Unknown target Gna2DeviceVersion == " << target; + } + return found->second; +} diff --git a/src/plugins/intel_gna/gna_device.hpp b/src/plugins/intel_gna/gna_device.hpp index 70d56c7c52dcf8..8040beea92b1d2 100644 --- a/src/plugins/intel_gna/gna_device.hpp +++ b/src/plugins/intel_gna/gna_device.hpp @@ -12,17 +12,23 @@ #include #include #include +#include #include +#include "memory/gna_mem_requests.hpp" + #include "gna2-common-api.h" #include "gna2-inference-api.h" #include "gna2-instrumentation-api.h" #include "gna2-memory-api.h" #include "gna2-model-api.h" +#include "gna2-model-export-api.h" #include "gna2-model-suecreek-header.h" +#include "gna_device_allocation.hpp" + enum GnaWaitStatus : int { GNA_REQUEST_COMPLETED = 0, // and removed from GNA library queue GNA_REQUEST_ABORTED = 1, // for QoS purposes @@ -38,6 +44,9 @@ class GNADeviceHelper { static std::string gnaLibraryVersion{ ", GNA library version: " + GNADeviceHelper::GetGnaLibraryVersion() }; return gnaLibraryVersion; } + + std::string modeOfOperation = "default"; + GnaAllocations allAllocations; uint32_t nGnaDeviceIndex = 0; bool swExactMode = false; Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation; @@ -45,8 +54,6 @@ class GNADeviceHelper { std::string compileTarget; bool useDeviceEmbeddedExport = false; Gna2DeviceVersion exportGeneration = Gna2DeviceVersionEmbedded1_0; - bool isGnaLibVersion2_1 = false; - bool isGnaLibVersion3_0 = false; static const uint32_t 
TotalGna2InstrumentationPoints = 2; Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = { @@ -80,12 +87,6 @@ class GNADeviceHelper { // check GNA Library version const auto gnaLibVersion = GetGnaLibraryVersion(); - if (gnaLibVersion.rfind("2.1", 0) == 0) { - isGnaLibVersion2_1 = true; - } - if (gnaLibVersion.rfind("3.0", 0) == 0) { - isGnaLibVersion3_0 = true; - } } GNADeviceHelper(const GNADeviceHelper&) = delete; @@ -97,6 +98,7 @@ class GNADeviceHelper { } uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted); + void tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion memoryTag); void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices); uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode); @@ -133,6 +135,10 @@ class GNADeviceHelper { std::ostream & outStream, Gna2DeviceVersion targetDeviceVersion); + void dumpTLVForDeviceVersion(const uint32_t modelId, std::ostream& outStream, + uint32_t input_size, uint32_t output_size, + float inSF, float outSF); + void free(void * ptr); void updateGnaPerfCounters(); @@ -140,6 +146,11 @@ class GNADeviceHelper { InferenceEngine::InferenceEngineProfileInfo>& retPerfCounters); static std::string GetGnaLibraryVersion(); std::string getEffectiveGnaCompileTarget() const; + std::string GetCompileTarget() const; + + const GnaAllocations& getAllAllocations() const { + return allAllocations; + } private: void open(); @@ -158,7 +169,7 @@ class GNADeviceHelper { Gna2DeviceVersion getDefaultTarget() const; Gna2DeviceVersion getTargetDevice(bool execTarget) const; - void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = ""); + void createVirtualDevice(Gna2DeviceVersion devVersion); void updateGnaDeviceVersion(); void initGnaPerfCounters() { diff --git a/src/plugins/intel_gna/gna_device_allocation.hpp b/src/plugins/intel_gna/gna_device_allocation.hpp new file 
mode 100644 index 00000000000000..b5e3f50207ff2c --- /dev/null +++ b/src/plugins/intel_gna/gna_device_allocation.hpp @@ -0,0 +1,168 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gna2-model-export-api.h" + +#include "memory/gna_mem_regions.hpp" +#include "gna_lib_ver_selector.hpp" + +using GNAPluginNS::memory::rRegion; + +struct GnaAllocation { + void* ptr = nullptr; + size_t sizeRequested = 0; + size_t sizeGranted = 0; + void SetTag(Gna2MemoryTag in) { + isTagSet = true; + tag = in; + } + bool isTag(Gna2MemoryTag in) { + return isTagSet && in == tag; + } + std::string GetTagName() const { + static const std::map< Gna2MemoryTag, std::string > tm = { + { Gna2MemoryTagReadWrite, "Gna2MemoryTagReadWrite" }, + { Gna2MemoryTagInput, "Gna2MemoryTagInput" }, + { Gna2MemoryTagOutput, "Gna2MemoryTagOutput" }, + { Gna2MemoryTagReadOnly, "Gna2MemoryTagReadOnly" }, + { Gna2MemoryTagExternalBufferInput, "Gna2MemoryTagExternalBufferInput" }, + { Gna2MemoryTagExternalBufferOutput, "Gna2MemoryTagExternalBufferOutput" }, + { Gna2MemoryTagScratch, "Gna2MemoryTagScratch" }, + { Gna2MemoryTagState, "Gna2MemoryTagState" }, + }; + if (!isTagSet) { + return "Gna2MemoryTag_NotSet_"; + } + auto f = tm.find(tag); + if (f != tm.end()) { + return f->second; + } + return "Gna2MemoryTag_" + std::to_string(tag) + "_"; + } + + static rRegion GetRegionForTag(Gna2MemoryTag tag) { + static const std::map tm = { + {Gna2MemoryTagInput, rRegion::REGION_INPUTS}, + {Gna2MemoryTagOutput, rRegion::REGION_OUTPUTS}, + {Gna2MemoryTagReadOnly, rRegion::REGION_RO}, + {Gna2MemoryTagScratch, rRegion::REGION_SCRATCH}, + {Gna2MemoryTagState, rRegion::REGION_STATES}, + {Gna2MemoryTagExternalBufferInput, rRegion::REGION_INPUTS}, + {Gna2MemoryTagExternalBufferOutput, rRegion::REGION_OUTPUTS}, + }; + auto f = tm.find(tag); + if (f != tm.end()) { + 
return f->second; + } + return rRegion::REGION_AUTO; + } + + bool operator<(const GnaAllocation& right) const { + const auto region = GetRegionForTag(tag); + const auto regionRight = GetRegionForTag(right.tag); + return region < regionRight; + } + + std::pair getOffset(void* offset) const { + std::pair v; + v.first = offset >= ptr && offset < static_cast(ptr) + sizeGranted; + v.second = v.first ? static_cast(offset) - static_cast(ptr) : 0; + return v; + } + + uint32_t sizeForExport() const { + return ALIGN64(sizeRequested); + } + +private: + Gna2MemoryTag tag = Gna2MemoryTagScratch; + bool isTagSet = false; +}; + +class GnaAllocations { + std::list allocations; + +public: + GnaAllocations() = default; + template + explicit GnaAllocations(T b, T e) : allocations(b, e) { + } + + static uint32_t GetSizeForExport(const std::list& allocations) { + uint32_t total = 0; + for (auto& a : allocations) { + total += a.sizeForExport(); + } + return total; + } + + uint32_t GetSizeForExport() const { + return GetSizeForExport(allocations); + } + + std::list GetAllocationsInExportOrder() const { + std::vector temp(allocations.begin(), allocations.end()); + std::stable_sort(temp.begin(), temp.end()); + return std::list(temp.begin(), temp.end()); + } + + static std::pair GetOffsetForExport( + const std::list& orderedAllocations, + void* ptr) { + uint64_t curOffset = 0; + for (auto& r : orderedAllocations) { + auto ptrBegin = static_cast(r.ptr); + const auto size = r.sizeForExport(); + if (ptr >= ptrBegin && ptr < ptrBegin + size) { + curOffset += static_cast(ptr) - ptrBegin; + return {true, curOffset}; + } + curOffset += size; + } + return {false, 0}; + } + + bool SetTagFor(void* memPtr, Gna2MemoryTag memoryTag) { + auto found = std::find_if(allocations.begin(), allocations.end(), [memPtr](const GnaAllocation& a) { + return a.ptr == memPtr; + }); + if (found != allocations.end()) { + found->SetTag(memoryTag); + return true; + } + return false; + } + + bool Remove(void* memPtr) { + 
auto found = std::find_if(allocations.begin(), allocations.end(), [memPtr](const GnaAllocation& a) { + return a.ptr == memPtr; + }); + if (found != allocations.end()) { + allocations.erase(found); + return true; + } + return false; + } + + void Add(void* memPtr, uint32_t sizeRequested, uint32_t sizeGranted) { + GnaAllocation newAllocation; + newAllocation.ptr = memPtr; + newAllocation.sizeRequested = sizeRequested; + newAllocation.sizeGranted = sizeGranted; + allocations.push_back(newAllocation); + } +}; diff --git a/src/plugins/intel_gna/gna_graph_compiler.cpp b/src/plugins/intel_gna/gna_graph_compiler.cpp index 81f5003f5eee88..08837a0889ee59 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/gna_graph_compiler.cpp @@ -42,11 +42,10 @@ using namespace InferenceEngine; using namespace std; using namespace GNAPluginNS; +using namespace memory; #define CREATE(name) [](GNAGraphCompiler *p, CNNLayerPtr l) {p->name(l);} -const GNALimitations::Cnn2D::Validator GNAGraphCompiler::cnn2dValidator; - void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr gnaMemPtr) { this->gnamem = std::move(gnaMemPtr); } @@ -194,6 +193,31 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) split_connection.emplace(id, layerInfoItem); } +void GNAPluginNS::GNAGraphCompiler::SetValidatorTarget(std::string target) { + if (InferenceEngine::GNAConfigParams::GNA_TARGET_3_0 == target) { + cnn2dValidator.reset(new GNALimitations::Cnn2D::Validator_30()); + } +} + +void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, + const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision, + const uint32_t dilH, const uint32_t dilW) const { + if (cnn2dValidator) { + cnn2dValidator->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision); + } 
else { + THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name; + } +} + +void GNAPluginNS::GNAGraphCompiler::ValidatePooling2D(std::string name, const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const { + if (cnn2dValidator) { + cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); + } else { + THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name; + } +} + void GNAGraphCompiler::DiagonalPrimitive(InferenceEngine::CNNLayerPtr layer) { AffinePrimitive(layer, true); } @@ -208,9 +232,10 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) void* ptr_for_const_blob = &const_connections[constLayer->name]; connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize()); + // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests // dont see practical use case when bind storage type need to be different that allocation type - gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) { + gnamem->getQueue(REGION_AUTO)->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) { ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize()); }); } @@ -497,7 +522,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP } if (num_conv_kernel_padding == 0) { - gnamem->readonly().push_local_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64); @@ -533,19 +558,20 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP offset += padding_zeros.size(); } }; - gnamem->readonly().push_initializer(layer, ptr_weights, + + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer, 64); } if (convolution._biases) { - 
gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } @@ -607,10 +633,10 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP const auto weightPrec = OvGnaTypeIntFromBytes(convolution._weights->getTensorDesc().getPrecision().size()); const auto biasPrec = OvGnaTypeIntFromBytes(biasPrecision.size()); - cnn2dValidator.ValidateCnn2D(layer->name, + ValidateCnn2D(layer->name, in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, - convolution._dilation_y, convolution._dilation_x, inputPrec); + inputPrec, convolution._dilation_y, convolution._dilation_x); float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights); float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output); @@ -675,18 +701,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP transposedWeights.resize(transposedWeights.size() + kernelPad); } - gnamem->readonly().push_local_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), transposedWeights.size(), 64); if (convolution._biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } @@ -744,8 +770,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { if (gnaFlags->sw_fp32) { 
IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out, 64); } else { IE_ASSERT(quantized != nullptr); if (!gnaFlags->input_low_precision) { @@ -753,15 +779,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { static_cast(INT16_MAX))); auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT32_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } else { auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast(INT8_MAX))); auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } } } else { @@ -820,7 +846,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0); if (ptr_pwl_segments_target != nullptr) { - gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, + 
gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -868,7 +894,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { } if (is2DPooling) { - cnn2dValidator.ValidatePooling2D(layer->name, pooling._kernel_y, pooling._kernel_x, pooling._stride_y, pooling._stride_x); + ValidatePooling2D(layer->name, pooling._kernel_y, pooling._kernel_x, pooling._stride_y, pooling._stride_x); } auto& currentComponent = dnnComponents.addComponent(layer->name, "pooling"); @@ -892,9 +918,24 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { getScaleFactor(layer, QuantizedDataType::output), ptr_inputs, ptr_outputs); - size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) - * outputs->getPrecision().size(); + size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); + + // Need to reserve more memory otherwise the compiled model would not be + // backward compatible with GNA 2.0 + // GNA 2.0 produces more outputs from 1D pooling than later GNA generations (including GNA 3.0) + // When the model is compiled for some newer GNA generation (than GNA 2.0) + // but it does not use any specific new GNA features it should be correct to import and run using previous GNA HW + if (!is2DPooling) { + const auto hLegacy = + GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]); + const auto wLegacy = + GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]); + if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) { + num_data_bytes_out = hLegacy * wLegacy * c_dim_out; + } + } + num_data_bytes_out *= outputs->getPrecision().size(); const auto hw_in = h_dim_in * w_dim_in; // TODO: Is this really needed?, find out why @@ -1142,8 +1183,8 @@ void 
GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { FillWeightOfAligningFilter(layer, ptr_weights, cropOffset, (quantized == nullptr) ? false : true); (quantized == nullptr) ? - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) : - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) : + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } } @@ -1277,36 +1318,35 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { switch (eltwise._operation) { case EltwiseLayer::Sub: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out, 64); } else { auto scaledIdentity = -quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); break; case EltwiseLayer::Sum: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out, 64); } else { auto scaledIdentity = quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = 
FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); @@ -1314,12 +1354,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { case EltwiseLayer::Prod: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { if (gnaFlags->input_low_precision == false) { - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } } connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx); @@ -1387,9 +1427,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1); if (gnaFlags->sw_fp32) { IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1503,12 +1543,12 @@ void 
GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool if (num_padding == 0) { if (!transpose) { - gnamem->readonly().push_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_weights, weightable._weights->cbuffer().as(), weightable._weights->byteSize(), 64); } else { - gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [isDiag, num_rows_in, num_rows_out, num_padding, transposedRows, transposedCols, weightsBuffer, wpSize](void* data, size_t size) { for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) { auto rowOffset = k * transposedRows * transposedCols * wpSize; @@ -1538,7 +1578,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out; auto paddedWeightsSize = paddedWeights * weightable.precision.size(); - gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, [isDiag, num_rows_in, num_rows_out, num_padding, weightsBuffer, wpSize](void* data, size_t size) { for (uint32_t i = 0; i < (isDiag ? 
1 : num_rows_out); i++) { ie_memcpy(data, size, @@ -1550,16 +1590,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool } if (weightable._biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, weightable._biases->cbuffer().as(), weightable._biases->byteSize(), 64); } else { // in that case input from previous layer goes into biases, so we have to initialize input pointer by zero if (useBiasConnection) { - gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); } } } @@ -1577,7 +1617,10 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!"; } - gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) { + gnamem->getQueue(REGION_RO)->push_initializer(layer, + ptrWeights, + num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), + [=](void* data, size_t size) { int out = 0; for (int input = offset; input < num_rows_out + offset; ++input) { auto mem_ptr = reinterpret_cast(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size(); @@ -1700,7 +1743,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize; size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize; - gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { + 
gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { size_t roffset = weights_offset; size_t woffset = 0; for (int i = 0; i < num_rows_out && size >= woffset; i++) { @@ -1715,12 +1758,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l } if (filterLayer->_biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1793,18 +1836,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); - gnamem->readonly().push_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_weights, filterLayer->_weights->cbuffer().as(), filterLayer->_weights->byteSize(), 64); if (filterLayer->_biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64); } } @@ -2036,7 +2079,7 @@ case name:\ connectOutput(layer, ptr_outputs, num_data_bytes_out); if (ptr_pwl_segments_target != nullptr) { - gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -2210,13 +2253,12 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, // memory layer 
not yet initialized if (nextMemoryLayer.reserved_size == 0) { auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes(); - gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); - + gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); nextMemoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); } return; } @@ -2307,7 +2349,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, return it != concatItem.second.concatInputLayers.end(); }); if (included == concat_connection.end()) { - gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64); + auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2); + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64); std::function allocate_input_recursively = [&allocate_input_recursively](GNAConcatLayer clayer, @@ -2342,16 +2385,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, if (layer->params.find("output_offset") != layer->params.end()) { output_offset = layer->GetParamAsInt("output_offset"); } - gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, 
output_offset); } return; } } + // real output should be allocated in separate region. + auto mem_region = REGION_SCRATCH; auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true, [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first; // Check that layer will be an output - gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64); + if (LayerInfo(layer).isOutput() || !nextLayer) { + mem_region = REGION_OUTPUTS; + } + if (LayerInfo(layer).isConst()) { + mem_region = REGION_RO; + } + gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64); } GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, @@ -2393,12 +2444,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // real allocation pointer will be kept in ptr not in ptr_inputs_global if (!connectTo) { - gnamem->push_value(nullptr, ptr, + gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast(0), num_data_bytes_in, 64); } else { - gnamem->push_value(nullptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), + gnamem->getQueue(REGION_INPUTS)->push_value(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), static_cast(0), num_data_bytes_in, 64); @@ -2414,9 +2465,11 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, } if (connectTo) { - gnamem->bind_ptr(nullptr, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, ALIGN(num_data_bytes_in, 64)); + gnamem->getQueue(REGION_AUTO) + ->bind_ptr(layer, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, ALIGN(num_data_bytes_in, 64)); } else { - gnamem->bind_ptr(nullptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); + gnamem->getQueue(REGION_AUTO) + ->bind_ptr(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); } return prevLayer; @@ -2424,9 
+2477,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // const input if (LayerInfo(prevLayer).isConst()) { if (connectTo) { - gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset); } else { - gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset); } return prevLayer; @@ -2455,7 +2508,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, gnalog() << "Connecting " << splitName << " input \n"; // splitting layer should take the execution order from the connected layer splittingLayer->userValue = layer->userValue; - auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0); + auto res = connectInput(splittingLayer, ptr, std::max(splitLayerInfoItem.reserved_size, num_data_bytes_in), it->offset + offset, 0); gnalog() << "Connected \n"; return res; } @@ -2467,7 +2520,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, if (concatLayerInfo != concat_connection.end()) { auto & concatLayerInfoItem = concatLayerInfo->second; // dnnLayer that is input for concat layer - gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in); // return layer over concat return CNNNetPrevLayer(prevLayer); } @@ -2476,7 +2529,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, prevLayer->name); if (cropLayerInfo != crop_connection.end()) { auto & cropLayerInfoItem = cropLayerInfo->second; - gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset); return 
CNNNetPrevLayer(prevLayer); } } @@ -2484,7 +2537,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // check for generic prev layer if (prevDnnLayer != nullptr) { - gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in); return prevLayer; } @@ -2502,20 +2555,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // connectTo used for indicate that memory layer should be bound to given buffer if (connectTo) { memorySize = std::max(memorySize, num_data_bytes_in); - gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset); + gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset); } else { if (num_data_bytes_in < memorySize + offset) { THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of " << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset; } - gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in)); } memoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); } return prevLayer; diff --git a/src/plugins/intel_gna/gna_graph_compiler.hpp b/src/plugins/intel_gna/gna_graph_compiler.hpp index 88db82136dffda..ba22d40b640ec9 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.hpp +++ 
b/src/plugins/intel_gna/gna_graph_compiler.hpp @@ -16,7 +16,6 @@ #include "descriptions/gna_flags.hpp" #include "connection_details.hpp" #include "backend/dnn.hpp" -#include "memory/polymorph_allocator.hpp" #include "memory/gna_memory.hpp" #include "layers/gna_memory_layer.hpp" #include "layers/gna_concat_layer.hpp" @@ -50,7 +49,7 @@ class GNAGraphCompiler { static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&); std::vector static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols); - static const GNALimitations::Cnn2D::Validator cnn2dValidator; + std::unique_ptr cnn2dValidator; public: GNAPluginNS::backend::DnnComponents dnnComponents; @@ -69,6 +68,18 @@ class GNAGraphCompiler { void fillConcatConnections(InferenceEngine::CNNLayerPtr layer); void fillSplitConnections(InferenceEngine::CNNLayerPtr layer); + + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision, + const uint32_t dilH, const uint32_t dilW) const; + + void ValidatePooling2D(std::string name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const; + + void SetValidatorTarget(std::string target); + /** * Connects either memory output, or generic output to a layer * @param layer - layer pointer diff --git a/src/plugins/intel_gna/gna_lib_ver_selector.hpp b/src/plugins/intel_gna/gna_lib_ver_selector.hpp index cafa3264238c4f..8d7147f73bca9c 100644 --- a/src/plugins/intel_gna/gna_lib_ver_selector.hpp +++ b/src/plugins/intel_gna/gna_lib_ver_selector.hpp @@ -27,3 +27,13 @@ * Used for calculating memory sizes of GNA data arrays */ #define ALIGN64(number) ALIGN(number, 64) + +namespace GNAPluginNS { +namespace tools { +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +} // namespace tools + +} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/gna_model_serial.cpp b/src/plugins/intel_gna/gna_model_serial.cpp index a9126426b121dd..07a21afd10712c 100644 --- a/src/plugins/intel_gna/gna_model_serial.cpp +++ b/src/plugins/intel_gna/gna_model_serial.cpp @@ -370,36 +370,30 @@ void GNAModelSerial::Import(void *basePointer, } } - // once structure has been read lets read whole gna graph is.read(reinterpret_cast(basePointer), gnaGraphSize); } -void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const { +void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os) const { os.exceptions(std::ostream::failbit); const std::vector layers(gna2model_->Operations, gna2model_->Operations + gna2model_->NumberOfOperations); + const auto gnaGraphSize = allocations.GetSizeForExport(); + const auto& allocationsOrdered = allocations.GetAllocationsInExportOrder(); - // all offsets will be from this pointer - auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) { - auto offset = static_cast(std::distance(reinterpret_cast(basePointer), reinterpret_cast(pointer))); - if (offset > gnaGraphSize) { - THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? 
"" : name) << "(0x" << pointer - << ") not in range segment retuned from GNAAlloc(0x" << basePointer << "-0x" - << reinterpret_cast(reinterpret_cast(basePointer) + gnaGraphSize) << ")"; - } - return offset; - }; - - auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) { + auto getTensorWithProperOffset = [&allocationsOrdered](const Gna2Tensor& tensor) { Gna2Tensor out = tensor; - out.Data = reinterpret_cast(getOffsetFromBase(tensor.Data)); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, tensor.Data); + if (!found.first) { + THROW_GNA_EXCEPTION << "Tensor data pointer not found in allocations\n"; + } + out.Data = reinterpret_cast(found.second); return out; }; - auto convert_to_serial = [getOffsetFromBase](const GNAPluginNS::GnaDesc &desc) { + auto convert_to_serial = [&allocationsOrdered](const GNAPluginNS::GnaDesc& desc) { HeaderLatest::RuntimeEndPoint ep; ep.elements_count = desc.num_elements; ep.scaleFactor = desc.scale_factor; @@ -408,7 +402,11 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea ep.precision = desc.model_precision; ep.orientation = desc.orientation; ep.tensor_names_count = static_cast(desc.tensor_names.size()); - ep.descriptor_offset = offsetFromBase(*desc.ptrs.begin()); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, *desc.ptrs.begin()); + if (!found.first) { + THROW_GNA_EXCEPTION << "Endpoint data pointer not found in allocations\n"; + } + ep.descriptor_offset = found.second; // shape ep.shape.NumberOfDimensions = desc.dims.size(); for (size_t i=0; i < ep.shape.NumberOfDimensions; ++i) { @@ -519,7 +517,11 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea std::string name; float scale_factor = 1.0f; std::tie(gna_ptr, reserved_size, name, scale_factor) = state; - writeBits(offsetFromBase(gna_ptr), os); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, gna_ptr); + if 
(!found.first) { + THROW_GNA_EXCEPTION << "State data pointer not found in allocations\n"; + } + writeBits(found.second, os); writeBits(reserved_size, os); const auto nameSize = strlen(name.c_str()) + 1; writeBits(static_cast(nameSize), os); @@ -527,8 +529,10 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea writeBits(scale_factor, os); } - // once structure has been written lets push gna graph - os.write(reinterpret_cast(basePointer), gnaGraphSize); + // once structure has been written let's push gna graph memory + for (const auto& a : allocationsOrdered) { + os.write(reinterpret_cast(a.ptr), a.sizeForExport()); + } } void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs) { diff --git a/src/plugins/intel_gna/gna_model_serial.hpp b/src/plugins/intel_gna/gna_model_serial.hpp index 50fc858919a4d4..76022b12fa2e8f 100644 --- a/src/plugins/intel_gna/gna_model_serial.hpp +++ b/src/plugins/intel_gna/gna_model_serial.hpp @@ -14,6 +14,7 @@ #include "serial/headers/latest/gna_model_header.hpp" #include "gna2-model-api.h" +#include "gna_device_allocation.hpp" /** * @brief implements serialization tasks for GNAGraph @@ -100,11 +101,9 @@ class GNAModelSerial { /** * save gna graph to an outpus stream - * @param basePtr - * @param gnaGraphSize + * @param allocations * @param os */ - void Export(void *basePtr, - size_t gnaGraphSize, + void Export(const GnaAllocations& allocations, std::ostream &os) const; }; diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp index b128d6cc9de036..85552736a12bdc 100644 --- a/src/plugins/intel_gna/gna_plugin.cpp +++ b/src/plugins/intel_gna/gna_plugin.cpp @@ -87,6 +87,7 @@ #include #include +#include inline uint32_t ToByteSize(const Gna2DataType type) { switch (type) { @@ -112,6 +113,7 @@ constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID; using namespace InferenceEngine; using namespace std; using namespace GNAPluginNS; 
+using namespace GNAPluginNS::memory; using namespace InferenceEngine::details; namespace InferenceEngine { @@ -329,6 +331,15 @@ GNAPlugin::GNAPlugin() { InitGNADevice(); } +std::string GNAPluginNS::GNAPlugin::GetCompileTarget() const { + if (gnadevice) { + return gnadevice->GetCompileTarget(); + } else if (!config.gnaCompileTarget.empty()) { + return config.gnaCompileTarget; + } + return InferenceEngine::GNAConfigParams::GNA_TARGET_3_0; +} + GNAPlugin::GNAPlugin(const std::map& configMap) { Init(); SetConfig(configMap); @@ -350,16 +361,16 @@ void GNAPlugin::Init() { void GNAPlugin::InitGNADevice() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice"); if (gnaFlags->sw_fp32) { - gnamem.reset(new gna_memory_type(memory::make_polymorph>())); + gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); } else { gnadevice = std::make_shared(config.gnaExecTarget, config.gnaCompileTarget, config.swExactMode, gnaFlags->performance_counting, !config.dumpXNNPath.empty(), - GetDeviceVersionFromString(config.dumpXNNGeneration)); + GetDeviceVersionFromString(config.gnaCompileTarget)); size_t page_size_bytes = 4096; - gnamem = std::make_shared(memory::make_polymorph(gnadevice), page_size_bytes); + gnamem = std::make_shared(memory::GNAAllocator(gnadevice), page_size_bytes); } graphCompiler.setGNAMemoryPtr(gnamem); } @@ -495,7 +506,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN outputs_.at(portName).num_elements = numElem; // binding ptr for first infer request - then others will be setup during relocation - gnamem->bind_ptr(layer, &outputs_.at(portName).ptrs.front(), outputPtr); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, &outputs_.at(portName).ptrs.front(), outputPtr); }; // probing gna_primitives @@ -645,7 +656,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { std::string effectiveGnaCompileTarget = config.gnaCompileTarget; if (gnadevice) { - effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget(); + 
effectiveGnaCompileTarget = gnadevice->GetCompileTarget(); } bool isNgraphPassesUsed = false; @@ -904,6 +915,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { gnaFlags->num_requests = 1; } + graphCompiler.SetValidatorTarget(GetCompileTarget()); + // keep inputs information and create input primitives inputs_data_map_ = newNet.getInputsInfo(); if (inputs_data_map_.empty()) { @@ -975,20 +988,21 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // TODO: how active list will work in multioutput case // make room for active list - gnamem->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64); + gnamem->getQueue(REGION_OUTPUTS)->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64); void *pParallelExecutionData = nullptr; // reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at lest - rwSegmentSize = gnamem->getRWBytes(); + rwSegmentSize = gnamem->getRegionBytes(REGION_SCRATCH); + rwSegmentSize += gnamem->getRegionBytes(REGION_INPUTS); + rwSegmentSize += gnamem->getRegionBytes(REGION_OUTPUTS); if (gnaFlags->num_requests > 1) { - gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->num_requests - 1), 64); + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1), 64); } gnamem->commit(gnaFlags->compact_mode); - dnn->Init(gnamem->getBasePtr(), - gnamem->getTotalBytes(), + dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? 
kDnnFloat : kDnnInt, 1); @@ -1020,8 +1034,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { if (ptr_in == nullptr) { ptr_out = nullptr; } else { - auto offset = reinterpret_cast(ptr_in) - reinterpret_cast(gnamem->getBasePtr()); - ptr_out = basePtr + offset; + const auto found = gnamem->getOffsetForMerged(ptr_in); + if (!found.first) { + THROW_GNA_EXCEPTION << "Relocation offset for parallel infer requests was not found\n"; + } + ptr_out = basePtr + found.second; } }; @@ -1105,7 +1122,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { {TranspositionInfo{dnn->do_rotate_input, dnn->num_rotate_rows, dnn->num_rotate_columns}}}); } } - DumpXNNToFile(); #ifdef PLOT @@ -1128,9 +1144,10 @@ void GNAPlugin::createRequestConfigsForGnaModels() { } int GNAPlugin::GetDeviceVersionFromString(const std::string deviceString) { - if (deviceString.empty()) + if (deviceString.empty() || deviceString == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { return static_cast(Gna2DeviceVersionEmbedded1_0); - THROW_GNA_EXCEPTION << "Wrong GNA generation for embedded model dump: " << deviceString; + } + return static_cast(Gna2DeviceVersionEmbedded3_5); } void GNAPlugin::DumpXNNToFile() const { @@ -1146,12 +1163,28 @@ void GNAPlugin::DumpXNNToFile() const { std::ofstream dumpStream(config.dumpXNNPath, std::ios::out | std::ios::binary); auto const modelId = gnadevice->createModel(std::get<0>(gnaModels.front())->obj); - auto dump = gnadevice->dumpXnn(modelId); - dump.header.RwRegionSize = gnamem->getRWBytes(); - dump.header.InputScalingFactor = inputs_ptr_->Get().begin()->scale_factor; - dump.header.OutputScalingFactor = outputs_.Get().begin()->scale_factor; - dumpStream.write(reinterpret_cast(&dump.header), sizeof(Gna2ModelSueCreekHeader)); - dumpStream.write(reinterpret_cast(dump.model.get()), dump.header.ModelSize); + const auto& inputsDesc = inputs_ptr_->Get(); + const auto& outputsDesc = outputs_.Get(); + + if (InferenceEngine::GNAConfigParams::GNA_TARGET_2_0 == 
gnadevice->getEffectiveGnaCompileTarget()) { + auto dump = gnadevice->dumpXnn(modelId); + dump.header.RwRegionSize = gnamem->getRegionBytes(REGION_SCRATCH); + dump.header.InputScalingFactor = inputsDesc.begin()->scale_factor; + dump.header.OutputScalingFactor = outputsDesc.begin()->scale_factor; + dumpStream.write(reinterpret_cast(&dump.header), sizeof(Gna2ModelSueCreekHeader)); + dumpStream.write(reinterpret_cast(dump.model.get()), dump.header.ModelSize); + } else { + uint32_t input_size = 0; + uint32_t output_size = 0; + for (auto i : inputsDesc) + input_size += i.get_allocated_size(); + for (auto o : outputsDesc) + output_size += o.get_required_size(); + auto inSF = inputsDesc.begin()->scale_factor; + auto outSF = outputsDesc.front().scale_factor; + gnadevice->dumpTLVForDeviceVersion(modelId, dumpStream, + input_size, output_size, inSF, outSF); + } gnadevice->releaseModel(modelId); } @@ -1366,7 +1399,10 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) { FILE* f = nullptr; static int num_infers = 0; { - f = fopen("ex_scores.txt", "w"); + f = std::fopen("ex_scores.txt", "w"); + if (!f) { + THROW_GNA_EXCEPTION << "ex_scores.txt opening failed"; + } } num_infers++; if (f) { @@ -1508,7 +1544,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i auto header = GNAModelSerial::ReadHeader(networkModel); void *basePtr = nullptr; - gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize); + + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize); + gnamem->commit(); gnaModels.push_back(std::make_tuple(make_shared>(header.layersCount))); GNAModelSerial::MemoryType mt; @@ -1607,7 +1645,7 @@ void GNAPlugin::Export(std::ostream &outStream) { serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size, memoryConnection.first, state->GetScaleFactor()); } - serial.Export(gnamem->getBasePtr(), gnamem->getTotalBytes(), outStream); + 
serial.Export(gnadevice->getAllAllocations(), outStream); } std::map GNAPlugin::GetPerformanceCounts() { diff --git a/src/plugins/intel_gna/gna_plugin.hpp b/src/plugins/intel_gna/gna_plugin.hpp index edf85fc45d36b4..2c69b4aacfb57d 100644 --- a/src/plugins/intel_gna/gna_plugin.hpp +++ b/src/plugins/intel_gna/gna_plugin.hpp @@ -70,6 +70,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin { std::vector memoryStates; bool trivialTopology = false; + std::string GetCompileTarget() const; public: explicit GNAPlugin(const std::map& configMap); diff --git a/src/plugins/intel_gna/gna_plugin_log.hpp b/src/plugins/intel_gna/gna_plugin_log.hpp index 7a66e08f8c213a..cfb911ddb0e2a1 100644 --- a/src/plugins/intel_gna/gna_plugin_log.hpp +++ b/src/plugins/intel_gna/gna_plugin_log.hpp @@ -13,11 +13,11 @@ /** * @brief used for creating graphviz charts, and layers dump */ -# define PLOT -# define MODEL_DUMP -# define GNA_HEAP_PROFILER -# define gnalog() std::cout -# define gnawarn() std::cerr +#define PLOT +#define GNA_HEAP_PROFILER +#define MODEL_DUMP +#define gnalog() std::cout +#define gnawarn() std::cerr #else #ifdef VERBOSE diff --git a/src/plugins/intel_gna/memory/gna_allocator.hpp b/src/plugins/intel_gna/memory/gna_allocator.hpp index e5ad087743e83c..2d83073775458f 100644 --- a/src/plugins/intel_gna/memory/gna_allocator.hpp +++ b/src/plugins/intel_gna/memory/gna_allocator.hpp @@ -10,7 +10,7 @@ #include #include "gna_device.hpp" -#include "polymorph_allocator.hpp" +#include "memory/gna_mem_requests.hpp" namespace GNAPluginNS { namespace memory { @@ -36,6 +36,9 @@ class GNAAllocator { void deallocate(uint8_t *p, std::size_t n) { _device->free(p); } + void setTag(void* memPtr, GNAPluginNS::memory::rRegion tagValue) { + _device->tagMemoryRegion(memPtr, tagValue); + } }; } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_mem_regions.hpp b/src/plugins/intel_gna/memory/gna_mem_regions.hpp new file mode 100644 index 
00000000000000..7362e2241b8697 --- /dev/null +++ b/src/plugins/intel_gna/memory/gna_mem_regions.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace GNAPluginNS { +namespace memory { + +/** + * @brief Logical region of model memory. + * Needed for models for embedded GNA + * When model is exported for non-embedded uses its memory is exported following the enum value order + */ +enum rRegion { + REGION_INPUTS = 0x0, + REGION_OUTPUTS = 0x1, + REGION_SCRATCH = 0x10, + REGION_STATES = 0x100, + REGION_RO = 0x1000, + REGION_AUTO = 0x10000, +}; + +inline std::map GetAllRegionsToStrMap() { + return { + {REGION_INPUTS, "REGION_INPUTS"}, + {REGION_OUTPUTS, "REGION_OUTPUTS"}, + {REGION_SCRATCH, "REGION_SCRATCH"}, + {REGION_STATES, "REGION_STATES"}, + {REGION_RO, "REGION_RO"}, + {REGION_AUTO, "REGION_AUTO"} + }; +} + +inline std::string rRegionToStr(const rRegion region) { + const auto& map = GetAllRegionsToStrMap(); + const auto found = map.find(region); + if (found == map.end()) { + return "UNKNOWN"; + } + return found->second; +} + +} // namespace memory +} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_mem_requests.hpp b/src/plugins/intel_gna/memory/gna_mem_requests.hpp index 0f3626e5943c68..de07d4c5630a5b 100644 --- a/src/plugins/intel_gna/memory/gna_mem_requests.hpp +++ b/src/plugins/intel_gna/memory/gna_mem_requests.hpp @@ -9,6 +9,7 @@ #include #include "gna_plugin_log.hpp" +#include "gna_mem_regions.hpp" namespace GNAPluginNS { namespace memory { @@ -19,31 +20,8 @@ enum rType : uint8_t { REQUEST_BIND = 0x4, REQUEST_INITIALIZER = 0x8, }; -/** - * @brief region of firmware data - */ -enum rRegion { - REGION_RO, - REGION_RW, - REGION_AUTO, -}; #ifdef GNA_HEAP_PROFILER -inline const char* rRegionToStr(uint8_t region) { - const char* strRegion = "UNKNOWN"; - switch (region) { - case REGION_RO: - strRegion = "REGION_RO"; - break; - case REGION_RW: - 
strRegion = "REGION_RW"; - break; - case REGION_AUTO: - strRegion = "REGION_AUTO"; - break; - } - return strRegion; -} inline const char* rTypeToStr(uint8_t type) { const char* strType = "UNKNOWN"; @@ -65,6 +43,7 @@ inline const char* rTypeToStr(uint8_t type) { } return strType; } + #endif struct MemRequest { diff --git a/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp b/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp index 54a9ce0c4339a3..85163fd03257c9 100644 --- a/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp +++ b/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp @@ -8,10 +8,14 @@ #include #include #include +#include #include #include +#include "gna_plugin_log.hpp" #include "gna_mem_requests.hpp" +#include "gna_lib_ver_selector.hpp" +#include "memory_solver.hpp" namespace GNAPluginNS { namespace memory { @@ -30,8 +34,16 @@ inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) { */ class GNAMemRequestsQueue { public: + explicit GNAMemRequestsQueue(rRegion region) : _region_type(region) { + } virtual ~GNAMemRequestsQueue() {} + rRegion _region_type; + size_t _size = 0; + std::vector _mem_requests; + std::list> _local_storage; + std::shared_ptr _basePtr = nullptr; + /** * @brief register initialiser to access memory once it is actually allocated * @param ptr_out @@ -146,9 +158,128 @@ class GNAMemRequestsQueue { /** * @brief interface for actual queue storage */ - virtual rRegion regionType() const = 0; - virtual std::vector & futureHeap() = 0; - virtual std::list> &localStorage() = 0; + rRegion regionType() const { + return _region_type; + } + + std::vector & futureHeap() { + return _mem_requests; + } + + std::list> &localStorage() { + return _local_storage; + } + + virtual size_t calcSize(bool isCompact = false) { + _size = 0; + for (auto &re : _mem_requests) { + if (re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; + _size += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); + } + 
return _size; + } + + size_t getSize() const { + return _size; + } + + void *getBasePtr() const { + return _basePtr.get(); + } + + std::pair getOffset(void * ptr) const { + auto ptrBegin = static_cast(getBasePtr()); + auto size = getSize(); + if (ptr >= ptrBegin && ptr < ptrBegin + size) { + auto curOffset = static_cast(ptr) - ptrBegin; + return {true, curOffset}; + } + return {false, 0}; + } + + template + void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { + for (auto &re : _mem_requests) { + if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { + // std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n"; + visitor(reference, re); + // primitive loop check + if (re._ptr_in == re._ptr_out) continue; + // TODO: no circular dependency checking, only tree-style dependency with loops supported + iterate_binded(re, visitor); + } + } + } +}; + +class GNAMemRequestsInputsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsInputsQueue() : GNAMemRequestsQueue(REGION_INPUTS) { + } +}; + +class GNAMemRequestsOutputsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsOutputsQueue() : GNAMemRequestsQueue(REGION_OUTPUTS) { + } +}; + +class GNAMemRequestsScratchQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsScratchQueue() : GNAMemRequestsQueue(REGION_SCRATCH) { + } + /** + * @brief optimize memory region by reusing buffers + */ + size_t calcSize(bool isCompact = false) override { + if (isCompact) { + _size = 0; + std::vector boxes; + for (size_t i = 0; i < _mem_requests.size(); ++i) { + // skipping BIND, cross-region and empty requests + if (_mem_requests[i]._type & REQUEST_BIND || _mem_requests[i]._ptr_out == nullptr) { + continue; + } + + auto original_with_pad = ALIGN(_mem_requests[i]._num_elements * _mem_requests[i]._element_size + _mem_requests[i]._padding, + _mem_requests[i]._alignment); + int start = _mem_requests[i]._life_limits.first; + int 
stop = _mem_requests[i]._life_limits.second; + + boxes.push_back({start, stop, static_cast(original_with_pad), static_cast(i)}); + } + + MemorySolver memSolver(boxes); + _size = memSolver.solve(); + + // setting offsets + for (auto const & box : boxes) { + _mem_requests[box.id]._offset = memSolver.getOffset(box.id); + } + return _size; + } else { + return GNAMemRequestsQueue::calcSize(isCompact); + } + } }; + +class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsReadOnlyQueue() : GNAMemRequestsQueue(REGION_RO) { + } +}; + +class GNAMemRequestsStatesQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsStatesQueue() : GNAMemRequestsQueue(REGION_STATES) { + } +}; + +class GNAMemRequestsBindingsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsBindingsQueue() : GNAMemRequestsQueue(REGION_AUTO) { + } +}; + } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_memory.hpp b/src/plugins/intel_gna/memory/gna_memory.hpp index 6213a507515e9d..a7853c0dfa22c3 100644 --- a/src/plugins/intel_gna/memory/gna_memory.hpp +++ b/src/plugins/intel_gna/memory/gna_memory.hpp @@ -14,63 +14,75 @@ #include #include #include +#include +#include #include "gna_lib_ver_selector.hpp" #include "memory_solver.hpp" +#include "gna_allocator.hpp" #include "gna_plugin_log.hpp" #include "memory/gna_allocator.hpp" #ifdef GNA_HEAP_PROFILER #include -#include #endif namespace GNAPluginNS { namespace memory { + +class GNAFloatAllocator : public std::allocator < uint8_t > { + public: + void setTag(void*, GNAPluginNS::memory::rRegion) { + } +}; + +class GNAMemoryInterface { +public: + virtual GNAMemRequestsQueue* getQueue(rRegion region) = 0; + virtual GNAMemRequestsQueue* getQueue(void* ptr) = 0; + virtual void commit(bool isCompact = false) = 0; + virtual std::pair getOffsetForMerged(void* ptr) = 0; + virtual size_t getRegionBytes(rRegion region) = 0; + virtual ~GNAMemoryInterface() 
= default; +}; + /** * @brief encapsulate various request to allocate GNA specific memory, * in order to issue single allocation call and configure actual pointers in requests * @tparam Allocator - a GNAAllocator in case of actual HW offloads */ -template> -class GNAMemory : public GNAMemRequestsQueue { +template +class GNAMemory : public GNAMemoryInterface { protected: - std::vector _future_heap; - std::list> _local_storage; + std::map> _mem_queues; size_t _total = 0; - size_t _rw_section_size = 0; - size_t _ro_section_size = 0; Allocator _allocator; - std::shared_ptr heap = nullptr; size_t _page_alignment = 1; bool _is_compact_mode = false; - class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue { - std::reference_wrapper _that; - public: - explicit GNAMemRequestsReadOnlyQueue(GNAMemory & that) : _that(that) { - } - rRegion regionType() const override { - return REGION_RO; - }; - std::vector & futureHeap() override { - return _that.get().futureHeap(); - } - std::list> &localStorage() override { - return _that.get().localStorage(); - } - }; - - GNAMemRequestsReadOnlyQueue readOnlyFrontEnd; + private: + void initMemQueses() { + _mem_queues[REGION_RO] = tools::make_unique(); + _mem_queues[REGION_INPUTS] = tools::make_unique (); + _mem_queues[REGION_OUTPUTS] = tools::make_unique (); + _mem_queues[REGION_SCRATCH] = tools::make_unique (); + _mem_queues[REGION_STATES] = tools::make_unique (); + _mem_queues[REGION_AUTO] = tools::make_unique (); + } public: explicit GNAMemory(size_t pageAlignment = 1) - : readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {} + : _page_alignment(pageAlignment) { + initMemQueses(); + } explicit GNAMemory(const Allocator &a, size_t pageAlignment = 1) - : _allocator(a), readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {} + : _allocator(a), _page_alignment(pageAlignment) { + initMemQueses(); + } - GNAMemRequestsQueue & readonly() { - return readOnlyFrontEnd; + virtual ~GNAMemory() { + // we have to deallocate regions 
before _allocator is destoyed + _mem_queues.clear(); } /** @@ -83,52 +95,67 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief calculates size required for all requests, allocates memory and updates pointers */ - void commit(bool isCompact = false) { + void commit(bool isCompact = false) override { setCompactMode(isCompact); - // 1st stage -- looking for expandable bind requests: - expandBindings(); - - // 2nd stage -- setup offsets: - setRegionOffsets(REGION_RO); - setRegionOffsets(REGION_RW); - - // 3rd stage -- allocation total memory setting to 0 internally - heap = allocate(getTotalBytes()); - - // 4th stage -- store data and updates pointers - allocateRegion(REGION_RW, 0); - allocateRegion(REGION_RO, _rw_section_size); + for (const auto &queue : _mem_queues) { + // 1st stage -- looking for expandable bind requests: + expandBindings(queue.second.get()); + + // 2nd stage -- setup offsets: + setRegionOffsets(queue.second.get()); + + if (queue.second->calcSize(_is_compact_mode) != 0) { + // 3rd stage -- allocation total memory setting to 0 internally + queue.second->_basePtr = allocate(ALIGN(queue.second->getSize(), _page_alignment)); + gnalog() << rRegionToStr(queue.second->_region_type) << "(" << static_cast(queue.second->_basePtr.get()) << ")" + << " allocated: " << ALIGN(queue.second->getSize(), _page_alignment) << std::endl; + // 4th stage -- setting proper GNA memory region tag for embedded TLV export + _allocator.setTag(queue.second->getBasePtr(), queue.first); + // 5th stage -- store data and updates pointers + allocateRegion(queue.second.get()); + } + } +#ifdef GNA_HEAP_PROFILER + memoryDump(); +#endif } - void *getBasePtr() { - return heap.get(); + GNAMemRequestsQueue *getQueue(rRegion region) override { + return _mem_queues[region].get(); } - size_t getRWBytes() { - updateSectionsSizes(); - return _rw_section_size; + GNAMemRequestsQueue* getQueue(void* ptr) override { + for (auto& queuePair : _mem_queues) { + const auto offset = 
queuePair.second->getOffset(ptr); + if (offset.first) { + return queuePair.second.get(); + } + } + return nullptr; } - size_t getTotalBytes() { - updateSectionsSizes(); - return _total; + std::pair getOffsetForMerged(void * ptr) override { + uint32_t curOffset = 0; + for (auto& queuePair : _mem_queues) { + const auto offset = queuePair.second->getOffset(ptr); + if (offset.first) { + curOffset += offset.second; + return {true, curOffset}; + } + const auto size = queuePair.second->getSize(); + curOffset += ALIGN64(size); + } + return {false, 0}; } - protected: - rRegion regionType() const override { - return REGION_RW; - }; - std::vector & futureHeap() override { - return _future_heap; - } - std::list> &localStorage() override { - return _local_storage; + size_t getRegionBytes(rRegion region) override { + return ALIGN(getQueue(region)->calcSize(), _page_alignment); } template void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { - for (auto &re : _future_heap) { + for (auto &re : getQueue(REGION_AUTO)->_mem_requests) { if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { // std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n"; visitor(reference, re); @@ -138,22 +165,26 @@ class GNAMemory : public GNAMemRequestsQueue { iterate_binded(re, visitor); } } +#ifdef GNA_HEAP_PROFILER + memoryDump(); +#endif } + protected: std::shared_ptr allocate(size_t bytes) { - std::shared_ptr sp(_allocator.allocate(bytes), [=](uint8_t *p) { - _allocator.deallocate(p, bytes); + Allocator nA = _allocator; + std::shared_ptr sp(_allocator.allocate(bytes), [nA, bytes](uint8_t* p) mutable { + nA.deallocate(p, bytes); }); std::fill(sp.get(), sp.get() + bytes, 0); return sp; } - protected: /** * @brief expand BIND and (BIND | ) requests. 
Align size(_padding), set execution order */ - void expandBindings() { - for (auto &originated : _future_heap) { + void expandBindings(GNAMemRequestsQueue *mRequests) { + for (auto &originated : mRequests->_mem_requests) { // skipping bind requests to avoid duplications if (originated._type & REQUEST_BIND) continue; @@ -179,11 +210,10 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief set offsets for specific region */ - size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) { + size_t setRegionOffsets(GNAMemRequestsQueue* mRequests) { size_t region_offset = 0; - for (auto &re : _future_heap) { - if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; - + for (auto& re : mRequests->_mem_requests) { + if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; re._offset = region_offset; region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); } @@ -193,15 +223,14 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief allocates memory and updates pointers */ - void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) { - for (auto &re : _future_heap) { + void allocateRegion(GNAMemRequestsQueue *mRequests) { + size_t r_size = ALIGN(mRequests->getSize(), _page_alignment); + for (auto &re : mRequests->_mem_requests) { // skipping Bind, crossregion and empty requests - if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; - - size_t offset = baseOffset + re._offset; - auto cptr = heap.get() + offset; - size_t cptr_avail_size = _total - offset; + if (re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; + auto cptr = mRequests->_basePtr.get() + re._offset; + size_t cptr_avail_size = r_size - re._offset; auto sz = re._element_size * re._num_elements; if (re._type & REQUEST_BIND) { cptr = reinterpret_cast(*reinterpret_cast(re._ptr_out)); @@ -216,6 +245,7 @@ class GNAMemory : public GNAMemRequestsQueue { 
binded._element_size = reference._element_size; }); + gnalog() << static_cast(cptr) << "(" << sz + re._padding << ")" << std::endl; switch (re._type & ~REQUEST_BIND) { case REQUEST_ALLOCATE : break; @@ -238,54 +268,12 @@ class GNAMemory : public GNAMemRequestsQueue { } } - /** - * @brief optimize memory region by reusing buffers - */ - size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) { - size_t memSize = 0; - switch (regType) { - case REGION_AUTO: - case REGION_RW: - case REGION_RO: { - std::vector boxes; - for (size_t i = 0; i < _future_heap.size(); ++i) { - // skipping BIND, cross-region and empty requests - if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) { - continue; - } - - auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding, - _future_heap[i]._alignment); - int start = _future_heap[i]._life_limits.first; - int stop = _future_heap[i]._life_limits.second; - - boxes.push_back({start, stop, static_cast(original_with_pad), static_cast(i)}); - } - MemorySolver memSolver(boxes); - memSize = memSolver.solve(); - - // setting offsets - for (auto const & box : boxes) { - _future_heap[box.id]._offset = memSolver.getOffset(box.id); - } - } - break; - - default: - break; - } - - return memSize; - } - - #ifdef GNA_HEAP_PROFILER - void memoryDump(std::function filter) { - std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out); - - for (auto &re : _future_heap) { - if (filter(re)) continue; - dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", " + void memoryDump() { + for (const auto &queue : _mem_queues) { + std::ofstream dumpFile("gna_memory_requests_" + rRegionToStr(queue.first) + ".txt", std::ios::out); + for (auto &re : queue.second->_mem_requests) { + dumpFile << "region: " << rRegionToStr(re._region) << ", " << "type: " << std::setw(17) << rTypeToStr(re._type) << " " << "ptr_in: " << 
std::setw(15) << re._ptr_in << " " << "ptr_out: " << std::setw(15) << re._ptr_out << " " @@ -296,45 +284,11 @@ class GNAMemory : public GNAMemRequestsQueue { << std::setw(8) << re._offset << ", " << "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", " << std::endl; - } - } -#endif - - void updateSectionsSizes() { - // count total size and size of read/write regions - _rw_section_size = 0; - _ro_section_size = 0; -#ifdef GNA_HEAP_PROFILER - memoryDump([](GNAPluginNS::memory::MemRequest & request) { - return false; - }); -#endif - for (auto &re : _future_heap) { - if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; - - size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); - if (re._region == REGION_RW) { - _rw_section_size += current; - } else { - _ro_section_size += current; } } - - if (_is_compact_mode) { - _rw_section_size = getSectionSizeOptimized(REGION_RW); - } - - gnalog() << "ro_section_size: " << _ro_section_size << std::endl; - gnalog() << "rw_section_size: " << _rw_section_size << std::endl; - gnalog() << "total: " << _total << std::endl; - - _rw_section_size = ALIGN(_rw_section_size, _page_alignment); - _ro_section_size = ALIGN(_ro_section_size, _page_alignment); - _total = _rw_section_size + _ro_section_size; - - gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl; - gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl; } +#endif }; + } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp b/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp deleted file mode 100644 index 1cece4cd8252e1..00000000000000 --- a/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace GNAPluginNS { -namespace memory { - -template -class 
IPolymorphAllocator { -public: - virtual T *allocate(std::size_t n) = 0; - virtual void deallocate(T *p, std::size_t n) = 0; -}; -} // namespace memory -} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/polymorph_allocator.hpp b/src/plugins/intel_gna/memory/polymorph_allocator.hpp deleted file mode 100644 index fa7c83445ade99..00000000000000 --- a/src/plugins/intel_gna/memory/polymorph_allocator.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include "ipolymorph_allocator.hpp" - -namespace GNAPluginNS { -namespace memory { -/** - * @brief c++17 concept simulation - */ - -template -class PolymorphAllocator { - std::shared_ptr> _impl; - public: - explicit PolymorphAllocator(const std::shared_ptr> &impl) : _impl(impl) {} - - T *allocate(std::size_t n) { - return _impl->allocate(n); - } - - void deallocate(T *p, std::size_t n) { - _impl->deallocate(p, n); - } -}; - -/** - * transform any allocator into polymorph type - * @tparam origin - */ - -template -class PolymorphAdapter : public IPolymorphAllocator { - origin _impl; - using T = typename origin::value_type; - - public: - template - explicit PolymorphAdapter(Args &&... args) - :_impl(std::forward(args)...) { - } - T *allocate(std::size_t n) override { - return _impl.allocate(n); - } - void deallocate(T *p, std::size_t n) override { - _impl.deallocate(p, n); - } -}; - -template -inline PolymorphAllocator make_polymorph(Args &&... 
args) { - auto sp = std::make_shared>(std::forward(args)...); - auto ipoly = std::static_pointer_cast>(sp); - - return PolymorphAllocator(ipoly); -} -} // namespace memory -} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp index 7338b777b8488f..c189f635d8caee 100644 --- a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp +++ b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp @@ -83,21 +83,28 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptrget_friendly_name(), - conv_data.input_height, conv_data.input_width, conv_data.input_channel_count, - conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count, - conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width, - OvGnaTypeIntFromBytes(gnaPrecision.size()), false) && - (!graph_data.max_pool || cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), + + const auto cnn2dValidatorPtr = GNALimitations::Cnn2D::AbstractValidator::Create(gnaCompileTarget); + if (!cnn2dValidatorPtr) { + return false; + } + const auto& cnn2dValidator = *cnn2dValidatorPtr; + const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(), + conv_data.input_height, conv_data.input_width, conv_data.input_channel_count, + conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count, + conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width, + OvGnaTypeIntFromBytes(gnaPrecision.size()), false); + if (!cnnIsValid) { + return false; + } + if (!graph_data.max_pool) { + return true; + } + const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), graph_data.max_pool->get_kernel()[0], 
graph_data.max_pool->get_kernel()[1], graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[1], - false))) - return true; - - return false; + false); + return poolingValid; } static size_t CalculateConvCount(const ConvData& conv_data) { diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp b/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp index 93fd23493fe38c..576d4b1048733a 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp @@ -16,8 +16,8 @@ class GnaLayerTestCheck : virtual public LayerTestsUtils::LayerTestsCommon { if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(GNA_LIBRARY_FULL_VERSION)) != metrics.end()) { std::string gnaLibVer = ie_core.GetMetric(targetDevice, METRIC_KEY(GNA_LIBRARY_FULL_VERSION)); - if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.0", 0) != 0) { - GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.0" << std::endl; + if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.", 0) != 0) { + GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.X" << std::endl; } skipTest = false; } diff --git a/src/tests/unit/gna/gna_allocator_test.cpp b/src/tests/unit/gna/gna_allocator_test.cpp index 6d1a890b634ffe..44d4ff04a45754 100644 --- a/src/tests/unit/gna/gna_allocator_test.cpp +++ b/src/tests/unit/gna/gna_allocator_test.cpp @@ -9,6 +9,7 @@ #include #include +#include "memory/gna_memory.hpp" #include "gna_device.hpp" // dummy definitions to work around issue with Linux userspace library @@ -47,7 +48,7 @@ class GNAAllocatorTest : public ::testing::Test { }; TEST_F(GNAAllocatorTest, canAllocateStdMemory) { - auto sp = GNAPluginNS::memory::make_polymorph>(); + auto sp = GNAPluginNS::memory::GNAFloatAllocator{}; uint8_t *x = nullptr; ASSERT_NO_THROW(x = sp.allocate(100)); ASSERT_NE(x, 
nullptr); @@ -57,7 +58,7 @@ TEST_F(GNAAllocatorTest, canAllocateStdMemory) { TEST_F(GNAAllocatorTest, canAllocateGNAMemory) { // GNA device can be opened one per process for now gnadevice.reset(new GNADeviceHelper()); - auto sp = GNAPluginNS::memory::make_polymorph(gnadevice); + GNAPluginNS::memory::GNAAllocator sp{ gnadevice }; uint8_t *x = nullptr; ASSERT_NO_THROW(x = sp.allocate(100)); ASSERT_NE(x, nullptr); diff --git a/src/tests/unit/gna/gna_api_stub.cpp b/src/tests/unit/gna/gna_api_stub.cpp index 6fba84c7b69ee3..6646517b70222f 100644 --- a/src/tests/unit/gna/gna_api_stub.cpp +++ b/src/tests/unit/gna/gna_api_stub.cpp @@ -38,6 +38,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc( return Gna2StatusSuccess; } +GNA2_API enum Gna2Status Gna2MemorySetTag( + void* memory, + uint32_t tag) { + return Gna2StatusSuccess; +} + GNA2_API enum Gna2Status Gna2DeviceCreateForExport( Gna2DeviceVersion targetDeviceVersion, uint32_t * deviceIndex) { diff --git a/src/tests/unit/gna/gna_memory_compact_test.cpp b/src/tests/unit/gna/gna_memory_compact_test.cpp index 73ff7e022ac948..addb6c4f7b2a9a 100644 --- a/src/tests/unit/gna/gna_memory_compact_test.cpp +++ b/src/tests/unit/gna/gna_memory_compact_test.cpp @@ -21,7 +21,7 @@ using namespace GNAPluginNS::memory; class GNAMemoryCompactTest : public ::testing::Test { protected: - GNAMemory> mem; + GNAMemory mem; bool isCompact = true; void SetUp() override { @@ -39,12 +39,12 @@ TEST_F(GNAMemoryCompactTest, canOptimizeReservePtr) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float)); - mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + auto scratch = mem.getQueue(rRegion::REGION_SCRATCH); + scratch->reserve_ptr(layer1, pFuture1, 3 * sizeof(float)); + scratch->reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 3 * sizeof(float)); + 
ASSERT_EQ(scratch->getSize(), 3 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizePushValue) { @@ -58,12 +58,12 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushValue) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(layer1, pFuture1, 1.f, 2); - mem.push_value(layer2, pFuture2, 2.f, 3); + auto scratch = mem.getQueue(rRegion::REGION_SCRATCH); + scratch->push_value(layer1, pFuture1, 1.f, 2); + scratch->push_value(layer2, pFuture2, 2.f, 3); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 5 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) { @@ -80,13 +80,13 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_value(layer1, pFuture1, 3.f, 2); - mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); - mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_value(layer1, pFuture1, 3.f, 2); + scratchQueue->bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + scratchQueue->reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 2 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 2 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) { @@ -105,14 +105,14 @@ TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_value(layer1, pFuture1, 1.f, 2); - mem.push_value(layer2, pFuture2, 2.f, 3); - mem.reserve_ptr(layer3, pFuture3, 5 * sizeof(float)); - mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + auto 
scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_value(layer1, pFuture1, 1.f, 2); + scratchQueue->push_value(layer2, pFuture2, 2.f, 3); + scratchQueue->reserve_ptr(layer3, pFuture3, 5 * sizeof(float)); + scratchQueue->bind_ptr(layer2, pFuture2, pFuture1, 0, 2); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 5 * sizeof(float)); } @@ -133,13 +133,13 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_ptr(layer1, pFuture1, input, input_size); - mem.reserve_ptr(layer2, pFuture2, input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_ptr(layer1, pFuture1, input, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, input_size); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), input_size); - ASSERT_EQ(mem.getTotalBytes(), input_size); + ASSERT_EQ(scratchQueue->getSize(), input_size); } TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) { @@ -156,19 +156,19 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); size_t input_size; { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; input_size = input.size() * sizeof(float); - mem.push_local_ptr(layer1, pFuture1, &*input.begin(), input_size); + scratchQueue->push_local_ptr(layer1, pFuture1, &*input.begin(), input_size); } - mem.reserve_ptr(layer2, pFuture2, input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, input_size); + 
scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), input_size); - ASSERT_EQ(mem.getTotalBytes(), input_size); + ASSERT_EQ(scratchQueue->getSize(), input_size); } TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) { @@ -185,21 +185,21 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); size_t input_size; { std::vector input = {1.0f, 2.0f, 3.0f}; input_size = input.size() * sizeof(float); - mem.push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size){ + scratchQueue->push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size) { ie_memcpy(data, size, &input[0], input.size()); }); } - mem.reserve_ptr(layer2, pFuture2, 2 * input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, 2 * input_size); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 2 * input_size); - ASSERT_EQ(mem.getTotalBytes(), 2 * input_size); + ASSERT_EQ(scratchQueue->getSize(), 2 * input_size); } TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) { @@ -219,20 +219,20 @@ TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) { float* pFuture3 = reinterpret_cast(&pFuture3); float* pFuture4 = reinterpret_cast(&pFuture4); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); { std::vector input = {1.0f, 2.0f, 3.0f}; - mem.bind_initializer(layer2, pFuture1, [=](void* data, size_t size){ + scratchQueue->bind_initializer(layer2, pFuture1, [=](void* data, size_t size) { ie_memcpy(data, size, &input[0], input.size()); }); } - mem.reserve_ptr(layer1, pFuture1, 4 * sizeof(float)); - mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); - 
mem.bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float)); + scratchQueue->reserve_ptr(layer1, pFuture1, 4 * sizeof(float)); + scratchQueue->reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + scratchQueue->bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 4 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) { @@ -249,24 +249,26 @@ TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.reserve_ptr(layer1, pFuture1, 2 * sizeof(float)); - mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); - mem.bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float)); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->reserve_ptr(layer1, pFuture1, 2 * sizeof(float)); + scratchQueue->reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 4 * sizeof(float)); } -class GNAMemoryTested : public GNAPluginNS::memory::GNAMemory> { -using GNAMemory::GNAMemory; +class GNAMemoryTested : public GNAPluginNS::memory::GNAMemory { + using GNAMemory::GNAMemory; public: void Test() { // filtering RW allocation requests only - auto filter_req = [] (const MemRequest &re) { return re._region == REGION_RW && re._type != REQUEST_BIND; }; + auto filter_req = [] (const MemRequest &re) { return re._region == REGION_SCRATCH && re._type != REQUEST_BIND; }; std::vector test_reqs; - auto it = std::copy_if(_future_heap.begin(), _future_heap.end(), std::back_inserter(test_reqs), filter_req); 
+ const auto& requests = getQueue(REGION_SCRATCH)->_mem_requests; + + auto it = std::copy_if(requests.begin(), requests.end(), std::back_inserter(test_reqs), filter_req); // intercrossing condition auto is_crossed = [] (const MemRequest &re1, const MemRequest &re2) { @@ -291,7 +293,7 @@ class GNAPluginTested : public GNAPluginNS::GNAPlugin { public: std::shared_ptr gnamem_t; GNAPluginTested() : GNAPluginNS::GNAPlugin() { - gnamem_t = std::make_shared(make_polymorph>()); + gnamem_t = std::make_shared(); gnamem = gnamem_t; graphCompiler.setGNAMemoryPtr(gnamem); gnadevice.reset(); diff --git a/src/tests/unit/gna/gna_memory_test.cpp b/src/tests/unit/gna/gna_memory_test.cpp index 2072ccb1399c95..0fee1a0eb6cd3d 100644 --- a/src/tests/unit/gna/gna_memory_test.cpp +++ b/src/tests/unit/gna/gna_memory_test.cpp @@ -10,7 +10,7 @@ using namespace GNAPluginNS::memory; class GNAMemoryTest : public ::testing::Test { protected: - GNAMemory> mem; + GNAMemory mem{ GNAFloatAllocator{} }; void SetUp() override { } @@ -21,7 +21,7 @@ TEST_F(GNAMemoryTest, canStoreActualBlob) { float* pFuture = nullptr; size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); ASSERT_NE(pFuture, nullptr); @@ -36,8 +36,8 @@ TEST_F(GNAMemoryTest, canStore2Blobs) { float* pFuture = nullptr; float* pFuture2 = nullptr; - mem.push_ptr(nullptr, &pFuture, input, 3*4); - mem.push_ptr(nullptr, &pFuture2, input+1, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture2, input+1, 3*4); mem.commit(); ASSERT_NE(pFuture, input); @@ -55,11 +55,11 @@ TEST_F(GNAMemoryTest, canStore2Blobs) { TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) { float input[] = {1, 2, 3, 4, 5, 6, 7, 8}; float* pFuture = nullptr; - - mem.push_ptr(nullptr, &pFuture, input, 3*4, 8); + auto queue = mem.getQueue(REGION_SCRATCH); + queue->push_ptr(nullptr, &pFuture, 
input, 3 * 4, 8); mem.commit(); - ASSERT_EQ(16 , mem.getTotalBytes()); + ASSERT_EQ(16, queue->getSize()); ASSERT_NE(pFuture, input); ASSERT_NE(pFuture, nullptr); @@ -75,12 +75,12 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) { float input[] = {1, 2, 3, 4, 5, 6, 7, 8}; float* pFuture = nullptr; float* pFuture2 = nullptr; - - mem.push_ptr(nullptr, &pFuture, input, 3*4, 8); - mem.push_ptr(nullptr, &pFuture2, input, 3*4, 16); + auto queue = mem.getQueue(REGION_SCRATCH); + queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8); + queue->push_ptr(nullptr, &pFuture2, input, 3 * 4, 16); mem.commit(); - ASSERT_EQ(32 , mem.getTotalBytes()); + ASSERT_EQ(32 , queue->getSize()); ASSERT_NE(pFuture, nullptr); @@ -95,14 +95,14 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) { TEST_F(GNAMemoryTest, canReserveData) { float* pFuture = nullptr; - mem.reserve_ptr(nullptr, &pFuture, 3*4); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture, 3*4); mem.commit(); ASSERT_NE(pFuture, nullptr); } TEST_F(GNAMemoryTest, canReserveDataByVoid) { - mem.reserve_ptr(nullptr, nullptr, 3*4); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, nullptr, 3*4); ASSERT_NO_THROW(mem.commit()); } @@ -113,8 +113,8 @@ TEST_F(GNAMemoryTest, canReserveAndPushData) { float* pFuture2 = nullptr; size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.reserve_ptr(nullptr, &pFuture2, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture2, 3*4); mem.commit(); ASSERT_NE(pFuture, nullptr); @@ -138,9 +138,9 @@ TEST_F(GNAMemoryTest, canBindAndResolve) { float *pFuture3 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture3, &pFuture); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + 
mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture); mem.commit(); @@ -161,9 +161,9 @@ TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) { float *pFuture4 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture4, &pFuture3); - mem.bind_ptr(nullptr, &pFuture3, &pFuture); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture4, &pFuture3); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); @@ -185,9 +185,9 @@ TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) { float *pFuture4 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture4, &pFuture3, 4); - mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture4, &pFuture3, 4); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture, 4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); @@ -209,9 +209,9 @@ TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) { float *pFuture3 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture, 4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture); mem.commit(); @@ -233,7 +233,7 @@ TEST_F(GNAMemoryTest, canPushLocal) { { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; - mem.push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1); + mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1); } //poison stack @@ -250,8 +250,8 @@ TEST_F(GNAMemoryTest, canPushValue) { float* pFuture2 = reinterpret_cast(&pFuture2); { - 
mem.push_value(nullptr, pFuture, 3.f, 2); - mem.push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture2, 13.f, 2); } mem.commit(); @@ -267,66 +267,66 @@ TEST_F(GNAMemoryTest, canPushReadOnlyValue) { float* pFuture2 = reinterpret_cast(&pFuture2); { - mem.push_value(nullptr, pFuture, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); } mem.commit(); ASSERT_FLOAT_EQ(pFuture[0], 3); ASSERT_FLOAT_EQ(pFuture[1], 3); - ASSERT_FLOAT_EQ(pFuture[2], 13); - ASSERT_FLOAT_EQ(pFuture[3], 13); + ASSERT_FLOAT_EQ(pFuture2[0], 13); + ASSERT_FLOAT_EQ(pFuture2[1], 13); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeEmptyReqs) { - mem.push_value(nullptr, nullptr, 3.f, 2); - mem.readonly().push_value(nullptr, nullptr, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, nullptr, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 0); - ASSERT_EQ(mem.getRWBytes(), 0); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 0); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 0); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithEmptyReqs) { // empty request before - mem.push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); // not empty requests float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(nullptr, pFuture1, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); // empty request after - mem.readonly().push_value(nullptr, nullptr, 13.f, 2); - + 
mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, nullptr, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 2 * sizeof(float)); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(nullptr, pFuture1, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 2 * sizeof(float)); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) { - GNAMemory> memAligned(64); + GNAMemory memAligned(64); float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - memAligned.push_value(nullptr, pFuture1, 3.f, 2); - memAligned.readonly().push_value(nullptr, pFuture2, 13.f, 2); + memAligned.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + memAligned.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); memAligned.commit(); - ASSERT_EQ(memAligned.getTotalBytes(), 128); - ASSERT_EQ(memAligned.getRWBytes(), 64); + ASSERT_EQ(memAligned.getRegionBytes(rRegion::REGION_RO), 64); + ASSERT_EQ(memAligned.getRegionBytes(rRegion::REGION_SCRATCH), 64); } TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) { @@ -334,15 +334,15 @@ TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = 
reinterpret_cast(&pFuture3); - mem.readonly().push_value(nullptr, pFuture1, 3.f, 2); - mem.push_value(nullptr, pFuture2, 13.f, 3); - mem.readonly().push_value(nullptr, pFuture3, 32.f, 4); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture2, 13.f, 3); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture3, 32.f, 4); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), (2+3+4) * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), (2 + 4) * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 3 * sizeof(float)); - ASSERT_LT(&pFuture2[0], &pFuture1[0]); + ASSERT_NE(&pFuture2[0], &pFuture1[0]); ASSERT_LT(&pFuture1[0], &pFuture3[0]); ASSERT_FLOAT_EQ(pFuture1[0], 3.f); @@ -367,13 +367,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequest) { size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 4 * len); ASSERT_NE(pFuture, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); ASSERT_EQ(pFuture3, pFuture + 9); @@ -399,13 +399,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenPush) { size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.push_ptr(nullptr, &pFutureInput2, input2, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + 
mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFutureInput2, input2, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 3 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 3 * len); ASSERT_NE(pFuture, nullptr); ASSERT_NE(pFutureInput2, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); @@ -430,13 +430,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) { size_t len = sizeof(input); - mem.reserve_ptr(nullptr, &pFuture, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.push_ptr(nullptr, &pFutureInput, input, len); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFutureInput, input, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 3 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 3 * len); ASSERT_NE(pFuture, nullptr); ASSERT_NE(pFutureInput, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); @@ -450,4 +450,4 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) { ASSERT_FLOAT_EQ(pFutureInput[0], 1); ASSERT_FLOAT_EQ(pFutureInput[1], 2); ASSERT_FLOAT_EQ(pFutureInput[2], 3); -} \ No newline at end of file +} diff --git a/src/tests/unit/gna/gna_plugin_config_test.cpp b/src/tests/unit/gna/gna_plugin_config_test.cpp index 7b12f82bf984bb..d8e599f30ef786 100644 --- a/src/tests/unit/gna/gna_plugin_config_test.cpp +++ b/src/tests/unit/gna/gna_plugin_config_test.cpp @@ -196,7 +196,7 @@ TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) { EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0"); SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_0"); EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_3_0"); - ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_5"); + ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_7"); ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "0"); ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_1_5"); 
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET"); @@ -207,7 +207,7 @@ TEST_F(GNAPluginConfigTest, GnaConfigGnaCompileTargetTest) { EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_2_0"); SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_0"); EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_3_0"); - ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_5"); + ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_7"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "0"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_1_5"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET"); diff --git a/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp b/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp index 639779bac109d4..81c3655d80a588 100644 --- a/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp +++ b/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp @@ -39,6 +39,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc( return Gna2StatusSuccess; } +GNA2_API enum Gna2Status Gna2MemorySetTag( + void* memory, + uint32_t tag) { + return Gna2StatusSuccess; +} + GNA2_API enum Gna2Status Gna2DeviceCreateForExport( Gna2DeviceVersion targetDeviceVersion, uint32_t * deviceIndex) { diff --git a/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp b/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp index 8498cfca9de5df..8d1bd1e9bbc9a6 100644 --- a/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp +++ b/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp @@ -108,35 +108,33 @@ void GNAPropagateMatcher :: match() { OutputsDataMap outputsInfo; StrictMock mockApi; - std::vector data; + std::vector> data; if (_env.config[GNA_CONFIG_KEY(DEVICE_MODE)].compare(GNA_CONFIG_VALUE(SW_FP32)) != 0 && !_env.matchThrows) { - EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)).WillOnce(Invoke([&data]( - uint32_t sizeRequested, - uint32_t *sizeGranted, - void **memoryAddress - ) { - data.resize(sizeRequested); - *sizeGranted = sizeRequested; - *memoryAddress = 
&data.front(); - return Gna2StatusSuccess; - })); + EXPECT_CALL(mockApi, Gna2MemoryAlloc(_,_,_)) + .WillRepeatedly(Invoke([&data](uint32_t sizeRequested, uint32_t* sizeGranted, void** memoryAddress) { + data.push_back(std::vector(sizeRequested)); + *sizeGranted = sizeRequested; + *memoryAddress = data.back().data(); + return Gna2StatusSuccess; + })); - EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)).WillOnce(Invoke([]( - uint32_t deviceIndex, - enum Gna2DeviceVersion * deviceVersion) { + EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)) + .WillOnce(Invoke([](uint32_t deviceIndex, enum Gna2DeviceVersion* deviceVersion) { *deviceVersion = Gna2DeviceVersionSoftwareEmulation; return Gna2StatusSuccess; })); EXPECT_CALL(mockApi, Gna2DeviceOpen(_)).WillOnce(Return(Gna2StatusSuccess)); - EXPECT_CALL(mockApi, Gna2GetLibraryVersion(_,_)).Times(AtLeast(0)).WillRepeatedly(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2GetLibraryVersion(_,_)) + .Times(AtLeast(0)) + .WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2InstrumentationConfigCreate(_,_,_,_)).WillOnce(Return(Gna2StatusSuccess)); - if(_env.is_setup_of_omp_theads_expected == true) { + if (_env.is_setup_of_omp_theads_expected == true) { EXPECT_CALL(mockApi, Gna2DeviceSetNumberOfThreads(_,_)).WillOnce(Return(Gna2StatusSuccess)); } @@ -200,7 +198,7 @@ void GNAPropagateMatcher :: match() { expect_enqueue_calls(mockApi); } - EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillOnce(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2DeviceClose(_)).WillOnce(Return(Gna2StatusSuccess)); } @@ -690,8 +688,8 @@ void GNAQueryStateMatcher :: match() { } }; - EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)). 
- WillOnce(DoAll(SetArgPointee<1>(10000), SetArgPointee<2>(&data.front()), Return(Gna2StatusSuccess))); + EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)).Times(AtLeast(1)) + .WillRepeatedly(DoAll(SetArgPointee<1>(10000), SetArgPointee<2>(&data.front()), Return(Gna2StatusSuccess))); EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)).WillOnce(Invoke([]( uint32_t deviceIndex, @@ -706,7 +704,7 @@ void GNAQueryStateMatcher :: match() { EXPECT_CALL(mockApi, Gna2InstrumentationConfigCreate(_,_,_,_)).WillOnce(Return(Gna2StatusSuccess)); - EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillOnce(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2MemoryFree(_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2DeviceClose(_)).WillOnce(Return(Gna2StatusSuccess)); diff --git a/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp b/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp index f71f880912af1d..55e46da6c8c566 100644 --- a/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp +++ b/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp @@ -115,23 +115,24 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ ModelQuantizer q; + const float inputScaleFactorTest = 1000; + const float weightValueTest = 100; auto weights = make_shared_blob({ Precision::U8, {440}, C }); weights->allocate(); - fillWeights(weights, {100}); + fillWeights(weights, { weightValueTest }); Core ie; auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); - auto newNet = q.quantize(network, 1000); + auto newNet = q.quantize(network, inputScaleFactorTest); InputsDataMap inputs = newNet.getInputsInfo(); auto affineLayerPtr = getInputTo(inputs.begin()->second->getInputData()).begin()->second; auto quantParams = getInjectedData(affineLayerPtr); - - ASSERT_FLOAT_EQ(quantParams->_dst_quant.GetScale(), 100); - 
ASSERT_FLOAT_EQ(quantParams->_weights_quant.GetScale(), 100); + ASSERT_FLOAT_EQ(quantParams->_dst_quant.GetScale(), MAX_VAL_2B_WEIGHT / weightValueTest * inputScaleFactorTest); + ASSERT_FLOAT_EQ(quantParams->_weights_quant.GetScale(), MAX_VAL_2B_WEIGHT / weightValueTest); } TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {