Skip to content

Commit

Permalink
[GNA] Implement GNA memory region splitting (RO/Input/Output/State/Scratch) and export in GNA format enabled (openvinotoolkit#11577)
Browse files Browse the repository at this point in the history
  • Loading branch information
kbruniec authored May 25, 2022
1 parent 4b08ce4 commit 81adc47
Show file tree
Hide file tree
Showing 37 changed files with 1,328 additions and 648 deletions.
68 changes: 38 additions & 30 deletions src/plugins/intel_gna/backend/am_intel_dnn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <string>
#include <algorithm>
#include <map>
#include <limits>

#if defined __INTEL_COMPILER || defined _MSC_VER
#include <malloc.h>
Expand All @@ -27,6 +28,7 @@
#include "gna_types.h"
#include "gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
#include "memory/gna_memory.hpp"

#include <gna2-model-api.h>
#include "gna2_model_helper.hpp"
Expand All @@ -50,16 +52,16 @@ using GNAPluginNS::GNAConvolutionLayer::outputFromConv;
using GNAPluginNS::GNAConvolutionLayer::outputFromPooling;
using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy;

using GNAPluginNS::memory::GNAMemoryInterface;

void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) {
dump_write_index = index;
}

void GNAPluginNS::backend::AMIntelDNN::Init(void *ptr_memory,
uint32_t num_memory_bytes,
void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface,
intel_dnn_number_type_t compute_precision,
float scale_factor) {
ptr_dnn_memory_ = ptr_memory;
num_bytes_dnn_memory_ = num_memory_bytes;
memory = memoryInterface;
compute_precision_ = compute_precision;
input_scale_factor_ = scale_factor;

Expand Down Expand Up @@ -740,6 +742,19 @@ void PrintTensors(std::ofstream& out, T tensors) {
}
}

// Writes two dump lines describing where `ptr` lives: the memory region type
// and the pointer's offset inside that region, as a zero-padded 8-digit hex
// value. When no memory queue tracks `ptr`, the region is reported as
// "UNKNOWN_QUEUE" and the offset as the uint32_t maximum (0xffffffff).
void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) {
    const auto regionQueue = memory->getQueue(ptr);
    // Sentinels used when the pointer does not belong to any known region.
    std::string regionName{"UNKNOWN_QUEUE"};
    uint32_t regionOffset = std::numeric_limits<uint32_t>::max();
    if (regionQueue) {
        regionName = GNAPluginNS::memory::rRegionToStr(regionQueue->regionType());
        regionOffset = regionQueue->getOffset(ptr).second;
    }
    out << "<memory_region_type> " << regionName << "\n";
    out << "<" << type << "_address> " << "0x"
        << std::setfill('0') << std::setw(8) << std::hex << regionOffset << "\n";
}

void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) {
if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) {
fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n");
Expand All @@ -762,7 +777,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << "<intel_dnn_file>\n";
out_file << "<number_type> " << intel_dnn_number_type_name[logging_precision] << "\n";
out_file << "<softmax_type> " << intel_dnn_softmax_name[softmax_type] << "\n";
out_file << "<num_memory_bytes> " << std::dec << num_bytes_dnn_memory_ << "\n";
const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap();
for (const auto& regionPair : regionsMap) {
out_file << "<memory_region_type> " << std::dec << regionPair.second << "\n";
out_file << "<num_memory_region_bytes> " << std::dec << memory->getRegionBytes(regionPair.first) << "\n";
}
out_file << "<num_group> " << std::dec << num_group << "\n";
out_file << "<number_inputs> " << std::dec << num_inputs << "\n";
out_file << "<num_outputs> " << std::dec << num_outputs << "\n";
Expand Down Expand Up @@ -815,10 +834,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << "<num_bytes_per_input> " << std::dec << num_bytes_per_input << "\n";
out_file << "<num_bytes_per_output> " << std::dec << num_bytes_per_output << "\n";
}
out_file << "<input_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].ptr_inputs, ptr_dnn_memory_) << "\n";
out_file << "<output_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].ptr_outputs, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "input", component[i].ptr_inputs);
PrintOffset(out_file, "output", component[i].ptr_outputs);
switch (component[i].operation) {
case kDnnAffineOp:
case kDnnDiagonalOp: {
Expand Down Expand Up @@ -846,10 +863,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> "
<< output_scale_factor << "\n";
}
out_file << "<weight_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_weights, ptr_dnn_memory_) << "\n";
out_file << "<bias_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_biases, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "weight", component[i].op.affine.ptr_weights);
PrintOffset(out_file, "bias", component[i].op.affine.ptr_biases);
#ifdef LIGHT_DUMP
std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out);
std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out);
Expand Down Expand Up @@ -996,10 +1011,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> "
<< output_scale_factor << "\n";
}
out_file << "<filter_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_filters, ptr_dnn_memory_) << "\n";
out_file << "<bias_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_biases, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "filter", component[i].op.conv1D.ptr_filters);
PrintOffset(out_file, "bias", component[i].op.conv1D.ptr_biases);

#ifdef LIGHT_DUMP
std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out);
Expand Down Expand Up @@ -1145,12 +1158,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> "
<< output_scale_factor << "\n";
}
out_file << "<weight_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_weights, ptr_dnn_memory_) << "\n";
out_file << "<bias_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_biases, ptr_dnn_memory_) << "\n";
out_file << "<feedback_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "weight", component[i].op.recurrent.ptr_weights);
PrintOffset(out_file, "bias", component[i].op.recurrent.ptr_biases);
PrintOffset(out_file, "feedback", component[i].op.recurrent.ptr_feedbacks);
if (num_bytes_per_weight == 1) {
#ifdef DUMP_WB
int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.recurrent.ptr_weights);
Expand Down Expand Up @@ -1308,14 +1318,12 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
if (logging_precision == kDnnFloat) {
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> " << 1.0 << "\n";
out_file << "<num_segments> " << std::dec << 0 << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments);
} else {
out_file << std::setprecision(12) << std::scientific << "<output_scale_factor> "
<< output_scale_factor << "\n";
out_file << "<num_segments> " << std::dec << num_segments << "\n";
out_file << "<segment_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments);
if (compute_precision_ == kDnnInt) {
out_file << "<slope> ";
for (uint32_t segment = 0; segment < num_segments; segment++) {
Expand Down Expand Up @@ -1364,8 +1372,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
}
}
if (ptr_active_outputs() != nullptr) {
out_file << "<activelist_address> " << "0x" << std::setfill('0') << std::setw(8) << std::hex
<< GNAPluginNS::memory::MemoryOffset(ptr_active_outputs(), ptr_dnn_memory_) << "\n";
PrintOffset(out_file, "activelist", ptr_active_outputs());
}
out_file << "<end_of_file>\n";
out_file.close();
Expand Down Expand Up @@ -1410,7 +1417,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const
memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation));
gnaOperation = gnaModel->Operations;
for (int i = 0; i < component.size(); i++) {
// std::cout << "Component + " << i <<"=GNA_" << std::distance(ptr_nnet->pLayers, pLayer) << "\n";
gnalog() << "Component + " << i << "=GNA_" << std::distance(gnaModel->Operations, gnaOperation) << "\n";

auto& comp = component[i];
switch (comp.operation) {
case kDnnAffineOp:
Expand Down
15 changes: 7 additions & 8 deletions src/plugins/intel_gna/backend/am_intel_dnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
#include "gna/gna_config.hpp"

#include "gna_plugin_log.hpp"

#include "memory/gna_memory.hpp"
#include <gna2-model-api.h>
#include <gna/gna_config.hpp>

using GNAPluginNS::memory::GNAMemoryInterface;

namespace GNAPluginNS {
namespace backend {
Expand All @@ -38,15 +39,12 @@ class AMIntelDNN {
ptr_sumgroup_sizes(NULL),
num_sumgroup_sizes(0),
ptr_priors(NULL),
ptr_dnn_memory_(NULL),
num_bytes_dnn_memory_(0),
compute_precision_(kDnnNumNumberType) {
}

~AMIntelDNN();

void Init(void *ptr_memory,
uint32_t num_memory_bytes,
void Init(GNAMemoryInterface * memoryInterface,
intel_dnn_number_type_t compute_precision,
float scale_factor);

Expand Down Expand Up @@ -294,6 +292,8 @@ class AMIntelDNN {

void WriteGraphWizModel(const char *filename);

void PrintOffset(std::ofstream& out, const std::string& type, void* ptr);

void WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision);

void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0);
Expand Down Expand Up @@ -338,8 +338,7 @@ class AMIntelDNN {
void BeginNewWrite(uint32_t index);

private:
void *ptr_dnn_memory_;
uint32_t num_bytes_dnn_memory_;
GNAMemoryInterface* memory = nullptr;
uint32_t *ptr_active_outputs_;
uint32_t num_active_outputs_;
intel_dnn_number_type_t compute_precision_;
Expand Down
18 changes: 15 additions & 3 deletions src/plugins/intel_gna/backend/gna_limitations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@

#include "gna_limitations.hpp"

#include "gna/gna_config.hpp"

#include <cstdint>
#include <unordered_set>
#include <legacy/ie_layers.h>
#include <legacy/graph_tools.hpp>
#include <layers/gna_layer_type.hpp>
#include <layers/gna_layer_info.hpp>
#include "gna_graph_tools.hpp"
#include "gna_lib_ver_selector.hpp"

namespace GNAPluginNS {
namespace GNALimitations {
Expand Down Expand Up @@ -115,10 +118,11 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, c
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}

bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
bool Validator_30::ValidateCnn2D(const std::string &name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception) const {

const std::string prefix = "Layer Convolution2D: " + name + ":";
auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth);

Expand All @@ -141,7 +145,8 @@ bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const u
return error.empty() ? true : false;
}

bool Validator::ValidatePooling2D(std::string name,

bool Validator_30::ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW,
bool exception) const {
Expand All @@ -160,7 +165,14 @@ bool Validator::ValidatePooling2D(std::string name,
return error.empty() ? true : false;
}

void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) {
// Factory for target-specific limit validators. Returns a Validator_30 for
// the GNA 3.0 compile target; any other target yields nullptr (no dedicated
// validator available).
std::unique_ptr<AbstractValidator> AbstractValidator::Create(const std::string& target) {
    const bool isTarget30 = (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0);
    if (!isTarget30) {
        return nullptr;
    }
    return tools::make_unique<Validator_30>();
}

void AbstractValidator::ThrowIfNotEmpty(const std::string& prefix, const std::string& error) {
if (!error.empty()) {
THROW_GNA_EXCEPTION << prefix << error;
}
Expand Down
32 changes: 23 additions & 9 deletions src/plugins/intel_gna/backend/gna_limitations.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,23 @@ struct RectLimitByChannelsAndPrecision {
const OvGnaType precision, const uint32_t channels, std::string what) const;
};

class Validator {
// Interface for HW-target-specific validation of GNA 2D convolution and
// pooling parameters. Concrete validators (e.g. Validator_30) are obtained
// through Create() and owned via std::unique_ptr<AbstractValidator>.
class AbstractValidator {
protected:
    // Throws (THROW_GNA_EXCEPTION) with the message `prefix` + `error` when
    // `error` is non-empty; does nothing otherwise.
    static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error);
public:
    // Create() hands ownership back as unique_ptr<AbstractValidator>, so the
    // derived object is destroyed through this base pointer; without a virtual
    // destructor that delete would be undefined behavior.
    virtual ~AbstractValidator() = default;

    // Validates 2D convolution geometry against the target's HW limits.
    // Returns true when valid; when invalid, throws if `exception` is true,
    // otherwise returns false.
    virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
        const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
        const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
        OvGnaType inPrecision, bool exception = true) const = 0;

    // Validates 2D pooling window/stride against the target's HW limits;
    // same return/throw contract as ValidateCnn2D.
    virtual bool ValidatePooling2D(const std::string& name,
        const uint32_t windowH, const uint32_t windowW,
        const uint32_t strideH, const uint32_t strideW,
        bool exception = true) const = 0;

    // Returns the validator matching the given GNA compile target string,
    // or nullptr when no dedicated validator exists for that target.
    static std::unique_ptr<AbstractValidator> Create(const std::string&);
};

class Validator_30 : public AbstractValidator {
RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} };
RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 };

Expand All @@ -123,20 +139,18 @@ class Validator {
{ convDilationWidth, convDilationWidth, "dilation width" } };
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };

static void ThrowIfNotEmpty(const std::string prefix, const std::string error);

public:
Validator() = default;
Validator_30() = default;

bool ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const;
OvGnaType inPrecision, bool exception = true) const override;

bool ValidatePooling2D(std::string name,
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const;
bool exception = true) const override;
};
} // namespace Cnn2D

Expand Down
Loading

0 comments on commit 81adc47

Please sign in to comment.