diff --git a/src/plugins/intel_gna/backend/am_intel_dnn.cpp b/src/plugins/intel_gna/backend/am_intel_dnn.cpp index b75cc3f265f3b2..7ad4b406084474 100644 --- a/src/plugins/intel_gna/backend/am_intel_dnn.cpp +++ b/src/plugins/intel_gna/backend/am_intel_dnn.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #if defined __INTEL_COMPILER || defined _MSC_VER #include @@ -27,6 +28,7 @@ #include "gna_types.h" #include "gna_limitations.hpp" #include "layers/gna_convolution_layer.hpp" +#include "memory/gna_memory.hpp" #include #include "gna2_model_helper.hpp" @@ -50,16 +52,16 @@ using GNAPluginNS::GNAConvolutionLayer::outputFromConv; using GNAPluginNS::GNAConvolutionLayer::outputFromPooling; using GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy; +using GNAPluginNS::memory::GNAMemoryInterface; + void GNAPluginNS::backend::AMIntelDNN::BeginNewWrite(uint32_t index) { dump_write_index = index; } -void GNAPluginNS::backend::AMIntelDNN::Init(void *ptr_memory, - uint32_t num_memory_bytes, +void GNAPluginNS::backend::AMIntelDNN::Init(GNAMemoryInterface* memoryInterface, intel_dnn_number_type_t compute_precision, float scale_factor) { - ptr_dnn_memory_ = ptr_memory; - num_bytes_dnn_memory_ = num_memory_bytes; + memory = memoryInterface; compute_precision_ = compute_precision; input_scale_factor_ = scale_factor; @@ -740,6 +742,19 @@ void PrintTensors(std::ofstream& out, T tensors) { } } +void GNAPluginNS::backend::AMIntelDNN::PrintOffset(std::ofstream& out, const std::string& type, void* ptr) { + const auto queue = memory->getQueue(ptr); + std::string typeOfRegion = "UNKNOWN_QUEUE"; + auto offset = std::numeric_limits::max(); + if (queue != nullptr) { + typeOfRegion = GNAPluginNS::memory::rRegionToStr(queue->regionType()); + offset = queue->getOffset(ptr).second; + } + out << " " << typeOfRegion << "\n"; + out << "<" << type << "_address> " + << "0x" << std::setfill('0') << std::setw(8) << std::hex << offset << "\n"; +} + void 
GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision) { if ((compute_precision_ == kDnnFloat) && (logging_precision == kDnnInt)) { fprintf(stderr, "Error trying to write floating point DNN as integer in GNAPluginNS::backend::AMIntelDNN::WriteDnnText().\n"); @@ -762,7 +777,11 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << "\n"; out_file << " " << intel_dnn_number_type_name[logging_precision] << "\n"; out_file << " " << intel_dnn_softmax_name[softmax_type] << "\n"; - out_file << " " << std::dec << num_bytes_dnn_memory_ << "\n"; + const auto& regionsMap = GNAPluginNS::memory::GetAllRegionsToStrMap(); + for (const auto& regionPair : regionsMap) { + out_file << " " << std::dec << regionPair.second << "\n"; + out_file << " " << std::dec << memory->getRegionBytes(regionPair.first) << "\n"; + } out_file << " " << std::dec << num_group << "\n"; out_file << " " << std::dec << num_inputs << "\n"; out_file << " " << std::dec << num_outputs << "\n"; @@ -815,10 +834,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << " " << std::dec << num_bytes_per_input << "\n"; out_file << " " << std::dec << num_bytes_per_output << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].ptr_inputs, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].ptr_outputs, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "input", component[i].ptr_inputs); + PrintOffset(out_file, "output", component[i].ptr_outputs); switch (component[i].operation) { case kDnnAffineOp: case kDnnDiagonalOp: { @@ -846,10 +863,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << 
"\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_weights, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.affine.ptr_biases, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "weight", component[i].op.affine.ptr_weights); + PrintOffset(out_file, "bias", component[i].op.affine.ptr_biases); #ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); std::ofstream out_bfile((out_file_name.str() + "_biases.txt").c_str(), std::ios::out); @@ -996,10 +1011,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_filters, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.conv1D.ptr_biases, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "filter", component[i].op.conv1D.ptr_filters); + PrintOffset(out_file, "bias", component[i].op.conv1D.ptr_biases); #ifdef LIGHT_DUMP std::ofstream out_wfile((out_file_name.str() + "_weights.txt").c_str(), std::ios::out); @@ -1145,12 +1158,9 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; } - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_weights, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << 
GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_biases, ptr_dnn_memory_) << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "weight", component[i].op.recurrent.ptr_weights); + PrintOffset(out_file, "bias", component[i].op.recurrent.ptr_biases); + PrintOffset(out_file, "feedback", component[i].op.recurrent.ptr_feedbacks); if (num_bytes_per_weight == 1) { #ifdef DUMP_WB int8_t *ptr_weight = reinterpret_cast(component[i].op.recurrent.ptr_weights); @@ -1308,14 +1318,12 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ if (logging_precision == kDnnFloat) { out_file << std::setprecision(12) << std::scientific << " " << 1.0 << "\n"; out_file << " " << std::dec << 0 << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments); } else { out_file << std::setprecision(12) << std::scientific << " " << output_scale_factor << "\n"; out_file << " " << std::dec << num_segments << "\n"; - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(component[i].op.pwl.ptr_segments, ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "segment", component[i].op.pwl.ptr_segments); if (compute_precision_ == kDnnInt) { out_file << " "; for (uint32_t segment = 0; segment < num_segments; segment++) { @@ -1364,8 +1372,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ } } if (ptr_active_outputs() != nullptr) { - out_file << " " << "0x" << std::setfill('0') << std::setw(8) << std::hex - << GNAPluginNS::memory::MemoryOffset(ptr_active_outputs(), ptr_dnn_memory_) << "\n"; + PrintOffset(out_file, "activelist", 
ptr_active_outputs()); } out_file << "\n"; out_file.close(); @@ -1410,7 +1417,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const memset(gnaModel->Operations, 0, gnaModel->NumberOfOperations * sizeof(Gna2Operation)); gnaOperation = gnaModel->Operations; for (int i = 0; i < component.size(); i++) { - // std::cout << "Component + " << i <<"=GNA_" << std::distance(ptr_nnet->pLayers, pLayer) << "\n"; + gnalog() << "Component + " << i << "=GNA_" << std::distance(gnaModel->Operations, gnaOperation) << "\n"; + auto& comp = component[i]; switch (comp.operation) { case kDnnAffineOp: diff --git a/src/plugins/intel_gna/backend/am_intel_dnn.hpp b/src/plugins/intel_gna/backend/am_intel_dnn.hpp index c58f152bbf96f4..338f3601331391 100644 --- a/src/plugins/intel_gna/backend/am_intel_dnn.hpp +++ b/src/plugins/intel_gna/backend/am_intel_dnn.hpp @@ -13,9 +13,10 @@ #include "gna/gna_config.hpp" #include "gna_plugin_log.hpp" - +#include "memory/gna_memory.hpp" #include -#include + +using GNAPluginNS::memory::GNAMemoryInterface; namespace GNAPluginNS { namespace backend { @@ -38,15 +39,12 @@ class AMIntelDNN { ptr_sumgroup_sizes(NULL), num_sumgroup_sizes(0), ptr_priors(NULL), - ptr_dnn_memory_(NULL), - num_bytes_dnn_memory_(0), compute_precision_(kDnnNumNumberType) { } ~AMIntelDNN(); - void Init(void *ptr_memory, - uint32_t num_memory_bytes, + void Init(GNAMemoryInterface * memoryInterface, intel_dnn_number_type_t compute_precision, float scale_factor); @@ -294,6 +292,8 @@ class AMIntelDNN { void WriteGraphWizModel(const char *filename); + void PrintOffset(std::ofstream& out, const std::string& type, void* ptr); + void WriteDnnText(const char *filename, intel_dnn_number_type_t logging_precision); void InitGNAStruct(Gna2Model *gnaModel, const std::string& gnaCompileTarget = InferenceEngine::GNAConfigParams::GNA_TARGET_2_0); @@ -338,8 +338,7 @@ class AMIntelDNN { void BeginNewWrite(uint32_t index); private: - void *ptr_dnn_memory_; - uint32_t 
num_bytes_dnn_memory_; + GNAMemoryInterface* memory = nullptr; uint32_t *ptr_active_outputs_; uint32_t num_active_outputs_; intel_dnn_number_type_t compute_precision_; diff --git a/src/plugins/intel_gna/backend/gna_limitations.cpp b/src/plugins/intel_gna/backend/gna_limitations.cpp index b50b796486a20a..56f9349a863ce5 100644 --- a/src/plugins/intel_gna/backend/gna_limitations.cpp +++ b/src/plugins/intel_gna/backend/gna_limitations.cpp @@ -4,6 +4,8 @@ #include "gna_limitations.hpp" +#include "gna/gna_config.hpp" + #include #include #include @@ -11,6 +13,7 @@ #include #include #include "gna_graph_tools.hpp" +#include "gna_lib_ver_selector.hpp" namespace GNAPluginNS { namespace GNALimitations { @@ -115,10 +118,11 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h, c return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what); } -bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, +bool Validator_30::ValidateCnn2D(const std::string &name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN, const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, OvGnaType inPrecision, bool exception) const { + const std::string prefix = "Layer Convolution2D: " + name + ":"; auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth); @@ -141,7 +145,8 @@ bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const u return error.empty() ? true : false; } -bool Validator::ValidatePooling2D(std::string name, + +bool Validator_30::ValidatePooling2D(const std::string& name, const uint32_t windowH, const uint32_t windowW, const uint32_t strideH, const uint32_t strideW, bool exception) const { @@ -160,7 +165,14 @@ bool Validator::ValidatePooling2D(std::string name, return error.empty() ? 
true : false; } -void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) { +std::unique_ptr AbstractValidator::Create(const std::string& target) { + if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { + return tools::make_unique(); + } + return nullptr; +} + +void AbstractValidator::ThrowIfNotEmpty(const std::string& prefix, const std::string& error) { if (!error.empty()) { THROW_GNA_EXCEPTION << prefix << error; } diff --git a/src/plugins/intel_gna/backend/gna_limitations.hpp b/src/plugins/intel_gna/backend/gna_limitations.hpp index be3f04cdd9f75c..734023edbe23da 100644 --- a/src/plugins/intel_gna/backend/gna_limitations.hpp +++ b/src/plugins/intel_gna/backend/gna_limitations.hpp @@ -101,7 +101,23 @@ struct RectLimitByChannelsAndPrecision { const OvGnaType precision, const uint32_t channels, std::string what) const; }; -class Validator { +class AbstractValidator { +protected: + static void ThrowIfNotEmpty(const std::string& prefix, const std::string& error); +public: + virtual bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, + OvGnaType inPrecision, bool exception = true) const = 0; + + virtual bool ValidatePooling2D(const std::string& name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW, + bool exception = true) const = 0; + static std::unique_ptr Create(const std::string&); +}; + +class Validator_30 : public AbstractValidator { RangeLimit2D inputHWLimit{ { 16, 384, "input height"} , { 16, 240, "input width"} }; RangeMultipleLimit inputChannelsNumberLimit{ {8, 384, "number of input channels"}, 8 }; @@ -123,20 +139,18 @@ class Validator { { convDilationWidth, convDilationWidth, "dilation width" } }; const VectorOrSquareLimit poolingWindowLimit{ 
3, 1, 1 }; - static void ThrowIfNotEmpty(const std::string prefix, const std::string error); - public: - Validator() = default; + Validator_30() = default; - bool ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, - const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN, + bool ValidateCnn2D(const std::string& name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW, - OvGnaType inPrecision, bool exception = true) const; + OvGnaType inPrecision, bool exception = true) const override; - bool ValidatePooling2D(std::string name, + bool ValidatePooling2D(const std::string& name, const uint32_t windowH, const uint32_t windowW, const uint32_t strideH, const uint32_t strideW, - bool exception = true) const; + bool exception = true) const override; }; } // namespace Cnn2D diff --git a/src/plugins/intel_gna/gna2_model_debug_log.cpp b/src/plugins/intel_gna/gna2_model_debug_log.cpp index d42d607808433a..f8e358314ace3a 100644 --- a/src/plugins/intel_gna/gna2_model_debug_log.cpp +++ b/src/plugins/intel_gna/gna2_model_debug_log.cpp @@ -9,6 +9,7 @@ #include "gna2_model_debug_log.hpp" #include "gna2-model-api.h" +#include "gna_device.hpp" #include #include @@ -52,6 +53,55 @@ std::string GetSimpleString(Gna2Shape shape) { return out.str(); } +template +uint32_t FindInMapOrReturnOne(MapType map, typename MapType::key_type key) { + auto value = map.find(key); + if (value != map.end()) { + return value->second; + } + return 1; +} + +uint32_t GetTypeByteSize(Gna2DataType type) { + static const std::map operandTypeMap = { + {Gna2DataTypeNone, 1}, + {Gna2DataTypeBoolean, 1}, + {Gna2DataTypeInt4, 1}, + {Gna2DataTypeInt8, 1}, + {Gna2DataTypeInt16, 2}, + {Gna2DataTypeInt32, 4}, + {Gna2DataTypeUint4, 1}, + {Gna2DataTypeUint8, 1}, + 
{Gna2DataTypeUint16, 2}, + {Gna2DataTypeUint32, 4}, + {Gna2DataTypeUint64, 8}, + {Gna2DataTypeCompoundBias, 8}, + {Gna2DataTypePwlSegment, 8}, + {Gna2DataTypeWeightScaleFactor, 8}}; + return FindInMapOrReturnOne(operandTypeMap, type); +} + +uint32_t GetGnaShapeSize(const Gna2Shape& shape, const uint32_t bytesPerElement) { + if (shape.NumberOfDimensions == 0) { + return 0; + } + // to compute aligned filters (each filter begin is aligned to 16B) + // e.g., for 3x3 2B filter, its size is 18B, but the next filter will start at 32B offset + // filters are NHWC + uint32_t nAlignement = 1; + if (shape.NumberOfDimensions == 4 && shape.Dimensions[0] != 1) { + nAlignement = 16; + } + uint32_t total = 1; + for (uint32_t i = 1; i < shape.NumberOfDimensions; i++) { + total *= shape.Dimensions[i]; + } + total *= bytesPerElement; + auto totalAligned = Gna2RoundUp(total, nAlignement); + totalAligned *= shape.Dimensions[0]; + return totalAligned; +} + template bool NextElement(T & elementIndex, const Gna2Shape& total) { if (total.NumberOfDimensions == 0) return false; @@ -335,11 +385,11 @@ void DumpPwl(std::ostream& dumpFile, const Gna2Tensor& activation) { double a = static_cast(segments[k].Slope) / factor; double b = static_cast(segments[k].yBase) - ((static_cast(B) * segments[k].Slope) / factor); - dumpFile << "\t\tBase value for input (B) : " << B << "\n"; - dumpFile << "\t\tBase value for output (b) : " << segments[k].yBase << "\n"; - dumpFile << "\t\tSegment slope (S): " << segments[k].Slope << "\n"; - dumpFile << "\t\tShift (scale) : " << scale << "\n"; - dumpFile << "\t\ty = ax + b: a = " << a << ", b = " << b; + dumpFile << "\t\tBase input (B) : " << B << ", "; + dumpFile << "Base output (b) : " << segments[k].yBase << ", "; + dumpFile << "Slope (S): " << segments[k].Slope << ", "; + dumpFile << "Shift (scale) : " << scale << ", "; + dumpFile << "y = (" << a << ")x + (" << b << ")"; if (segments[k].Slope != 0) { double x0 = static_cast(B) - 
((static_cast(segments[k].yBase) * factor) / segments[k].Slope); dumpFile << ", x0 = " << x0; @@ -366,18 +416,28 @@ void DumpCharArray(std::ostream& dumpFile, const char *carray, size_t count) { } dumpFile << "\n"; } - } // namespace -void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGNA, bool dumpData) { +void DumpGna2Model(const Gna2Model& gnaModel, + const std::string& dumpFolderNameGNA, + bool dumpData, + const GnaAllocations& allAllocations, + const std::string& modeOfOperation) { std::stringstream dumpFileName; uint32_t opsNo = gnaModel.NumberOfOperations; std::time_t currTime = std::time(nullptr); - dumpFileName << dumpFolderNameGNA << "Gna2ModelDebugDump_" << opsNo << "_layer_" << std::put_time(std::localtime(&currTime), "%Y%m%d%H%M%S"); + dumpFileName << dumpFolderNameGNA << "Gna2ModelDebugDump_" << opsNo << "_layer_" + << std::put_time(std::localtime(&currTime), "%Y%m%d%H%M%S") << modeOfOperation; std::ofstream dumpFile(dumpFileName.str() + ".txt", std::ios::out); + const auto& allAllocationsSorted = allAllocations.GetAllocationsInExportOrder(); + for (auto&& a : allAllocationsSorted) { + dumpFile << "Allocation: ptr=" << a.ptr << "\tsizeRequested=" << a.sizeRequested << "\tsizeGranted=" << a.sizeGranted << + "\t tag=" << a.GetTagName() << "\n"; + } + dumpFile << "Layers (operations) count: " << opsNo << "\n"; for (size_t i = 0; i < opsNo; i++) { @@ -395,10 +455,27 @@ void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGN continue; } const auto& operand = *operation.Operands[j]; + void * foundPtr = nullptr; + std::string foundName = "AllocationNotFound"; + size_t offset = 0; + auto found = std::find_if(allAllocationsSorted.begin(), + allAllocationsSorted.end(), + [operand](const GnaAllocation& allocation) { + return allocation.getOffset(operand.Data).first; + }); + if (found != allAllocationsSorted.end()) { + foundPtr = found->ptr; + foundName = found->GetTagName(); + offset = 
found->getOffset(operand.Data).second; + } dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")" << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape) << + " tag: " << foundName << + " offset: " << offset << + " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type))) << " data: " << operand.Data << + " baseAlloc: " << foundPtr << " layout: "; DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS); diff --git a/src/plugins/intel_gna/gna2_model_debug_log.hpp b/src/plugins/intel_gna/gna2_model_debug_log.hpp index 1380297ec854e1..d5c363f806d4c3 100644 --- a/src/plugins/intel_gna/gna2_model_debug_log.hpp +++ b/src/plugins/intel_gna/gna2_model_debug_log.hpp @@ -4,9 +4,21 @@ #pragma once +#include #include +#include #include "gna2-model-api.h" +#include "gna_device.hpp" void WriteInputAndOutputTextGNAImpl(const Gna2Model & gnaModel, const std::string dumpFolderNameGNA, const std::string refFolderName); -void DumpGna2Model(const Gna2Model& gnaModel, const std::string dumpFolderNameGNA, bool dumpData); + +void DumpGna2Model(const Gna2Model& gnaModel, const std::string& dumpFolderNameGNA, bool dumpData, const GnaAllocations& allAllocations, + const std::string& modeOfOperation); + +template +std::string toHexString(T t) { + std::ostringstream o; + o << std::hex << t; + return o.str(); +} diff --git a/src/plugins/intel_gna/gna2_model_export_helper.cpp b/src/plugins/intel_gna/gna2_model_export_helper.cpp index af7e0489bfe129..7241cfa9e17c5e 100644 --- a/src/plugins/intel_gna/gna2_model_export_helper.cpp +++ b/src/plugins/intel_gna/gna2_model_export_helper.cpp @@ -10,6 +10,8 @@ #include "gna_api_wrapper.hpp" #include "gna2-device-api.h" +#include "gna2-tlv-writer.h" + #include #include @@ -52,6 +54,142 @@ void * ExportSueLegacyUsingGnaApi2( return bufferDump; } +#define Gna2TlvTypeOVInputScaleFactor GNA2_TLV_IMPL_CHAR_TO_TYPE("OVIS") +#define 
Gna2TlvTypeOVOutputScaleFactor GNA2_TLV_IMPL_CHAR_TO_TYPE("OVOS") +#define Gna2ExportTlv(...) 1 + +static_assert(std::numeric_limits<float>::is_iec559, "Float is not IEC 559 compatible"); +typedef std::array<char, sizeof(Gna2TlvRecord) + sizeof(float)> TlvFloatRecord; + +namespace { +TlvFloatRecord GetFloatInTLV(Gna2TlvType type, float value) { + TlvFloatRecord r; + reinterpret_cast<Gna2TlvRecord*>(r.data())->type = type; + reinterpret_cast<Gna2TlvRecord*>(r.data())->length = sizeof(float); + *reinterpret_cast<float*>(r.data() + sizeof(Gna2TlvRecord)) = value; + return r; +} +} // namespace + +void ExportTlvModel(uint32_t modelId, + uint32_t deviceIndex, + std::ostream& outStream, + Gna2DeviceVersion deviceVersionToExport, + uint32_t input_size, + uint32_t output_size, + float inputSF, + float outputSF) { + + uint32_t exportConfig; + auto status = Gna2ModelExportConfigCreate(gnaUserAllocatorAlignedPage, &exportConfig); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigCreate"); + + status = Gna2ModelExportConfigSetSource(exportConfig, deviceIndex, modelId); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetSource"); + status = Gna2ModelExportConfigSetTarget(exportConfig, deviceVersionToExport); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget"); + + // first descriptors + void* bufferLayerDescriptors = nullptr; + uint32_t sizeOfLayerDescriptors; + + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentLayerDescriptors, + &bufferLayerDescriptors, &sizeOfLayerDescriptors); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentLayerDescriptors)"); + + // RO + void* bufferROData = nullptr; + uint32_t sizeOfROData; + + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentReadOnlyDump, + &bufferROData, &sizeOfROData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentReadOnlyDump)"); + + // RW - scratch + void* bufferScratchRWData = nullptr; + uint32_t sizeOfScratchRWData; + + status = 
Gna2ModelExport(exportConfig, + Gna2ModelExportComponentScratchDump, + &bufferScratchRWData, &sizeOfScratchRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentScratchDump)"); + + // TODO: This must first be covered by model creation code + void* bufferStateRWData = nullptr; + uint32_t sizeOfStateRWData = 0; + + + // RW - state + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentStateDump, + &bufferStateRWData, &sizeOfStateRWData); + if (!Gna2StatusIsSuccessful(status)) { + bufferStateRWData = nullptr; + sizeOfStateRWData = 0; + } + + // RW - external Input + void* bufferInputRWData = nullptr; + uint32_t sizeOfInputRWData; + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentInputDump, + &bufferInputRWData, &sizeOfInputRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentInputDump)"); + + // RW - external Output + void* bufferOutputRWData = nullptr; + uint32_t sizeOfOutputRWData; + status = Gna2ModelExport(exportConfig, + Gna2ModelExportComponentOutputDump, + &bufferOutputRWData, &sizeOfOutputRWData); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExport(Gna2ModelExportComponentOutputDump)"); + + char* outTlv = nullptr; + + const auto gnaLibraryVersion = GNADeviceHelper::GetGnaLibraryVersion(); + + uint32_t outTlvSize = 0; + auto tlv_status = Gna2ExportTlv( + deviceVersionToExport, + gnaUserAllocator, + &outTlv, + &outTlvSize, + (const char*)bufferLayerDescriptors, + sizeOfLayerDescriptors, + (const char*)bufferROData, + sizeOfROData, + (const char*)bufferStateRWData, + sizeOfStateRWData, + sizeOfScratchRWData, + input_size, + output_size, + gnaLibraryVersion.c_str(), + nullptr, + 0); + + if (Gna2TlvStatusSuccess == tlv_status) { + outStream.write(outTlv, outTlvSize); + auto tlvInSF = GetFloatInTLV(Gna2TlvTypeOVInputScaleFactor, inputSF); + auto tlvOutSF = GetFloatInTLV(Gna2TlvTypeOVOutputScaleFactor, outputSF); + outStream.write(tlvInSF.data(), 
tlvInSF.size()); + outStream.write(tlvOutSF.data(), tlvOutSF.size()); + } + gnaUserFree(outTlv); + + gnaUserFree(bufferLayerDescriptors); + gnaUserFree(bufferROData); + gnaUserFree(bufferScratchRWData); + gnaUserFree(bufferStateRWData); + + gnaUserFree(bufferInputRWData); + gnaUserFree(bufferOutputRWData); + + GNADeviceHelper::checkGna2Status((Gna2Status)status, "ExportTlvModel"); + status = Gna2ModelExportConfigRelease(exportConfig); + GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigRelease"); +} void ExportLdForDeviceVersion( uint32_t modelId, @@ -67,7 +205,7 @@ void ExportLdForDeviceVersion( status = Gna2ModelExportConfigSetTarget(exportConfig, deviceVersionToExport); GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget"); - void * ldDump; + void * ldDump = nullptr; uint32_t ldDumpSize; status = Gna2ModelExport(exportConfig, diff --git a/src/plugins/intel_gna/gna2_model_export_helper.hpp b/src/plugins/intel_gna/gna2_model_export_helper.hpp index fdff479cd5bd6b..26e3211b7bb4a8 100644 --- a/src/plugins/intel_gna/gna2_model_export_helper.hpp +++ b/src/plugins/intel_gna/gna2_model_export_helper.hpp @@ -20,4 +20,13 @@ void ExportLdForDeviceVersion( std::ostream & outStream, Gna2DeviceVersion deviceVersionToExport); +void ExportTlvModel(uint32_t modelId, + uint32_t deviceIndex, + std::ostream& outStream, + Gna2DeviceVersion deviceVersionToExport, + uint32_t input_size, + uint32_t output_size, + float inputSF, + float outputSF); + void ExportGnaDescriptorPartiallyFilled(uint32_t numberOfLayers, std::ostream & outStream); diff --git a/src/plugins/intel_gna/gna_data_types.hpp b/src/plugins/intel_gna/gna_data_types.hpp index 2b99b8744099f3..6fbe7c3ec9e668 100644 --- a/src/plugins/intel_gna/gna_data_types.hpp +++ b/src/plugins/intel_gna/gna_data_types.hpp @@ -14,7 +14,6 @@ #include "layers/gna_concat_layer.hpp" #include "layers/gna_split_layer.hpp" #include "gna_api_wrapper.hpp" -#include "memory/polymorph_allocator.hpp" #include 
"memory/gna_memory.hpp" struct TranspositionInfo { @@ -33,8 +32,11 @@ static inline bool FoundPartToTranspose(const std::vector &tr namespace GNAPluginNS { using dnn_ptr = std::shared_ptr>; - using allocator_type = GNAPluginNS::memory::PolymorphAllocator; - using gna_memory_type = GNAPluginNS::memory::GNAMemory; + + using gna_memory_type = GNAPluginNS::memory::GNAMemoryInterface; + using gna_memory_float = GNAPluginNS::memory::GNAMemory; + using gna_memory_device = GNAPluginNS::memory::GNAMemory<>; + using DnnComponentsForLayer = std::list>; using MemoryConnection = std::list>; using ConcatConnection = std::unordered_map; diff --git a/src/plugins/intel_gna/gna_device.cpp b/src/plugins/intel_gna/gna_device.cpp index 8699de95e5429c..962d0643c195e1 100644 --- a/src/plugins/intel_gna/gna_device.cpp +++ b/src/plugins/intel_gna/gna_device.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "gna_api_wrapper.hpp" #include "gna2-capability-api.h" @@ -16,13 +17,16 @@ #include "gna2-inference-api.h" #include "gna2-instrumentation-api.h" #include "gna2-memory-api.h" +#include "gna2-model-export-api.h" #include "gna2_model_export_helper.hpp" + #include "gna2_model_debug_log.hpp" #include "backend/am_intel_dnn.hpp" #include "gna/gna_config.hpp" #include "gna_plugin_log.hpp" #include "layers/gna_convolution_layer.hpp" +#include "memory/gna_mem_requests.hpp" //#define MODEL_DUMP @@ -33,18 +37,54 @@ uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) void * memPtr = nullptr; const auto status = Gna2MemoryAlloc(size_requested, size_granted, &memPtr); checkGna2Status(status, "Gna2MemoryAlloc"); + + gnalog() << "Gna2MemoryAlloc(" << size_requested << ") -> " << *size_granted << ", " << memPtr << "\n"; + allAllocations.Add(memPtr, size_requested, *size_granted); if (memPtr == nullptr) { THROW_GNA_EXCEPTION << "GNAAlloc failed to allocate memory. 
Requested: " << size_requested << " Granted: " << *(size_granted); } + dumpXNNROPtr = memPtr; dumpXNNROSize = *size_granted; return static_cast(memPtr); } -void GNADeviceHelper::free(void * ptr) { +void GNADeviceHelper::tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion tag) { std::unique_lock lockGnaCalls{ acrossPluginsSync }; - const auto status = Gna2MemoryFree(ptr); - checkGna2Status(status, "Gna2MemoryFree"); + using GNAPluginNS::memory::rRegion; + static const std::map tagMap { + {rRegion::REGION_INPUTS, Gna2MemoryTagInput}, + {rRegion::REGION_OUTPUTS, Gna2MemoryTagOutput}, + {rRegion::REGION_SCRATCH, Gna2MemoryTagScratch}, + {rRegion::REGION_RO, Gna2MemoryTagReadOnly}, + {rRegion::REGION_STATES, Gna2MemoryTagState}, + {rRegion::REGION_AUTO, Gna2MemoryTagState}, + }; + auto memoryTag = tagMap.at(tag); + if (tag == rRegion::REGION_AUTO) { + return; + } + const auto status = Gna2MemorySetTag(memPtr, memoryTag); + checkGna2Status(status, "Gna2MemorySetTag"); + gnalog() << "Gna2MemorySetTag(" << memPtr << ", " << memoryTag << ")\n"; + const auto tagSuccess = allAllocations.SetTagFor(memPtr, memoryTag); + if (!tagSuccess) { + THROW_GNA_EXCEPTION << "Allocation not found when tagging memory\n"; + } +} + +void GNADeviceHelper::free(void* ptr) { + Gna2Status status; + bool removeSuccess; + { + std::unique_lock lockGnaCalls{acrossPluginsSync}; + status = Gna2MemoryFree(ptr); + checkGna2Status(status, "Gna2MemoryFree"); + removeSuccess = allAllocations.Remove(ptr); + } + if (!removeSuccess) { + gnawarn() << "Allocation not found when freeing memory\n"; + } } std::string GNADeviceHelper::getGnaLibraryVersionPrivate() { @@ -127,7 +167,9 @@ uint32_t GNADeviceHelper::createModel(Gna2Model& gnaModel) const { #else "./"; #endif - DumpGna2Model(gnaModel, path, false); + const std::string mode = useDeviceEmbeddedExport ? 
"_ee" : ""; + const auto fileSuffix = mode + "_devVersion_" + toHexString(detectedGnaDevVersion); + DumpGna2Model(gnaModel, path, false, allAllocations, fileSuffix); #endif const auto status = Gna2ModelCreate(nGnaDeviceIndex, &gnaModel, &modelId); @@ -143,7 +185,7 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) { bool GNADeviceHelper::enforceLegacyCnnNeeded() const { const auto execTargetDevice = getTargetDevice(true); - return (isGnaLibVersion3_0 || isGnaLibVersion2_1) && isUpTo20HwGnaDevice(execTargetDevice); + return isUpTo20HwGnaDevice(execTargetDevice); } Gna2DeviceVersion GNADeviceHelper::parseTarget(const std::string& target) { @@ -166,8 +208,6 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix; }; if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) { - if (!isGnaLibVersion2_1 && !isGnaLibVersion3_0) - throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y"); parsed = Gna2DeviceVersion3_0; } else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { throwUnsupportedGnaTarget(""); @@ -177,7 +217,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const { if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation) - return (isGnaLibVersion3_0 || isGnaLibVersion2_1) ? 
Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0; + return Gna2DeviceVersion3_0; return detectedGnaDevVersion; } @@ -391,6 +431,7 @@ GnaWaitStatus GNADeviceHelper::wait(uint32_t reqId, int64_t millisTimeout) { return GNA_REQUEST_ABORTED; } checkGna2Status(status, "Gna2RequestWait"); + updateGnaPerfCounters(); return GNA_REQUEST_COMPLETED; } @@ -431,9 +472,15 @@ void GNADeviceHelper::dumpXnnForDeviceVersion( outStream.write(reinterpret_cast(&sueHeader), sizeof(sueHeader)); } -void GNADeviceHelper::createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose) { +void GNADeviceHelper::dumpTLVForDeviceVersion(const uint32_t modelId, std::ostream& outStream, + uint32_t input_size, uint32_t output_size, + float inSF, float outSF) { + ExportTlvModel(modelId, nGnaDeviceIndex, outStream, exportGeneration, input_size, output_size, inSF, outSF); +} + +void GNADeviceHelper::createVirtualDevice(Gna2DeviceVersion devVersion) { const auto status = Gna2DeviceCreateForExport(devVersion, &nGnaDeviceIndex); - GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport(" + std::to_string(devVersion) + ")" + purpose); + GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport(" + std::to_string(devVersion) + ")"); } void GNADeviceHelper::updateGnaDeviceVersion() { @@ -446,9 +493,10 @@ void GNADeviceHelper::open() { updateGnaDeviceVersion(); const auto gnaExecTarget = parseTarget(executionTarget); if (useDeviceEmbeddedExport) { - createVirtualDevice(exportGeneration, "export"); + createVirtualDevice(exportGeneration); + updateGnaDeviceVersion(); } else if (!executionTarget.empty() && gnaExecTarget != detectedGnaDevVersion) { - createVirtualDevice(gnaExecTarget, "execution"); + createVirtualDevice(gnaExecTarget); updateGnaDeviceVersion(); if (detectedGnaDevVersion != gnaExecTarget) { THROW_GNA_EXCEPTION << "Wrong virtual GNA device version reported: " << detectedGnaDevVersion << " instead of: " << gnaExecTarget; @@ -507,3 +555,16 @@ std::string 
GNADeviceHelper::getEffectiveGnaCompileTarget() const { } return InferenceEngine::GNAConfigParams::GNA_TARGET_2_0; } + +std::string GNADeviceHelper::GetCompileTarget() const { + static const std::map targetMap = { + {Gna2DeviceVersion2_0, InferenceEngine::GNAConfigParams::GNA_TARGET_2_0}, + {Gna2DeviceVersion3_0, InferenceEngine::GNAConfigParams::GNA_TARGET_3_0}, + }; + const auto target = getTargetDevice(false); + auto found = targetMap.find(target); + if (found == targetMap.end()) { + THROW_GNA_EXCEPTION << "Unknown target Gna2DeviceVersion == " << target; + } + return found->second; +} diff --git a/src/plugins/intel_gna/gna_device.hpp b/src/plugins/intel_gna/gna_device.hpp index 70d56c7c52dcf8..8040beea92b1d2 100644 --- a/src/plugins/intel_gna/gna_device.hpp +++ b/src/plugins/intel_gna/gna_device.hpp @@ -12,17 +12,23 @@ #include #include #include +#include #include +#include "memory/gna_mem_requests.hpp" + #include "gna2-common-api.h" #include "gna2-inference-api.h" #include "gna2-instrumentation-api.h" #include "gna2-memory-api.h" #include "gna2-model-api.h" +#include "gna2-model-export-api.h" #include "gna2-model-suecreek-header.h" +#include "gna_device_allocation.hpp" + enum GnaWaitStatus : int { GNA_REQUEST_COMPLETED = 0, // and removed from GNA library queue GNA_REQUEST_ABORTED = 1, // for QoS purposes @@ -38,6 +44,9 @@ class GNADeviceHelper { static std::string gnaLibraryVersion{ ", GNA library version: " + GNADeviceHelper::GetGnaLibraryVersion() }; return gnaLibraryVersion; } + + std::string modeOfOperation = "default"; + GnaAllocations allAllocations; uint32_t nGnaDeviceIndex = 0; bool swExactMode = false; Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation; @@ -45,8 +54,6 @@ class GNADeviceHelper { std::string compileTarget; bool useDeviceEmbeddedExport = false; Gna2DeviceVersion exportGeneration = Gna2DeviceVersionEmbedded1_0; - bool isGnaLibVersion2_1 = false; - bool isGnaLibVersion3_0 = false; static const uint32_t 
TotalGna2InstrumentationPoints = 2; Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = { @@ -80,12 +87,6 @@ class GNADeviceHelper { // check GNA Library version const auto gnaLibVersion = GetGnaLibraryVersion(); - if (gnaLibVersion.rfind("2.1", 0) == 0) { - isGnaLibVersion2_1 = true; - } - if (gnaLibVersion.rfind("3.0", 0) == 0) { - isGnaLibVersion3_0 = true; - } } GNADeviceHelper(const GNADeviceHelper&) = delete; @@ -97,6 +98,7 @@ class GNADeviceHelper { } uint8_t *alloc(uint32_t size_requested, uint32_t *size_granted); + void tagMemoryRegion(void* memPtr, const GNAPluginNS::memory::rRegion memoryTag); void setUpActiveList(unsigned req_config_id, uint32_t layerIndex, uint32_t* ptr_active_indices, uint32_t num_active_indices); uint32_t propagate(const uint32_t requestConfigId, Gna2AccelerationMode gna2AccelerationMode); @@ -133,6 +135,10 @@ class GNADeviceHelper { std::ostream & outStream, Gna2DeviceVersion targetDeviceVersion); + void dumpTLVForDeviceVersion(const uint32_t modelId, std::ostream& outStream, + uint32_t input_size, uint32_t output_size, + float inSF, float outSF); + void free(void * ptr); void updateGnaPerfCounters(); @@ -140,6 +146,11 @@ class GNADeviceHelper { InferenceEngine::InferenceEngineProfileInfo>& retPerfCounters); static std::string GetGnaLibraryVersion(); std::string getEffectiveGnaCompileTarget() const; + std::string GetCompileTarget() const; + + const GnaAllocations& getAllAllocations() const { + return allAllocations; + } private: void open(); @@ -158,7 +169,7 @@ class GNADeviceHelper { Gna2DeviceVersion getDefaultTarget() const; Gna2DeviceVersion getTargetDevice(bool execTarget) const; - void createVirtualDevice(Gna2DeviceVersion devVersion, std::string purpose = ""); + void createVirtualDevice(Gna2DeviceVersion devVersion); void updateGnaDeviceVersion(); void initGnaPerfCounters() { diff --git a/src/plugins/intel_gna/gna_device_allocation.hpp b/src/plugins/intel_gna/gna_device_allocation.hpp new file 
mode 100644 index 00000000000000..b5e3f50207ff2c --- /dev/null +++ b/src/plugins/intel_gna/gna_device_allocation.hpp @@ -0,0 +1,168 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gna2-model-export-api.h" + +#include "memory/gna_mem_regions.hpp" +#include "gna_lib_ver_selector.hpp" + +using GNAPluginNS::memory::rRegion; + +struct GnaAllocation { + void* ptr = nullptr; + size_t sizeRequested = 0; + size_t sizeGranted = 0; + void SetTag(Gna2MemoryTag in) { + isTagSet = true; + tag = in; + } + bool isTag(Gna2MemoryTag in) { + return isTagSet && in == tag; + } + std::string GetTagName() const { + static const std::map< Gna2MemoryTag, std::string > tm = { + { Gna2MemoryTagReadWrite, "Gna2MemoryTagReadWrite" }, + { Gna2MemoryTagInput, "Gna2MemoryTagInput" }, + { Gna2MemoryTagOutput, "Gna2MemoryTagOutput" }, + { Gna2MemoryTagReadOnly, "Gna2MemoryTagReadOnly" }, + { Gna2MemoryTagExternalBufferInput, "Gna2MemoryTagExternalBufferInput" }, + { Gna2MemoryTagExternalBufferOutput, "Gna2MemoryTagExternalBufferOutput" }, + { Gna2MemoryTagScratch, "Gna2MemoryTagScratch" }, + { Gna2MemoryTagState, "Gna2MemoryTagState" }, + }; + if (!isTagSet) { + return "Gna2MemoryTag_NotSet_"; + } + auto f = tm.find(tag); + if (f != tm.end()) { + return f->second; + } + return "Gna2MemoryTag_" + std::to_string(tag) + "_"; + } + + static rRegion GetRegionForTag(Gna2MemoryTag tag) { + static const std::map tm = { + {Gna2MemoryTagInput, rRegion::REGION_INPUTS}, + {Gna2MemoryTagOutput, rRegion::REGION_OUTPUTS}, + {Gna2MemoryTagReadOnly, rRegion::REGION_RO}, + {Gna2MemoryTagScratch, rRegion::REGION_SCRATCH}, + {Gna2MemoryTagState, rRegion::REGION_STATES}, + {Gna2MemoryTagExternalBufferInput, rRegion::REGION_INPUTS}, + {Gna2MemoryTagExternalBufferOutput, rRegion::REGION_OUTPUTS}, + }; + auto f = tm.find(tag); + if (f != tm.end()) { + 
return f->second; + } + return rRegion::REGION_AUTO; + } + + bool operator<(const GnaAllocation& right) const { + const auto region = GetRegionForTag(tag); + const auto regionRight = GetRegionForTag(right.tag); + return region < regionRight; + } + + std::pair getOffset(void* offset) const { + std::pair v; + v.first = offset >= ptr && offset < static_cast(ptr) + sizeGranted; + v.second = v.first ? static_cast(offset) - static_cast(ptr) : 0; + return v; + } + + uint32_t sizeForExport() const { + return ALIGN64(sizeRequested); + } + +private: + Gna2MemoryTag tag = Gna2MemoryTagScratch; + bool isTagSet = false; +}; + +class GnaAllocations { + std::list allocations; + +public: + GnaAllocations() = default; + template + explicit GnaAllocations(T b, T e) : allocations(b, e) { + } + + static uint32_t GetSizeForExport(const std::list& allocations) { + uint32_t total = 0; + for (auto& a : allocations) { + total += a.sizeForExport(); + } + return total; + } + + uint32_t GetSizeForExport() const { + return GetSizeForExport(allocations); + } + + std::list GetAllocationsInExportOrder() const { + std::vector temp(allocations.begin(), allocations.end()); + std::stable_sort(temp.begin(), temp.end()); + return std::list(temp.begin(), temp.end()); + } + + static std::pair GetOffsetForExport( + const std::list& orderedAllocations, + void* ptr) { + uint64_t curOffset = 0; + for (auto& r : orderedAllocations) { + auto ptrBegin = static_cast(r.ptr); + const auto size = r.sizeForExport(); + if (ptr >= ptrBegin && ptr < ptrBegin + size) { + curOffset += static_cast(ptr) - ptrBegin; + return {true, curOffset}; + } + curOffset += size; + } + return {false, 0}; + } + + bool SetTagFor(void* memPtr, Gna2MemoryTag memoryTag) { + auto found = std::find_if(allocations.begin(), allocations.end(), [memPtr](const GnaAllocation& a) { + return a.ptr == memPtr; + }); + if (found != allocations.end()) { + found->SetTag(memoryTag); + return true; + } + return false; + } + + bool Remove(void* memPtr) { + 
auto found = std::find_if(allocations.begin(), allocations.end(), [memPtr](const GnaAllocation& a) { + return a.ptr == memPtr; + }); + if (found != allocations.end()) { + allocations.erase(found); + return true; + } + return false; + } + + void Add(void* memPtr, uint32_t sizeRequested, uint32_t sizeGranted) { + GnaAllocation newAllocation; + newAllocation.ptr = memPtr; + newAllocation.sizeRequested = sizeRequested; + newAllocation.sizeGranted = sizeGranted; + allocations.push_back(newAllocation); + } +}; diff --git a/src/plugins/intel_gna/gna_graph_compiler.cpp b/src/plugins/intel_gna/gna_graph_compiler.cpp index 81f5003f5eee88..08837a0889ee59 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/gna_graph_compiler.cpp @@ -42,11 +42,10 @@ using namespace InferenceEngine; using namespace std; using namespace GNAPluginNS; +using namespace memory; #define CREATE(name) [](GNAGraphCompiler *p, CNNLayerPtr l) {p->name(l);} -const GNALimitations::Cnn2D::Validator GNAGraphCompiler::cnn2dValidator; - void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr gnaMemPtr) { this->gnamem = std::move(gnaMemPtr); } @@ -194,6 +193,31 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) split_connection.emplace(id, layerInfoItem); } +void GNAPluginNS::GNAGraphCompiler::SetValidatorTarget(std::string target) { + if (InferenceEngine::GNAConfigParams::GNA_TARGET_3_0 == target) { + cnn2dValidator.reset(new GNALimitations::Cnn2D::Validator_30()); + } +} + +void GNAPluginNS::GNAGraphCompiler::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, const uint32_t inChannels, + const uint32_t kH, const uint32_t kW, const uint32_t kN, const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision, + const uint32_t dilH, const uint32_t dilW) const { + if (cnn2dValidator) { + cnn2dValidator->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision); + } 
else { + THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name; + } +} + +void GNAPluginNS::GNAGraphCompiler::ValidatePooling2D(std::string name, const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const { + if (cnn2dValidator) { + cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); + } else { + THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name; + } +} + void GNAGraphCompiler::DiagonalPrimitive(InferenceEngine::CNNLayerPtr layer) { AffinePrimitive(layer, true); } @@ -208,9 +232,10 @@ void GNAGraphCompiler::ConstPrimitive(InferenceEngine::CNNLayerPtr constLayer) void* ptr_for_const_blob = &const_connections[constLayer->name]; connectOutput(constLayer, ptr_for_const_blob, const_blob->byteSize()); + // TODO: segment type for bind, bind initializer not used - need refactor to separate bind and allocation requests // dont see practical use case when bind storage type need to be different that allocation type - gnamem->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) { + gnamem->getQueue(REGION_AUTO)->bind_initializer(nullptr, ptr_for_const_blob, [const_blob](void* data, size_t size) { ie_memcpy(data, size, const_blob->buffer(), const_blob->byteSize()); }); } @@ -497,7 +522,7 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP } if (num_conv_kernel_padding == 0) { - gnamem->readonly().push_local_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), convolution._weights->byteSize(), 64); @@ -533,19 +558,20 @@ void GNAGraphCompiler::finalizeConvolution1DPrimitive(InferenceEngine::CNNLayerP offset += padding_zeros.size(); } }; - gnamem->readonly().push_initializer(layer, ptr_weights, + + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, initializer, 64); } if (convolution._biases) { - 
gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } @@ -607,10 +633,10 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP const auto weightPrec = OvGnaTypeIntFromBytes(convolution._weights->getTensorDesc().getPrecision().size()); const auto biasPrec = OvGnaTypeIntFromBytes(biasPrecision.size()); - cnn2dValidator.ValidateCnn2D(layer->name, + ValidateCnn2D(layer->name, in_height, in_width, in_channels, convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, - convolution._dilation_y, convolution._dilation_x, inputPrec); + inputPrec, convolution._dilation_y, convolution._dilation_x); float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights); float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output); @@ -675,18 +701,18 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP transposedWeights.resize(transposedWeights.size() + kernelPad); } - gnamem->readonly().push_local_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_weights, transposedWeights.data(), transposedWeights.size(), 64); if (convolution._biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, convolution._biases->cbuffer().as(), convolution._biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, out_channels, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, out_channels, 64); } } @@ -744,8 +770,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { if (gnaFlags->sw_fp32) { 
IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(layer, ptr_weights, power.scale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, power.offset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, power.scale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, power.offset, num_rows_out, 64); } else { IE_ASSERT(quantized != nullptr); if (!gnaFlags->input_low_precision) { @@ -753,15 +779,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { static_cast(INT16_MAX))); auto quantizedOffset = FLOAT_TO_INT32(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT32_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } else { auto quantizedScale = FLOAT_TO_INT8(std::min(quantized->_weights_quant.GetScale() * power.scale, static_cast(INT8_MAX))); auto quantizedOffset = FLOAT_TO_INT8(std::min(quantized->_dst_quant.GetScale() * power.offset, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); - gnamem->readonly().push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedScale, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, quantizedOffset, num_rows_out, 64); } } } else { @@ -820,7 +846,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_pwl_input, num_data_bytes_in, 0, 0); if (ptr_pwl_segments_target != nullptr) { - gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, + 
gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -868,7 +894,7 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { } if (is2DPooling) { - cnn2dValidator.ValidatePooling2D(layer->name, pooling._kernel_y, pooling._kernel_x, pooling._stride_y, pooling._stride_x); + ValidatePooling2D(layer->name, pooling._kernel_y, pooling._kernel_x, pooling._stride_y, pooling._stride_x); } auto& currentComponent = dnnComponents.addComponent(layer->name, "pooling"); @@ -892,9 +918,24 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { getScaleFactor(layer, QuantizedDataType::output), ptr_inputs, ptr_outputs); - size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) - * outputs->getPrecision().size(); + size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); + + // Need to reserve more memory otherwise the compiled model would not be + // backward compatible with GNA 2.0 + // GNA 2.0 produces more outputs from 1D pooling than later GNA generations (including GNA 3.0) + // When the model is compiled for some newer GNA generation (than GNA 2.0) + // but it does not use any specific new GNA features it should be correct to import and run using previous GNA HW + if (!is2DPooling) { + const auto hLegacy = + GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(h_dim_in, pooling._stride[X_AXIS]); + const auto wLegacy = + GNAPluginNS::GNAConvolutionLayer::outputFromPoolingLegacy(w_dim_in, pooling._stride[Y_AXIS]); + if (num_data_bytes_out < hLegacy * wLegacy * c_dim_out) { + num_data_bytes_out = hLegacy * wLegacy * c_dim_out; + } + } + num_data_bytes_out *= outputs->getPrecision().size(); const auto hw_in = h_dim_in * w_dim_in; // TODO: Is this really needed?, find out why @@ -1142,8 +1183,8 @@ void 
GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) { FillWeightOfAligningFilter(layer, ptr_weights, cropOffset, (quantized == nullptr) ? false : true); (quantized == nullptr) ? - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) : - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64) : + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } } @@ -1277,36 +1318,35 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { switch (eltwise._operation) { case EltwiseLayer::Sub: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_weights, -1.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, -1.0f, num_rows_out, 64); } else { auto scaledIdentity = -quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); break; case EltwiseLayer::Sum: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_weights, 1.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, 1.0f, num_rows_out, 64); } else { auto scaledIdentity = quantized->_weights_quant.GetScale(); if (gnaFlags->input_low_precision == false) { auto quantizedIdentity = 
FLOAT_TO_INT16(std::min(scaledIdentity, static_cast(INT16_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } else { auto quantizedIdentity = FLOAT_TO_INT8(std::min(scaledIdentity, static_cast(INT8_MAX))); - gnamem->readonly().push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_weights, quantizedIdentity, num_rows_out, 64); } } connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx); @@ -1314,12 +1354,12 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { case EltwiseLayer::Prod: if (quantized == nullptr) { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { if (gnaFlags->input_low_precision == false) { - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0, num_rows_out, 64); } } connectInput(layer, ptr_weights, num_data_bytes_in, 0, biasesLayerIdx); @@ -1387,9 +1427,9 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) { connectInput(layer, ptr_input_2, num_data_bytes_in_2, 0, 1); if (gnaFlags->sw_fp32) { IE_ASSERT(quantized == nullptr); - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1503,12 +1543,12 @@ void 
GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool if (num_padding == 0) { if (!transpose) { - gnamem->readonly().push_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_weights, weightable._weights->cbuffer().as(), weightable._weights->byteSize(), 64); } else { - gnamem->readonly().push_initializer(layer, ptr_weights, weightable._weights->byteSize(), + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, weightable._weights->byteSize(), [isDiag, num_rows_in, num_rows_out, num_padding, transposedRows, transposedCols, weightsBuffer, wpSize](void* data, size_t size) { for (uint32_t k = 0; k < (isDiag ? 1 : num_rows_out); k++) { auto rowOffset = k * transposedRows * transposedCols * wpSize; @@ -1538,7 +1578,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out; auto paddedWeightsSize = paddedWeights * weightable.precision.size(); - gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, + gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, [isDiag, num_rows_in, num_rows_out, num_padding, weightsBuffer, wpSize](void* data, size_t size) { for (uint32_t i = 0; i < (isDiag ? 
1 : num_rows_out); i++) { ie_memcpy(data, size, @@ -1550,16 +1590,16 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool } if (weightable._biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, weightable._biases->cbuffer().as(), weightable._biases->byteSize(), 64); } else { // in that case input from previous layer goes into biases, so we have to initialize input pointer by zero if (useBiasConnection) { - gnamem->readonly().push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_inputs, 0.0f, num_rows_in + num_padding, 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out + num_padding_out, 64); } } } @@ -1577,7 +1617,10 @@ void GNAGraphCompiler::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr l THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!"; } - gnamem->readonly().push_initializer(layer, ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void* data, size_t size) { + gnamem->getQueue(REGION_RO)->push_initializer(layer, + ptrWeights, + num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), + [=](void* data, size_t size) { int out = 0; for (int input = offset; input < num_rows_out + offset; ++input) { auto mem_ptr = reinterpret_cast(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size(); @@ -1700,7 +1743,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l size_t weights_stride = (num_rows_in + num_rows_copied) * weightsElementSize; size_t weights_offset = weights_stride * num_rows_copied + num_rows_copied * weightsElementSize; - gnamem->readonly().push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { + 
gnamem->getQueue(REGION_RO)->push_initializer(layer, ptr_weights, paddedWeightsSize, [=](void* data, size_t size) { size_t roffset = weights_offset; size_t woffset = 0; for (int i = 0; i < num_rows_out && size >= woffset; i++) { @@ -1715,12 +1758,12 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l } if (filterLayer->_biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, num_rows_out, 64); } } @@ -1793,18 +1836,18 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectOutput(layer, ptr_outputs, num_data_bytes_out); - gnamem->readonly().push_ptr(layer, ptr_weights, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_weights, filterLayer->_weights->cbuffer().as(), filterLayer->_weights->byteSize(), 64); if (filterLayer->_biases) { - gnamem->readonly().push_ptr(layer, ptr_biases, + gnamem->getQueue(REGION_RO)->push_ptr(layer, ptr_biases, filterLayer->_biases->cbuffer().as(), filterLayer->_biases->byteSize(), 64); } else { - gnamem->readonly().push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64); + gnamem->getQueue(REGION_RO)->push_value(layer, ptr_biases, 0.0f, numberOfFilters, 64); } } @@ -2036,7 +2079,7 @@ case name:\ connectOutput(layer, ptr_outputs, num_data_bytes_out); if (ptr_pwl_segments_target != nullptr) { - gnamem->readonly().push_local_ptr(layer, ptr_pwl_segments_target, + gnamem->getQueue(REGION_RO)->push_local_ptr(layer, ptr_pwl_segments_target, &ptr_pwl_segments.front(), ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t), 64); @@ -2210,13 +2253,12 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, // memory layer 
not yet initialized if (nextMemoryLayer.reserved_size == 0) { auto memorySize = InferenceEngine::details::product(nextMemoryLayer.getDims()) * nextMemoryLayer.elementSizeBytes(); - gnamem->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); - + gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &nextMemoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer)); nextMemoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &nextMemoryLayer.gna_ptr, getOffsetForBinding(layer), ALIGN64(num_data_bytes_out)); } return; } @@ -2307,7 +2349,8 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, return it != concatItem.second.concatInputLayers.end(); }); if (included == concat_connection.end()) { - gnamem->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size), 64); + auto outputSize = std::max(concatLayerInfoItem.reserved_size, num_data_bytes_out * 2); + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(layer, &concatLayerInfoItem.gna_ptr, ALIGN64(outputSize), 64); std::function allocate_input_recursively = [&allocate_input_recursively](GNAConcatLayer clayer, @@ -2342,16 +2385,24 @@ void GNAGraphCompiler::connectOutput(InferenceEngine::CNNLayerPtr layer, if (layer->params.find("output_offset") != layer->params.end()) { output_offset = layer->GetParamAsInt("output_offset"); } - gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, output_offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, 
output_offset); } return; } } + // real output should be allocated in separate region. + auto mem_region = REGION_SCRATCH; auto nextLayer = CNNNetCheckNextLayerSkipCertain(layer, 0, 0, true, [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }).first; // Check that layer will be an output - gnamem->reserve_ptr((LayerInfo(layer).isOutput() || !nextLayer) ? nullptr : layer, ptr, ALIGN64(num_data_bytes_out), 64); + if (LayerInfo(layer).isOutput() || !nextLayer) { + mem_region = REGION_OUTPUTS; + } + if (LayerInfo(layer).isConst()) { + mem_region = REGION_RO; + } + gnamem->getQueue(mem_region)->reserve_ptr(layer, ptr, ALIGN64(num_data_bytes_out), 64); } GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, @@ -2393,12 +2444,12 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // real allocation pointer will be kept in ptr not in ptr_inputs_global if (!connectTo) { - gnamem->push_value(nullptr, ptr, + gnamem->getQueue(REGION_INPUTS)->push_value(layer, ptr, static_cast(0), num_data_bytes_in, 64); } else { - gnamem->push_value(nullptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), + gnamem->getQueue(REGION_INPUTS)->push_value(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), static_cast(0), num_data_bytes_in, 64); @@ -2414,9 +2465,11 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, } if (connectTo) { - gnamem->bind_ptr(nullptr, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, ALIGN(num_data_bytes_in, 64)); + gnamem->getQueue(REGION_AUTO) + ->bind_ptr(layer, ptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), offset, ALIGN(num_data_bytes_in, 64)); } else { - gnamem->bind_ptr(nullptr, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); + gnamem->getQueue(REGION_AUTO) + ->bind_ptr(layer, &inputs_ptr_->at(prevLayer->name).ptrs.front(), ptr, offset, ALIGN(num_data_bytes_in, 64)); } return prevLayer; @@ -2424,9 
+2477,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // const input if (LayerInfo(prevLayer).isConst()) { if (connectTo) { - gnamem->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, const_connections[prevLayer->name], offset); } else { - gnamem->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, const_connections[prevLayer->name], ptr, offset); } return prevLayer; @@ -2455,7 +2508,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, gnalog() << "Connecting " << splitName << " input \n"; // splitting layer should take the execution order from the connected layer splittingLayer->userValue = layer->userValue; - auto res = connectInput(splittingLayer, ptr, splitLayerInfoItem.reserved_size, it->offset + offset, 0); + auto res = connectInput(splittingLayer, ptr, std::max(splitLayerInfoItem.reserved_size, num_data_bytes_in), it->offset + offset, 0); gnalog() << "Connected \n"; return res; } @@ -2467,7 +2520,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, if (concatLayerInfo != concat_connection.end()) { auto & concatLayerInfoItem = concatLayerInfo->second; // dnnLayer that is input for concat layer - gnamem->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &concatLayerInfoItem.gna_ptr, offset, num_data_bytes_in); // return layer over concat return CNNNetPrevLayer(prevLayer); } @@ -2476,7 +2529,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, prevLayer->name); if (cropLayerInfo != crop_connection.end()) { auto & cropLayerInfoItem = cropLayerInfo->second; - gnamem->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &cropLayerInfoItem.gna_ptr, offset); return 
CNNNetPrevLayer(prevLayer); } } @@ -2484,7 +2537,7 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // check for generic prev layer if (prevDnnLayer != nullptr) { - gnamem->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, ptr, &prevDnnLayer->ptr_outputs, offset, num_data_bytes_in); return prevLayer; } @@ -2502,20 +2555,20 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, // connectTo used for indicate that memory layer should be bound to given buffer if (connectTo) { memorySize = std::max(memorySize, num_data_bytes_in); - gnamem->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64); - gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset); + gnamem->getQueue(REGION_STATES)->reserve_ptr(nullptr, &memoryLayer.gna_ptr, ALIGN64(memorySize), 64); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset); } else { if (num_data_bytes_in < memorySize + offset) { THROW_GNA_LAYER_EXCEPTION(layer) <<" invalid allocation request of " << num_data_bytes_in << " is more then state tensor size of: " << memorySize + offset; } - gnamem->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, &memoryLayer.gna_ptr, ptr, offset, ALIGN64(num_data_bytes_in)); } memoryLayer.reserved_size = ALIGN64(memorySize); } else { // We may need to extend memory buffer if connected input size is bigger, for example for concat connection - gnamem->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); + gnamem->getQueue(REGION_AUTO)->bind_ptr(nullptr, ptr, &memoryLayer.gna_ptr, offset, ALIGN64(num_data_bytes_in)); } return prevLayer; diff --git a/src/plugins/intel_gna/gna_graph_compiler.hpp b/src/plugins/intel_gna/gna_graph_compiler.hpp index 88db82136dffda..ba22d40b640ec9 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.hpp +++ 
b/src/plugins/intel_gna/gna_graph_compiler.hpp @@ -16,7 +16,6 @@ #include "descriptions/gna_flags.hpp" #include "connection_details.hpp" #include "backend/dnn.hpp" -#include "memory/polymorph_allocator.hpp" #include "memory/gna_memory.hpp" #include "layers/gna_memory_layer.hpp" #include "layers/gna_concat_layer.hpp" @@ -50,7 +49,7 @@ class GNAGraphCompiler { static void assertConvolutionLayoutProper(const InferenceEngine::DataPtr&); std::vector static transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint32_t num_rows, uint32_t num_cols); - static const GNALimitations::Cnn2D::Validator cnn2dValidator; + std::unique_ptr cnn2dValidator; public: GNAPluginNS::backend::DnnComponents dnnComponents; @@ -69,6 +68,18 @@ class GNAGraphCompiler { void fillConcatConnections(InferenceEngine::CNNLayerPtr layer); void fillSplitConnections(InferenceEngine::CNNLayerPtr layer); + + void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth, + const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN, + const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision, + const uint32_t dilH, const uint32_t dilW) const; + + void ValidatePooling2D(std::string name, + const uint32_t windowH, const uint32_t windowW, + const uint32_t strideH, const uint32_t strideW) const; + + void SetValidatorTarget(std::string target); + /** * Connects either memory output, or generic output to a layer * @param layer - layer pointer diff --git a/src/plugins/intel_gna/gna_lib_ver_selector.hpp b/src/plugins/intel_gna/gna_lib_ver_selector.hpp index cafa3264238c4f..8d7147f73bca9c 100644 --- a/src/plugins/intel_gna/gna_lib_ver_selector.hpp +++ b/src/plugins/intel_gna/gna_lib_ver_selector.hpp @@ -27,3 +27,13 @@ * Used for calculating memory sizes of GNA data arrays */ #define ALIGN64(number) ALIGN(number, 64) + +namespace GNAPluginNS { +namespace tools { +template +std::unique_ptr make_unique(Args&&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +} // namespace tools + +} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/gna_model_serial.cpp b/src/plugins/intel_gna/gna_model_serial.cpp index a9126426b121dd..07a21afd10712c 100644 --- a/src/plugins/intel_gna/gna_model_serial.cpp +++ b/src/plugins/intel_gna/gna_model_serial.cpp @@ -370,36 +370,30 @@ void GNAModelSerial::Import(void *basePointer, } } - // once structure has been read lets read whole gna graph is.read(reinterpret_cast(basePointer), gnaGraphSize); } -void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const { +void GNAModelSerial::Export(const GnaAllocations& allocations, std::ostream& os) const { os.exceptions(std::ostream::failbit); const std::vector layers(gna2model_->Operations, gna2model_->Operations + gna2model_->NumberOfOperations); + const auto gnaGraphSize = allocations.GetSizeForExport(); + const auto& allocationsOrdered = allocations.GetAllocationsInExportOrder(); - // all offsets will be from this pointer - auto getOffsetFromBase = [basePointer, &gnaGraphSize](void * pointer, const char * name = nullptr) { - auto offset = static_cast(std::distance(reinterpret_cast(basePointer), reinterpret_cast(pointer))); - if (offset > gnaGraphSize) { - THROW_GNA_EXCEPTION << "offset to " << (name == nullptr ? 
"" : name) << "(0x" << pointer - << ") not in range segment retuned from GNAAlloc(0x" << basePointer << "-0x" - << reinterpret_cast(reinterpret_cast(basePointer) + gnaGraphSize) << ")"; - } - return offset; - }; - - auto getTensorWithProperOffset = [&getOffsetFromBase](const Gna2Tensor& tensor) { + auto getTensorWithProperOffset = [&allocationsOrdered](const Gna2Tensor& tensor) { Gna2Tensor out = tensor; - out.Data = reinterpret_cast(getOffsetFromBase(tensor.Data)); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, tensor.Data); + if (!found.first) { + THROW_GNA_EXCEPTION << "Tensor data pointer not found in allocations\n"; + } + out.Data = reinterpret_cast(found.second); return out; }; - auto convert_to_serial = [getOffsetFromBase](const GNAPluginNS::GnaDesc &desc) { + auto convert_to_serial = [&allocationsOrdered](const GNAPluginNS::GnaDesc& desc) { HeaderLatest::RuntimeEndPoint ep; ep.elements_count = desc.num_elements; ep.scaleFactor = desc.scale_factor; @@ -408,7 +402,11 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea ep.precision = desc.model_precision; ep.orientation = desc.orientation; ep.tensor_names_count = static_cast(desc.tensor_names.size()); - ep.descriptor_offset = offsetFromBase(*desc.ptrs.begin()); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, *desc.ptrs.begin()); + if (!found.first) { + THROW_GNA_EXCEPTION << "Endpoint data pointer not found in allocations\n"; + } + ep.descriptor_offset = found.second; // shape ep.shape.NumberOfDimensions = desc.dims.size(); for (size_t i=0; i < ep.shape.NumberOfDimensions; ++i) { @@ -519,7 +517,11 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea std::string name; float scale_factor = 1.0f; std::tie(gna_ptr, reserved_size, name, scale_factor) = state; - writeBits(offsetFromBase(gna_ptr), os); + const auto found = GnaAllocations::GetOffsetForExport(allocationsOrdered, gna_ptr); + if 
(!found.first) { + THROW_GNA_EXCEPTION << "State data pointer not found in allocations\n"; + } + writeBits(found.second, os); writeBits(reserved_size, os); const auto nameSize = strlen(name.c_str()) + 1; writeBits(static_cast(nameSize), os); @@ -527,8 +529,10 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea writeBits(scale_factor, os); } - // once structure has been written lets push gna graph - os.write(reinterpret_cast(basePointer), gnaGraphSize); + // once structure has been written let's push gna graph memory + for (const auto& a : allocationsOrdered) { + os.write(reinterpret_cast(a.ptr), a.sizeForExport()); + } } void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::GnaInputs &inputs) { diff --git a/src/plugins/intel_gna/gna_model_serial.hpp b/src/plugins/intel_gna/gna_model_serial.hpp index 50fc858919a4d4..76022b12fa2e8f 100644 --- a/src/plugins/intel_gna/gna_model_serial.hpp +++ b/src/plugins/intel_gna/gna_model_serial.hpp @@ -14,6 +14,7 @@ #include "serial/headers/latest/gna_model_header.hpp" #include "gna2-model-api.h" +#include "gna_device_allocation.hpp" /** * @brief implements serialization tasks for GNAGraph @@ -100,11 +101,9 @@ class GNAModelSerial { /** * save gna graph to an outpus stream - * @param basePtr - * @param gnaGraphSize + * @param allocations * @param os */ - void Export(void *basePtr, - size_t gnaGraphSize, + void Export(const GnaAllocations& allocations, std::ostream &os) const; }; diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp index b128d6cc9de036..85552736a12bdc 100644 --- a/src/plugins/intel_gna/gna_plugin.cpp +++ b/src/plugins/intel_gna/gna_plugin.cpp @@ -87,6 +87,7 @@ #include #include +#include inline uint32_t ToByteSize(const Gna2DataType type) { switch (type) { @@ -112,6 +113,7 @@ constexpr uint32_t GNAPluginNS::GNAPlugin::FAKE_REQUEST_CONFIG_ID; using namespace InferenceEngine; using namespace std; using namespace GNAPluginNS; 
+using namespace GNAPluginNS::memory; using namespace InferenceEngine::details; namespace InferenceEngine { @@ -329,6 +331,15 @@ GNAPlugin::GNAPlugin() { InitGNADevice(); } +std::string GNAPluginNS::GNAPlugin::GetCompileTarget() const { + if (gnadevice) { + return gnadevice->GetCompileTarget(); + } else if (!config.gnaCompileTarget.empty()) { + return config.gnaCompileTarget; + } + return InferenceEngine::GNAConfigParams::GNA_TARGET_3_0; +} + GNAPlugin::GNAPlugin(const std::map& configMap) { Init(); SetConfig(configMap); @@ -350,16 +361,16 @@ void GNAPlugin::Init() { void GNAPlugin::InitGNADevice() { OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice"); if (gnaFlags->sw_fp32) { - gnamem.reset(new gna_memory_type(memory::make_polymorph>())); + gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); } else { gnadevice = std::make_shared(config.gnaExecTarget, config.gnaCompileTarget, config.swExactMode, gnaFlags->performance_counting, !config.dumpXNNPath.empty(), - GetDeviceVersionFromString(config.dumpXNNGeneration)); + GetDeviceVersionFromString(config.gnaCompileTarget)); size_t page_size_bytes = 4096; - gnamem = std::make_shared(memory::make_polymorph(gnadevice), page_size_bytes); + gnamem = std::make_shared(memory::GNAAllocator(gnadevice), page_size_bytes); } graphCompiler.setGNAMemoryPtr(gnamem); } @@ -495,7 +506,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN outputs_.at(portName).num_elements = numElem; // binding ptr for first infer request - then others will be setup during relocation - gnamem->bind_ptr(layer, &outputs_.at(portName).ptrs.front(), outputPtr); + gnamem->getQueue(REGION_AUTO)->bind_ptr(layer, &outputs_.at(portName).ptrs.front(), outputPtr); }; // probing gna_primitives @@ -645,7 +656,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { std::string effectiveGnaCompileTarget = config.gnaCompileTarget; if (gnadevice) { - effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget(); + 
effectiveGnaCompileTarget = gnadevice->GetCompileTarget(); } bool isNgraphPassesUsed = false; @@ -904,6 +915,8 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { gnaFlags->num_requests = 1; } + graphCompiler.SetValidatorTarget(GetCompileTarget()); + // keep inputs information and create input primitives inputs_data_map_ = newNet.getInputsInfo(); if (inputs_data_map_.empty()) { @@ -975,20 +988,21 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // TODO: how active list will work in multioutput case // make room for active list - gnamem->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64); + gnamem->getQueue(REGION_OUTPUTS)->reserve_ptr(nullptr, nullptr, ALIGN64(outputs_.Get().begin()->get_required_size()), 64); void *pParallelExecutionData = nullptr; // reserving more bytes for intermediate data in parallel case - TODO: this works incorrectly in compact mode at lest - rwSegmentSize = gnamem->getRWBytes(); + rwSegmentSize = gnamem->getRegionBytes(REGION_SCRATCH); + rwSegmentSize += gnamem->getRegionBytes(REGION_INPUTS); + rwSegmentSize += gnamem->getRegionBytes(REGION_OUTPUTS); if (gnaFlags->num_requests > 1) { - gnamem->reserve_ptr(nullptr, &pParallelExecutionData, gnamem->getRWBytes() * (gnaFlags->num_requests - 1), 64); + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pParallelExecutionData, rwSegmentSize * (gnaFlags->num_requests - 1), 64); } gnamem->commit(gnaFlags->compact_mode); - dnn->Init(gnamem->getBasePtr(), - gnamem->getTotalBytes(), + dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? 
kDnnFloat : kDnnInt, 1); @@ -1020,8 +1034,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { if (ptr_in == nullptr) { ptr_out = nullptr; } else { - auto offset = reinterpret_cast(ptr_in) - reinterpret_cast(gnamem->getBasePtr()); - ptr_out = basePtr + offset; + const auto found = gnamem->getOffsetForMerged(ptr_in); + if (!found.first) { + THROW_GNA_EXCEPTION << "Relocation offset for parallel infer requests was not found\n"; + } + ptr_out = basePtr + found.second; } }; @@ -1105,7 +1122,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { {TranspositionInfo{dnn->do_rotate_input, dnn->num_rotate_rows, dnn->num_rotate_columns}}}); } } - DumpXNNToFile(); #ifdef PLOT @@ -1128,9 +1144,10 @@ void GNAPlugin::createRequestConfigsForGnaModels() { } int GNAPlugin::GetDeviceVersionFromString(const std::string deviceString) { - if (deviceString.empty()) + if (deviceString.empty() || deviceString == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) { return static_cast(Gna2DeviceVersionEmbedded1_0); - THROW_GNA_EXCEPTION << "Wrong GNA generation for embedded model dump: " << deviceString; + } + return static_cast(Gna2DeviceVersionEmbedded3_5); } void GNAPlugin::DumpXNNToFile() const { @@ -1146,12 +1163,28 @@ void GNAPlugin::DumpXNNToFile() const { std::ofstream dumpStream(config.dumpXNNPath, std::ios::out | std::ios::binary); auto const modelId = gnadevice->createModel(std::get<0>(gnaModels.front())->obj); - auto dump = gnadevice->dumpXnn(modelId); - dump.header.RwRegionSize = gnamem->getRWBytes(); - dump.header.InputScalingFactor = inputs_ptr_->Get().begin()->scale_factor; - dump.header.OutputScalingFactor = outputs_.Get().begin()->scale_factor; - dumpStream.write(reinterpret_cast(&dump.header), sizeof(Gna2ModelSueCreekHeader)); - dumpStream.write(reinterpret_cast(dump.model.get()), dump.header.ModelSize); + const auto& inputsDesc = inputs_ptr_->Get(); + const auto& outputsDesc = outputs_.Get(); + + if (InferenceEngine::GNAConfigParams::GNA_TARGET_2_0 == 
gnadevice->getEffectiveGnaCompileTarget()) { + auto dump = gnadevice->dumpXnn(modelId); + dump.header.RwRegionSize = gnamem->getRegionBytes(REGION_SCRATCH); + dump.header.InputScalingFactor = inputsDesc.begin()->scale_factor; + dump.header.OutputScalingFactor = outputsDesc.begin()->scale_factor; + dumpStream.write(reinterpret_cast(&dump.header), sizeof(Gna2ModelSueCreekHeader)); + dumpStream.write(reinterpret_cast(dump.model.get()), dump.header.ModelSize); + } else { + uint32_t input_size = 0; + uint32_t output_size = 0; + for (auto i : inputsDesc) + input_size += i.get_allocated_size(); + for (auto o : outputsDesc) + output_size += o.get_required_size(); + auto inSF = inputsDesc.begin()->scale_factor; + auto outSF = outputsDesc.front().scale_factor; + gnadevice->dumpTLVForDeviceVersion(modelId, dumpStream, + input_size, output_size, inSF, outSF); + } gnadevice->releaseModel(modelId); } @@ -1366,7 +1399,10 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) { FILE* f = nullptr; static int num_infers = 0; { - f = fopen("ex_scores.txt", "w"); + f = std::fopen("ex_scores.txt", "w"); + if (!f) { + THROW_GNA_EXCEPTION << "ex_scores.txt opening failed"; + } } num_infers++; if (f) { @@ -1508,7 +1544,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i auto header = GNAModelSerial::ReadHeader(networkModel); void *basePtr = nullptr; - gnamem->reserve_ptr(nullptr, &basePtr, header.gnaMemSize); + + gnamem->getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &basePtr, header.gnaMemSize); + gnamem->commit(); gnaModels.push_back(std::make_tuple(make_shared>(header.layersCount))); GNAModelSerial::MemoryType mt; @@ -1607,7 +1645,7 @@ void GNAPlugin::Export(std::ostream &outStream) { serial.AddState(memoryConnection.second.gna_ptr, memoryConnection.second.reserved_size, memoryConnection.first, state->GetScaleFactor()); } - serial.Export(gnamem->getBasePtr(), gnamem->getTotalBytes(), outStream); + 
serial.Export(gnadevice->getAllAllocations(), outStream); } std::map GNAPlugin::GetPerformanceCounts() { diff --git a/src/plugins/intel_gna/gna_plugin.hpp b/src/plugins/intel_gna/gna_plugin.hpp index edf85fc45d36b4..2c69b4aacfb57d 100644 --- a/src/plugins/intel_gna/gna_plugin.hpp +++ b/src/plugins/intel_gna/gna_plugin.hpp @@ -70,6 +70,7 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin { std::vector memoryStates; bool trivialTopology = false; + std::string GetCompileTarget() const; public: explicit GNAPlugin(const std::map& configMap); diff --git a/src/plugins/intel_gna/gna_plugin_log.hpp b/src/plugins/intel_gna/gna_plugin_log.hpp index 7a66e08f8c213a..cfb911ddb0e2a1 100644 --- a/src/plugins/intel_gna/gna_plugin_log.hpp +++ b/src/plugins/intel_gna/gna_plugin_log.hpp @@ -13,11 +13,11 @@ /** * @brief used for creating graphviz charts, and layers dump */ -# define PLOT -# define MODEL_DUMP -# define GNA_HEAP_PROFILER -# define gnalog() std::cout -# define gnawarn() std::cerr +#define PLOT +#define GNA_HEAP_PROFILER +#define MODEL_DUMP +#define gnalog() std::cout +#define gnawarn() std::cerr #else #ifdef VERBOSE diff --git a/src/plugins/intel_gna/memory/gna_allocator.hpp b/src/plugins/intel_gna/memory/gna_allocator.hpp index e5ad087743e83c..2d83073775458f 100644 --- a/src/plugins/intel_gna/memory/gna_allocator.hpp +++ b/src/plugins/intel_gna/memory/gna_allocator.hpp @@ -10,7 +10,7 @@ #include #include "gna_device.hpp" -#include "polymorph_allocator.hpp" +#include "memory/gna_mem_requests.hpp" namespace GNAPluginNS { namespace memory { @@ -36,6 +36,9 @@ class GNAAllocator { void deallocate(uint8_t *p, std::size_t n) { _device->free(p); } + void setTag(void* memPtr, GNAPluginNS::memory::rRegion tagValue) { + _device->tagMemoryRegion(memPtr, tagValue); + } }; } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_mem_regions.hpp b/src/plugins/intel_gna/memory/gna_mem_regions.hpp new file mode 100644 index 
00000000000000..7362e2241b8697 --- /dev/null +++ b/src/plugins/intel_gna/memory/gna_mem_regions.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace GNAPluginNS { +namespace memory { + +/** + * @brief Logical region of model memory. + * Needed for models for embedded GNA + * When model is exported for non-embedded uses its memory is exported following the enum value order + */ +enum rRegion { + REGION_INPUTS = 0x0, + REGION_OUTPUTS = 0x1, + REGION_SCRATCH = 0x10, + REGION_STATES = 0x100, + REGION_RO = 0x1000, + REGION_AUTO = 0x10000, +}; + +inline std::map GetAllRegionsToStrMap() { + return { + {REGION_INPUTS, "REGION_INPUTS"}, + {REGION_OUTPUTS, "REGION_OUTPUTS"}, + {REGION_SCRATCH, "REGION_SCRATCH"}, + {REGION_STATES, "REGION_STATES"}, + {REGION_RO, "REGION_RO"}, + {REGION_AUTO, "REGION_AUTO"} + }; +} + +inline std::string rRegionToStr(const rRegion region) { + const auto& map = GetAllRegionsToStrMap(); + const auto found = map.find(region); + if (found == map.end()) { + return "UNKNOWN"; + } + return found->second; +} + +} // namespace memory +} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_mem_requests.hpp b/src/plugins/intel_gna/memory/gna_mem_requests.hpp index 0f3626e5943c68..de07d4c5630a5b 100644 --- a/src/plugins/intel_gna/memory/gna_mem_requests.hpp +++ b/src/plugins/intel_gna/memory/gna_mem_requests.hpp @@ -9,6 +9,7 @@ #include #include "gna_plugin_log.hpp" +#include "gna_mem_regions.hpp" namespace GNAPluginNS { namespace memory { @@ -19,31 +20,8 @@ enum rType : uint8_t { REQUEST_BIND = 0x4, REQUEST_INITIALIZER = 0x8, }; -/** - * @brief region of firmware data - */ -enum rRegion { - REGION_RO, - REGION_RW, - REGION_AUTO, -}; #ifdef GNA_HEAP_PROFILER -inline const char* rRegionToStr(uint8_t region) { - const char* strRegion = "UNKNOWN"; - switch (region) { - case REGION_RO: - strRegion = "REGION_RO"; - break; - case REGION_RW: - 
strRegion = "REGION_RW"; - break; - case REGION_AUTO: - strRegion = "REGION_AUTO"; - break; - } - return strRegion; -} inline const char* rTypeToStr(uint8_t type) { const char* strType = "UNKNOWN"; @@ -65,6 +43,7 @@ inline const char* rTypeToStr(uint8_t type) { } return strType; } + #endif struct MemRequest { diff --git a/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp b/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp index 54a9ce0c4339a3..85163fd03257c9 100644 --- a/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp +++ b/src/plugins/intel_gna/memory/gna_mem_requests_queue.hpp @@ -8,10 +8,14 @@ #include #include #include +#include #include #include +#include "gna_plugin_log.hpp" #include "gna_mem_requests.hpp" +#include "gna_lib_ver_selector.hpp" +#include "memory_solver.hpp" namespace GNAPluginNS { namespace memory { @@ -30,8 +34,16 @@ inline uint16_t getCNNLayerId(InferenceEngine::CNNLayerPtr layer) { */ class GNAMemRequestsQueue { public: + explicit GNAMemRequestsQueue(rRegion region) : _region_type(region) { + } virtual ~GNAMemRequestsQueue() {} + rRegion _region_type; + size_t _size = 0; + std::vector _mem_requests; + std::list> _local_storage; + std::shared_ptr _basePtr = nullptr; + /** * @brief register initialiser to access memory once it is actually allocated * @param ptr_out @@ -146,9 +158,128 @@ class GNAMemRequestsQueue { /** * @brief interface for actual queue storage */ - virtual rRegion regionType() const = 0; - virtual std::vector & futureHeap() = 0; - virtual std::list> &localStorage() = 0; + rRegion regionType() const { + return _region_type; + } + + std::vector & futureHeap() { + return _mem_requests; + } + + std::list> &localStorage() { + return _local_storage; + } + + virtual size_t calcSize(bool isCompact = false) { + _size = 0; + for (auto &re : _mem_requests) { + if (re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; + _size += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); + } + 
return _size; + } + + size_t getSize() const { + return _size; + } + + void *getBasePtr() const { + return _basePtr.get(); + } + + std::pair getOffset(void * ptr) const { + auto ptrBegin = static_cast(getBasePtr()); + auto size = getSize(); + if (ptr >= ptrBegin && ptr < ptrBegin + size) { + auto curOffset = static_cast(ptr) - ptrBegin; + return {true, curOffset}; + } + return {false, 0}; + } + + template + void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { + for (auto &re : _mem_requests) { + if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { + // std::cout << " [binded=" << re._type << ", ptr=" << re._ptr_out <<"]\n"; + visitor(reference, re); + // primitive loop check + if (re._ptr_in == re._ptr_out) continue; + // TODO: no circular dependency checking, only tree-style dependency with loops supported + iterate_binded(re, visitor); + } + } + } +}; + +class GNAMemRequestsInputsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsInputsQueue() : GNAMemRequestsQueue(REGION_INPUTS) { + } +}; + +class GNAMemRequestsOutputsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsOutputsQueue() : GNAMemRequestsQueue(REGION_OUTPUTS) { + } +}; + +class GNAMemRequestsScratchQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsScratchQueue() : GNAMemRequestsQueue(REGION_SCRATCH) { + } + /** + * @brief optimize memory region by reusing buffers + */ + size_t calcSize(bool isCompact = false) override { + if (isCompact) { + _size = 0; + std::vector boxes; + for (size_t i = 0; i < _mem_requests.size(); ++i) { + // skipping BIND, cross-region and empty requests + if (_mem_requests[i]._type & REQUEST_BIND || _mem_requests[i]._ptr_out == nullptr) { + continue; + } + + auto original_with_pad = ALIGN(_mem_requests[i]._num_elements * _mem_requests[i]._element_size + _mem_requests[i]._padding, + _mem_requests[i]._alignment); + int start = _mem_requests[i]._life_limits.first; + int 
stop = _mem_requests[i]._life_limits.second; + + boxes.push_back({start, stop, static_cast(original_with_pad), static_cast(i)}); + } + + MemorySolver memSolver(boxes); + _size = memSolver.solve(); + + // setting offsets + for (auto const & box : boxes) { + _mem_requests[box.id]._offset = memSolver.getOffset(box.id); + } + return _size; + } else { + return GNAMemRequestsQueue::calcSize(isCompact); + } + } }; + +class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsReadOnlyQueue() : GNAMemRequestsQueue(REGION_RO) { + } +}; + +class GNAMemRequestsStatesQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsStatesQueue() : GNAMemRequestsQueue(REGION_STATES) { + } +}; + +class GNAMemRequestsBindingsQueue : public GNAMemRequestsQueue { +public: + explicit GNAMemRequestsBindingsQueue() : GNAMemRequestsQueue(REGION_AUTO) { + } +}; + } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/gna_memory.hpp b/src/plugins/intel_gna/memory/gna_memory.hpp index 6213a507515e9d..a7853c0dfa22c3 100644 --- a/src/plugins/intel_gna/memory/gna_memory.hpp +++ b/src/plugins/intel_gna/memory/gna_memory.hpp @@ -14,63 +14,75 @@ #include #include #include +#include +#include #include "gna_lib_ver_selector.hpp" #include "memory_solver.hpp" +#include "gna_allocator.hpp" #include "gna_plugin_log.hpp" #include "memory/gna_allocator.hpp" #ifdef GNA_HEAP_PROFILER #include -#include #endif namespace GNAPluginNS { namespace memory { + +class GNAFloatAllocator : public std::allocator < uint8_t > { + public: + void setTag(void*, GNAPluginNS::memory::rRegion) { + } +}; + +class GNAMemoryInterface { +public: + virtual GNAMemRequestsQueue* getQueue(rRegion region) = 0; + virtual GNAMemRequestsQueue* getQueue(void* ptr) = 0; + virtual void commit(bool isCompact = false) = 0; + virtual std::pair getOffsetForMerged(void* ptr) = 0; + virtual size_t getRegionBytes(rRegion region) = 0; + virtual ~GNAMemoryInterface() 
= default; +}; + /** * @brief encapsulate various request to allocate GNA specific memory, * in order to issue single allocation call and configure actual pointers in requests * @tparam Allocator - a GNAAllocator in case of actual HW offloads */ -template> -class GNAMemory : public GNAMemRequestsQueue { +template +class GNAMemory : public GNAMemoryInterface { protected: - std::vector _future_heap; - std::list> _local_storage; + std::map> _mem_queues; size_t _total = 0; - size_t _rw_section_size = 0; - size_t _ro_section_size = 0; Allocator _allocator; - std::shared_ptr heap = nullptr; size_t _page_alignment = 1; bool _is_compact_mode = false; - class GNAMemRequestsReadOnlyQueue : public GNAMemRequestsQueue { - std::reference_wrapper _that; - public: - explicit GNAMemRequestsReadOnlyQueue(GNAMemory & that) : _that(that) { - } - rRegion regionType() const override { - return REGION_RO; - }; - std::vector & futureHeap() override { - return _that.get().futureHeap(); - } - std::list> &localStorage() override { - return _that.get().localStorage(); - } - }; - - GNAMemRequestsReadOnlyQueue readOnlyFrontEnd; + private: + void initMemQueses() { + _mem_queues[REGION_RO] = tools::make_unique(); + _mem_queues[REGION_INPUTS] = tools::make_unique (); + _mem_queues[REGION_OUTPUTS] = tools::make_unique (); + _mem_queues[REGION_SCRATCH] = tools::make_unique (); + _mem_queues[REGION_STATES] = tools::make_unique (); + _mem_queues[REGION_AUTO] = tools::make_unique (); + } public: explicit GNAMemory(size_t pageAlignment = 1) - : readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {} + : _page_alignment(pageAlignment) { + initMemQueses(); + } explicit GNAMemory(const Allocator &a, size_t pageAlignment = 1) - : _allocator(a), readOnlyFrontEnd(*this), _page_alignment(pageAlignment) {} + : _allocator(a), _page_alignment(pageAlignment) { + initMemQueses(); + } - GNAMemRequestsQueue & readonly() { - return readOnlyFrontEnd; + virtual ~GNAMemory() { + // we have to deallocate regions 
before _allocator is destoyed + _mem_queues.clear(); } /** @@ -83,52 +95,67 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief calculates size required for all requests, allocates memory and updates pointers */ - void commit(bool isCompact = false) { + void commit(bool isCompact = false) override { setCompactMode(isCompact); - // 1st stage -- looking for expandable bind requests: - expandBindings(); - - // 2nd stage -- setup offsets: - setRegionOffsets(REGION_RO); - setRegionOffsets(REGION_RW); - - // 3rd stage -- allocation total memory setting to 0 internally - heap = allocate(getTotalBytes()); - - // 4th stage -- store data and updates pointers - allocateRegion(REGION_RW, 0); - allocateRegion(REGION_RO, _rw_section_size); + for (const auto &queue : _mem_queues) { + // 1st stage -- looking for expandable bind requests: + expandBindings(queue.second.get()); + + // 2nd stage -- setup offsets: + setRegionOffsets(queue.second.get()); + + if (queue.second->calcSize(_is_compact_mode) != 0) { + // 3rd stage -- allocation total memory setting to 0 internally + queue.second->_basePtr = allocate(ALIGN(queue.second->getSize(), _page_alignment)); + gnalog() << rRegionToStr(queue.second->_region_type) << "(" << static_cast(queue.second->_basePtr.get()) << ")" + << " allocated: " << ALIGN(queue.second->getSize(), _page_alignment) << std::endl; + // 4th stage -- setting proper GNA memory region tag for embedded TLV export + _allocator.setTag(queue.second->getBasePtr(), queue.first); + // 5th stage -- store data and updates pointers + allocateRegion(queue.second.get()); + } + } +#ifdef GNA_HEAP_PROFILER + memoryDump(); +#endif } - void *getBasePtr() { - return heap.get(); + GNAMemRequestsQueue *getQueue(rRegion region) override { + return _mem_queues[region].get(); } - size_t getRWBytes() { - updateSectionsSizes(); - return _rw_section_size; + GNAMemRequestsQueue* getQueue(void* ptr) override { + for (auto& queuePair : _mem_queues) { + const auto offset = 
queuePair.second->getOffset(ptr); + if (offset.first) { + return queuePair.second.get(); + } + } + return nullptr; } - size_t getTotalBytes() { - updateSectionsSizes(); - return _total; + std::pair getOffsetForMerged(void * ptr) override { + uint32_t curOffset = 0; + for (auto& queuePair : _mem_queues) { + const auto offset = queuePair.second->getOffset(ptr); + if (offset.first) { + curOffset += offset.second; + return {true, curOffset}; + } + const auto size = queuePair.second->getSize(); + curOffset += ALIGN64(size); + } + return {false, 0}; } - protected: - rRegion regionType() const override { - return REGION_RW; - }; - std::vector & futureHeap() override { - return _future_heap; - } - std::list> &localStorage() override { - return _local_storage; + size_t getRegionBytes(rRegion region) override { + return ALIGN(getQueue(region)->calcSize(), _page_alignment); } template void iterate_binded(GNAPluginNS::memory::MemRequest & reference, const T & visitor) { - for (auto &re : _future_heap) { + for (auto &re : getQueue(REGION_AUTO)->_mem_requests) { if ((re._type & REQUEST_BIND) && (re._ptr_in == reference._ptr_out)) { // std::cout << " [binded=" << rTypeToStr(re._type) << ", ptr=" << re._ptr_out <<"]\n"; visitor(reference, re); @@ -138,22 +165,26 @@ class GNAMemory : public GNAMemRequestsQueue { iterate_binded(re, visitor); } } +#ifdef GNA_HEAP_PROFILER + memoryDump(); +#endif } + protected: std::shared_ptr allocate(size_t bytes) { - std::shared_ptr sp(_allocator.allocate(bytes), [=](uint8_t *p) { - _allocator.deallocate(p, bytes); + Allocator nA = _allocator; + std::shared_ptr sp(_allocator.allocate(bytes), [nA, bytes](uint8_t* p) mutable { + nA.deallocate(p, bytes); }); std::fill(sp.get(), sp.get() + bytes, 0); return sp; } - protected: /** * @brief expand BIND and (BIND | ) requests. 
Align size(_padding), set execution order */ - void expandBindings() { - for (auto &originated : _future_heap) { + void expandBindings(GNAMemRequestsQueue *mRequests) { + for (auto &originated : mRequests->_mem_requests) { // skipping bind requests to avoid duplications if (originated._type & REQUEST_BIND) continue; @@ -179,11 +210,10 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief set offsets for specific region */ - size_t setRegionOffsets(GNAPluginNS::memory::rRegion regType) { + size_t setRegionOffsets(GNAMemRequestsQueue* mRequests) { size_t region_offset = 0; - for (auto &re : _future_heap) { - if (re._region != regType || re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; - + for (auto& re : mRequests->_mem_requests) { + if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; re._offset = region_offset; region_offset += ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); } @@ -193,15 +223,14 @@ class GNAMemory : public GNAMemRequestsQueue { /** * @brief allocates memory and updates pointers */ - void allocateRegion(GNAPluginNS::memory::rRegion regType, size_t baseOffset) { - for (auto &re : _future_heap) { + void allocateRegion(GNAMemRequestsQueue *mRequests) { + size_t r_size = ALIGN(mRequests->getSize(), _page_alignment); + for (auto &re : mRequests->_mem_requests) { // skipping Bind, crossregion and empty requests - if (re._region != regType || re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; - - size_t offset = baseOffset + re._offset; - auto cptr = heap.get() + offset; - size_t cptr_avail_size = _total - offset; + if (re._type == REQUEST_BIND || re._ptr_out == nullptr) continue; + auto cptr = mRequests->_basePtr.get() + re._offset; + size_t cptr_avail_size = r_size - re._offset; auto sz = re._element_size * re._num_elements; if (re._type & REQUEST_BIND) { cptr = reinterpret_cast(*reinterpret_cast(re._ptr_out)); @@ -216,6 +245,7 @@ class GNAMemory : public GNAMemRequestsQueue { 
binded._element_size = reference._element_size; }); + gnalog() << static_cast(cptr) << "(" << sz + re._padding << ")" << std::endl; switch (re._type & ~REQUEST_BIND) { case REQUEST_ALLOCATE : break; @@ -238,54 +268,12 @@ class GNAMemory : public GNAMemRequestsQueue { } } - /** - * @brief optimize memory region by reusing buffers - */ - size_t getSectionSizeOptimized(GNAPluginNS::memory::rRegion regType) { - size_t memSize = 0; - switch (regType) { - case REGION_AUTO: - case REGION_RW: - case REGION_RO: { - std::vector boxes; - for (size_t i = 0; i < _future_heap.size(); ++i) { - // skipping BIND, cross-region and empty requests - if (_future_heap[i]._type & REQUEST_BIND || _future_heap[i]._region != regType || _future_heap[i]._ptr_out == nullptr) { - continue; - } - - auto original_with_pad = ALIGN(_future_heap[i]._num_elements * _future_heap[i]._element_size + _future_heap[i]._padding, - _future_heap[i]._alignment); - int start = _future_heap[i]._life_limits.first; - int stop = _future_heap[i]._life_limits.second; - - boxes.push_back({start, stop, static_cast(original_with_pad), static_cast(i)}); - } - MemorySolver memSolver(boxes); - memSize = memSolver.solve(); - - // setting offsets - for (auto const & box : boxes) { - _future_heap[box.id]._offset = memSolver.getOffset(box.id); - } - } - break; - - default: - break; - } - - return memSize; - } - - #ifdef GNA_HEAP_PROFILER - void memoryDump(std::function filter) { - std::ofstream dumpFile("gna_memory_requests.txt", std::ios::out); - - for (auto &re : _future_heap) { - if (filter(re)) continue; - dumpFile << ": " << " region: " << rRegionToStr(re._region) << ", " + void memoryDump() { + for (const auto &queue : _mem_queues) { + std::ofstream dumpFile("gna_memory_requests_" + rRegionToStr(queue.first) + ".txt", std::ios::out); + for (auto &re : queue.second->_mem_requests) { + dumpFile << "region: " << rRegionToStr(re._region) << ", " << "type: " << std::setw(17) << rTypeToStr(re._type) << " " << "ptr_in: " << 
std::setw(15) << re._ptr_in << " " << "ptr_out: " << std::setw(15) << re._ptr_out << " " @@ -296,45 +284,11 @@ class GNAMemory : public GNAMemRequestsQueue { << std::setw(8) << re._offset << ", " << "life_time: " << re._life_limits.first << ":" << re._life_limits.second << ", " << std::endl; - } - } -#endif - - void updateSectionsSizes() { - // count total size and size of read/write regions - _rw_section_size = 0; - _ro_section_size = 0; -#ifdef GNA_HEAP_PROFILER - memoryDump([](GNAPluginNS::memory::MemRequest & request) { - return false; - }); -#endif - for (auto &re : _future_heap) { - if (re._type & REQUEST_BIND || re._ptr_out == nullptr) continue; - - size_t current = ALIGN(re._num_elements * re._element_size + re._padding, re._alignment); - if (re._region == REGION_RW) { - _rw_section_size += current; - } else { - _ro_section_size += current; } } - - if (_is_compact_mode) { - _rw_section_size = getSectionSizeOptimized(REGION_RW); - } - - gnalog() << "ro_section_size: " << _ro_section_size << std::endl; - gnalog() << "rw_section_size: " << _rw_section_size << std::endl; - gnalog() << "total: " << _total << std::endl; - - _rw_section_size = ALIGN(_rw_section_size, _page_alignment); - _ro_section_size = ALIGN(_ro_section_size, _page_alignment); - _total = _rw_section_size + _ro_section_size; - - gnalog() << "Aligned ro_section_size: " << _ro_section_size << std::endl; - gnalog() << "Aligned rw_section_size: " << _rw_section_size << std::endl; } +#endif }; + } // namespace memory } // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp b/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp deleted file mode 100644 index 1cece4cd8252e1..00000000000000 --- a/src/plugins/intel_gna/memory/ipolymorph_allocator.hpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace GNAPluginNS { -namespace memory { - -template -class 
IPolymorphAllocator { -public: - virtual T *allocate(std::size_t n) = 0; - virtual void deallocate(T *p, std::size_t n) = 0; -}; -} // namespace memory -} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/memory/polymorph_allocator.hpp b/src/plugins/intel_gna/memory/polymorph_allocator.hpp deleted file mode 100644 index fa7c83445ade99..00000000000000 --- a/src/plugins/intel_gna/memory/polymorph_allocator.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include "ipolymorph_allocator.hpp" - -namespace GNAPluginNS { -namespace memory { -/** - * @brief c++17 concept simulation - */ - -template -class PolymorphAllocator { - std::shared_ptr> _impl; - public: - explicit PolymorphAllocator(const std::shared_ptr> &impl) : _impl(impl) {} - - T *allocate(std::size_t n) { - return _impl->allocate(n); - } - - void deallocate(T *p, std::size_t n) { - _impl->deallocate(p, n); - } -}; - -/** - * transform any allocator into polymorph type - * @tparam origin - */ - -template -class PolymorphAdapter : public IPolymorphAllocator { - origin _impl; - using T = typename origin::value_type; - - public: - template - explicit PolymorphAdapter(Args &&... args) - :_impl(std::forward(args)...) { - } - T *allocate(std::size_t n) override { - return _impl.allocate(n); - } - void deallocate(T *p, std::size_t n) override { - _impl.deallocate(p, n); - } -}; - -template -inline PolymorphAllocator make_polymorph(Args &&... 
args) { - auto sp = std::make_shared>(std::forward(args)...); - auto ipoly = std::static_pointer_cast>(sp); - - return PolymorphAllocator(ipoly); -} -} // namespace memory -} // namespace GNAPluginNS diff --git a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp index 7338b777b8488f..c189f635d8caee 100644 --- a/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp +++ b/src/plugins/intel_gna/transformations/decompose_2d_convolution.cpp @@ -83,21 +83,28 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptrget_friendly_name(), - conv_data.input_height, conv_data.input_width, conv_data.input_channel_count, - conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count, - conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width, - OvGnaTypeIntFromBytes(gnaPrecision.size()), false) && - (!graph_data.max_pool || cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), + + const auto cnn2dValidatorPtr = GNALimitations::Cnn2D::AbstractValidator::Create(gnaCompileTarget); + if (!cnn2dValidatorPtr) { + return false; + } + const auto& cnn2dValidator = *cnn2dValidatorPtr; + const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(), + conv_data.input_height, conv_data.input_width, conv_data.input_channel_count, + conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count, + conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width, + OvGnaTypeIntFromBytes(gnaPrecision.size()), false); + if (!cnnIsValid) { + return false; + } + if (!graph_data.max_pool) { + return true; + } + const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), graph_data.max_pool->get_kernel()[0], 
graph_data.max_pool->get_kernel()[1], graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[1], - false))) - return true; - - return false; + false); + return poolingValid; } static size_t CalculateConvCount(const ConvData& conv_data) { diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp b/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp index 93fd23493fe38c..576d4b1048733a 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/skip_tests_check.hpp @@ -16,8 +16,8 @@ class GnaLayerTestCheck : virtual public LayerTestsUtils::LayerTestsCommon { if (std::find(metrics.begin(), metrics.end(), METRIC_KEY(GNA_LIBRARY_FULL_VERSION)) != metrics.end()) { std::string gnaLibVer = ie_core.GetMetric(targetDevice, METRIC_KEY(GNA_LIBRARY_FULL_VERSION)); - if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.0", 0) != 0) { - GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.0" << std::endl; + if (gnaLibVer.rfind("2.1", 0) != 0 && gnaLibVer.rfind("3.", 0) != 0) { + GTEST_SKIP() << "Disabled test due to GNA library version being not 2.1 or 3.X" << std::endl; } skipTest = false; } diff --git a/src/tests/unit/gna/gna_allocator_test.cpp b/src/tests/unit/gna/gna_allocator_test.cpp index 6d1a890b634ffe..44d4ff04a45754 100644 --- a/src/tests/unit/gna/gna_allocator_test.cpp +++ b/src/tests/unit/gna/gna_allocator_test.cpp @@ -9,6 +9,7 @@ #include #include +#include "memory/gna_memory.hpp" #include "gna_device.hpp" // dummy definitions to work around issue with Linux userspace library @@ -47,7 +48,7 @@ class GNAAllocatorTest : public ::testing::Test { }; TEST_F(GNAAllocatorTest, canAllocateStdMemory) { - auto sp = GNAPluginNS::memory::make_polymorph>(); + auto sp = GNAPluginNS::memory::GNAFloatAllocator{}; uint8_t *x = nullptr; ASSERT_NO_THROW(x = sp.allocate(100)); ASSERT_NE(x, 
nullptr); @@ -57,7 +58,7 @@ TEST_F(GNAAllocatorTest, canAllocateStdMemory) { TEST_F(GNAAllocatorTest, canAllocateGNAMemory) { // GNA device can be opened one per process for now gnadevice.reset(new GNADeviceHelper()); - auto sp = GNAPluginNS::memory::make_polymorph(gnadevice); + GNAPluginNS::memory::GNAAllocator sp{ gnadevice }; uint8_t *x = nullptr; ASSERT_NO_THROW(x = sp.allocate(100)); ASSERT_NE(x, nullptr); diff --git a/src/tests/unit/gna/gna_api_stub.cpp b/src/tests/unit/gna/gna_api_stub.cpp index 6fba84c7b69ee3..6646517b70222f 100644 --- a/src/tests/unit/gna/gna_api_stub.cpp +++ b/src/tests/unit/gna/gna_api_stub.cpp @@ -38,6 +38,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc( return Gna2StatusSuccess; } +GNA2_API enum Gna2Status Gna2MemorySetTag( + void* memory, + uint32_t tag) { + return Gna2StatusSuccess; +} + GNA2_API enum Gna2Status Gna2DeviceCreateForExport( Gna2DeviceVersion targetDeviceVersion, uint32_t * deviceIndex) { diff --git a/src/tests/unit/gna/gna_memory_compact_test.cpp b/src/tests/unit/gna/gna_memory_compact_test.cpp index 73ff7e022ac948..addb6c4f7b2a9a 100644 --- a/src/tests/unit/gna/gna_memory_compact_test.cpp +++ b/src/tests/unit/gna/gna_memory_compact_test.cpp @@ -21,7 +21,7 @@ using namespace GNAPluginNS::memory; class GNAMemoryCompactTest : public ::testing::Test { protected: - GNAMemory> mem; + GNAMemory mem; bool isCompact = true; void SetUp() override { @@ -39,12 +39,12 @@ TEST_F(GNAMemoryCompactTest, canOptimizeReservePtr) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.reserve_ptr(layer1, pFuture1, 3 * sizeof(float)); - mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + auto scratch = mem.getQueue(rRegion::REGION_SCRATCH); + scratch->reserve_ptr(layer1, pFuture1, 3 * sizeof(float)); + scratch->reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 3 * sizeof(float)); + 
ASSERT_EQ(scratch->getSize(), 3 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizePushValue) { @@ -58,12 +58,12 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushValue) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(layer1, pFuture1, 1.f, 2); - mem.push_value(layer2, pFuture2, 2.f, 3); + auto scratch = mem.getQueue(rRegion::REGION_SCRATCH); + scratch->push_value(layer1, pFuture1, 1.f, 2); + scratch->push_value(layer2, pFuture2, 2.f, 3); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 5 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) { @@ -80,13 +80,13 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushValueAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_value(layer1, pFuture1, 3.f, 2); - mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); - mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_value(layer1, pFuture1, 3.f, 2); + scratchQueue->bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + scratchQueue->reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 2 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 2 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) { @@ -105,14 +105,14 @@ TEST_F(GNAMemoryCompactTest, canOptimizeTwoPushValueAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_value(layer1, pFuture1, 1.f, 2); - mem.push_value(layer2, pFuture2, 2.f, 3); - mem.reserve_ptr(layer3, pFuture3, 5 * sizeof(float)); - mem.bind_ptr(layer2, pFuture2, pFuture1, 0, 2); + auto 
scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_value(layer1, pFuture1, 1.f, 2); + scratchQueue->push_value(layer2, pFuture2, 2.f, 3); + scratchQueue->reserve_ptr(layer3, pFuture3, 5 * sizeof(float)); + scratchQueue->bind_ptr(layer2, pFuture2, pFuture1, 0, 2); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 5 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 5 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 5 * sizeof(float)); } @@ -133,13 +133,13 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.push_ptr(layer1, pFuture1, input, input_size); - mem.reserve_ptr(layer2, pFuture2, input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->push_ptr(layer1, pFuture1, input, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, input_size); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), input_size); - ASSERT_EQ(mem.getTotalBytes(), input_size); + ASSERT_EQ(scratchQueue->getSize(), input_size); } TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) { @@ -156,19 +156,19 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushLocalPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); size_t input_size; { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; input_size = input.size() * sizeof(float); - mem.push_local_ptr(layer1, pFuture1, &*input.begin(), input_size); + scratchQueue->push_local_ptr(layer1, pFuture1, &*input.begin(), input_size); } - mem.reserve_ptr(layer2, pFuture2, input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, input_size); + 
scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), input_size); - ASSERT_EQ(mem.getTotalBytes(), input_size); + ASSERT_EQ(scratchQueue->getSize(), input_size); } TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) { @@ -185,21 +185,21 @@ TEST_F(GNAMemoryCompactTest, canOptimizePushInitilizerPtrAndReservePtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); size_t input_size; { std::vector input = {1.0f, 2.0f, 3.0f}; input_size = input.size() * sizeof(float); - mem.push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size){ + scratchQueue->push_initializer(layer1, pFuture1, input_size, [=](void* data, size_t size) { ie_memcpy(data, size, &input[0], input.size()); }); } - mem.reserve_ptr(layer2, pFuture2, 2 * input_size); - mem.bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); + scratchQueue->reserve_ptr(layer2, pFuture2, 2 * input_size); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 0, input_size); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 2 * input_size); - ASSERT_EQ(mem.getTotalBytes(), 2 * input_size); + ASSERT_EQ(scratchQueue->getSize(), 2 * input_size); } TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) { @@ -219,20 +219,20 @@ TEST_F(GNAMemoryCompactTest, canOptimizeBindInitilizerPtrAndReservePtr) { float* pFuture3 = reinterpret_cast(&pFuture3); float* pFuture4 = reinterpret_cast(&pFuture4); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); { std::vector input = {1.0f, 2.0f, 3.0f}; - mem.bind_initializer(layer2, pFuture1, [=](void* data, size_t size){ + scratchQueue->bind_initializer(layer2, pFuture1, [=](void* data, size_t size) { ie_memcpy(data, size, &input[0], input.size()); }); } - mem.reserve_ptr(layer1, pFuture1, 4 * sizeof(float)); - mem.reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); - 
mem.bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float)); + scratchQueue->reserve_ptr(layer1, pFuture1, 4 * sizeof(float)); + scratchQueue->reserve_ptr(layer3, pFuture3, 2 * sizeof(float)); + scratchQueue->bind_ptr(layer4, pFuture4, pFuture3, 0, 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); + ASSERT_EQ(scratchQueue->getSize(), 4 * sizeof(float)); } TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) { @@ -249,24 +249,26 @@ TEST_F(GNAMemoryCompactTest, canOptimizeReservePtrWithOffset) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = reinterpret_cast(&pFuture3); - mem.reserve_ptr(layer1, pFuture1, 2 * sizeof(float)); - mem.reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); - mem.bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float)); + auto scratchQueue = mem.getQueue(rRegion::REGION_SCRATCH); + scratchQueue->reserve_ptr(layer1, pFuture1, 2 * sizeof(float)); + scratchQueue->reserve_ptr(layer2, pFuture2, 2 * sizeof(float)); + scratchQueue->bind_ptr(layer3, pFuture3, pFuture2, 2 * sizeof(float), 2 * sizeof(float)); mem.commit(isCompact); - ASSERT_EQ(mem.getRWBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 4 * sizeof(float)); } -class GNAMemoryTested : public GNAPluginNS::memory::GNAMemory> { -using GNAMemory::GNAMemory; +class GNAMemoryTested : public GNAPluginNS::memory::GNAMemory { + using GNAMemory::GNAMemory; public: void Test() { // filtering RW allocation requests only - auto filter_req = [] (const MemRequest &re) { return re._region == REGION_RW && re._type != REQUEST_BIND; }; + auto filter_req = [] (const MemRequest &re) { return re._region == REGION_SCRATCH && re._type != REQUEST_BIND; }; std::vector test_reqs; - auto it = std::copy_if(_future_heap.begin(), _future_heap.end(), std::back_inserter(test_reqs), filter_req); 
+ const auto& requests = getQueue(REGION_SCRATCH)->_mem_requests; + + auto it = std::copy_if(requests.begin(), requests.end(), std::back_inserter(test_reqs), filter_req); // intercrossing condition auto is_crossed = [] (const MemRequest &re1, const MemRequest &re2) { @@ -291,7 +293,7 @@ class GNAPluginTested : public GNAPluginNS::GNAPlugin { public: std::shared_ptr gnamem_t; GNAPluginTested() : GNAPluginNS::GNAPlugin() { - gnamem_t = std::make_shared(make_polymorph>()); + gnamem_t = std::make_shared(); gnamem = gnamem_t; graphCompiler.setGNAMemoryPtr(gnamem); gnadevice.reset(); diff --git a/src/tests/unit/gna/gna_memory_test.cpp b/src/tests/unit/gna/gna_memory_test.cpp index 2072ccb1399c95..0fee1a0eb6cd3d 100644 --- a/src/tests/unit/gna/gna_memory_test.cpp +++ b/src/tests/unit/gna/gna_memory_test.cpp @@ -10,7 +10,7 @@ using namespace GNAPluginNS::memory; class GNAMemoryTest : public ::testing::Test { protected: - GNAMemory> mem; + GNAMemory mem{ GNAFloatAllocator{} }; void SetUp() override { } @@ -21,7 +21,7 @@ TEST_F(GNAMemoryTest, canStoreActualBlob) { float* pFuture = nullptr; size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); ASSERT_NE(pFuture, nullptr); @@ -36,8 +36,8 @@ TEST_F(GNAMemoryTest, canStore2Blobs) { float* pFuture = nullptr; float* pFuture2 = nullptr; - mem.push_ptr(nullptr, &pFuture, input, 3*4); - mem.push_ptr(nullptr, &pFuture2, input+1, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture2, input+1, 3*4); mem.commit(); ASSERT_NE(pFuture, input); @@ -55,11 +55,11 @@ TEST_F(GNAMemoryTest, canStore2Blobs) { TEST_F(GNAMemoryTest, canStoreBlobsALIGNED) { float input[] = {1, 2, 3, 4, 5, 6, 7, 8}; float* pFuture = nullptr; - - mem.push_ptr(nullptr, &pFuture, input, 3*4, 8); + auto queue = mem.getQueue(REGION_SCRATCH); + queue->push_ptr(nullptr, &pFuture, 
input, 3 * 4, 8); mem.commit(); - ASSERT_EQ(16 , mem.getTotalBytes()); + ASSERT_EQ(16, queue->getSize()); ASSERT_NE(pFuture, input); ASSERT_NE(pFuture, nullptr); @@ -75,12 +75,12 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) { float input[] = {1, 2, 3, 4, 5, 6, 7, 8}; float* pFuture = nullptr; float* pFuture2 = nullptr; - - mem.push_ptr(nullptr, &pFuture, input, 3*4, 8); - mem.push_ptr(nullptr, &pFuture2, input, 3*4, 16); + auto queue = mem.getQueue(REGION_SCRATCH); + queue->push_ptr(nullptr, &pFuture, input, 3 * 4, 8); + queue->push_ptr(nullptr, &pFuture2, input, 3 * 4, 16); mem.commit(); - ASSERT_EQ(32 , mem.getTotalBytes()); + ASSERT_EQ(32 , queue->getSize()); ASSERT_NE(pFuture, nullptr); @@ -95,14 +95,14 @@ TEST_F(GNAMemoryTest, canStore2BlobsALIGNED) { TEST_F(GNAMemoryTest, canReserveData) { float* pFuture = nullptr; - mem.reserve_ptr(nullptr, &pFuture, 3*4); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture, 3*4); mem.commit(); ASSERT_NE(pFuture, nullptr); } TEST_F(GNAMemoryTest, canReserveDataByVoid) { - mem.reserve_ptr(nullptr, nullptr, 3*4); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, nullptr, 3*4); ASSERT_NO_THROW(mem.commit()); } @@ -113,8 +113,8 @@ TEST_F(GNAMemoryTest, canReserveAndPushData) { float* pFuture2 = nullptr; size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.reserve_ptr(nullptr, &pFuture2, 3*4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture2, 3*4); mem.commit(); ASSERT_NE(pFuture, nullptr); @@ -138,9 +138,9 @@ TEST_F(GNAMemoryTest, canBindAndResolve) { float *pFuture3 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture3, &pFuture); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + 
mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture); mem.commit(); @@ -161,9 +161,9 @@ TEST_F(GNAMemoryTest, canBindTransitevlyAndResolve) { float *pFuture4 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture4, &pFuture3); - mem.bind_ptr(nullptr, &pFuture3, &pFuture); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture4, &pFuture3); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); @@ -185,9 +185,9 @@ TEST_F(GNAMemoryTest, canBindTransitevlyWithOffsetsAndResolve) { float *pFuture4 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture4, &pFuture3, 4); - mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4); - mem.push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture4, &pFuture3, 4); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture, 4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); mem.commit(); @@ -209,9 +209,9 @@ TEST_F(GNAMemoryTest, canBindWithOffsetAndResolve) { float *pFuture3 = nullptr; size_t len = sizeof(input); - mem.bind_ptr(nullptr, &pFuture3, &pFuture, 4); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture, 4); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture); mem.commit(); @@ -233,7 +233,7 @@ TEST_F(GNAMemoryTest, canPushLocal) { { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f}; - mem.push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1); + mem.getQueue(REGION_SCRATCH)->push_local_ptr(nullptr, pFuture, &*input.begin(), 4 * 4, 1); } //poison stack @@ -250,8 +250,8 @@ TEST_F(GNAMemoryTest, canPushValue) { float* pFuture2 = reinterpret_cast(&pFuture2); { - 
mem.push_value(nullptr, pFuture, 3.f, 2); - mem.push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture2, 13.f, 2); } mem.commit(); @@ -267,66 +267,66 @@ TEST_F(GNAMemoryTest, canPushReadOnlyValue) { float* pFuture2 = reinterpret_cast(&pFuture2); { - mem.push_value(nullptr, pFuture, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); } mem.commit(); ASSERT_FLOAT_EQ(pFuture[0], 3); ASSERT_FLOAT_EQ(pFuture[1], 3); - ASSERT_FLOAT_EQ(pFuture[2], 13); - ASSERT_FLOAT_EQ(pFuture[3], 13); + ASSERT_FLOAT_EQ(pFuture2[0], 13); + ASSERT_FLOAT_EQ(pFuture2[1], 13); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeEmptyReqs) { - mem.push_value(nullptr, nullptr, 3.f, 2); - mem.readonly().push_value(nullptr, nullptr, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, nullptr, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 0); - ASSERT_EQ(mem.getRWBytes(), 0); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 0); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 0); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithEmptyReqs) { // empty request before - mem.push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); // not empty requests float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(nullptr, pFuture1, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); // empty request after - mem.readonly().push_value(nullptr, nullptr, 13.f, 2); - + 
mem.getQueue(REGION_SCRATCH)->push_value(nullptr, nullptr, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, nullptr, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 2 * sizeof(float)); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSize) { float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - mem.push_value(nullptr, pFuture1, 3.f, 2); - mem.readonly().push_value(nullptr, pFuture2, 13.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), 2 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 2 * sizeof(float)); } TEST_F(GNAMemoryTest, canCalculateReadWriteSectionSizeWithAlignment) { - GNAMemory> memAligned(64); + GNAMemory memAligned(64); float* pFuture1 = reinterpret_cast(&pFuture1); float* pFuture2 = reinterpret_cast(&pFuture2); - memAligned.push_value(nullptr, pFuture1, 3.f, 2); - memAligned.readonly().push_value(nullptr, pFuture2, 13.f, 2); + memAligned.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture1, 3.f, 2); + memAligned.getQueue(REGION_RO)->push_value(nullptr, pFuture2, 13.f, 2); memAligned.commit(); - ASSERT_EQ(memAligned.getTotalBytes(), 128); - ASSERT_EQ(memAligned.getRWBytes(), 64); + ASSERT_EQ(memAligned.getRegionBytes(rRegion::REGION_RO), 64); + ASSERT_EQ(memAligned.getRegionBytes(rRegion::REGION_SCRATCH), 64); } TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) { @@ -334,15 +334,15 @@ TEST_F(GNAMemoryTest, canSetUpReadWriteSectionPtr) { float* pFuture2 = reinterpret_cast(&pFuture2); float* pFuture3 = 
reinterpret_cast(&pFuture3); - mem.readonly().push_value(nullptr, pFuture1, 3.f, 2); - mem.push_value(nullptr, pFuture2, 13.f, 3); - mem.readonly().push_value(nullptr, pFuture3, 32.f, 4); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture1, 3.f, 2); + mem.getQueue(REGION_SCRATCH)->push_value(nullptr, pFuture2, 13.f, 3); + mem.getQueue(REGION_RO)->push_value(nullptr, pFuture3, 32.f, 4); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), (2+3+4) * sizeof(float)); - ASSERT_EQ(mem.getRWBytes(), 3 * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_RO), (2 + 4) * sizeof(float)); + ASSERT_EQ(mem.getRegionBytes(rRegion::REGION_SCRATCH), 3 * sizeof(float)); - ASSERT_LT(&pFuture2[0], &pFuture1[0]); + ASSERT_NE(&pFuture2[0], &pFuture1[0]); ASSERT_LT(&pFuture1[0], &pFuture3[0]); ASSERT_FLOAT_EQ(pFuture1[0], 3.f); @@ -367,13 +367,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequest) { size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture3, &pFuture2, 2 * len, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 4 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 4 * len); ASSERT_NE(pFuture, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); ASSERT_EQ(pFuture3, pFuture + 9); @@ -399,13 +399,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenPush) { size_t len = sizeof(input); - mem.push_ptr(nullptr, &pFuture, input, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.push_ptr(nullptr, &pFutureInput2, input2, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFuture, input, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + 
mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFutureInput2, input2, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 3 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 3 * len); ASSERT_NE(pFuture, nullptr); ASSERT_NE(pFutureInput2, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); @@ -430,13 +430,13 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) { size_t len = sizeof(input); - mem.reserve_ptr(nullptr, &pFuture, len); - mem.bind_ptr(nullptr, &pFuture2, &pFuture, len, len); - mem.push_ptr(nullptr, &pFutureInput, input, len); + mem.getQueue(REGION_SCRATCH)->reserve_ptr(nullptr, &pFuture, len); + mem.getQueue(REGION_AUTO)->bind_ptr(nullptr, &pFuture2, &pFuture, len, len); + mem.getQueue(REGION_SCRATCH)->push_ptr(nullptr, &pFutureInput, input, len); mem.commit(); - ASSERT_EQ(mem.getTotalBytes(), 3 * len); + ASSERT_EQ(mem.getRegionBytes(REGION_SCRATCH), 3 * len); ASSERT_NE(pFuture, nullptr); ASSERT_NE(pFutureInput, nullptr); ASSERT_EQ(pFuture2, pFuture + 3); @@ -450,4 +450,4 @@ TEST_F(GNAMemoryTest, canUpdateSizeOfPushRequestWithBindRequestWhenAlloc) { ASSERT_FLOAT_EQ(pFutureInput[0], 1); ASSERT_FLOAT_EQ(pFutureInput[1], 2); ASSERT_FLOAT_EQ(pFutureInput[2], 3); -} \ No newline at end of file +} diff --git a/src/tests/unit/gna/gna_plugin_config_test.cpp b/src/tests/unit/gna/gna_plugin_config_test.cpp index 7b12f82bf984bb..d8e599f30ef786 100644 --- a/src/tests/unit/gna/gna_plugin_config_test.cpp +++ b/src/tests/unit/gna/gna_plugin_config_test.cpp @@ -196,7 +196,7 @@ TEST_F(GNAPluginConfigTest, GnaConfigGnaExecTargetTest) { EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_2_0"); SetAndCompare(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_0"); EXPECT_EQ(config.gnaExecTarget, "GNA_TARGET_3_0"); - ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_5"); + ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_3_7"); ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "0"); ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET_1_5"); 
ExpectThrow(GNA_CONFIG_KEY(EXEC_TARGET), "GNA_TARGET"); @@ -207,7 +207,7 @@ TEST_F(GNAPluginConfigTest, GnaConfigGnaCompileTargetTest) { EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_2_0"); SetAndCompare(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_0"); EXPECT_EQ(config.gnaCompileTarget, "GNA_TARGET_3_0"); - ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_5"); + ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_3_7"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "0"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET_1_5"); ExpectThrow(GNA_CONFIG_KEY(COMPILE_TARGET), "GNA_TARGET"); diff --git a/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp b/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp index 639779bac109d4..81c3655d80a588 100644 --- a/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp +++ b/src/tests_deprecated/unit/engines/gna/gna_api_stub.cpp @@ -39,6 +39,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc( return Gna2StatusSuccess; } +GNA2_API enum Gna2Status Gna2MemorySetTag( + void* memory, + uint32_t tag) { + return Gna2StatusSuccess; +} + GNA2_API enum Gna2Status Gna2DeviceCreateForExport( Gna2DeviceVersion targetDeviceVersion, uint32_t * deviceIndex) { diff --git a/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp b/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp index 8498cfca9de5df..8d1bd1e9bbc9a6 100644 --- a/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp +++ b/src/tests_deprecated/unit/engines/gna/gna_matcher.cpp @@ -108,35 +108,33 @@ void GNAPropagateMatcher :: match() { OutputsDataMap outputsInfo; StrictMock mockApi; - std::vector data; + std::vector> data; if (_env.config[GNA_CONFIG_KEY(DEVICE_MODE)].compare(GNA_CONFIG_VALUE(SW_FP32)) != 0 && !_env.matchThrows) { - EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)).WillOnce(Invoke([&data]( - uint32_t sizeRequested, - uint32_t *sizeGranted, - void **memoryAddress - ) { - data.resize(sizeRequested); - *sizeGranted = sizeRequested; - *memoryAddress = 
&data.front(); - return Gna2StatusSuccess; - })); + EXPECT_CALL(mockApi, Gna2MemoryAlloc(_,_,_)) + .WillRepeatedly(Invoke([&data](uint32_t sizeRequested, uint32_t* sizeGranted, void** memoryAddress) { + data.push_back(std::vector(sizeRequested)); + *sizeGranted = sizeRequested; + *memoryAddress = data.back().data(); + return Gna2StatusSuccess; + })); - EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)).WillOnce(Invoke([]( - uint32_t deviceIndex, - enum Gna2DeviceVersion * deviceVersion) { + EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)) + .WillOnce(Invoke([](uint32_t deviceIndex, enum Gna2DeviceVersion* deviceVersion) { *deviceVersion = Gna2DeviceVersionSoftwareEmulation; return Gna2StatusSuccess; })); EXPECT_CALL(mockApi, Gna2DeviceOpen(_)).WillOnce(Return(Gna2StatusSuccess)); - EXPECT_CALL(mockApi, Gna2GetLibraryVersion(_,_)).Times(AtLeast(0)).WillRepeatedly(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2GetLibraryVersion(_,_)) + .Times(AtLeast(0)) + .WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2InstrumentationConfigCreate(_,_,_,_)).WillOnce(Return(Gna2StatusSuccess)); - if(_env.is_setup_of_omp_theads_expected == true) { + if (_env.is_setup_of_omp_theads_expected == true) { EXPECT_CALL(mockApi, Gna2DeviceSetNumberOfThreads(_,_)).WillOnce(Return(Gna2StatusSuccess)); } @@ -200,7 +198,7 @@ void GNAPropagateMatcher :: match() { expect_enqueue_calls(mockApi); } - EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillOnce(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2DeviceClose(_)).WillOnce(Return(Gna2StatusSuccess)); } @@ -690,8 +688,8 @@ void GNAQueryStateMatcher :: match() { } }; - EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)). 
- WillOnce(DoAll(SetArgPointee<1>(10000), SetArgPointee<2>(&data.front()), Return(Gna2StatusSuccess))); + EXPECT_CALL(mockApi, Gna2MemoryAlloc(_, _, _)).Times(AtLeast(1)) + .WillRepeatedly(DoAll(SetArgPointee<1>(10000), SetArgPointee<2>(&data.front()), Return(Gna2StatusSuccess))); EXPECT_CALL(mockApi, Gna2DeviceGetVersion(_,_)).WillOnce(Invoke([]( uint32_t deviceIndex, @@ -706,7 +704,7 @@ void GNAQueryStateMatcher :: match() { EXPECT_CALL(mockApi, Gna2InstrumentationConfigCreate(_,_,_,_)).WillOnce(Return(Gna2StatusSuccess)); - EXPECT_CALL(mockApi, Gna2MemoryFree(_)).WillOnce(Return(Gna2StatusSuccess)); + EXPECT_CALL(mockApi, Gna2MemoryFree(_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess)); EXPECT_CALL(mockApi, Gna2DeviceClose(_)).WillOnce(Return(Gna2StatusSuccess)); diff --git a/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp b/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp index f71f880912af1d..55e46da6c8c566 100644 --- a/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp +++ b/src/tests_deprecated/unit/engines/gna/i16_quantisation_test.cpp @@ -115,23 +115,24 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ ModelQuantizer q; + const float inputScaleFactorTest = 1000; + const float weightValueTest = 100; auto weights = make_shared_blob({ Precision::U8, {440}, C }); weights->allocate(); - fillWeights(weights, {100}); + fillWeights(weights, { weightValueTest }); Core ie; auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); - auto newNet = q.quantize(network, 1000); + auto newNet = q.quantize(network, inputScaleFactorTest); InputsDataMap inputs = newNet.getInputsInfo(); auto affineLayerPtr = getInputTo(inputs.begin()->second->getInputData()).begin()->second; auto quantParams = getInjectedData(affineLayerPtr); - - ASSERT_FLOAT_EQ(quantParams->_dst_quant.GetScale(), 100); - 
ASSERT_FLOAT_EQ(quantParams->_weights_quant.GetScale(), 100); + ASSERT_FLOAT_EQ(quantParams->_dst_quant.GetScale(), MAX_VAL_2B_WEIGHT / weightValueTest * inputScaleFactorTest); + ASSERT_FLOAT_EQ(quantParams->_weights_quant.GetScale(), MAX_VAL_2B_WEIGHT / weightValueTest); } TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {