diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp index 510ab7fc43b0c8..1fc3a3e20965c6 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp @@ -270,4 +270,22 @@ struct BYPASS_UMD_CACHING final : OptionBase { return OptionMode::RunTime; } }; + +// +// RUN_INFERENCES_SEQUENTIALLY +// +struct RUN_INFERENCES_SEQUENTIALLY final : OptionBase { + static std::string_view key() { + return ov::intel_npu::run_inferences_sequentially.name(); + } + + static bool defaultValue() { + return false; + } + + static OptionMode mode() { + return OptionMode::RunTime; + } +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index ec92e10a9f89c8..8aabd132e9431a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -327,5 +327,14 @@ static constexpr ov::Property backend_n */ static constexpr ov::Property backend_compilation_params{"NPU_BACKEND_COMPILATION_PARAMS"}; +/** + * @brief [Only for NPU Plugin] + * Type: boolean, default is false. + * This option allows to run inferences sequentially, in the order in which they were created + * @note Experimental property, for now it only works in very specific scenarios. We need driver updates before we can + * implement a robust solution for in-order execution + */ +static constexpr ov::Property run_inferences_sequentially{"NPU_RUN_INFERENCES_SEQUENTIALLY"}; + } // namespace intel_npu } // namespace ov diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp index 759956b6f597df..3da16796219332 100644 --- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp +++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp @@ -27,6 +27,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); } // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 3efbdab666d1ba..1e8781b0afe820 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -38,25 +38,6 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector get_profiling_info() const override; std::vector get_raw_profiling_data() const; - /** - * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by - * the model will also be deduced and returned. - * @details Batching can be handled by the plugin only if: - * - The batch axis is the first axis. - * - The batch size received by the compiler takes the default value of 1. - * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the - * default one. - * - * If any of the previous conditions is not fulfilled, the functon will return the default batch size, thus no - * custom algorithm will be applied inside the plugin in order to address batching. 
- * - * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will - * ultimately be used for determining the batch size. - * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside - * the plugin. - */ - std::optional get_batch_size(const NetworkMetadata& metadata); - /** * @brief Check the received tensor and set the Level Zero tensor accordingly * @param tensor Reference to a tensor. @@ -106,22 +87,6 @@ class ZeroInferRequest final : public SyncInferRequest { std::shared_ptr _npuProfiling; std::unique_ptr _pipeline; - /** - * @brief Indicates how many command lists will be used inside the pipeline. - * @details Leveraging multiple command lists implies distributing the input/output buffers accross the batch axis - * between these lists. - * - * If batching is handled on compiler's side then a single command list shall be used, we don't do any - * specific operation inside the plugin in this case. - */ - size_t _numberOfCommandLists = 1; - - /** - * @brief The batch size used by the corresponding model. - * @details The attribute contains a value only if the plugin performs the batches splitting operation. - */ - std::optional _batchSize = std::nullopt; - bool _pipelineIsCreated = false; }; diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 5b7f488d3eb96a..de5e1ac81c4728 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -28,7 +28,6 @@ struct Pipeline { const std::shared_ptr& npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - size_t numberOfCommandLists, uint32_t group_ordinal); Pipeline(const Pipeline&) = delete; @@ -43,12 +42,25 @@ struct Pipeline { void updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex); protected: + std::shared_ptr _graph; const Config _config; + const uint32_t _id; + + /** + * @brief Indicates how many command lists will be used inside the pipeline. + * @details Leveraging multiple command lists implies distributing the input/output buffers accross the batch axis + * between these lists. + * + * If batching is handled on compiler's side then a single command list shall be used, we don't do any + * specific operation inside the plugin in this case. 
+ */ + size_t _number_of_command_lists; + std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; - EventPool _event_pool; - std::vector> _events; + std::shared_ptr _event_pool; + std::vector> _events; bool sync_output_with_fences_ = true; std::shared_ptr _npu_profiling; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 88dfaf944a8b34..a0e5d2d11c1fef 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -20,8 +20,6 @@ using namespace intel_npu; namespace { constexpr std::size_t SINGLE_TENSOR = 0; -constexpr std::size_t BATCH_AXIS = 0; -constexpr std::size_t DEFAULT_BATCH_SIZE = 1; constexpr bool INPUT = true; constexpr bool OUTPUT = false; @@ -96,64 +94,6 @@ bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, c } // namespace -std::optional ZeroInferRequest::get_batch_size(const NetworkMetadata& metadata) { - if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; - if (firstOutputShape.is_dynamic()) { - _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); - return std::nullopt; - } - if (firstOutputShape.rank().get_length() == 0) { - _logger.warning( - "Networks using rank 0 shapes for inputs/outputs are not supported when batching is handled by the plugin"); - return std::nullopt; - } - - const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); - if (candidateBatchSize == 0 || candidateBatchSize == DEFAULT_BATCH_SIZE) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { - for (const IODescriptor& descriptor : descriptors) { - OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), - "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); - - const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; - const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; - - if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || - *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { - return false; - } - - if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { - if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || - *shapeFromIRModel.begin() != candidateBatchSize) { - return false; - } - } - } - - return true; - }; - - if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || - !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - _logger.debug("Batching is handled by the plugin"); - - return candidateBatchSize; -} - //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, @@ -187,13 +127,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& _inputAllocator = std::make_shared(_initStructs, 
ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED); - if (config.get() != ov::intel_npu::BatchMode::COMPILER) { - _batchSize = get_batch_size(_metadata); - } - if (_batchSize.has_value()) { - _numberOfCommandLists = *_batchSize; - } - _logger.debug("ZeroInferRequest::ZeroInferRequest - checking level zero attributes and allocating tensors"); size_t ioIndex = 0; @@ -205,7 +138,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& continue; } - get_level_zero_input(ioIndex) = allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); + get_level_zero_input(ioIndex) = + allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _graph->get_batch_size()); get_input_tensor_data(ioIndex) = TensorData{get_level_zero_input(ioIndex)->data(), get_level_zero_input(ioIndex)->get_byte_size()}; @@ -222,7 +156,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& } _levelZeroOutputTensors.at(ioIndex) = - allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _batchSize); + allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _graph->get_batch_size()); _outputTensorsData.at(ioIndex) = std::optional(TensorData{_levelZeroOutputTensors.at(ioIndex)->data(), _levelZeroOutputTensors.at(ioIndex)->get_byte_size()}); @@ -236,7 +170,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& void ZeroInferRequest::create_pipeline() { for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) { if (is_batched_input(inputIndex)) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { _logger.debug("ZeroInferRequest::create_pipeline - tensors %s were already allocated", _metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); continue; @@ -250,8 +184,11 @@ void ZeroInferRequest::create_pipeline() { } _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - get_level_zero_input(inputIndex) = - allocate_tensor(_metadata.inputs.at(inputIndex), inputIndex, INPUT, *_inputAllocator, _batchSize); + get_level_zero_input(inputIndex) = allocate_tensor(_metadata.inputs.at(inputIndex), + inputIndex, + INPUT, + *_inputAllocator, + _graph->get_batch_size()); get_input_tensor_data(inputIndex) = std::optional( TensorData{get_level_zero_input(inputIndex)->data(), get_level_zero_input(inputIndex)->get_byte_size()}); } @@ -263,17 +200,20 @@ void ZeroInferRequest::create_pipeline() { continue; } _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - _levelZeroOutputTensors.at(outputIndex) = - allocate_tensor(_metadata.outputs.at(outputIndex), outputIndex, OUTPUT, *_outputAllocator, _batchSize); + _levelZeroOutputTensors.at(outputIndex) = allocate_tensor(_metadata.outputs.at(outputIndex), + outputIndex, + OUTPUT, + *_outputAllocator, + _graph->get_batch_size()); _outputTensorsData.at(outputIndex) = std::optional(TensorData{_levelZeroOutputTensors.at(outputIndex)->data(), _levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } // Find the corresponding command queue group. 
- _logger.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); + _logger.debug("ZeroInferRequest::create_pipeline - findGroupOrdinal"); auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties); - _logger.debug("ZeroDevice::ZeroDevice - init completed"); + _logger.debug("ZeroInferRequest::create_pipeline - init completed"); _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); @@ -286,7 +226,6 @@ void ZeroInferRequest::create_pipeline() { _npuProfiling, _inputTensorsData, _outputTensorsData, - _numberOfCommandLists, groupOrdinal); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); @@ -321,7 +260,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr& tenso index, isInput, isInput ? *_inputAllocator : *_outputAllocator, - _batchSize); + _graph->get_batch_size()); setTensorData = true; levelZeroTensorCreatedLocally = true; @@ -444,7 +383,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, get_user_inputs(foundPort.idx) = tensors; if (_initStructs->getMutableCommandListVersion()) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { for (size_t i = 0; i < tensors.size(); i++) { auto remoteTensor = std::dynamic_pointer_cast(tensors[i]._ptr); @@ -525,13 +464,17 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Outputget_batch_size()); tensorsData = std::optional(TensorData{levelZeroTensors->data(), levelZeroTensors->get_byte_size()}); return levelZeroTensors; } void ZeroInferRequest::infer() { + if (_config.get()) { + OPENVINO_THROW("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!"); + } + infer_async(); get_result(); } @@ -567,7 +510,7 @@ void ZeroInferRequest::infer_async() { } if (is_batched_input(inputIndex)) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { for (size_t i = 0; i < userTensor.size(); i++) { auto levelZeroBatchRemoteTensor = std::dynamic_pointer_cast(get_level_zero_input(inputIndex, i)); diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index c782c3e0684f0d..d7f06b813810bb 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -8,6 +8,7 @@ #include #include "intel_npu/common/itt.hpp" +#include "intel_npu/config/runtime.hpp" #include "intel_npu/prefix.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" @@ -23,13 +24,15 @@ Pipeline::Pipeline(const Config& config, const std::shared_ptr& npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - size_t numberOfCommandLists, uint32_t group_ordinal) - : _config(config), - _command_queue(graph->get_command_queue()), - _event_pool{initStructs->getDevice(), - initStructs->getContext(), - numberOfCommandLists ? static_cast(numberOfCommandLists) : 1}, + : _graph(graph), + _config(config), + _id(_graph->get_unique_id()), + _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), + _event_pool{ + std::make_shared(initStructs->getDevice(), + initStructs->getContext(), + _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); @@ -39,20 +42,20 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } - _command_lists.reserve(numberOfCommandLists); - _events.reserve(numberOfCommandLists); - _fences.reserve(numberOfCommandLists); + _command_lists.reserve(_number_of_command_lists); + _events.reserve(_number_of_command_lists); + _fences.reserve(_number_of_command_lists); _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( std::make_unique(initStructs, group_ordinal, initStructs->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i))); - _fences.emplace_back(std::make_unique(*_command_queue)); + _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + _fences.emplace_back(std::make_unique(*_graph->get_command_queue())); } - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { size_t ioIndex = 0; for (const auto& desc : graph->get_input_descriptors()) { if (inputTensorsData.at(ioIndex).size() > 1) { @@ -64,7 +67,7 @@ Pipeline::Pipeline(const Config& config, graph->set_argument_value(desc.idx, static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + - (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); + (i * inputTensorsData.at(ioIndex).at(0)->size) / _number_of_command_lists); ++ioIndex; } @@ -73,10 +76,16 @@ Pipeline::Pipeline(const Config& config, for (const auto& desc : graph->get_output_descriptors()) { graph->set_argument_value(desc.idx, static_cast(outputTensorsData.at(ioIndex)->mem) + - (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + (i * outputTensorsData.at(ioIndex)->size) / _number_of_command_lists); ++ioIndex; } + if (_config.get()) { + if (_graph->get_last_submitted_event(i)) { + _graph->get_last_submitted_event(i)->AppendWaitOnEvent(*_command_lists.at(i)); + } + } + /// append timestamp command if feature was activated if (_npu_profiling != nullptr) { _command_lists.at(i)->appendBarrier(); @@ -92,6 +101,15 @@ Pipeline::Pipeline(const Config& config, _command_lists.at(i)->appendNpuTimestamp(reinterpret_cast(_npu_profiling->npu_ts_infer_end)); } + if (_config.get()) { + if (_graph->get_last_submitted_event(i)) { + _graph->get_last_submitted_event(i)->AppendEventReset(*_command_lists.at(i)); + } + + _events.at(i)->AppendSignalEvent(*_command_lists.at(i)); + _graph->set_last_submitted_event(_events.at(i), i); + } + // appendBarrier used in L0 as well if (!sync_output_with_fences_) { _command_lists.at(i)->appendBarrier(); @@ -105,12 +123,24 @@ Pipeline::Pipeline(const Config& config, void Pipeline::push() { _logger.debug("Pipeline - push() started"); + if (_config.get()) { + if (_id) { + auto previousIndex = _graph->get_last_submitted_id(); + + if (_id != ++previousIndex) { + OPENVINO_THROW("Inferences should be called in the same order they were called the first time!"); + } + } + + _graph->set_last_submitted_id(_id); + } + for (size_t i = 0; i < _command_lists.size(); ++i) { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); if (sync_output_with_fences_) { - 
_command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); + _graph->get_command_queue()->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - _command_queue->executeCommandList(*_command_lists.at(i)); + _graph->get_command_queue()->executeCommandList(*_command_lists.at(i)); } } @@ -154,12 +184,12 @@ void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index) OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); _logger.debug("Pipeline - updateCommandList"); - const size_t numberOfCommandLists = _command_lists.size(); + const size_t _number_of_command_lists = _command_lists.size(); - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.at(i)->updateMutableCommandList( index, - static_cast(tensorsData.mem) + (i * tensorsData.size) / numberOfCommandLists); + static_cast(tensorsData.mem) + (i * tensorsData.size) / _number_of_command_lists); _command_lists.at(i)->close(); } }; @@ -168,9 +198,9 @@ void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index, OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); _logger.debug("Pipeline - updateCommandList"); - const size_t numberOfCommandLists = _command_lists.size(); + const size_t _number_of_command_lists = _command_lists.size(); - OPENVINO_ASSERT(commandListIndex < numberOfCommandLists, + OPENVINO_ASSERT(commandListIndex < _number_of_command_lists, "Command list index is higgher than the number of Command lists ", commandListIndex); diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 51c4a4cf26eafd..7e718d9172f4f7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -9,6 +9,7 @@ #include #include "intel_npu/network_metadata.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" #include "openvino/runtime/profiling_info.hpp" @@ -17,13 +18,10 @@ namespace intel_npu { class IGraph : public std::enable_shared_from_this { public: - IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) - : _handle(handle), - _metadata(std::move(metadata)) { - if (blob.has_value()) { - _blob = std::move(*blob); - } - } + IGraph(ze_graph_handle_t handle, + NetworkMetadata metadata, + const Config& config, + std::optional> blob); virtual void export_blob(std::ostream& stream) const = 0; @@ -36,55 +34,48 @@ class IGraph : public std::enable_shared_from_this { virtual ~IGraph() = default; - const NetworkMetadata& get_metadata() const { - return _metadata; - } - - ze_graph_handle_t get_handle() const { - return _handle; - } - - void update_network_name(std::string_view name) { - _metadata.name = name; - } - - inline const std::vector& get_input_descriptors() const { - return _input_descriptors; - } - - inline const std::vector& get_output_descriptors() const { - return _output_descriptors; - } - - inline const std::shared_ptr& get_command_queue() const { - return _command_queue; - } - - void set_workload_type(const ov::WorkloadType workloadType) const { - if (_command_queue == nullptr) { - return; - } - - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case 
ov::WorkloadType::DEFAULT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; - break; - case ov::WorkloadType::EFFICIENT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; - break; - default: - OPENVINO_THROW("Unknown value for WorkloadType!"); - } - - _command_queue->setWorkloadType(zeWorkloadType); - } - - std::mutex& get_mutex() { - return _mutex; - } + const NetworkMetadata& get_metadata() const; + ze_graph_handle_t get_handle() const; + + void update_network_name(std::string_view name); + + const std::vector& get_input_descriptors() const; + const std::vector& get_output_descriptors() const; + const std::shared_ptr& get_command_queue() const; + + void set_workload_type(const ov::WorkloadType workloadType) const; + + std::mutex& get_mutex(); + + void set_last_submitted_event(const std::shared_ptr& event, size_t indexOfCommandList); + const std::shared_ptr& get_last_submitted_event(size_t indexOfCommandList) const; + + uint32_t get_unique_id(); + void set_last_submitted_id(uint32_t id_index); + const uint32_t get_last_submitted_id() const; + + const std::optional get_batch_size() const; protected: + /** + * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by + * the model will also be deduced and returned. + * @details Batching can be handled by the plugin only if: + * - The batch axis is the first axis. + * - The batch size received by the compiler takes the default value of 1. + * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the + * default one. + * + * If any of the previous conditions is not fulfilled, the functon will return the default batch size, thus no + * custom algorithm will be applied inside the plugin in order to address batching. + * + * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will + * ultimately be used for determining the batch size. + * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside + * the plugin. + */ + std::optional get_batch_size(const NetworkMetadata& metadata); + ze_graph_handle_t _handle = nullptr; NetworkMetadata _metadata; @@ -92,12 +83,24 @@ class IGraph : public std::enable_shared_from_this { std::vector _output_descriptors; std::shared_ptr _command_queue; + std::vector> _last_submitted_event; // Used to protect zero pipeline creation in the graph. The pipeline should be created only once per graph when the // first inference starts running std::mutex _mutex; std::vector _blob; + + uint32_t _unique_id = 0; + uint32_t _last_submitted_id; + + /** + * @brief The batch size used by the corresponding model. + * @details The attribute contains a value only if the plugin performs the batches splitting operation. 
+ */ + std::optional _batch_size = std::nullopt; + + Logger _logger; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp new file mode 100644 index 00000000000000..fd5463af5eea3e --- /dev/null +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -0,0 +1,159 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_npu/common/igraph.hpp" + +#include "intel_npu/config/compiler.hpp" +#include "intel_npu/config/runtime.hpp" + +namespace { +constexpr std::size_t BATCH_AXIS = 0; +constexpr std::size_t DEFAULT_BATCH_SIZE = 1; +} // namespace + +namespace intel_npu { + +IGraph::IGraph(ze_graph_handle_t handle, + NetworkMetadata metadata, + const Config& config, + std::optional> blob) + : _handle(handle), + _metadata(std::move(metadata)), + _logger("IGraph", config.get()) { + if (blob.has_value()) { + _blob = std::move(*blob); + } +} + +const NetworkMetadata& IGraph::get_metadata() const { + return _metadata; +} + +ze_graph_handle_t IGraph::get_handle() const { + return _handle; +} + +void IGraph::update_network_name(std::string_view name) { + _metadata.name = name; +} + +const std::vector& IGraph::get_input_descriptors() const { + return _input_descriptors; +} + +const std::vector& IGraph::get_output_descriptors() const { + return _output_descriptors; +} + +const std::shared_ptr& IGraph::get_command_queue() const { + return _command_queue; +} + +void IGraph::set_workload_type(const ov::WorkloadType workloadType) const { + if (_command_queue == nullptr) { + return; + } + + ze_command_queue_workload_type_t zeWorkloadType; + switch (workloadType) { + case ov::WorkloadType::DEFAULT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; + break; + case ov::WorkloadType::EFFICIENT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; + break; + default: + OPENVINO_THROW("Unknown value for WorkloadType!"); + } + + _command_queue->setWorkloadType(zeWorkloadType); +} + +std::mutex& IGraph::get_mutex() { + return _mutex; +} + +void IGraph::set_last_submitted_event(const std::shared_ptr& event, size_t indexOfCommandList) { + _last_submitted_event[indexOfCommandList] = event; +} + +const std::shared_ptr& IGraph::get_last_submitted_event(size_t indexOfCommandList) const { + return _last_submitted_event[indexOfCommandList]; +} + +uint32_t IGraph::get_unique_id() { + return _unique_id++; +} + +void IGraph::set_last_submitted_id(uint32_t id_index) { + _last_submitted_id = id_index; +} + +const uint32_t IGraph::get_last_submitted_id() const { + return _last_submitted_id; +} + +std::optional IGraph::get_batch_size(const NetworkMetadata& metadata) { + if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; + if (firstOutputShape.is_dynamic()) { + _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); + return std::nullopt; + } + if (firstOutputShape.rank().get_length() == 0) { + _logger.warning("Networks using rank 0 shapes for inputs/outputs are not supported when batching is " + "handled by the plugin"); + return std::nullopt; + } + + const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); + if (candidateBatchSize == 0 || 
candidateBatchSize == DEFAULT_BATCH_SIZE) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { + for (const IODescriptor& descriptor : descriptors) { + OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), + "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); + + const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; + const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; + + if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || + *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { + return false; + } + + if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { + if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || + *shapeFromIRModel.begin() != candidateBatchSize) { + return false; + } + } + } + + return true; + }; + + if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || + !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + _logger.debug("Batching is handled by the plugin"); + + return candidateBatchSize; +} + +const std::optional IGraph::get_batch_size() const { + return _batch_size; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index f819ed73711cf2..9d634656db109a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -541,13 +541,21 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, content = std::regex_replace(content, std::regex(batchstr.str()), ""); } - // NPU_DEFER_WEIGHTS_LOAD is not supported in versions < 6.2 - need to remove it - if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 2)) { + // NPU_DEFER_WEIGHTS_LOAD is needed at runtime only + { std::ostringstream batchstr; batchstr << ov::intel_npu::defer_weights_load.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; - logger.warning( - "NPU_DEFER_WEIGHTS_LOAD property is not suppored by this compiler version. Removing from parameters"); + logger.info("NPU_DEFER_WEIGHTS_LOAD property is needed at runtime only. Removing from parameters"); + content = std::regex_replace(content, std::regex(batchstr.str()), ""); + } + + // NPU_RUN_INFERENCES_SEQUENTIALLY is needed at runtime only + { + std::ostringstream batchstr; + batchstr << ov::intel_npu::run_inferences_sequentially.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER + << "\\S+" << VALUE_DELIMITER; + logger.info("NPU_RUN_INFERENCES_SEQUENTIALLY property is needed at runtime only. 
Removing from parameters"); content = std::regex_replace(content, std::regex(batchstr.str()), ""); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index e1f3990b835e8d..0d180f983ad3a9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -16,7 +16,7 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, const Config& config, std::optional> blob) - : IGraph(graphHandle, std::move(metadata), std::move(blob)), + : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _logger("DriverGraph", config.get()) { @@ -126,6 +126,16 @@ void DriverGraph::initialize(const Config& config) { // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. _blobIsReleased = release_blob(config); + + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batch_size = get_batch_size(_metadata); + } + + if (config.get()) { + auto number_of_command_lists = _batch_size.has_value() ? *_batch_size : 1; + + _last_submitted_event.resize(number_of_command_lists); + } } bool DriverGraph::release_blob(const Config& config) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index c99069a0a9760f..b1658e7e0582e0 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -17,7 +17,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, std::vector blob, const Config& config) - : IGraph(graphHandle, std::move(metadata), std::optional>(std::move(blob))), + : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -115,6 +115,16 @@ void PluginGraph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_handle, config); + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batch_size = get_batch_size(_metadata); + } + + if (config.get()) { + auto number_of_command_lists = _batch_size.has_value() ? 
*_batch_size : 1; + + _last_submitted_event.resize(number_of_command_lists); + } + _logger.debug("Graph initialize finish"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index c6be2793fe6f70..be61fa4de081a6 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -668,6 +668,10 @@ ov::SoPtr ov::npuw::CompiledModel::compile_submodel(const st // NOTE(dm): Not sure if it is required for the NPUW plugin, but likely it is auto& device_config = m_meta_devices[device]; + if (ov::npuw::util::starts_with(device, "NPU") && m_cfg.get<::intel_npu::NPUW_UNFOLD_IREQS>()) { + device_config["NPU_RUN_INFERENCES_SEQUENTIALLY"] = "YES"; + } + const auto& cache_dir = m_cfg.get<::intel_npu::NPUW_CACHE_DIR>(); if (!cache_dir.empty()) { LOG_INFO("NPUW will try to utilize CACHE_DIR for " << submodel->get_friendly_name() << " submodel."); diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 4baf15d76718a8..4e86d32d2f72b1 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -311,6 +311,12 @@ void CompiledModel::initialize_properties() { [](const Config& config) { return config.getString(); }}}, + {ov::intel_npu::run_inferences_sequentially.name(), + {false, + ov::PropertyMutability::RO, + [](const Config& config) { + return config.get(); + }}}, }; for (auto& property : _properties) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9f77d952fd813b..18a96bff02fb80 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -568,6 +568,12 @@ Plugin::Plugin() [](const Config& config) { return config.getString(); }}}, + {ov::intel_npu::run_inferences_sequentially.name(), + {false, + ov::PropertyMutability::RW, + [](const Config& config) { + return config.get(); + }}}, {ov::intel_npu::batch_mode.name(), {false, ov::PropertyMutability::RW, [](const Config& config) { return config.getString(); }}}}; diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index 8883bb99dd178e..0df0c5d66169a4 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -188,7 +188,7 @@ static inline uint32_t findGroupOrdinal(ze_device_handle_t device_handle, const "zeDeviceGetCommandQueueGroupProperties", zeDeviceGetCommandQueueGroupProperties(device_handle, &command_queue_group_count, nullptr)); - log.debug("ZeroDevice::ZeroDevice - resize command_queue_group_count"); + log.debug("zero_utils::findGroupOrdinal - resize command_queue_group_count"); command_group_properties.resize(command_queue_group_count); for (auto& prop : command_group_properties) { diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 9b5b1b4540fbe7..61999376680e90 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -37,7 +37,7 @@ class EventPool { class Event 
{ public: Event() = delete; - Event(const ze_event_pool_handle_t& event_pool, uint32_t event_index); + Event(const std::shared_ptr& event_pool, uint32_t event_index); Event(const Event&) = delete; Event(Event&&) = delete; Event& operator=(const Event&) = delete; @@ -51,6 +51,7 @@ class Event { ~Event(); private: + std::shared_ptr _event_pool; ze_event_handle_t _handle = nullptr; Logger _log; diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 858e65d4b5e6ee..d95b0e172a7d64 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -24,9 +24,11 @@ EventPool::~EventPool() { } } -Event::Event(const ze_event_pool_handle_t& event_pool, uint32_t event_index) : _log("Event", Logger::global().level()) { +Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) + : _event_pool(event_pool), + _log("Event", Logger::global().level()) { ze_event_desc_t event_desc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, event_index, 0, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(event_pool, &event_desc, &_handle)); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(_event_pool->handle(), &event_desc, &_handle)); } void Event::AppendSignalEvent(CommandList& command_list) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index 5d023fe9d0bee6..e4a49ce9b7ccdb 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -19,6 +19,12 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, ::testing::ValuesIn(configsInferRequestRunTests)), InferRequestRunTests::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + RunSeqTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + InferRequestRunTests::getTestCaseName); + const std::vector batchingConfigs = { {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)}, {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)}, @@ -29,3 +35,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), ::testing::ValuesIn(batchingConfigs)), InferRequestRunTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + BatchingRunSeqTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(batchingConfigs)), + InferRequestRunTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 20be5ed25edd27..07466677b9d547 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -103,9 +103,7 @@ class InferRequestRunTests : public ov::test::behavior::OVPluginTestBase, APIBaseTest::TearDown(); } - std::shared_ptr createBatchingModel(element::Type type, - const PartialShape& shape, - const ov::Layout& layout) { + std::shared_ptr createModel(element::Type type, const PartialShape& shape, const ov::Layout& 
layout) { ResultVector res; ParameterVector params; @@ -352,7 +350,7 @@ TEST_P(BatchingRunTests, CheckBatchingSupportInfer) { ov::InferRequest inference_request; auto batch_shape = Shape{4, 2, 32, 32}; - std::shared_ptr ov_model_batch = createBatchingModel(element::f32, batch_shape, "N..."); + std::shared_ptr ov_model_batch = createModel(element::f32, batch_shape, "N..."); OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model_batch, target_device, configuration)); OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); @@ -365,7 +363,7 @@ TEST_P(BatchingRunTests, CheckBatchingSupportAsync) { ov::InferRequest inference_request; auto batch_shape = Shape{4, 2, 32, 32}; - std::shared_ptr ov_model_batch = createBatchingModel(element::f32, batch_shape, "N..."); + std::shared_ptr ov_model_batch = createModel(element::f32, batch_shape, "N..."); OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model_batch, target_device, configuration)); OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); @@ -396,7 +394,7 @@ TEST_P(BatchingRunTests, UseCompilerBatchingErrorPluginBatching) { TEST_P(BatchingRunTests, SetInputTensorInfer) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; compiled_model = core->compile_model(model, target_device, configuration); @@ -422,7 +420,7 @@ TEST_P(BatchingRunTests, SetInputTensorInfer) { TEST_P(BatchingRunTests, SetInputTensorAsync) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; compiled_model = core->compile_model(model, target_device, configuration); @@ -449,7 +447,7 @@ TEST_P(BatchingRunTests, SetInputTensorAsync) { TEST_P(BatchingRunTests, SetInputTensorInfer_Caching) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; m_cache_dir = generateCacheDirName(GetTestName()); @@ -480,7 +478,7 @@ TEST_P(BatchingRunTests, SetInputTensorInfer_Caching) { TEST_P(BatchingRunTests, CheckTwoRunsInfer) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; auto context = core->get_default_context(target_device); @@ -524,6 +522,250 @@ TEST_P(BatchingRunTests, CheckTwoRunsInfer) { delete[] buffer; } +using RunSeqTests = InferRequestRunTests; + +TEST_P(RunSeqTests, CheckMultipleRunsSeq0) { + auto shape = Shape{1, 64, 64, 256}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + + const uint32_t inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array 
output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + for (uint32_t i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[0].set_input_tensor(input_tensor); + inference_request[0].set_output_tensor(output_tensor[0]); + + const uint32_t runs = 10; + for (uint32_t z = 0; z < runs; z++) { + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(z); + } + + inference_request[0].start_async(); // Adds '1' to each element + + for (uint32_t i = 1; i < inferences; i++) { + inference_request[i].set_input_tensor(output_tensor[i - 1]); + inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); // Adds '1' to each element + } + + inference_request[inferences - 1].wait(); + + float expected_result = static_cast(z) + 1.f; + + for (uint32_t i = 0; i < inferences; i++) { + auto* output_tensor_data = reinterpret_cast(output_tensor[i].data()); + for (size_t j = 0; j < shape_size; ++j) { + EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5) + << "Run=" << z << "Output=" << i << " Expected=" << expected_result + << ", actual=" << output_tensor_data[j] << " for index " << j; + } + expected_result++; + } + } +} + +TEST_P(RunSeqTests, CheckMultipleRunsSeq1) { + auto shape = Shape{1, 64, 64, 256}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + + const int inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + + for (int i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[inferences - 1].set_input_tensor(input_tensor); + inference_request[inferences - 1].set_output_tensor(output_tensor[inferences - 1]); + + const int runs = 10; + for (int z = 0; z < runs; z++) { + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(z); + } + + inference_request[inferences - 1].start_async(); // Adds '1' to each element + + for (int i = inferences - 2; i >= 0; i--) { + inference_request[i].set_input_tensor(output_tensor[i + 1]); + inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); // Adds '1' to each element + } + + inference_request[0].wait(); + + float expected_result = static_cast(z) + 1.f; + + for (int i = inferences - 1; i >= 0; i--) { + auto* output_tensor_data = reinterpret_cast(output_tensor[i].data()); + for (size_t j = 0; j < shape_size; ++j) { + EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5) + << "Run=" << z << "Output=" << i << " Expected=" << expected_result + << ", actual=" << output_tensor_data[j] << " for index " << j; + } + expected_result++; + } + } +} + +TEST_P(RunSeqTests, CheckMultipleRunsSeq2) { + auto shape = Shape{1, 64, 64, 256}; + auto shape_size = ov::shape_size(shape); + auto model = 
createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + + const int inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + + for (int i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[inferences - 1].set_input_tensor(input_tensor); + inference_request[inferences - 1].set_output_tensor(output_tensor[inferences - 1]); + + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = 1.f; + } + + inference_request[inferences - 1].start_async(); + + for (int i = inferences - 2; i >= 0; i--) { + inference_request[i].set_input_tensor(output_tensor[i + 1]); + inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); + } + + inference_request[0].wait(); + + try { + inference_request[5].start_async(); + inference_request[5].wait(); + } catch (const std::exception& ex) { + ASSERT_FALSE(false) << ex.what(); + return; + } + + ASSERT_FALSE(true) << "Exception is expected but it didn't throw any exception!"; +} + +TEST_P(RunSeqTests, CheckMultipleRunsSeq3) { + auto shape = Shape{1, 64, 64, 256}; + auto model = createModel(element::f32, shape, "N..."); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + OV_EXPECT_THROW(inference_request.infer(), + ov::Exception, + HasSubstr("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!")); +} + +using BatchingRunSeqTests = InferRequestRunTests; + +TEST_P(BatchingRunSeqTests, CheckMultipleBatchingRunsSeq) { + auto shape = Shape{4, 2, 64, 64}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + + const uint32_t inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + for (uint32_t i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[0].set_input_tensor(input_tensor); + inference_request[0].set_output_tensor(output_tensor[0]); + + const uint32_t runs = 10; + for (uint32_t z = 0; z < runs; z++) { + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(z); + } + + inference_request[0].start_async(); // Adds '1' to each element + + for (uint32_t i = 1; i < inferences; i++) { + inference_request[i].set_input_tensor(output_tensor[i - 1]); + 
inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); // Adds '1' to each element + } + + inference_request[inferences - 1].wait(); + + float expected_result = static_cast(z) + 1.f; + + for (uint32_t i = 0; i < inferences; i++) { + auto* output_tensor_data = reinterpret_cast(output_tensor[i].data()); + for (size_t j = 0; j < shape_size; ++j) { + EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5) + << "Run=" << z << "Output=" << i << " Expected=" << expected_result + << ", actual=" << output_tensor_data[j] << " for index " << j; + } + expected_result++; + } + } +} + } // namespace behavior } // namespace test } // namespace ov
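
For reference, a minimal usage sketch of the new option, modeled on the RunSeqTests added above. This is not part of the patch: the model path and request count are placeholders, the string key is the NPU_RUN_INFERENCES_SEQUENTIALLY property introduced in npu_private_properties.hpp, and the behavior notes reflect the checks added in this patch (synchronous infer() throws, and requests must be re-submitted in the order of their first submission).

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Placeholder model path for this sketch.
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");

    // Enable the experimental in-order execution mode added by this patch.
    ov::AnyMap config = {{"NPU_RUN_INFERENCES_SEQUENTIALLY", true}};
    ov::CompiledModel compiled = core.compile_model(model, "NPU", config);

    // Requests become tied to the order of their first submission; later runs must
    // re-submit them in that same order or the pipeline throws.
    ov::InferRequest req0 = compiled.create_infer_request();
    ov::InferRequest req1 = compiled.create_infer_request();

    // Only the asynchronous API is allowed in this mode; req0.infer() would throw
    // "Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!".
    req0.start_async();
    req1.start_async();

    // The new tests wait on the last request only, relying on in-order completion;
    // waiting on each request individually is also valid.
    req1.wait();

    return 0;
}
```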