diff --git a/docs/articles_en/about-openvino/performance-benchmarks.rst b/docs/articles_en/about-openvino/performance-benchmarks.rst index 8a58dc27df1f83..78a364c18ca4e6 100644 --- a/docs/articles_en/about-openvino/performance-benchmarks.rst +++ b/docs/articles_en/about-openvino/performance-benchmarks.rst @@ -64,7 +64,7 @@ implemented in your solutions. Click the buttons below to see the chosen benchma :outline: :expand: - :material-regular:`bar_chart;1.4em` OVMS for GenAI (coming soon) + :material-regular:`bar_chart;1.4em` OVMS for GenAI diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index ebd4667d544616..f18b66915fc3ce 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -218,6 +218,114 @@ Specify generation_config to use grouped beam search: cout << pipe.generate("The Sun is yellow because", config); } +Efficient Text Generation via Speculative Decoding +################################################## + +Speculative decoding (or assisted-generation) enables faster token generation +when an additional smaller draft model is used alongside the main model. +The draft model predicts the next K tokens one by one in an autoregressive manner, +while the main model validates these predictions and corrects them if necessary. + +Each predicted token is compared, and when there is a difference between the draft and +main model, the last token predicted by the main model is kept. Then, the draft +model acquires this token and tries prediction of the next K tokens, +thus repeating the cycle. + +This method eliminates the need for multiple infer requests to the main model, +which results in increased performance. Its implementation in the pipeline is +shown in the code samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. code-block:: python + + import openvino_genai + import queue + import threading + + def streamer(subword): + print(subword, end='', flush=True) + return False + + def infer(model_dir: str, draft_model_dir: str, prompt: str): + main_device = 'CPU' # GPU can be used as well. + draft_device = 'CPU' + + scheduler_config = openvino_genai.SchedulerConfig() + scheduler_config.cache_size = 2 + + draft_model = openvino_genai.draft_model(draft_model_dir, draft_device) + + pipe = openvino_genai.LLMPipeline(model_dir, main_device, scheduler_config=scheduler_config, draft_model=draft_model) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + config.num_assistant_tokens = 5 + + pipe.generate(prompt, config, streamer) + + + For more information, refer to the + `Python sample `__. + + + .. tab-item:: C++ + :sync: cpp + + .. 
code-block:: cpp + + #include + + #include "openvino/genai/llm_pipeline.hpp" + + int main(int argc, char* argv[]) try { + if (4 != argc) { + throw std::runtime_error(std::string{"Usage: "} + argv[0] + " ''"); + } + + ov::genai::GenerationConfig config; + config.max_new_tokens = 100; + config.num_assistant_tokens = 5; + + std::string main_model_path = argv[1]; + std::string draft_model_path = argv[2]; + std::string prompt = argv[3]; + + std::string main_device = "CPU", draft_device = "CPU"; + + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.cache_size = 5; + + ov::genai::LLMPipeline pipe( + main_model_path, + main_device, + ov::genai::draft_model(draft_model_path, draft_device), + ov::genai::scheduler_config(scheduler_config)); + + auto streamer = [](std::string subword) { + std::cout << subword << std::flush; + return false; + }; + + pipe.generate(prompt, config, streamer); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) { + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ Comparing with Hugging Face Results ####################################### diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst index 6033bd8ed96106..245a2648aab491 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide/genai-use-cases.rst @@ -118,7 +118,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. image_write("baseline.bmp", image) For more information, refer to the - `Python sample `__ + `Python sample `__ .. tab-item:: C++ :sync: cpp @@ -218,7 +218,7 @@ sample shows basic usage of the ``Text2ImagePipeline`` pipeline. For more information, refer to the - `C++ sample `__ + `C++ sample `__ @@ -269,7 +269,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. For more information, refer to the - `Python sample `__. + `Python sample `__. .. tab-item:: C++ :sync: cpp @@ -322,7 +322,7 @@ and use audio files in WAV format at a sampling rate of 16 kHz as input. For more information, refer to the - `C++ sample `__. + `C++ sample `__. Using GenAI in Chat Scenario @@ -367,7 +367,7 @@ mark a conversation session, as shown in the samples below: For more information, refer to the - `Python sample `__. + `Python sample `__. .. tab-item:: C++ :sync: cpp @@ -415,7 +415,142 @@ mark a conversation session, as shown in the samples below: For more information, refer to the - `C++ sample `__ + `C++ sample `__ + + +Using GenAI with Vision Language Models +####################################### + +OpenVINO GenAI introduces the ``openvino_genai.VLMPipeline`` pipeline for +inference of multimodal text-generation Vision Language Models (VLMs). +With a text prompt and an image as input, VLMPipeline can generate text using +models such as LLava or MiniCPM-V. See the chat scenario presented +in the samples below: + +.. tab-set:: + + .. tab-item:: Python + :sync: py + + .. 
code-block:: python + + import numpy as np + import openvino_genai + from PIL import Image + from openvino import Tensor + from pathlib import Path + + + def streamer(subword: str) -> bool: + print(subword, end='', flush=True) + + + def read_image(path: str) -> Tensor: + pic = Image.open(path).convert("RGB") + image_data = np.array(pic.getdata()).reshape(1, pic.size[1], pic.size[0], 3).astype(np.uint8) + return Tensor(image_data) + + + def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + + + def infer(model_dir: str, image_dir: str): + rgbs = read_images(image_dir) + device = 'CPU' # GPU can be used as well. + enable_compile_cache = dict() + if "GPU" == device: + enable_compile_cache["CACHE_DIR"] = "vlm_cache" + pipe = openvino_genai.VLMPipeline(model_dir, device, **enable_compile_cache) + + config = openvino_genai.GenerationConfig() + config.max_new_tokens = 100 + + pipe.start_chat() + prompt = input('question:\n') + pipe.generate(prompt, images=rgbs, generation_config=config, streamer=streamer) + + while True: + try: + prompt = input("\n----------\n" + "question:\n") + except EOFError: + break + pipe.generate(prompt, generation_config=config, streamer=streamer) + pipe.finish_chat() + + + For more information, refer to the + `Python sample `__. + + .. tab-item:: C++ + :sync: cpp + + .. code-block:: cpp + + #include "load_image.hpp" + #include + #include + + bool print_subword(std::string&& subword) { + return !(std::cout << subword << std::flush); + } + + int main(int argc, char* argv[]) try { + if (3 != argc) { + throw std::runtime_error(std::string{"Usage "} + argv[0] + " "); + } + + std::vector rgbs = utils::load_images(argv[2]); + + std::string device = "CPU"; // GPU can be used as well. + ov::AnyMap enable_compile_cache; + if ("GPU" == device) { + enable_compile_cache.insert({ov::cache_dir("vlm_cache")}); + } + ov::genai::VLMPipeline pipe(argv[1], device, enable_compile_cache); + + ov::genai::GenerationConfig generation_config; + generation_config.max_new_tokens = 100; + + std::string prompt; + + pipe.start_chat(); + std::cout << "question:\n"; + + std::getline(std::cin, prompt); + pipe.generate(prompt, + ov::genai::images(rgbs), + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + std::cout << "\n----------\n" + "question:\n"; + while (std::getline(std::cin, prompt)) { + pipe.generate(prompt, + ov::genai::generation_config(generation_config), + ov::genai::streamer(print_subword)); + std::cout << "\n----------\n" + "question:\n"; + } + pipe.finish_chat(); + } catch (const std::exception& error) { + try { + std::cerr << error.what() << '\n'; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } catch (...) 
{ + try { + std::cerr << "Non-exception object thrown\n"; + } catch (const std::ios_base::failure&) {} + return EXIT_FAILURE; + } + + + For more information, refer to the + `C++ sample `__ Additional Resources ##################### @@ -423,4 +558,6 @@ Additional Resources * :doc:`Install OpenVINO GenAI <../../../get-started/install-openvino/install-openvino-genai>` * `OpenVINO GenAI Repo `__ * `OpenVINO GenAI Samples `__ +* A Jupyter notebook demonstrating + `Visual-language assistant with MiniCPM-V2 and OpenVINO `__ * `OpenVINO Tokenizers `__ diff --git a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json index f96fb11e6b029d..0d53c3813542d2 100644 --- a/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json +++ b/docs/sphinx_setup/_static/benchmarks_files/data/graph-data-ovms-genai.json @@ -1,45 +1,330 @@ [ + { + "Platform": "Intel® Xeon® Platinum 8380", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": false, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 94.97, + "0.4": 187.12, + "0.6": 271.85, + "0.8": 290.81, + "1.0": 291.39, + "2.0": 291.45, + "inf": 291.59 + }, + "Latency": { + "0.2": 74.35, + "0.4": 122.25, + "0.6": 467.49, + "0.8": 749.39, + "1.0": 771.39, + "2.0": 773.31, + "inf": 783.63 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 94.83, + "0.4": 187.83, + "0.6": 272.32, + "0.8": 284.07, + "1.0": 291.88, + "2.0": 291.91, + "inf": 288.62 + }, + "Latency": { + "0.2": 82.31, + "0.4": 134.38, + "0.6": 495.99, + "0.8": 794.41, + "1.0": 798.39, + "2.0": 800.33, + "inf": 809.56 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.15, + "0.4": 188.31, + "0.6": 279.3, + "0.8": 366.78, + "1.0": 454.27, + "2.0": 788.9, + "inf": 825.97 + }, + "Latency": { + "0.2": 60.88, + "0.4": 71.96, + "0.6": 83.45, + "0.8": 103.77, + "1.0": 128.12, + "2.0": 237.62, + "inf": 253.59 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.06, + "0.4": 188.47, + "0.6": 280.54, + "0.8": 367.47, + "1.0": 450.81, + "2.0": 774.57, + "inf": 793.78 + }, + "Latency": { + "0.2": 63.84, + "0.4": 76.22, + "0.6": 87.21, + "0.8": 104.75, + "1.0": 136.77, + "2.0": 259.2, + "inf": 273.58 + } + } + ] + } + } + }, { "Platform": "Intel® Xeon® Platinum 8580", - "Model": "mistralai/Mistral-7B-v0.1", - "PlatformType": "None", + "Model": "meta-llama/Llama-2-7b-chat-hf", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "Vllm": { + "OpenVINO Model Server": { "Precisions": [ { "Throughput": { - "0.2": "350.06", - "0.6": "486.89", - "0.8": "575.92", - "2.0": "778.07" + "0.2": 95.29, + "0.4": 188.33, + "0.6": 280.09, + "0.8": 367.29, + "1.0": 453.21, + "2.0": 780.05, + "inf": 751.34 + }, + "Latency": { + "0.2": 52.44, + "0.4": 70.06, + "0.6": 84.54, + "0.8": 108.91, + "1.0": 136.45, + "2.0": 253.55, + "inf": 281.85 } - }, + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 95.0, + "0.4": 
188.26, + "0.6": 279.78, + "0.8": 366.69, + "1.0": 450.26, + "2.0": 770.74, + "inf": 794.39 + }, + "Latency": { + "0.2": 58.07, + "0.4": 77.65, + "0.6": 91.14, + "0.8": 113.61, + "1.0": 144.21, + "2.0": 269.13, + "inf": 273.27 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8380", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": false, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 82.46, + "0.4": 162.73, + "0.6": 240.08, + "0.8": 273.75, + "1.0": 275.85, + "2.0": 276.3, + "inf": 275.15 + }, "Latency": { - "0.2": "60.93", - "0.6": "91.63", - "0.8": "113.61", - "2.0": "240.25" + "0.2": 76.49, + "0.4": 122.1, + "0.6": 318.14, + "0.8": 785.8, + "1.0": 805.58, + "2.0": 809.37, + "inf": 816.2 } } ] }, - "Ovms": { + "vLLM with OpenVINO backend": { "Precisions": [ { "Throughput": { - "0.2": "90.98", - "0.6": "266.24", - "0.8": "351.63", - "2.0": "195.16" + "0.2": 82.32, + "0.4": 162.98, + "0.6": 239.28, + "2.0": 270.37 + }, + "Latency": { + "0.2": 87.92, + "0.4": 142.3, + "0.6": 343.36, + "2.0": 873.0 } - }, + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 82.61, + "0.4": 164.44, + "0.6": 244.92, + "0.8": 323.34, + "1.0": 400.78, + "2.0": 731.9, + "inf": 848.45 + }, "Latency": { - "0.2": "54.9", - "0.6": "78.78", - "0.8": "95.78", - "2.0": "352.23" + "0.2": 60.77, + "0.4": 69.1, + "0.6": 74.36, + "0.8": 81.41, + "1.0": 100.17, + "2.0": 206.5, + "inf": 246.56 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.54, + "0.4": 163.66, + "0.6": 243.88, + "0.8": 322.75, + "1.0": 400.46, + "2.0": 727.1 + }, + "Latency": { + "0.2": 65.37, + "0.4": 75.87, + "0.6": 81.14, + "0.8": 93.91, + "1.0": 107.13, + "2.0": 229.57 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8580", + "Model": "meta-llama/Meta-Llama-3-8B-Instruct", + "featured_SKU": true, + "whats_new_model": true, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.55, + "0.4": 164.52, + "0.6": 243.96, + "0.8": 323.07, + "1.0": 399.68, + "2.0": 727.18, + "inf": 856.72 + }, + "Latency": { + "0.2": 54.57, + "0.4": 69.17, + "0.6": 80.32, + "0.8": 92.94, + "1.0": 111.06, + "2.0": 215.46, + "inf": 245.72 + } + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 82.64, + "0.6": 243.81, + "0.8": 321.8, + "1.0": 398.78, + "2.0": 722.48, + "inf": 792.34 + }, + "Latency": { + "0.2": 61.49, + "0.6": 90.54, + "0.8": 106.25, + "1.0": 123.6, + "2.0": 245.91, + "inf": 279.21 } } ] @@ -47,46 +332,168 @@ } }, { - "Platform": "Intel® Xeon® Platinum 8530", + "Platform": "Intel® Xeon® Platinum 8380", "Model": "mistralai/Mistral-7B-v0.1", - "PlatformType": "None", + "featured_SKU": false, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", "Parameters": { - "Vllm": { + "OpenVINO Model Server": { + "Precisions": [ + { + "Throughput": { + "0.2": 91.74, + "0.4": 180.4, + "0.6": 262.97, + "0.8": 287.36, + "1.0": 289.08, + "2.0": 289.06, + "inf": 290.69 + }, + "Latency": { + "0.2": 74.84, + "0.4": 115.4, + "0.6": 
345.64, + "0.8": 757.42, + "1.0": 776.6, + "2.0": 778.29, + "inf": 784.42 + } + } + ] + }, + "vLLM with OpenVINO backend": { "Precisions": [ { "Throughput": { - "0.2": "350.06", - "0.6": "486.89", - "0.8": "575.92", - "2.0": "778.07" + "0.2": 97.21, + "0.4": 192.46, + "0.6": 265.82, + "0.8": 273.24, + "1.0": 272.65, + "inf": 274.0 + }, + "Latency": { + "0.2": 166.77, + "0.4": 161.76, + "0.6": 666.89, + "0.8": 802.15, + "1.0": 810.26, + "inf": 807.71 } - }, + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8480+", + "Model": "mistralai/Mistral-7B-v0.1", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { + "Precisions": [ { + "Throughput": { + "0.2": 90.95, + "0.4": 181.06, + "0.6": 267.29, + "0.8": 351.62, + "1.0": 431.45, + "2.0": 751.85, + "inf": 596.0 + }, "Latency": { - "0.2": "60.93", - "0.6": "91.63", - "0.8": "113.61", - "2.0": "240.25" + "0.2": 59.95, + "0.4": 63.41, + "0.6": 73.42, + "0.8": 85.99, + "1.0": 98.67, + "2.0": 205.2, + "inf": 205.97 } } ] }, - "Ovms": { + "vLLM with OpenVINO backend": { + "Precisions": [ + { + "Throughput": { + "0.2": 98.18, + "0.4": 194.35, + "0.6": 287.28, + "0.8": 376.31, + "1.0": 460.32, + "2.0": 771.81, + "inf": 789.38 + }, + "Latency": { + "0.2": 64.88, + "0.4": 73.3, + "0.6": 84.37, + "0.8": 100.8, + "1.0": 133.98, + "2.0": 240.99, + "inf": 251.55 + } + } + ] + } + } + }, + { + "Platform": "Intel® Xeon® Platinum 8580", + "Model": "mistralai/Mistral-7B-v0.1", + "featured_SKU": true, + "whats_new_model": false, + "PlatformType": "Server Platforms (Intel® Xeon®)", + "Parameters": { + "OpenVINO Model Server": { "Precisions": [ { "Throughput": { - "0.2": "90.98", - "0.6": "266.24", - "0.8": "351.63", - "2.0": "195.16" + "0.2": 91.2, + "0.4": 180.14, + "0.6": 267.75, + "0.8": 351.12, + "1.0": 428.31, + "2.0": 744.99, + "inf": 852.05 + }, + "Latency": { + "0.2": 54.31, + "0.4": 67.14, + "0.6": 77.59, + "0.8": 92.17, + "1.0": 112.75, + "2.0": 225.48, + "inf": 241.49 } - }, + } + ] + }, + "vLLM with OpenVINO backend": { + "Precisions": [ { + "Throughput": { + "0.2": 98.1, + "0.4": 194.47, + "0.6": 286.97, + "0.8": 375.84, + "1.0": 460.21, + "2.0": 764.54, + "inf": 787.97 + }, "Latency": { - "0.2": "54.9", - "0.6": "78.78", - "0.8": "95.78", - "2.0": "352.23" + "0.2": 62.26, + "0.4": 78.08, + "0.6": 91.61, + "0.8": 116.71, + "1.0": 141.76, + "2.0": 250.38, + "inf": 254.25 } } ] diff --git a/src/inference/include/openvino/runtime/infer_request.hpp b/src/inference/include/openvino/runtime/infer_request.hpp index ed4dcd67797b84..10a606a2b6c535 100644 --- a/src/inference/include/openvino/runtime/infer_request.hpp +++ b/src/inference/include/openvino/runtime/infer_request.hpp @@ -255,7 +255,7 @@ class OPENVINO_RUNTIME_API InferRequest { /** * @brief Infers specified input(s) in synchronous mode. * @note It blocks all methods of InferRequest while request is ongoing (running or waiting in a queue). - * Calling any method leads to throwning the ov::Busy exception. + * Calling any method leads to throwing the ov::Busy exception. */ void infer(); @@ -274,7 +274,7 @@ class OPENVINO_RUNTIME_API InferRequest { /** * @brief Starts inference of specified input(s) in asynchronous mode. * @note It returns immediately. Inference starts also immediately. - * Calling any method while the request in a running state leads to throwning the ov::Busy exception. + * Calling any method while the request in a running state leads to throwing the ov::Busy exception. 
*/ void start_async(); diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp index 510ab7fc43b0c8..1fc3a3e20965c6 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp @@ -270,4 +270,22 @@ struct BYPASS_UMD_CACHING final : OptionBase { return OptionMode::RunTime; } }; + +// +// RUN_INFERENCES_SEQUENTIALLY +// +struct RUN_INFERENCES_SEQUENTIALLY final : OptionBase { + static std::string_view key() { + return ov::intel_npu::run_inferences_sequentially.name(); + } + + static bool defaultValue() { + return false; + } + + static OptionMode mode() { + return OptionMode::RunTime; + } +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index ec92e10a9f89c8..8aabd132e9431a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -327,5 +327,14 @@ static constexpr ov::Property backend_n */ static constexpr ov::Property backend_compilation_params{"NPU_BACKEND_COMPILATION_PARAMS"}; +/** + * @brief [Only for NPU Plugin] + * Type: boolean, default is false. + * This option allows to run inferences sequentially, in the order in which they were created + * @note Experimental property, for now it only works in very specific scenarios. We need driver updates before we can + * implement a robust solution for in-order execution + */ +static constexpr ov::Property run_inferences_sequentially{"NPU_RUN_INFERENCES_SEQUENTIALLY"}; + } // namespace intel_npu } // namespace ov diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp b/src/plugins/intel_npu/src/al/src/config/runtime.cpp index 759956b6f597df..3da16796219332 100644 --- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp +++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp @@ -27,6 +27,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); } // Heuristically obtained number. Varies depending on the values of PLATFORM and PERFORMANCE_HINT diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 3efbdab666d1ba..1e8781b0afe820 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -38,25 +38,6 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector get_profiling_info() const override; std::vector get_raw_profiling_data() const; - /** - * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by - * the model will also be deduced and returned. - * @details Batching can be handled by the plugin only if: - * - The batch axis is the first axis. - * - The batch size received by the compiler takes the default value of 1. - * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the - * default one. - * - * If any of the previous conditions is not fulfilled, the functon will return the default batch size, thus no - * custom algorithm will be applied inside the plugin in order to address batching. 
-     *
-     * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will
-     * ultimately be used for determining the batch size.
-     * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside
-     * the plugin.
-     */
-    std::optional get_batch_size(const NetworkMetadata& metadata);
-
     /**
      * @brief Check the received tensor and set the Level Zero tensor accordingly
      * @param tensor Reference to a tensor.
@@ -106,22 +87,6 @@ class ZeroInferRequest final : public SyncInferRequest {
     std::shared_ptr _npuProfiling;
     std::unique_ptr _pipeline;

-    /**
-     * @brief Indicates how many command lists will be used inside the pipeline.
-     * @details Leveraging multiple command lists implies distributing the input/output buffers accross the batch axis
-     * between these lists.
-     *
-     * If batching is handled on compiler's side then a single command list shall be used, we don't do any
-     * specific operation inside the plugin in this case.
-     */
-    size_t _numberOfCommandLists = 1;
-
-    /**
-     * @brief The batch size used by the corresponding model.
-     * @details The attribute contains a value only if the plugin performs the batches splitting operation.
-     */
-    std::optional _batchSize = std::nullopt;
-
     bool _pipelineIsCreated = false;
 };

diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
index 5b7f488d3eb96a..de5e1ac81c4728 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -28,7 +28,6 @@ struct Pipeline {
              const std::shared_ptr& npu_profiling,
              const std::vector>>& inputTensorsData,
              const std::vector>& outputTensorsData,
-             size_t numberOfCommandLists,
              uint32_t group_ordinal);

     Pipeline(const Pipeline&) = delete;
@@ -43,12 +42,25 @@ struct Pipeline {
     void updateCommandList(const TensorData& tensorsData, uint32_t index, size_t commandListIndex);

 protected:
+    std::shared_ptr _graph;
     const Config _config;
+    const uint32_t _id;
+
+    /**
+     * @brief Indicates how many command lists will be used inside the pipeline.
+     * @details Leveraging multiple command lists implies distributing the input/output buffers across the batch axis
+     * between these lists.
+     *
+     * If batching is handled on compiler's side then a single command list shall be used, we don't do any
+     * specific operation inside the plugin in this case.
+ */ + size_t _number_of_command_lists; + std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; - EventPool _event_pool; - std::vector> _events; + std::shared_ptr _event_pool; + std::vector> _events; bool sync_output_with_fences_ = true; std::shared_ptr _npu_profiling; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 88dfaf944a8b34..a0e5d2d11c1fef 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -20,8 +20,6 @@ using namespace intel_npu; namespace { constexpr std::size_t SINGLE_TENSOR = 0; -constexpr std::size_t BATCH_AXIS = 0; -constexpr std::size_t DEFAULT_BATCH_SIZE = 1; constexpr bool INPUT = true; constexpr bool OUTPUT = false; @@ -96,64 +94,6 @@ bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, c } // namespace -std::optional ZeroInferRequest::get_batch_size(const NetworkMetadata& metadata) { - if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; - if (firstOutputShape.is_dynamic()) { - _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); - return std::nullopt; - } - if (firstOutputShape.rank().get_length() == 0) { - _logger.warning( - "Networks using rank 0 shapes for inputs/outputs are not supported when batching is handled by the plugin"); - return std::nullopt; - } - - const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); - if (candidateBatchSize == 0 || candidateBatchSize == DEFAULT_BATCH_SIZE) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { - for (const IODescriptor& descriptor : descriptors) { - OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), - "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); - - const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; - const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; - - if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || - *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { - return false; - } - - if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { - if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || - *shapeFromIRModel.begin() != candidateBatchSize) { - return false; - } - } - } - - return true; - }; - - if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || - !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { - _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); - return std::nullopt; - } - - _logger.debug("Batching is handled by the plugin"); - - return candidateBatchSize; -} - //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, @@ -187,13 +127,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& _inputAllocator = std::make_shared(_initStructs, 
ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED); - if (config.get() != ov::intel_npu::BatchMode::COMPILER) { - _batchSize = get_batch_size(_metadata); - } - if (_batchSize.has_value()) { - _numberOfCommandLists = *_batchSize; - } - _logger.debug("ZeroInferRequest::ZeroInferRequest - checking level zero attributes and allocating tensors"); size_t ioIndex = 0; @@ -205,7 +138,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& continue; } - get_level_zero_input(ioIndex) = allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); + get_level_zero_input(ioIndex) = + allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _graph->get_batch_size()); get_input_tensor_data(ioIndex) = TensorData{get_level_zero_input(ioIndex)->data(), get_level_zero_input(ioIndex)->get_byte_size()}; @@ -222,7 +156,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& } _levelZeroOutputTensors.at(ioIndex) = - allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _batchSize); + allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _graph->get_batch_size()); _outputTensorsData.at(ioIndex) = std::optional(TensorData{_levelZeroOutputTensors.at(ioIndex)->data(), _levelZeroOutputTensors.at(ioIndex)->get_byte_size()}); @@ -236,7 +170,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& void ZeroInferRequest::create_pipeline() { for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) { if (is_batched_input(inputIndex)) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { _logger.debug("ZeroInferRequest::create_pipeline - tensors %s were already allocated", _metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); continue; @@ -250,8 +184,11 @@ void ZeroInferRequest::create_pipeline() { } _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - get_level_zero_input(inputIndex) = - allocate_tensor(_metadata.inputs.at(inputIndex), inputIndex, INPUT, *_inputAllocator, _batchSize); + get_level_zero_input(inputIndex) = allocate_tensor(_metadata.inputs.at(inputIndex), + inputIndex, + INPUT, + *_inputAllocator, + _graph->get_batch_size()); get_input_tensor_data(inputIndex) = std::optional( TensorData{get_level_zero_input(inputIndex)->data(), get_level_zero_input(inputIndex)->get_byte_size()}); } @@ -263,17 +200,20 @@ void ZeroInferRequest::create_pipeline() { continue; } _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - _levelZeroOutputTensors.at(outputIndex) = - allocate_tensor(_metadata.outputs.at(outputIndex), outputIndex, OUTPUT, *_outputAllocator, _batchSize); + _levelZeroOutputTensors.at(outputIndex) = allocate_tensor(_metadata.outputs.at(outputIndex), + outputIndex, + OUTPUT, + *_outputAllocator, + _graph->get_batch_size()); _outputTensorsData.at(outputIndex) = std::optional(TensorData{_levelZeroOutputTensors.at(outputIndex)->data(), _levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } // Find the corresponding command queue group. 
- _logger.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); + _logger.debug("ZeroInferRequest::create_pipeline - findGroupOrdinal"); auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties); - _logger.debug("ZeroDevice::ZeroDevice - init completed"); + _logger.debug("ZeroInferRequest::create_pipeline - init completed"); _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); @@ -286,7 +226,6 @@ void ZeroInferRequest::create_pipeline() { _npuProfiling, _inputTensorsData, _outputTensorsData, - _numberOfCommandLists, groupOrdinal); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); @@ -321,7 +260,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr& tenso index, isInput, isInput ? *_inputAllocator : *_outputAllocator, - _batchSize); + _graph->get_batch_size()); setTensorData = true; levelZeroTensorCreatedLocally = true; @@ -444,7 +383,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, get_user_inputs(foundPort.idx) = tensors; if (_initStructs->getMutableCommandListVersion()) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { for (size_t i = 0; i < tensors.size(); i++) { auto remoteTensor = std::dynamic_pointer_cast(tensors[i]._ptr); @@ -525,13 +464,17 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Outputget_batch_size()); tensorsData = std::optional(TensorData{levelZeroTensors->data(), levelZeroTensors->get_byte_size()}); return levelZeroTensors; } void ZeroInferRequest::infer() { + if (_config.get()) { + OPENVINO_THROW("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!"); + } + infer_async(); get_result(); } @@ -567,7 +510,7 @@ void ZeroInferRequest::infer_async() { } if (is_batched_input(inputIndex)) { - if (_batchSize.has_value()) { + if (_graph->get_batch_size().has_value()) { for (size_t i = 0; i < userTensor.size(); i++) { auto levelZeroBatchRemoteTensor = std::dynamic_pointer_cast(get_level_zero_input(inputIndex, i)); diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index c782c3e0684f0d..d7f06b813810bb 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -8,6 +8,7 @@ #include #include "intel_npu/common/itt.hpp" +#include "intel_npu/config/runtime.hpp" #include "intel_npu/prefix.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" @@ -23,13 +24,15 @@ Pipeline::Pipeline(const Config& config, const std::shared_ptr& npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - size_t numberOfCommandLists, uint32_t group_ordinal) - : _config(config), - _command_queue(graph->get_command_queue()), - _event_pool{initStructs->getDevice(), - initStructs->getContext(), - numberOfCommandLists ? static_cast(numberOfCommandLists) : 1}, + : _graph(graph), + _config(config), + _id(_graph->get_unique_id()), + _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), + _event_pool{ + std::make_shared(initStructs->getDevice(), + initStructs->getContext(), + _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); @@ -39,20 +42,20 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } - _command_lists.reserve(numberOfCommandLists); - _events.reserve(numberOfCommandLists); - _fences.reserve(numberOfCommandLists); + _command_lists.reserve(_number_of_command_lists); + _events.reserve(_number_of_command_lists); + _fences.reserve(_number_of_command_lists); _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( std::make_unique(initStructs, group_ordinal, initStructs->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i))); - _fences.emplace_back(std::make_unique(*_command_queue)); + _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + _fences.emplace_back(std::make_unique(*_graph->get_command_queue())); } - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { size_t ioIndex = 0; for (const auto& desc : graph->get_input_descriptors()) { if (inputTensorsData.at(ioIndex).size() > 1) { @@ -64,7 +67,7 @@ Pipeline::Pipeline(const Config& config, graph->set_argument_value(desc.idx, static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + - (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); + (i * inputTensorsData.at(ioIndex).at(0)->size) / _number_of_command_lists); ++ioIndex; } @@ -73,10 +76,16 @@ Pipeline::Pipeline(const Config& config, for (const auto& desc : graph->get_output_descriptors()) { graph->set_argument_value(desc.idx, static_cast(outputTensorsData.at(ioIndex)->mem) + - (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + (i * outputTensorsData.at(ioIndex)->size) / _number_of_command_lists); ++ioIndex; } + if (_config.get()) { + if (_graph->get_last_submitted_event(i)) { + _graph->get_last_submitted_event(i)->AppendWaitOnEvent(*_command_lists.at(i)); + } + } + /// append timestamp command if feature was activated if (_npu_profiling != nullptr) { _command_lists.at(i)->appendBarrier(); @@ -92,6 +101,15 @@ Pipeline::Pipeline(const Config& config, _command_lists.at(i)->appendNpuTimestamp(reinterpret_cast(_npu_profiling->npu_ts_infer_end)); } + if (_config.get()) { + if (_graph->get_last_submitted_event(i)) { + _graph->get_last_submitted_event(i)->AppendEventReset(*_command_lists.at(i)); + } + + _events.at(i)->AppendSignalEvent(*_command_lists.at(i)); + _graph->set_last_submitted_event(_events.at(i), i); + } + // appendBarrier used in L0 as well if (!sync_output_with_fences_) { _command_lists.at(i)->appendBarrier(); @@ -105,12 +123,24 @@ Pipeline::Pipeline(const Config& config, void Pipeline::push() { _logger.debug("Pipeline - push() started"); + if (_config.get()) { + if (_id) { + auto previousIndex = _graph->get_last_submitted_id(); + + if (_id != ++previousIndex) { + OPENVINO_THROW("Inferences should be called in the same order they were called the first time!"); + } + } + + _graph->set_last_submitted_id(_id); + } + for (size_t i = 0; i < _command_lists.size(); ++i) { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); if (sync_output_with_fences_) { - 
_command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); + _graph->get_command_queue()->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - _command_queue->executeCommandList(*_command_lists.at(i)); + _graph->get_command_queue()->executeCommandList(*_command_lists.at(i)); } } @@ -154,12 +184,12 @@ void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index) OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); _logger.debug("Pipeline - updateCommandList"); - const size_t numberOfCommandLists = _command_lists.size(); + const size_t _number_of_command_lists = _command_lists.size(); - for (size_t i = 0; i < numberOfCommandLists; i++) { + for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.at(i)->updateMutableCommandList( index, - static_cast(tensorsData.mem) + (i * tensorsData.size) / numberOfCommandLists); + static_cast(tensorsData.mem) + (i * tensorsData.size) / _number_of_command_lists); _command_lists.at(i)->close(); } }; @@ -168,9 +198,9 @@ void Pipeline::updateCommandList(const TensorData& tensorsData, uint32_t index, OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList"); _logger.debug("Pipeline - updateCommandList"); - const size_t numberOfCommandLists = _command_lists.size(); + const size_t _number_of_command_lists = _command_lists.size(); - OPENVINO_ASSERT(commandListIndex < numberOfCommandLists, + OPENVINO_ASSERT(commandListIndex < _number_of_command_lists, "Command list index is higgher than the number of Command lists ", commandListIndex); diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 51c4a4cf26eafd..7e718d9172f4f7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -9,6 +9,7 @@ #include #include "intel_npu/network_metadata.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" #include "intel_npu/utils/zero/zero_wrappers.hpp" #include "openvino/runtime/profiling_info.hpp" @@ -17,13 +18,10 @@ namespace intel_npu { class IGraph : public std::enable_shared_from_this { public: - IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) - : _handle(handle), - _metadata(std::move(metadata)) { - if (blob.has_value()) { - _blob = std::move(*blob); - } - } + IGraph(ze_graph_handle_t handle, + NetworkMetadata metadata, + const Config& config, + std::optional> blob); virtual void export_blob(std::ostream& stream) const = 0; @@ -36,55 +34,48 @@ class IGraph : public std::enable_shared_from_this { virtual ~IGraph() = default; - const NetworkMetadata& get_metadata() const { - return _metadata; - } - - ze_graph_handle_t get_handle() const { - return _handle; - } - - void update_network_name(std::string_view name) { - _metadata.name = name; - } - - inline const std::vector& get_input_descriptors() const { - return _input_descriptors; - } - - inline const std::vector& get_output_descriptors() const { - return _output_descriptors; - } - - inline const std::shared_ptr& get_command_queue() const { - return _command_queue; - } - - void set_workload_type(const ov::WorkloadType workloadType) const { - if (_command_queue == nullptr) { - return; - } - - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case 
ov::WorkloadType::DEFAULT:
-            zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT;
-            break;
-        case ov::WorkloadType::EFFICIENT:
-            zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND;
-            break;
-        default:
-            OPENVINO_THROW("Unknown value for WorkloadType!");
-        }
-
-        _command_queue->setWorkloadType(zeWorkloadType);
-    }
-
-    std::mutex& get_mutex() {
-        return _mutex;
-    }
+    const NetworkMetadata& get_metadata() const;
+    ze_graph_handle_t get_handle() const;
+
+    void update_network_name(std::string_view name);
+
+    const std::vector& get_input_descriptors() const;
+    const std::vector& get_output_descriptors() const;
+    const std::shared_ptr& get_command_queue() const;
+
+    void set_workload_type(const ov::WorkloadType workloadType) const;
+
+    std::mutex& get_mutex();
+
+    void set_last_submitted_event(const std::shared_ptr& event, size_t indexOfCommandList);
+    const std::shared_ptr& get_last_submitted_event(size_t indexOfCommandList) const;
+
+    uint32_t get_unique_id();
+    void set_last_submitted_id(uint32_t id_index);
+    const uint32_t get_last_submitted_id() const;
+
+    const std::optional get_batch_size() const;

 protected:
+    /**
+     * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by
+     * the model will also be deduced and returned.
+     * @details Batching can be handled by the plugin only if:
+     * - The batch axis is the first axis.
+     * - The batch size received by the compiler takes the default value of 1.
+     * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the
+     * default one.
+     *
+     * If any of the previous conditions is not fulfilled, the function will return the default batch size, thus no
+     * custom algorithm will be applied inside the plugin in order to address batching.
+     *
+     * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will
+     * ultimately be used for determining the batch size.
+     * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside
+     * the plugin.
+     */
+    std::optional get_batch_size(const NetworkMetadata& metadata);
+
     ze_graph_handle_t _handle = nullptr;
     NetworkMetadata _metadata;

@@ -92,12 +83,24 @@ class IGraph : public std::enable_shared_from_this {
     std::vector _output_descriptors;

     std::shared_ptr _command_queue;
+    std::vector> _last_submitted_event;

     // Used to protect zero pipeline creation in the graph. The pipeline should be created only once per graph when the
     // first inference starts running
     std::mutex _mutex;

     std::vector _blob;
+
+    uint32_t _unique_id = 0;
+    uint32_t _last_submitted_id;
+
+    /**
+     * @brief The batch size used by the corresponding model.
+     * @details The attribute contains a value only if the plugin performs the batches splitting operation.
+ */ + std::optional _batch_size = std::nullopt; + + Logger _logger; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp new file mode 100644 index 00000000000000..fd5463af5eea3e --- /dev/null +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -0,0 +1,159 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_npu/common/igraph.hpp" + +#include "intel_npu/config/compiler.hpp" +#include "intel_npu/config/runtime.hpp" + +namespace { +constexpr std::size_t BATCH_AXIS = 0; +constexpr std::size_t DEFAULT_BATCH_SIZE = 1; +} // namespace + +namespace intel_npu { + +IGraph::IGraph(ze_graph_handle_t handle, + NetworkMetadata metadata, + const Config& config, + std::optional> blob) + : _handle(handle), + _metadata(std::move(metadata)), + _logger("IGraph", config.get()) { + if (blob.has_value()) { + _blob = std::move(*blob); + } +} + +const NetworkMetadata& IGraph::get_metadata() const { + return _metadata; +} + +ze_graph_handle_t IGraph::get_handle() const { + return _handle; +} + +void IGraph::update_network_name(std::string_view name) { + _metadata.name = name; +} + +const std::vector& IGraph::get_input_descriptors() const { + return _input_descriptors; +} + +const std::vector& IGraph::get_output_descriptors() const { + return _output_descriptors; +} + +const std::shared_ptr& IGraph::get_command_queue() const { + return _command_queue; +} + +void IGraph::set_workload_type(const ov::WorkloadType workloadType) const { + if (_command_queue == nullptr) { + return; + } + + ze_command_queue_workload_type_t zeWorkloadType; + switch (workloadType) { + case ov::WorkloadType::DEFAULT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; + break; + case ov::WorkloadType::EFFICIENT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; + break; + default: + OPENVINO_THROW("Unknown value for WorkloadType!"); + } + + _command_queue->setWorkloadType(zeWorkloadType); +} + +std::mutex& IGraph::get_mutex() { + return _mutex; +} + +void IGraph::set_last_submitted_event(const std::shared_ptr& event, size_t indexOfCommandList) { + _last_submitted_event[indexOfCommandList] = event; +} + +const std::shared_ptr& IGraph::get_last_submitted_event(size_t indexOfCommandList) const { + return _last_submitted_event[indexOfCommandList]; +} + +uint32_t IGraph::get_unique_id() { + return _unique_id++; +} + +void IGraph::set_last_submitted_id(uint32_t id_index) { + _last_submitted_id = id_index; +} + +const uint32_t IGraph::get_last_submitted_id() const { + return _last_submitted_id; +} + +std::optional IGraph::get_batch_size(const NetworkMetadata& metadata) { + if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; + if (firstOutputShape.is_dynamic()) { + _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); + return std::nullopt; + } + if (firstOutputShape.rank().get_length() == 0) { + _logger.warning("Networks using rank 0 shapes for inputs/outputs are not supported when batching is " + "handled by the plugin"); + return std::nullopt; + } + + const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); + if (candidateBatchSize == 0 || 
candidateBatchSize == DEFAULT_BATCH_SIZE) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { + for (const IODescriptor& descriptor : descriptors) { + OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), + "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); + + const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; + const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; + + if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || + *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { + return false; + } + + if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { + if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || + *shapeFromIRModel.begin() != candidateBatchSize) { + return false; + } + } + } + + return true; + }; + + if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || + !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { + _logger.debug("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; + } + + _logger.debug("Batching is handled by the plugin"); + + return candidateBatchSize; +} + +const std::optional IGraph::get_batch_size() const { + return _batch_size; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index f819ed73711cf2..9d634656db109a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -541,13 +541,21 @@ std::string DriverCompilerAdapter::serializeConfig(const Config& config, content = std::regex_replace(content, std::regex(batchstr.str()), ""); } - // NPU_DEFER_WEIGHTS_LOAD is not supported in versions < 6.2 - need to remove it - if ((compilerVersion.major < 6) || (compilerVersion.major == 6 && compilerVersion.minor < 2)) { + // NPU_DEFER_WEIGHTS_LOAD is needed at runtime only + { std::ostringstream batchstr; batchstr << ov::intel_npu::defer_weights_load.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER << "\\S+" << VALUE_DELIMITER; - logger.warning( - "NPU_DEFER_WEIGHTS_LOAD property is not suppored by this compiler version. Removing from parameters"); + logger.info("NPU_DEFER_WEIGHTS_LOAD property is needed at runtime only. Removing from parameters"); + content = std::regex_replace(content, std::regex(batchstr.str()), ""); + } + + // NPU_RUN_INFERENCES_SEQUENTIALLY is needed at runtime only + { + std::ostringstream batchstr; + batchstr << ov::intel_npu::run_inferences_sequentially.name() << KEY_VALUE_SEPARATOR << VALUE_DELIMITER + << "\\S+" << VALUE_DELIMITER; + logger.info("NPU_RUN_INFERENCES_SEQUENTIALLY property is needed at runtime only. 
Removing from parameters"); content = std::regex_replace(content, std::regex(batchstr.str()), ""); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index e1f3990b835e8d..0d180f983ad3a9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -16,7 +16,7 @@ DriverGraph::DriverGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, const Config& config, std::optional> blob) - : IGraph(graphHandle, std::move(metadata), std::move(blob)), + : IGraph(graphHandle, std::move(metadata), config, std::move(blob)), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _logger("DriverGraph", config.get()) { @@ -126,6 +126,16 @@ void DriverGraph::initialize(const Config& config) { // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. _blobIsReleased = release_blob(config); + + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batch_size = get_batch_size(_metadata); + } + + if (config.get()) { + auto number_of_command_lists = _batch_size.has_value() ? *_batch_size : 1; + + _last_submitted_event.resize(number_of_command_lists); + } } bool DriverGraph::release_blob(const Config& config) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index c99069a0a9760f..b1658e7e0582e0 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -17,7 +17,7 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, NetworkMetadata metadata, std::vector blob, const Config& config) - : IGraph(graphHandle, std::move(metadata), std::optional>(std::move(blob))), + : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), @@ -115,6 +115,16 @@ void PluginGraph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_handle, config); + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batch_size = get_batch_size(_metadata); + } + + if (config.get()) { + auto number_of_command_lists = _batch_size.has_value() ? 
*_batch_size : 1; + + _last_submitted_event.resize(number_of_command_lists); + } + _logger.debug("Graph initialize finish"); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index c6be2793fe6f70..b9cdad9f4879db 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -434,6 +434,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, // Finalize memory in closures and weight banks finalize_weights_bank(); + detach_memory(); // Print stats report when possible { @@ -499,6 +500,23 @@ void ov::npuw::CompiledModel::finalize_weights_bank() { LOG_INFO("Done."); } +void ov::npuw::CompiledModel::detach_memory() { + LOG_INFO("Detaching model & weight memory..."); + LOG_BLOCK(); + for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) { + auto& comp_model_desc = m_compiled_submodels[idx]; + auto& proto_comp_model_desc = m_compiled_submodels[comp_model_desc.replaced_by.value_or(idx)]; + if (!proto_comp_model_desc.model || !proto_comp_model_desc.compiled_model) { + continue; // optimized-out OR already cleared - skip + } + if (proto_comp_model_desc.device_it + 1 == m_dev_list.end()) { + LOG_INFO("No fallback expected - clear the OV model for Subgraph[" << idx << "]"); + proto_comp_model_desc.model.reset(); + } + } + LOG_INFO("Done"); +} + std::string ov::npuw::CompiledModel::global_mem_device() const { // Force globally set device if set const std::string device_alloc = m_cfg.get<::intel_npu::NPUW_WEIGHTS_BANK_ALLOC>(); @@ -668,6 +686,10 @@ ov::SoPtr ov::npuw::CompiledModel::compile_submodel(const st // NOTE(dm): Not sure if it is required for the NPUW plugin, but likely it is auto& device_config = m_meta_devices[device]; + if (ov::npuw::util::starts_with(device, "NPU") && m_cfg.get<::intel_npu::NPUW_UNFOLD_IREQS>()) { + device_config["NPU_RUN_INFERENCES_SEQUENTIALLY"] = "YES"; + } + const auto& cache_dir = m_cfg.get<::intel_npu::NPUW_CACHE_DIR>(); if (!cache_dir.empty()) { LOG_INFO("NPUW will try to utilize CACHE_DIR for " << submodel->get_friendly_name() << " submodel."); diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp index ece1bc78fb5bf5..8ccb1f83349e47 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -78,6 +78,7 @@ class CompiledModel : public ov::ICompiledModel { void implement_properties(); void finalize_weights_bank(); + void detach_memory(); std::string global_mem_device() const; std::string funcall_mem_device(const std::size_t idx) const; diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp index 81521222ae6fae..133101da8b7d38 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp @@ -19,23 +19,34 @@ namespace npuw { namespace weights { namespace op { struct Const { - std::shared_ptr node; - + std::shared_ptr m_node; + ov::element::Type m_cached_type; + ov::Shape m_cached_shape; + const void* m_cached_ptr = nullptr; + + explicit Const(std::shared_ptr n) : m_node(n) { + m_cached_type = m_node->get_element_type(); + m_cached_shape = m_node->get_shape(); + m_cached_ptr = m_node->get_data_ptr(); + } std::size_t hash() const { - std::size_t seed = std::hash()(node->get_data_ptr()) + 0x9e3779b9; - seed ^= 
node->get_element_type().hash() + 0x9e3779b9; - for (const auto& dim : node->get_shape()) { + std::size_t seed = std::hash()(m_cached_ptr) + 0x9e3779b9; + seed ^= m_cached_type.hash() + 0x9e3779b9; + for (const auto& dim : m_cached_shape) { seed ^= std::hash()(dim) + 0x9e3779b9; } return seed; } bool operator==(const Const& other) const { - return (node->get_shape() == other.node->get_shape() && - node->get_element_type() == other.node->get_element_type() && - node->get_data_ptr() == other.node->get_data_ptr()); + return (m_cached_type == other.m_cached_type && m_cached_shape == other.m_cached_shape && + m_cached_ptr == other.m_cached_ptr); } ov::Tensor eval() const { - return ov::npuw::util::tensor_from_const(node); + NPUW_ASSERT(m_node && "Const::eval() can only happen before detach"); + return ov::npuw::util::tensor_from_const(m_node); + } + void detach() { + m_node.reset(); } }; struct Concat { @@ -59,6 +70,11 @@ struct Concat { } return ov::npuw::util::concat(to_concat, axis); } + void detach() { + for (auto&& lt : tensors) { + lt.detach(); + } + } }; struct Unpack { @@ -95,6 +111,11 @@ struct Unpack { } return dst; } + void detach() { + w.detach(); + z.detach(); + s.detach(); + } }; struct Permute { LazyTensor tensor; @@ -113,6 +134,9 @@ struct Permute { ov::Tensor eval() const { return ov::npuw::util::permute(tensor.eval(), axes); } + void detach() { + tensor.detach(); + } }; struct Convert { LazyTensor tensor; @@ -130,6 +154,9 @@ struct Convert { NPUW_ASSERT(ov::element::f16 == type); return ov::npuw::util::to_f16(tensor.eval()); } + void detach() { + tensor.detach(); + } }; } // namespace op @@ -137,16 +164,16 @@ using Transform = std::variant overloaded(Ts...) -> overloaded; -std::size_t LazyTensorImpl::get_hash() const { - // Already calculated - if (m_hash != 0) { - return m_hash; - } - - // Get hash - std::size_t seed = 0; - std::visit(overloaded{[&seed](const auto& op) { - seed ^= op.hash(); - }}, - m_transform); - - return seed; -} - -LazyTensorImpl::LazyTensorImpl(Transform&& t) { - m_transform = std::move(t); - m_hash = get_hash(); -} +LazyTensorImpl::LazyTensorImpl(Transform&& t) + : m_transform(std::move(t)), + m_hash(std::visit(overloaded{[](const auto& op) { + return op.hash(); + }}, + m_transform)) {} bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const { return m_hash == other.m_hash && m_transform == other.m_transform; @@ -200,17 +213,25 @@ ov::Tensor LazyTensorImpl::eval() const { some kind of indicator that the only difference is concat and we should look for an existing ov::Tensor. Perhaps it should be done after model compilation and not handled here. 
*/ + return std::visit(overloaded{[](const auto& op) { + return op.eval(); + }}, + m_transform); +} + +std::size_t LazyTensorImpl::get_hash() const { + return m_hash; +} - ov::Tensor result = std::visit(overloaded{[](const auto& op) { - return op.eval(); - }}, - m_transform); - NPUW_ASSERT(result); - return result; +void LazyTensorImpl::detach() { + std::visit(overloaded{[](auto& op) { + op.detach(); + }}, + m_transform); } LazyTensor::LazyTensor(const std::shared_ptr& const_ptr) - : m_impl(std::make_shared(op::Const{const_ptr})) {} + : m_impl(std::make_shared(op::Const(const_ptr))) {} LazyTensor::LazyTensor(const std::vector& to_concat, const std::size_t axis) : m_impl(std::make_shared(op::Concat{to_concat, axis})) {} LazyTensor::LazyTensor(const LazyTensor& cw, @@ -233,11 +254,17 @@ LazyTensor LazyTensor::convert(const ov::element::Type& type) { } bool LazyTensor::operator==(const LazyTensor& other) const { + if (!m_impl && !other.m_impl) { + return true; + } + if ((!m_impl && other.m_impl) || (m_impl && !other.m_impl)) { + return false; + } return *m_impl.get() == *other.m_impl.get(); } bool LazyTensor::operator!=(const LazyTensor& other) const { - return !(*m_impl.get() == *other.m_impl.get()); + return !(*this == other); } ov::Tensor LazyTensor::eval() const { @@ -254,6 +281,12 @@ std::size_t LazyTensor::get_hash() const { return m_impl->get_hash(); } +void LazyTensor::detach() { + if (m_impl) { + m_impl->detach(); + } +} + std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const { return lt.get_hash(); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp index 365d9d636872b8..840e22dcedad83 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp @@ -39,8 +39,8 @@ class LazyTensor { bool operator!=(const LazyTensor& other) const; ov::Tensor eval() const; - std::size_t get_hash() const; + void detach(); private: std::shared_ptr m_impl = nullptr; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp index 2b2878481f1330..3e712574606679 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.cpp @@ -23,7 +23,7 @@ using ov::npuw::online::detail::isOp; Group::Group(const std::shared_ptr& node, size_t gid, own::ade::NodeHandle nh, - const std::shared_ptr& g, + const std::weak_ptr& g, const std::weak_ptr& snapshot) : m_nh(std::move(nh)), m_id(gid), @@ -36,7 +36,7 @@ Group::Group(const std::shared_ptr& node, Group::Group(size_t gid, own::ade::NodeHandle nh, - const std::shared_ptr& g, + const std::weak_ptr& g, const std::weak_ptr& snapshot) : m_nh(std::move(nh)), m_id(gid), @@ -214,14 +214,16 @@ void Group::relinkGraph(const Group::GPtr& gptr_other) { auto consumers = gptr_other->dstNodes(); // Remove gptr_other node from the graph. 
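
The `LazyTensor` changes above follow one pattern: each transform caches whatever it needs for hashing and equality (element type, shape, data pointer) at construction time, the hash is computed once in the `LazyTensorImpl` constructor, and `detach()` drops the reference to the original constant so its memory can be released once the weight has been evaluated and allocated on the device. A small self-contained sketch of that idea, using a plain `std::string` payload instead of OpenVINO constants (the names here are illustrative, not the plugin's API):

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <memory>
    #include <stdexcept>
    #include <string>

    // A lazily evaluated value that stays hashable/comparable after detach().
    class LazyValue {
    public:
        explicit LazyValue(std::shared_ptr<std::string> source)
            : m_source(std::move(source)),
              // Cache everything needed for identity while the source is still alive.
              m_cached_size(m_source->size()),
              m_cached_ptr(m_source->data()),
              m_hash(std::hash<const void*>()(m_cached_ptr) ^ (m_cached_size + 0x9e3779b9)) {}

        std::size_t hash() const { return m_hash; }

        bool operator==(const LazyValue& other) const {
            // Compares cached identity only; the pointer is never dereferenced here.
            return m_cached_ptr == other.m_cached_ptr && m_cached_size == other.m_cached_size;
        }

        std::string eval() const {
            // Evaluation is only legal before detach(), mirroring Const::eval().
            if (!m_source) throw std::runtime_error("eval() after detach()");
            return *m_source;
        }

        void detach() { m_source.reset(); }  // release the heavy payload, keep identity

    private:
        std::shared_ptr<std::string> m_source;
        std::size_t m_cached_size;
        const void* m_cached_ptr;
        std::size_t m_hash;
    };

    int main() {
        auto src = std::make_shared<std::string>("weights");
        LazyValue lv(src);
        std::cout << lv.eval() << " hash=" << lv.hash() << "\n";
        lv.detach();                     // the payload can now be freed
        std::cout << lv.hash() << "\n";  // hash and equality still work after detach
    }
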
Note: also removes all it's edges - m_graph->remove(gptr_other->getHandle()); + auto&& graph = m_graph.lock(); + NPUW_ASSERT(graph); + graph->remove(gptr_other->getHandle()); for (const auto& nh : producers) { if (m_nh == nh) { continue; } // relink the graph - if (!m_graph->linked(nh, m_nh)) { - m_graph->link(nh, m_nh); + if (!graph->linked(nh, m_nh)) { + graph->link(nh, m_nh); } } for (const auto& nh : consumers) { @@ -229,8 +231,8 @@ void Group::relinkGraph(const Group::GPtr& gptr_other) { continue; } // relink the graph - if (!m_graph->linked(m_nh, nh)) { - m_graph->link(m_nh, nh); + if (!graph->linked(m_nh, nh)) { + graph->link(m_nh, nh); } } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp index 17527033173a82..1d354542e135a8 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp @@ -33,11 +33,11 @@ class Group : public std::enable_shared_from_this { Group(const std::shared_ptr& node, size_t gid, own::ade::NodeHandle nh, - const std::shared_ptr& g, + const std::weak_ptr& g, const std::weak_ptr& snapshot); Group(size_t gid, own::ade::NodeHandle nh, - const std::shared_ptr& g, + const std::weak_ptr& g, const std::weak_ptr& snapshot); // After we formed a final structure of partitioning, @@ -100,7 +100,7 @@ class Group : public std::enable_shared_from_this { own::ade::NodeHandle m_nh; size_t m_id; // used for utility prints only - std::shared_ptr m_graph; + std::weak_ptr m_graph; std::weak_ptr m_snapshot; bool m_frozen = false; bool m_nofold = false; diff --git a/src/plugins/intel_npu/src/plugin/npuw/util.hpp b/src/plugins/intel_npu/src/plugin/npuw/util.hpp index 7a942f0b6c6351..616aff53128292 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/util.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/util.hpp @@ -141,6 +141,14 @@ Impl _(std::shared_ptr pM) { } // namespace at +// Written here to be a drop-in replacement for ov::parallel_for for the debug purposes +template +void non_parallel_for(std::size_t count, F&& f) { + for (std::size_t idx = 0u; idx < count; idx++) { + f(idx); + } +} + } // namespace util } // namespace npuw } // namespace ov diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp index 51cf76020d81a1..2b4be1a759c17c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp @@ -40,16 +40,22 @@ ov::Tensor Bank::get(const LazyTensor& tensor, const std::string& device) { std::lock_guard guard(m_mutex); - auto& device_bank = m_device_bank[device_for_alloc]; - auto iter_device = device_bank.find(tensor); + auto& device_bank = m_device_banks[device_for_alloc]; - if (iter_device != device_bank.end() && iter_device->second) { + std::unique_lock dev_guard(device_bank.mutex); + auto iter_device = device_bank.storage.find(tensor); + + if (iter_device != device_bank.storage.end() && iter_device->second) { // Already allocated + // tensor (the key) may be coming from a 2nd (3rd, ...) 
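
Switching `Group::m_graph` from `shared_ptr` to `weak_ptr` breaks the ownership cycle between groups and the partitioning graph, so every use now has to lock the weak pointer first, as `relinkGraph()` does above. The pattern in isolation, with illustrative types only:

    #include <cassert>
    #include <iostream>
    #include <memory>

    struct Graph {
        void remove(int node) { std::cout << "remove node " << node << "\n"; }
    };

    struct Group {
        std::weak_ptr<Graph> m_graph;  // non-owning back-reference, breaks the cycle

        void relink(int node) {
            auto graph = m_graph.lock();  // promote to shared_ptr for the duration of the call
            assert(graph && "graph must outlive the group while partitioning runs");
            graph->remove(node);
        }
    };

    int main() {
        auto graph = std::make_shared<Graph>();
        Group g{graph};
        g.relink(42);
    }
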
model + // detach it here just in case + const_cast(tensor).detach(); return iter_device->second; } + dev_guard.unlock(); // Allocation and evaluation needed - return unsafe_eval_and_alloc(tensor, device_for_alloc); + return eval_and_alloc(tensor, device_bank, device_for_alloc); } void Bank::registerLT(const LazyTensor& tensor, const std::string& device) { @@ -57,64 +63,75 @@ void Bank::registerLT(const LazyTensor& tensor, const std::string& device) { std::lock_guard guard(m_mutex); - auto& device_bank = m_device_bank[device_for_alloc]; - if (device_bank.find(tensor) == device_bank.end()) { - device_bank[tensor] = ov::Tensor(); + auto& device_bank = m_device_banks[device_for_alloc]; + if (device_bank.storage.find(tensor) == device_bank.storage.end()) { + device_bank.storage[tensor] = ov::Tensor(); } } void Bank::evaluate_and_allocate() { std::lock_guard guard(m_mutex); - for (auto&& bank : m_device_bank) { + for (auto&& bank : m_device_banks) { const auto& device_for_alloc = bank.first; auto& device_bank = bank.second; + std::vector vec; - for (const auto& el : device_bank) { + vec.reserve(device_bank.storage.size()); + for (const auto& el : device_bank.storage) { vec.push_back(el.first); } ov::parallel_for(vec.size(), [&](std::size_t idx) { const auto& lt = vec[idx]; - auto iter_device = device_bank.find(lt); - if (iter_device != device_bank.end() && iter_device->second) { + std::unique_lock dev_guard(device_bank.mutex); + auto iter_device = device_bank.storage.find(lt); + if (iter_device != device_bank.storage.end() && iter_device->second) { // Already allocated return; } + dev_guard.unlock(); // Allocation and evaluation needed - unsafe_eval_and_alloc(lt, device_for_alloc); + eval_and_alloc(lt, device_bank, device_for_alloc); }); } } -ov::Tensor Bank::unsafe_eval_and_alloc(const LazyTensor& tensor, const std::string& device_for_alloc) { - // Note: private method used inside other methods with already locked mutex +ov::Tensor Bank::eval_and_alloc(const LazyTensor& tensor, + Bank::DeviceBank& dbank, + const std::string& device_for_alloc) { + // Evaluate concurrently (see evaluate_and_allocate), lock the device + // mutex only to update the device bank (& allocate on-device memory, if needed) const auto& transformed_tensor = tensor.eval(); + + std::unique_lock guard(dbank.mutex); if (device_for_alloc == "CPU") { - m_device_bank[device_for_alloc][tensor] = transformed_tensor; + dbank.storage[tensor] = transformed_tensor; return transformed_tensor; } + // Non-CPU case: detach the evaluated LazyTensor from its memory + const_cast(tensor).detach(); + ov::SoPtr remote_tensor; ov::Tensor allocated_tensor; - { - // FIXME: L0 allocation may crash when run in parallel - std::lock_guard guard(m_alloc_mutex); - m_remote_ctx = m_core->get_default_context(device_for_alloc)._ptr; - remote_tensor = - m_remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape()); - allocated_tensor = ov::make_tensor(remote_tensor); - } + + auto remote_ctx = m_core->get_default_context(device_for_alloc)._ptr; + remote_tensor = + remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape()); + allocated_tensor = ov::make_tensor(remote_tensor); + dbank.storage[tensor] = allocated_tensor; + guard.unlock(); // Unlock the guard, map update is done - copy can continue in parallel + transformed_tensor.copy_to(allocated_tensor); - m_device_bank[device_for_alloc][tensor] = allocated_tensor; return allocated_tensor; } bool Bank::is_remote(const 
LazyTensor& tensor) const { // FIXME: make generic - auto npu_bank = m_device_bank.find("NPU"); - if (npu_bank != m_device_bank.end() && npu_bank->second.find(tensor) != npu_bank->second.end()) { - // Found in NPU bank + auto npu_bank = m_device_banks.find("NPU"); + if (npu_bank != m_device_banks.end() && npu_bank->second.storage.find(tensor) != npu_bank->second.storage.end()) { + // Found in NPU bank so considered remote (utterly wrong for the generic case) return true; } return false; diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp index b9d8d21143c851..491e962a58b438 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp @@ -35,13 +35,17 @@ class Bank { bool is_remote(const LazyTensor& tensor) const; private: - ov::Tensor unsafe_eval_and_alloc(const LazyTensor& tensor, const std::string& device); // Bank for specified device and their allocated memory - std::unordered_map> m_device_bank; + struct DeviceBank { + std::unordered_map storage; + std::mutex mutex; + }; + std::unordered_map m_device_banks; + + ov::Tensor eval_and_alloc(const LazyTensor& tensor, DeviceBank& dbank, const std::string& device); + std::mutex m_mutex; - std::mutex m_alloc_mutex; std::shared_ptr m_core = nullptr; - std::shared_ptr m_remote_ctx = nullptr; std::string m_alloc_device; }; diff --git a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp index 4baf15d76718a8..4e86d32d2f72b1 100644 --- a/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/src/compiled_model.cpp @@ -311,6 +311,12 @@ void CompiledModel::initialize_properties() { [](const Config& config) { return config.getString(); }}}, + {ov::intel_npu::run_inferences_sequentially.name(), + {false, + ov::PropertyMutability::RO, + [](const Config& config) { + return config.get(); + }}}, }; for (auto& property : _properties) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9f77d952fd813b..18a96bff02fb80 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -568,6 +568,12 @@ Plugin::Plugin() [](const Config& config) { return config.getString(); }}}, + {ov::intel_npu::run_inferences_sequentially.name(), + {false, + ov::PropertyMutability::RW, + [](const Config& config) { + return config.get(); + }}}, {ov::intel_npu::batch_mode.name(), {false, ov::PropertyMutability::RW, [](const Config& config) { return config.getString(); }}}}; diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp index 6cb9e23d203c11..1e1b50fb925916 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_api.hpp @@ -4,6 +4,7 @@ #pragma once +#include #include #include @@ -57,12 +58,14 @@ namespace intel_npu { symbol_statement(zeMemAllocDevice) \ symbol_statement(zeMemAllocHost) \ symbol_statement(zeMemFree) \ - symbol_statement(zeMemGetAllocProperties) + symbol_statement(zeMemGetAllocProperties) \ + symbol_statement(zelLoaderGetVersions) //unsupported symbols with older ze_loader versions #define weak_symbols_list() \ symbol_statement(zeCommandListGetNextCommandIdExp) \ - 
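
The weights-bank rework above replaces one global allocation mutex with a per-device `DeviceBank { storage, mutex }`, and deliberately evaluates the tensor outside the lock so that the `ov::parallel_for` in `evaluate_and_allocate()` can run evaluations concurrently; the device mutex is only held to look up and to publish results. A condensed sketch of that locking discipline with standard containers (the names below are stand-ins, not the plugin's API):

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct DeviceBank {
        std::unordered_map<std::string, std::string> storage;  // key -> allocated value
        std::mutex mutex;
    };

    // Expensive work happens outside the lock; the lock only guards the map.
    // Two threads may occasionally evaluate the same key; the second publish
    // simply overwrites with an identical result, which is tolerated.
    std::string get_or_create(DeviceBank& bank, const std::string& key) {
        {
            std::unique_lock<std::mutex> guard(bank.mutex);
            auto it = bank.storage.find(key);
            if (it != bank.storage.end() && !it->second.empty()) {
                return it->second;  // already allocated
            }
        }  // unlock before the expensive evaluation

        std::string evaluated = "evaluated(" + key + ")";  // stand-in for LazyTensor::eval()

        std::unique_lock<std::mutex> guard(bank.mutex);
        bank.storage[key] = evaluated;  // publish under the lock
        guard.unlock();                 // any follow-up copy could continue unlocked
        return evaluated;
    }

    int main() {
        DeviceBank bank;
        std::cout << get_or_create(bank, "w0") << "\n";
        std::cout << get_or_create(bank, "w0") << "\n";  // second call hits the cache
    }
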
symbol_statement(zeCommandListUpdateMutableCommandsExp) + symbol_statement(zeCommandListUpdateMutableCommandsExp) \ + symbol_statement(zeInitDrivers) // clang-format on class ZeroApi { diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp index 01b2de868e7572..25ceb018cdc243 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_init.hpp @@ -67,6 +67,8 @@ class ZeroInitStructsHolder final { } private: + void initNpuDriver(); + static const ze_driver_uuid_t uuid; Logger log; diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index 8883bb99dd178e..0df0c5d66169a4 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -188,7 +188,7 @@ static inline uint32_t findGroupOrdinal(ze_device_handle_t device_handle, const "zeDeviceGetCommandQueueGroupProperties", zeDeviceGetCommandQueueGroupProperties(device_handle, &command_queue_group_count, nullptr)); - log.debug("ZeroDevice::ZeroDevice - resize command_queue_group_count"); + log.debug("zero_utils::findGroupOrdinal - resize command_queue_group_count"); command_group_properties.resize(command_queue_group_count); for (auto& prop : command_group_properties) { diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 9b5b1b4540fbe7..61999376680e90 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -37,7 +37,7 @@ class EventPool { class Event { public: Event() = delete; - Event(const ze_event_pool_handle_t& event_pool, uint32_t event_index); + Event(const std::shared_ptr& event_pool, uint32_t event_index); Event(const Event&) = delete; Event(Event&&) = delete; Event& operator=(const Event&) = delete; @@ -51,6 +51,7 @@ class Event { ~Event(); private: + std::shared_ptr _event_pool; ze_event_handle_t _handle = nullptr; Logger _log; diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp index e87f8db788b9b8..b069bd64244142 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_init.cpp @@ -4,6 +4,7 @@ #include "intel_npu/utils/zero/zero_init.hpp" +#include #include #include @@ -53,30 +54,93 @@ static std::tuple queryDriverExtensionVersion( return std::make_tuple(targetVersion, functionExtName ? 
functionExtName : ""); } -ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder", Logger::global().level()) { - log.debug("ZeroInitStructsHolder - performing zeInit on VPU only"); - THROW_ON_FAIL_FOR_LEVELZERO("zeInit", zeInit(ZE_INIT_FLAG_VPU_ONLY)); - - uint32_t drivers = 0; - THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGet", zeDriverGet(&drivers, nullptr)); +void ZeroInitStructsHolder::initNpuDriver() { + auto setNpuDriver = [&](uint32_t drivers_count, std::vector all_drivers) { + driver_properties.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; + log.debug("ZeroInitStructsHolder::initNpuDriver - setting driver properties to " + "ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES"); + for (uint32_t i = 0; i < drivers_count; ++i) { + zeDriverGetProperties(all_drivers[i], &driver_properties); + + if (memcmp(&driver_properties.uuid, &uuid, sizeof(uuid)) == 0) { + driver_handle = all_drivers[i]; + break; + } + } + if (driver_handle == nullptr) { + OPENVINO_THROW("NPU driver wasn't found!"); + } + }; + + auto fallbackToZeDriverGet = [&]() { + log.debug("ZeroInitStructsHolder - zeInitDrivers not supported, fallback to zeDriverGet"); + + uint32_t drivers_count = 0; + THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGet", zeDriverGet(&drivers_count, nullptr)); + + std::vector all_drivers(drivers_count); + THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGet", zeDriverGet(&drivers_count, all_drivers.data())); + + // Get our target driver + setNpuDriver(drivers_count, std::move(all_drivers)); + }; + + zel_version_t loader_version = {}; + size_t num_components; + auto result = zelLoaderGetVersions(&num_components, nullptr); + if (result == ZE_RESULT_SUCCESS) { + zel_component_version_t* versions = new zel_component_version_t[num_components]; + result = zelLoaderGetVersions(&num_components, versions); + + if (result == ZE_RESULT_SUCCESS) { + for (size_t i = 0; i < num_components; ++i) { + if (strncmp(versions[i].component_name, "loader", strlen("loader")) == 0) { + loader_version = versions[i].component_lib_version; + + log.debug("ZeroInitStructsHolder - ze_loader.dll version: %d.%d.%d", + loader_version.major, + loader_version.minor, + loader_version.patch); + } + } + } - std::vector all_drivers(drivers); - THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGet", zeDriverGet(&drivers, all_drivers.data())); + delete[] versions; + } - // Get our target driver - driver_properties.stype = ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES; - log.debug("ZeroInitStructsHolder - setting driver properties to ZE_STRUCTURE_TYPE_DRIVER_PROPERTIES"); - for (uint32_t i = 0; i < drivers; ++i) { - zeDriverGetProperties(all_drivers[i], &driver_properties); + if (loader_version.major > 1 || (loader_version.major == 1 && loader_version.minor > 18) || + (loader_version.major == 1 && loader_version.minor == 18 && loader_version.patch >= 5)) { + uint32_t drivers_count = 0; + ze_init_driver_type_desc_t desc = {}; + desc.flags = ZE_INIT_DRIVER_TYPE_FLAG_NPU; + auto result = zeInitDrivers(&drivers_count, nullptr, &desc); + if (result != ZE_RESULT_SUCCESS) { + fallbackToZeDriverGet(); + return; + } - if (memcmp(&driver_properties.uuid, &uuid, sizeof(uuid)) == 0) { - driver_handle = all_drivers[i]; - break; + std::vector all_drivers(drivers_count); + result = zeInitDrivers(&drivers_count, all_drivers.data(), &desc); + if (result != ZE_RESULT_SUCCESS) { + fallbackToZeDriverGet(); + return; } + + // Get our target driver + setNpuDriver(drivers_count, std::move(all_drivers)); + + return; } - if (driver_handle == nullptr) { - OPENVINO_THROW("zeDriverGet failed to return NPU 
driver"); - } + + fallbackToZeDriverGet(); +} + +ZeroInitStructsHolder::ZeroInitStructsHolder() : log("NPUZeroInitStructsHolder", Logger::global().level()) { + log.debug("ZeroInitStructsHolder - performing zeInit on NPU only"); + THROW_ON_FAIL_FOR_LEVELZERO("zeInit", zeInit(ZE_INIT_FLAG_VPU_ONLY)); + + log.debug("ZeroInitStructsHolder - initialize NPU Driver"); + initNpuDriver(); // Check L0 API version THROW_ON_FAIL_FOR_LEVELZERO("zeDriverGetApiVersion", zeDriverGetApiVersion(driver_handle, &ze_drv_api_version)); diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 858e65d4b5e6ee..d95b0e172a7d64 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -24,9 +24,11 @@ EventPool::~EventPool() { } } -Event::Event(const ze_event_pool_handle_t& event_pool, uint32_t event_index) : _log("Event", Logger::global().level()) { +Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) + : _event_pool(event_pool), + _log("Event", Logger::global().level()) { ze_event_desc_t event_desc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, event_index, 0, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(event_pool, &event_desc, &_handle)); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(_event_pool->handle(), &event_desc, &_handle)); } void Event::AppendSignalEvent(CommandList& command_list) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index 5d023fe9d0bee6..e4a49ce9b7ccdb 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -19,6 +19,12 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, ::testing::ValuesIn(configsInferRequestRunTests)), InferRequestRunTests::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + RunSeqTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + InferRequestRunTests::getTestCaseName); + const std::vector batchingConfigs = { {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)}, {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)}, @@ -29,3 +35,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), ::testing::ValuesIn(batchingConfigs)), InferRequestRunTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + BatchingRunSeqTests, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(batchingConfigs)), + InferRequestRunTests::getTestCaseName); diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 20be5ed25edd27..07466677b9d547 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -103,9 +103,7 @@ class InferRequestRunTests : public ov::test::behavior::OVPluginTestBase, APIBaseTest::TearDown(); } - std::shared_ptr createBatchingModel(element::Type type, - const PartialShape& shape, - 
const ov::Layout& layout) { + std::shared_ptr createModel(element::Type type, const PartialShape& shape, const ov::Layout& layout) { ResultVector res; ParameterVector params; @@ -352,7 +350,7 @@ TEST_P(BatchingRunTests, CheckBatchingSupportInfer) { ov::InferRequest inference_request; auto batch_shape = Shape{4, 2, 32, 32}; - std::shared_ptr ov_model_batch = createBatchingModel(element::f32, batch_shape, "N..."); + std::shared_ptr ov_model_batch = createModel(element::f32, batch_shape, "N..."); OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model_batch, target_device, configuration)); OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); @@ -365,7 +363,7 @@ TEST_P(BatchingRunTests, CheckBatchingSupportAsync) { ov::InferRequest inference_request; auto batch_shape = Shape{4, 2, 32, 32}; - std::shared_ptr ov_model_batch = createBatchingModel(element::f32, batch_shape, "N..."); + std::shared_ptr ov_model_batch = createModel(element::f32, batch_shape, "N..."); OV_ASSERT_NO_THROW(compiled_model = core->compile_model(ov_model_batch, target_device, configuration)); OV_ASSERT_NO_THROW(inference_request = compiled_model.create_infer_request()); @@ -396,7 +394,7 @@ TEST_P(BatchingRunTests, UseCompilerBatchingErrorPluginBatching) { TEST_P(BatchingRunTests, SetInputTensorInfer) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; compiled_model = core->compile_model(model, target_device, configuration); @@ -422,7 +420,7 @@ TEST_P(BatchingRunTests, SetInputTensorInfer) { TEST_P(BatchingRunTests, SetInputTensorAsync) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; compiled_model = core->compile_model(model, target_device, configuration); @@ -449,7 +447,7 @@ TEST_P(BatchingRunTests, SetInputTensorAsync) { TEST_P(BatchingRunTests, SetInputTensorInfer_Caching) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; m_cache_dir = generateCacheDirName(GetTestName()); @@ -480,7 +478,7 @@ TEST_P(BatchingRunTests, SetInputTensorInfer_Caching) { TEST_P(BatchingRunTests, CheckTwoRunsInfer) { auto batch_shape = Shape{4, 2, 2, 2}; auto shape_size = ov::shape_size(batch_shape); - auto model = createBatchingModel(element::f32, batch_shape, "N..."); + auto model = createModel(element::f32, batch_shape, "N..."); float* buffer = new float[shape_size]; auto context = core->get_default_context(target_device); @@ -524,6 +522,250 @@ TEST_P(BatchingRunTests, CheckTwoRunsInfer) { delete[] buffer; } +using RunSeqTests = InferRequestRunTests; + +TEST_P(RunSeqTests, CheckMultipleRunsSeq0) { + auto shape = Shape{1, 64, 64, 256}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, 
configuration); + + const uint32_t inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + for (uint32_t i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[0].set_input_tensor(input_tensor); + inference_request[0].set_output_tensor(output_tensor[0]); + + const uint32_t runs = 10; + for (uint32_t z = 0; z < runs; z++) { + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(z); + } + + inference_request[0].start_async(); // Adds '1' to each element + + for (uint32_t i = 1; i < inferences; i++) { + inference_request[i].set_input_tensor(output_tensor[i - 1]); + inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); // Adds '1' to each element + } + + inference_request[inferences - 1].wait(); + + float expected_result = static_cast(z) + 1.f; + + for (uint32_t i = 0; i < inferences; i++) { + auto* output_tensor_data = reinterpret_cast(output_tensor[i].data()); + for (size_t j = 0; j < shape_size; ++j) { + EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5) + << "Run=" << z << "Output=" << i << " Expected=" << expected_result + << ", actual=" << output_tensor_data[j] << " for index " << j; + } + expected_result++; + } + } +} + +TEST_P(RunSeqTests, CheckMultipleRunsSeq1) { + auto shape = Shape{1, 64, 64, 256}; + auto shape_size = ov::shape_size(shape); + auto model = createModel(element::f32, shape, "N..."); + + auto context = core->get_default_context(target_device); + + configuration[ov::intel_npu::run_inferences_sequentially.name()] = true; + configuration[ov::intel_npu::tiles.name()] = 2; + compiled_model = core->compile_model(model, target_device, configuration); + + const int inferences = 32; + std::array inference_request; + ov::Tensor input_tensor; + std::array output_tensor; + + input_tensor = context.create_host_tensor(ov::element::f32, shape); + + for (int i = 0; i < inferences; i++) { + inference_request[i] = compiled_model.create_infer_request(); + output_tensor[i] = context.create_host_tensor(ov::element::f32, shape); + } + + inference_request[inferences - 1].set_input_tensor(input_tensor); + inference_request[inferences - 1].set_output_tensor(output_tensor[inferences - 1]); + + const int runs = 10; + for (int z = 0; z < runs; z++) { + auto* input_data = reinterpret_cast(input_tensor.data()); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(z); + } + + inference_request[inferences - 1].start_async(); // Adds '1' to each element + + for (int i = inferences - 2; i >= 0; i--) { + inference_request[i].set_input_tensor(output_tensor[i + 1]); + inference_request[i].set_output_tensor(output_tensor[i]); + + inference_request[i].start_async(); // Adds '1' to each element + } + + inference_request[0].wait(); + + float expected_result = static_cast(z) + 1.f; + + for (int i = inferences - 1; i >= 0; i--) { + auto* output_tensor_data = reinterpret_cast(output_tensor[i].data()); + for (size_t j = 0; j < shape_size; ++j) { + EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5) + << "Run=" << z << "Output=" << i << " Expected=" << expected_result + << ", actual=" << output_tensor_data[j] << " for index " << j; + } + expected_result++; + } + } +} + +TEST_P(RunSeqTests, 
CheckMultipleRunsSeq2) {
+    auto shape = Shape{1, 64, 64, 256};
+    auto shape_size = ov::shape_size(shape);
+    auto model = createModel(element::f32, shape, "N...");
+
+    auto context = core->get_default_context(target_device);
+
+    configuration[ov::intel_npu::run_inferences_sequentially.name()] = true;
+    configuration[ov::intel_npu::tiles.name()] = 2;
+    compiled_model = core->compile_model(model, target_device, configuration);
+
+    const int inferences = 32;
+    std::array<ov::InferRequest, inferences> inference_request;
+    ov::Tensor input_tensor;
+    std::array<ov::Tensor, inferences> output_tensor;
+
+    input_tensor = context.create_host_tensor(ov::element::f32, shape);
+
+    for (int i = 0; i < inferences; i++) {
+        inference_request[i] = compiled_model.create_infer_request();
+        output_tensor[i] = context.create_host_tensor(ov::element::f32, shape);
+    }
+
+    inference_request[inferences - 1].set_input_tensor(input_tensor);
+    inference_request[inferences - 1].set_output_tensor(output_tensor[inferences - 1]);
+
+    auto* input_data = reinterpret_cast<float*>(input_tensor.data());
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = 1.f;
+    }
+
+    inference_request[inferences - 1].start_async();
+
+    for (int i = inferences - 2; i >= 0; i--) {
+        inference_request[i].set_input_tensor(output_tensor[i + 1]);
+        inference_request[i].set_output_tensor(output_tensor[i]);
+
+        inference_request[i].start_async();
+    }
+
+    inference_request[0].wait();
+
+    try {
+        inference_request[5].start_async();
+        inference_request[5].wait();
+    } catch (const std::exception& ex) {
+        ASSERT_FALSE(false) << ex.what();
+        return;
+    }
+
+    ASSERT_FALSE(true) << "An exception was expected, but none was thrown!";
+}
+
+TEST_P(RunSeqTests, CheckMultipleRunsSeq3) {
+    auto shape = Shape{1, 64, 64, 256};
+    auto model = createModel(element::f32, shape, "N...");
+
+    configuration[ov::intel_npu::run_inferences_sequentially.name()] = true;
+    configuration[ov::intel_npu::tiles.name()] = 2;
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    OV_EXPECT_THROW(inference_request.infer(),
+                    ov::Exception,
+                    HasSubstr("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!"));
+}
+
+using BatchingRunSeqTests = InferRequestRunTests;
+
+TEST_P(BatchingRunSeqTests, CheckMultipleBatchingRunsSeq) {
+    auto shape = Shape{4, 2, 64, 64};
+    auto shape_size = ov::shape_size(shape);
+    auto model = createModel(element::f32, shape, "N...");
+
+    auto context = core->get_default_context(target_device);
+
+    configuration[ov::intel_npu::run_inferences_sequentially.name()] = true;
+    configuration[ov::intel_npu::tiles.name()] = 2;
+    compiled_model = core->compile_model(model, target_device, configuration);
+
+    const uint32_t inferences = 32;
+    std::array<ov::InferRequest, inferences> inference_request;
+    ov::Tensor input_tensor;
+    std::array<ov::Tensor, inferences> output_tensor;
+
+    input_tensor = context.create_host_tensor(ov::element::f32, shape);
+    for (uint32_t i = 0; i < inferences; i++) {
+        inference_request[i] = compiled_model.create_infer_request();
+        output_tensor[i] = context.create_host_tensor(ov::element::f32, shape);
+    }
+
+    inference_request[0].set_input_tensor(input_tensor);
+    inference_request[0].set_output_tensor(output_tensor[0]);
+
+    const uint32_t runs = 10;
+    for (uint32_t z = 0; z < runs; z++) {
+        auto* input_data = reinterpret_cast<float*>(input_tensor.data());
+        for (size_t i = 0; i < shape_size; ++i) {
+            input_data[i] = static_cast<float>(z);
+        }
+
+        inference_request[0].start_async();  // Adds '1' to each element
+
+        for (uint32_t i = 1; i < inferences; i++) {
+            inference_request[i].set_input_tensor(output_tensor[i - 1]);
+            inference_request[i].set_output_tensor(output_tensor[i]);
+
+            inference_request[i].start_async();  // Adds '1' to each element
+        }
+
+        inference_request[inferences - 1].wait();
+
+        float expected_result = static_cast<float>(z) + 1.f;
+
+        for (uint32_t i = 0; i < inferences; i++) {
+            auto* output_tensor_data = reinterpret_cast<float*>(output_tensor[i].data());
+            for (size_t j = 0; j < shape_size; ++j) {
+                EXPECT_NEAR(output_tensor_data[j], expected_result, 1e-5)
+                    << "Run=" << z << " Output=" << i << " Expected=" << expected_result
+                    << ", actual=" << output_tensor_data[j] << " for index " << j;
+            }
+            expected_result++;
+        }
+    }
+}
+
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov
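
Taken together, the new `RUN_INFERENCES_SEQUENTIALLY` property and the tests above describe one usage model: create all infer requests up front, chain each request's output tensor into the next request's input, submit with `start_async()` only (plain `infer()` throws in this mode), and `wait()` on the last request in the chain. Below is a hedged usage sketch built on the public OpenVINO 2.0 C++ API; the property key string is taken from this diff, the model path and chain length are placeholders, and it assumes a model whose output shape matches its input shape, as in the tests above:

    #include <iostream>
    #include <vector>

    #include "openvino/openvino.hpp"

    int main() {
        ov::Core core;

        // "NPU_RUN_INFERENCES_SEQUENTIALLY" is the key introduced by this change;
        // "model.xml" is a placeholder path.
        auto compiled = core.compile_model("model.xml", "NPU",
                                           ov::AnyMap{{"NPU_RUN_INFERENCES_SEQUENTIALLY", true}});

        const size_t n = 4;  // arbitrary chain length for illustration
        std::vector<ov::InferRequest> requests;
        for (size_t i = 0; i < n; ++i) {
            requests.push_back(compiled.create_infer_request());
        }

        // Chain the requests: request i consumes the output tensor of request i - 1.
        for (size_t i = 1; i < n; ++i) {
            requests[i].set_input_tensor(requests[i - 1].get_output_tensor());
        }

        // Only asynchronous submission is allowed in this mode; submit in order...
        for (auto& rq : requests) {
            rq.start_async();
        }
        // ...and wait on the last request, which completes once the whole chain has run.
        requests.back().wait();

        std::cout << "chain finished\n";
        return 0;
    }
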