From 228e7ebb899a8558387da380b7f98920e68d87d0 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 3 Sep 2025 17:25:52 +0000
Subject: [PATCH 01/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - no metadata changes

---
 .../src/backend/src/zero_infer_request.cpp |  14 +-
 .../intel_npu/src/plugin/src/plugin.cpp    | 137 +++++++++++++++++-
 2 files changed, 132 insertions(+), 19 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 3d2df2bbf05212..1ed747b5a3654b 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -80,10 +80,6 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if (!desc.shapeFromIRModel.has_value() || !desc.shapeFromIRModel.value().is_dynamic()) {
-        return std::nullopt;
-    }
-
     if (batchSize.has_value()) {
         return batchSize.value();
     }
@@ -92,11 +88,7 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if ((*desc.shapeFromIRModel)[intel_npu::utils::BATCH_AXIS].is_dynamic()) {
-        return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
-    }
-
-    return std::nullopt;
+    return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
 }
 
 }  // namespace
@@ -788,8 +780,8 @@ void ZeroInferRequest::infer_async() {
                                copied_bytes_from_user,
                                get_level_zero_input(inputIndex)->get_byte_size());
             }
-            OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
-                            "Bytes copied must be equal");
+            // OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
+            //                 "Bytes copied must be equal");
         }
 
         ++inputIndex;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index a40740e5e9c748..9cd9580f6da054 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -539,9 +539,110 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
+    std::set> batchedInputs;
+    std::set> batchedOutputs;
+    std::set sBatchSize;
+
+    const auto& params = model->get_parameters();
+    for (size_t input_id = 0; input_id < params.size(); input_id++) {
+        const auto& input = params[input_id];
+        const auto& shape = input->get_partial_shape();
+        ov::Layout layout = ov::layout::get_layout(input);
+
+        // Batching on plugin is working only when batching is found on 0th dimension
+        if ((shape.size() && shape[0].get_max_length() > 1) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+            const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
+            batchedInputs.insert(params[input_id]->output(0));
+
+            if (shape.rank().is_dynamic()) {
+                OPENVINO_THROW("Shapes with dynamic rank are not supported.");
+            } else {
+                sBatchSize.insert(staticShape[0]);
+            }
+        } else {
+            // gather some diagnostic info
+            std::optional batch_dim_index_detected;
+            for (size_t i = 1; i < shape.size(); i++) {
+                if (shape[i].has_symbol()) {
+                    batch_dim_index_detected = i;
+                    break;
+                }
+            }
+            std::stringstream sstream;
+            sstream << "Only networks with inputs batched by 0th dimension are supported. ";
"; + if (batch_dim_index_detected.has_value()) { + sstream << "The batch has been detected on: " << batch_dim_index_detected.value() + << " dimension instead. "; + } else { + sstream << "The batch hasn't been detected at all. "; + } + sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() + << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); + logger.info("%s", sstream.str()); + return false; + } + } + for (const auto& output : model->get_results()) { + const auto& shape = output->get_output_partial_shape(0); + ov::Layout layout = ov::layout::get_layout(output); + + // Batching on plugin is working only when batching is found on 0th dimension + if ((shape.size() && shape[0].get_max_length() > 1) || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) { + const auto& node = output->input_value(0); + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); + batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[0]); + } + } else { + logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " + "the name: %s, layout: %s", + output->get_friendly_name(), + layout.to_string()); + return false; + } + } + if (!batchedInputs.size() || !batchedOutputs.size()) { + logger.info( + "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", + batchedInputs.size(), + batchedOutputs.size()); + return false; + } + + if (sBatchSize.size() != 1) { + logger.info("Batching size shall have same value for all tensors! Got unique batch sizes number: %ld", + sBatchSize.size()); + return false; + } + + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { + logger.info("%s: %s has shape value: %s", + nodeType, + ov_node.get_any_name(), + ov_node.get_partial_shape().to_string()); + }; + + for (const auto& ov_node : batchedInputs) { + node_info_printer(ov_node, "Input"); + } + for (const auto& ov_node : batchedOutputs) { + node_info_printer(ov_node, "Output"); + } + + return true; +} + std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model"); + auto modelForCompilation = model->clone(); // Before going any further: if // ... 1 - NPUW mode is activated @@ -589,11 +690,16 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto device = _backend == nullptr ? 
     localConfig.update({{ov::intel_npu::platform.name(), platform}});
 
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
-        !localConfig.has(ov::intel_npu::batch_mode.name())) {
+    auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
-        strStream << ov::intel_npu::BatchMode::AUTO;
+        strStream << mode;
+        _logger.info("Setting batching mode to %s.", strStream.str());
         localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
+    };
+
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
+        !localConfig.has(ov::intel_npu::batch_mode.name())) {
+        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
     }
 
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
@@ -601,9 +707,24 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
         OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
     }
 
-    std::stringstream strStream;
-    strStream << ov::intel_npu::BatchMode::COMPILER;
-    localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
+    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    }
+
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
+        bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
+                                 localConfig.get() == ov::intel_npu::BatchMode::AUTO;
+        bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+        if (autoOrPluginBatch && pluginBatchingIsSupported) {
+            try {
+                _logger.info("Attempting to handle batching on the plugin side.");
+                ov::set_batch(modelForCompilation, 1);
+            } catch (const std::exception& ex) {
+                _logger.info("Couldn't reshape the model. Batching will be handed by compiler.", ex.what());
+            }
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        } else {
+            _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+        }
     }
 
     // Update stepping w/ information from driver, unless provided by user or we are off-device
@@ -654,10 +775,10 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
         _logger.debug("performing compile");
 
         if (!localConfig.get()) {
-            graph = compiler->compile(model->clone(), localConfig);
+            graph = compiler->compile(modelForCompilation->clone(), localConfig);
         } else {
             check_weightless_cache_attribute_occurrence(model);
-            graph = compiler->compileWS(model->clone(), localConfig);
+            graph = compiler->compileWS(modelForCompilation->clone(), localConfig);
         }
     } catch (const std::exception& ex) {
         OPENVINO_THROW(ex.what());

From 09307ccbab62e7ee64525ea407588163985aa813 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 3 Sep 2025 22:06:46 +0000
Subject: [PATCH 02/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - no metadata changes - fix static tests

---
 .../intel_npu/src/backend/src/zero_infer_request.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 1ed747b5a3654b..e23f6b0595c193 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -80,12 +80,13 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if (batchSize.has_value()) {
-        return batchSize.value();
+    // Make sure that PLUGIN batch mode is currently active
+    if (*desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        return std::nullopt;
     }
 
-    if (tensor->get_shape().empty() || *desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
-        return std::nullopt;
+    if (batchSize.has_value()) {
+        return batchSize.value();
     }
 
     return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];

From 35760fe274cd95d878d712dca907d7a00de7eefc Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Tue, 9 Sep 2025 22:59:25 +0000
Subject: [PATCH 03/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - fix BA issues - treat every model with
 batch 1 as a potentially dynamically batched one

---
 .../intel_npu/src/plugin/src/plugin.cpp | 40 ++++++++++++++-----
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 9cd9580f6da054..1465bd5a584a0b 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -63,10 +63,17 @@ std::shared_ptr create_dummy_model(const std::vector& i
             continue;
         }
 
+        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                                  : inputDescriptor.shapeFromCompiler;
+        // Treat every model with batch 1 as a potentially dynamically batched one.
+        // TODO: should we protect this part with a certain condition?
+        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         std::shared_ptr parameter = std::make_shared(
             inputDescriptor.precision,
-            inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                         : inputDescriptor.shapeFromCompiler);
+            shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -86,10 +93,16 @@ std::shared_ptr create_dummy_model(const std::vector& i
         std::shared_ptr constantDummy =
             std::make_shared(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
 
+        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
+                                                                   : outputDescriptor.shapeFromCompiler;
+        // Treat every model with batch 1 as a potentially dynamically batched one.
+        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         const std::shared_ptr& tensorDummy = std::make_shared(
             outputDescriptor.precision,
-            outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                          : outputDescriptor.shapeFromCompiler,
+            shape,
             outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared(constantDummy));
@@ -713,17 +726,22 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
         bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-        if (autoOrPluginBatch && pluginBatchingIsSupported) {
-            try {
+        try {
+            const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
+
+            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
                 _logger.info("Attempting to handle batching on the plugin side.");
                 ov::set_batch(modelForCompilation, 1);
-            } catch (const std::exception& ex) {
-                _logger.info("Couldn't reshape the model. Batching will be handed by compiler.", ex.what());
+                // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
+            } else {
+                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
             }
+
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        } catch (const std::exception& ex) {
+            _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
             updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-        } else {
-            _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
         }
     }

From ef0744d5e6450eca9a905902b23114bc4a3065b4 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 11:54:44 +0000
Subject: [PATCH 04/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - validateModelBatch conditions

---
 .../intel_npu/src/backend/src/zero_infer_request.cpp | 4 ++--
 src/plugins/intel_npu/src/plugin/src/plugin.cpp      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index e23f6b0595c193..055923159cd14c 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -781,8 +781,8 @@ void ZeroInferRequest::infer_async() {
                                copied_bytes_from_user,
                                get_level_zero_input(inputIndex)->get_byte_size());
             }
-            // OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
-            //                 "Bytes copied must be equal");
+            OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
+                            "Bytes copied must be equal");
         }
 
         ++inputIndex;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 1465bd5a584a0b..dbe0c413cd9c65 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -564,8 +564,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[0].get_max_length() > 1) ||
-            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -602,8 +602,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[0].get_max_length() > 1) ||
-            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
             batchedOutputs.insert(ov::Output(node.get_node(), node.get_index()));

From 5477207234a5fea13d3951709e148bb8cf07435a Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 14:23:18 +0000
Subject: [PATCH 05/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - dynamic dims limitation

---
 .../intel_npu/src/plugin/src/plugin.cpp | 39 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index dbe0c413cd9c65..365b0721d43375 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -552,11 +552,46 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+bool checkDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    for (const auto& param : model->get_parameters()) {
+        const auto& shape = param->get_partial_shape();
+
+        // Check all dimensions except the first one (batch dimension)
+        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+            if (shape[dim_idx].is_dynamic()) {
+                return true;  // Found dynamic dimension other than batch
+            }
+        }
+    }
+
+    // Check results (outputs)
+    for (const auto& result : model->get_results()) {
+        const auto& shape = result->get_output_partial_shape(0);
+
+        // Check all dimensions except the first one (batch dimension)
+        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+            if (shape[dim_idx].is_dynamic()) {
+                return true;  // Found dynamic dimension other than batch
+            }
+        }
+    }
+
+    return false;  // No dynamic dimensions found other than batch
+}
+
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
     std::set sBatchSize;
 
+    // Limitation: Plugin batching is not supported when there are dynamic
+    // dimensions other than the batch dimension.
+    const bool otherDynamicDims = checkDynamicDims(model);
+    if (otherDynamicDims) {
+        return false;
+    }
+
     const auto& params = model->get_parameters();
     for (size_t input_id = 0; input_id < params.size(); input_id++) {
         const auto& input = params[input_id];
@@ -572,7 +607,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             if (shape.rank().is_dynamic()) {
                 OPENVINO_THROW("Shapes with dynamic rank are not supported.");
             } else {
-                sBatchSize.insert(staticShape[0]);
+                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
             }
         } else {
             // gather some diagnostic info
@@ -611,7 +646,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             if (shape.rank().is_dynamic()) {
                 OPENVINO_THROW("Shapes with dynamic rank are not supported.");
             } else {
-                sBatchSize.insert(staticShape[0]);
+                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
             }
         } else {
             logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by "
                         "the name: %s, layout: %s",
                         output->get_friendly_name(),
                         layout.to_string());
             return false;
         }
     }

From c1eddb036124517b452bd972ceed1162568730db Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 19:09:22 +0000
Subject: [PATCH 06/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - additional checks

---
 .../intel_npu/src/plugin/src/plugin.cpp | 135 +++++++++++++++++-----
 1 file changed, 103 insertions(+), 32 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 365b0721d43375..73dc120f49bfc0 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -39,6 +39,104 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool validateReshapedModel(const std::vector& inputDescriptors,
+                           const std::vector& outputDescriptors) {
+    std::set batchSizes;
+    bool hasBatchedInputs = false;
+    bool hasBatchedOutputs = false;
+
+    // Check input descriptors
+    for (const IODescriptor& inputDescriptor : inputDescriptors) {
+        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
+            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
+            continue;
+        }
+
+        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                                  : inputDescriptor.shapeFromCompiler;
+
+        // Check for dynamic dimensions other than batch dimension
+        if (hasOtherDynamicDims(shape)) {
+            return false;  // Plugin batching not supported with other dynamic dims
+        }
+
+        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
+        if (shape.size() > 0 &&
+            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
+            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+
+            hasBatchedInputs = true;
+            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
+        }
+    }
+
+    // Check output descriptors
+    for (const IODescriptor& outputDescriptor : outputDescriptors) {
+        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
+            outputDescriptor.isInitOutputWeights) {
+            continue;
+        }
+
+        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
+                                                                   : outputDescriptor.shapeFromCompiler;
+
+        // Check for dynamic dimensions other than batch dimension
+        if (hasOtherDynamicDims(shape)) {
+            return false;  // Plugin batching not supported with other dynamic dims
+        }
+
+        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
+        if (shape.size() > 0 &&
+            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
+            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+
+            hasBatchedOutputs = true;
+            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
+        }
+    }
+
+    // Plugin batching is applied if:
+    // 1. Both inputs and outputs have batched dimensions
+    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
+    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
+    // 4. No other dynamic dimensions exist (checked above)
+    return hasBatchedInputs && hasBatchedOutputs &&
+           batchSizes.size() == 1 &&
+           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
+}
+
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -155,6 +155,8 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
+    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
+
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,7 +167,7 @@ std::shared_ptr create_dummy_model(const std::vector& i
                                                                   : inputDescriptor.shapeFromCompiler;
         // Treat every model with batch 1 as a potentially dynamically batched one.
         // TODO: should we protect this part with a certain condition?
-        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        if (pluginBatchingIsApplied) {
             shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
         }
 
@@ -195,7 +196,7 @@ std::shared_ptr create_dummy_model(const std::vector& i
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
         // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        if (pluginBatchingIsApplied) {
             shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
         }
@@ -652,34 +652,6 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
-bool checkDynamicDims(const std::shared_ptr& model) {
-    // Check parameters (inputs)
-    for (const auto& param : model->get_parameters()) {
-        const auto& shape = param->get_partial_shape();
-
-        // Check all dimensions except the first one (batch dimension)
-        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-            if (shape[dim_idx].is_dynamic()) {
-                return true;  // Found dynamic dimension other than batch
-            }
-        }
-    }
-
-    // Check results (outputs)
-    for (const auto& result : model->get_results()) {
-        const auto& shape = result->get_output_partial_shape(0);
-
-        // Check all dimensions except the first one (batch dimension)
-        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-            if (shape[dim_idx].is_dynamic()) {
-                return true;  // Found dynamic dimension other than batch
-            }
-        }
-    }
-
-    return false;  // No dynamic dimensions found other than batch
-}
-
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
@@ -687,8 +659,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
 
     // Limitation: Plugin batching is not supported when there are dynamic
     // dimensions other than the batch dimension.
-    const bool otherDynamicDims = checkDynamicDims(model);
-    if (otherDynamicDims) {
+    if (checkModelDynamicDims(model)) {
         return false;
     }

From 9e71e24bb582fb1c727581cd24e1b0060497903e Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Mon, 22 Sep 2025 22:07:19 +0000
Subject: [PATCH 07/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - simplify

---
 .../src/common/src/sync_infer_request.cpp |  18 +-
 .../intel_npu/src/plugin/src/plugin.cpp   | 197 +++++++-----------
 2 files changed, 82 insertions(+), 133 deletions(-)

diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index d3eed4e7357005..b17db067726456 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -214,7 +214,8 @@ void SyncInferRequest::check_tensor(const ov::Output& port,
         }
     }
 
-    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
+    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape() ||
+                        tensor->get_shape()[utils::BATCH_AXIS] % port.get_shape()[utils::BATCH_AXIS] == 0,
                     "The ",
                     tensor_type,
                     " tensor size is not equal to the model ",
@@ -274,13 +275,14 @@ void SyncInferRequest::check_batched_tensors(const ov::Output& p
                     port.get_partial_shape());
 
     auto batch = port.get_partial_shape()[batch_idx];
-    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
-                    "set_input_tensors/set_tensors error. Input shape ",
-                    port.get_partial_shape(),
-                    "batch ",
-                    batch,
-                    "doesn't match with total blobs count: ",
-                    tensors_size);
+    OPENVINO_ASSERT(
+        batch.is_dynamic() || batch.get_length() == tensors_size || tensors_size % batch.get_length() == 0,
+        "set_input_tensors/set_tensors error. Input shape ",
+        port.get_partial_shape(),
+        "batch ",
+        batch,
+        "doesn't match with total blobs count: ",
+        tensors_size);
 
     auto batched_shape = tensors[utils::BATCH_AXIS]->get_shape();
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 73dc120f49bfc0..6c07325fcfb038 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -39,104 +39,6 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
-bool hasOtherDynamicDims(const ov::PartialShape& shape) {
-    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-        if (shape[dim_idx].is_dynamic()) {
-            return true;  // Found dynamic dimension other than batch
-        }
-    }
-    return false;
-}
-
-bool checkModelDynamicDims(const std::shared_ptr& model) {
-    // Check parameters (inputs)
-    const auto& params = model->get_parameters();
-    for (const auto& param : params) {
-        const auto& shape = param->get_partial_shape();
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    // Check results (outputs)
-    const auto& results = model->get_results();
-    for (const auto& result : results) {
-        const auto& shape = result->get_output_partial_shape(0);
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool validateReshapedModel(const std::vector& inputDescriptors,
-                           const std::vector& outputDescriptors) {
-    std::set batchSizes;
-    bool hasBatchedInputs = false;
-    bool hasBatchedOutputs = false;
-
-    // Check input descriptors
-    for (const IODescriptor& inputDescriptor : inputDescriptors) {
-        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
-            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
-            continue;
-        }
-
-        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                                  : inputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 &&
-            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-
-            hasBatchedInputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Check output descriptors
-    for (const IODescriptor& outputDescriptor : outputDescriptors) {
-        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
-            outputDescriptor.isInitOutputWeights) {
-            continue;
-        }
-
-        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                                   : outputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 &&
-            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-
-            hasBatchedOutputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Plugin batching is applied if:
-    // 1. Both inputs and outputs have batched dimensions
-    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
-    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
-    // 4. No other dynamic dimensions exist (checked above)
-    return hasBatchedInputs && hasBatchedOutputs &&
-           batchSizes.size() == 1 &&
-           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
-}
-
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -155,8 +57,6 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
-
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,15 +65,9 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        // TODO: should we protect this part with a certain condition?
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        std::shared_ptr parameter = std::make_shared(
-            inputDescriptor.precision,
-            shape);
+        std::shared_ptr parameter =
+            std::make_shared(inputDescriptor.precision, shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -195,15 +89,11 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        const std::shared_ptr& tensorDummy = std::make_shared(
-            outputDescriptor.precision,
-            shape,
-            outputDescriptor.outputTensorNames);
+        const std::shared_ptr& tensorDummy =
+            std::make_shared(outputDescriptor.precision,
+                             shape,
+                             outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared(constantDummy));
         result->output(0).set_tensor_ptr(tensorDummy);
@@ -652,6 +542,38 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
@@ -587,8 +592,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -631,8 +637,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
             batchedOutputs.insert(ov::Output(node.get_node(), node.get_index()));
@@ -701,6 +682,21 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
     return true;
 }
 
+void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
+    size_t inputIdx = 0;
+    std::map newShapes;
+    for (auto&& item : model->get_parameters()) {
+        auto layout = item->get_layout();
+        auto partShape = item->get_partial_shape();
+        if (ov::layout::has_batch(layout)) {
+            partShape[ov::layout::batch_idx(layout)] = newBatch;
+        }
+        newShapes.emplace(item->get_friendly_name(), partShape);
+        inputIdx++;
+    }
+    model->reshape(newShapes);
+}
+
 std::shared_ptr Plugin::compile_model(const std::shared_ptr& model,
                                       const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
@@ -769,26 +773,29 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
         try {
             const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
 
-            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
+            if (autoOrPluginBatch && pluginBatchingIsSupported) {
                 _logger.info("Attempting to handle batching on the plugin side.");
-                ov::set_batch(modelForCompilation, 1);
+                try {
+                    ov::set_batch(modelForCompilation, ov::Dimension(1));
+                } catch (const std::exception& ex) {
+                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                    "Trying to debatch it...",
+                                    ex.what());
+                    deBatchModel(modelForCompilation, ov::Dimension(1));
+                    if (!modelForCompilation) {
+                        OPENVINO_THROW("Cannot debatch a model");
+                    }
+                    _logger.info("The model has been debatched successfully");
+                }
                 // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
             } else {
-                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+                _logger.info("Batching will be handed by compiler.");
             }
-
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         } catch (const std::exception& ex) {
             _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
         }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }

From 0d87ab3ddc7f9b497c867eed198fb7ce0b4c3a5d Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Fri, 26 Sep 2025 11:33:23 +0000
Subject: [PATCH 08/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - will be prettier, functionality first

---
 .../src/common/src/sync_infer_request.cpp | 18 ++--
 .../intel_npu/src/plugin/src/plugin.cpp   | 85 ++++++++++++++-----
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index b17db067726456..d3eed4e7357005 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -214,8 +214,7 @@ void SyncInferRequest::check_tensor(const ov::Output& port,
         }
     }
 
-    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape() ||
-                        tensor->get_shape()[utils::BATCH_AXIS] % port.get_shape()[utils::BATCH_AXIS] == 0,
+    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
                     "The ",
                     tensor_type,
                     " tensor size is not equal to the model ",
@@ -275,14 +274,13 @@ void SyncInferRequest::check_batched_tensors(const ov::Output& p
                     port.get_partial_shape());
 
     auto batch = port.get_partial_shape()[batch_idx];
-    OPENVINO_ASSERT(
-        batch.is_dynamic() || batch.get_length() == tensors_size || tensors_size % batch.get_length() == 0,
-        "set_input_tensors/set_tensors error. Input shape ",
-        port.get_partial_shape(),
-        "batch ",
-        batch,
-        "doesn't match with total blobs count: ",
-        tensors_size);
+    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
+                    "set_input_tensors/set_tensors error. Input shape ",
+                    port.get_partial_shape(),
+                    "batch ",
+                    batch,
+                    "doesn't match with total blobs count: ",
+                    tensors_size);
 
     auto batched_shape = tensors[utils::BATCH_AXIS]->get_shape();
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 6c07325fcfb038..fb7c4f742850bb 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -57,6 +57,16 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
+    // Helper function to check if a tensor was originally dynamic
+    auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
+        for (const auto& name : tensorNames) {
+            if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
+                return true;
+            }
+        }
+        return false;
+    };
+
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -66,6 +76,10 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
 
+        if (wasOriginallyDynamic(inputDescriptor.outputTensorNames)) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         std::shared_ptr parameter =
             std::make_shared(inputDescriptor.precision, shape);
 
@@ -90,6 +104,10 @@ std::shared_ptr create_dummy_model(const std::vector& i
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
 
+        if (wasOriginallyDynamic(outputDescriptor.outputTensorNames)) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         const std::shared_ptr& tensorDummy =
             std::make_shared(outputDescriptor.precision,
                              shape,
                              outputDescriptor.outputTensorNames);
@@ -771,31 +789,56 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
         bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        try {
-            const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-
-            if (autoOrPluginBatch && pluginBatchingIsSupported) {
-                _logger.info("Attempting to handle batching on the plugin side.");
-                try {
-                    ov::set_batch(modelForCompilation, ov::Dimension(1));
-                } catch (const std::exception& ex) {
-                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
-                                    "Trying to debatch it...",
-                                    ex.what());
-                    deBatchModel(modelForCompilation, ov::Dimension(1));
-                    if (!modelForCompilation) {
-                        OPENVINO_THROW("Cannot debatch a model");
+        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
+            try {
+                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+
+                if (autoOrPluginBatch && pluginBatchingIsSupported) {
+                    _logger.info("Attempting to handle batching on the plugin side.");
+
+                    // Store dynamic batch info in tensor names BEFORE reshaping
+                    auto encodeDynamicBatchInfo = [](std::shared_ptr model) {
+                        // Encode info in input tensor names
+                        for (auto& input : model->inputs()) {
+                            std::string originalName = input.get_any_name();
+                            std::string newName = originalName + "_DYNBATCH_ORIG";
+                            input.get_tensor().set_names({newName});
+                        }
+
+                        // Encode info in output tensor names
+                        for (auto& output : model->outputs()) {
+                            std::string originalName = output.get_any_name();
+                            std::string newName = originalName + "_DYNBATCH_ORIG";
+                            output.get_tensor().set_names({newName});
+                        }
+                    };
+
+                    try {
+                        encodeDynamicBatchInfo(modelForCompilation);
+                        ov::set_batch(modelForCompilation, ov::Dimension(1));
+                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+                    } catch (const std::exception& ex) {
+                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                        "Trying to debatch it...",
+                                        ex.what());
+                        encodeDynamicBatchInfo(modelForCompilation);
+                        deBatchModel(modelForCompilation, ov::Dimension(1));
+                        if (!modelForCompilation) {
+                            OPENVINO_THROW("Cannot debatch a model");
+                        }
+                        _logger.info("The model has been debatched successfully");
+                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                     }
-                    _logger.info("The model has been debatched successfully");
+                } else {
+                    _logger.info("Batching will be handed by compiler.");
+                    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                 }
-                // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
-            } else {
-                _logger.info("Batching will be handed by compiler.");
+            } catch (const std::exception& ex) {
+                _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.",
+                             ex.what());
+                updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
             }
-        } catch (const std::exception& ex) {
-            _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
-        }
-        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }

From b830d9d6ee0a429410a80a9e8d71cac4ada6641e Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 1 Oct 2025 16:18:23 +0000
Subject: [PATCH 09/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - clean up

---
 .../src/backend/src/zero_infer_request.cpp  |  20 ++-
 .../intel_npu/src/plugin/include/plugin.hpp |   6 +
 .../intel_npu/src/plugin/src/plugin.cpp     | 162 ++++++++++--------
 .../utils/include/intel_npu/utils/utils.hpp |   2 +
 4 files changed, 114 insertions(+), 76 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 055923159cd14c..4199696636e3c1 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -76,12 +76,24 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A
 std::optional determine_dynamic_batch_size(const IODescriptor& desc,
                                            const std::shared_ptr& tensor,
                                            const std::optional batchSize) {
+    // Check if tensor was originally dynamic by looking for encoded markers
+    // This information is needed to restore the original dynamic batching behavior
+    auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
+        for (const auto& name : tensorNames) {
+            if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    auto wasDynamic = wasOriginallyDynamic(desc.outputTensorNames);
+
     if (tensor == nullptr && !batchSize.has_value()) {
         return std::nullopt;
     }
 
-    // Make sure that PLUGIN batch mode is currently active
-    if (*desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+    if (!wasDynamic) {
         return std::nullopt;
     }
 
     if (batchSize.has_value()) {
         return batchSize.value();
     }
 
+    if (tensor->get_shape().empty() || *desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        return std::nullopt;
+    }
+
     return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
 }
 
diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
index 2fad2f0c2be4ca..a46b2020a9203b 100644
--- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
@@ -63,6 +63,12 @@ class Plugin : public ov::IPlugin {
     ov::SupportedOpsMap query_model(const std::shared_ptr& model,
                                     const ov::AnyMap& properties) const override;
 
+    void handleDynamicBatching(std::shared_ptr& modelForCompilation,
+                               Config& localConfig,
+                               const std::function& updateBatchMode) const;
+
+    void encodeDynamicBatchInfo(std::shared_ptr model) const;
+
 private:
     void init_options();
     void filter_config_by_compiler_support(FilteredConfig& cfg) const;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index fb7c4f742850bb..c5469322eb81e3 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -57,10 +57,11 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    // Helper function to check if a tensor was originally dynamic
+    // Check if tensor was originally dynamic by looking for encoded markers
+    // This information is needed to restore the original dynamic batching behavior
     auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
         for (const auto& name : tensorNames) {
-            if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
+            if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) {
                 return true;
             }
         }
@@ -560,7 +561,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
+// Helper function to detect if shape contains dynamic dimensions other than the batch dimension
+// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension
 bool hasOtherDynamicDims(const ov::PartialShape& shape) {
     for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
         if (shape[dim_idx].is_dynamic()) {
@@ -640,7 +642,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             }
             sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name()
                     << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic();
-            logger.info("%s", sstream.str());
+            logger.info("%s", sstream.str().c_str());
             return false;
         }
     }
@@ -664,8 +666,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         } else {
             logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by "
                         "the name: %s, layout: %s",
-                        output->get_friendly_name(),
-                        layout.to_string());
+                        output->get_friendly_name().c_str(),
+                        layout.to_string().c_str());
             return false;
         }
     }
@@ -685,9 +687,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
     auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) {
         logger.info("%s: %s has shape value: %s",
-                    nodeType,
-                    ov_node.get_any_name(),
-                    ov_node.get_partial_shape().to_string());
+                    nodeType.c_str(),
+                    ov_node.get_any_name().c_str(),
+                    ov_node.get_partial_shape().to_string().c_str());
     };
 
     for (const auto& ov_node : batchedInputs) {
@@ -701,7 +703,6 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
 }
 
 void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
-    size_t inputIdx = 0;
     std::map newShapes;
     for (auto&& item : model->get_parameters()) {
         auto layout = item->get_layout();
@@ -710,11 +711,73 @@ void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
             partShape[ov::layout::batch_idx(layout)] = newBatch;
         }
         newShapes.emplace(item->get_friendly_name(), partShape);
-        inputIdx++;
     }
     model->reshape(newShapes);
 }
 
+void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const {
+    const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX;
+
+    // Encode info in input tensor names
+    for (auto& input : model->inputs()) {
+        const std::string originalName = input.get_any_name();
+        input.get_tensor().set_names({originalName, originalName + suffix});
+    }
+    // Encode info in output tensor names
+    for (auto& output : model->outputs()) {
+        const std::string originalName = output.get_any_name();
+        output.get_tensor().set_names({originalName, originalName + suffix});
+    }
+}
+
+void Plugin::handleDynamicBatching(std::shared_ptr& modelForCompilation,
+                                   Config& localConfig,
+                                   const std::function& updateBatchMode) const {
+    // Avoiding risks with static models. TODO: common solution.
+    if (!modelForCompilation->is_dynamic()) {
+        return;
+    }
+
+    const auto batchMode = localConfig.get();
+    const bool isAutoOrPluginBatch =
+        (batchMode == ov::intel_npu::BatchMode::PLUGIN || batchMode == ov::intel_npu::BatchMode::AUTO);
+
+    try {
+        const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+
+        if (!isAutoOrPluginBatch || !pluginBatchingIsSupported) {
+            _logger.info("Batching will be handled by compiler.");
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+            return;
+        }
+
+        _logger.info("Attempting to handle batching on the plugin side.");
+
+        // Preserve dynamic batch metadata by encoding it in tensor names
+        // Avoids introducing new metadata fields by leveraging existing naming system
+        encodeDynamicBatchInfo(modelForCompilation);
+
+        try {
+            ov::set_batch(modelForCompilation, ov::Dimension(1));
+        } catch (const std::exception& ex) {
+            _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                            "Trying to debatch it...",
+                            ex.what());
+
+            deBatchModel(modelForCompilation, ov::Dimension(1));
+            if (!modelForCompilation) {
+                OPENVINO_THROW("Cannot debatch a model");
+            }
+            _logger.info("The model has been debatched successfully");
+        }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    } catch (const std::exception& ex) {
+        _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. Error: %s",
+                     ex.what());
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    }
+}
+
 std::shared_ptr Plugin::compile_model(const std::shared_ptr& model,
                                       const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
@@ -832,75 +895,26 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
         strStream << mode;
-        _logger.info("Setting batching mode to %s.", strStream.str());
+        _logger.info("Setting batching mode to %s.", strStream.str().c_str());
         localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
     };
 
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
-        !localConfig.has(ov::intel_npu::batch_mode.name())) {
-        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
-        if (localConfig.get() == ov::intel_npu::BatchMode::PLUGIN) {
-            OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
+    // Handle batch mode configuration
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
+        // Set default batch mode if not configured
+        if (!localConfig.has(ov::intel_npu::batch_mode.name())) {
+            updateBatchMode(ov::intel_npu::BatchMode::AUTO);
         }
-        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
-        bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
-                                 localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
-            try {
-                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-
-                if (autoOrPluginBatch && pluginBatchingIsSupported) {
-                    _logger.info("Attempting to handle batching on the plugin side.");
-
-                    // Store dynamic batch info in tensor names BEFORE reshaping
-                    auto encodeDynamicBatchInfo = [](std::shared_ptr model) {
-                        // Encode info in input tensor names
-                        for (auto& input : model->inputs()) {
-                            std::string originalName = input.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            input.get_tensor().set_names({newName});
-                        }
-
-                        // Encode info in output tensor names
-                        for (auto& output : model->outputs()) {
-                            std::string originalName = output.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            output.get_tensor().set_names({newName});
-                        }
-                    };
-
-                    try {
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        ov::set_batch(modelForCompilation, ov::Dimension(1));
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    } catch (const std::exception& ex) {
-                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
-                                        "Trying to debatch it...",
-                                        ex.what());
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        deBatchModel(modelForCompilation, ov::Dimension(1));
-                        if (!modelForCompilation) {
-                            OPENVINO_THROW("Cannot debatch a model");
-                        }
-                        _logger.info("The model has been debatched successfully");
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    }
-                } else {
-                    _logger.info("Batching will be handed by compiler.");
-                    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                }
-            } catch (const std::exception& ex) {
-                _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.",
Batching will be handed by compiler.", - ex.what()); - updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + // Handle models with variables (states) + if (!model->get_variables().empty()) { + if (localConfig.get() == ov::intel_npu::BatchMode::PLUGIN) { + OPENVINO_THROW( + "This model contains states, thus it is not supported when handling batching on the plugin"); } + updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + } else { + handleDynamicBatching(modelForCompilation, localConfig, updateBatchMode); } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 01cc49fc38c78b..601ebe8039610c 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -17,6 +17,8 @@ constexpr std::size_t STANDARD_PAGE_SIZE = 4096; constexpr std::size_t DEFAULT_BATCH_SIZE = 1; constexpr std::size_t BATCH_AXIS = 0; +const std::string DYNBATCH_SUFFIX = "_DYNBATCH_ORIG"; + struct AlignedAllocator { public: AlignedAllocator(const size_t align_size) : _align_size(align_size) {} From 9bac0862177b604c993525e9f546923b74fdd951 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 1 Oct 2025 20:10:10 +0000 Subject: [PATCH 10/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - clean up --- .../src/backend/src/zero_infer_request.cpp | 14 +------------- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 15 ++------------- .../src/utils/include/intel_npu/utils/utils.hpp | 12 ++++++++++++ 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 4199696636e3c1..fe0d2b1f255b21 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -76,23 +76,11 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A std::optional determine_dynamic_batch_size(const IODescriptor& desc, const std::shared_ptr& tensor, const std::optional batchSize) { - // Check if tensor was originally dynamic by looking for encoded markers - // This information is needed to restore the original dynamic batching behavior - auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool { - for (const auto& name : tensorNames) { - if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; - }; - - auto wasDynamic = wasOriginallyDynamic(desc.outputTensorNames); - if (tensor == nullptr && !batchSize.has_value()) { return std::nullopt; } + auto wasDynamic = intel_npu::utils::wasOriginallyDynamic(desc.outputTensorNames); if (!wasDynamic) { return std::nullopt; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index c5469322eb81e3..5950677cd5fe85 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -57,17 +57,6 @@ std::shared_ptr create_dummy_model(const std::vector& i ov::ParameterVector parameters; ov::ResultVector results; - // Check if tensor was originally dynamic by looking for encoded markers - // This information is needed to restore the original dynamic batching behavior - auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool { - for (const auto& name : tensorNames) 
{ - if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; - }; - for (const IODescriptor& inputDescriptor : inputDescriptors) { if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor || inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) { @@ -77,7 +66,7 @@ std::shared_ptr create_dummy_model(const std::vector& i auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel : inputDescriptor.shapeFromCompiler; - if (wasOriginallyDynamic(inputDescriptor.outputTensorNames)) { + if (intel_npu::utils::wasOriginallyDynamic(inputDescriptor.outputTensorNames)) { shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1); } @@ -105,7 +94,7 @@ std::shared_ptr create_dummy_model(const std::vector& i auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel : outputDescriptor.shapeFromCompiler; - if (wasOriginallyDynamic(outputDescriptor.outputTensorNames)) { + if (intel_npu::utils::wasOriginallyDynamic(outputDescriptor.outputTensorNames)) { shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1); } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 601ebe8039610c..709e553ec67f5c 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "openvino/runtime/allocator.hpp" @@ -50,6 +51,17 @@ static inline size_t align_size_to_standard_page_size(size_t size) { return (size + utils::STANDARD_PAGE_SIZE - 1) & ~(utils::STANDARD_PAGE_SIZE - 1); } +// Check if tensor was originally dynamic by looking for encoded markers +// This information is needed to restore the original dynamic batching behavior +static inline bool wasOriginallyDynamic(const std::unordered_set& tensorNames) { + for (const auto& name : tensorNames) { + if (name.find(DYNBATCH_SUFFIX) != std::string::npos) { + return true; + } + } + return false; +} + } // namespace utils } // namespace intel_npu From b1997578c72032ed3f7df10cef5de6396028bc10 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Thu, 2 Oct 2025 01:43:14 +0000 Subject: [PATCH 11/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - fix tests --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 5950677cd5fe85..ff9d65e07b6f85 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -601,8 +601,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(input); // Batching on plugin is working only when batching is found on 0th dimension - if ((shape.size() && - shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) || + if (shape.size() || (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : input->get_shape(); batchedInputs.insert(params[input_id]->output(0)); @@ -640,8 +639,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(output); // Batching on plugin is working only when batching is found on 0th dimension - if ((shape.size() && - shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) || + if (shape.size() || (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { const auto& node = output->input_value(0); const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); @@ -674,6 +672,11 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo return false; } + if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { + logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); + return false; + } + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { logger.info("%s: %s has shape value: %s", nodeType.c_str(), From 234f92070a254d862f31bcf47708086401ee3745 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 01:17:37 +0000 Subject: [PATCH 12/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - review comments --- .../intel_npu/src/plugin/src/plugin.cpp | 47 ++++++++++++------- .../utils/include/intel_npu/utils/utils.hpp | 9 ++-- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index ff9d65e07b6f85..1791c59adec7ed 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -601,8 +601,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(input); // Batching on plugin is working only when batching is found on 0th dimension - if (shape.size() || - (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape(); batchedInputs.insert(params[input_id]->output(0)); @@ -639,8 +639,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(output); // Batching on plugin is working only when batching is found on 0th dimension - if (shape.size() || - (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { const auto& node = output->input_value(0); const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : output->get_shape(); batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); @@ -694,22 +694,34 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo return true; } -void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { - std::map newShapes; - for (auto&& item : model->get_parameters()) { - auto layout = item->get_layout(); - auto partShape = item->get_partial_shape(); - if (ov::layout::has_batch(layout)) { - partShape[ov::layout::batch_idx(layout)] = newBatch; +bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { + try { + std::map newShapes; + for (auto&& item : model->get_parameters()) { + auto layout = item->get_layout(); + auto partShape = item->get_partial_shape(); + if (ov::layout::has_batch(layout)) { + partShape[ov::layout::batch_idx(layout)] = newBatch; + } + newShapes.emplace(item->get_friendly_name(), partShape); } - newShapes.emplace(item->get_friendly_name(), partShape); + model->reshape(newShapes); + return true; + } catch (const std::exception&) { + // Don't throw - let caller handle the failure + return false; } - model->reshape(newShapes); } void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX; + // Sanity check: ensure we don't transform static models + if (!model->is_dynamic()) { + _logger.warning("Attempting to encode dynamic batch info on a static model. Skipping encoding."); + return; + } + // Encode info in input tensor names for (auto& input : model->inputs()) { const std::string originalName = input.get_any_name(); @@ -756,17 +768,20 @@ void Plugin::handleDynamicBatching(std::shared_ptr& modelForCompilati "Trying to debatch it...", ex.what()); - deBatchModel(modelForCompilation, ov::Dimension(1)); - if (!modelForCompilation) { + if (!deBatchModel(modelForCompilation, ov::Dimension(1))) { OPENVINO_THROW("Cannot debatch a model"); } _logger.info("The model has been debatched successfully"); } + // If we have successfully debatched the model on the PLUGIN side, we should + // avoid repeating the same in the compiler by resetting the batch mode updateBatchMode(ov::intel_npu::BatchMode::COMPILER); } catch (const std::exception& ex) { _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. 
Error: %s", ex.what()); - updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + if (batchMode == ov::intel_npu::BatchMode::AUTO) { + updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + } } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 709e553ec67f5c..62a2b1f44d169f 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -54,12 +54,9 @@ static inline size_t align_size_to_standard_page_size(size_t size) { // Check if tensor was originally dynamic by looking for encoded markers // This information is needed to restore the original dynamic batching behavior static inline bool wasOriginallyDynamic(const std::unordered_set& tensorNames) { - for (const auto& name : tensorNames) { - if (name.find(DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; + return std::any_of(tensorNames.begin(), tensorNames.end(), [](const std::string& name) { + return name.find(DYNBATCH_SUFFIX) != std::string::npos; + }); } } // namespace utils From c89599ecb890eabd5b2c0561ed79768a7e6fbe3d Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 06:36:47 +0000 Subject: [PATCH 13/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - build warning fixes --- .../intel_npu/src/plugin/src/plugin.cpp | 326 +++++++++--------- 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 1791c59adec7ed..ce4925ba8bc4c1 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -216,6 +216,169 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } +// Helper function to detect if shape contains dynamic dimensions other than the batch dimension +// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension +bool hasOtherDynamicDims(const ov::PartialShape& shape) { + for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) { + if (shape[dim_idx].is_dynamic()) { + return true; // Found dynamic dimension other than batch + } + } + return false; +} + +bool checkModelDynamicDims(const std::shared_ptr& model) { + // Check parameters (inputs) + const auto& params = model->get_parameters(); + for (const auto& param : params) { + const auto& shape = param->get_partial_shape(); + if (hasOtherDynamicDims(shape)) { + return true; + } + } + + // Check results (outputs) + const auto& results = model->get_results(); + for (const auto& result : results) { + const auto& shape = result->get_output_partial_shape(0); + if (hasOtherDynamicDims(shape)) { + return true; + } + } + + return false; +} + +bool validateModelBatch(const std::shared_ptr& model, Logger logger) { + std::set> batchedInputs; + std::set> batchedOutputs; + std::set sBatchSize; + + // Limitation: Plugin batching is not supported when there are dynamic + // dimensions other than the batch dimension. 
+ if (checkModelDynamicDims(model)) { + return false; + } + + const auto& params = model->get_parameters(); + for (size_t input_id = 0; input_id < params.size(); input_id++) { + const auto& input = params[input_id]; + const auto& shape = input->get_partial_shape(); + ov::Layout layout = ov::layout::get_layout(input); + + // Batching on plugin is working only when batching is found on 0th dimension + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape(); + batchedInputs.insert(params[input_id]->output(0)); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); + } + } else { + // gather some diagnostic info + std::optional batch_dim_index_detected; + for (size_t i = 1; i < shape.size(); i++) { + if (shape[i].has_symbol()) { + batch_dim_index_detected = i; + break; + } + } + std::stringstream sstream; + sstream << "Only networks with inputs batched by 0th dimension are supported. "; + if (batch_dim_index_detected.has_value()) { + sstream << "The batch has been detected on: " << batch_dim_index_detected.value() + << " dimension instead. "; + } else { + sstream << "The batch hasn't been detected at all. "; + } + sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() + << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); + logger.info("%s", sstream.str().c_str()); + return false; + } + } + for (const auto& output : model->get_results()) { + const auto& shape = output->get_output_partial_shape(0); + ov::Layout layout = ov::layout::get_layout(output); + + // Batching on plugin is working only when batching is found on 0th dimension + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + const auto& node = output->input_value(0); + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); + batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); + } + } else { + logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " + "the name: %s, layout: %s", + output->get_friendly_name().c_str(), + layout.to_string().c_str()); + return false; + } + } + if (!batchedInputs.size() || !batchedOutputs.size()) { + logger.info( + "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", + batchedInputs.size(), + batchedOutputs.size()); + return false; + } + + if (sBatchSize.size() != 1) { + logger.info("Batching size shall have same value for all tensors! 
Got unique batch sizes number: %ld", + sBatchSize.size()); + return false; + } + + if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { + logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); + return false; + } + + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { + logger.info("%s: %s has shape value: %s", + nodeType.c_str(), + ov_node.get_any_name().c_str(), + ov_node.get_partial_shape().to_string().c_str()); + }; + + for (const auto& ov_node : batchedInputs) { + node_info_printer(ov_node, "Input"); + } + for (const auto& ov_node : batchedOutputs) { + node_info_printer(ov_node, "Output"); + } + + return true; +} + +bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { + try { + std::map newShapes; + for (auto&& item : model->get_parameters()) { + auto layout = item->get_layout(); + auto partShape = item->get_partial_shape(); + if (ov::layout::has_batch(layout)) { + partShape[ov::layout::batch_idx(layout)] = newBatch; + } + newShapes.emplace(item->get_friendly_name(), partShape); + } + model->reshape(newShapes); + return true; + } catch (const std::exception&) { + // Don't throw - let caller handle the failure + return false; + } +} + } // namespace namespace intel_npu { @@ -550,169 +713,6 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument return _properties->get_property(name, npu_plugin_properties); } -// Helper function to detect if shape contains dynamic dimensions other than the batch dimension -// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension -bool hasOtherDynamicDims(const ov::PartialShape& shape) { - for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) { - if (shape[dim_idx].is_dynamic()) { - return true; // Found dynamic dimension other than batch - } - } - return false; -} - -bool checkModelDynamicDims(const std::shared_ptr& model) { - // Check parameters (inputs) - const auto& params = model->get_parameters(); - for (const auto& param : params) { - const auto& shape = param->get_partial_shape(); - if (hasOtherDynamicDims(shape)) { - return true; - } - } - - // Check results (outputs) - const auto& results = model->get_results(); - for (const auto& result : results) { - const auto& shape = result->get_output_partial_shape(0); - if (hasOtherDynamicDims(shape)) { - return true; - } - } - - return false; -} - -bool validateModelBatch(const std::shared_ptr& model, Logger logger) { - std::set> batchedInputs; - std::set> batchedOutputs; - std::set sBatchSize; - - // Limitation: Plugin batching is not supported when there are dynamic - // dimensions other than the batch dimension. - if (checkModelDynamicDims(model)) { - return false; - } - - const auto& params = model->get_parameters(); - for (size_t input_id = 0; input_id < params.size(); input_id++) { - const auto& input = params[input_id]; - const auto& shape = input->get_partial_shape(); - ov::Layout layout = ov::layout::get_layout(input); - - // Batching on plugin is working only when batching is found on 0th dimension - if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || - ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { - const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : input->get_shape(); - batchedInputs.insert(params[input_id]->output(0)); - - if (shape.rank().is_dynamic()) { - OPENVINO_THROW("Shapes with dynamic rank are not supported."); - } else { - sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); - } - } else { - // gather some diagnostic info - std::optional batch_dim_index_detected; - for (size_t i = 1; i < shape.size(); i++) { - if (shape[i].has_symbol()) { - batch_dim_index_detected = i; - break; - } - } - std::stringstream sstream; - sstream << "Only networks with inputs batched by 0th dimension are supported. "; - if (batch_dim_index_detected.has_value()) { - sstream << "The batch has been detected on: " << batch_dim_index_detected.value() - << " dimension instead. "; - } else { - sstream << "The batch hasn't been detected at all. "; - } - sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() - << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); - logger.info("%s", sstream.str().c_str()); - return false; - } - } - for (const auto& output : model->get_results()) { - const auto& shape = output->get_output_partial_shape(0); - ov::Layout layout = ov::layout::get_layout(output); - - // Batching on plugin is working only when batching is found on 0th dimension - if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || - ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { - const auto& node = output->input_value(0); - const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); - batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); - - if (shape.rank().is_dynamic()) { - OPENVINO_THROW("Shapes with dynamic rank are not supported."); - } else { - sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); - } - } else { - logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " - "the name: %s, layout: %s", - output->get_friendly_name().c_str(), - layout.to_string().c_str()); - return false; - } - } - if (!batchedInputs.size() || !batchedOutputs.size()) { - logger.info( - "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", - batchedInputs.size(), - batchedOutputs.size()); - return false; - } - - if (sBatchSize.size() != 1) { - logger.info("Batching size shall have same value for all tensors! 
Got unique batch sizes number: %ld", - sBatchSize.size()); - return false; - } - - if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { - logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); - return false; - } - - auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { - logger.info("%s: %s has shape value: %s", - nodeType.c_str(), - ov_node.get_any_name().c_str(), - ov_node.get_partial_shape().to_string().c_str()); - }; - - for (const auto& ov_node : batchedInputs) { - node_info_printer(ov_node, "Input"); - } - for (const auto& ov_node : batchedOutputs) { - node_info_printer(ov_node, "Output"); - } - - return true; -} - -bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { - try { - std::map newShapes; - for (auto&& item : model->get_parameters()) { - auto layout = item->get_layout(); - auto partShape = item->get_partial_shape(); - if (ov::layout::has_batch(layout)) { - partShape[ov::layout::batch_idx(layout)] = newBatch; - } - newShapes.emplace(item->get_friendly_name(), partShape); - } - model->reshape(newShapes); - return true; - } catch (const std::exception&) { - // Don't throw - let caller handle the failure - return false; - } -} - void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX; From 105476378ea429379c1d93a34e4714731268804a Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 19:24:13 +0000 Subject: [PATCH 14/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - compatibility with older blobs --- src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp | 4 +++- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index fe0d2b1f255b21..a044caf2f6f030 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -81,7 +81,9 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc, } auto wasDynamic = intel_npu::utils::wasOriginallyDynamic(desc.outputTensorNames); - if (!wasDynamic) { + auto dynamicBatchFromIR = desc.shapeFromIRModel.has_value() && (*desc.shapeFromIRModel).size() && + (*desc.shapeFromIRModel)[intel_npu::utils::BATCH_AXIS].is_dynamic(); + if (!wasDynamic && !dynamicBatchFromIR) { return std::nullopt; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index ce4925ba8bc4c1..370d348d8817a2 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -718,7 +718,7 @@ void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { // Sanity check: ensure we don't transform static models if (!model->is_dynamic()) { - _logger.warning("Attempting to encode dynamic batch info on a static model. Skipping encoding."); + _logger.debug("Attempting to encode dynamic batch info on a static model. Skipping encoding."); return; }