From 228e7ebb899a8558387da380b7f98920e68d87d0 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 3 Sep 2025 17:25:52 +0000
Subject: [PATCH 01/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - no metadata changes

---
 .../src/backend/src/zero_infer_request.cpp |  14 +-
 .../intel_npu/src/plugin/src/plugin.cpp    | 137 +++++++++++++++++-
 2 files changed, 132 insertions(+), 19 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 3d2df2bbf05212..1ed747b5a3654b 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -80,10 +80,6 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if (!desc.shapeFromIRModel.has_value() || !desc.shapeFromIRModel.value().is_dynamic()) {
-        return std::nullopt;
-    }
-
     if (batchSize.has_value()) {
         return batchSize.value();
     }
@@ -92,11 +88,7 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if ((*desc.shapeFromIRModel)[intel_npu::utils::BATCH_AXIS].is_dynamic()) {
-        return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
-    }
-
-    return std::nullopt;
+    return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
 }
 
 }  // namespace
@@ -788,8 +780,8 @@ void ZeroInferRequest::infer_async() {
                                copied_bytes_from_user,
                                get_level_zero_input(inputIndex)->get_byte_size());
             }
-            OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
-                            "Bytes copied must be equal");
+            // OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
+            //                 "Bytes copied must be equal");
         }
 
         ++inputIndex;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index a40740e5e9c748..9cd9580f6da054 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -539,9 +539,110 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
+    std::set> batchedInputs;
+    std::set> batchedOutputs;
+    std::set sBatchSize;
+
+    const auto& params = model->get_parameters();
+    for (size_t input_id = 0; input_id < params.size(); input_id++) {
+        const auto& input = params[input_id];
+        const auto& shape = input->get_partial_shape();
+        ov::Layout layout = ov::layout::get_layout(input);
+
+        // Batching on plugin is working only when batching is found on 0th dimension
+        if ((shape.size() && shape[0].get_max_length() > 1) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+            const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
+            batchedInputs.insert(params[input_id]->output(0));
+
+            if (shape.rank().is_dynamic()) {
+                OPENVINO_THROW("Shapes with dynamic rank are not supported.");
+            } else {
+                sBatchSize.insert(staticShape[0]);
+            }
+        } else {
+            // gather some diagnostic info
+            std::optional batch_dim_index_detected;
+            for (size_t i = 1; i < shape.size(); i++) {
+                if (shape[i].has_symbol()) {
+                    batch_dim_index_detected = i;
+                    break;
+                }
+            }
+            std::stringstream sstream;
+            sstream << "Only networks with inputs batched by 0th dimension are supported. ";
"; + if (batch_dim_index_detected.has_value()) { + sstream << "The batch has been detected on: " << batch_dim_index_detected.value() + << " dimension instead. "; + } else { + sstream << "The batch hasn't been detected at all. "; + } + sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() + << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); + logger.info("%s", sstream.str()); + return false; + } + } + for (const auto& output : model->get_results()) { + const auto& shape = output->get_output_partial_shape(0); + ov::Layout layout = ov::layout::get_layout(output); + + // Batching on plugin is working only when batching is found on 0th dimension + if ((shape.size() && shape[0].get_max_length() > 1) || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) { + const auto& node = output->input_value(0); + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); + batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[0]); + } + } else { + logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " + "the name: %s, layout: %s", + output->get_friendly_name(), + layout.to_string()); + return false; + } + } + if (!batchedInputs.size() || !batchedOutputs.size()) { + logger.info( + "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", + batchedInputs.size(), + batchedOutputs.size()); + return false; + } + + if (sBatchSize.size() != 1) { + logger.info("Batching size shall have same value for all tensors! Got unique batch sizes number: %ld", + sBatchSize.size()); + return false; + } + + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { + logger.info("%s: %s has shape value: %s", + nodeType, + ov_node.get_any_name(), + ov_node.get_partial_shape().to_string()); + }; + + for (const auto& ov_node : batchedInputs) { + node_info_printer(ov_node, "Input"); + } + for (const auto& ov_node : batchedOutputs) { + node_info_printer(ov_node, "Output"); + } + + return true; +} + std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model"); + auto modelForCompilation = model->clone(); // Before going any further: if // ... 1 - NPUW mode is activated @@ -589,11 +690,16 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto device = _backend == nullptr ? 
     localConfig.update({{ov::intel_npu::platform.name(), platform}});
 
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
-        !localConfig.has(ov::intel_npu::batch_mode.name())) {
+    auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
-        strStream << ov::intel_npu::BatchMode::AUTO;
+        strStream << mode;
+        _logger.info("Setting batching mode to %s.", strStream.str());
         localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
+    };
+
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
+        !localConfig.has(ov::intel_npu::batch_mode.name())) {
+        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
     }
 
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
@@ -601,9 +707,24 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
         OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
     }
 
-    std::stringstream strStream;
-    strStream << ov::intel_npu::BatchMode::COMPILER;
-    localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
+    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    }
+
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
+        bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
+                                 localConfig.get() == ov::intel_npu::BatchMode::AUTO;
+        bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+        if (autoOrPluginBatch && pluginBatchingIsSupported) {
+            try {
+                _logger.info("Attempting to handle batching on the plugin side.");
+                ov::set_batch(modelForCompilation, 1);
+            } catch (const std::exception& ex) {
+                _logger.info("Couldn't reshape the model. Batching will be handed by compiler.", ex.what());
+            }
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        } else {
+            _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+        }
     }
 
     // Update stepping w/ information from driver, unless provided by user or we are off-device
@@ -654,10 +775,10 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
         _logger.debug("performing compile");
 
         if (!localConfig.get()) {
-            graph = compiler->compile(model->clone(), localConfig);
+            graph = compiler->compile(modelForCompilation->clone(), localConfig);
         } else {
             check_weightless_cache_attribute_occurrence(model);
-            graph = compiler->compileWS(model->clone(), localConfig);
+            graph = compiler->compileWS(modelForCompilation->clone(), localConfig);
         }
     } catch (const std::exception& ex) {
         OPENVINO_THROW(ex.what());

From 09307ccbab62e7ee64525ea407588163985aa813 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 3 Sep 2025 22:06:46 +0000
Subject: [PATCH 02/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - no metadata changes - fix static tests

---
 .../intel_npu/src/backend/src/zero_infer_request.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 1ed747b5a3654b..e23f6b0595c193 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -80,12 +80,13 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc,
         return std::nullopt;
     }
 
-    if (batchSize.has_value()) {
-        return batchSize.value();
+    // Make sure that PLUGIN batch mode is currently active
+    if (*desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        return std::nullopt;
     }
 
-    if (tensor->get_shape().empty() || *desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
-        return std::nullopt;
+    if (batchSize.has_value()) {
+        return batchSize.value();
     }
 
     return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];

From 35760fe274cd95d878d712dca907d7a00de7eefc Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Tue, 9 Sep 2025 22:59:25 +0000
Subject: [PATCH 03/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - fix BA issues - treat every model with
 batch 1 as a potentially dynamically batched one

---
 .../intel_npu/src/plugin/src/plugin.cpp | 40 ++++++++++++++-----
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 9cd9580f6da054..1465bd5a584a0b 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -63,10 +63,17 @@ std::shared_ptr create_dummy_model(const std::vector& i
             continue;
         }
 
+        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                                  : inputDescriptor.shapeFromCompiler;
+        // Treat every model with batch 1 as a potentially dynamically batched one.
+        // TODO: should we protect this part with a certain condition?
+        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         std::shared_ptr parameter = std::make_shared(
             inputDescriptor.precision,
-            inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                         : inputDescriptor.shapeFromCompiler);
+            shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -86,10 +93,16 @@ std::shared_ptr create_dummy_model(const std::vector& i
         std::shared_ptr constantDummy =
             std::make_shared(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
 
+        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
+                                                                   : outputDescriptor.shapeFromCompiler;
+        // Treat every model with batch 1 as a potentially dynamically batched one.
+        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         const std::shared_ptr& tensorDummy = std::make_shared(
             outputDescriptor.precision,
-            outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                          : outputDescriptor.shapeFromCompiler,
+            shape,
             outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared(constantDummy));
@@ -713,17 +726,22 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
         bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-        if (autoOrPluginBatch && pluginBatchingIsSupported) {
-            try {
+        try {
+            const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
+
+            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
                 _logger.info("Attempting to handle batching on the plugin side.");
                 ov::set_batch(modelForCompilation, 1);
-            } catch (const std::exception& ex) {
-                _logger.info("Couldn't reshape the model. Batching will be handed by compiler.", ex.what());
+                // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
+            } else {
+                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
             }
+
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+        } catch (const std::exception& ex) {
+            _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
             updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-        } else {
-            _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
         }
     }

From ef0744d5e6450eca9a905902b23114bc4a3065b4 Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 11:54:44 +0000
Subject: [PATCH 04/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - validateModelBatch conditions

---
 .../intel_npu/src/backend/src/zero_infer_request.cpp | 4 ++--
 src/plugins/intel_npu/src/plugin/src/plugin.cpp      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index e23f6b0595c193..055923159cd14c 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -781,8 +781,8 @@ void ZeroInferRequest::infer_async() {
                                copied_bytes_from_user,
                                get_level_zero_input(inputIndex)->get_byte_size());
             }
-            // OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
-            //                 "Bytes copied must be equal");
+            OPENVINO_ASSERT(get_level_zero_input(inputIndex)->get_byte_size() == copied_bytes_from_user,
+                            "Bytes copied must be equal");
         }
 
         ++inputIndex;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 1465bd5a584a0b..dbe0c413cd9c65 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -564,8 +564,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[0].get_max_length() > 1) ||
-            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -602,8 +602,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[0].get_max_length() > 1) ||
-            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == 0)) {
+        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
             batchedOutputs.insert(ov::Output(node.get_node(), node.get_index()));

From 5477207234a5fea13d3951709e148bb8cf07435a Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 14:23:18 +0000
Subject: [PATCH 05/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - dynamic dims limitation

---
 .../intel_npu/src/plugin/src/plugin.cpp | 39 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index dbe0c413cd9c65..365b0721d43375 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -552,11 +552,46 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+bool checkDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    for (const auto& param : model->get_parameters()) {
+        const auto& shape = param->get_partial_shape();
+
+        // Check all dimensions except the first one (batch dimension)
+        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+            if (shape[dim_idx].is_dynamic()) {
+                return true;  // Found dynamic dimension other than batch
+            }
+        }
+    }
+
+    // Check results (outputs)
+    for (const auto& result : model->get_results()) {
+        const auto& shape = result->get_output_partial_shape(0);
+
+        // Check all dimensions except the first one (batch dimension)
+        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+            if (shape[dim_idx].is_dynamic()) {
+                return true;  // Found dynamic dimension other than batch
+            }
+        }
+    }
+
+    return false;  // No dynamic dimensions found other than batch
+}
+
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
     std::set sBatchSize;
 
+    // Limitation: Plugin batching is not supported when there are dynamic
+    // dimensions other than the batch dimension.
+    const bool otherDynamicDims = checkDynamicDims(model);
+    if (otherDynamicDims) {
+        return false;
+    }
+
     const auto& params = model->get_parameters();
     for (size_t input_id = 0; input_id < params.size(); input_id++) {
         const auto& input = params[input_id];
@@ -572,7 +607,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             if (shape.rank().is_dynamic()) {
                 OPENVINO_THROW("Shapes with dynamic rank are not supported.");
             } else {
-                sBatchSize.insert(staticShape[0]);
+                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
             }
         } else {
             // gather some diagnostic info
@@ -611,7 +646,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             if (shape.rank().is_dynamic()) {
                 OPENVINO_THROW("Shapes with dynamic rank are not supported.");
             } else {
-                sBatchSize.insert(staticShape[0]);
+                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
             }
         } else {
             logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by "
                         "the name: %s, layout: %s",
                         output->get_friendly_name(),
                         layout.to_string());
             return false;
         }
     }

From c1eddb036124517b452bd972ceed1162568730db Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 10 Sep 2025 19:09:22 +0000
Subject: [PATCH 06/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - additional checks

---
 .../intel_npu/src/plugin/src/plugin.cpp | 135 +++++++++++++++++-----
 1 file changed, 103 insertions(+), 32 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 365b0721d43375..73dc120f49bfc0 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -39,6 +39,104 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool validateReshapedModel(const std::vector& inputDescriptors,
+                           const std::vector& outputDescriptors) {
+    std::set batchSizes;
+    bool hasBatchedInputs = false;
+    bool hasBatchedOutputs = false;
+
+    // Check input descriptors
+    for (const IODescriptor& inputDescriptor : inputDescriptors) {
+        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
+            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
+            continue;
+        }
+
+        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
+                                                                  : inputDescriptor.shapeFromCompiler;
+
+        // Check for dynamic dimensions other than batch dimension
+        if (hasOtherDynamicDims(shape)) {
+            return false;  // Plugin batching not supported with other dynamic dims
+        }
+
+        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
+        if (shape.size() > 0 &&
+            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
+            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+
+            hasBatchedInputs = true;
+            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
+        }
+    }
+
+    // Check output descriptors
+    for (const IODescriptor& outputDescriptor : outputDescriptors) {
+        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
+            outputDescriptor.isInitOutputWeights) {
+            continue;
+        }
+
+        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
+                                                                   : outputDescriptor.shapeFromCompiler;
+
+        // Check for dynamic dimensions other than batch dimension
+        if (hasOtherDynamicDims(shape)) {
+            return false;  // Plugin batching not supported with other dynamic dims
+        }
+
+        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
+        if (shape.size() > 0 &&
+            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
+            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+
+            hasBatchedOutputs = true;
+            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
+        }
+    }
+
+    // Plugin batching is applied if:
+    // 1. Both inputs and outputs have batched dimensions
+    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
+    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
+    // 4. No other dynamic dimensions exist (checked above)
+    return hasBatchedInputs && hasBatchedOutputs &&
+           batchSizes.size() == 1 &&
+           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
+}
+
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -155,6 +155,8 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
+    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
+
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,7 +167,7 @@ std::shared_ptr create_dummy_model(const std::vector& i
                                                                   : inputDescriptor.shapeFromCompiler;
         // Treat every model with batch 1 as a potentially dynamically batched one.
         // TODO: should we protect this part with a certain condition?
-        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        if (pluginBatchingIsApplied) {
             shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
         }
 
@@ -195,7 +196,7 @@ std::shared_ptr create_dummy_model(const std::vector& i
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
         // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        if (pluginBatchingIsApplied) {
             shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
         }
@@ -652,34 +652,6 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
-bool checkDynamicDims(const std::shared_ptr& model) {
-    // Check parameters (inputs)
-    for (const auto& param : model->get_parameters()) {
-        const auto& shape = param->get_partial_shape();
-
-        // Check all dimensions except the first one (batch dimension)
-        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-            if (shape[dim_idx].is_dynamic()) {
-                return true;  // Found dynamic dimension other than batch
-            }
-        }
-    }
-
-    // Check results (outputs)
-    for (const auto& result : model->get_results()) {
-        const auto& shape = result->get_output_partial_shape(0);
-
-        // Check all dimensions except the first one (batch dimension)
-        for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-            if (shape[dim_idx].is_dynamic()) {
-                return true;  // Found dynamic dimension other than batch
-            }
-        }
-    }
-
-    return false;  // No dynamic dimensions found other than batch
-}
-
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
@@ -687,8 +659,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
 
     // Limitation: Plugin batching is not supported when there are dynamic
     // dimensions other than the batch dimension.
-    const bool otherDynamicDims = checkDynamicDims(model);
-    if (otherDynamicDims) {
+    if (checkModelDynamicDims(model)) {
         return false;
     }

From 9e71e24bb582fb1c727581cd24e1b0060497903e Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Mon, 22 Sep 2025 22:07:19 +0000
Subject: [PATCH 07/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - simplify

---
 .../src/common/src/sync_infer_request.cpp |  18 +-
 .../intel_npu/src/plugin/src/plugin.cpp   | 197 +++++++-----------
 2 files changed, 82 insertions(+), 133 deletions(-)

diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index d3eed4e7357005..b17db067726456 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -214,7 +214,8 @@ void SyncInferRequest::check_tensor(const ov::Output& port,
         }
     }
 
-    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
+    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape() ||
+                        tensor->get_shape()[utils::BATCH_AXIS] % port.get_shape()[utils::BATCH_AXIS] == 0,
                     "The ",
                     tensor_type,
                     " tensor size is not equal to the model ",
@@ -274,13 +275,14 @@ void SyncInferRequest::check_batched_tensors(const ov::Output& p
                     port.get_partial_shape());
 
     auto batch = port.get_partial_shape()[batch_idx];
-    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
-                    "set_input_tensors/set_tensors error. Input shape ",
-                    port.get_partial_shape(),
-                    "batch ",
-                    batch,
-                    "doesn't match with total blobs count: ",
-                    tensors_size);
+    OPENVINO_ASSERT(
+        batch.is_dynamic() || batch.get_length() == tensors_size || tensors_size % batch.get_length() == 0,
+        "set_input_tensors/set_tensors error. Input shape ",
+        port.get_partial_shape(),
+        "batch ",
+        batch,
+        "doesn't match with total blobs count: ",
+        tensors_size);
 
     auto batched_shape = tensors[utils::BATCH_AXIS]->get_shape();
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 73dc120f49bfc0..6c07325fcfb038 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -39,104 +39,6 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
-bool hasOtherDynamicDims(const ov::PartialShape& shape) {
-    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-        if (shape[dim_idx].is_dynamic()) {
-            return true;  // Found dynamic dimension other than batch
-        }
-    }
-    return false;
-}
-
-bool checkModelDynamicDims(const std::shared_ptr& model) {
-    // Check parameters (inputs)
-    const auto& params = model->get_parameters();
-    for (const auto& param : params) {
-        const auto& shape = param->get_partial_shape();
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    // Check results (outputs)
-    const auto& results = model->get_results();
-    for (const auto& result : results) {
-        const auto& shape = result->get_output_partial_shape(0);
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool validateReshapedModel(const std::vector& inputDescriptors,
-                           const std::vector& outputDescriptors) {
-    std::set batchSizes;
-    bool hasBatchedInputs = false;
-    bool hasBatchedOutputs = false;
-
-    // Check input descriptors
-    for (const IODescriptor& inputDescriptor : inputDescriptors) {
-        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
-            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
-            continue;
-        }
-
-        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                                  : inputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 &&
-            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-
-            hasBatchedInputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Check output descriptors
-    for (const IODescriptor& outputDescriptor : outputDescriptors) {
-        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
-            outputDescriptor.isInitOutputWeights) {
-            continue;
-        }
-
-        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                                   : outputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 &&
-            shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-
-            hasBatchedOutputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Plugin batching is applied if:
-    // 1. Both inputs and outputs have batched dimensions
-    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
-    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
-    // 4. No other dynamic dimensions exist (checked above)
-    return hasBatchedInputs && hasBatchedOutputs &&
-           batchSizes.size() == 1 &&
-           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
-}
-
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -155,8 +57,6 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
-
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,15 +65,9 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        // TODO: should we protect this part with a certain condition?
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        std::shared_ptr parameter = std::make_shared(
-            inputDescriptor.precision,
-            shape);
+        std::shared_ptr parameter =
+            std::make_shared(inputDescriptor.precision, shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -195,15 +89,11 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        const std::shared_ptr& tensorDummy = std::make_shared(
-            outputDescriptor.precision,
-            shape,
-            outputDescriptor.outputTensorNames);
+        const std::shared_ptr& tensorDummy =
+            std::make_shared(outputDescriptor.precision,
+                             shape,
+                             outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared(constantDummy));
         result->output(0).set_tensor_ptr(tensorDummy);
@@ -652,6 +542,38 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool validateModelBatch(const std::shared_ptr& model, Logger logger) {
     std::set> batchedInputs;
     std::set> batchedOutputs;
@@ -587,8 +592,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -631,8 +637,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
             batchedOutputs.insert(ov::Output(node.get_node(), node.get_index()));
@@ -701,6 +682,21 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
     return true;
 }
 
+void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
+    size_t inputIdx = 0;
+    std::map newShapes;
+    for (auto&& item : model->get_parameters()) {
+        auto layout = item->get_layout();
+        auto partShape = item->get_partial_shape();
+        if (ov::layout::has_batch(layout)) {
+            partShape[ov::layout::batch_idx(layout)] = newBatch;
+        }
+        newShapes.emplace(item->get_friendly_name(), partShape);
+        inputIdx++;
+    }
+    model->reshape(newShapes);
+}
+
 std::shared_ptr Plugin::compile_model(const std::shared_ptr& model,
                                       const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
@@ -769,26 +773,29 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
         try {
             const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
 
-            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
+            if (autoOrPluginBatch && pluginBatchingIsSupported) {
                 _logger.info("Attempting to handle batching on the plugin side.");
-                ov::set_batch(modelForCompilation, 1);
+                try {
+                    ov::set_batch(modelForCompilation, ov::Dimension(1));
+                } catch (const std::exception& ex) {
+                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                    "Trying to debatch it...",
+                                    ex.what());
+                    deBatchModel(modelForCompilation, ov::Dimension(1));
+                    if (!modelForCompilation) {
+                        OPENVINO_THROW("Cannot debatch a model");
+                    }
+                    _logger.info("The model has been debatched successfully");
+                }
                 // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
             } else {
-                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+                _logger.info("Batching will be handed by compiler.");
             }
-
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         } catch (const std::exception& ex) {
             _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
         }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }

From 0d87ab3ddc7f9b497c867eed198fb7ce0b4c3a5d Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Fri, 26 Sep 2025 11:33:23 +0000
Subject: [PATCH 08/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - will be prettier, functionality first

---
 .../src/common/src/sync_infer_request.cpp | 18 ++--
 .../intel_npu/src/plugin/src/plugin.cpp   | 85 ++++++++++++++-----
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index b17db067726456..d3eed4e7357005 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -214,8 +214,7 @@ void SyncInferRequest::check_tensor(const ov::Output& port,
         }
     }
 
-    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape() ||
-                        tensor->get_shape()[utils::BATCH_AXIS] % port.get_shape()[utils::BATCH_AXIS] == 0,
+    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
                     "The ",
                     tensor_type,
                     " tensor size is not equal to the model ",
@@ -275,14 +274,13 @@ void SyncInferRequest::check_batched_tensors(const ov::Output& p
                     port.get_partial_shape());
 
     auto batch = port.get_partial_shape()[batch_idx];
-    OPENVINO_ASSERT(
-        batch.is_dynamic() || batch.get_length() == tensors_size || tensors_size % batch.get_length() == 0,
-        "set_input_tensors/set_tensors error. Input shape ",
-        port.get_partial_shape(),
-        "batch ",
-        batch,
-        "doesn't match with total blobs count: ",
-        tensors_size);
+    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
+                    "set_input_tensors/set_tensors error. Input shape ",
+                    port.get_partial_shape(),
+                    "batch ",
+                    batch,
+                    "doesn't match with total blobs count: ",
+                    tensors_size);
 
     auto batched_shape = tensors[utils::BATCH_AXIS]->get_shape();
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 6c07325fcfb038..fb7c4f742850bb 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -57,6 +57,16 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
+    // Helper function to check if a tensor was originally dynamic
+    auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
+        for (const auto& name : tensorNames) {
+            if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
+                return true;
+            }
+        }
+        return false;
+    };
+
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -66,6 +76,10 @@ std::shared_ptr create_dummy_model(const std::vector& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
 
+        if (wasOriginallyDynamic(inputDescriptor.outputTensorNames)) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         std::shared_ptr parameter =
             std::make_shared(inputDescriptor.precision, shape);
 
@@ -90,6 +104,10 @@ std::shared_ptr create_dummy_model(const std::vector& i
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
 
+        if (wasOriginallyDynamic(outputDescriptor.outputTensorNames)) {
+            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
+        }
+
         const std::shared_ptr& tensorDummy =
             std::make_shared(outputDescriptor.precision,
                              shape,
                              outputDescriptor.outputTensorNames);
@@ -771,31 +789,56 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
         bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
                                  localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        try {
-            const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-
-            if (autoOrPluginBatch && pluginBatchingIsSupported) {
-                _logger.info("Attempting to handle batching on the plugin side.");
-                try {
-                    ov::set_batch(modelForCompilation, ov::Dimension(1));
-                } catch (const std::exception& ex) {
-                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
-                                    "Trying to debatch it...",
-                                    ex.what());
-                    deBatchModel(modelForCompilation, ov::Dimension(1));
-                    if (!modelForCompilation) {
-                        OPENVINO_THROW("Cannot debatch a model");
+        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
+            try {
+                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+
+                if (autoOrPluginBatch && pluginBatchingIsSupported) {
+                    _logger.info("Attempting to handle batching on the plugin side.");
+
+                    // Store dynamic batch info in tensor names BEFORE reshaping
+                    auto encodeDynamicBatchInfo = [](std::shared_ptr model) {
+                        // Encode info in input tensor names
+                        for (auto& input : model->inputs()) {
+                            std::string originalName = input.get_any_name();
+                            std::string newName = originalName + "_DYNBATCH_ORIG";
+                            input.get_tensor().set_names({newName});
+                        }
+
+                        // Encode info in output tensor names
+                        for (auto& output : model->outputs()) {
+                            std::string originalName = output.get_any_name();
+                            std::string newName = originalName + "_DYNBATCH_ORIG";
+                            output.get_tensor().set_names({newName});
+                        }
+                    };
+
+                    try {
+                        encodeDynamicBatchInfo(modelForCompilation);
+                        ov::set_batch(modelForCompilation, ov::Dimension(1));
+                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+                    } catch (const std::exception& ex) {
+                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                        "Trying to debatch it...",
+                                        ex.what());
+                        encodeDynamicBatchInfo(modelForCompilation);
+                        deBatchModel(modelForCompilation, ov::Dimension(1));
+                        if (!modelForCompilation) {
+                            OPENVINO_THROW("Cannot debatch a model");
+                        }
+                        _logger.info("The model has been debatched successfully");
+                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                     }
-                    _logger.info("The model has been debatched successfully");
+                } else {
+                    _logger.info("Batching will be handed by compiler.");
+                    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                 }
-                // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
-            } else {
-                _logger.info("Batching will be handed by compiler.");
+            } catch (const std::exception& ex) {
+                _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.",
+                             ex.what());
+                updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
             }
-        } catch (const std::exception& ex) {
-            _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", ex.what());
-        }
-        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }

From b830d9d6ee0a429410a80a9e8d71cac4ada6641e Mon Sep 17 00:00:00 2001
From: DariaMityagina
Date: Wed, 1 Oct 2025 16:18:23 +0000
Subject: [PATCH 09/14] Investigate refactoring opportunities for batch
 management in Plugin and Compiler - clean up

---
 .../src/backend/src/zero_infer_request.cpp  |  20 ++-
 .../intel_npu/src/plugin/include/plugin.hpp |   6 +
 .../intel_npu/src/plugin/src/plugin.cpp     | 162 ++++++++++--------
 .../utils/include/intel_npu/utils/utils.hpp |   2 +
 4 files changed, 114 insertions(+), 76 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 055923159cd14c..4199696636e3c1 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -76,12 +76,24 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A
 std::optional determine_dynamic_batch_size(const IODescriptor& desc,
                                            const std::shared_ptr& tensor,
                                            const std::optional batchSize) {
+    // Check if tensor was originally dynamic by looking for encoded markers
+    // This information is needed to restore the original dynamic batching behavior
+    auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
+        for (const auto& name : tensorNames) {
+            if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    auto wasDynamic = wasOriginallyDynamic(desc.outputTensorNames);
+
     if (tensor == nullptr && !batchSize.has_value()) {
         return std::nullopt;
     }
 
-    // Make sure that PLUGIN batch mode is currently active
-    if (*desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+    if (!wasDynamic) {
         return std::nullopt;
     }
 
     if (batchSize.has_value()) {
         return batchSize.value();
     }
 
+    if (tensor->get_shape().empty() || *desc.shapeFromCompiler.begin() != intel_npu::utils::DEFAULT_BATCH_SIZE) {
+        return std::nullopt;
+    }
+
     return tensor->get_shape()[intel_npu::utils::BATCH_AXIS];
 }
 
diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
index 2fad2f0c2be4ca..a46b2020a9203b 100644
--- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp
+++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp
@@ -63,6 +63,12 @@ class Plugin : public ov::IPlugin {
     ov::SupportedOpsMap query_model(const std::shared_ptr& model,
                                     const ov::AnyMap& properties) const override;
 
+    void handleDynamicBatching(std::shared_ptr& modelForCompilation,
+                               Config& localConfig,
+                               const std::function& updateBatchMode) const;
+
+    void encodeDynamicBatchInfo(std::shared_ptr model) const;
+
 private:
     void init_options();
     void filter_config_by_compiler_support(FilteredConfig& cfg) const;
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index fb7c4f742850bb..c5469322eb81e3 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -57,10 +57,11 @@ std::shared_ptr create_dummy_model(const std::vector& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    // Helper function to check if a tensor was originally dynamic
+    // Check if tensor was originally dynamic by looking for encoded markers
+    // This information is needed to restore the original dynamic batching behavior
     auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool {
         for (const auto& name : tensorNames) {
-            if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
+            if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) {
                 return true;
             }
         }
@@ -560,7 +561,8 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, npu_plugin_properties);
 }
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
+// Helper function to detect if shape contains dynamic dimensions other than the batch dimension
+// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension
 bool hasOtherDynamicDims(const ov::PartialShape& shape) {
     for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
         if (shape[dim_idx].is_dynamic()) {
@@ -640,7 +642,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
             }
             sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name()
                     << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic();
-            logger.info("%s", sstream.str());
+            logger.info("%s", sstream.str().c_str());
             return false;
         }
     }
@@ -664,8 +666,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
         } else {
             logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by "
                         "the name: %s, layout: %s",
-                        output->get_friendly_name(),
-                        layout.to_string());
+                        output->get_friendly_name().c_str(),
+                        layout.to_string().c_str());
             return false;
         }
     }
@@ -685,9 +687,9 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
     auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) {
         logger.info("%s: %s has shape value: %s",
-                    nodeType,
-                    ov_node.get_any_name(),
-                    ov_node.get_partial_shape().to_string());
+                    nodeType.c_str(),
+                    ov_node.get_any_name().c_str(),
+                    ov_node.get_partial_shape().to_string().c_str());
     };
 
     for (const auto& ov_node : batchedInputs) {
@@ -701,7 +703,6 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo
 }
 
 void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
-    size_t inputIdx = 0;
     std::map newShapes;
     for (auto&& item : model->get_parameters()) {
         auto layout = item->get_layout();
@@ -710,11 +711,73 @@ void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) {
             partShape[ov::layout::batch_idx(layout)] = newBatch;
         }
         newShapes.emplace(item->get_friendly_name(), partShape);
-        inputIdx++;
     }
     model->reshape(newShapes);
 }
 
+void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const {
+    const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX;
+
+    // Encode info in input tensor names
+    for (auto& input : model->inputs()) {
+        const std::string originalName = input.get_any_name();
+        input.get_tensor().set_names({originalName, originalName + suffix});
+    }
+    // Encode info in output tensor names
+    for (auto& output : model->outputs()) {
+        const std::string originalName = output.get_any_name();
+        output.get_tensor().set_names({originalName, originalName + suffix});
+    }
+}
+
+void Plugin::handleDynamicBatching(std::shared_ptr& modelForCompilation,
+                                   Config& localConfig,
+                                   const std::function& updateBatchMode) const {
+    // Avoiding risks with static models. TODO: common solution.
+    if (!modelForCompilation->is_dynamic()) {
+        return;
+    }
+
+    const auto batchMode = localConfig.get();
+    const bool isAutoOrPluginBatch =
+        (batchMode == ov::intel_npu::BatchMode::PLUGIN || batchMode == ov::intel_npu::BatchMode::AUTO);
+
+    try {
+        const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
+
+        if (!isAutoOrPluginBatch || !pluginBatchingIsSupported) {
+            _logger.info("Batching will be handled by compiler.");
+            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+            return;
+        }
+
+        _logger.info("Attempting to handle batching on the plugin side.");
+
+        // Preserve dynamic batch metadata by encoding it in tensor names
+        // Avoids introducing new metadata fields by leveraging existing naming system
+        encodeDynamicBatchInfo(modelForCompilation);
+
+        try {
+            ov::set_batch(modelForCompilation, ov::Dimension(1));
+        } catch (const std::exception& ex) {
+            _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                            "Trying to debatch it...",
+                            ex.what());
+
+            deBatchModel(modelForCompilation, ov::Dimension(1));
+            if (!modelForCompilation) {
+                OPENVINO_THROW("Cannot debatch a model");
+            }
+            _logger.info("The model has been debatched successfully");
+        }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    } catch (const std::exception& ex) {
+        _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. Error: %s",
+                     ex.what());
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
+    }
+}
+
 std::shared_ptr Plugin::compile_model(const std::shared_ptr& model,
                                       const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
@@ -832,75 +895,26 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr<
     auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
         std::stringstream strStream;
         strStream << mode;
-        _logger.info("Setting batching mode to %s.", strStream.str());
+        _logger.info("Setting batching mode to %s.", strStream.str().c_str());
         localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
     };
 
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
-        !localConfig.has(ov::intel_npu::batch_mode.name())) {
-        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
-        if (localConfig.get() == ov::intel_npu::BatchMode::PLUGIN) {
-            OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
+    // Handle batch mode configuration
+    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
+        // Set default batch mode if not configured
+        if (!localConfig.has(ov::intel_npu::batch_mode.name())) {
+            updateBatchMode(ov::intel_npu::BatchMode::AUTO);
         }
-        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-    }
-
-    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
-        bool autoOrPluginBatch = localConfig.get() == ov::intel_npu::BatchMode::PLUGIN ||
-                                 localConfig.get() == ov::intel_npu::BatchMode::AUTO;
-        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
-            try {
-                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-
-                if (autoOrPluginBatch && pluginBatchingIsSupported) {
-                    _logger.info("Attempting to handle batching on the plugin side.");
-
-                    // Store dynamic batch info in tensor names BEFORE reshaping
-                    auto encodeDynamicBatchInfo = [](std::shared_ptr model) {
-                        // Encode info in input tensor names
-                        for (auto& input : model->inputs()) {
-                            std::string originalName = input.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            input.get_tensor().set_names({newName});
-                        }
-
-                        // Encode info in output tensor names
-                        for (auto& output : model->outputs()) {
-                            std::string originalName = output.get_any_name();
-                            std::string newName = originalName + "_DYNBATCH_ORIG";
-                            output.get_tensor().set_names({newName});
-                        }
-                    };
-
-                    try {
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        ov::set_batch(modelForCompilation, ov::Dimension(1));
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    } catch (const std::exception& ex) {
-                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
-                                        "Trying to debatch it...",
-                                        ex.what());
-                        encodeDynamicBatchInfo(modelForCompilation);
-                        deBatchModel(modelForCompilation, ov::Dimension(1));
-                        if (!modelForCompilation) {
-                            OPENVINO_THROW("Cannot debatch a model");
-                        }
-                        _logger.info("The model has been debatched successfully");
-                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                    }
-                } else {
-                    _logger.info("Batching will be handed by compiler.");
-                    updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
-                }
-            } catch (const std::exception& ex) {
-                _logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.",
Batching will be handed by compiler.", - ex.what()); - updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + // Handle models with variables (states) + if (!model->get_variables().empty()) { + if (localConfig.get() == ov::intel_npu::BatchMode::PLUGIN) { + OPENVINO_THROW( + "This model contains states, thus it is not supported when handling batching on the plugin"); } + updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + } else { + handleDynamicBatching(modelForCompilation, localConfig, updateBatchMode); } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 01cc49fc38c78b..601ebe8039610c 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -17,6 +17,8 @@ constexpr std::size_t STANDARD_PAGE_SIZE = 4096; constexpr std::size_t DEFAULT_BATCH_SIZE = 1; constexpr std::size_t BATCH_AXIS = 0; +const std::string DYNBATCH_SUFFIX = "_DYNBATCH_ORIG"; + struct AlignedAllocator { public: AlignedAllocator(const size_t align_size) : _align_size(align_size) {} From 9bac0862177b604c993525e9f546923b74fdd951 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 1 Oct 2025 20:10:10 +0000 Subject: [PATCH 10/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - clean up --- .../src/backend/src/zero_infer_request.cpp | 14 +------------- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 15 ++------------- .../src/utils/include/intel_npu/utils/utils.hpp | 12 ++++++++++++ 3 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 4199696636e3c1..fe0d2b1f255b21 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -76,23 +76,11 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A std::optional determine_dynamic_batch_size(const IODescriptor& desc, const std::shared_ptr& tensor, const std::optional batchSize) { - // Check if tensor was originally dynamic by looking for encoded markers - // This information is needed to restore the original dynamic batching behavior - auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool { - for (const auto& name : tensorNames) { - if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; - }; - - auto wasDynamic = wasOriginallyDynamic(desc.outputTensorNames); - if (tensor == nullptr && !batchSize.has_value()) { return std::nullopt; } + auto wasDynamic = intel_npu::utils::wasOriginallyDynamic(desc.outputTensorNames); if (!wasDynamic) { return std::nullopt; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index c5469322eb81e3..5950677cd5fe85 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -57,17 +57,6 @@ std::shared_ptr create_dummy_model(const std::vector& i ov::ParameterVector parameters; ov::ResultVector results; - // Check if tensor was originally dynamic by looking for encoded markers - // This information is needed to restore the original dynamic batching behavior - auto wasOriginallyDynamic = [](const std::unordered_set& tensorNames) -> bool { - for (const auto& name : tensorNames) 
{ - if (name.find(intel_npu::utils::DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; - }; - for (const IODescriptor& inputDescriptor : inputDescriptors) { if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor || inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) { @@ -77,7 +66,7 @@ std::shared_ptr create_dummy_model(const std::vector& i auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel : inputDescriptor.shapeFromCompiler; - if (wasOriginallyDynamic(inputDescriptor.outputTensorNames)) { + if (intel_npu::utils::wasOriginallyDynamic(inputDescriptor.outputTensorNames)) { shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1); } @@ -105,7 +94,7 @@ std::shared_ptr create_dummy_model(const std::vector& i auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel : outputDescriptor.shapeFromCompiler; - if (wasOriginallyDynamic(outputDescriptor.outputTensorNames)) { + if (intel_npu::utils::wasOriginallyDynamic(outputDescriptor.outputTensorNames)) { shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1); } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 601ebe8039610c..709e553ec67f5c 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "openvino/runtime/allocator.hpp" @@ -50,6 +51,17 @@ static inline size_t align_size_to_standard_page_size(size_t size) { return (size + utils::STANDARD_PAGE_SIZE - 1) & ~(utils::STANDARD_PAGE_SIZE - 1); } +// Check if tensor was originally dynamic by looking for encoded markers +// This information is needed to restore the original dynamic batching behavior +static inline bool wasOriginallyDynamic(const std::unordered_set& tensorNames) { + for (const auto& name : tensorNames) { + if (name.find(DYNBATCH_SUFFIX) != std::string::npos) { + return true; + } + } + return false; +} + } // namespace utils } // namespace intel_npu From b1997578c72032ed3f7df10cef5de6396028bc10 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Thu, 2 Oct 2025 01:43:14 +0000 Subject: [PATCH 11/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - fix tests --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 5950677cd5fe85..ff9d65e07b6f85 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -601,8 +601,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(input); // Batching on plugin is working only when batching is found on 0th dimension - if ((shape.size() && - shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) || + if (shape.size() || (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : input->get_shape(); batchedInputs.insert(params[input_id]->output(0)); @@ -640,8 +639,7 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(output); // Batching on plugin is working only when batching is found on 0th dimension - if ((shape.size() && - shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) || + if (shape.size() || (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { const auto& node = output->input_value(0); const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); @@ -674,6 +672,11 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo return false; } + if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { + logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); + return false; + } + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { logger.info("%s: %s has shape value: %s", nodeType.c_str(), From 234f92070a254d862f31bcf47708086401ee3745 Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 01:17:37 +0000 Subject: [PATCH 12/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - review comments --- .../intel_npu/src/plugin/src/plugin.cpp | 47 ++++++++++++------- .../utils/include/intel_npu/utils/utils.hpp | 9 ++-- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index ff9d65e07b6f85..1791c59adec7ed 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -601,8 +601,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(input); // Batching on plugin is working only when batching is found on 0th dimension - if (shape.size() || - (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape(); batchedInputs.insert(params[input_id]->output(0)); @@ -639,8 +639,8 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo ov::Layout layout = ov::layout::get_layout(output); // Batching on plugin is working only when batching is found on 0th dimension - if (shape.size() || - (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { const auto& node = output->input_value(0); const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : output->get_shape(); batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); @@ -694,22 +694,34 @@ bool validateModelBatch(const std::shared_ptr& model, Logger lo return true; } -void deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { - std::map newShapes; - for (auto&& item : model->get_parameters()) { - auto layout = item->get_layout(); - auto partShape = item->get_partial_shape(); - if (ov::layout::has_batch(layout)) { - partShape[ov::layout::batch_idx(layout)] = newBatch; +bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { + try { + std::map newShapes; + for (auto&& item : model->get_parameters()) { + auto layout = item->get_layout(); + auto partShape = item->get_partial_shape(); + if (ov::layout::has_batch(layout)) { + partShape[ov::layout::batch_idx(layout)] = newBatch; + } + newShapes.emplace(item->get_friendly_name(), partShape); } - newShapes.emplace(item->get_friendly_name(), partShape); + model->reshape(newShapes); + return true; + } catch (const std::exception&) { + // Don't throw - let caller handle the failure + return false; } - model->reshape(newShapes); } void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX; + // Sanity check: ensure we don't transform static models + if (!model->is_dynamic()) { + _logger.warning("Attempting to encode dynamic batch info on a static model. Skipping encoding."); + return; + } + // Encode info in input tensor names for (auto& input : model->inputs()) { const std::string originalName = input.get_any_name(); @@ -756,17 +768,20 @@ void Plugin::handleDynamicBatching(std::shared_ptr& modelForCompilati "Trying to debatch it...", ex.what()); - deBatchModel(modelForCompilation, ov::Dimension(1)); - if (!modelForCompilation) { + if (!deBatchModel(modelForCompilation, ov::Dimension(1))) { OPENVINO_THROW("Cannot debatch a model"); } _logger.info("The model has been debatched successfully"); } + // If we have successfully debatched the model on the PLUGIN side, we should + // avoid repeating the same in the compiler by resetting the batch mode updateBatchMode(ov::intel_npu::BatchMode::COMPILER); } catch (const std::exception& ex) { _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler. 
Error: %s", ex.what()); - updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + if (batchMode == ov::intel_npu::BatchMode::AUTO) { + updateBatchMode(ov::intel_npu::BatchMode::COMPILER); + } } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp index 709e553ec67f5c..62a2b1f44d169f 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/utils.hpp @@ -54,12 +54,9 @@ static inline size_t align_size_to_standard_page_size(size_t size) { // Check if tensor was originally dynamic by looking for encoded markers // This information is needed to restore the original dynamic batching behavior static inline bool wasOriginallyDynamic(const std::unordered_set& tensorNames) { - for (const auto& name : tensorNames) { - if (name.find(DYNBATCH_SUFFIX) != std::string::npos) { - return true; - } - } - return false; + return std::any_of(tensorNames.begin(), tensorNames.end(), [](const std::string& name) { + return name.find(DYNBATCH_SUFFIX) != std::string::npos; + }); } } // namespace utils From c89599ecb890eabd5b2c0561ed79768a7e6fbe3d Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 06:36:47 +0000 Subject: [PATCH 13/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - build warning fixes --- .../intel_npu/src/plugin/src/plugin.cpp | 326 +++++++++--------- 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 1791c59adec7ed..ce4925ba8bc4c1 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -216,6 +216,169 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } +// Helper function to detect if shape contains dynamic dimensions other than the batch dimension +// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension +bool hasOtherDynamicDims(const ov::PartialShape& shape) { + for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) { + if (shape[dim_idx].is_dynamic()) { + return true; // Found dynamic dimension other than batch + } + } + return false; +} + +bool checkModelDynamicDims(const std::shared_ptr& model) { + // Check parameters (inputs) + const auto& params = model->get_parameters(); + for (const auto& param : params) { + const auto& shape = param->get_partial_shape(); + if (hasOtherDynamicDims(shape)) { + return true; + } + } + + // Check results (outputs) + const auto& results = model->get_results(); + for (const auto& result : results) { + const auto& shape = result->get_output_partial_shape(0); + if (hasOtherDynamicDims(shape)) { + return true; + } + } + + return false; +} + +bool validateModelBatch(const std::shared_ptr& model, Logger logger) { + std::set> batchedInputs; + std::set> batchedOutputs; + std::set sBatchSize; + + // Limitation: Plugin batching is not supported when there are dynamic + // dimensions other than the batch dimension. 
+ if (checkModelDynamicDims(model)) { + return false; + } + + const auto& params = model->get_parameters(); + for (size_t input_id = 0; input_id < params.size(); input_id++) { + const auto& input = params[input_id]; + const auto& shape = input->get_partial_shape(); + ov::Layout layout = ov::layout::get_layout(input); + + // Batching on plugin is working only when batching is found on 0th dimension + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape(); + batchedInputs.insert(params[input_id]->output(0)); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); + } + } else { + // gather some diagnostic info + std::optional batch_dim_index_detected; + for (size_t i = 1; i < shape.size(); i++) { + if (shape[i].has_symbol()) { + batch_dim_index_detected = i; + break; + } + } + std::stringstream sstream; + sstream << "Only networks with inputs batched by 0th dimension are supported. "; + if (batch_dim_index_detected.has_value()) { + sstream << "The batch has been detected on: " << batch_dim_index_detected.value() + << " dimension instead. "; + } else { + sstream << "The batch hasn't been detected at all. "; + } + sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() + << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); + logger.info("%s", sstream.str().c_str()); + return false; + } + } + for (const auto& output : model->get_results()) { + const auto& shape = output->get_output_partial_shape(0); + ov::Layout layout = ov::layout::get_layout(output); + + // Batching on plugin is working only when batching is found on 0th dimension + if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || + (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) { + const auto& node = output->input_value(0); + const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); + batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); + + if (shape.rank().is_dynamic()) { + OPENVINO_THROW("Shapes with dynamic rank are not supported."); + } else { + sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); + } + } else { + logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " + "the name: %s, layout: %s", + output->get_friendly_name().c_str(), + layout.to_string().c_str()); + return false; + } + } + if (!batchedInputs.size() || !batchedOutputs.size()) { + logger.info( + "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", + batchedInputs.size(), + batchedOutputs.size()); + return false; + } + + if (sBatchSize.size() != 1) { + logger.info("Batching size shall have same value for all tensors! 
Got unique batch sizes number: %ld", + sBatchSize.size()); + return false; + } + + if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { + logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); + return false; + } + + auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { + logger.info("%s: %s has shape value: %s", + nodeType.c_str(), + ov_node.get_any_name().c_str(), + ov_node.get_partial_shape().to_string().c_str()); + }; + + for (const auto& ov_node : batchedInputs) { + node_info_printer(ov_node, "Input"); + } + for (const auto& ov_node : batchedOutputs) { + node_info_printer(ov_node, "Output"); + } + + return true; +} + +bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { + try { + std::map newShapes; + for (auto&& item : model->get_parameters()) { + auto layout = item->get_layout(); + auto partShape = item->get_partial_shape(); + if (ov::layout::has_batch(layout)) { + partShape[ov::layout::batch_idx(layout)] = newBatch; + } + newShapes.emplace(item->get_friendly_name(), partShape); + } + model->reshape(newShapes); + return true; + } catch (const std::exception&) { + // Don't throw - let caller handle the failure + return false; + } +} + } // namespace namespace intel_npu { @@ -550,169 +713,6 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument return _properties->get_property(name, npu_plugin_properties); } -// Helper function to detect if shape contains dynamic dimensions other than the batch dimension -// Plugin-side batch handling can only be applied when batch is the sole dynamic dimension -bool hasOtherDynamicDims(const ov::PartialShape& shape) { - for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) { - if (shape[dim_idx].is_dynamic()) { - return true; // Found dynamic dimension other than batch - } - } - return false; -} - -bool checkModelDynamicDims(const std::shared_ptr& model) { - // Check parameters (inputs) - const auto& params = model->get_parameters(); - for (const auto& param : params) { - const auto& shape = param->get_partial_shape(); - if (hasOtherDynamicDims(shape)) { - return true; - } - } - - // Check results (outputs) - const auto& results = model->get_results(); - for (const auto& result : results) { - const auto& shape = result->get_output_partial_shape(0); - if (hasOtherDynamicDims(shape)) { - return true; - } - } - - return false; -} - -bool validateModelBatch(const std::shared_ptr& model, Logger logger) { - std::set> batchedInputs; - std::set> batchedOutputs; - std::set sBatchSize; - - // Limitation: Plugin batching is not supported when there are dynamic - // dimensions other than the batch dimension. - if (checkModelDynamicDims(model)) { - return false; - } - - const auto& params = model->get_parameters(); - for (size_t input_id = 0; input_id < params.size(); input_id++) { - const auto& input = params[input_id]; - const auto& shape = input->get_partial_shape(); - ov::Layout layout = ov::layout::get_layout(input); - - // Batching on plugin is working only when batching is found on 0th dimension - if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || - ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { - const auto& staticShape = shape.is_dynamic() ? 
shape.get_max_shape() : input->get_shape(); - batchedInputs.insert(params[input_id]->output(0)); - - if (shape.rank().is_dynamic()) { - OPENVINO_THROW("Shapes with dynamic rank are not supported."); - } else { - sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); - } - } else { - // gather some diagnostic info - std::optional batch_dim_index_detected; - for (size_t i = 1; i < shape.size(); i++) { - if (shape[i].has_symbol()) { - batch_dim_index_detected = i; - break; - } - } - std::stringstream sstream; - sstream << "Only networks with inputs batched by 0th dimension are supported. "; - if (batch_dim_index_detected.has_value()) { - sstream << "The batch has been detected on: " << batch_dim_index_detected.value() - << " dimension instead. "; - } else { - sstream << "The batch hasn't been detected at all. "; - } - sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name() - << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic(); - logger.info("%s", sstream.str().c_str()); - return false; - } - } - for (const auto& output : model->get_results()) { - const auto& shape = output->get_output_partial_shape(0); - ov::Layout layout = ov::layout::get_layout(output); - - // Batching on plugin is working only when batching is found on 0th dimension - if (shape[intel_npu::utils::BATCH_AXIS].is_dynamic() || - ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS) { - const auto& node = output->input_value(0); - const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape(); - batchedOutputs.insert(ov::Output(node.get_node(), node.get_index())); - - if (shape.rank().is_dynamic()) { - OPENVINO_THROW("Shapes with dynamic rank are not supported."); - } else { - sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]); - } - } else { - logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by " - "the name: %s, layout: %s", - output->get_friendly_name().c_str(), - layout.to_string().c_str()); - return false; - } - } - if (!batchedInputs.size() || !batchedOutputs.size()) { - logger.info( - "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld", - batchedInputs.size(), - batchedOutputs.size()); - return false; - } - - if (sBatchSize.size() != 1) { - logger.info("Batching size shall have same value for all tensors! 
Got unique batch sizes number: %ld", - sBatchSize.size()); - return false; - } - - if (*sBatchSize.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE) { - logger.info("PLUGIN batch won't be applied, got default batch value : %ld", *sBatchSize.begin()); - return false; - } - - auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) { - logger.info("%s: %s has shape value: %s", - nodeType.c_str(), - ov_node.get_any_name().c_str(), - ov_node.get_partial_shape().to_string().c_str()); - }; - - for (const auto& ov_node : batchedInputs) { - node_info_printer(ov_node, "Input"); - } - for (const auto& ov_node : batchedOutputs) { - node_info_printer(ov_node, "Output"); - } - - return true; -} - -bool deBatchModel(std::shared_ptr& model, ov::Dimension newBatch) { - try { - std::map newShapes; - for (auto&& item : model->get_parameters()) { - auto layout = item->get_layout(); - auto partShape = item->get_partial_shape(); - if (ov::layout::has_batch(layout)) { - partShape[ov::layout::batch_idx(layout)] = newBatch; - } - newShapes.emplace(item->get_friendly_name(), partShape); - } - model->reshape(newShapes); - return true; - } catch (const std::exception&) { - // Don't throw - let caller handle the failure - return false; - } -} - void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { const std::string suffix = intel_npu::utils::DYNBATCH_SUFFIX; From 105476378ea429379c1d93a34e4714731268804a Mon Sep 17 00:00:00 2001 From: DariaMityagina Date: Wed, 8 Oct 2025 19:24:13 +0000 Subject: [PATCH 14/14] Investigate refactoring opportunities for batch management in Plugin and Compiler - compatibility with older blobs --- src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp | 4 +++- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index fe0d2b1f255b21..a044caf2f6f030 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -81,7 +81,9 @@ std::optional determine_dynamic_batch_size(const IODescriptor& desc, } auto wasDynamic = intel_npu::utils::wasOriginallyDynamic(desc.outputTensorNames); - if (!wasDynamic) { + auto dynamicBatchFromIR = desc.shapeFromIRModel.has_value() && (*desc.shapeFromIRModel).size() && + (*desc.shapeFromIRModel)[intel_npu::utils::BATCH_AXIS].is_dynamic(); + if (!wasDynamic && !dynamicBatchFromIR) { return std::nullopt; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index ce4925ba8bc4c1..370d348d8817a2 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -718,7 +718,7 @@ void Plugin::encodeDynamicBatchInfo(std::shared_ptr model) const { // Sanity check: ensure we don't transform static models if (!model->is_dynamic()) { - _logger.warning("Attempting to encode dynamic batch info on a static model. Skipping encoding."); + _logger.debug("Attempting to encode dynamic batch info on a static model. Skipping encoding."); return; }