
Commit e932c8d

Investigate refactoring opportunities for batch management in Plugin and Compiler - simplify
1 parent 929e13a commit e932c8d

2 files changed: +76 -127 lines


src/plugins/intel_npu/src/common/src/sync_infer_request.cpp

Lines changed: 4 additions & 2 deletions
@@ -214,7 +214,8 @@ void SyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
         }
     }
 
-    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape(),
+    OPENVINO_ASSERT(is_dynamic || port.get_shape() == tensor->get_shape() ||
+                        tensor->get_shape()[utils::BATCH_AXIS] % port.get_shape()[utils::BATCH_AXIS] == 0,
                     "The ",
                     tensor_type,
                     " tensor size is not equal to the model ",
@@ -274,7 +275,8 @@ void SyncInferRequest::check_batched_tensors(const ov::Output<const ov::Node>& p
                     port.get_partial_shape());
     auto batch = port.get_partial_shape()[batch_idx];
 
-    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size,
+    OPENVINO_ASSERT(batch.is_dynamic() || batch.get_length() == tensors_size ||
+                        tensors_size % batch.get_length() == 0,
                     "set_input_tensors/set_tensors error. Input shape ",
                     port.get_partial_shape(),
                     "batch ",

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 72 additions & 125 deletions
@@ -39,104 +39,6 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
-bool hasOtherDynamicDims(const ov::PartialShape& shape) {
-    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-        if (shape[dim_idx].is_dynamic()) {
-            return true;  // Found dynamic dimension other than batch
-        }
-    }
-    return false;
-}
-
-bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
-    // Check parameters (inputs)
-    const auto& params = model->get_parameters();
-    for (const auto& param : params) {
-        const auto& shape = param->get_partial_shape();
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    // Check results (outputs)
-    const auto& results = model->get_results();
-    for (const auto& result : results) {
-        const auto& shape = result->get_output_partial_shape(0);
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool validateReshapedModel(const std::vector<IODescriptor>& inputDescriptors,
-                           const std::vector<IODescriptor>& outputDescriptors) {
-    std::set<size_t> batchSizes;
-    bool hasBatchedInputs = false;
-    bool hasBatchedOutputs = false;
-
-    // Check input descriptors
-    for (const IODescriptor& inputDescriptor : inputDescriptors) {
-        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
-            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
-            continue;
-        }
-
-        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                                  : inputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 && shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-            hasBatchedInputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Check output descriptors
-    for (const IODescriptor& outputDescriptor : outputDescriptors) {
-        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
-            outputDescriptor.isInitOutputWeights) {
-            continue;
-        }
-
-        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                                   : outputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 && shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-            hasBatchedOutputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Plugin batching is applied if:
-    // 1. Both inputs and outputs have batched dimensions
-    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
-    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
-    // 4. No other dynamic dimensions exist (checked above)
-    return hasBatchedInputs && hasBatchedOutputs && batchSizes.size() == 1 &&
-           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
-}
-
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
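Of the three helpers deleted above, `hasOtherDynamicDims` and `checkModelDynamicDims` are not gone: they reappear verbatim later in this same file, after `get_property` (see the hunk below). Only `validateReshapedModel` is removed outright, together with its sole caller in `create_dummy_model`; the relaxed runtime checks in `sync_infer_request.cpp` appear to make that compile-time gate unnecessary.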
@@ -155,8 +57,6 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
-
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,15 +65,9 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        // TODO: should we protect this part with a certain condition?
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
-            inputDescriptor.precision,
-            shape);
+        std::shared_ptr<ov::op::v0::Parameter> parameter =
+            std::make_shared<ov::op::v0::Parameter>(inputDescriptor.precision, shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -195,15 +89,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
 
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
-            outputDescriptor.precision,
-            shape,
-            outputDescriptor.outputTensorNames);
+        const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
+            std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
+                                                     shape,
+                                                     outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared<ov::op::v0::Result>(constantDummy));
         result->output(0).set_tensor_ptr(tensorDummy);
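With `pluginBatchingIsApplied` gone, `create_dummy_model` no longer rewrites the batch axis to `ov::Dimension(-1)`: the dummy parameters and result tensors keep whatever shape the IR model or the compiler reported, and batched requests are instead admitted through the relaxed runtime checks in `sync_infer_request.cpp` shown earlier.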
@@ -629,6 +519,38 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, arguments);
 }
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger logger) {
     std::set<ov::Output<const ov::Node>> batchedInputs;
     std::set<ov::Output<const ov::Node>> batchedOutputs;
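For reference, a quick illustration of the relocated helper's semantics: dimension 0 (the batch) is exempt, while any other dynamic dimension disqualifies the model from plugin-side batching. The snippet assumes OpenVINO development headers; the helper body is copied from the hunk above so the example is self-contained:

```cpp
#include <openvino/core/partial_shape.hpp>

#include <cassert>

// Copied verbatim from the hunk above.
bool hasOtherDynamicDims(const ov::PartialShape& shape) {
    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
        if (shape[dim_idx].is_dynamic()) {
            return true;  // Found dynamic dimension other than batch
        }
    }
    return false;
}

int main() {
    // Only the batch (dimension 0) is dynamic: still a candidate for plugin batching.
    assert(!hasOtherDynamicDims(ov::PartialShape{ov::Dimension::dynamic(), 3, 224, 224}));
    // A non-batch dimension is dynamic: plugin batching is ruled out.
    assert(hasOtherDynamicDims(ov::PartialShape{1, 3, ov::Dimension::dynamic(), ov::Dimension::dynamic()}));
    // Fully static shapes trivially pass.
    assert(!hasOtherDynamicDims(ov::PartialShape{8, 3, 224, 224}));
}
```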
@@ -647,7 +569,8 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -685,7 +608,8 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
@@ -735,6 +659,21 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
     return true;
 }
 
+void deBatchModel(std::shared_ptr<ov::Model>& model, ov::Dimension newBatch) {
+    size_t inputIdx = 0;
+    std::map<std::string, ov::PartialShape> newShapes;
+    for (auto&& item : model->get_parameters()) {
+        auto layout = item->get_layout();
+        auto partShape = item->get_partial_shape();
+        if (ov::layout::has_batch(layout)) {
+            partShape[ov::layout::batch_idx(layout)] = newBatch;
+        }
+        newShapes.emplace(item->get_friendly_name(), partShape);
+        inputIdx++;
+    }
+    model->reshape(newShapes);
+}
+
 std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
                                                           const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
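`deBatchModel` is the new fallback used below when `ov::set_batch` throws: it overwrites the batch dimension of every parameter whose layout carries a batch ('N') and reshapes the model by friendly name. A hedged demonstration (assumes OpenVINO dev headers and the `deBatchModel` definition from the hunk above; the tiny model is illustrative only):

```cpp
#include <openvino/openvino.hpp>

#include <cassert>

int main() {
    // A batch-4 pass-through model whose parameter declares an "NCHW" layout.
    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32,
                                                         ov::PartialShape{4, 3, 224, 224});
    param->set_layout(ov::Layout("NCHW"));
    auto result = std::make_shared<ov::op::v0::Result>(param->output(0));
    auto model = std::make_shared<ov::Model>(ov::ResultVector{result},
                                             ov::ParameterVector{param});

    // The helper rewrites the 'N' dimension of each laid-out parameter and reshapes.
    deBatchModel(model, ov::Dimension(1));
    assert(model->input(0).get_partial_shape() == (ov::PartialShape{1, 3, 224, 224}));
}
```

Two quirks worth noting: parameters without an explicit layout are silently left untouched (unlike `ov::set_batch`, which validates how the batch propagates through the graph), and the `inputIdx` counter in the helper is incremented but never read.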
@@ -805,21 +744,29 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
                                        localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::AUTO;
         try {
             const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
 
-            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
+            if (autoOrPluginBatch && pluginBatchingIsSupported) {
                 _logger.info("Attempting to handle batching on the plugin side.");
-                ov::set_batch(modelForCompilation, 1);
+                try {
+                    ov::set_batch(modelForCompilation, ov::Dimension(1));
+                } catch (const std::exception& ex) {
+                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                    "Trying to debatch it...",
+                                    ex.what());
+                    deBatchModel(modelForCompilation, ov::Dimension(1));
+                    if (!modelForCompilation) {
+                        OPENVINO_THROW("Cannot debatch a model");
+                    }
+                    _logger.info("The model has been debatched successfully");
+                }
                 // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
             } else {
-                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+                _logger.info("Batching will be handled by the compiler.");
             }
-
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         } catch (const std::exception& ex) {
             _logger.info("Couldn't validate and reshape the model. Batching will be handled by the compiler.", ex.what());
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }
 
     // Update stepping w/ information from driver, unless provided by user or we are off-device
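Net effect on `compile_model`: the `batchedModel` precondition is dropped (batch-1 models now also take the plugin path), a `set_batch` failure falls back to `deBatchModel` instead of aborting the attempt, and `updateBatchMode(COMPILER)` moved out of both branches so it runs on every path through this block. A condensed, illustrative view of the new flow (not verbatim plugin code):

```cpp
try {
    const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
    if (autoOrPluginBatch && pluginBatchingIsSupported) {
        try {
            ov::set_batch(modelForCompilation, ov::Dimension(1));  // fast path
        } catch (const std::exception&) {
            deBatchModel(modelForCompilation, ov::Dimension(1));   // naive per-parameter fallback
        }
    }
} catch (const std::exception&) {
    // Validation or reshape failed; the compiler handles batching.
}
updateBatchMode(ov::intel_npu::BatchMode::COMPILER);  // now unconditional
```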
