@@ -39,104 +39,6 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
 constexpr std::string_view XML_EXTENSION = ".xml";
 constexpr std::string_view ONNX_EXTENSION = ".onnx";
 
-// Helper function to check if shape has dynamic dimensions other than batch dimension
-bool hasOtherDynamicDims(const ov::PartialShape& shape) {
-    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
-        if (shape[dim_idx].is_dynamic()) {
-            return true;  // Found dynamic dimension other than batch
-        }
-    }
-    return false;
-}
-
-bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
-    // Check parameters (inputs)
-    const auto& params = model->get_parameters();
-    for (const auto& param : params) {
-        const auto& shape = param->get_partial_shape();
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    // Check results (outputs)
-    const auto& results = model->get_results();
-    for (const auto& result : results) {
-        const auto& shape = result->get_output_partial_shape(0);
-        if (hasOtherDynamicDims(shape)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-bool validateReshapedModel(const std::vector<IODescriptor>& inputDescriptors,
-                           const std::vector<IODescriptor>& outputDescriptors) {
-    std::set<size_t> batchSizes;
-    bool hasBatchedInputs = false;
-    bool hasBatchedOutputs = false;
-
-    // Check input descriptors
-    for (const IODescriptor& inputDescriptor : inputDescriptors) {
-        if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
-            inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
-            continue;
-        }
-
-        auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
-                                                                  : inputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 && shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-            hasBatchedInputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Check output descriptors
-    for (const IODescriptor& outputDescriptor : outputDescriptors) {
-        if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
-            outputDescriptor.isInitOutputWeights) {
-            continue;
-        }
-
-        auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
-                                                                   : outputDescriptor.shapeFromCompiler;
-
-        // Check for dynamic dimensions other than batch dimension
-        if (hasOtherDynamicDims(shape)) {
-            return false;  // Plugin batching not supported with other dynamic dims
-        }
-
-        // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
-        if (shape.size() > 0 && shape[intel_npu::utils::BATCH_AXIS].is_static() &&
-            shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
-            hasBatchedOutputs = true;
-            batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
-        }
-    }
-
-    // Plugin batching is applied if:
-    // 1. Both inputs and outputs have batched dimensions
-    // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
-    // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
-    // 4. No other dynamic dimensions exist (checked above)
-    return hasBatchedInputs && hasBatchedOutputs && batchSizes.size() == 1 &&
-           *batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
-}
-
 /**
  * @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
  * @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -155,8 +57,6 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
     ov::ParameterVector parameters;
     ov::ResultVector results;
 
-    bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
-
     for (const IODescriptor& inputDescriptor : inputDescriptors) {
         if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
             inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -165,15 +65,9 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
 
         auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                                   : inputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        // TODO: should we protect this part with a certain condition?
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
-            inputDescriptor.precision,
-            shape);
+        std::shared_ptr<ov::op::v0::Parameter> parameter =
+            std::make_shared<ov::op::v0::Parameter>(inputDescriptor.precision, shape);
 
         parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
         parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -195,15 +89,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
 
         auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                                    : outputDescriptor.shapeFromCompiler;
-        // Treat every model with batch 1 as a potentially dynamically batched one.
-        if (pluginBatchingIsApplied) {
-            shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
-        }
 
-        const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
-            outputDescriptor.precision,
-            shape,
-            outputDescriptor.outputTensorNames);
+        const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
+            std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
+                                                     shape,
+                                                     outputDescriptor.outputTensorNames);
 
         auto& result = results.emplace_back(std::make_shared<ov::op::v0::Result>(constantDummy));
         result->output(0).set_tensor_ptr(tensorDummy);
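
The loops above stamp out one detached Parameter per input descriptor and one constant-fed Result per output descriptor. A minimal self-contained sketch of the same pattern, using only the public OpenVINO C++ API (shapes and names here are illustrative, not taken from this patch):

    #include <openvino/core/model.hpp>
    #include <openvino/op/constant.hpp>
    #include <openvino/op/parameter.hpp>
    #include <openvino/op/result.hpp>

    // Builds a "shell" model with one dummy input and one dummy output,
    // mirroring what create_dummy_model() does per IODescriptor.
    std::shared_ptr<ov::Model> make_shell_model() {
        auto parameter = std::make_shared<ov::op::v0::Parameter>(ov::element::f32,
                                                                 ov::PartialShape{1, 3, 224, 224});
        parameter->set_friendly_name("input0");

        // The Result is fed from a throwaway constant; only the tensor metadata
        // (precision, shape, names) matters for the dummy model.
        auto constant = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, {0.0f});
        auto result = std::make_shared<ov::op::v0::Result>(constant);
        result->set_friendly_name("output0");

        return std::make_shared<ov::Model>(ov::ResultVector{result},
                                           ov::ParameterVector{parameter},
                                           "DummyModel");
    }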
@@ -629,6 +519,38 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
     return _properties->get_property(name, arguments);
 }
 
+// Helper function to check if shape has dynamic dimensions other than batch dimension
+bool hasOtherDynamicDims(const ov::PartialShape& shape) {
+    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
+        if (shape[dim_idx].is_dynamic()) {
+            return true;  // Found dynamic dimension other than batch
+        }
+    }
+    return false;
+}
+
+bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
+    // Check parameters (inputs)
+    const auto& params = model->get_parameters();
+    for (const auto& param : params) {
+        const auto& shape = param->get_partial_shape();
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    // Check results (outputs)
+    const auto& results = model->get_results();
+    for (const auto& result : results) {
+        const auto& shape = result->get_output_partial_shape(0);
+        if (hasOtherDynamicDims(shape)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger logger) {
     std::set<ov::Output<const ov::Node>> batchedInputs;
     std::set<ov::Output<const ov::Node>> batchedOutputs;
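
Note that hasOtherDynamicDims() starts scanning at index 1, so a dynamic batch axis alone is tolerated while any other dynamic axis disqualifies the model from plugin-side batching. A small sketch of the intended behavior (assuming the helper above and BATCH_AXIS == 0, as the code uses):

    #include <cassert>
    #include <openvino/core/partial_shape.hpp>

    int main() {
        // Only dim 0 (the batch axis) is dynamic: tolerated.
        ov::PartialShape dynamicBatchOnly{-1, 3, 224, 224};
        // Dims 2 and 3 are dynamic: rejected.
        ov::PartialShape dynamicSpatial{1, 3, -1, -1};

        assert(!hasOtherDynamicDims(dynamicBatchOnly));
        assert(hasOtherDynamicDims(dynamicSpatial));
        return 0;
    }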
@@ -647,7 +569,8 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(input);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
             batchedInputs.insert(params[input_id]->output(0));
@@ -685,7 +608,8 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
         ov::Layout layout = ov::layout::get_layout(output);
 
         // Batching on plugin is working only when batching is found on 0th dimension
-        if ((shape.size() && shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
+        if ((shape.size() &&
+             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
             (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
             const auto& node = output->input_value(0);
             const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
@@ -735,6 +659,21 @@ bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger lo
     return true;
 }
 
+void deBatchModel(std::shared_ptr<ov::Model>& model, ov::Dimension newBatch) {
+    size_t inputIdx = 0;
+    std::map<std::string, ov::PartialShape> newShapes;
+    for (auto&& item : model->get_parameters()) {
+        auto layout = item->get_layout();
+        auto partShape = item->get_partial_shape();
+        if (ov::layout::has_batch(layout)) {
+            partShape[ov::layout::batch_idx(layout)] = newBatch;
+        }
+        newShapes.emplace(item->get_friendly_name(), partShape);
+        inputIdx++;
+    }
+    model->reshape(newShapes);
+}
+
 std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
                                                           const ov::AnyMap& properties) const {
     OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
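
A subtlety in the deBatchModel() helper above: the batch axis is located through each parameter's ov::Layout, and the reshape map is keyed by friendly name, which ov::Model::reshape() resolves against tensor names, so the two must agree. A hedged usage sketch of the helper (the model construction is illustrative):

    #include <openvino/core/model.hpp>
    #include <openvino/op/parameter.hpp>
    #include <openvino/op/relu.hpp>
    #include <openvino/op/result.hpp>

    int main() {
        // Build a small batched model; in real use this comes from ov::Core::read_model().
        auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32,
                                                             ov::PartialShape{4, 3, 224, 224});
        param->set_friendly_name("data");
        param->get_output_tensor(0).set_names({"data"});  // let reshape-by-name find this input
        param->set_layout(ov::Layout("NCHW"));            // marks dim 0 as the batch axis

        auto relu = std::make_shared<ov::op::v0::Relu>(param);
        auto result = std::make_shared<ov::op::v0::Result>(relu);
        auto model = std::make_shared<ov::Model>(ov::ResultVector{result},
                                                 ov::ParameterVector{param});

        deBatchModel(model, ov::Dimension(1));  // batch axis becomes 1: {1, 3, 224, 224}
        return 0;
    }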
@@ -805,21 +744,29 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
                                        localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::AUTO;
         try {
             const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);
-            const bool batchedModel = ov::get_batch(modelForCompilation) != intel_npu::utils::DEFAULT_BATCH_SIZE;
 
-            if (autoOrPluginBatch && pluginBatchingIsSupported && batchedModel) {
+            if (autoOrPluginBatch && pluginBatchingIsSupported) {
                 _logger.info("Attempting to handle batching on the plugin side.");
-                ov::set_batch(modelForCompilation, 1);
+                try {
+                    ov::set_batch(modelForCompilation, ov::Dimension(1));
+                } catch (const std::exception& ex) {
+                    _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
+                                    "Trying to debatch it...",
+                                    ex.what());
+                    deBatchModel(modelForCompilation, ov::Dimension(1));
+                    if (!modelForCompilation) {
+                        OPENVINO_THROW("Cannot debatch a model");
+                    }
+                    _logger.info("The model has been debatched successfully");
+                }
                 // TODO: add debatcher for more complicated cases as set_batch is pretty naive.
             } else {
-                _logger.info("Unable to manage batching on the plugin side, so the compiler will take care of it.");
+                _logger.info("Batching will be handled by compiler.");
             }
-
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         } catch (const std::exception& ex) {
             _logger.info("Couldn't validate and reshape the model. Batching will be handled by compiler.", ex.what());
-            updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
         }
+        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
     }
 
     // Update stepping w/ information from driver, unless provided by user or we are off-device
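
The net control flow introduced here: try the generic ov::set_batch() first, and fall back to the layout-driven deBatchModel() only when it throws (set_batch() can fail, for example, when it cannot identify the batch axis or the resulting reshape does not validate). Distilled into a standalone sketch (the wrapper name is illustrative, not part of this patch):

    #include <openvino/core/model.hpp>

    // Hypothetical distillation of the fallback added to compile_model().
    void forceBatchOne(std::shared_ptr<ov::Model>& model) {
        try {
            ov::set_batch(model, ov::Dimension(1));  // fast path: let OpenVINO find the batch axis
        } catch (const std::exception&) {
            deBatchModel(model, ov::Dimension(1));   // fallback: reshape via layout + friendly names
        }
    }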