@@ -39,6 +39,104 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
39
39
// File-name extensions (including the leading dot) used to recognise model files.
// The literals must not contain whitespace, otherwise they can never match a real file suffix.
constexpr std::string_view XML_EXTENSION = ".xml";
constexpr std::string_view ONNX_EXTENSION = ".onnx";
41
41
42
+ // Helper function to check if shape has dynamic dimensions other than batch dimension
43
+ bool hasOtherDynamicDims (const ov::PartialShape& shape) {
44
+ for (size_t dim_idx = 1 ; dim_idx < shape.size (); dim_idx++) {
45
+ if (shape[dim_idx].is_dynamic ()) {
46
+ return true ; // Found dynamic dimension other than batch
47
+ }
48
+ }
49
+ return false ;
50
+ }
51
+
52
+ bool checkModelDynamicDims (const std::shared_ptr<const ov::Model>& model) {
53
+ // Check parameters (inputs)
54
+ const auto & params = model->get_parameters ();
55
+ for (const auto & param : params) {
56
+ const auto & shape = param->get_partial_shape ();
57
+ if (hasOtherDynamicDims (shape)) {
58
+ return true ;
59
+ }
60
+ }
61
+
62
+ // Check results (outputs)
63
+ const auto & results = model->get_results ();
64
+ for (const auto & result : results) {
65
+ const auto & shape = result->get_output_partial_shape (0 );
66
+ if (hasOtherDynamicDims (shape)) {
67
+ return true ;
68
+ }
69
+ }
70
+
71
+ return false ;
72
+ }
73
+
74
+ bool validateReshapedModel (const std::vector<IODescriptor>& inputDescriptors,
75
+ const std::vector<IODescriptor>& outputDescriptors) {
76
+ std::set<size_t > batchSizes;
77
+ bool hasBatchedInputs = false ;
78
+ bool hasBatchedOutputs = false ;
79
+
80
+ // Check input descriptors
81
+ for (const IODescriptor& inputDescriptor : inputDescriptors) {
82
+ if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
83
+ inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights ) {
84
+ continue ;
85
+ }
86
+
87
+ auto shape = inputDescriptor.shapeFromIRModel .has_value () ? *inputDescriptor.shapeFromIRModel
88
+ : inputDescriptor.shapeFromCompiler ;
89
+
90
+ // Check for dynamic dimensions other than batch dimension
91
+ if (hasOtherDynamicDims (shape)) {
92
+ return false ; // Plugin batching not supported with other dynamic dims
93
+ }
94
+
95
+ // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
96
+ if (shape.size () > 0 &&
97
+ shape[intel_npu::utils::BATCH_AXIS].is_static () &&
98
+ shape[intel_npu::utils::BATCH_AXIS].get_length () == intel_npu::utils::DEFAULT_BATCH_SIZE) {
99
+
100
+ hasBatchedInputs = true ;
101
+ batchSizes.insert (shape[intel_npu::utils::BATCH_AXIS].get_length ());
102
+ }
103
+ }
104
+
105
+ // Check output descriptors
106
+ for (const IODescriptor& outputDescriptor : outputDescriptors) {
107
+ if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
108
+ outputDescriptor.isInitOutputWeights ) {
109
+ continue ;
110
+ }
111
+
112
+ auto shape = outputDescriptor.shapeFromIRModel .has_value () ? *outputDescriptor.shapeFromIRModel
113
+ : outputDescriptor.shapeFromCompiler ;
114
+
115
+ // Check for dynamic dimensions other than batch dimension
116
+ if (hasOtherDynamicDims (shape)) {
117
+ return false ; // Plugin batching not supported with other dynamic dims
118
+ }
119
+
120
+ // Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
121
+ if (shape.size () > 0 &&
122
+ shape[intel_npu::utils::BATCH_AXIS].is_static () &&
123
+ shape[intel_npu::utils::BATCH_AXIS].get_length () == intel_npu::utils::DEFAULT_BATCH_SIZE) {
124
+
125
+ hasBatchedOutputs = true ;
126
+ batchSizes.insert (shape[intel_npu::utils::BATCH_AXIS].get_length ());
127
+ }
128
+ }
129
+
130
+ // Plugin batching is applied if:
131
+ // 1. Both inputs and outputs have batched dimensions
132
+ // 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
133
+ // 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
134
+ // 4. No other dynamic dimensions exist (checked above)
135
+ return hasBatchedInputs && hasBatchedOutputs &&
136
+ batchSizes.size () == 1 &&
137
+ *batchSizes.begin () == intel_npu::utils::DEFAULT_BATCH_SIZE;
138
+ }
139
+
42
140
/* *
43
141
* @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
44
142
* @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -57,6 +155,8 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
57
155
ov::ParameterVector parameters;
58
156
ov::ResultVector results;
59
157
158
+ bool pluginBatchingIsApplied = validateReshapedModel (inputDescriptors, outputDescriptors);
159
+
60
160
for (const IODescriptor& inputDescriptor : inputDescriptors) {
61
161
if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
62
162
inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights ) {
@@ -67,7 +167,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
67
167
: inputDescriptor.shapeFromCompiler ;
68
168
// Treat every model with batch 1 as a potentially dynamically batched one.
69
169
// TODO: should we protect this part with a certain condition?
70
- if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE ) {
170
+ if (pluginBatchingIsApplied ) {
71
171
shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension (-1 );
72
172
}
73
173
@@ -96,7 +196,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
96
196
auto shape = outputDescriptor.shapeFromIRModel .has_value () ? *outputDescriptor.shapeFromIRModel
97
197
: outputDescriptor.shapeFromCompiler ;
98
198
// Treat every model with batch 1 as a potentially dynamically batched one.
99
- if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE ) {
199
+ if (pluginBatchingIsApplied ) {
100
200
shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension (-1 );
101
201
}
102
202
@@ -529,43 +629,14 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
529
629
return _properties->get_property (name, arguments);
530
630
}
531
631
532
- bool checkDynamicDims (const std::shared_ptr<const ov::Model>& model) {
533
- // Check parameters (inputs)
534
- for (const auto & param : model->get_parameters ()) {
535
- const auto & shape = param->get_partial_shape ();
536
-
537
- // Check all dimensions except the first one (batch dimension)
538
- for (size_t dim_idx = 1 ; dim_idx < shape.size (); dim_idx++) {
539
- if (shape[dim_idx].is_dynamic ()) {
540
- return true ; // Found dynamic dimension other than batch
541
- }
542
- }
543
- }
544
-
545
- // Check results (outputs)
546
- for (const auto & result : model->get_results ()) {
547
- const auto & shape = result->get_output_partial_shape (0 );
548
-
549
- // Check all dimensions except the first one (batch dimension)
550
- for (size_t dim_idx = 1 ; dim_idx < shape.size (); dim_idx++) {
551
- if (shape[dim_idx].is_dynamic ()) {
552
- return true ; // Found dynamic dimension other than batch
553
- }
554
- }
555
- }
556
-
557
- return false ; // No dynamic dimensions found other than batch
558
- }
559
-
560
632
bool validateModelBatch (const std::shared_ptr<const ov::Model>& model, Logger logger) {
561
633
std::set<ov::Output<const ov::Node>> batchedInputs;
562
634
std::set<ov::Output<const ov::Node>> batchedOutputs;
563
635
std::set<size_t > sBatchSize ;
564
636
565
637
// Limitation: Plugin batching is not supported when there are dynamic
566
638
// dimensions other than the batch dimension.
567
- const bool otherDynamicDims = checkDynamicDims (model);
568
- if (otherDynamicDims) {
639
+ if (checkModelDynamicDims (model)) {
569
640
return false ;
570
641
}
571
642
0 commit comments