Skip to content

Commit 929e13a

Browse files
Refactor batch management in Plugin and Compiler: add validation checks for plugin-side batching on reshaped models
1 parent 0f4b01c commit 929e13a

File tree

1 file changed

+103
-32
lines changed

1 file changed

+103
-32
lines changed

src/plugins/intel_npu/src/plugin/src/plugin.cpp

Lines changed: 103 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,104 @@ constexpr std::string_view WEIGHTS_EXTENSION = ".bin";
3939
constexpr std::string_view XML_EXTENSION = ".xml";
4040
constexpr std::string_view ONNX_EXTENSION = ".onnx";
4141

42+
// Helper function to check if shape has dynamic dimensions other than batch dimension
43+
bool hasOtherDynamicDims(const ov::PartialShape& shape) {
44+
for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
45+
if (shape[dim_idx].is_dynamic()) {
46+
return true; // Found dynamic dimension other than batch
47+
}
48+
}
49+
return false;
50+
}
51+
52+
// Reports whether the model exposes a dynamic dimension on any axis other than the batch
// axis, on either an input or an output. Such models cannot use plugin-side batching.
bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
    // Inputs: any parameter with a non-batch dynamic dimension disqualifies the model.
    for (const auto& param : model->get_parameters()) {
        if (hasOtherDynamicDims(param->get_partial_shape())) {
            return true;
        }
    }

    // Outputs: the same rule, applied to every result node.
    for (const auto& result : model->get_results()) {
        if (hasOtherDynamicDims(result->get_output_partial_shape(0))) {
            return true;
        }
    }

    return false;
}
73+
74+
bool validateReshapedModel(const std::vector<IODescriptor>& inputDescriptors,
75+
const std::vector<IODescriptor>& outputDescriptors) {
76+
std::set<size_t> batchSizes;
77+
bool hasBatchedInputs = false;
78+
bool hasBatchedOutputs = false;
79+
80+
// Check input descriptors
81+
for (const IODescriptor& inputDescriptor : inputDescriptors) {
82+
if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
83+
inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
84+
continue;
85+
}
86+
87+
auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
88+
: inputDescriptor.shapeFromCompiler;
89+
90+
// Check for dynamic dimensions other than batch dimension
91+
if (hasOtherDynamicDims(shape)) {
92+
return false; // Plugin batching not supported with other dynamic dims
93+
}
94+
95+
// Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
96+
if (shape.size() > 0 &&
97+
shape[intel_npu::utils::BATCH_AXIS].is_static() &&
98+
shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
99+
100+
hasBatchedInputs = true;
101+
batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
102+
}
103+
}
104+
105+
// Check output descriptors
106+
for (const IODescriptor& outputDescriptor : outputDescriptors) {
107+
if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor ||
108+
outputDescriptor.isInitOutputWeights) {
109+
continue;
110+
}
111+
112+
auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
113+
: outputDescriptor.shapeFromCompiler;
114+
115+
// Check for dynamic dimensions other than batch dimension
116+
if (hasOtherDynamicDims(shape)) {
117+
return false; // Plugin batching not supported with other dynamic dims
118+
}
119+
120+
// Check if shape has batch dimension and if batch size equals DEFAULT_BATCH_SIZE
121+
if (shape.size() > 0 &&
122+
shape[intel_npu::utils::BATCH_AXIS].is_static() &&
123+
shape[intel_npu::utils::BATCH_AXIS].get_length() == intel_npu::utils::DEFAULT_BATCH_SIZE) {
124+
125+
hasBatchedOutputs = true;
126+
batchSizes.insert(shape[intel_npu::utils::BATCH_AXIS].get_length());
127+
}
128+
}
129+
130+
// Plugin batching is applied if:
131+
// 1. Both inputs and outputs have batched dimensions
132+
// 2. All batch sizes are consistent (should be only DEFAULT_BATCH_SIZE)
133+
// 3. The batch size is exactly DEFAULT_BATCH_SIZE (since we've already reshaped the model)
134+
// 4. No other dynamic dimensions exist (checked above)
135+
return hasBatchedInputs && hasBatchedOutputs &&
136+
batchSizes.size() == 1 &&
137+
*batchSizes.begin() == intel_npu::utils::DEFAULT_BATCH_SIZE;
138+
}
139+
42140
/**
43141
* @brief Creates an "ov::Model" object which contains only the given "parameter" and "result" nodes.
44142
* @details Using an "ov::Model" object to create the "CompiledModel" is the preferred way of using the OV API.
@@ -57,6 +155,8 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
57155
ov::ParameterVector parameters;
58156
ov::ResultVector results;
59157

158+
bool pluginBatchingIsApplied = validateReshapedModel(inputDescriptors, outputDescriptors);
159+
60160
for (const IODescriptor& inputDescriptor : inputDescriptors) {
61161
if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
62162
inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
@@ -67,7 +167,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
67167
: inputDescriptor.shapeFromCompiler;
68168
// Treat every model with batch 1 as a potentially dynamically batched one.
69169
// TODO: should we protect this part with a certain condition?
70-
if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
170+
if (pluginBatchingIsApplied) {
71171
shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
72172
}
73173

@@ -96,7 +196,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
96196
auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
97197
: outputDescriptor.shapeFromCompiler;
98198
// Treat every model with batch 1 as a potentially dynamically batched one.
99-
if (shape[intel_npu::utils::BATCH_AXIS] == intel_npu::utils::DEFAULT_BATCH_SIZE) {
199+
if (pluginBatchingIsApplied) {
100200
shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
101201
}
102202

@@ -529,43 +629,14 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
529629
return _properties->get_property(name, arguments);
530630
}
531631

532-
bool checkDynamicDims(const std::shared_ptr<const ov::Model>& model) {
533-
// Check parameters (inputs)
534-
for (const auto& param : model->get_parameters()) {
535-
const auto& shape = param->get_partial_shape();
536-
537-
// Check all dimensions except the first one (batch dimension)
538-
for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
539-
if (shape[dim_idx].is_dynamic()) {
540-
return true; // Found dynamic dimension other than batch
541-
}
542-
}
543-
}
544-
545-
// Check results (outputs)
546-
for (const auto& result : model->get_results()) {
547-
const auto& shape = result->get_output_partial_shape(0);
548-
549-
// Check all dimensions except the first one (batch dimension)
550-
for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
551-
if (shape[dim_idx].is_dynamic()) {
552-
return true; // Found dynamic dimension other than batch
553-
}
554-
}
555-
}
556-
557-
return false; // No dynamic dimensions found other than batch
558-
}
559-
560632
bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger logger) {
561633
std::set<ov::Output<const ov::Node>> batchedInputs;
562634
std::set<ov::Output<const ov::Node>> batchedOutputs;
563635
std::set<size_t> sBatchSize;
564636

565637
// Limitation: Plugin batching is not supported when there are dynamic
566638
// dimensions other than the batch dimension.
567-
const bool otherDynamicDims = checkDynamicDims(model);
568-
if (otherDynamicDims) {
639+
if (checkModelDynamicDims(model)) {
569640
return false;
570641
}
571642

0 commit comments

Comments
 (0)