[Dynamic batch] Investigate refactoring opportunities for batch management in Plugin and Compiler - ver 2 #31784
base: master
Changes from all commits
dfac8c5
7b6f81a
b9cbb0f
3a4baa2
0f4b01c
929e13a
ef91465
6cbe494
@@ -57,16 +57,31 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
ov::ParameterVector parameters;
ov::ResultVector results;

// Helper function to check if a tensor was originally dynamic
auto wasOriginallyDynamic = [](const std::unordered_set<std::string>& tensorNames) -> bool {
    for (const auto& name : tensorNames) {
        if (name.find("_DYNBATCH_ORIG") != std::string::npos) {
            return true;
        }
    }
    return false;
};

for (const IODescriptor& inputDescriptor : inputDescriptors) {
    if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor ||
        inputDescriptor.isInitInputWeights || inputDescriptor.isMainInputWeights) {
        continue;
    }

    std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
        inputDescriptor.precision,
        inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                     : inputDescriptor.shapeFromCompiler);
    auto shape = inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
                                                              : inputDescriptor.shapeFromCompiler;

    if (wasOriginallyDynamic(inputDescriptor.outputTensorNames)) {
        shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
    }

    std::shared_ptr<ov::op::v0::Parameter> parameter =
        std::make_shared<ov::op::v0::Parameter>(inputDescriptor.precision, shape);

    parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
    parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);

@@ -86,11 +101,17 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
    std::shared_ptr<ov::Node> constantDummy =
        std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);

    const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
        outputDescriptor.precision,
        outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                      : outputDescriptor.shapeFromCompiler,
        outputDescriptor.outputTensorNames);
    auto shape = outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
                                                               : outputDescriptor.shapeFromCompiler;

    if (wasOriginallyDynamic(outputDescriptor.outputTensorNames)) {
        shape[intel_npu::utils::BATCH_AXIS] = ov::Dimension(-1);
    }

    const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
        std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
                                                 shape,
                                                 outputDescriptor.outputTensorNames);

    auto& result = results.emplace_back(std::make_shared<ov::op::v0::Result>(constantDummy));
    result->output(0).set_tensor_ptr(tensorDummy);

@@ -516,9 +537,165 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& argument
    return _properties->get_property(name, arguments);
}

// Helper function to check if shape has dynamic dimensions other than batch dimension
bool hasOtherDynamicDims(const ov::PartialShape& shape) {
    for (size_t dim_idx = 1; dim_idx < shape.size(); dim_idx++) {
        if (shape[dim_idx].is_dynamic()) {
            return true;  // Found dynamic dimension other than batch
        }
    }
    return false;
}

bool checkModelDynamicDims(const std::shared_ptr<const ov::Model>& model) {
    // Check parameters (inputs)
    const auto& params = model->get_parameters();
    for (const auto& param : params) {
        const auto& shape = param->get_partial_shape();
        if (hasOtherDynamicDims(shape)) {
            return true;
        }
    }

    // Check results (outputs)
    const auto& results = model->get_results();
    for (const auto& result : results) {
        const auto& shape = result->get_output_partial_shape(0);
        if (hasOtherDynamicDims(shape)) {
            return true;
        }
    }

    return false;
}

bool validateModelBatch(const std::shared_ptr<const ov::Model>& model, Logger logger) {
    std::set<ov::Output<const ov::Node>> batchedInputs;
    std::set<ov::Output<const ov::Node>> batchedOutputs;
    std::set<size_t> sBatchSize;

    // Limitation: Plugin batching is not supported when there are dynamic
    // dimensions other than the batch dimension.
    if (checkModelDynamicDims(model)) {
Review comment: As we discussed offline, the problem with the simultaneous existence of a batch dimension and other dynamic dimensions is that we would have a "gap" in the plain tensor data after the end of one batch portion and before the start of the next. IMHO that makes it harder to find where the next batch portion is located: we cannot simply jump to batch i in the entire tensor. What if we do not introduce that limitation at all, since it manifests a lot of brittleness in the code and introduces very specific checks (like the one above)? What do you think?
Reply: Sure, we can do it separately in E#179728. Thanks!
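For context, here is a minimal, self-contained sketch (not part of this PR; the helper name and shapes are made up) of the addressing problem the comment above describes: with only the batch dimension dynamic, every sample sits at a constant stride, but once another dimension is dynamic no single stride exists.

```cpp
// Minimal sketch: why plugin-side batch splitting assumes static non-batch dims.
// For a row-major tensor [N, C, H, W] with C/H/W static, sample i starts at a
// fixed offset i * C * H * W. If any non-batch dim may change per request, that
// stride is unknown up front, so batch portions are no longer a constant step apart.
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// Offset (in elements) of sample `i` when every non-batch dimension is static.
std::size_t static_sample_offset(std::size_t i, const std::vector<std::size_t>& non_batch_dims) {
    const std::size_t stride = std::accumulate(non_batch_dims.begin(),
                                               non_batch_dims.end(),
                                               std::size_t{1},
                                               std::multiplies<std::size_t>());
    return i * stride;  // valid only because `stride` is identical for every sample
}

int main() {
    // Static case: shape [4, 3, 224, 224] -> sample 2 starts at 2 * 3 * 224 * 224.
    const std::size_t offset = static_sample_offset(2, {3, 224, 224});
    // With, say, a dynamic sequence length, each sample could occupy a different
    // number of elements, so no single stride (and no such offset formula) exists.
    return offset > 0 ? 0 : 1;
}
```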

        return false;
    }

    const auto& params = model->get_parameters();
    for (size_t input_id = 0; input_id < params.size(); input_id++) {
        const auto& input = params[input_id];
        const auto& shape = input->get_partial_shape();
        ov::Layout layout = ov::layout::get_layout(input);

        // Batching on plugin is working only when batching is found on 0th dimension
        if ((shape.size() &&
             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
            const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : input->get_shape();
            batchedInputs.insert(params[input_id]->output(0));

            if (shape.rank().is_dynamic()) {
                OPENVINO_THROW("Shapes with dynamic rank are not supported.");
            } else {
                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
            }
        } else {
            // gather some diagnostic info
            std::optional<size_t> batch_dim_index_detected;
            for (size_t i = 1; i < shape.size(); i++) {
                if (shape[i].has_symbol()) {
                    batch_dim_index_detected = i;
                    break;
                }
            }
            std::stringstream sstream;
            sstream << "Only networks with inputs batched by 0th dimension are supported. ";
            if (batch_dim_index_detected.has_value()) {
                sstream << "The batch has been detected on: " << batch_dim_index_detected.value()
                        << " dimension instead. ";
            } else {
                sstream << "The batch hasn't been detected at all. ";
            }
            sstream << "Please check input id: " << input_id << " by the name: " << input->get_friendly_name()
                    << ", layout: " << layout.to_string() << ", is_dynamic: " << shape.is_dynamic();
            logger.info("%s", sstream.str());
            return false;
        }
    }
    for (const auto& output : model->get_results()) {
        const auto& shape = output->get_output_partial_shape(0);
        ov::Layout layout = ov::layout::get_layout(output);

        // Batching on plugin is working only when batching is found on 0th dimension
        if ((shape.size() &&
             shape[intel_npu::utils::BATCH_AXIS].get_max_length() != intel_npu::utils::DEFAULT_BATCH_SIZE) ||
            (ov::layout::has_batch(layout) && ov::layout::batch_idx(layout) == intel_npu::utils::BATCH_AXIS)) {
            const auto& node = output->input_value(0);
            const auto& staticShape = shape.is_dynamic() ? shape.get_max_shape() : output->get_shape();
            batchedOutputs.insert(ov::Output<const ov::Node>(node.get_node(), node.get_index()));

            if (shape.rank().is_dynamic()) {
                OPENVINO_THROW("Shapes with dynamic rank are not supported.");
            } else {
                sBatchSize.insert(staticShape[intel_npu::utils::BATCH_AXIS]);
            }
        } else {
            logger.info("Only networks with outputs batched by 0th dimension are supported. Please check an output by "
                        "the name: %s, layout: %s",
                        output->get_friendly_name(),
                        layout.to_string());
            return false;
        }
    }
    if (!batchedInputs.size() || !batchedOutputs.size()) {
        logger.info(
            "Only networks with inputs/outputs featuring batched dim are supported! Got inputs: %ld, outputs: %ld",
            batchedInputs.size(),
            batchedOutputs.size());
        return false;
    }

    if (sBatchSize.size() != 1) {
        logger.info("Batching size shall have same value for all tensors! Got unique batch sizes number: %ld",
                    sBatchSize.size());
        return false;
    }

    auto node_info_printer = [&logger](const auto& ov_node, std::string nodeType) {
        logger.info("%s: %s has shape value: %s",
                    nodeType,
                    ov_node.get_any_name(),
                    ov_node.get_partial_shape().to_string());
    };

    for (const auto& ov_node : batchedInputs) {
        node_info_printer(ov_node, "Input");
    }
    for (const auto& ov_node : batchedOutputs) {
        node_info_printer(ov_node, "Output");
    }

    return true;
}

void deBatchModel(std::shared_ptr<ov::Model>& model, ov::Dimension newBatch) {
    size_t inputIdx = 0;
    std::map<std::string, ov::PartialShape> newShapes;
    for (auto&& item : model->get_parameters()) {
        auto layout = item->get_layout();
        auto partShape = item->get_partial_shape();
        if (ov::layout::has_batch(layout)) {
            partShape[ov::layout::batch_idx(layout)] = newBatch;
        }
        newShapes.emplace(item->get_friendly_name(), partShape);
        inputIdx++;
    }
    model->reshape(newShapes);
}

std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<const ov::Model>& model,
                                                          const ov::AnyMap& properties) const {
    OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::compile_model");
    auto modelForCompilation = model->clone();

    // Before going any further: if
    // ... 1 - NPUW mode is activated

@@ -560,21 +737,79 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
    auto device = _backend == nullptr ? nullptr : _backend->getDevice(localConfig.get<DEVICE_ID>());
    localConfig.update({{ov::intel_npu::platform.name(), platform}});

    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
        !localConfig.has(ov::intel_npu::batch_mode.name())) {
    auto updateBatchMode = [&](ov::intel_npu::BatchMode mode) {
        std::stringstream strStream;
        strStream << ov::intel_npu::BatchMode::AUTO;
        strStream << mode;
        _logger.info("Setting batching mode to %s.", strStream.str());
        localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
    };

    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) &&
        !localConfig.has(ov::intel_npu::batch_mode.name())) {
        updateBatchMode(ov::intel_npu::BatchMode::AUTO);
    }

    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name()) && !model->get_variables().empty()) {
        if (localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
            OPENVINO_THROW("This model contains states, thus it is not supported when handling batching on the plugin");
        }

        std::stringstream strStream;
        strStream << ov::intel_npu::BatchMode::COMPILER;
        localConfig.update({{ov::intel_npu::batch_mode.name(), strStream.str()}});
        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
    }

    if (localConfig.isAvailable(ov::intel_npu::batch_mode.name())) {
Review comment: tips: … Perhaps it improves readability.

        bool autoOrPluginBatch = localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN ||
                                 localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::AUTO;
        if (modelForCompilation->is_dynamic()) {  // Avoiding risks with static models. TODO: common solution.
            try {
                const bool pluginBatchingIsSupported = validateModelBatch(modelForCompilation, _logger);

                if (autoOrPluginBatch && pluginBatchingIsSupported) {
                    _logger.info("Attempting to handle batching on the plugin side.");

                    // Store dynamic batch info in tensor names BEFORE reshaping
                    auto encodeDynamicBatchInfo = [](std::shared_ptr<ov::Model> model) {
                        // Encode info in input tensor names
                        for (auto& input : model->inputs()) {
                            std::string originalName = input.get_any_name();
                            std::string newName = originalName + "_DYNBATCH_ORIG";
                            input.get_tensor().set_names({newName});
                        }

                        // Encode info in output tensor names
                        for (auto& output : model->outputs()) {
                            std::string originalName = output.get_any_name();
                            std::string newName = originalName + "_DYNBATCH_ORIG";
                            output.get_tensor().set_names({newName});
                        }
                    };

                    try {
                        encodeDynamicBatchInfo(modelForCompilation);
                        ov::set_batch(modelForCompilation, ov::Dimension(1));
                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                    } catch (const std::exception& ex) {
                        _logger.warning("The plugin couldn't resize a batched model due to exception: %s.\n"
                                        "Trying to debatch it...",
                                        ex.what());
                        encodeDynamicBatchInfo(modelForCompilation);
                        deBatchModel(modelForCompilation, ov::Dimension(1));
                        if (!modelForCompilation) {
                            OPENVINO_THROW("Cannot debatch a model");
                        }
                        _logger.info("The model has been debatched successfully");
                        updateBatchMode(ov::intel_npu::BatchMode::COMPILER);
                    }
                } else {
_logger.info("Batching will be handed by compiler."); | ||
updateBatchMode(ov::intel_npu::BatchMode::COMPILER); | ||
} | ||
} catch (const std::exception& ex) { | ||
_logger.info("Couldn't validate and reshape the model. Batching will be handed by compiler.", | ||
ex.what()); | ||
updateBatchMode(ov::intel_npu::BatchMode::COMPILER); | ||
} | ||
} | ||
} | ||

    // Update stepping w/ information from driver, unless provided by user or we are off-device

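As a reading aid, here is a minimal sketch (not the PR code; only the suffix string and the name check mirror the diff, everything else is hypothetical) of how the "_DYNBATCH_ORIG" marker introduced above is intended to round-trip: compile_model() appends the suffix to the I/O tensor names of a dynamically batched model before reshaping it to batch 1, and create_dummy_model() later restores a dynamic batch dimension for any descriptor whose tensor names carry the suffix.

```cpp
// Minimal sketch of the marker round-trip; standalone, no OpenVINO dependencies.
#include <iostream>
#include <string>
#include <unordered_set>

const std::string kDynBatchSuffix = "_DYNBATCH_ORIG";

// Mirrors the wasOriginallyDynamic helper from the first hunk.
bool was_originally_dynamic(const std::unordered_set<std::string>& tensor_names) {
    for (const auto& name : tensor_names) {
        if (name.find(kDynBatchSuffix) != std::string::npos) {
            return true;
        }
    }
    return false;
}

int main() {
    // "Encoding" step: the plugin renames e.g. "input0" -> "input0_DYNBATCH_ORIG".
    std::unordered_set<std::string> names{"input0" + kDynBatchSuffix};
    // "Decoding" step: the dummy model sees the marker and would set batch = ov::Dimension(-1).
    std::cout << (was_originally_dynamic(names) ? "dynamic batch restored" : "static batch kept") << "\n";
    return 0;
}
```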
@@ -625,10 +860,10 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
    _logger.debug("performing compile");

    if (!localConfig.get<WEIGHTLESS_BLOB>()) {
        graph = compiler->compile(model->clone(), localConfig);
        graph = compiler->compile(modelForCompilation->clone(), localConfig);
    } else {
        check_weightless_cache_attribute_occurrence(model);
        graph = compiler->compileWS(model->clone(), localConfig);
        graph = compiler->compileWS(modelForCompilation->clone(), localConfig);
    }
} catch (const std::exception& ex) {
    OPENVINO_THROW(ex.what());
