Commit 94398ee

[port, core] Open compiled model as file stream when MMAP disabled. (#31813)
### Details:
- When the MMAP feature is disabled, open the model from the cache as a file stream and do not read it into a tensor at the core level. Reading it into a tensor can cause additional memory consumption, because the plugin may read the stream again on its own.

### Port of:
- #31774

### Tickets:
- CVS-171329

Signed-off-by: Raasz, Pawel <[email protected]>
1 parent 2b18f70 commit 94398ee
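In short, the cache-read path now hands the consumer either a memory-backed blob or an open file stream, depending on whether MMAP is enabled. Below is a rough standalone sketch of that dispatch, using hypothetical stand-ins (`MappedBlob`, `BlobVariant`, `read_cache_entry`, `Reader`) in place of the OpenVINO types shown in the diffs further down; it is an illustration of the pattern, not the actual implementation.

```cpp
#include <fstream>
#include <functional>
#include <iostream>
#include <istream>
#include <iterator>
#include <string>
#include <variant>
#include <vector>

// Hypothetical stand-in for the mmap-backed ov::Tensor blob.
using MappedBlob = std::vector<char>;
// Mirrors the shape of ICacheManager::CompiledBlobVariant and StreamReader.
using BlobVariant = std::variant<const MappedBlob, std::reference_wrapper<std::istream>>;
using Reader = std::function<void(BlobVariant&)>;

// Sketch of the new read path: hand over a memory blob when mmap is enabled,
// otherwise keep the cache file as an open stream and let the consumer read it,
// avoiding an extra full copy of the blob at the core level.
void read_cache_entry(const std::string& path, bool enable_mmap, const Reader& reader) {
    if (enable_mmap) {
        // A real implementation would memory-map the file; reading it into a
        // vector is just a placeholder for this sketch.
        std::ifstream file(path, std::ios::binary);
        MappedBlob blob((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
        BlobVariant compiled_blob{std::in_place_index<0>, std::move(blob)};
        reader(compiled_blob);
    } else {
        // Streams are not copyable, so the variant stores a non-owning reference
        // to the stream owned by this scope.
        std::ifstream stream(path, std::ios::binary);
        BlobVariant compiled_blob{std::in_place_index<1>, std::ref(stream)};
        reader(compiled_blob);
    }
}

int main() {
    // Write a fake cache entry, then exercise both paths with a trivial reader.
    const std::string path = "blob.cache";  // hypothetical cache file name
    std::ofstream(path, std::ios::binary) << "compiled blob bytes";

    const Reader reader = [](BlobVariant& blob) {
        std::cout << (blob.index() == 0 ? "got mapped blob\n" : "got stream\n");
    };
    read_cache_entry(path, true, reader);
    read_cache_entry(path, false, reader);
}
```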

File tree

5 files changed: +148 additions, −132 deletions


src/inference/src/cache_manager.hpp

Lines changed: 16 additions & 4 deletions
```diff
@@ -9,10 +9,12 @@
  */
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <functional>
 #include <memory>
 #include <string>
+#include <variant>
 
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/runtime/tensor.hpp"
@@ -67,10 +69,15 @@ class ICacheManager {
      */
     virtual void write_cache_entry(const std::string& id, StreamWriter writer) = 0;
 
+    /**
+     * @brief Variant type for compiled blob representation
+     */
+    using CompiledBlobVariant = std::variant<const ov::Tensor, std::reference_wrapper<std::istream>>;
+
     /**
      * @brief Function passing created input stream
      */
-    using StreamReader = std::function<void(ov::Tensor&)>;
+    using StreamReader = std::function<void(CompiledBlobVariant&)>;
 
     /**
      * @brief Callback when OpenVINO intends to read model from cache
@@ -137,9 +144,14 @@ class FileStorageCacheManager final : public ICacheManager {
         ScopedLocale plocal_C(LC_ALL, "C");
         const auto blob_file_name = getBlobFile(id);
         if (std::filesystem::exists(blob_file_name)) {
-            auto compiled_blob =
-                read_tensor_data(blob_file_name, element::u8, PartialShape::dynamic(1), 0, enable_mmap);
-            reader(compiled_blob);
+            if (enable_mmap) {
+                CompiledBlobVariant compiled_blob{std::in_place_index<0>, ov::read_tensor_data(blob_file_name)};
+                reader(compiled_blob);
+            } else {
+                std::ifstream stream(blob_file_name, std::ios_base::binary);
+                CompiledBlobVariant compiled_blob{std::in_place_index<1>, std::ref(stream)};
+                reader(compiled_blob);
+            }
         }
     }
```
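On the consumer side, whoever supplies the `StreamReader` callback now has to handle both alternatives. Here is a minimal hypothetical callback (again with `MappedBlob` standing in for `ov::Tensor`, so it compiles without OpenVINO), using plain `std::holds_alternative` / `std::get` rather than a visitor:

```cpp
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
#include <variant>
#include <vector>

using MappedBlob = std::vector<char>;  // hypothetical stand-in for the mmap-backed tensor
using BlobVariant = std::variant<const MappedBlob, std::reference_wrapper<std::istream>>;

// A reader callback that handles both alternatives explicitly.
void import_from_cache(BlobVariant& blob) {
    if (std::holds_alternative<const MappedBlob>(blob)) {
        const MappedBlob& bytes = std::get<const MappedBlob>(blob);
        std::cout << "importing from a " << bytes.size() << "-byte mapped blob\n";
    } else {
        std::istream& stream = std::get<std::reference_wrapper<std::istream>>(blob);
        std::cout << "importing by reading the stream: " << stream.rdbuf() << '\n';
    }
}

int main() {
    // The stream alternative only borrows the stream; the caller keeps ownership.
    std::istringstream file_like{"compiled blob bytes"};
    BlobVariant streamed{std::in_place_index<1>, std::ref(file_like)};
    import_from_cache(streamed);

    BlobVariant mapped{std::in_place_index<0>, MappedBlob{'a', 'b', 'c'}};
    import_from_cache(mapped);
}
```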

src/inference/src/dev/core_impl.cpp

Lines changed: 25 additions & 10 deletions
```diff
@@ -1531,16 +1531,24 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
             cacheContent.blobId,
             cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
                                                             ov::internal::caching_with_mmap),
-            [&](ov::Tensor& compiled_blob) {
+            [&](ICacheManager::CompiledBlobVariant& compiled_blob) {
                 OV_ITT_SCOPE(FIRST_INFERENCE,
                              ov::itt::domains::LoadTime,
                              "Core::load_model_from_cache::ReadStreamAndImport");
                 ov::CompiledBlobHeader header;
                 size_t compiled_blob_offset = 0;
                 try {
-                    header.read_from_buffer(static_cast<const char*>(compiled_blob.data()),
-                                            compiled_blob.get_byte_size(),
-                                            compiled_blob_offset);
+                    ov::util::VariantVisitor header_reader{[&](const ov::Tensor& tensor) {
+                                                               header.read_from_buffer(
+                                                                   static_cast<const char*>(tensor.data()),
+                                                                   tensor.get_byte_size(),
+                                                                   compiled_blob_offset);
+                                                           },
+                                                           [&](std::reference_wrapper<std::istream> stream) {
+                                                               stream >> header;
+                                                           }};
+                    std::visit(header_reader, compiled_blob);
+
                     if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
                         // Original file is changed, don't use cache
                         OPENVINO_THROW("Original model file is changed");
@@ -1594,12 +1602,19 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
                     }
                 }
 
-                ov::Tensor compiled_blob_without_header{compiled_blob,
-                                                        {compiled_blob_offset},
-                                                        {compiled_blob.get_size()}};
-
-                compiled_model = context ? plugin.import_model(compiled_blob_without_header, context, update_config)
-                                         : plugin.import_model(compiled_blob_without_header, update_config);
+                ov::util::VariantVisitor model_importer{
+                    [&](const ov::Tensor& compiled_blob) -> ov::SoPtr<ov::ICompiledModel> {
+                        const ov::Tensor compiled_blob_without_header{compiled_blob,
+                                                                      {compiled_blob_offset},
+                                                                      {compiled_blob.get_size()}};
+                        return context ? plugin.import_model(compiled_blob_without_header, context, update_config)
+                                       : plugin.import_model(compiled_blob_without_header, update_config);
+                    },
+                    [&](std::reference_wrapper<std::istream> stream) -> ov::SoPtr<ov::ICompiledModel> {
+                        return context ? plugin.import_model(stream, context, update_config)
+                                       : plugin.import_model(stream, update_config);
+                    }};
+                compiled_model = std::visit(model_importer, compiled_blob);
             });
     } catch (const HeaderException&) {
         // For these exceptions just remove old cache and set that import didn't work
```
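`ov::util::VariantVisitor` above bundles several lambdas into one callable for `std::visit`. Its exact definition is not shown in this diff; it is presumably some flavour of the standard "overloaded" idiom, a minimal self-contained version of which is sketched below (the `Overloaded` name is an assumption of this example, not an OpenVINO identifier):

```cpp
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
#include <variant>

// The classic "overloaded" idiom: inherit operator() from every lambda passed in,
// so std::visit can pick the overload matching the currently held alternative.
template <typename... Ts>
struct Overloaded : Ts... {
    using Ts::operator()...;
};
template <typename... Ts>
Overloaded(Ts...) -> Overloaded<Ts...>;  // deduction guide (implicit from C++20)

int main() {
    using Blob = std::variant<int, std::reference_wrapper<std::istream>>;

    Overloaded visitor{[](int value) { std::cout << "buffer path: " << value << " bytes\n"; },
                       [](std::reference_wrapper<std::istream> stream) {
                           std::cout << "stream path: " << stream.get().rdbuf() << '\n';
                       }};

    Blob from_buffer{42};
    std::visit(visitor, from_buffer);  // dispatches to the int overload

    std::istringstream data{"compiled blob bytes"};
    std::istream& data_stream = data;  // ensure std::ref yields reference_wrapper<std::istream>
    Blob from_stream{std::ref(data_stream)};
    std::visit(visitor, from_stream);  // dispatches to the stream overload
}
```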
