diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index 7f01a956b3959a..b00c406613afcd 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -9,10 +9,12 @@ */ #pragma once +#include #include #include #include #include +#include #include "openvino/runtime/shared_buffer.hpp" #include "openvino/runtime/tensor.hpp" @@ -67,10 +69,15 @@ class ICacheManager { */ virtual void write_cache_entry(const std::string& id, StreamWriter writer) = 0; + /** + * @brief Variant type for compiled blob representation + */ + using CompiledBlobVariant = std::variant>; + /** * @brief Function passing created input stream */ - using StreamReader = std::function; + using StreamReader = std::function; /** * @brief Callback when OpenVINO intends to read model from cache @@ -137,9 +144,14 @@ class FileStorageCacheManager final : public ICacheManager { ScopedLocale plocal_C(LC_ALL, "C"); const auto blob_file_name = getBlobFile(id); if (std::filesystem::exists(blob_file_name)) { - auto compiled_blob = - read_tensor_data(blob_file_name, element::u8, PartialShape::dynamic(1), 0, enable_mmap); - reader(compiled_blob); + if (enable_mmap) { + CompiledBlobVariant compiled_blob{std::in_place_index<0>, ov::read_tensor_data(blob_file_name)}; + reader(compiled_blob); + } else { + std::ifstream stream(blob_file_name, std::ios_base::binary); + CompiledBlobVariant compiled_blob{std::in_place_index<1>, std::ref(stream)}; + reader(compiled_blob); + } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index cd9e6ed583df07..26382f01256d29 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1531,16 +1531,24 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( cacheContent.blobId, cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap), - [&](ov::Tensor& compiled_blob) { + [&](ICacheManager::CompiledBlobVariant& compiled_blob) { OV_ITT_SCOPE(FIRST_INFERENCE, ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); ov::CompiledBlobHeader header; size_t compiled_blob_offset = 0; try { - header.read_from_buffer(static_cast(compiled_blob.data()), - compiled_blob.get_byte_size(), - compiled_blob_offset); + ov::util::VariantVisitor header_reader{[&](const ov::Tensor& tensor) { + header.read_from_buffer( + static_cast(tensor.data()), + tensor.get_byte_size(), + compiled_blob_offset); + }, + [&](std::reference_wrapper stream) { + stream >> header; + }}; + std::visit(header_reader, compiled_blob); + if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { // Original file is changed, don't use cache OPENVINO_THROW("Original model file is changed"); @@ -1594,12 +1602,19 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( } } - ov::Tensor compiled_blob_without_header{compiled_blob, - {compiled_blob_offset}, - {compiled_blob.get_size()}}; - - compiled_model = context ? plugin.import_model(compiled_blob_without_header, context, update_config) - : plugin.import_model(compiled_blob_without_header, update_config); + ov::util::VariantVisitor model_importer{ + [&](const ov::Tensor& compiled_blob) -> ov::SoPtr { + const ov::Tensor compiled_blob_without_header{compiled_blob, + {compiled_blob_offset}, + {compiled_blob.get_size()}}; + return context ? plugin.import_model(compiled_blob_without_header, context, update_config) + : plugin.import_model(compiled_blob_without_header, update_config); + }, + [&](std::reference_wrapper stream) -> ov::SoPtr { + return context ? plugin.import_model(stream, context, update_config) + : plugin.import_model(stream, update_config); + }}; + compiled_model = std::visit(model_importer, compiled_blob); }); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp index 54f0358cbe2d4f..4dd243e5661400 100644 --- a/src/inference/tests/functional/caching_test.cpp +++ b/src/inference/tests/functional/caching_test.cpp @@ -385,17 +385,6 @@ class CachingTest : public ::testing::TestWithParam(), _)) - .WillByDefault(Invoke([&](const ov::Tensor& itensor, const ov::AnyMap& config) { - if (m_checkConfigCb) { - m_checkConfigCb(config); - } - size_t pos = 0; - auto name = getline_from_buffer(itensor.data(), itensor.get_byte_size(), pos); - std::lock_guard lock(mock_creation_mutex); - return create_mock_compiled_model(m_models[name], mockPlugin); - })); - ON_CALL(plugin, import_model(A(), _, _)) .WillByDefault( Invoke([&](const ov::Tensor& itensor, const ov::SoPtr&, const ov::AnyMap& config) { @@ -490,10 +479,10 @@ TEST_P(CachingTest, TestLoad) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(m_remoteContext ? 0 : 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); for (auto& model : comp_models) { EXPECT_CALL(*model, export_model(_)).Times(0); // No more 'export_model' for existing model } @@ -535,10 +524,10 @@ TEST_P(CachingTest, TestLoad_by_device_name) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(m_remoteContext ? 0 : 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); for (auto& model : comp_models) { EXPECT_CALL(*model, export_model(_)).Times(0); // No more 'export_model' for existing models } @@ -552,7 +541,6 @@ TEST_P(CachingTest, TestLoad_by_device_name) { TEST_P(CachingTest, TestLoadCustomImportExport) { const char customData[] = {1, 2, 3, 4, 5}; - size_t customDataSize = sizeof(customData); EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber()); @@ -561,25 +549,24 @@ TEST_P(CachingTest, TestLoadCustomImportExport) { EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber()); - ON_CALL(*mockPlugin, import_model(A(), _, _)) - .WillByDefault(Invoke([&](const ov::Tensor& s, const ov::SoPtr&, const ov::AnyMap&) { + ON_CALL(*mockPlugin, import_model(A(), _, _)) + .WillByDefault(Invoke([&](std::istream& s, const ov::SoPtr&, const ov::AnyMap&) { char a[sizeof(customData)]; - OPENVINO_ASSERT(customDataSize <= s.get_byte_size()); - std::memcpy(a, s.data(), customDataSize); - EXPECT_EQ(memcmp(a, customData, customDataSize), 0); - auto name = getline_from_buffer(s.data(), s.get_byte_size(), customDataSize); - + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); + std::string name; + s >> name; std::lock_guard lock(mock_creation_mutex); return create_mock_compiled_model(m_models[name], mockPlugin); })); - ON_CALL(*mockPlugin, import_model(A(), _)) - .WillByDefault(Invoke([&](const ov::Tensor& s, const ov::AnyMap&) { + ON_CALL(*mockPlugin, import_model(A(), _)) + .WillByDefault(Invoke([&](std::istream& s, const ov::AnyMap&) { char a[sizeof(customData)]; - OPENVINO_ASSERT(customDataSize <= s.get_byte_size()); - std::memcpy(a, s.data(), customDataSize); - EXPECT_EQ(memcmp(a, customData, customDataSize), 0); - auto name = getline_from_buffer(s.data(), s.get_byte_size(), customDataSize); + s.read(a, sizeof(customData)); + EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0); + std::string name; + s >> name; std::lock_guard lock(mock_creation_mutex); return create_mock_compiled_model(m_models[name], mockPlugin); })); @@ -611,10 +598,10 @@ TEST_P(CachingTest, TestLoadCustomImportExport) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& model : comp_models) { EXPECT_CALL(*model, export_model(_)).Times(0); // No 'export_model' for existing models } @@ -710,10 +697,10 @@ TEST_P(CachingTest, TestChangeLoadConfig_With_Cache_Dir_inline) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& model : comp_models) { EXPECT_CALL(*model, export_model(_)).Times(0); // No more 'export_model' for existing models } @@ -1292,10 +1279,10 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(m_remoteContext ? 0 : 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { EXPECT_CALL(net, export_model(_)).Times(1); }); @@ -1374,10 +1361,10 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1408,10 +1395,10 @@ TEST_P(CachingTest, TestDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1465,10 +1452,10 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) { { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1568,14 +1555,14 @@ TEST_P(CachingTest, TestThrowOnImport) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) .Times(!m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); if (m_remoteContext) { - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(1).WillOnce(Throw(1)); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); } else { - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1).WillOnce(Throw(1)); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1).WillOnce(Throw(1)); } m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { EXPECT_CALL(net, export_model(_)).Times(1); @@ -1588,10 +1575,10 @@ TEST_P(CachingTest, TestThrowOnImport) { { // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1656,10 +1643,10 @@ TEST_P(CachingTest, TestModelModified) { { // Step 3: same load, should be ok now EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1721,10 +1708,10 @@ TEST_P(CachingTest, TestCacheFileCorrupted) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1802,10 +1789,10 @@ TEST_P(CachingTest, TestCacheFileOldVersion) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -1901,10 +1888,10 @@ TEST_P(CachingTest, TestCacheFileWithCompiledModelRuntimeProperties) { { // Step 3: same load, should be ok now due to re-creation of cache EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(m_remoteContext ? 1 : 0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(!m_remoteContext ? 1 : 0); for (auto& net : comp_models) { EXPECT_CALL(*net, export_model(_)).Times(0); } @@ -2198,7 +2185,7 @@ TEST_P(CachingTest, LoadAUTO_OneDevice) { deviceToLoad += ":mock.0"; EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) .Times(TEST_COUNT - index - 1); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); OV_ASSERT_NO_THROW(testLoad([&](ov::Core& core) { core.set_property(ov::cache_dir(cacheDir)); m_testFunction(core); @@ -2215,8 +2202,9 @@ TEST_P(CachingTest, LoadAUTOWithConfig) { EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber()); EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber()); + // skip the remote Context test for Auto plugin if (m_remoteContext) { - return; // skip the remote Context test for Auto plugin + return; } int index = 0; m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { @@ -2229,7 +2217,7 @@ TEST_P(CachingTest, LoadAUTOWithConfig) { deviceToLoad += ":mock.0"; EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) .Times(TEST_COUNT - index - 1); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); OV_ASSERT_NO_THROW(testLoad([&](ov::Core& core) { m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}}); })); @@ -2305,9 +2293,9 @@ TEST_P(CachingTest, LoadMulti_race) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(devCount - 1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(devCount - 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { core.set_property(ov::cache_dir(cacheDir)); OV_ASSERT_NO_THROW(m_testFunction(core)); @@ -2349,9 +2337,9 @@ TEST_P(CachingTest, LoadMultiWithConfig_race) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(devCount - 1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(devCount - 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { OV_ASSERT_NO_THROW(m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}})); }); @@ -2394,16 +2382,16 @@ TEST_P(CachingTest, LoadMulti_Archs) { EXPECT_CALL(*mockPlugin, OnCompileModelFromFile()).Times(0); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)) + EXPECT_CALL(*mockPlugin, import_model(A(), _)) .Times(TEST_DEVICE_MAX_COUNT / 2) - .WillRepeatedly(Invoke([&](const ov::Tensor& s, const ov::AnyMap&) { - size_t pos = 0; - auto name = getline_from_buffer(s.data(), s.get_byte_size(), pos); + .WillRepeatedly(Invoke([&](std::istream& s, const ov::AnyMap&) { + std::string name; + s >> name; std::lock_guard lock(mock_creation_mutex); return create_mock_compiled_model(m_models[name], mockPlugin); })); + EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) { EXPECT_CALL(net, export_model(_)).Times(1); // each net will be exported once }); @@ -2500,7 +2488,7 @@ TEST_P(CachingTest, LoadBATCHWithConfig) { deviceToLoad += ":mock.0"; EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)) .Times(TEST_COUNT - index - 1); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(index); testLoad([&](ov::Core& core) { m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}}); }); @@ -2531,9 +2519,9 @@ TEST_P(CachingTest, Load_threads) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(THREADS_COUNT - 1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(THREADS_COUNT - 1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { core.set_property({{ov::cache_dir.name(), cacheDir}}); std::vector threads; @@ -2626,9 +2614,9 @@ TEST_P(CachingTest, Load_mmap_is_disabled) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { core.set_property({{ov::cache_dir.name(), m_cacheDir}}); core.set_property({ov::enable_mmap(false)}); @@ -2665,9 +2653,9 @@ TEST_P(CachingTest, Load_mmap_is_not_supported_by_plugin) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { core.set_property({{ov::cache_dir.name(), m_cacheDir}}); core.set_property({ov::enable_mmap(true)}); @@ -2709,9 +2697,9 @@ TEST_P(CachingTest, Load_mmap_is_disabled_local_cfg) { EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}, {ov::enable_mmap(false)}}; m_testFunctionWithCfg(core, config); @@ -2744,12 +2732,13 @@ TEST_P(CachingTest, Load_mmap_is_not_supported_by_plugin_local_cfg) { EXPECT_CALL(net, export_model(_)).Times(1); }); MkDirGuard guard(m_cacheDir); + EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}, {ov::enable_mmap(false)}}; m_testFunctionWithCfg(core, config); @@ -2787,9 +2776,9 @@ TEST_P(CachingTest, import_from_cache_model_and_weights_path_properties_not_supp EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}}; m_testFunctionWithCfg(core, config); @@ -2837,9 +2826,9 @@ TEST_P(CachingTest, import_from_cache_model_and_weights_path_properties_are_supp EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0); EXPECT_CALL(*mockPlugin, compile_model(A&>(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); EXPECT_CALL(*mockPlugin, import_model(A(), _, _)).Times(0); - EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(1); + EXPECT_CALL(*mockPlugin, import_model(A(), _)).Times(0); testLoad([&](ov::Core& core) { const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}}; m_testFunctionWithCfg(core, config); diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 4bb1a79d2c64ae..aa60590beeaf9e 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -157,17 +157,17 @@ void ModelDeserializer::process_model(std::shared_ptr& model, model_stream.seekg(hdr_pos, std::istream::beg); pass::StreamSerialize::DataHeader hdr = {}; - model_stream.read(reinterpret_cast(&hdr), sizeof hdr); + model_stream.read(reinterpret_cast(&hdr), sizeof(hdr)); // Check if model header contains valid data. - bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) && + bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr)) && (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) && (hdr.consts_size == hdr.model_offset - hdr.consts_offset) && ((hdr.model_size = file_size - hdr.model_offset) != 0U); OPENVINO_ASSERT(is_valid_model, "[CPU] Could not deserialize by device xml header."); // read model input/output precisions - model_stream.seekg(hdr.custom_data_offset); + model_stream.seekg(hdr.custom_data_offset + hdr_pos); pugi::xml_document xmlInOutDoc; if (hdr.custom_data_size > 0) { @@ -181,14 +181,14 @@ void ModelDeserializer::process_model(std::shared_ptr& model, // read blob content auto data_blob = std::make_shared(ov::element::u8, ov::Shape({hdr.consts_size})); - model_stream.seekg(hdr.consts_offset); + model_stream.seekg(hdr.consts_offset + hdr_pos); if (hdr.consts_size) { model_stream.read(static_cast(data_blob->data(ov::element::u8)), hdr.consts_size); } // read XML content auto xml_string = std::make_shared(); - model_stream.seekg(hdr.model_offset); + model_stream.seekg(hdr.model_offset + hdr_pos); xml_string->resize(hdr.model_size); model_stream.read(const_cast(xml_string->data()), hdr.model_size); if (m_cache_decrypt) { diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp index 122845879436f7..b7edea78d83605 100644 --- a/src/plugins/template/src/plugin.cpp +++ b/src/plugins/template/src/plugin.cpp @@ -38,7 +38,7 @@ uint64_t get_blob_data_size(std::istream& model) { } std::string get_model_str(std::istream& model) { - const auto model_size = std::min(model.rdbuf()->in_avail(), get_blob_data_size(model)); + const auto model_size = get_blob_data_size(model); std::string xml; xml.resize(model_size); model.read(xml.data(), model_size); @@ -52,7 +52,7 @@ ov::Tensor read_weights(std::istream& model, const size_t weights_size) { } ov::Tensor get_model_weights(std::istream& model) { - const auto weights_size = std::min(model.rdbuf()->in_avail(), get_blob_data_size(model)); + const auto weights_size = get_blob_data_size(model); return weights_size != 0 ? read_weights(model, weights_size) : ov::Tensor(); }