diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp
index 7f01a956b3959a..b00c406613afcd 100644
--- a/src/inference/src/cache_manager.hpp
+++ b/src/inference/src/cache_manager.hpp
@@ -9,10 +9,12 @@
  */
 #pragma once
 
+#include <filesystem>
 #include <fstream>
 #include <functional>
 #include <memory>
 #include <string>
+#include <variant>
 
 #include "openvino/runtime/shared_buffer.hpp"
 #include "openvino/runtime/tensor.hpp"
@@ -67,10 +69,15 @@ class ICacheManager {
      */
     virtual void write_cache_entry(const std::string& id, StreamWriter writer) = 0;
 
+    /**
+     * @brief Compiled blob passed to a reader: a tensor with the blob data or a reference to an input stream
+     */
+    using CompiledBlobVariant = std::variant<const ov::Tensor, std::reference_wrapper<std::istream>>;
+
     /**
-     * @brief Function passing created input stream
+     * @brief Function receiving the compiled blob as a tensor or an input stream
      */
-    using StreamReader = std::function<void(ov::Tensor&)>;
+    using StreamReader = std::function<void(CompiledBlobVariant&)>;
 
     /**
      * @brief Callback when OpenVINO intends to read model from cache
@@ -137,9 +144,14 @@ class FileStorageCacheManager final : public ICacheManager {
         ScopedLocale plocal_C(LC_ALL, "C");
         const auto blob_file_name = getBlobFile(id);
         if (std::filesystem::exists(blob_file_name)) {
-            auto compiled_blob =
-                read_tensor_data(blob_file_name, element::u8, PartialShape::dynamic(1), 0, enable_mmap);
-            reader(compiled_blob);
+            if (enable_mmap) {
+                CompiledBlobVariant compiled_blob{std::in_place_index<0>, ov::read_tensor_data(blob_file_name)};
+                reader(compiled_blob);
+            } else {
+                std::ifstream stream(blob_file_name, std::ios_base::binary);
+                CompiledBlobVariant compiled_blob{std::in_place_index<1>, std::ref(stream)};
+                reader(compiled_blob);
+            }
         }
     }
 
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index cd9e6ed583df07..26382f01256d29 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -1531,16 +1531,24 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
             cacheContent.blobId,
             cacheContent.mmap_enabled && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
                                                             ov::internal::caching_with_mmap),
-            [&](ov::Tensor& compiled_blob) {
+            [&](ICacheManager::CompiledBlobVariant& compiled_blob) {
                 OV_ITT_SCOPE(FIRST_INFERENCE,
                              ov::itt::domains::LoadTime,
                              "Core::load_model_from_cache::ReadStreamAndImport");
                 ov::CompiledBlobHeader header;
                 size_t compiled_blob_offset = 0;
                 try {
-                    header.read_from_buffer(static_cast<const char*>(compiled_blob.data()),
-                                            compiled_blob.get_byte_size(),
-                                            compiled_blob_offset);
+                    ov::util::VariantVisitor header_reader{[&](const ov::Tensor& tensor) {
+                                                               header.read_from_buffer(
+                                                                   static_cast<const char*>(tensor.data()),
+                                                                   tensor.get_byte_size(),
+                                                                   compiled_blob_offset);
+                                                           },
+                                                           [&](std::reference_wrapper<std::istream> stream) {
+                                                               stream >> header;
+                                                           }};
+                    std::visit(header_reader, compiled_blob);
+
                     if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) {
                         // Original file is changed, don't use cache
                         OPENVINO_THROW("Original model file is changed");
@@ -1594,12 +1602,19 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
                     }
                 }
 
-                ov::Tensor compiled_blob_without_header{compiled_blob,
-                                                        {compiled_blob_offset},
-                                                        {compiled_blob.get_size()}};
-
-                compiled_model = context ? plugin.import_model(compiled_blob_without_header, context, update_config)
-                                         : plugin.import_model(compiled_blob_without_header, update_config);
+                ov::util::VariantVisitor model_importer{
+                    [&](const ov::Tensor& compiled_blob) -> ov::SoPtr<ov::ICompiledModel> {
+                        const ov::Tensor compiled_blob_without_header{compiled_blob,
+                                                                      {compiled_blob_offset},
+                                                                      {compiled_blob.get_size()}};
+                        return context ? plugin.import_model(compiled_blob_without_header, context, update_config)
+                                       : plugin.import_model(compiled_blob_without_header, update_config);
+                    },
+                    [&](std::reference_wrapper<std::istream> stream) -> ov::SoPtr<ov::ICompiledModel> {
+                        return context ? plugin.import_model(stream, context, update_config)
+                                       : plugin.import_model(stream, update_config);
+                    }};
+                compiled_model = std::visit(model_importer, compiled_blob);
             });
     } catch (const HeaderException&) {
         // For these exceptions just remove old cache and set that import didn't work
diff --git a/src/inference/tests/functional/caching_test.cpp b/src/inference/tests/functional/caching_test.cpp
index 54f0358cbe2d4f..4dd243e5661400 100644
--- a/src/inference/tests/functional/caching_test.cpp
+++ b/src/inference/tests/functional/caching_test.cpp
@@ -385,17 +385,6 @@ class CachingTest : public ::testing::TestWithParam<std::tuple<TestParam, std::s
                 return create_mock_compiled_model(m_models[name], mockPlugin);
             }));
 
-        ON_CALL(plugin, import_model(A<const ov::Tensor&>(), _))
-            .WillByDefault(Invoke([&](const ov::Tensor& itensor, const ov::AnyMap& config) {
-                if (m_checkConfigCb) {
-                    m_checkConfigCb(config);
-                }
-                size_t pos = 0;
-                auto name = getline_from_buffer(itensor.data<const char>(), itensor.get_byte_size(), pos);
-                std::lock_guard<std::mutex> lock(mock_creation_mutex);
-                return create_mock_compiled_model(m_models[name], mockPlugin);
-            }));
-
         ON_CALL(plugin, import_model(A<const ov::Tensor&>(), _, _))
             .WillByDefault(
                 Invoke([&](const ov::Tensor& itensor, const ov::SoPtr<ov::IRemoteContext>&, const ov::AnyMap& config) {
@@ -490,10 +479,10 @@ TEST_P(CachingTest, TestLoad) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(m_remoteContext ? 0 : 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         for (auto& model : comp_models) {
             EXPECT_CALL(*model, export_model(_)).Times(0);  // No more 'export_model' for existing model
         }
@@ -535,10 +524,10 @@ TEST_P(CachingTest, TestLoad_by_device_name) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(m_remoteContext ? 0 : 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         for (auto& model : comp_models) {
             EXPECT_CALL(*model, export_model(_)).Times(0);  // No more 'export_model' for existing models
         }
@@ -552,7 +541,6 @@ TEST_P(CachingTest, TestLoad_by_device_name) {
 
 TEST_P(CachingTest, TestLoadCustomImportExport) {
     const char customData[] = {1, 2, 3, 4, 5};
-    size_t customDataSize = sizeof(customData);
 
     EXPECT_CALL(*mockPlugin, get_property(ov::supported_properties.name(), _)).Times(AnyNumber());
     EXPECT_CALL(*mockPlugin, get_property(ov::device::capability::EXPORT_IMPORT, _)).Times(AnyNumber());
@@ -561,25 +549,24 @@ TEST_P(CachingTest, TestLoadCustomImportExport) {
     EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
     EXPECT_CALL(*mockPlugin, get_property(ov::device::capabilities.name(), _)).Times(AnyNumber());
 
-    ON_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _))
-        .WillByDefault(Invoke([&](const ov::Tensor& s, const ov::SoPtr<ov::IRemoteContext>&, const ov::AnyMap&) {
+    ON_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _))
+        .WillByDefault(Invoke([&](std::istream& s, const ov::SoPtr<ov::IRemoteContext>&, const ov::AnyMap&) {
             char a[sizeof(customData)];
-            OPENVINO_ASSERT(customDataSize <= s.get_byte_size());
-            std::memcpy(a, s.data(), customDataSize);
-            EXPECT_EQ(memcmp(a, customData, customDataSize), 0);
-            auto name = getline_from_buffer(s.data<const char>(), s.get_byte_size(), customDataSize);
-
+            s.read(a, sizeof(customData));
+            EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0);
+            std::string name;
+            s >> name;
             std::lock_guard<std::mutex> lock(mock_creation_mutex);
             return create_mock_compiled_model(m_models[name], mockPlugin);
         }));
 
-    ON_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _))
-        .WillByDefault(Invoke([&](const ov::Tensor& s, const ov::AnyMap&) {
+    ON_CALL(*mockPlugin, import_model(A<std::istream&>(), _))
+        .WillByDefault(Invoke([&](std::istream& s, const ov::AnyMap&) {
             char a[sizeof(customData)];
-            OPENVINO_ASSERT(customDataSize <= s.get_byte_size());
-            std::memcpy(a, s.data(), customDataSize);
-            EXPECT_EQ(memcmp(a, customData, customDataSize), 0);
-            auto name = getline_from_buffer(s.data<const char>(), s.get_byte_size(), customDataSize);
+            s.read(a, sizeof(customData));
+            EXPECT_EQ(memcmp(a, customData, sizeof(customData)), 0);
+            std::string name;
+            s >> name;
             std::lock_guard<std::mutex> lock(mock_creation_mutex);
             return create_mock_compiled_model(m_models[name], mockPlugin);
         }));
@@ -611,10 +598,10 @@ TEST_P(CachingTest, TestLoadCustomImportExport) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& model : comp_models) {
             EXPECT_CALL(*model, export_model(_)).Times(0);  // No 'export_model' for existing models
         }
@@ -710,10 +697,10 @@ TEST_P(CachingTest, TestChangeLoadConfig_With_Cache_Dir_inline) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& model : comp_models) {
             EXPECT_CALL(*model, export_model(_)).Times(0);  // No more 'export_model' for existing models
         }
@@ -1292,10 +1279,10 @@ TEST_P(CachingTest, TestChangeCacheDirFailure) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(m_remoteContext ? 0 : 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
             EXPECT_CALL(net, export_model(_)).Times(1);
         });
@@ -1374,10 +1361,10 @@ TEST_P(CachingTest, TestDeviceArchitecture) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1408,10 +1395,10 @@ TEST_P(CachingTest, TestDeviceArchitecture) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1465,10 +1452,10 @@ TEST_P(CachingTest, TestNoDeviceArchitecture) {
     {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1568,14 +1555,14 @@ TEST_P(CachingTest, TestThrowOnImport) {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(m_remoteContext ? 1 : 0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
             .Times(!m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         if (m_remoteContext) {
-            EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(1).WillOnce(Throw(1));
-            EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+            EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(1).WillOnce(Throw(1));
+            EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
         } else {
-            EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-            EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1).WillOnce(Throw(1));
+            EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
+            EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1).WillOnce(Throw(1));
         }
         m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
             EXPECT_CALL(net, export_model(_)).Times(1);
@@ -1588,10 +1575,10 @@ TEST_P(CachingTest, TestThrowOnImport) {
     {  // Step 3: same load, cache is re-created on export on step 2 and shall be successfully imported now
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1656,10 +1643,10 @@ TEST_P(CachingTest, TestModelModified) {
     {  // Step 3: same load, should be ok now
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1721,10 +1708,10 @@ TEST_P(CachingTest, TestCacheFileCorrupted) {
     {  // Step 3: same load, should be ok now due to re-creation of cache
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1802,10 +1789,10 @@ TEST_P(CachingTest, TestCacheFileOldVersion) {
     {  // Step 3: same load, should be ok now due to re-creation of cache
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -1901,10 +1888,10 @@ TEST_P(CachingTest, TestCacheFileWithCompiledModelRuntimeProperties) {
     {  // Step 3: same load, should be ok now due to re-creation of cache
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(m_remoteContext ? 1 : 0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(!m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(m_remoteContext ? 1 : 0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(!m_remoteContext ? 1 : 0);
         for (auto& net : comp_models) {
             EXPECT_CALL(*net, export_model(_)).Times(0);
         }
@@ -2198,7 +2185,7 @@ TEST_P(CachingTest, LoadAUTO_OneDevice) {
         deviceToLoad += ":mock.0";
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
             .Times(TEST_COUNT - index - 1);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(index);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(index);
         OV_ASSERT_NO_THROW(testLoad([&](ov::Core& core) {
             core.set_property(ov::cache_dir(cacheDir));
             m_testFunction(core);
@@ -2215,8 +2202,9 @@ TEST_P(CachingTest, LoadAUTOWithConfig) {
     EXPECT_CALL(*mockPlugin, query_model(_, _)).Times(AnyNumber());
     EXPECT_CALL(*mockPlugin, get_property(ov::internal::caching_properties.name(), _)).Times(AnyNumber());
     EXPECT_CALL(*mockPlugin, get_property(ov::device::architecture.name(), _)).Times(AnyNumber());
+    // Skip the remote context test for the AUTO plugin
     if (m_remoteContext) {
-        return;  // skip the remote Context test for Auto plugin
+        return;
     }
     int index = 0;
     m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
@@ -2229,7 +2217,7 @@ TEST_P(CachingTest, LoadAUTOWithConfig) {
         deviceToLoad += ":mock.0";
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
             .Times(TEST_COUNT - index - 1);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(index);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(index);
         OV_ASSERT_NO_THROW(testLoad([&](ov::Core& core) {
             m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}});
         }));
@@ -2305,9 +2293,9 @@ TEST_P(CachingTest, LoadMulti_race) {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
         EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(devCount - 1);
         EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(devCount - 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         testLoad([&](ov::Core& core) {
             core.set_property(ov::cache_dir(cacheDir));
             OV_ASSERT_NO_THROW(m_testFunction(core));
@@ -2349,9 +2337,9 @@ TEST_P(CachingTest, LoadMultiWithConfig_race) {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
         EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(devCount - 1);
         EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(devCount - 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         testLoad([&](ov::Core& core) {
             OV_ASSERT_NO_THROW(m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}}));
         });
@@ -2394,16 +2382,16 @@ TEST_P(CachingTest, LoadMulti_Archs) {
         EXPECT_CALL(*mockPlugin, OnCompileModelFromFile()).Times(0);
 
         EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _))
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _))
             .Times(TEST_DEVICE_MAX_COUNT / 2)
-            .WillRepeatedly(Invoke([&](const ov::Tensor& s, const ov::AnyMap&) {
-                size_t pos = 0;
-                auto name = getline_from_buffer(s.data<const char>(), s.get_byte_size(), pos);
+            .WillRepeatedly(Invoke([&](std::istream& s, const ov::AnyMap&) {
+                std::string name;
+                s >> name;
                 std::lock_guard<std::mutex> lock(mock_creation_mutex);
                 return create_mock_compiled_model(m_models[name], mockPlugin);
             }));
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         m_post_mock_net_callbacks.emplace_back([&](MockICompiledModelImpl& net) {
             EXPECT_CALL(net, export_model(_)).Times(1);  // each net will be exported once
         });
@@ -2500,7 +2488,7 @@ TEST_P(CachingTest, LoadBATCHWithConfig) {
         deviceToLoad += ":mock.0";
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _))
             .Times(TEST_COUNT - index - 1);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(index);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(index);
         testLoad([&](ov::Core& core) {
             m_testFunctionWithCfg(core, {{ov::cache_dir.name(), cacheDir}});
         });
@@ -2531,9 +2519,9 @@ TEST_P(CachingTest, Load_threads) {
         EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
         EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
         EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+        EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(THREADS_COUNT - 1);
         EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(THREADS_COUNT - 1);
+        EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
         testLoad([&](ov::Core& core) {
             core.set_property({{ov::cache_dir.name(), cacheDir}});
             std::vector<std::thread> threads;
@@ -2626,9 +2614,9 @@ TEST_P(CachingTest, Load_mmap_is_disabled) {
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         core.set_property({{ov::cache_dir.name(), m_cacheDir}});
         core.set_property({ov::enable_mmap(false)});
@@ -2665,9 +2653,9 @@ TEST_P(CachingTest, Load_mmap_is_not_supported_by_plugin) {
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         core.set_property({{ov::cache_dir.name(), m_cacheDir}});
         core.set_property({ov::enable_mmap(true)});
@@ -2709,9 +2697,9 @@ TEST_P(CachingTest, Load_mmap_is_disabled_local_cfg) {
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}, {ov::enable_mmap(false)}};
         m_testFunctionWithCfg(core, config);
@@ -2744,12 +2732,13 @@ TEST_P(CachingTest, Load_mmap_is_not_supported_by_plugin_local_cfg) {
         EXPECT_CALL(net, export_model(_)).Times(1);
     });
     MkDirGuard guard(m_cacheDir);
+
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}, {ov::enable_mmap(false)}};
         m_testFunctionWithCfg(core, config);
@@ -2787,9 +2776,9 @@ TEST_P(CachingTest, import_from_cache_model_and_weights_path_properties_not_supp
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}};
         m_testFunctionWithCfg(core, config);
@@ -2837,9 +2826,9 @@ TEST_P(CachingTest, import_from_cache_model_and_weights_path_properties_are_supp
     EXPECT_CALL(*mockPlugin, compile_model(_, _, _)).Times(0);
     EXPECT_CALL(*mockPlugin, compile_model(A<const std::shared_ptr<const ov::Model>&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(0);
+    EXPECT_CALL(*mockPlugin, import_model(A<std::istream&>(), _)).Times(1);
     EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _, _)).Times(0);
-    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(1);
+    EXPECT_CALL(*mockPlugin, import_model(A<const ov::Tensor&>(), _)).Times(0);
     testLoad([&](ov::Core& core) {
         const auto config = ov::AnyMap{{ov::cache_dir(m_cacheDir)}};
         m_testFunctionWithCfg(core, config);
diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp
index 4bb1a79d2c64ae..aa60590beeaf9e 100644
--- a/src/plugins/intel_cpu/src/utils/serialize.cpp
+++ b/src/plugins/intel_cpu/src/utils/serialize.cpp
@@ -157,17 +157,17 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
     model_stream.seekg(hdr_pos, std::istream::beg);
 
     pass::StreamSerialize::DataHeader hdr = {};
-    model_stream.read(reinterpret_cast<char*>(&hdr), sizeof hdr);
+    model_stream.read(reinterpret_cast<char*>(&hdr), sizeof(hdr));
 
     // Check if model header contains valid data.
-    bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr) + hdr_pos) &&
+    bool is_valid_model = (hdr.custom_data_offset == sizeof(hdr)) &&
                           (hdr.custom_data_size == hdr.consts_offset - hdr.custom_data_offset) &&
                           (hdr.consts_size == hdr.model_offset - hdr.consts_offset) &&
                           ((hdr.model_size = file_size - hdr.model_offset) != 0U);
     OPENVINO_ASSERT(is_valid_model, "[CPU] Could not deserialize by device xml header.");
 
     // read model input/output precisions
-    model_stream.seekg(hdr.custom_data_offset);
+    model_stream.seekg(hdr.custom_data_offset + hdr_pos);
 
     pugi::xml_document xmlInOutDoc;
     if (hdr.custom_data_size > 0) {
@@ -181,14 +181,14 @@ void ModelDeserializer::process_model(std::shared_ptr<ov::Model>& model,
 
     // read blob content
     auto data_blob = std::make_shared<ov::Tensor>(ov::element::u8, ov::Shape({hdr.consts_size}));
-    model_stream.seekg(hdr.consts_offset);
+    model_stream.seekg(hdr.consts_offset + hdr_pos);
     if (hdr.consts_size) {
         model_stream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
     }
 
     // read XML content
     auto xml_string = std::make_shared<std::string>();
-    model_stream.seekg(hdr.model_offset);
+    model_stream.seekg(hdr.model_offset + hdr_pos);
     xml_string->resize(hdr.model_size);
     model_stream.read(const_cast<char*>(xml_string->data()), hdr.model_size);
     if (m_cache_decrypt) {
diff --git a/src/plugins/template/src/plugin.cpp b/src/plugins/template/src/plugin.cpp
index 122845879436f7..b7edea78d83605 100644
--- a/src/plugins/template/src/plugin.cpp
+++ b/src/plugins/template/src/plugin.cpp
@@ -38,7 +38,7 @@ uint64_t get_blob_data_size(std::istream& model) {
 }
 
 std::string get_model_str(std::istream& model) {
-    const auto model_size = std::min<uint64_t>(model.rdbuf()->in_avail(), get_blob_data_size(model));
+    const auto model_size = get_blob_data_size(model);
     std::string xml;
     xml.resize(model_size);
     model.read(xml.data(), model_size);
@@ -52,7 +52,7 @@ ov::Tensor read_weights(std::istream& model, const size_t weights_size) {
 }
 
 ov::Tensor get_model_weights(std::istream& model) {
-    const auto weights_size = std::min<uint64_t>(model.rdbuf()->in_avail(), get_blob_data_size(model));
+    const auto weights_size = get_blob_data_size(model);  // size used as returned; no in_avail() cap
     return weights_size != 0 ? read_weights(model, weights_size) : ov::Tensor();
 }