openvinotoolkit · pereanub · Jun 17, 2025 · Jun 17, 2025 · Jun 18, 2025 · Jun 19, 2025
@@ -89,6 +89,7 @@ class ZeroInferRequest final : public SyncInferRequest {
     std::unique_ptr<Pipeline> _pipeline;
 
     bool _pipelineIsCreated = false;
+    bool _externalMemoryStandardAllocationSupported = false;
 };
 
 }  //  namespace intel_npu
@@ -12,6 +12,7 @@
 
 #include "intel_npu/utils/logger/logger.hpp"
 #include "intel_npu/utils/zero/zero_init.hpp"
+#include "openvino/runtime/itensor.hpp"
 
 namespace {
 
@@ -23,7 +24,7 @@ namespace intel_npu {
 namespace zeroMemory {
 
 // Create an allocator that uses the ov::Allocator signature that will be used to create the tensor.
-class HostMemAllocator final {
+class HostMemAllocator {
 public:
     explicit HostMemAllocator(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
                               ze_host_mem_alloc_flag_t flag = {})
@@ -36,7 +37,7 @@ class HostMemAllocator final {
      * @param bytes The size in bytes to allocate
      * @return Handle to the allocated resource
      */
-    void* allocate(const size_t bytes, const size_t alignment = STANDARD_PAGE_SIZE) noexcept;
+    virtual void* allocate(const size_t bytes, const size_t alignment = STANDARD_PAGE_SIZE) noexcept;
 
     /**
      * @brief Releases handle and all associated memory resources which invalidates the handle.
@@ -47,7 +48,7 @@ class HostMemAllocator final {
 
     bool is_equal(const HostMemAllocator& other) const;
 
-private:
+protected:
     const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
 
     Logger _logger;
@@ -56,5 +57,19 @@ class HostMemAllocator final {
     static const std::size_t _alignment = STANDARD_PAGE_SIZE;
 };
 
+class HostMemSharedAllocator final : public HostMemAllocator {
+public:
+    explicit HostMemSharedAllocator(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
+                                    const std::shared_ptr<ov::ITensor>& tensor,
+                                    ze_host_mem_alloc_flag_t flag = {})
+        : HostMemAllocator(initStructs, flag),
+          _tensor(tensor) {}
+    // The definition leaves both parameters unnamed and sizes the request from the stored tensor instead.
+    void* allocate(const size_t bytes = 0, const size_t alignment = STANDARD_PAGE_SIZE) noexcept override;
+
+private:
+    const std::shared_ptr<ov::ITensor> _tensor;  // allocate() reads data() and get_byte_size() from it
+};
+
 }  // namespace zeroMemory
 }  // namespace intel_npu
@@ -4,6 +4,10 @@
 
 #include "zero_infer_request.hpp"
 
+#include <ze_mem_import_system_memory_ext.h>
+
+#include <cstdint>
+
 #include "intel_npu/common/itt.hpp"
 #include "intel_npu/config/options.hpp"
 #include "intel_npu/prefix.hpp"
@@ -62,6 +66,13 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A
     }
 }
 
+bool memory_aligned_to_standard_page_size(void* addr) {
+    // The check passes when the 12 low-order bits of the address are all zero (mask 0xFFF).
+    constexpr uintptr_t pageOffsetMask = 0xFFF;
+
+    return (reinterpret_cast<uintptr_t>(addr) & pageOffsetMask) == 0;
+}
+
 }  // namespace
 
 //------------------------------------------------------------------------------
@@ -79,6 +90,15 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
       _levelZeroOutputTensors(_metadata.outputs.size(), nullptr) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
 
+    ze_device_external_memory_properties_t desc = {};
+    desc.stype = ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES;
+    auto res = zeDeviceGetExternalMemoryProperties(_initStructs->getDevice(), &desc);
+    if (res == ZE_RESULT_SUCCESS) {
+        if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_STANDARD_ALLOCATION) {
+            _externalMemoryStandardAllocationSupported = true;
+        }
+    }
+
     _outputAllocator = std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs);
     _inputAllocator =
         std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
@@ -215,18 +235,33 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
         levelZeroTensors = tensor;
         updateCommandListArg = true;
     } else {
-        auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensors);
-        if (zeroTensor == nullptr || (zeroTensor != nullptr && zeroTensor->tensor_was_shared_with_user())) {
-            _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
-            OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");
+        if (_externalMemoryStandardAllocationSupported && memory_aligned_to_standard_page_size(tensor->data())) {
+            _logger.debug("ZeroInferRequest::set_tensor_data - import memory from a system memory pointer");
+            auto hostMemSharedAllocator = zeroMemory::HostMemSharedAllocator(_initStructs, tensor);
+            levelZeroTensors = std::make_shared<ZeroTensor>(_initStructs,
+                                                            _config,
+                                                            tensor->get_element_type(),
+                                                            tensor->get_shape(),
+                                                            hostMemSharedAllocator);
 
-            levelZeroTensors = allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index),
-                                               index,
-                                               isInput,
-                                               isInput ? *_inputAllocator : *_outputAllocator,
-                                               _graph->get_batch_size());
+            std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensors)->set_tensor_shared_with_user();
 
             updateCommandListArg = true;
+        } else {
+            auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensors);
+
+            if (zeroTensor == nullptr || (zeroTensor != nullptr && zeroTensor->tensor_was_shared_with_user())) {
+                _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
+                OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");
+
+                levelZeroTensors = allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index),
+                                                   index,
+                                                   isInput,
+                                                   isInput ? *_inputAllocator : *_outputAllocator,
+                                                   _graph->get_batch_size());
+
+                updateCommandListArg = true;
+            }
         }
     }
 
@@ -516,6 +551,32 @@ void ZeroInferRequest::update_states_if_memory_changed() {
 
                 get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_state()._ptr;
                 _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state()._ptr;
+            } else {
+                if (_externalMemoryStandardAllocationSupported &&
+                    memory_aligned_to_standard_page_size(zeroState->get_state()->data())) {
+                    auto hostMemSharedAllocator =
+                        zeroMemory::HostMemSharedAllocator(_initStructs, zeroState->get_state()._ptr);
+
+                    get_level_zero_input(zeroState->get_tensor_index()) =
+                        std::make_shared<ZeroTensor>(_initStructs,
+                                                     _config,
+                                                     zeroState->get_state()->get_element_type(),
+                                                     zeroState->get_state()->get_shape(),
+                                                     hostMemSharedAllocator);
+
+                    _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) =
+                        get_level_zero_input(zeroState->get_tensor_index());
+
+                    _pipeline->update_graph_arguments(
+                        _graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
+                        _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(),
+                        _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size());
+
+                    _pipeline->update_graph_arguments(
+                        _graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
+                        _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->data(),
+                        _levelZeroOutputTensors.at(zeroState->get_related_tensor_index())->get_byte_size());
+                }
             }
         }
     }
@@ -616,15 +677,16 @@ void ZeroInferRequest::infer_async() {
         const auto& levelZeroTensor = get_level_zero_input(inputIndex);
         if (!is_remote_tensor(levelZeroTensor)) {
             void* levelZeroBuffer = levelZeroTensor->data();
+            if (userBuffer == nullptr || levelZeroBuffer == nullptr) {
+                OPENVINO_THROW("Empty buffer");
+            }
 
-            if (userBuffer != levelZeroBuffer) {
-                if (userBuffer == nullptr || levelZeroBuffer == nullptr) {
-                    OPENVINO_THROW("Empty buffer");
+            if (!_externalMemoryStandardAllocationSupported || !memory_aligned_to_standard_page_size(userBuffer)) {
+                if (userBuffer != levelZeroBuffer) {
+                    _logger.info("Tensor is not allocated in the current Level Zero context");
+                    OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy");
+                    std::memcpy(levelZeroBuffer, userBuffer, userTensor.at(SINGLE_TENSOR)->get_byte_size());
                 }
-
-                _logger.info("Tensor is not allocated in the current Level Zero context");
-                OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy");
-                std::memcpy(levelZeroBuffer, userBuffer, userTensor.at(SINGLE_TENSOR)->get_byte_size());
             }
         }
 
@@ -665,15 +727,16 @@ void ZeroInferRequest::get_result() {
         const std::shared_ptr<ov::ITensor>& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex);
         if (!is_remote_tensor(levelZeroTensor)) {
             void* levelZeroBuffer = levelZeroTensor->data();
+            if (userBuffer == nullptr || levelZeroBuffer == nullptr) {
+                OPENVINO_THROW("Empty buffer");
+            }
 
-            if (userBuffer != levelZeroBuffer) {
-                if (userBuffer == nullptr || levelZeroBuffer == nullptr) {
-                    OPENVINO_THROW("Empty buffer");
+            if (!_externalMemoryStandardAllocationSupported || !memory_aligned_to_standard_page_size(userBuffer)) {
+                if (userBuffer != levelZeroBuffer) {
+                    _logger.info("Tensor is not allocated in the current Level Zero context");
+                    OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy");
+                    std::memcpy(userBuffer, levelZeroBuffer, userTensor->get_byte_size());
                 }
-
-                _logger.info("Tensor is not allocated in the current Level Zero context");
-                OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy");
-                std::memcpy(userBuffer, levelZeroBuffer, userTensor->get_byte_size());
             }
         }
 

@@ -4,6 +4,8 @@
 
 #include "zero_memory.hpp"
 
+#include <ze_mem_import_system_memory_ext.h>
+
 #include "intel_npu/utils/zero/zero_api.hpp"
 #include "intel_npu/utils/zero/zero_result.hpp"
 #include "intel_npu/utils/zero/zero_utils.hpp"
@@ -18,7 +20,7 @@ void* HostMemAllocator::allocate(const size_t bytes, const size_t /*alignment*/)
                                      nullptr,
                                      static_cast<ze_host_mem_alloc_flags_t>(_flag)};
     void* data = nullptr;
-    ze_result_t result = zeMemAllocHost(_initStructs->getContext(), &desc, size, _alignment, &data);
+    auto result = zeMemAllocHost(_initStructs->getContext(), &desc, size, _alignment, &data);
 
     if (result == ZE_RESULT_SUCCESS) {
         return data;
@@ -46,5 +48,31 @@ bool HostMemAllocator::is_equal(const HostMemAllocator& other) const {
     return (_initStructs == other._initStructs) && (_flag == other._flag);
 }
 
+// Imports the buffer of the tensor given at construction into the Level Zero context via zeMemAllocHost.
+// The `bytes` and `alignment` arguments are ignored. Returns nullptr, after logging, if the call fails.
+void* HostMemSharedAllocator::allocate(const size_t /*bytes*/, const size_t /*alignment*/) noexcept {
+    // NOTE(review): size is rounded up to _alignment and may exceed get_byte_size() - confirm the buffer covers it.
+    size_t size = (_tensor->get_byte_size() + _alignment - 1) & ~(_alignment - 1);
+    _ze_external_memory_import_system_memory_t memory_import = {ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMPORT_SYSTEM_MEMORY,
+                                                                nullptr,
+                                                                _tensor->data(),
+                                                                size};
+
+    // stype must name the host descriptor type; the import request is chained through the pNext field.
+    ze_host_mem_alloc_desc_t desc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC,
+                                     &memory_import,
+                                     static_cast<ze_host_mem_alloc_flags_t>(_flag)};
+    void* data = nullptr;
+    auto result = zeMemAllocHost(_initStructs->getContext(), &desc, size, _alignment, &data);
+    if (result == ZE_RESULT_SUCCESS) {
+        return data;
+    }
+    _logger.error("L0 zeMemAllocHost result: %s, code %#X - %s",
+                  ze_result_to_string(result).c_str(),
+                  uint64_t(result),
+                  ze_result_to_description(result).c_str());
+    return nullptr;
+}
+
 }  // namespace zeroMemory
 }  // namespace intel_npu
@@ -37,6 +37,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest,
                                             ::testing::ValuesIn(configsInferRequestRunTests)),
                          InferRequestRunTests::getTestCaseName);
 
+INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest,
+                         CpuVaTensorsTests,
+                         ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU),
+                                            ::testing::ValuesIn(configsInferRequestRunTests)),
+                         InferRequestRunTests::getTestCaseName);  // same name generator as the suite above
+
 const std::vector<ov::AnyMap> batchingConfigs = {
     {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)},
     {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)},