diff --git a/sycl/test-e2e/AsyncAlloc/device/memory_pool.cpp b/sycl/test-e2e/AsyncAlloc/device/memory_pool.cpp
index ef589722b74a8..0717e145014e1 100644
--- a/sycl/test-e2e/AsyncAlloc/device/memory_pool.cpp
+++ b/sycl/test-e2e/AsyncAlloc/device/memory_pool.cpp
@@ -1,8 +1,8 @@
 // RUN: %{build} -o %t.out
 // RUN: %{run} %t.out
-// XFAIL: level_zero
-// XFAIL-TRACKER: https://github.com/intel/llvm/issues/17772
+// UNSUPPORTED: level_zero_v2_adapter
+// UNSUPPORTED-INTENDED: v2 adapter does not support pool statistics.

 #include
 #include
diff --git a/unified-runtime/source/adapters/level_zero/async_alloc.cpp b/unified-runtime/source/adapters/level_zero/async_alloc.cpp
index 67aa0dcfccef6..4233114ec696c 100644
--- a/unified-runtime/source/adapters/level_zero/async_alloc.cpp
+++ b/unified-runtime/source/adapters/level_zero/async_alloc.cpp
@@ -28,11 +28,11 @@ static ur_result_t enqueueUSMAllocHelper(
   std::scoped_lock lock(Queue->Mutex);

   // Allocate USM memory
-  ur_usm_pool_handle_t USMPool = nullptr;
+  ur_usm_pool_handle_t UrPool = nullptr;
   if (Pool) {
-    USMPool = Pool;
+    UrPool = Pool;
   } else {
-    USMPool = &Queue->Context->AsyncPool;
+    UrPool = &Queue->Context->AsyncPool;
   }

   auto Device = (Type == UR_USM_TYPE_HOST) ? nullptr : Queue->Device;
@@ -40,10 +40,10 @@ static ur_result_t enqueueUSMAllocHelper(
   std::vector ExtEventWaitList;
   ur_event_handle_t OriginAllocEvent = nullptr;
   auto AsyncAlloc =
-      USMPool->allocateEnqueued(Queue, Device, nullptr, Type, Size);
+      UrPool->allocateEnqueued(Queue, Device, nullptr, Type, Size);
   if (!AsyncAlloc) {
     auto Ret =
-        USMPool->allocate(Queue->Context, Device, nullptr, Type, Size, RetMem);
+        UrPool->allocate(Queue->Context, Device, nullptr, Type, Size, RetMem);
     if (Ret) {
       return Ret;
     }
@@ -235,26 +235,26 @@ ur_result_t urEnqueueUSMFreeExp(
         (ZeCommandList, WaitList.Length, WaitList.ZeEventList));
   }

-  umf_memory_pool_handle_t hPool = nullptr;
-  auto umfRet = umfPoolByPtr(Mem, &hPool);
-  if (umfRet != UMF_RESULT_SUCCESS || !hPool) {
+  umf_memory_pool_handle_t UmfPool = nullptr;
+  auto UmfRet = umfPoolByPtr(Mem, &UmfPool);
+  if (UmfRet != UMF_RESULT_SUCCESS || !UmfPool) {
     return USMFreeHelper(Queue->Context, Mem);
   }

-  UsmPool *usmPool = nullptr;
-  umfRet = umfPoolGetTag(hPool, (void **)&usmPool);
-  if (umfRet != UMF_RESULT_SUCCESS || usmPool == nullptr) {
+  UsmPool *UsmPool = nullptr;
+  UmfRet = umfPoolGetTag(UmfPool, (void **)&UsmPool);
+  if (UmfRet != UMF_RESULT_SUCCESS || UsmPool == nullptr) {
     return USMFreeHelper(Queue->Context, Mem);
   }

-  size_t size = 0;
-  umfRet = umfPoolMallocUsableSize(hPool, Mem, &size);
-  if (umfRet != UMF_RESULT_SUCCESS) {
+  size_t Size = 0;
+  UmfRet = umfPoolMallocUsableSize(UmfPool, Mem, &Size);
+  if (UmfRet != UMF_RESULT_SUCCESS) {
     return USMFreeHelper(Queue->Context, Mem);
   }

   (*Event)->RefCount.retain();
-  usmPool->AsyncPool.insert(Mem, size, *Event, Queue);
+  UsmPool->AsyncPool.insert(Mem, Size, *Event, Queue);

   // Signal that USM free event was finished
   ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
diff --git a/unified-runtime/source/adapters/level_zero/enqueued_pool.cpp b/unified-runtime/source/adapters/level_zero/enqueued_pool.cpp
index 1b6da1cb8cb0b..0df60df92f7c4 100644
--- a/unified-runtime/source/adapters/level_zero/enqueued_pool.cpp
+++ b/unified-runtime/source/adapters/level_zero/enqueued_pool.cpp
@@ -9,6 +9,7 @@
 //===----------------------------------------------------------------------===//

 #include "enqueued_pool.hpp"
+#include "usm.hpp"

 #include

@@ -58,17 +59,13 @@ bool EnqueuedPool::cleanup() {
   auto Lock = std::lock_guard(Mutex);
   auto FreedAllocations = !Freelist.empty();

-  auto umfRet [[maybe_unused]] = UMF_RESULT_SUCCESS;
+  auto Ret [[maybe_unused]] = UR_RESULT_SUCCESS;
   for (auto It : Freelist) {
-    umf_memory_pool_handle_t hPool = nullptr;
-    umfRet = umfPoolByPtr(It.Ptr, &hPool);
-    assert(hPool != nullptr);
-
-    umfRet = umfPoolFree(hPool, It.Ptr);
-    assert(umfRet == UMF_RESULT_SUCCESS);
+    Ret = MemFreeFn(It.Ptr);
+    assert(Ret == UR_RESULT_SUCCESS);

     if (It.Event)
-      eventRelease(It.Event);
+      EventReleaseFn(It.Event);
   }

   Freelist.clear();
@@ -84,17 +81,13 @@ bool EnqueuedPool::cleanupForQueue(void *Queue) {

   bool FreedAllocations = false;

-  auto umfRet [[maybe_unused]] = UMF_RESULT_SUCCESS;
+  auto Ret [[maybe_unused]] = UR_RESULT_SUCCESS;
   while (It != Freelist.end() && It->Queue == Queue) {
-    umf_memory_pool_handle_t hPool = nullptr;
-    umfRet = umfPoolByPtr(It->Ptr, &hPool);
-    assert(hPool != nullptr);
-
-    umfRet = umfPoolFree(hPool, It->Ptr);
-    assert(umfRet == UMF_RESULT_SUCCESS);
+    Ret = MemFreeFn(It->Ptr);
+    assert(Ret == UR_RESULT_SUCCESS);

     if (It->Event)
-      eventRelease(It->Event);
+      EventReleaseFn(It->Event);

     // Erase the current allocation and move to the next one
     It = Freelist.erase(It);
diff --git a/unified-runtime/source/adapters/level_zero/enqueued_pool.hpp b/unified-runtime/source/adapters/level_zero/enqueued_pool.hpp
index c7d19eb5e0b34..66577b7652db2 100644
--- a/unified-runtime/source/adapters/level_zero/enqueued_pool.hpp
+++ b/unified-runtime/source/adapters/level_zero/enqueued_pool.hpp
@@ -30,9 +30,11 @@ class EnqueuedPool {
   };

   using event_release_callback_t = ur_result_t (*)(ur_event_handle_t);
+  using memory_free_callback_t = std::function<ur_result_t(void *)>;

-  EnqueuedPool(event_release_callback_t eventRelease)
-      : eventRelease(eventRelease) {}
+  EnqueuedPool(event_release_callback_t EventReleaseFn,
+               memory_free_callback_t MemFreeFn)
+      : EventReleaseFn(EventReleaseFn), MemFreeFn(MemFreeFn) {}

   ~EnqueuedPool();
   std::optional<Allocation> getBestFit(size_t Size, size_t Alignment,
@@ -60,5 +62,6 @@ class EnqueuedPool {
   using AllocationSet = std::set;
   ur_mutex Mutex;
   AllocationSet Freelist;
-  event_release_callback_t eventRelease;
+  event_release_callback_t EventReleaseFn;
+  memory_free_callback_t MemFreeFn;
 };
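Aside (illustration, not part of the diff): the two-callback design above lets each adapter decide how deferred frees return to their origin pool. A minimal wiring sketch mirroring the UsmPool constructors added later in this patch; `Ctx` is a placeholder for a valid ur_context_handle_t:

  EnqueuedPool Pool(
      /*EventReleaseFn=*/
      [](ur_event_handle_t Event) { return urEventReleaseInternal(Event); },
      /*MemFreeFn=*/
      [Ctx](void *Ptr) { return USMFreeHelper(Ctx, Ptr); });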
diff --git a/unified-runtime/source/adapters/level_zero/usm.cpp b/unified-runtime/source/adapters/level_zero/usm.cpp
index 3aa86fd429824..0600c8765bea0 100644
--- a/unified-runtime/source/adapters/level_zero/usm.cpp
+++ b/unified-runtime/source/adapters/level_zero/usm.cpp
@@ -12,21 +12,21 @@
 #include
 #include
 #include
+#include

 #include "context.hpp"
 #include "event.hpp"
+#include "logger/ur_logger.hpp"
 #include "queue.hpp"
-#include "umf/base.h"
-#include "umf/memory_pool.h"
 #include "ur_api.h"
-#include "usm.hpp"
-
-#include "logger/ur_logger.hpp"
 #include "ur_interface_loader.hpp"
 #include "ur_level_zero.hpp"
 #include "ur_util.hpp"
+#include "usm.hpp"

-#include
+#include
+#include
+#include
 #include

 namespace umf {
@@ -309,15 +309,15 @@ ur_result_t urUSMHostAlloc(
     size_t Size,
     /// [out] pointer to USM host memory object
     void **RetMem) {

-  ur_usm_pool_handle_t USMPool = nullptr;
+  ur_usm_pool_handle_t UrPool = nullptr;
   if (Pool) {
-    USMPool = Pool;
+    UrPool = Pool;
   } else {
-    USMPool = &Context->DefaultPool;
+    UrPool = &Context->DefaultPool;
   }

-  return USMPool->allocate(Context, nullptr, USMDesc, UR_USM_TYPE_HOST, Size,
-                           RetMem);
+  return UrPool->allocate(Context, nullptr, USMDesc, UR_USM_TYPE_HOST, Size,
+                          RetMem);
 }

 ur_result_t urUSMDeviceAlloc(
@@ -334,15 +334,15 @@ ur_result_t urUSMDeviceAlloc(
     /// [out] pointer to USM device memory object
     void **RetMem) {

-  ur_usm_pool_handle_t USMPool = nullptr;
+  ur_usm_pool_handle_t UrPool = nullptr;
   if (Pool) {
-    USMPool = Pool;
+    UrPool = Pool;
   } else {
-    USMPool = &Context->DefaultPool;
+    UrPool = &Context->DefaultPool;
   }

-  return USMPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_DEVICE, Size,
-                           RetMem);
+  return UrPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_DEVICE, Size,
+                          RetMem);
 }

 ur_result_t urUSMSharedAlloc(
@@ -358,15 +358,15 @@ ur_result_t urUSMSharedAlloc(
     size_t Size,
     /// [out] pointer to USM shared memory object
     void **RetMem) {

-  ur_usm_pool_handle_t USMPool = nullptr;
+  ur_usm_pool_handle_t UrPool = nullptr;
   if (Pool) {
-    USMPool = Pool;
+    UrPool = Pool;
   } else {
-    USMPool = &Context->DefaultPool;
+    UrPool = &Context->DefaultPool;
   }

-  return USMPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_SHARED, Size,
-                           RetMem);
+  return UrPool->allocate(Context, Device, USMDesc, UR_USM_TYPE_SHARED, Size,
+                          RetMem);
 }

 ur_result_t
@@ -444,16 +444,16 @@ ur_result_t urUSMGetMemAllocInfo(
     return ReturnValue(Size);
   }
   case UR_USM_ALLOC_INFO_POOL: {
-    umf_memory_pool_handle_t UMFPool = nullptr;
-    auto umfRet = umfPoolByPtr(Ptr, &UMFPool);
-    if (umfRet != UMF_RESULT_SUCCESS || !UMFPool) {
+    umf_memory_pool_handle_t UmfPool = nullptr;
+    auto UmfRet = umfPoolByPtr(Ptr, &UmfPool);
+    if (UmfRet != UMF_RESULT_SUCCESS || !UmfPool) {
       return UR_RESULT_ERROR_INVALID_VALUE;
     }

     std::shared_lock ContextLock(Context->Mutex);

     for (auto &Pool : Context->UsmPoolHandles) {
-      if (Pool->hasPool(UMFPool)) {
+      if (Pool->hasPool(UmfPool)) {
         return ReturnValue(Pool);
       }
     }
@@ -609,7 +609,7 @@ ur_result_t UR_APICALL urUSMPoolDestroyExp(ur_context_handle_t /*Context*/,
 ur_result_t UR_APICALL urUSMPoolSetInfoExp(ur_usm_pool_handle_t,
                                            ur_usm_pool_info_t, void *,
                                            size_t) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  return UR_RESULT_SUCCESS;
 }

 ur_result_t UR_APICALL urUSMPoolGetDefaultDevicePoolExp(
@@ -623,10 +623,42 @@ ur_result_t UR_APICALL urUSMPoolGetDefaultDevicePoolExp(
   return UR_RESULT_SUCCESS;
 }

-ur_result_t UR_APICALL urUSMPoolGetInfoExp(ur_usm_pool_handle_t,
-                                           ur_usm_pool_info_t, void *,
-                                           size_t *) {
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+ur_result_t UR_APICALL urUSMPoolGetInfoExp(ur_usm_pool_handle_t hPool,
+                                           ur_usm_pool_info_t PropName,
+                                           void *PropValue,
+                                           size_t *PropSizeRet) {
+  size_t value = 0;
+  switch (PropName) {
+  case UR_USM_POOL_INFO_RELEASE_THRESHOLD_EXP:
+    // Current pool implementation ignores threshold.
+    value = 0;
+    break;
+  case UR_USM_POOL_INFO_RESERVED_CURRENT_EXP:
+    value = hPool->getTotalReservedSize();
+    break;
+  case UR_USM_POOL_INFO_USED_CURRENT_EXP:
+    value = hPool->getTotalUsedSize();
+    break;
+  case UR_USM_POOL_INFO_RESERVED_HIGH_EXP:
+    value = hPool->getPeakReservedSize();
+    break;
+  case UR_USM_POOL_INFO_USED_HIGH_EXP:
+    value = hPool->getPeakUsedSize();
+    break;
+  default:
+    // Unknown enumerator
+    return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
+  }
+
+  if (PropValue) {
+    *(size_t *)PropValue = value;
+  }
+
+  if (PropSizeRet) {
+    *PropSizeRet = sizeof(size_t);
+  }
+
+  return UR_RESULT_SUCCESS;
 }

 ur_result_t UR_APICALL urUSMPoolGetDevicePoolExp(ur_context_handle_t,
@@ -699,6 +731,9 @@ enum umf_result_t L0MemoryProvider::alloc(size_t Size, size_t Align,
     return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
   }

+  // TODO: Align the size? free doesn't accept alignment param.
+  AllocStats.update(AllocationStats::INCREASE, Size);
+
   return UMF_RESULT_SUCCESS;
 }

@@ -711,6 +746,8 @@ enum umf_result_t L0MemoryProvider::free(void *Ptr, size_t Size) {
     return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
   }

+  AllocStats.update(AllocationStats::DECREASE, Size);
+
   return UMF_RESULT_SUCCESS;
 }

@@ -866,6 +903,33 @@ umf_result_t L0MemoryProvider::ext_close_ipc_handle(void *Ptr,
   return UMF_RESULT_SUCCESS;
 }

+umf_result_t L0MemoryProvider::ext_ctl(int, const char *Name, void *Arg,
+                                       size_t Size, umf_ctl_query_type_t) {
+  if (std::string(Name) == "stats.allocated_memory") {
+    if (!Arg || Size < sizeof(size_t)) {
+      return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    *(reinterpret_cast<size_t *>(Arg)) = AllocStats.getCurrent();
+    UR_LOG(DEBUG, "L0MemoryProvider::ext_ctl with name: {}, value: {}", Name,
+           AllocStats.getCurrent());
+  } else if (std::string(Name) == "stats.peak_memory") {
+    if (!Arg || Size < sizeof(size_t)) {
+      return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    // Return the peak memory size.
+    *(reinterpret_cast<size_t *>(Arg)) = AllocStats.getPeak();
+    UR_LOG(DEBUG, "L0MemoryProvider::ext_ctl with name: {}, value: {}", Name,
+           AllocStats.getPeak());
+  } else {
+    UR_LOG(ERR, "L0MemoryProvider::ext_ctl with unknown name: {}", Name);
+    return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+  }
+
+  return UMF_RESULT_SUCCESS;
+}
+
 ur_result_t L0SharedMemoryProvider::allocateImpl(void **ResultPtr, size_t Size,
                                                  uint32_t Alignment) {
   return USMSharedAllocImpl(ResultPtr, Context, Device, /*host flags*/ 0,
@@ -956,6 +1020,14 @@ MakeProvider(ProviderParams *Params = nullptr) {
   return nullptr;
 }

+UsmPool::UsmPool(ur_usm_pool_handle_t UrPool,
+                 umf::pool_unique_handle_t UmfPool)
+    : UrPool(UrPool), UmfPool(std::move(UmfPool)),
+      AsyncPool(
+          [](ur_event_handle_t Event) {
+            return urEventReleaseInternal(Event);
+          },
+          [Context = UrPool->Context](void *Ptr) {
+            return USMFreeHelper(Context, Ptr);
+          }) {}
+
 ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
                                              ur_usm_pool_desc_t *PoolDesc,
                                              bool IsProxy)
@@ -983,7 +1055,7 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
     }

     std::unique_ptr<UsmPool> usmPool =
-        std::make_unique<UsmPool>(std::move(Pool));
+        std::make_unique<UsmPool>(this, std::move(Pool));
     auto Ret = umf::umf2urResult(
         umfPoolSetTag(usmPool->UmfPool.get(), usmPool.get(), nullptr));
     if (Ret) {
@@ -1043,7 +1115,7 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context,
         DisjointPoolConfigs.Configs[DescToDisjointPoolMemType(Desc)];

     std::unique_ptr<UsmPool> usmPool = std::make_unique<UsmPool>(
-        usm::makeDisjointPool(MakeProvider(&Desc), PoolConfig));
+        this, usm::makeDisjointPool(MakeProvider(&Desc), PoolConfig));
     auto Ret = umf::umf2urResult(
         umfPoolSetTag(usmPool->UmfPool.get(), usmPool.get(), nullptr));
     if (Ret) {
@@ -1154,20 +1226,20 @@ ur_result_t ur_usm_pool_handle_t_::allocate(ur_context_handle_t Context,
   if (!Pool) {
     return UR_RESULT_ERROR_INVALID_ARGUMENT;
   }
-  auto umfPool = Pool->UmfPool.get();
+  auto UmfPool = Pool->UmfPool.get();

-  *RetMem = umfPoolAlignedMalloc(umfPool, Size, Alignment);
+  *RetMem = umfPoolAlignedMalloc(UmfPool, Size, Alignment);
   if (*RetMem == nullptr) {
     if (Pool->AsyncPool.cleanup()) {
       // true means that objects were deallocated
       // let's try again
-      *RetMem = umfPoolAlignedMalloc(umfPool, Size, Alignment);
+      *RetMem = umfPoolAlignedMalloc(UmfPool, Size, Alignment);
     }
     if (*RetMem == nullptr) {
-      auto umfRet = umfPoolGetLastAllocationError(umfPool);
+      auto UmfRet = umfPoolGetLastAllocationError(UmfPool);
       UR_LOG(ERR,
              "enqueueUSMAllocHelper: allocation from the UMF pool {} failed",
-             umfPool);
-      return umf::umf2urResult(umfRet);
+             UmfPool);
+      return umf::umf2urResult(UmfRet);
     }
   }
pool {} failed", - umfPool); - return umf::umf2urResult(umfRet); + UmfPool); + return umf::umf2urResult(UmfRet); } } @@ -1178,6 +1250,34 @@ ur_result_t ur_usm_pool_handle_t_::allocate(ur_context_handle_t Context, std::forward_as_tuple(Context)); } + size_t UsableSize = 0; + auto UmfRet = umfPoolMallocUsableSize(UmfPool, *RetMem, &UsableSize); + if (UmfRet != UMF_RESULT_SUCCESS && + UmfRet != UMF_RESULT_ERROR_NOT_SUPPORTED) { + return umf::umf2urResult(UmfRet); + } + + AllocStats.update(AllocationStats::UpdateType::INCREASE, UsableSize); + + return UR_RESULT_SUCCESS; +} + +ur_result_t ur_usm_pool_handle_t_::free(void *Mem, + umf_memory_pool_handle_t UmfPool) { + size_t Size = 0; + auto UmfRet = umfPoolMallocUsableSize(UmfPool, Mem, &Size); + if (UmfRet != UMF_RESULT_SUCCESS && + UmfRet != UMF_RESULT_ERROR_NOT_SUPPORTED) { + return umf::umf2urResult(UmfRet); + } + + UmfRet = umfPoolFree(UmfPool, Mem); + if (UmfRet != UMF_RESULT_SUCCESS) { + return umf::umf2urResult(UmfRet); + } + + AllocStats.update(AllocationStats::UpdateType::DECREASE, Size); + return UR_RESULT_SUCCESS; } @@ -1208,6 +1308,60 @@ void ur_usm_pool_handle_t_::cleanupPoolsForQueue(ur_queue_handle_t Queue) { }); } +size_t ur_usm_pool_handle_t_::getTotalReservedSize() { + size_t totalAllocatedSize = 0; + umf_result_t UmfRet = UMF_RESULT_SUCCESS; + PoolManager.forEachPool([&](UsmPool *p) { + umf_memory_provider_handle_t hProvider = nullptr; + size_t allocatedSize = 0; + UmfRet = umfPoolGetMemoryProvider(p->UmfPool.get(), &hProvider); + if (UmfRet != UMF_RESULT_SUCCESS) { + return false; + } + + UmfRet = umfCtlGet("umf.provider.by_handle.stats.allocated_memory", + hProvider, &allocatedSize, sizeof(allocatedSize)); + if (UmfRet != UMF_RESULT_SUCCESS) { + return false; + } + + totalAllocatedSize += allocatedSize; + return true; + }); + + return UmfRet == UMF_RESULT_SUCCESS ? totalAllocatedSize : 0; +} + +size_t ur_usm_pool_handle_t_::getPeakReservedSize() { + size_t totalAllocatedSize = 0; + umf_result_t Ret = UMF_RESULT_SUCCESS; + PoolManager.forEachPool([&](UsmPool *p) { + umf_memory_provider_handle_t hProvider = nullptr; + size_t allocatedSize = 0; + Ret = umfPoolGetMemoryProvider(p->UmfPool.get(), &hProvider); + if (Ret != UMF_RESULT_SUCCESS) { + return false; + } + + Ret = umfCtlGet("umf.provider.by_handle.stats.peak_memory", hProvider, + &allocatedSize, sizeof(allocatedSize)); + if (Ret != UMF_RESULT_SUCCESS) { + return false; + } + + totalAllocatedSize += allocatedSize; + return true; + }); + + return Ret == UMF_RESULT_SUCCESS ? 
+
+size_t ur_usm_pool_handle_t_::getTotalUsedSize() {
+  return AllocStats.getCurrent();
+}
+
+size_t ur_usm_pool_handle_t_::getPeakUsedSize() { return AllocStats.getPeak(); }
+
 bool ur_usm_pool_handle_t_::hasPool(const umf_memory_pool_handle_t Pool) {
   return getPoolByHandle(Pool) != nullptr;
 }

@@ -1282,16 +1436,25 @@ ur_result_t USMFreeHelper(ur_context_handle_t Context, void *Ptr,
     Context->MemAllocs.erase(It);
   }

-  umf_memory_pool_handle_t hPool = nullptr;
-  auto umfRet = umfPoolByPtr(Ptr, &hPool);
-  if (umfRet != UMF_RESULT_SUCCESS || !hPool) {
+  umf_memory_pool_handle_t UmfPool = nullptr;
+  auto UmfRet = umfPoolByPtr(Ptr, &UmfPool);
+  if (UmfRet != UMF_RESULT_SUCCESS || !UmfPool) {
     if (IndirectAccessTrackingEnabled)
       UR_CALL(ContextReleaseHelper(Context));
     return UR_RESULT_ERROR_INVALID_MEM_OBJECT;
   }

-  umfRet = umfPoolFree(hPool, Ptr);
-  if (IndirectAccessTrackingEnabled)
+  UsmPool *UsmPool = nullptr;
+  UmfRet = umfPoolGetTag(UmfPool, (void **)&UsmPool);
+  if (UmfRet != UMF_RESULT_SUCCESS || !UsmPool) {
+    // This should never happen
+    UR_LOG(ERR, "USMFreeHelper: invalid pool tag");
+    return UR_RESULT_ERROR_UNKNOWN;
+  }
+
+  auto Ret = UsmPool->UrPool->free(Ptr, UmfPool);
+  if (IndirectAccessTrackingEnabled) {
     UR_CALL(ContextReleaseHelper(Context));
-  return umf::umf2urResult(umfRet);
+  }
+  return Ret;
 }
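For illustration (not part of the diff): with the implementation above, callers can now query the new pool statistics. A minimal sketch assuming a valid `hPool` handle; error handling omitted:

  size_t Reserved = 0;
  size_t PeakUsed = 0;
  urUSMPoolGetInfoExp(hPool, UR_USM_POOL_INFO_RESERVED_CURRENT_EXP, &Reserved,
                      nullptr);
  urUSMPoolGetInfoExp(hPool, UR_USM_POOL_INFO_USED_HIGH_EXP, &PeakUsed,
                      nullptr);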
diff --git a/unified-runtime/source/adapters/level_zero/usm.hpp b/unified-runtime/source/adapters/level_zero/usm.hpp
index e0e59e72f5fb3..7e14b4d6527a6 100644
--- a/unified-runtime/source/adapters/level_zero/usm.hpp
+++ b/unified-runtime/source/adapters/level_zero/usm.hpp
@@ -17,21 +17,49 @@
 #include "event.hpp"
 #include "ur_api.h"
 #include "ur_pool_manager.hpp"
+#include "usm.hpp"

 #include

 usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig();

 struct UsmPool {
-  UsmPool(umf::pool_unique_handle_t Pool)
-      : UmfPool(std::move(Pool)), AsyncPool([](ur_event_handle_t Event) {
-          return urEventReleaseInternal(Event);
-        }) {}
+  UsmPool(ur_usm_pool_handle_t UrPool, umf::pool_unique_handle_t UmfPool);
+
+  // Parent pool.
+  ur_usm_pool_handle_t UrPool;
   umf::pool_unique_handle_t UmfPool;
   // 'AsyncPool' needs to be declared after 'UmfPool' so its destructor is
   // invoked first.
   EnqueuedPool AsyncPool;
 };

+struct AllocationStats {
+public:
+  enum UpdateType {
+    INCREASE,
+    DECREASE,
+  };
+
+  void update(UpdateType Type, size_t Size) {
+    if (Type == INCREASE) {
+      AllocatedMemorySize += Size;
+      size_t Current = AllocatedMemorySize.load();
+      size_t Peak = PeakAllocatedMemorySize.load();
+      if (Peak < Current) {
+        PeakAllocatedMemorySize.store(Current);
+      }
+    } else if (Type == DECREASE) {
+      AllocatedMemorySize -= Size;
+    }
+  }
+
+  size_t getCurrent() { return AllocatedMemorySize.load(); }
+  size_t getPeak() { return PeakAllocatedMemorySize.load(); }
+
+private:
+  std::atomic_size_t AllocatedMemorySize{0};
+  std::atomic_size_t PeakAllocatedMemorySize{0};
+};
+
 struct ur_usm_pool_handle_t_ : ur_object {
   ur_usm_pool_handle_t_(ur_context_handle_t Context,
                         ur_usm_pool_desc_t *PoolDesc, bool IsProxy = false);
@@ -41,6 +69,7 @@ struct ur_usm_pool_handle_t_ : ur_object {
   ur_result_t allocate(ur_context_handle_t Context, ur_device_handle_t Device,
                        const ur_usm_desc_t *USMDesc, ur_usm_type_t Type,
                        size_t Size, void **RetMem);
+  ur_result_t free(void *Mem, umf_memory_pool_handle_t UmfPool);

   std::optional<std::pair<void *, ur_event_handle_t>>
   allocateEnqueued(ur_queue_handle_t Queue, ur_device_handle_t Device,
@@ -51,14 +80,18 @@ struct ur_usm_pool_handle_t_ : ur_object {
   UsmPool *getPoolByHandle(const umf_memory_pool_handle_t Pool);
   void cleanupPools();
   void cleanupPoolsForQueue(ur_queue_handle_t Queue);
+  size_t getTotalReservedSize();
+  size_t getPeakReservedSize();
+  size_t getTotalUsedSize();
+  size_t getPeakUsedSize();

   ur_context_handle_t Context;
-
   ur::RefCount RefCount;

 private:
   UsmPool *getPool(const usm::pool_descriptor &Desc);
   usm::pool_manager<usm::pool_descriptor, UsmPool> PoolManager;
+  AllocationStats AllocStats;
 };

 // Exception type to pass allocation errors
@@ -138,6 +171,7 @@ class L0MemoryProvider : public USMMemoryProviderBase {
   umf_result_t GetL0MinPageSize(const void *Mem, size_t *PageSize);
   size_t MinPageSize = 0;
   bool MinPageSizeCached = false;
+  AllocationStats AllocStats;

 public:
   umf_result_t initialize(ur_context_handle_t Ctx,
@@ -159,6 +193,8 @@ class L0MemoryProvider : public USMMemoryProviderBase {
   umf_result_t ext_put_ipc_handle(void *) override;
   umf_result_t ext_open_ipc_handle(void *, void **) override;
   umf_result_t ext_close_ipc_handle(void *, size_t) override;
+  umf_result_t ext_ctl(int, const char *, void *, size_t,
+                       umf_ctl_query_type_t) override;
 };

 // Allocation routines for shared memory type
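A caveat on AllocationStats::update above: the peak is tracked with a separate load, compare, and store, so two racing increases can publish a stale peak. If exact peaks ever matter, a compare-exchange loop is the usual alternative; a sketch using the same member names (not part of the diff):

  void update(UpdateType Type, size_t Size) {
    if (Type == INCREASE) {
      // operator+= on std::atomic returns the updated value.
      size_t Current = (AllocatedMemorySize += Size);
      size_t Peak = PeakAllocatedMemorySize.load();
      // Retry until Peak >= Current or we successfully publish Current;
      // compare_exchange_weak reloads Peak into the local on failure.
      while (Peak < Current &&
             !PeakAllocatedMemorySize.compare_exchange_weak(Peak, Current)) {
      }
    } else if (Type == DECREASE) {
      AllocatedMemorySize -= Size;
    }
  }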
diff --git a/unified-runtime/source/adapters/level_zero/v2/usm.cpp b/unified-runtime/source/adapters/level_zero/v2/usm.cpp
index c30b23ee35282..610aec17aa86e 100644
--- a/unified-runtime/source/adapters/level_zero/v2/usm.cpp
+++ b/unified-runtime/source/adapters/level_zero/v2/usm.cpp
@@ -176,10 +176,10 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t hContext,
     auto &poolConfig =
         disjointPoolConfigs.value().Configs[descToDisjoinPoolMemType(desc)];
     auto pool = usm::makeDisjointPool(makeProvider(desc), poolConfig);
-    usmPool = std::make_unique<UsmPool>(std::move(pool));
+    usmPool = std::make_unique<UsmPool>(this, std::move(pool));
   } else {
     auto pool = usm::makeProxyPool(makeProvider(desc));
-    usmPool = std::make_unique<UsmPool>(std::move(pool));
+    usmPool = std::make_unique<UsmPool>(this, std::move(pool));
   }
   UMF_CALL_THROWS(
       umfPoolSetTag(usmPool->umfPool.get(), usmPool.get(), nullptr));
diff --git a/unified-runtime/source/adapters/level_zero/v2/usm.hpp b/unified-runtime/source/adapters/level_zero/v2/usm.hpp
index 35e3446b82abc..8fc8d12ecf147 100644
--- a/unified-runtime/source/adapters/level_zero/v2/usm.hpp
+++ b/unified-runtime/source/adapters/level_zero/v2/usm.hpp
@@ -18,15 +18,7 @@
 #include "event.hpp"
 #include "ur_pool_manager.hpp"

-struct UsmPool {
-  UsmPool(umf::pool_unique_handle_t pPool)
-      : umfPool(std::move(pPool)),
-        asyncPool([](ur_event_handle_t hEvent) { return hEvent->release(); }) {}
-  umf::pool_unique_handle_t umfPool;
-  // 'asyncPool' needs to be declared after 'umfPool' so its destructor is
-  // invoked first.
-  EnqueuedPool asyncPool;
-};
+struct UsmPool;

 struct ur_usm_pool_handle_t_ : ur_object {
   ur_usm_pool_handle_t_(ur_context_handle_t hContext,
@@ -58,3 +50,16 @@ struct ur_usm_pool_handle_t_ : ur_object {

   UsmPool *getPool(const usm::pool_descriptor &desc);
 };
+
+struct UsmPool {
+  UsmPool(ur_usm_pool_handle_t urPool, umf::pool_unique_handle_t umfPool)
+      : umfPool(std::move(umfPool)),
+        asyncPool([](ur_event_handle_t hEvent) { return hEvent->release(); },
+                  [context = urPool->getContextHandle()](void *ptr) {
+                    return ur::level_zero::urUSMFree(context, ptr);
+                  }) {}
+  umf::pool_unique_handle_t umfPool;
+  // 'asyncPool' needs to be declared after 'umfPool' so its destructor is
+  // invoked first.
+  EnqueuedPool asyncPool;
+};
diff --git a/unified-runtime/test/conformance/exp_command_buffer/rect_write.cpp b/unified-runtime/test/conformance/exp_command_buffer/rect_write.cpp
index 917078d10cb9f..377d9a55280dd 100644
--- a/unified-runtime/test/conformance/exp_command_buffer/rect_write.cpp
+++ b/unified-runtime/test/conformance/exp_command_buffer/rect_write.cpp
@@ -121,7 +121,7 @@ UUR_DEVICE_TEST_SUITE_WITH_PARAM(
     urCommandBufferAppendMemBufferWriteRectTestWithParam>);

 TEST_P(urCommandBufferAppendMemBufferWriteRectTestWithParam, Success) {
-  UUR_KNOWN_FAILURE_ON(uur::LevelZero{});
+  UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::LevelZeroV2{});

   // Zero it to begin with since the write may not cover the whole buffer.
   const uint8_t zero = 0x0;
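End note (illustration, not part of the diff): the reserved-size getters reach the Level Zero provider through UMF's ctl interface, which dispatches into L0MemoryProvider::ext_ctl. The round trip, condensed, assuming a valid umf_memory_pool_handle_t `hUmfPool`; error handling omitted:

  umf_memory_provider_handle_t hProvider = nullptr;
  umfPoolGetMemoryProvider(hUmfPool, &hProvider);

  size_t Reserved = 0;
  // Routed by UMF to L0MemoryProvider::ext_ctl("stats.allocated_memory", ...).
  umfCtlGet("umf.provider.by_handle.stats.allocated_memory", hProvider,
            &Reserved, sizeof(Reserved));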