Skip to content

Commit 579367f

Browse files
Enqueue write buffer instead of memcpy for renderCompressed allocation
Change-Id: I5505add129e08537fc0009b12228a446aa732645
1 parent c61364b commit 579367f

File tree

6 files changed: 168 additions and 85 deletions.

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
#!groovy
22
neoDependenciesRev='782940-1037'
33
strategy='EQUAL'
4-
allowedCD=297
4+
allowedCD=296

runtime/mem_obj/buffer.cpp

Lines changed: 69 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,10 @@ Buffer *Buffer::create(Context *context,
103103

104104
checkMemory(flags, size, hostPtr, errcodeRet, zeroCopy, allocateMemory, copyMemoryFromHostPtr, allocationType, memoryManager);
105105

106+
if (errcodeRet != CL_SUCCESS) {
107+
return nullptr;
108+
}
109+
106110
if (hostPtr && context->isProvidingPerformanceHints()) {
107111
if (zeroCopy) {
108112
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
@@ -117,75 +121,82 @@ Buffer *Buffer::create(Context *context,
117121
copyMemoryFromHostPtr = false;
118122
allocateMemory = false;
119123
}
120-
if (errcodeRet == CL_SUCCESS) {
121-
while (true) {
122-
if (flags & CL_MEM_USE_HOST_PTR) {
123-
memory = context->getSVMAllocsManager()->getSVMAlloc(hostPtr);
124-
if (memory) {
125-
allocationType = GraphicsAllocation::AllocationType::BUFFER;
126-
zeroCopy = true;
127-
isHostPtrSVM = true;
128-
copyMemoryFromHostPtr = false;
129-
allocateMemory = false;
130-
}
131-
}
132124

133-
if (!memory) {
134-
memory = memoryManager->allocateGraphicsMemoryInPreferredPool(zeroCopy, allocateMemory, true, false, hostPtr, static_cast<size_t>(size), allocationType);
135-
}
136-
137-
if (allocateMemory) {
138-
if (memory) {
139-
memoryManager->addAllocationToHostPtrManager(memory);
140-
}
141-
if (context->isProvidingPerformanceHints()) {
142-
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
143-
}
144-
} else {
145-
if (!memory && Buffer::isReadOnlyMemoryPermittedByFlags(flags)) {
146-
zeroCopy = false;
147-
copyMemoryFromHostPtr = true;
148-
allocateMemory = true;
149-
memory = memoryManager->allocateGraphicsMemoryInPreferredPool(zeroCopy, allocateMemory, true, false, nullptr, static_cast<size_t>(size), allocationType);
150-
}
151-
}
125+
if (flags & CL_MEM_USE_HOST_PTR) {
126+
memory = context->getSVMAllocsManager()->getSVMAlloc(hostPtr);
127+
if (memory) {
128+
allocationType = GraphicsAllocation::AllocationType::BUFFER;
129+
zeroCopy = true;
130+
isHostPtrSVM = true;
131+
copyMemoryFromHostPtr = false;
132+
allocateMemory = false;
133+
}
134+
}
152135

153-
if (!memory) {
154-
errcodeRet = CL_OUT_OF_HOST_MEMORY;
155-
break;
156-
}
136+
if (!memory) {
137+
memory = memoryManager->allocateGraphicsMemoryInPreferredPool(zeroCopy, allocateMemory, true, false, hostPtr, static_cast<size_t>(size), allocationType);
138+
}
157139

158-
memory->setAllocationType(allocationType);
159-
memory->setMemObjectsAllocationWithWritableFlags(!(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)));
140+
if (allocateMemory) {
141+
if (memory) {
142+
memoryManager->addAllocationToHostPtrManager(memory);
143+
}
144+
if (context->isProvidingPerformanceHints()) {
145+
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
146+
}
147+
} else {
148+
if (!memory && Buffer::isReadOnlyMemoryPermittedByFlags(flags)) {
149+
zeroCopy = false;
150+
copyMemoryFromHostPtr = true;
151+
allocateMemory = true;
152+
memory = memoryManager->allocateGraphicsMemoryInPreferredPool(zeroCopy, allocateMemory, true, false, nullptr, static_cast<size_t>(size), allocationType);
153+
}
154+
}
160155

161-
DBG_LOG(LogMemoryObject, __FUNCTION__, "hostPtr:", hostPtr, "size:", size, "memoryStorage:", memory->getUnderlyingBuffer(), "GPU address:", std::hex, memory->getGpuAddress());
156+
if (!memory) {
157+
errcodeRet = CL_OUT_OF_HOST_MEMORY;
158+
return nullptr;
159+
}
162160

163-
if (copyMemoryFromHostPtr) {
164-
memcpy_s(memory->getUnderlyingBuffer(), size, hostPtr, size);
165-
}
161+
memory->setAllocationType(allocationType);
162+
memory->setMemObjectsAllocationWithWritableFlags(!(flags & (CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS)));
163+
164+
DBG_LOG(LogMemoryObject, __FUNCTION__, "hostPtr:", hostPtr, "size:", size, "memoryStorage:", memory->getUnderlyingBuffer(), "GPU address:", std::hex, memory->getGpuAddress());
165+
166+
pBuffer = createBufferHw(context,
167+
flags,
168+
size,
169+
memory->getUnderlyingBuffer(),
170+
const_cast<void *>(hostPtr),
171+
memory,
172+
zeroCopy,
173+
isHostPtrSVM,
174+
false);
175+
if (!pBuffer) {
176+
errcodeRet = CL_OUT_OF_HOST_MEMORY;
177+
memoryManager->removeAllocationFromHostPtrManager(memory);
178+
memoryManager->freeGraphicsMemory(memory);
179+
return nullptr;
180+
}
166181

167-
pBuffer = createBufferHw(context,
168-
flags,
169-
size,
170-
memory->getUnderlyingBuffer(),
171-
const_cast<void *>(hostPtr),
172-
memory,
173-
zeroCopy,
174-
isHostPtrSVM,
175-
false);
176-
if (!pBuffer && allocateMemory) {
177-
memoryManager->removeAllocationFromHostPtrManager(memory);
178-
memoryManager->freeGraphicsMemory(memory);
179-
memory = nullptr;
180-
}
182+
pBuffer->setHostPtrMinSize(size);
181183

182-
if (pBuffer) {
183-
pBuffer->setHostPtrMinSize(size);
184+
if (copyMemoryFromHostPtr) {
185+
if (memory->gmm && memory->gmm->isRenderCompressed) {
186+
auto cmdQ = context->getSpecialQueue();
187+
if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, 0, nullptr, nullptr)) {
188+
errcodeRet = CL_OUT_OF_RESOURCES;
184189
}
185-
break;
190+
} else {
191+
memcpy_s(memory->getUnderlyingBuffer(), size, hostPtr, size);
186192
}
187193
}
188194

195+
if (errcodeRet != CL_SUCCESS) {
196+
pBuffer->release();
197+
return nullptr;
198+
}
199+
189200
return pBuffer;
190201
}
191202

unit_tests/mem_obj/buffer_tests.cpp

Lines changed: 68 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -214,16 +214,26 @@ TEST(Buffer, givenNullPtrWhenBufferIsCreatedWithKernelReadOnlyFlagsThenBufferAll
214214
EXPECT_EQ(nullptr, buffer.get());
215215
}
216216

217-
TEST(Buffer, givenBufferCompressedAllocationAndZeroCopyHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopyAndAllocateStorage) {
218-
HardwareInfo localHwInfo = *platformDevices[0];
217+
struct RenderCompressedBuffersTests : public ::testing::Test {
218+
void SetUp() override {
219+
localHwInfo = *platformDevices[0];
220+
device.reset(Device::create<MockDevice>(&localHwInfo, new ExecutionEnvironment()));
221+
context = std::make_unique<MockContext>(device.get(), true);
222+
}
223+
224+
cl_int retVal = CL_SUCCESS;
225+
HardwareInfo localHwInfo = {};
226+
std::unique_ptr<MockDevice> device;
227+
std::unique_ptr<MockContext> context;
228+
std::unique_ptr<Buffer> buffer;
229+
};
230+
231+
TEST_F(RenderCompressedBuffersTests, givenBufferCompressedAllocationAndZeroCopyHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopyAndAllocateStorage) {
219232
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
220-
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(&localHwInfo, new ExecutionEnvironment()));
221-
auto context = std::make_unique<MockContext>(device.get());
222233

223234
void *cacheAlignedHostPtr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
224-
cl_int retVal = CL_SUCCESS;
225235

226-
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal));
236+
buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, MemoryConstants::cacheLineSize, cacheAlignedHostPtr, retVal));
227237
EXPECT_EQ(cacheAlignedHostPtr, buffer->getGraphicsAllocation()->getUnderlyingBuffer());
228238
EXPECT_TRUE(buffer->isMemObjZeroCopy());
229239
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER);
@@ -237,15 +247,10 @@ TEST(Buffer, givenBufferCompressedAllocationAndZeroCopyHostPtrWhenCheckingMemory
237247
alignedFree(cacheAlignedHostPtr);
238248
}
239249

240-
TEST(Buffer, givenBufferCompressedAllocationAndNoHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopy) {
241-
HardwareInfo localHwInfo = *platformDevices[0];
250+
TEST_F(RenderCompressedBuffersTests, givenBufferCompressedAllocationAndNoHostPtrWhenCheckingMemoryPropertiesThenForceDisableZeroCopy) {
242251
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
243-
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(&localHwInfo, new ExecutionEnvironment()));
244-
auto context = std::make_unique<MockContext>(device.get());
245-
246-
cl_int retVal = CL_SUCCESS;
247252

248-
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal));
253+
buffer.reset(Buffer::create(context.get(), 0, MemoryConstants::cacheLineSize, nullptr, retVal));
249254
EXPECT_TRUE(buffer->isMemObjZeroCopy());
250255
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER);
251256

@@ -255,40 +260,77 @@ TEST(Buffer, givenBufferCompressedAllocationAndNoHostPtrWhenCheckingMemoryProper
255260
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
256261
}
257262

258-
TEST(Buffer, givenBufferCompressedAllocationWhenSharedContextIsUsedThenForceDisableCompression) {
259-
HardwareInfo localHwInfo = *platformDevices[0];
263+
TEST_F(RenderCompressedBuffersTests, givenBufferCompressedAllocationWhenSharedContextIsUsedThenForceDisableCompression) {
260264
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
261-
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(&localHwInfo, new ExecutionEnvironment()));
262-
auto context = std::make_unique<MockContext>(device.get());
263265
context->isSharedContext = false;
264266

265-
cl_int retVal = CL_SUCCESS;
266267
uint32_t hostPtr = 0;
267268

268-
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), 0, sizeof(uint32_t), &hostPtr, retVal));
269+
buffer.reset(Buffer::create(context.get(), 0, sizeof(uint32_t), &hostPtr, retVal));
269270
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
270271

271272
context->isSharedContext = true;
272273
buffer.reset(Buffer::create(context.get(), 0, sizeof(uint32_t), &hostPtr, retVal));
273274
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER);
274275
}
275276

276-
TEST(Buffer, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) {
277-
HardwareInfo localHwInfo = *platformDevices[0];
277+
TEST_F(RenderCompressedBuffersTests, givenSvmAllocationWhenCreatingBufferThenForceDisableCompression) {
278278
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
279-
std::unique_ptr<MockDevice> device(Device::create<MockDevice>(&localHwInfo, new ExecutionEnvironment()));
280-
auto context = std::make_unique<MockContext>(device.get());
281279

282280
auto svmAlloc = context->getSVMAllocsManager()->createSVMAlloc(sizeof(uint32_t), false);
283281

284-
cl_int retVal = CL_SUCCESS;
285-
286-
std::unique_ptr<Buffer> buffer(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmAlloc, retVal));
282+
buffer.reset(Buffer::create(context.get(), CL_MEM_USE_HOST_PTR, sizeof(uint32_t), svmAlloc, retVal));
287283
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER);
288284

289285
context->getSVMAllocsManager()->freeSVMAlloc(svmAlloc);
290286
}
291287

288+
struct RenderCompressedBuffersCopyHostMemoryTests : public RenderCompressedBuffersTests {
289+
void SetUp() override {
290+
RenderCompressedBuffersTests::SetUp();
291+
device->injectMemoryManager(new MockMemoryManager(true));
292+
context->setMemoryManager(device->getMemoryManager());
293+
mockCmdQ = new MockCommandQueue();
294+
context->setSpecialQueue(mockCmdQ);
295+
}
296+
297+
MockCommandQueue *mockCmdQ = nullptr;
298+
uint32_t hostPtr = 0;
299+
};
300+
301+
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenCallWriteBuffer) {
302+
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
303+
304+
buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal));
305+
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
306+
EXPECT_EQ(CL_SUCCESS, retVal);
307+
308+
EXPECT_EQ(1u, mockCmdQ->writeBufferCounter);
309+
EXPECT_TRUE(mockCmdQ->writeBufferBlocking);
310+
EXPECT_EQ(0u, mockCmdQ->writeBufferOffset);
311+
EXPECT_EQ(sizeof(uint32_t), mockCmdQ->writeBufferSize);
312+
EXPECT_EQ(&hostPtr, mockCmdQ->writeBufferPtr);
313+
}
314+
315+
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
316+
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = false;
317+
318+
buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal));
319+
EXPECT_EQ(buffer->getGraphicsAllocation()->getAllocationType(), GraphicsAllocation::AllocationType::BUFFER);
320+
EXPECT_EQ(CL_SUCCESS, retVal);
321+
322+
EXPECT_EQ(0u, mockCmdQ->writeBufferCounter);
323+
}
324+
325+
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenRenderCompressedBufferWhenWriteBufferFailsThenReturnErrorCode) {
326+
localHwInfo.capabilityTable.ftrRenderCompressedBuffers = true;
327+
mockCmdQ->writeBufferRetValue = CL_INVALID_VALUE;
328+
329+
buffer.reset(Buffer::create(context.get(), CL_MEM_COPY_HOST_PTR, sizeof(uint32_t), &hostPtr, retVal));
330+
EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);
331+
EXPECT_EQ(nullptr, buffer.get());
332+
}
333+
292334
class BufferTest : public DeviceFixture,
293335
public testing::TestWithParam<uint64_t /*cl_mem_flags*/> {
294336
public:

unit_tests/mocks/mock_command_queue.h

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,25 @@ class MockCommandQueue : public CommandQueue {
4444
releaseIndirectHeapCalled = true;
4545
CommandQueue::releaseIndirectHeap(heap);
4646
}
47+
48+
cl_int enqueueWriteBuffer(Buffer *buffer, cl_bool blockingWrite, size_t offset, size_t size, const void *ptr,
49+
cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event) override {
50+
writeBufferCounter++;
51+
writeBufferBlocking = (CL_TRUE == blockingWrite);
52+
writeBufferOffset = offset;
53+
writeBufferSize = size;
54+
writeBufferPtr = const_cast<void *>(ptr);
55+
return writeBufferRetValue;
56+
}
57+
4758
bool releaseIndirectHeapCalled = false;
59+
60+
cl_int writeBufferRetValue = CL_SUCCESS;
61+
uint32_t writeBufferCounter = 0;
62+
bool writeBufferBlocking = false;
63+
size_t writeBufferOffset = 0;
64+
size_t writeBufferSize = 0;
65+
void *writeBufferPtr = nullptr;
4866
};
4967

5068
template <typename GfxFamily>

unit_tests/mocks/mock_memory_manager.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,4 +64,13 @@ GraphicsAllocation *MockMemoryManager::peekAllocationListHead() {
6464
return graphicsAllocations.peekHead();
6565
}
6666

67+
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin, bool preferRenderCompressed) {
68+
auto allocation = OsAgnosticMemoryManager::allocateGraphicsMemory64kb(size, alignment, forcePin, preferRenderCompressed);
69+
if (allocation) {
70+
allocation->gmm = new Gmm(allocation->getUnderlyingBuffer(), size, false, preferRenderCompressed);
71+
allocation->gmm->isRenderCompressed = preferRenderCompressed;
72+
}
73+
return allocation;
74+
}
75+
6776
} // namespace OCLRT

unit_tests/mocks/mock_memory_manager.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,9 @@ namespace OCLRT {
2929

3030
class MockMemoryManager : public OsAgnosticMemoryManager {
3131
public:
32+
MockMemoryManager() = default;
33+
MockMemoryManager(bool enable64pages) : OsAgnosticMemoryManager(enable64pages) {}
34+
GraphicsAllocation *allocateGraphicsMemory64kb(size_t size, size_t alignment, bool forcePin, bool preferRenderCompressed) override;
3235
void setDeferredDeleter(DeferredDeleter *deleter);
3336
void overrideAsyncDeleterFlag(bool newValue);
3437
GraphicsAllocation *allocateGraphicsMemoryForImage(ImageInfo &imgInfo, Gmm *gmm) override;

0 commit comments

Comments
 (0)