Skip to content

Commit 1e0064f

Browse files
Allow ULTs (unit-level tests) to work when TimestampPacketWrite is enabled
Change-Id: Idd4622469220b859e8724d9179837c685377ce52
1 parent b0acc5e commit 1e0064f

15 files changed

+74 -36 lines changed

runtime/command_queue/enqueue_common.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,9 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
280280
eventBuilder.getEvent()->addTimestampPacketNodes(*timestampPacketContainer);
281281
for (size_t i = 0; i < eventsRequest.numEventsInWaitList; i++) {
282282
auto waitlistEvent = castToObjectOrAbort<Event>(eventsRequest.eventWaitList[i]);
283-
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
283+
if (!waitlistEvent->isUserEvent()) {
284+
eventBuilder.getEvent()->addTimestampPacketNodes(*waitlistEvent->getTimestampPacketNodes());
285+
}
284286
}
285287
}
286288
}

runtime/command_queue/hardware_interface.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,15 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
183183

184184
dispatchWorkarounds(commandStream, commandQueue, kernel, true);
185185

186-
if (currentTimestampPacketNodes) {
186+
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
187187
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
188188
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, nullptr, timestampPacket, TimestampPacket::WriteOperationType::BeforeWalker);
189189
}
190190

191191
// Program the walker. Invokes execution so all state should already be programmed
192192
auto walkerCmd = allocateWalkerSpace(*commandStream, kernel);
193193

194-
if (currentTimestampPacketNodes) {
194+
if (currentTimestampPacketNodes && commandQueue.getDevice().getCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
195195
auto timestampPacket = currentTimestampPacketNodes->peekNodes().at(currentDispatchIndex)->tag;
196196
GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(commandStream, walkerCmd, timestampPacket, TimestampPacket::WriteOperationType::AfterWalker);
197197
}

unit_tests/command_queue/command_queue_hw_tests.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -935,11 +935,6 @@ HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenNoBlockedThenKern
935935
cl_int status = pCmdQ->enqueueKernel(mockKernel, 1, &offset, &gws, &lws, 0, nullptr, nullptr);
936936
EXPECT_EQ(CL_SUCCESS, status);
937937
EXPECT_EQ(1u, mockKernel->makeResidentCalls);
938-
939-
std::map<GraphicsAllocation *, uint32_t>::iterator it = csr.makeResidentAllocations.begin();
940-
for (; it != csr.makeResidentAllocations.end(); it++) {
941-
EXPECT_EQ(1u, it->second);
942-
}
943938
}
944939

945940
HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernelGetResidencyCalledOnce) {
@@ -963,17 +958,13 @@ HWTEST_F(CommandQueueHwTest, givenWalkerSplitEnqueueNDRangeWhenBlockedThenKernel
963958
EXPECT_EQ(1u, mockKernel->getResidencyCalls);
964959

965960
userEvent.setStatus(CL_COMPLETE);
966-
967-
std::map<GraphicsAllocation *, uint32_t>::iterator it = csr.makeResidentAllocations.begin();
968-
for (; it != csr.makeResidentAllocations.end(); it++) {
969-
EXPECT_EQ(1u, it->second);
970-
}
971961
}
972962

973963
HWTEST_F(CommandQueueHwTest, givenKernelSplitEnqueueReadBufferWhenBlockedThenEnqueueSurfacesMakeResidentIsCalledOnce) {
974964
UserEvent userEvent(context);
975965
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
976966
csr.storeMakeResidentAllocations = true;
967+
csr.timestampPacketWriteEnabled = false;
977968

978969
BufferDefaults::context = context;
979970
std::unique_ptr<Buffer> buffer(BufferHelper<>::create());

unit_tests/command_queue/enqueue_copy_buffer_event_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ TEST_F(EnqueueCopyBuffer, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {
5959

6060
uint32_t taskLevelEvent1 = 8;
6161
uint32_t taskLevelEvent2 = 19;
62-
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
63-
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
62+
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 4);
63+
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 10);
6464

6565
cl_event eventWaitList[] = {
6666
&event1,

unit_tests/command_queue/enqueue_fill_buffer_event_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ HWTEST_F(FillBufferEventTests, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1
7070

7171
uint32_t taskLevelEvent1 = 8;
7272
uint32_t taskLevelEvent2 = 19;
73-
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
74-
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
73+
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
74+
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
7575

7676
float pattern[] = {1.0f};
7777
size_t patternSize = sizeof(pattern);

unit_tests/command_queue/enqueue_handler_tests.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,19 +261,21 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu
261261

262262
HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) {
263263
struct ExternallySynchEvent : Event {
264-
ExternallySynchEvent() : Event(nullptr, CL_COMMAND_MARKER, 0, 0) {
264+
ExternallySynchEvent(CommandQueue *cmdQueue) : Event(cmdQueue, CL_COMMAND_MARKER, 0, 0) {
265265
transitionExecutionStatus(CL_COMPLETE);
266266
this->updateTaskCount(7);
267267
}
268268
bool isExternallySynchronized() const override {
269269
return true;
270270
}
271271
};
272-
ExternallySynchEvent synchEvent;
272+
273+
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
274+
275+
ExternallySynchEvent synchEvent(mockCmdQ);
273276
cl_event inEv = &synchEvent;
274277
cl_event outEv = nullptr;
275278

276-
auto mockCmdQ = new MockCommandQueueHw<FamilyType>(context, pDevice, 0);
277279
bool blocking = false;
278280
MultiDispatchInfo emptyDispatchInfo;
279281
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr,
@@ -356,4 +358,4 @@ HWTEST_F(EnqueueHandlerTestBasic, givenEnqueueHandlerWhenCommandIsBlokingThenCom
356358
nullptr);
357359
EXPECT_EQ(mockCsr->waitForTaskCountRequiredTaskCount, mockCmdQ->completionStampTaskCount);
358360
mockCmdQ->release();
359-
}
361+
}

unit_tests/command_queue/enqueue_kernel_event_tests.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ TEST_F(EventTests, eventReturnedShouldBeMaxOfInputEventsAndCmdQPlus1) {
4343

4444
uint32_t taskLevelEvent1 = 8;
4545
uint32_t taskLevelEvent2 = 19;
46-
Event event1(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
47-
Event event2(nullptr, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
46+
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent1, 15);
47+
Event event2(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, taskLevelEvent2, 16);
4848

4949
cl_event eventWaitList[] =
5050
{

unit_tests/command_queue/enqueue_kernel_tests.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,9 +1004,10 @@ HWTEST_F(EnqueueKernelTest, givenCommandStreamReceiverInBatchingModeWhenEnqueueK
10041004

10051005
//Two more surfaces from preemptionAllocation and SipKernel
10061006
size_t csrSurfaceCount = (pDevice->getPreemptionMode() == PreemptionMode::MidThread) ? 2 : 0;
1007+
size_t timestampPacketSurfacesCount = mockCsr->peekTimestampPacketWriteEnabled() ? 1 : 0;
10071008

10081009
EXPECT_EQ(0, mockCsr->flushCalledCount);
1009-
EXPECT_EQ(5u + csrSurfaceCount, cmdBuffer->surfaces.size());
1010+
EXPECT_EQ(5u + csrSurfaceCount + timestampPacketSurfacesCount, cmdBuffer->surfaces.size());
10101011
}
10111012

10121013
HWTEST_F(EnqueueKernelTest, givenReducedAddressSpaceGraphicsAllocationForHostPtrWithL3FlushRequiredWhenEnqueueKernelIsCalledThenFlushIsCalledForReducedAddressSpacePlatforms) {
@@ -1455,6 +1456,7 @@ HWTEST_F(EnqueueKernelTest, givenInOrderCommandQueueWhenEnqueueKernelReturningEv
14551456

14561457
auto mockCsr = new MockCsrHw2<FamilyType>(pDevice->getHardwareInfo(), *pDevice->executionEnvironment);
14571458
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
1459+
mockCsr->timestampPacketWriteEnabled = false;
14581460
pDevice->resetCommandStreamReceiver(mockCsr);
14591461

14601462
auto mockedSubmissionsAggregator = new mockSubmissionsAggregator();

unit_tests/command_queue/enqueue_kernel_two_ooq_tests.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TwoOOQsTwoDependentWalkers, shouldHaveOneVFEState) {
124124
HWTEST_F(TwoOOQsTwoDependentWalkers, shouldHaveAPipecontrolBetweenWalkers) {
125125
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
126126

127+
pDevice->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;
128+
127129
parseWalkers<FamilyType>();
128130
auto itorCmd = find<PIPE_CONTROL *>(itorWalker1, itorWalker2);
129131

unit_tests/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, IOQWithTwoWalkers, shouldHaveOneVFEState) {
3333
}
3434

3535
HWTEST_F(IOQWithTwoWalkers, shouldHaveAPipecontrolBetweenWalkers2) {
36+
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
37+
commandStreamReceiver.timestampPacketWriteEnabled = false;
38+
3639
enqueueTwoKernels<FamilyType>();
37-
auto &commandStreamReceiver = pDevice->getCommandStreamReceiver();
3840

3941
typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
4042

0 commit comments

Comments
 (0)