Skip to content

Commit 12ffd60

Browse files
fix: copy offload cross engine dependency handling
Related-To: HSD-18043670900
Signed-off-by: Bartosz Dunajski <[email protected]>
Source: ce5a17b
1 parent 714b612 commit 12ffd60

File tree

5 files changed

+34
-2
lines changed

5 files changed

+34
-2
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@ struct CommandListCoreFamily : public CommandListImp {
479479
bool implicitSynchronizedDispatchForCooperativeKernelsAllowed = false;
480480
bool useAdditionalBlitProperties = false;
481481
bool isPostImageWriteFlushRequired = false;
482+
bool latestFlushIsDualCopyOffload = false;
482483
};
483484

484485
template <PRODUCT_FAMILY gfxProductFamily>

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3097,7 +3097,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
30973097
isQwordInOrderCounter(), copyOnlyWait);
30983098

30993099
} else {
3100-
auto resolveDependenciesViaPipeControls = !copyOnlyWait && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
3100+
bool crossEngineDependency = (latestFlushIsDualCopyOffload != dualStreamCopyOffloadOperation);
3101+
auto resolveDependenciesViaPipeControls = !crossEngineDependency && !copyOnlyWait && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
31013102

31023103
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
31033104
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
267267
uint64_t relaxedOrderingCounter = 0;
268268
std::atomic<bool> dependenciesPresent{false};
269269
bool latestFlushIsHostVisible = false;
270-
bool latestFlushIsDualCopyOffload = false;
271270
bool keepRelaxedOrderingEnabled = false;
272271
};
273272

level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "shared/source/gmm_helper/gmm_helper.h"
1010
#include "shared/source/helpers/compiler_product_helper.h"
1111
#include "shared/source/memory_manager/internal_allocation_storage.h"
12+
#include "shared/test/common/cmd_parse/hw_parse.h"
1213
#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h"
1314
#include "shared/test/common/libult/ult_command_stream_receiver.h"
1415
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
@@ -967,6 +968,29 @@ HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWithRelaxedOrderingWhenApp
967968
}
968969
}
969970

971+
// Test: forces the dual-stream copy offload mode through the debug flag, creates an
// immediate command list with offload, and sets a counter allocation offset of 64.
// It then appends a memory copy and expects latestFlushIsDualCopyOffload to be set,
// records how much of the command stream is used, and appends a kernel launch.
// Only the commands emitted after the recorded offset are parsed, and the test passes
// when an MI_SEMAPHORE_WAIT command is found among them.
HWCMDTEST_F(IGFX_XE_HP_CORE, CopyOffloadInOrderTests, givenCrossEngineDependencyWhenComputeWorkSubmittedThenUseSemaphore) {
972+
debugManager.flags.OverrideCopyOffloadMode.set(CopyOffloadModes::dualStream);
973+
974+
uint32_t counterOffset = 64;
975+
976+
auto immCmdList = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
977+
immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
978+
979+
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
980+
981+
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams);
982+
EXPECT_TRUE(immCmdList->latestFlushIsDualCopyOffload);
983+
984+
auto offset = cmdStream->getUsed();
985+
986+
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
987+
988+
auto hwCmds = HardwareParse::parseCommandBuffer<FamilyType>(*cmdStream, offset);
989+
990+
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(hwCmds.begin(), hwCmds.end());
991+
EXPECT_NE(hwCmds.end(), itor);
992+
}
993+
970994
HWTEST2_F(CopyOffloadInOrderTests, givenAtomicSignalingModeWhenUpdatingCounterThenUseCorrectHwCommands, IsAtLeastXe2HpgCore) {
971995
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
972996
using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES;

shared/test/common/cmd_parse/hw_parse.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,13 @@ struct HardwareParse : NEO::NonCopyableAndNonMovableClass {
218218
return FamilyType::Parse::getCommandName(cmd);
219219
}
220220

221+
// Parses part of a LinearStream into a freshly created GenCmdList and returns it.
// The parser is handed ptrOffset(getCpuBase(), offset) as the start and
// getUsed() - offset as the length; a failed parse is reported through EXPECT_TRUE
// and the (possibly partial) list is still returned.
// NOTE(review): offset is not checked against getUsed(); if getUsed() is unsigned,
// an offset larger than the used size would presumably wrap the length - confirm callers.
template <typename FamilyType>
222+
static GenCmdList parseCommandBuffer(const LinearStream &linearStream, size_t offset) {
223+
GenCmdList cmds;
224+
EXPECT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(cmds, ptrOffset(linearStream.getCpuBase(), offset), linearStream.getUsed() - offset));
225+
return cmds;
226+
}
227+
221228
// The starting point of parsing commandBuffers. This is important
222229
// because as buffers get reused, we only want to parse the deltas.
223230
LinearStream *previousCS = nullptr;

0 commit comments

Comments
 (0)