Skip to content

Commit 64eb82e

Browse files
Add Kernel restrictions
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent aed3fad commit 64eb82e

File tree

8 files changed

+574
-17
lines changed

8 files changed

+574
-17
lines changed

opencl/source/kernel/kernel.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2670,4 +2670,87 @@ bool Kernel::areMultipleSubDevicesInContext() const {
26702670
return context ? context->containsMultipleSubDevices(clDevice.getRootDeviceIndex()) : false;
26712671
}
26722672

2673+
void Kernel::reconfigureKernel() {
2674+
auto &kernelDescriptor = kernelInfo.kernelDescriptor;
2675+
if (kernelDescriptor.kernelAttributes.numGrfRequired == GrfConfig::LargeGrfNumber) {
2676+
maxKernelWorkGroupSize >>= 1;
2677+
}
2678+
this->containsStatelessWrites = kernelDescriptor.kernelAttributes.flags.usesStatelessWrites;
2679+
this->specialPipelineSelectMode = kernelDescriptor.extendedInfo.get() ? kernelDescriptor.extendedInfo->specialPipelineSelectModeRequired() : false;
2680+
}
2681+
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
2682+
if (false == HwHelper::cacheFlushAfterWalkerSupported(commandQueue.getDevice().getHardwareInfo())) {
2683+
return false;
2684+
}
2685+
2686+
if (DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get() != -1) {
2687+
return !!DebugManager.flags.EnableCacheFlushAfterWalkerForAllQueues.get();
2688+
}
2689+
2690+
bool cmdQueueRequiresCacheFlush = commandQueue.getRequiresCacheFlushAfterWalker();
2691+
if (false == cmdQueueRequiresCacheFlush) {
2692+
return false;
2693+
}
2694+
if (commandQueue.getGpgpuCommandStreamReceiver().isMultiOsContextCapable()) {
2695+
return false;
2696+
}
2697+
bool isMultiDevice = commandQueue.getContext().containsMultipleSubDevices(commandQueue.getDevice().getRootDeviceIndex());
2698+
if (false == isMultiDevice) {
2699+
return false;
2700+
}
2701+
bool isDefaultContext = (commandQueue.getContext().peekContextType() == ContextType::CONTEXT_TYPE_DEFAULT);
2702+
if (true == isDefaultContext) {
2703+
return false;
2704+
}
2705+
2706+
if (getProgram()->getGlobalSurface(commandQueue.getDevice().getRootDeviceIndex()) != nullptr) {
2707+
return true;
2708+
}
2709+
if (svmAllocationsRequireCacheFlush) {
2710+
return true;
2711+
}
2712+
size_t args = kernelArgRequiresCacheFlush.size();
2713+
for (size_t i = 0; i < args; i++) {
2714+
if (kernelArgRequiresCacheFlush[i] != nullptr) {
2715+
return true;
2716+
}
2717+
}
2718+
return false;
2719+
}
2720+
2721+
bool Kernel::requiresLimitedWorkgroupSize() const {
2722+
if (!this->isBuiltIn) {
2723+
return false;
2724+
}
2725+
if (this->auxTranslationDirection != AuxTranslationDirection::None) {
2726+
return false;
2727+
}
2728+
2729+
//if source is buffer in local memory, no need for limited workgroup
2730+
if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTPointer>()) {
2731+
if (this->getKernelArgInfo(0).object) {
2732+
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
2733+
auto buffer = castToObject<Buffer>(this->getKernelArgInfo(0u).object);
2734+
if (buffer && buffer->getGraphicsAllocation(rootDeviceIndex)->getMemoryPool() == MemoryPool::LocalMemory) {
2735+
return false;
2736+
}
2737+
}
2738+
}
2739+
2740+
//if we are reading from image no need for limited workgroup
2741+
if (this->kernelInfo.getArgDescriptorAt(0).is<ArgDescriptor::ArgTImage>()) {
2742+
return false;
2743+
}
2744+
2745+
return true;
2746+
}
2747+
2748+
void Kernel::updateAuxTranslationRequired() {
2749+
if (DebugManager.flags.EnableStatelessCompression.get()) {
2750+
if (hasDirectStatelessAccessToHostMemory() || hasIndirectStatelessAccessToHostMemory()) {
2751+
setAuxTranslationRequired(true);
2752+
}
2753+
}
2754+
}
2755+
26732756
} // namespace NEO

opencl/source/kernel/kernel_extra.cpp

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,7 @@
1111
#include "opencl/source/kernel/kernel.h"
1212

1313
namespace NEO {
14-
bool Kernel::requiresCacheFlushCommand(const CommandQueue &commandQueue) const {
15-
return false;
16-
}
17-
void Kernel::reconfigureKernel() {
18-
}
14+
1915
int Kernel::setKernelThreadArbitrationPolicy(uint32_t policy) {
2016
auto hwInfo = clDevice.getHardwareInfo();
2117
auto &hwHelper = NEO::ClHwHelper::get(hwInfo.platform.eRenderCoreFamily);
@@ -39,14 +35,8 @@ bool Kernel::requiresPerDssBackedBuffer() const {
3935
return DebugManager.flags.ForcePerDssBackedBufferProgramming.get();
4036
}
4137

42-
bool Kernel::requiresLimitedWorkgroupSize() const {
43-
return this->isBuiltIn;
44-
}
45-
4638
// Unconditionally reports CL_INVALID_VALUE; none of the arguments is inspected here.
int32_t Kernel::setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) {
    const int32_t status = CL_INVALID_VALUE;
    return status;
}
4941

50-
void Kernel::updateAuxTranslationRequired() {
51-
}
5242
} // namespace NEO

opencl/test/unit_test/kernel/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ set(IGDRCL_SRCS_tests_kernel
1616
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_info_tests.cpp
1717
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_pipe_tests.cpp
1818
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_svm_tests.cpp
19-
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/kernel_cache_flush_requirements_tests.cpp
19+
${CMAKE_CURRENT_SOURCE_DIR}/kernel_cache_flush_requirements_tests.cpp
2020
${CMAKE_CURRENT_SOURCE_DIR}/kernel_info_cl_tests.cpp
2121
${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_arg_tests.cpp
2222
${CMAKE_CURRENT_SOURCE_DIR}/kernel_immediate_arg_tests.cpp

opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,150 @@ TEST_F(KernelArgBufferTest, whenSettingAuxTranslationRequiredThenIsAuxTranslatio
461461
}
462462
}
463463

464+
// A buffer argument backed by a BUFFER_HOST_MEMORY allocation gives the kernel direct
// stateless access to host memory, so updateAuxTranslationRequired() must switch
// aux translation on when stateless compression is enabled.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer hostMemoryBuffer;
    auto allocation = hostMemoryBuffer.getGraphicsAllocation(mockRootDeviceIndex);
    allocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    auto memObj = (cl_mem)&hostMemoryBuffer;
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &memObj));

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());

    // Setting the argument alone must not change the aux translation state.
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
485+
486+
// A buffer argument backed by a BUFFER_COMPRESSED allocation is not host memory, so
// updateAuxTranslationRequired() must leave aux translation off.
TEST_F(KernelArgBufferTest, givenSetArgBufferOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    MockBuffer compressedBuffer;
    auto allocation = compressedBuffer.getGraphicsAllocation(mockRootDeviceIndex);
    allocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    auto memObj = (cl_mem)&compressedBuffer;
    EXPECT_EQ(CL_SUCCESS, pKernel->setArg(0, sizeof(cl_mem *), &memObj));

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    // Still off: there is no host memory access to trigger the requirement.
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
507+
508+
// An SVM argument backed by a BUFFER_HOST_MEMORY allocation gives the kernel direct
// stateless access to host memory, so updateAuxTranslationRequired() must switch
// aux translation on when stateless compression is enabled.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrue) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char storage[128];
    void *svmPtr = &storage;
    MockGraphicsAllocation svmAllocation(svmPtr, 128);
    svmAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

    EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, svmPtr, &svmAllocation));

    EXPECT_TRUE(pKernel->hasDirectStatelessAccessToHostMemory());

    // Setting the argument alone must not change the aux translation state.
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_TRUE(pKernel->isAuxTranslationRequired());
}
528+
529+
// An SVM argument backed by a BUFFER_COMPRESSED allocation is not host memory, so
// updateAuxTranslationRequired() must leave aux translation off.
TEST_F(KernelArgBufferTest, givenSetArgSvmAllocOnKernelWithNoDirectStatelessAccessToHostMemoryWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    char storage[128];
    void *svmPtr = &storage;
    MockGraphicsAllocation svmAllocation(svmPtr, 128);
    svmAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);

    EXPECT_EQ(CL_SUCCESS, pKernel->setArgSvmAlloc(0, svmPtr, &svmAllocation));

    EXPECT_FALSE(pKernel->hasDirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    // Still off: there is no host memory access to trigger the requirement.
    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
549+
550+
// With indirect stateless access disabled in the kernel info, even a registered
// BUFFER_HOST_MEMORY unified memory allocation must not make aux translation required.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithNoIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnFalse) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = false;

    MockGraphicsAllocation hostMemoryAllocation;
    hostMemoryAllocation.setAllocationType(GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);
    pKernel->setUnifiedMemoryExecInfo(&hostMemoryAllocation);

    EXPECT_FALSE(pKernel->hasIndirectStatelessAccessToHostMemory());

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());

    pKernel->updateAuxTranslationRequired();

    EXPECT_FALSE(pKernel->isAuxTranslationRequired());
}
569+
570+
// With indirect stateless access enabled, only a BUFFER_HOST_MEMORY unified memory
// allocation may make aux translation required; BUFFER and BUFFER_COMPRESSED must not.
TEST_F(KernelArgBufferTest, givenSetUnifiedMemoryExecInfoOnKernelWithIndirectStatelessAccessWhenUpdateAuxTranslationRequiredIsCalledThenIsAuxTranslationRequiredShouldReturnTrueForHostMemoryAllocation) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableStatelessCompression.set(1);

    pKernelInfo->hasIndirectStatelessAccess = true;

    const GraphicsAllocation::AllocationType testedTypes[] = {GraphicsAllocation::AllocationType::BUFFER,
                                                              GraphicsAllocation::AllocationType::BUFFER_COMPRESSED,
                                                              GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY};

    MockGraphicsAllocation unifiedMemoryAllocation;

    for (const auto allocationType : testedTypes) {
        const bool expectHostMemoryAccess = (allocationType == GraphicsAllocation::AllocationType::BUFFER_HOST_MEMORY);

        unifiedMemoryAllocation.setAllocationType(allocationType);
        pKernel->setUnifiedMemoryExecInfo(&unifiedMemoryAllocation);

        EXPECT_EQ(expectHostMemoryAccess, pKernel->hasIndirectStatelessAccessToHostMemory());

        // Registering the allocation alone must not change the aux translation state.
        EXPECT_FALSE(pKernel->isAuxTranslationRequired());

        pKernel->updateAuxTranslationRequired();

        EXPECT_EQ(expectHostMemoryAccess, pKernel->isAuxTranslationRequired());

        // Reset kernel state so the next allocation type starts from a clean slate.
        pKernel->clearUnifiedMemoryExecInfo();
        pKernel->setAuxTranslationRequired(false);
    }
}
607+
464608
class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
465609
public:
466610
void SetUp() {

0 commit comments

Comments
 (0)