Skip to content

Commit ce8c44c

Browse files
committed
Add check for local work group size in clEnqueueNDRangeKernel call.
- Incoming local work group size cannot exceed device capabilities. Change-Id: I89a7503155c71443e3ebc630debb5d5b466c6cb5
1 parent 5ed13d7 commit ce8c44c

File tree

6 files changed

+34
-18
lines changed

6 files changed

+34
-18
lines changed

runtime/command_queue/enqueue_kernel.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
7171
}
7272

7373
size_t remainder = 0;
74+
size_t totalWorkItems = 1u;
7475
const size_t *localWkgSizeToPass = localWorkSizeIn ? workGroupSize : nullptr;
7576

7677
for (auto i = 0u; i < workDim; i++) {
@@ -86,6 +87,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
8687
}
8788
}
8889
workGroupSize[i] = localWorkSizeIn[i];
90+
totalWorkItems *= localWorkSizeIn[i];
8991
}
9092

9193
remainder += region[i] % workGroupSize[i];
@@ -126,6 +128,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
126128
",", globalWorkSizeIn[2],
127129
",SIMD:, ", kernel.getKernelInfo().getMaxSimdSize());
128130

131+
if (totalWorkItems > this->getDevice().getDeviceInfo().maxWorkGroupSize) {
132+
return CL_INVALID_WORK_GROUP_SIZE;
133+
}
134+
129135
enqueueHandler<CL_COMMAND_NDRANGE_KERNEL>(
130136
surfaces,
131137
false,

unit_tests/command_queue/enqueue_kernel_local_work_size_tests.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -78,20 +78,20 @@ TEST_F(EnqueueKernelRequiredWorkSize, unspecifiedWorkGroupSize) {
7878

7979
EXPECT_EQ(CL_SUCCESS, retVal);
8080

81-
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
82-
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
81+
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
82+
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
8383
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
8484

85-
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
86-
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
85+
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
86+
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
8787
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
8888
}
8989

9090
// Fully specified
9191
TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
9292
size_t globalWorkOffset[3] = {0, 0, 0};
9393
size_t globalWorkSize[3] = {32, 32, 32};
94-
size_t localWorkSize[3] = {16, 8, 4};
94+
size_t localWorkSize[3] = {8, 4, 4};
9595

9696
auto retVal = pCmdQ->enqueueKernel(
9797
pKernel,
@@ -105,20 +105,20 @@ TEST_F(EnqueueKernelRequiredWorkSize, matchingRequiredWorkGroupSize) {
105105

106106
EXPECT_EQ(CL_SUCCESS, retVal);
107107

108-
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 16u);
109-
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 8u);
108+
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeX, 8u);
109+
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeY, 4u);
110110
EXPECT_EQ(*pKernel->enqueuedLocalWorkSizeZ, 4u);
111111

112-
EXPECT_EQ(*pKernel->localWorkSizeX, 16u);
113-
EXPECT_EQ(*pKernel->localWorkSizeY, 8u);
112+
EXPECT_EQ(*pKernel->localWorkSizeX, 8u);
113+
EXPECT_EQ(*pKernel->localWorkSizeY, 4u);
114114
EXPECT_EQ(*pKernel->localWorkSizeZ, 4u);
115115
}
116116

117117
// Underspecified. Won't permit.
118118
TEST_F(EnqueueKernelRequiredWorkSize, givenKernelRequiringLocalWorkgroupSizeWhen1DimensionIsPassedThatIsCorrectThenNdRangeIsSuccesful) {
119119
size_t globalWorkOffset[1] = {0};
120120
size_t globalWorkSize[1] = {32};
121-
size_t localWorkSize[1] = {16};
121+
size_t localWorkSize[1] = {8};
122122

123123
auto retVal = pCmdQ->enqueueKernel(
124124
pKernel,

unit_tests/command_queue/enqueue_kernel_tests.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,3 +1547,13 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreNotAndEventExistSetThenClEnqu
15471547

15481548
clReleaseCommandQueue(pCmdQ2);
15491549
}
1550+
1551+
TEST_F(EnqueueKernelTest, givenEnqueueCommandThatLwsExceedsDeviceCapabilitiesWhenEnqueueNDRangeKernelIsCalledThenErrorIsReturned) {
1552+
auto maxWorkgroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
1553+
size_t globalWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
1554+
size_t localWorkSize[3] = {maxWorkgroupSize * 2, 1, 1};
1555+
MockKernelWithInternals mockKernel(*pDevice);
1556+
1557+
auto status = pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
1558+
EXPECT_EQ(CL_INVALID_WORK_GROUP_SIZE, status);
1559+
}

unit_tests/context/driver_diagnostics_enqueue_tests.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -764,10 +764,10 @@ TEST_P(PerformanceHintEnqueueKernelBadSizeTest, GivenBadLocalWorkGroupSizeWhenEn
764764
}
765765

766766
badSizeDimension = GetParam();
767-
if (badSizeDimension == 0) {
767+
if (localWorkGroupSize[badSizeDimension] > 1) {
768768
localWorkGroupSize[badSizeDimension] /= 2;
769769
} else {
770-
localWorkGroupSize[badSizeDimension] *= 2;
770+
localWorkGroupSize[0] /= 2;
771771
}
772772

773773
retVal = pCmdQ->enqueueKernel(kernel, 3, nullptr, globalWorkGroupSize, localWorkGroupSize, 0, nullptr, nullptr);

unit_tests/program/program_nonuniform.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -240,7 +240,7 @@ TEST_F(ProgramNonUniformTest, ExecuteKernelNonUniform21) {
240240
ASSERT_NE(nullptr, pKernel);
241241

242242
size_t globalWorkSize[3] = {12, 12, 12};
243-
size_t localWorkSize[3] = {11, 12, 12};
243+
size_t localWorkSize[3] = {11, 12, 1};
244244

245245
retVal = pCmdQ->enqueueKernel(
246246
pKernel,

unit_tests/test_files/required_work_group.cl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2017, Intel Corporation
2+
* Copyright (c) 2017 - 2018, Intel Corporation
33
*
44
* Permission is hereby granted, free of charge, to any person obtaining a
55
* copy of this software and associated documentation files (the "Software"),
@@ -20,7 +20,7 @@
2020
* OTHER DEALINGS IN THE SOFTWARE.
2121
*/
2222

23-
__kernel __attribute__((reqd_work_group_size(16, 8, 4)))
23+
__kernel __attribute__((reqd_work_group_size(8, 4, 4)))
2424
void CopyBuffer(
2525
__global unsigned int *src,
2626
__global unsigned int *dst)

0 commit comments

Comments
 (0)