Skip to content

Commit aed3fad

Browse files
Revert "Use Eu per dss to calculate max work group size"
This reverts commit 86b12dd23979db12e1898013c9162cb7106e40f1. Signed-off-by: Maciej Plewka <[email protected]>
1 parent 55374d4 commit aed3fad

File tree

8 files changed

+8
-35
lines changed

8 files changed

+8
-35
lines changed

opencl/source/kernel/kernel.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c
7777
program->retainForKernel();
7878
imageTransformer.reset(new ImageTransformer);
7979
if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) {
80-
auto deviceInfo = getDevice().getDevice().getDeviceInfo();
81-
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
80+
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(getHardwareInfo(), static_cast<uint32_t>(getDevice().getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
8281
} else {
8382
maxKernelWorkGroupSize = static_cast<uint32_t>(clDevice.getSharedDeviceInfo().maxWorkGroupSize);
8483
}

opencl/test/unit_test/device/device_caps_tests.cpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1685,20 +1685,3 @@ HWTEST_F(QueueFamilyNameTest, givenBcsWhenGettingQueueFamilyNameThenReturnProper
16851685
HWTEST_F(QueueFamilyNameTest, givenInvalidEngineGroupWhenGettingQueueFamilyNameThenReturnEmptyName) {
16861686
verify(EngineGroupType::MaxEngineGroups, "");
16871687
}
1688-
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThenMaxWorkGroupCalculatedCorrectly) {
1689-
HardwareInfo myHwInfo = *defaultHwInfo;
1690-
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
1691-
PLATFORM &myPlatform = myHwInfo.platform;
1692-
1693-
mySysInfo.EUCount = 16;
1694-
mySysInfo.SubSliceCount = 4;
1695-
mySysInfo.DualSubSliceCount = 2;
1696-
mySysInfo.ThreadCount = 16 * 8;
1697-
myPlatform.usRevId = 0x4;
1698-
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
1699-
auto minSimd = 8;
1700-
1701-
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd;
1702-
1703-
EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize);
1704-
}

opencl/test/unit_test/kernel/kernel_tests.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2515,7 +2515,6 @@ HWTEST_F(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedTh
25152515

25162516
mySysInfo.EUCount = 24;
25172517
mySysInfo.SubSliceCount = 3;
2518-
mySysInfo.DualSubSliceCount = 3;
25192518
mySysInfo.ThreadCount = 24 * 7;
25202519
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
25212520

@@ -3167,8 +3166,7 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi
31673166
std::unique_ptr<MockKernel> pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice));
31683167

31693168
auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
3170-
auto deviceInfo = pClDevice->getDevice().getDeviceInfo();
3171-
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
3169+
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(pKernel->getHardwareInfo(), static_cast<uint32_t>(pClDevice->getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
31723170

31733171
EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize);
31743172
EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG);

shared/source/device/device_caps.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,9 @@ void Device::initializeCaps() {
107107
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
108108
? (systemInfo.EUCount / systemInfo.SubSliceCount)
109109
: systemInfo.EuCountPerPoolMin;
110-
111-
deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
112-
? (systemInfo.EUCount / systemInfo.DualSubSliceCount)
113-
: systemInfo.EuCountPerPoolMin;
114-
115110
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
116111
deviceInfo.threadsPerEUConfigs = hwHelper.getThreadsPerEUConfigs();
117-
auto maxWS = hwHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed;
112+
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
118113

119114
maxWS = Math::prevPowerOfTwo(maxWS);
120115
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);

shared/source/device/device_info.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ struct DeviceInfo {
2626
size_t imageMaxArraySize;
2727
size_t imageMaxBufferSize;
2828
size_t maxNumEUsPerSubSlice;
29-
size_t maxNumEUsPerDualSubSlice;
3029
size_t maxParameterSize;
3130
size_t maxWorkGroupSize;
3231
size_t maxWorkItemSizes[3];

shared/source/helpers/hw_helper.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ class HwHelper {
9797
virtual std::string getExtensions() const = 0;
9898
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
9999
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
100-
virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0;
101100
virtual uint32_t getMetricsLibraryGenId() const = 0;
102101
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
103102
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
@@ -212,7 +211,7 @@ class HwHelperHw : public HwHelper {
212211

213212
size_t getPaddingForISAAllocation() const override;
214213

215-
uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override;
214+
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
216215

217216
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
218217

shared/source/helpers/hw_helper_bdw_plus.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ uint32_t HwHelperHw<GfxFamily>::getPlanarYuvMaxHeight() const {
111111
}
112112

113113
template <typename GfxFamily>
114-
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
114+
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
115115
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
116116
}
117117

shared/source/helpers/hw_helper_xehp_plus.inl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,11 +194,11 @@ inline bool HwHelperHw<GfxFamily>::preferSmallWorkgroupSizeForKernel(const size_
194194
}
195195

196196
template <typename GfxFamily>
197-
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
197+
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
198198
if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) {
199-
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u);
199+
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice), 64u);
200200
}
201-
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice);
201+
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
202202
}
203203

204204
} // namespace NEO

0 commit comments

Comments
 (0)