Skip to content

Commit ff382db

Browse files
committed
OpenCL: Self-test with default LWS at device maximum
Also with a GWS of 2x that LWS. The new figures are better at triggering bugs. If a kernel needs a lower LWS than the device maximum, our code already handles that. We previously had it as LWS=7 GWS=49 for speed (and for checking heuristics that could bug out on non-log2 values), but that was introduced before our autotune was sped up by orders of magnitude, and those heuristics have been stable for many years. Like before, the self-test will obey any given lws/gws options or environment variables. Closes #5822
1 parent 615af59 commit ff382db

File tree

2 files changed

+14
-13
lines changed

2 files changed

+14
-13
lines changed

src/opencl_autotune.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,12 @@ static void autotune_run_extra(struct fmt_main *self, unsigned int rounds,
112112
if (options.flags & FLG_SHOW_CHK)
113113
return;
114114

115-
// FIXME add optional test-same-sizes
116115
if (self_test_running) {
117-
local_work_size = 7;
118-
global_work_size = 49;
116+
if (cpu(device_info[gpu_id]))
117+
local_work_size = get_platform_vendor_id(platform_id) == DEV_INTEL ? 8 : 1;
118+
else
119+
local_work_size = get_device_max_lws(gpu_id);
120+
global_work_size = local_work_size;
119121
}
120122

121123
ocl_autotune_running = 1;

src/opencl_common.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,22 +1099,21 @@ void opencl_get_user_preferences(const char *format)
10991099

11001100
void opencl_get_sane_lws_gws_values()
11011101
{
1102-
if (self_test_running) {
1103-
local_work_size = 7;
1104-
global_work_size = 49;
1105-
}
11061102

1107-
if (!local_work_size) {
1103+
if (!local_work_size || self_test_running) {
11081104
if (cpu(device_info[gpu_id]))
11091105
local_work_size =
1110-
get_platform_vendor_id(platform_id) == DEV_INTEL ?
1111-
8 : 1;
1106+
get_platform_vendor_id(platform_id) == DEV_INTEL ? 8 : 1;
1107+
else if (self_test_running)
1108+
local_work_size = get_device_max_lws(gpu_id);
11121109
else
1113-
local_work_size = 64;
1110+
local_work_size = 2 * get_device_warp_size(gpu_id);
11141111
}
11151112

1116-
if (!global_work_size)
1117-
global_work_size = 768;
1113+
if (self_test_running)
1114+
global_work_size = local_work_size;
1115+
else if (!global_work_size)
1116+
global_work_size = 12 * local_work_size;
11181117
}
11191118

11201119
char* get_device_name_(int sequential_id)

0 commit comments

Comments
 (0)