Skip to content

Commit ba4cebc

Browse files
committed
OpenCL: Self-test with default LWS at device maximum
Also with a GWS of 2x that LWS. The new figures are better at triggering bugs. If a kernel needs a lower LWS than the device maximum, our code already handles that. We previously had it as LWS=7 GWS=49 for speed (and for checking heuristics that could bug out on non-log2 values), but that was introduced before our autotune was sped up by orders of magnitude, and those heuristics have been stable for many years. Like before, the self-test will obey any given lws/gws options or environment variables. Closes #5822
1 parent 615af59 commit ba4cebc

File tree

2 files changed

+7
-11
lines changed

2 files changed

+7
-11
lines changed

src/opencl_autotune.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,9 @@ static void autotune_run_extra(struct fmt_main *self, unsigned int rounds,
112112
if (options.flags & FLG_SHOW_CHK)
113113
return;
114114

115-
// FIXME add optional test-same-sizes
116-
if (self_test_running) {
117-
local_work_size = 7;
118-
global_work_size = 49;
119-
}
115+
/* We could do with 2x get_device_warp_size() but that isn't faster */
116+
if (self_test_running)
117+
global_work_size = local_work_size = get_device_max_lws(gpu_id);
120118

121119
ocl_autotune_running = 1;
122120

src/opencl_common.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,22 +1099,20 @@ void opencl_get_user_preferences(const char *format)
10991099

11001100
void opencl_get_sane_lws_gws_values()
11011101
{
1102-
if (self_test_running) {
1103-
local_work_size = 7;
1104-
global_work_size = 49;
1105-
}
1102+
if (self_test_running)
1103+
global_work_size = local_work_size = get_device_max_lws(gpu_id);
11061104

11071105
if (!local_work_size) {
11081106
if (cpu(device_info[gpu_id]))
11091107
local_work_size =
11101108
get_platform_vendor_id(platform_id) == DEV_INTEL ?
11111109
8 : 1;
11121110
else
1113-
local_work_size = 64;
1111+
local_work_size = 2 * get_device_warp_size(gpu_id);
11141112
}
11151113

11161114
if (!global_work_size)
1117-
global_work_size = 768;
1115+
global_work_size = 12 * local_work_size;
11181116
}
11191117

11201118
char* get_device_name_(int sequential_id)

0 commit comments

Comments
 (0)