Skip to content

Commit 7c54369

Browse files
magnumripper authored and solardiz committed
OpenCL: Chug along after seeing profiling errors during auto-tune
If profiling errors are seen during LWS/GWS tests, silently ignore that run's timings and continue. Closes openwall#4044
1 parent 92852ae commit 7c54369

File tree

1 file changed

+52
-37
lines changed

1 file changed

+52
-37
lines changed

src/opencl_common.c

+52 -37
Original file line number | Diff line number | Diff line change
@@ -1544,8 +1544,12 @@ static cl_ulong gws_test(size_t gws, unsigned int rounds, int sequential_id)
15441544

15451545
int prof_bug = 0;
15461546

1547-
HANDLE_CLERROR(clWaitForEvents(1, multi_profilingEvent[i]),
1548-
"clWaitForEvents");
1547+
if (clWaitForEvents(1, multi_profilingEvent[i]) != CL_SUCCESS) {
1548+
if (options.verbosity > VERB_LEGACY)
1549+
fprintf(stderr, "Profiling errors; Skipping results\n");
1550+
return 0;
1551+
}
1552+
15491553
HANDLE_CLERROR(clGetEventProfilingInfo(*multi_profilingEvent[i],
15501554
CL_PROFILING_COMMAND_SUBMIT,
15511555
sizeof(cl_ulong), &submitTime,
@@ -1731,41 +1735,47 @@ void opencl_find_best_lws(size_t group_size_limit, int sequential_id,
17311735
self->methods.cmp_all(binary, result);
17321736
uint64_t wc_end = john_get_nano();
17331737

1734-
HANDLE_CLERROR(clWaitForEvents(1, &benchEvent[main_opencl_event]),
1735-
"clWaitForEvents");
1736-
HANDLE_CLERROR(clFinish(queue[sequential_id]), "clFinish");
1737-
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1738-
CL_PROFILING_COMMAND_SUBMIT,
1739-
sizeof(cl_ulong), &submitTime, NULL),
1740-
"clGetEventProfilingInfo submit");
1741-
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1742-
CL_PROFILING_COMMAND_START,
1743-
sizeof(cl_ulong), &startTime, NULL),
1744-
"clGetEventProfilingInfo start");
1745-
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1746-
CL_PROFILING_COMMAND_END,
1747-
sizeof(cl_ulong), &endTime, NULL),
1748-
"clGetEventProfilingInfo end");
1749-
1750-
/*
1751-
* Work around driver bugs. Problems seen with old AMD and Apple M1.
1752-
* If startTime looks b0rken we use submitTime instead
1753-
*/
1754-
if ((endTime - submitTime) > 10 * (endTime - startTime)) {
1755-
if (options.verbosity > VERB_LEGACY)
1756-
fprintf(stderr, "Note: Profiling timers seem buggy\n");
1757-
startTime = submitTime;
1758-
}
1759-
/*
1760-
* For numloops enumeration, we even double-check with wall clock time
1761-
* and if it drastically differs from the profile timer, use the former
1762-
* so we don't end up with a huge numloops where inappropriate.
1763-
*/
1764-
if ((wc_end - wc_start) > 10 * (endTime - startTime)) {
1738+
if ((clWaitForEvents(1, &benchEvent[main_opencl_event]) != CL_SUCCESS) ||
1739+
(clFinish(queue[sequential_id]) != CL_SUCCESS)) {
17651740
if (options.verbosity > VERB_LEGACY)
1766-
fprintf(stderr, "Note: Profiling timers seem to be way off\n");
1741+
fprintf(stderr, "Profiling errors; Using wall-clock time instead\n");
17671742
startTime = wc_start;
17681743
endTime = wc_end;
1744+
} else {
1745+
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1746+
CL_PROFILING_COMMAND_SUBMIT,
1747+
sizeof(cl_ulong), &submitTime, NULL),
1748+
"clGetEventProfilingInfo submit");
1749+
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1750+
CL_PROFILING_COMMAND_START,
1751+
sizeof(cl_ulong), &startTime, NULL),
1752+
"clGetEventProfilingInfo start");
1753+
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent[main_opencl_event],
1754+
CL_PROFILING_COMMAND_END,
1755+
sizeof(cl_ulong), &endTime, NULL),
1756+
"clGetEventProfilingInfo end");
1757+
1758+
/*
1759+
* Work around driver bugs. Problems seen with old AMD and Apple M1.
1760+
* If startTime looks b0rken we use submitTime instead
1761+
*/
1762+
if ((endTime - submitTime) > 10 * (endTime - startTime)) {
1763+
if (options.verbosity > VERB_LEGACY)
1764+
fprintf(stderr, "Note: Profiling timers seem buggy\n");
1765+
startTime = submitTime;
1766+
}
1767+
1768+
/*
1769+
* For numloops enumeration, we even double-check with wall clock time
1770+
* and if it drastically differs from the profile timer, use the former
1771+
* so we don't end up with a huge numloops where inappropriate.
1772+
*/
1773+
if ((wc_end - wc_start) > 10 * (endTime - startTime)) {
1774+
if (options.verbosity > VERB_LEGACY)
1775+
fprintf(stderr, "Note: Profiling timers seem to be way off\n");
1776+
startTime = wc_start;
1777+
endTime = wc_end;
1778+
}
17691779
}
17701780

17711781
cl_ulong roundup = endTime - startTime - 1;
@@ -1815,9 +1825,14 @@ void opencl_find_best_lws(size_t group_size_limit, int sequential_id,
18151825
break;
18161826
}
18171827

1818-
HANDLE_CLERROR(clWaitForEvents(1, &benchEvent[main_opencl_event]),
1819-
"clWaitForEvents");
1820-
HANDLE_CLERROR(clFinish(queue[sequential_id]), "clFinish");
1828+
if ((clWaitForEvents(1, &benchEvent[main_opencl_event]) != CL_SUCCESS) ||
1829+
(clFinish(queue[sequential_id]) != CL_SUCCESS)) {
1830+
if (options.verbosity > VERB_LEGACY)
1831+
fprintf(stderr, "Profiling errors; Skipping results\n");
1832+
startTime = endTime = 0;
1833+
break;
1834+
}
1835+
18211836
HANDLE_CLERROR(clGetEventProfilingInfo(benchEvent
18221837
[main_opencl_event], CL_PROFILING_COMMAND_SUBMIT,
18231838
sizeof(cl_ulong), &submitTime, NULL),

0 commit comments

Comments
 (0)