@@ -1544,8 +1544,12 @@ static cl_ulong gws_test(size_t gws, unsigned int rounds, int sequential_id)
1544
1544
1545
1545
int prof_bug = 0 ;
1546
1546
1547
- HANDLE_CLERROR (clWaitForEvents (1 , multi_profilingEvent [i ]),
1548
- "clWaitForEvents" );
1547
+ if (clWaitForEvents (1 , multi_profilingEvent [i ]) != CL_SUCCESS ) {
1548
+ if (options .verbosity > VERB_LEGACY )
1549
+ fprintf (stderr , "Profiling errors; Skipping results\n" );
1550
+ return 0 ;
1551
+ }
1552
+
1549
1553
HANDLE_CLERROR (clGetEventProfilingInfo (* multi_profilingEvent [i ],
1550
1554
CL_PROFILING_COMMAND_SUBMIT ,
1551
1555
sizeof (cl_ulong ), & submitTime ,
@@ -1731,41 +1735,47 @@ void opencl_find_best_lws(size_t group_size_limit, int sequential_id,
1731
1735
self -> methods .cmp_all (binary , result );
1732
1736
uint64_t wc_end = john_get_nano ();
1733
1737
1734
- HANDLE_CLERROR (clWaitForEvents (1 , & benchEvent [main_opencl_event ]),
1735
- "clWaitForEvents" );
1736
- HANDLE_CLERROR (clFinish (queue [sequential_id ]), "clFinish" );
1737
- HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1738
- CL_PROFILING_COMMAND_SUBMIT ,
1739
- sizeof (cl_ulong ), & submitTime , NULL ),
1740
- "clGetEventProfilingInfo submit" );
1741
- HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1742
- CL_PROFILING_COMMAND_START ,
1743
- sizeof (cl_ulong ), & startTime , NULL ),
1744
- "clGetEventProfilingInfo start" );
1745
- HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1746
- CL_PROFILING_COMMAND_END ,
1747
- sizeof (cl_ulong ), & endTime , NULL ),
1748
- "clGetEventProfilingInfo end" );
1749
-
1750
- /*
1751
- * Work around driver bugs. Problems seen with old AMD and Apple M1.
1752
- * If startTime looks b0rken we use submitTime instead
1753
- */
1754
- if ((endTime - submitTime ) > 10 * (endTime - startTime )) {
1755
- if (options .verbosity > VERB_LEGACY )
1756
- fprintf (stderr , "Note: Profiling timers seem buggy\n" );
1757
- startTime = submitTime ;
1758
- }
1759
- /*
1760
- * For numloops enumeration, we even double-check with wall clock time
1761
- * and if it drastically differs from the profile timer, use the former
1762
- * so we don't end up with a huge numloops where inappropriate.
1763
- */
1764
- if ((wc_end - wc_start ) > 10 * (endTime - startTime )) {
1738
+ if ((clWaitForEvents (1 , & benchEvent [main_opencl_event ]) != CL_SUCCESS ) ||
1739
+ (clFinish (queue [sequential_id ]) != CL_SUCCESS )) {
1765
1740
if (options .verbosity > VERB_LEGACY )
1766
- fprintf (stderr , "Note: Profiling timers seem to be way off \n" );
1741
+ fprintf (stderr , "Profiling errors; Using wall-clock time instead \n" );
1767
1742
startTime = wc_start ;
1768
1743
endTime = wc_end ;
1744
+ } else {
1745
+ HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1746
+ CL_PROFILING_COMMAND_SUBMIT ,
1747
+ sizeof (cl_ulong ), & submitTime , NULL ),
1748
+ "clGetEventProfilingInfo submit" );
1749
+ HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1750
+ CL_PROFILING_COMMAND_START ,
1751
+ sizeof (cl_ulong ), & startTime , NULL ),
1752
+ "clGetEventProfilingInfo start" );
1753
+ HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent [main_opencl_event ],
1754
+ CL_PROFILING_COMMAND_END ,
1755
+ sizeof (cl_ulong ), & endTime , NULL ),
1756
+ "clGetEventProfilingInfo end" );
1757
+
1758
+ /*
1759
+ * Work around driver bugs. Problems seen with old AMD and Apple M1.
1760
+ * If startTime looks b0rken we use submitTime instead
1761
+ */
1762
+ if ((endTime - submitTime ) > 10 * (endTime - startTime )) {
1763
+ if (options .verbosity > VERB_LEGACY )
1764
+ fprintf (stderr , "Note: Profiling timers seem buggy\n" );
1765
+ startTime = submitTime ;
1766
+ }
1767
+
1768
+ /*
1769
+ * For numloops enumeration, we even double-check with wall clock time
1770
+ * and if it drastically differs from the profile timer, use the former
1771
+ * so we don't end up with a huge numloops where inappropriate.
1772
+ */
1773
+ if ((wc_end - wc_start ) > 10 * (endTime - startTime )) {
1774
+ if (options .verbosity > VERB_LEGACY )
1775
+ fprintf (stderr , "Note: Profiling timers seem to be way off\n" );
1776
+ startTime = wc_start ;
1777
+ endTime = wc_end ;
1778
+ }
1769
1779
}
1770
1780
1771
1781
cl_ulong roundup = endTime - startTime - 1 ;
@@ -1815,9 +1825,14 @@ void opencl_find_best_lws(size_t group_size_limit, int sequential_id,
1815
1825
break ;
1816
1826
}
1817
1827
1818
- HANDLE_CLERROR (clWaitForEvents (1 , & benchEvent [main_opencl_event ]),
1819
- "clWaitForEvents" );
1820
- HANDLE_CLERROR (clFinish (queue [sequential_id ]), "clFinish" );
1828
+ if ((clWaitForEvents (1 , & benchEvent [main_opencl_event ]) != CL_SUCCESS ) ||
1829
+ (clFinish (queue [sequential_id ]) != CL_SUCCESS )) {
1830
+ if (options .verbosity > VERB_LEGACY )
1831
+ fprintf (stderr , "Profiling errors; Skipping results\n" );
1832
+ startTime = endTime = 0 ;
1833
+ break ;
1834
+ }
1835
+
1821
1836
HANDLE_CLERROR (clGetEventProfilingInfo (benchEvent
1822
1837
[main_opencl_event ], CL_PROFILING_COMMAND_SUBMIT ,
1823
1838
sizeof (cl_ulong ), & submitTime , NULL ),
0 commit comments