From 2dce71a9e57546f2089f45ad8f07147d4398879e Mon Sep 17 00:00:00 2001 From: Wayne Franz Date: Tue, 19 Mar 2024 12:30:46 -0400 Subject: [PATCH] Re-enable hipGraph version of device_adjacent_difference test (#534) Previously, we disabled the hipGraph version of the LargeIndices test for the device_adjacent_difference algorithm because it was failing on Navi3x and MI300. The root cause of this failure was a call to hipMemset that occasionally continued to run while the hipGraph was instantiated. I've now learned that this is expected behaviour - hipMemset runs asynchronously unless the pointer it is passed refers to pinned host memory. This is true even if the call to hipMemset is issued into the default stream, like it is in this case. hipMemset*Async*'s behaviour differs only with respect to pinned host memory (it's non-blocking in that case). This behaviour is not currently mentioned in the HIP documentation, so I'll put in a request to add it there. This change just inserts a call to hipDeviceSynchronize to ensure that the hipMemset call completes before the graph is instantiated. It also removes the GTEST_SKIP() call that disabled the hipGraph version of the test. --- test/rocprim/test_device_adjacent_difference.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/rocprim/test_device_adjacent_difference.cpp b/test/rocprim/test_device_adjacent_difference.cpp index 4776c3e20..197a170f7 100644 --- a/test/rocprim/test_device_adjacent_difference.cpp +++ b/test/rocprim/test_device_adjacent_difference.cpp @@ -460,9 +460,6 @@ TYPED_TEST_SUITE(RocprimDeviceAdjacentDifferenceLargeTests, TYPED_TEST(RocprimDeviceAdjacentDifferenceLargeTests, LargeIndices) { - if (TestFixture::use_graphs) - GTEST_SKIP() << "Temporarily skipping test within hipGraphs. 
Will re-enable when issues with atomics inside graphs are resolved."; - const int device_id = test_common_utils::obtain_device_from_ctest(); SCOPED_TRACE(testing::Message() << "with device_id = " << device_id); @@ -502,6 +499,10 @@ TYPED_TEST(RocprimDeviceAdjacentDifferenceLargeTests, LargeIndices) HIP_CHECK(test_common_utils::hipMallocHelper(&d_counter, sizeof(*d_counter))); HIP_CHECK(hipMemset(d_incorrect_flag, 0, sizeof(*d_incorrect_flag))); HIP_CHECK(hipMemset(d_counter, 0, sizeof(*d_counter))); + // Note: hipMemset runs asynchronously unless the pointer it's passed refers to pinned host memory. + // Wait for it to complete here before we use the device variables we just set. + HIP_CHECK(hipDeviceSynchronize()); + OutputIterator output(d_incorrect_flag, d_counter); const auto input = rocprim::make_counting_iterator(T{0});