Skip to content

Commit

Permalink
Add gridSyncTest to test syncGridThreads()
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelVarvarin committed Nov 17, 2024
1 parent 53e51c2 commit 0ff0779
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 2 deletions.
12 changes: 10 additions & 2 deletions include/alpaka/test/KernelExecutionFixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
namespace alpaka::test
{
//! The fixture for executing a kernel on a given accelerator.
template<typename TAcc>
template<typename TAcc, bool TCooperative = false>
class KernelExecutionFixture
{
public:
Expand Down Expand Up @@ -80,7 +80,15 @@ namespace alpaka::test
getPtrNative(bufAccResult),
std::forward<TArgs>(args)...);

exec<Acc>(m_queue, m_workDiv, kernelFnObj, getPtrNative(bufAccResult), std::forward<TArgs>(args)...);
if constexpr(TCooperative)
execCooperative<Acc>(
m_queue,
m_workDiv,
kernelFnObj,
getPtrNative(bufAccResult),
std::forward<TArgs>(args)...);
else
exec<Acc>(m_queue, m_workDiv, kernelFnObj, getPtrNative(bufAccResult), std::forward<TArgs>(args)...);

// Copy the result value to the host
auto bufHostResult = allocBuf<bool, Idx>(m_devHost, static_cast<Idx>(1u));
Expand Down
1 change: 1 addition & 0 deletions test/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_subdirectory("block/sync/")
add_subdirectory("core/")
add_subdirectory("dev/")
add_subdirectory("event/")
add_subdirectory("grid/")
add_subdirectory("idx/")
add_subdirectory("intrinsic/")
add_subdirectory("kernel/")
Expand Down
20 changes: 20 additions & 0 deletions test/unit/grid/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#
# Copyright 2024 Mykhailo Varvarin
# SPDX-License-Identifier: MPL-2.0
#

# Name shared by the executable target and the registered test.
set(_TARGET_NAME "gridSyncTest")

# Fills _FILES_SOURCE; presumably gathers the *.cpp files below src/ and puts
# them into the "src/" source group -- helper is defined elsewhere, confirm there.
append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)

# Build the test executable and link it against the `common` target.
alpaka_add_executable(${_TARGET_NAME} ${_FILES_SOURCE})
target_link_libraries(${_TARGET_NAME} PRIVATE common)

# Per-target settings: compile definition and IDE folder.
target_compile_definitions(${_TARGET_NAME} PRIVATE "-DTEST_UNIT_GRID_SYNC")
set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER "test/unit")

# Register the executable as a test, forwarding the project-wide test options.
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS})
86 changes: 86 additions & 0 deletions test/unit/grid/src/GridSync.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/* Copyright 2024 Mykhailo Varvarin
* SPDX-License-Identifier: MPL-2.0
*/

#include <alpaka/grid/Traits.hpp>
#include <alpaka/test/KernelExecutionFixture.hpp>
#include <alpaka/test/acc/TestAccs.hpp>

#include <catch2/catch_template_test_macros.hpp>
#include <catch2/catch_test_macros.hpp>

//! Kernel exercising alpaka::syncGridThreads().
//!
//! Every thread stores its linearized grid-thread index into its own slot of `array`, calls
//! alpaka::syncGridThreads(acc), and afterwards checks every slot of `array` against its index.
class GridSyncTestKernel
{
public:
    //! \return The number of threads per block requested in each dimension.
    static constexpr std::uint8_t blockThreadExtentPerDim()
    {
        return 2u;
    }

    //! \param acc The accelerator the kernel is executed with.
    //! \param success Result flag; `*success` is handed to ALPAKA_CHECK for every comparison.
    //! \param array Buffer indexed by the linearized grid-thread index; it must provide at least
    //!              one element per thread of the grid.
    ALPAKA_NO_HOST_ACC_WARNING
    template<typename TAcc, typename T>
    ALPAKA_FN_ACC auto operator()(TAcc const& acc, bool* success, T* array) const -> void
    {
        using Idx = alpaka::Idx<TAcc>;

        // Extent of the grid in threads and this thread's position in it, both N-dimensional.
        auto const threadExtentInGrid = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc);
        auto const threadIdxInGrid = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc);

        // Flatten position and extent to one dimension.
        auto const linearThreadIdx = alpaka::mapIdx<1u>(threadIdxInGrid, threadExtentInGrid)[0u];
        auto const totalThreadCount = threadExtentInGrid.prod();

        // Each thread fills exactly its own slot of the buffer.
        array[linearThreadIdx] = static_cast<T>(linearThreadIdx);

        // Synchronize all threads of the grid (not only those of the current block).
        alpaka::syncGridThreads(acc);

        // After the grid sync every slot is expected to hold its own index, i.e. the writes of
        // all other threads in the grid are expected to be visible to this thread.
        for(Idx i{0}; i < totalThreadCount; ++i)
        {
            ALPAKA_CHECK(*success, static_cast<Idx>(array[i]) == i);
        }
    }
};

// Runs GridSyncTestKernel through a cooperative KernelExecutionFixture for every accelerator in
// alpaka::test::TestAccs and requires the fixture call to report success.
TEMPLATE_LIST_TEST_CASE("synchronize", "[gridSync]", alpaka::test::TestAccs)
{
    using Acc = TestType;
    using Dim = alpaka::Dim<Acc>;
    using Idx = alpaka::Idx<Acc>;
    using VecNd = alpaka::Vec<Dim, Idx>;

    // Device 0 of the platform that belongs to the accelerator under test.
    auto const platformAcc = alpaka::Platform<Acc>{};
    auto const devAcc = getDevByIdx(platformAcc, 0u);

    // Block extent: the per-dimension extent requested by the kernel, capped by the device limit.
    auto const deviceBlockLimit = alpaka::getAccDevProps<Acc>(devAcc).m_blockThreadExtentMax;
    auto blockThreadExtent = alpaka::elementwise_min(
        deviceBlockLimit,
        VecNd::all(static_cast<Idx>(GridSyncTestKernel::blockThreadExtentPerDim())));

    // One element per thread; initially 200 blocks along the first dimension, 1 along the others.
    auto threadElemExtent = VecNd::all(1);
    auto gridBlockExtent = VecNd::all(1);
    gridBlockExtent[0] = 200;

    // Allocate the device buffer from the initial grid extent. The grid is only ever reduced
    // below, so the buffer is at least as large as the final launch requires.
    alpaka::Vec<alpaka::DimInt<1>, Idx> slotCount{
        gridBlockExtent.prod() * blockThreadExtent.prod() * threadElemExtent.prod()};
    auto deviceMemory = alpaka::allocBuf<Idx, Idx>(devAcc, slotCount);

    GridSyncTestKernel kernel;

    // Query the block limit for this kernel and configuration.
    // NOTE(review): only the buffer pointer is passed as kernel argument here, whereas the fixture
    // launch prepends a `bool*` result pointer -- confirm this argument list is what
    // getMaxActiveBlocks expects.
    int const activeBlockLimit = alpaka::getMaxActiveBlocks<Acc>(
        devAcc,
        kernel,
        blockThreadExtent,
        threadElemExtent,
        alpaka::getPtrNative(deviceMemory));

    // Never launch more blocks along the first dimension than the queried limit.
    gridBlockExtent[0] = std::min(static_cast<Idx>(activeBlockLimit), gridBlockExtent[0]);

    // `true` selects the fixture's execCooperative launch path.
    constexpr bool launchCooperatively = true;
    alpaka::test::KernelExecutionFixture<Acc, launchCooperatively> fixture(
        alpaka::WorkDivMembers<Dim, Idx>{gridBlockExtent, blockThreadExtent, threadElemExtent});

    REQUIRE(fixture(kernel, alpaka::getPtrNative(deviceMemory)));
}

0 comments on commit 0ff0779

Please sign in to comment.