Skip to content

Commit

Permalink
Change AccCpuTbbBlocks object back to being local for each thread
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelVarvarin committed Nov 17, 2024
1 parent 215a292 commit c95b136
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
8 changes: 6 additions & 2 deletions include/alpaka/acc/AccCpuTbbBlocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

// Implementation details.
#include "alpaka/acc/Tag.hpp"
#include "alpaka/core/BarrierTbb.h"
#include "alpaka/core/ClipCast.hpp"
#include "alpaka/core/Interface.hpp"
#include "alpaka/dev/DevCpu.hpp"
Expand Down Expand Up @@ -90,13 +91,16 @@ namespace alpaka

private:
template<typename TWorkDiv>
ALPAKA_FN_HOST AccCpuTbbBlocks(TWorkDiv const& workDiv, std::size_t const& blockSharedMemDynSizeBytes)
ALPAKA_FN_HOST AccCpuTbbBlocks(
TWorkDiv const& workDiv,
std::size_t const& blockSharedMemDynSizeBytes,
core::tbb::BarrierThread<TIdx>& barrier)
: WorkDivMembers<TDim, TIdx>(workDiv)
, gb::IdxGbRef<TDim, TIdx>(m_gridBlockIdx)
, BlockSharedMemDynMember<>(blockSharedMemDynSizeBytes)
, BlockSharedMemStMember<>(staticMemBegin(), staticMemCapacity())
, GridSyncBarrierTbb<TIdx>(barrier)
, m_gridBlockIdx(Vec<TDim, TIdx>::zeros())
, GridSyncBarrierTbb<TIdx>(getWorkDiv<Grid, Threads>(workDiv).prod())
{
}

Expand Down
10 changes: 6 additions & 4 deletions include/alpaka/grid/GridSyncCpuTbbBlocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

#pragma once

#include "alpaka/core/BarrierTbb.h"
#include "alpaka/core/Common.hpp"
#include "alpaka/grid/Traits.hpp"

#ifdef ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED
# include "alpaka/core/BarrierTbb.h"

namespace alpaka
{
Expand All @@ -19,11 +19,13 @@ namespace alpaka
public:
using Barrier = core::tbb::BarrierThread<TIdx>;

ALPAKA_FN_HOST explicit GridSyncBarrierTbb(TIdx const& gridThreadCount) : m_barrier(gridThreadCount)
// The barrier is received from outside by reference because it has to be shared between blocks;
// only its address is stored here.
ALPAKA_FN_HOST explicit GridSyncBarrierTbb(Barrier& barrier) : m_barrier(&barrier)
{
}

Barrier mutable m_barrier;
Barrier* m_barrier;
};

namespace trait
Expand All @@ -33,7 +35,7 @@ namespace alpaka
{
ALPAKA_FN_HOST static auto syncGridThreads(GridSyncBarrierTbb<TIdx> const& gridSync) -> void
{
gridSync.m_barrier.wait();
gridSync.m_barrier->wait();
}
};

Expand Down
11 changes: 8 additions & 3 deletions include/alpaka/kernel/TaskKernelCpuTbbBlocks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,20 @@ namespace alpaka
tbb::this_task_arena::isolate(
[&]
{
AccCpuTbbBlocks<TDim, TIdx> acc(
*static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
blockSharedMemDynSizeBytes);
// Create a single barrier for grid synchronization. It is passed by reference to the accelerator object
// constructed in each parallel_for iteration, so that all of them wait on the same barrier.
core::tbb::BarrierThread<TIdx> barrier(numBlocksInGrid);

tbb::parallel_for(
static_cast<TIdx>(0),
static_cast<TIdx>(numBlocksInGrid),
[&](TIdx i)
{
AccCpuTbbBlocks<TDim, TIdx> acc(
*static_cast<WorkDivMembers<TDim, TIdx> const*>(this),
blockSharedMemDynSizeBytes,
barrier);

acc.m_gridBlockIdx
= mapIdx<TDim::value>(Vec<DimInt<1u>, TIdx>(static_cast<TIdx>(i)), gridBlockExtent);

Expand Down

0 comments on commit c95b136

Please sign in to comment.