Skip to content

Commit

Permalink
Redefine pitches
Browse files Browse the repository at this point in the history
This PR shifts the values returned from getPitchesInBytes by one position so
that they are consistent with the strides of std::mdspan (except that they are
expressed in bytes).

Example: the pitch vector for the extent {42, 10, 2} changes:

Before: {3360, 80, 8}
After: {80, 8, 4}

The new meaning is that the pitch value is the number of bytes to jump
from one element to the next in the given dimension.

Fixes: #2083
  • Loading branch information
bernhardmgruber authored and psychocoderHPC committed Sep 1, 2023
1 parent 4a18ebf commit 7e99f63
Show file tree
Hide file tree
Showing 17 changed files with 201 additions and 344 deletions.
8 changes: 4 additions & 4 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ auto main() -> int
// padding between rows/planes of multidimensional memory allocations.
// Therefore the pitch (distance between consecutive rows/planes) may be
// greater than the space required for the data.
Idx const deviceBuffer1Pitch(alpaka::getPitchesInBytes(deviceBuffer1)[2] / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchesInBytes(deviceBuffer2)[2] / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchesInBytes(hostBuffer)[2] / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchesInBytes(hostViewPlainPtr)[2] / sizeof(Data));
Idx const deviceBuffer1Pitch(alpaka::getPitchesInBytes(deviceBuffer1)[1] / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchesInBytes(deviceBuffer2)[1] / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchesInBytes(hostBuffer)[1] / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchesInBytes(hostViewPlainPtr)[1] / sizeof(Data));

// Test device Buffer
//
Expand Down
12 changes: 6 additions & 6 deletions example/randomCells2D/src/randomCells2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,16 +201,16 @@ auto main() -> int
RandomEngineVector<Acc>* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)};

InitRandomKernel initRandomKernel;
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[1];
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[0];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandS, pitchBufAccRandS);
alpaka::wait(queue);

auto pitchBufAccRandV = alpaka::getPitchesInBytes(bufAccRandV)[1];
auto pitchBufAccRandV = alpaka::getPitchesInBytes(bufAccRandV)[0];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandV, pitchBufAccRandV);
alpaka::wait(queue);

auto pitchHostS = alpaka::getPitchesInBytes(bufHostS)[1];
auto pitchHostV = alpaka::getPitchesInBytes(bufHostV)[1];
auto pitchHostS = alpaka::getPitchesInBytes(bufHostS)[0];
auto pitchHostV = alpaka::getPitchesInBytes(bufHostV)[0];

for(Idx y = 0; y < numY; ++y)
{
Expand All @@ -221,7 +221,7 @@ auto main() -> int
}
}

auto pitchBufAccS = alpaka::getPitchesInBytes(bufAccS)[1];
auto pitchBufAccS = alpaka::getPitchesInBytes(bufAccS)[0];
alpaka::memcpy(queue, bufAccS, bufHostS);
RunTimestepKernelSingle runTimestepKernelSingle;
alpaka::exec<Acc>(
Expand All @@ -235,7 +235,7 @@ auto main() -> int
pitchBufAccS);
alpaka::memcpy(queue, bufHostS, bufAccS);

auto pitchBufAccV = alpaka::getPitchesInBytes(bufAccV)[1];
auto pitchBufAccV = alpaka::getPitchesInBytes(bufAccV)[0];
alpaka::memcpy(queue, bufAccV, bufHostV);
RunTimestepKernelVector runTimestepKernelVector;
alpaka::exec<Acc>(
Expand Down
25 changes: 11 additions & 14 deletions include/alpaka/idx/MapIdx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#pragma once

#include "alpaka/core/Common.hpp"
#include "alpaka/vec/Traits.hpp"
#include "alpaka/vec/Vec.hpp"

#include <type_traits>
Expand Down Expand Up @@ -136,10 +137,11 @@ namespace alpaka

namespace detail
{
//! Maps a linear index to a N dimensional index assuming a buffer wihtout padding.
//! Maps a linear index to an N dimensional index assuming a buffer without padding.
template<std::size_t TidxDimOut, std::size_t TidxDimIn, typename TSfinae = void>
struct MapIdxPitchBytes;
//! Maps a N dimensional index to the same N dimensional index assuming a buffer wihtout padding.

//! Maps an N dimensional index to the same N dimensional index assuming a buffer without padding.
template<std::size_t TidxDim>
struct MapIdxPitchBytes<TidxDim, TidxDim>
{
Expand All @@ -156,7 +158,7 @@ namespace alpaka
return idx;
}
};
//! Maps a 1 dimensional index to a N dimensional index assuming a buffer wihtout padding.
//! Maps a 1 dimensional index to an N dimensional index assuming a buffer without padding.
template<std::size_t TidxDimOut>
struct MapIdxPitchBytes<TidxDimOut, 1u, std::enable_if_t<(TidxDimOut > 1u)>>
{
Expand All @@ -170,15 +172,15 @@ namespace alpaka
Vec<DimInt<1u>, TElem> const& idx,
Vec<DimInt<TidxDimOut>, TElem> const& pitch) -> Vec<DimInt<TidxDimOut>, TElem>
{
auto idxNd = Vec<DimInt<TidxDimOut>, TElem>::all(0u);
auto idxNd = Vec<DimInt<TidxDimOut>, TElem>::zeros();

constexpr std::size_t lastIdx = TidxDimOut - 1u;

TElem tmp = idx[0u];
for(std::size_t d(0u); d < lastIdx; ++d)
{
idxNd[d] = static_cast<TElem>(tmp / pitch[d + 1]);
tmp %= pitch[d + 1];
idxNd[d] = static_cast<TElem>(tmp / pitch[d]);
tmp %= pitch[d];
}
idxNd[lastIdx] = tmp;

Expand All @@ -199,13 +201,8 @@ namespace alpaka
Vec<DimInt<TidxDimIn>, TElem> const& idx,
Vec<DimInt<TidxDimIn>, TElem> const& pitch) -> Vec<DimInt<1u>, TElem>
{
constexpr auto lastDim = TidxDimIn - 1;
TElem idx1d = idx[lastDim];
for(std::size_t d(0u); d < lastDim; ++d)
{
idx1d = static_cast<TElem>(idx1d + pitch[d + 1] * idx[d]);
}
return {idx1d};
using DimMinusOne = DimInt<TidxDimIn - 1>;
return {idx.back() + (subVecBegin<DimMinusOne>(pitch) * subVecBegin<DimMinusOne>(idx)).sum()};
}
};

Expand Down Expand Up @@ -234,7 +231,7 @@ namespace alpaka
};
} // namespace detail

//! Maps a N dimensional index to a N dimensional position based on
//! Maps an N dimensional index to an N dimensional position based on
//! pitch in a buffer without padding or a byte buffer.
//!
//! \tparam TidxDimOut Dimension of the index vector to map to.
Expand Down
12 changes: 7 additions & 5 deletions include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,16 +182,18 @@ namespace alpaka
struct GetPitchesInBytes<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
{
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf) const
-> Vec<TDim, TIdx>
{
Vec<TDim, TIdx> v{};
if constexpr(TDim::value > 0)
{
v.back() = sizeof(TElem);
if constexpr(TDim::value > 1)
v.back() = buf.m_rowPitchInBytes;
else
v.back() = buf.m_extentElements.back() * sizeof(TElem);
for(int i = static_cast<int>(TDim::value) - 2; i >= 0; i--)
v[i] = buf.m_extentElements[i] * v[i + 1];
{
v[TDim::value - 2] = static_cast<TIdx>(buf.m_rowPitchInBytes);
for(TIdx i = TDim::value - 2; i > 0; i--)
v[i - 1] = buf.m_extentElements[i] * v[i];
}
}
return v;
}
Expand Down
32 changes: 15 additions & 17 deletions include/alpaka/mem/buf/cpu/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace alpaka
template<typename TDim, typename TViewDst, typename TViewSrc, typename TExtent>
struct TaskCopyCpuBase
{
static_assert(TDim::value > 0);

using ExtentSize = Idx<TExtent>;
using DstSize = Idx<TViewDst>;
using SrcSize = Idx<TViewSrc>;
Expand All @@ -37,7 +39,7 @@ namespace alpaka
template<typename TViewFwd>
TaskCopyCpuBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
: m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(viewDst))
, m_srcExtent(getExtents(viewSrc))
Expand All @@ -49,10 +51,13 @@ namespace alpaka
{
if constexpr(TDim::value > 0)
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(static_cast<DstSize>(m_extentWidthBytes) <= m_dstPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT(static_cast<SrcSize>(m_extentWidthBytes) <= m_srcPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
if constexpr(TDim::value > 1)
{
ALPAKA_ASSERT(static_cast<DstSize>(m_extentWidthBytes) <= m_dstPitchBytes[TDim::value - 2]);
ALPAKA_ASSERT(static_cast<SrcSize>(m_extentWidthBytes) <= m_srcPitchBytes[TDim::value - 2]);
}
}
}

Expand Down Expand Up @@ -100,10 +105,9 @@ namespace alpaka
#endif
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full size of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_srcPitchBytes));
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_srcPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -112,14 +116,8 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memcpy(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
reinterpret_cast<void const*>(
this->m_srcMemNative
+ (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost)
.foldrAll(std::plus<SrcSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes));
});
}
Expand Down
19 changes: 9 additions & 10 deletions include/alpaka/mem/buf/cpu/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ namespace alpaka
template<typename TDim, typename TView, typename TExtent>
struct TaskSetCpuBase
{
static_assert(TDim::value > 0);

using ExtentSize = Idx<TExtent>;
using DstSize = Idx<TView>;
using Elem = alpaka::Elem<TView>;
Expand All @@ -31,15 +33,16 @@ namespace alpaka
TaskSetCpuBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
: m_byte(byte)
, m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(view))
#endif
, m_dstPitchBytes(getPitchesInBytes(view))
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
if constexpr(TDim::value > 1)
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 2]);
}

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
Expand Down Expand Up @@ -80,9 +83,8 @@ namespace alpaka
#endif
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full idx of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -91,10 +93,7 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memset(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes));
});
Expand Down
Loading

0 comments on commit 7e99f63

Please sign in to comment.