Skip to content

Commit

Permalink
Redefine pitches
Browse files Browse the repository at this point in the history
This PR redefines the values returned by getPitchesInBytes so that they
follow the stride semantics of std::mdspan, except that they are measured
in bytes rather than in elements.

Example: for 4-byte elements, the pitch vector for the extent {42, 10, 2} changes:

Before: {3360, 80, 8}
After: {80, 8, 4}

Under the new definition, the pitch value for a dimension is the number of
bytes to jump from one element to the next along that dimension; for the
innermost dimension this is the size of a single element.

Fixes: #2083
  • Loading branch information
bernhardmgruber committed Aug 28, 2023
1 parent 5341cfd commit 14caad1
Show file tree
Hide file tree
Showing 13 changed files with 127 additions and 200 deletions.
8 changes: 4 additions & 4 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ auto main() -> int
// padding between rows/planes of multidimensional memory allocations.
// Therefore the pitch (distance between consecutive rows/planes) may be
// greater than the space required for the data.
Idx const deviceBuffer1Pitch(alpaka::getPitchesInBytes(deviceBuffer1)[2] / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchesInBytes(deviceBuffer2)[2] / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchesInBytes(hostBuffer)[2] / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchesInBytes(hostViewPlainPtr)[2] / sizeof(Data));
Idx const deviceBuffer1Pitch(alpaka::getPitchesInBytes(deviceBuffer1)[1] / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchesInBytes(deviceBuffer2)[1] / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchesInBytes(hostBuffer)[1] / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchesInBytes(hostViewPlainPtr)[1] / sizeof(Data));

// Test device Buffer
//
Expand Down
12 changes: 6 additions & 6 deletions example/randomCells2D/src/randomCells2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,16 +201,16 @@ auto main() -> int
RandomEngineVector<Acc>* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)};

InitRandomKernel initRandomKernel;
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[1];
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[0];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandS, pitchBufAccRandS);
alpaka::wait(queue);

auto pitchBufAccRandV = alpaka::getPitchesInBytes(bufAccRandV)[1];
auto pitchBufAccRandV = alpaka::getPitchesInBytes(bufAccRandV)[0];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandV, pitchBufAccRandV);
alpaka::wait(queue);

auto pitchHostS = alpaka::getPitchesInBytes(bufHostS)[1];
auto pitchHostV = alpaka::getPitchesInBytes(bufHostV)[1];
auto pitchHostS = alpaka::getPitchesInBytes(bufHostS)[0];
auto pitchHostV = alpaka::getPitchesInBytes(bufHostV)[0];

for(Idx y = 0; y < numY; ++y)
{
Expand All @@ -221,7 +221,7 @@ auto main() -> int
}
}

auto pitchBufAccS = alpaka::getPitchesInBytes(bufAccS)[1];
auto pitchBufAccS = alpaka::getPitchesInBytes(bufAccS)[0];
alpaka::memcpy(queue, bufAccS, bufHostS);
RunTimestepKernelSingle runTimestepKernelSingle;
alpaka::exec<Acc>(
Expand All @@ -235,7 +235,7 @@ auto main() -> int
pitchBufAccS);
alpaka::memcpy(queue, bufHostS, bufAccS);

auto pitchBufAccV = alpaka::getPitchesInBytes(bufAccV)[1];
auto pitchBufAccV = alpaka::getPitchesInBytes(bufAccV)[0];
alpaka::memcpy(queue, bufAccV, bufHostV);
RunTimestepKernelVector runTimestepKernelVector;
alpaka::exec<Acc>(
Expand Down
15 changes: 8 additions & 7 deletions include/alpaka/idx/MapIdx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,11 @@ namespace alpaka

namespace detail
{
//! Maps a linear index to a N dimensional index assuming a buffer wihtout padding.
//! Maps a linear index to a N dimensional index assuming a buffer without padding.
template<std::size_t TidxDimOut, std::size_t TidxDimIn, typename TSfinae = void>
struct MapIdxPitchBytes;
//! Maps a N dimensional index to the same N dimensional index assuming a buffer wihtout padding.

//! Maps a N dimensional index to the same N dimensional index assuming a buffer without padding.
template<std::size_t TidxDim>
struct MapIdxPitchBytes<TidxDim, TidxDim>
{
Expand All @@ -156,7 +157,7 @@ namespace alpaka
return idx;
}
};
//! Maps a 1 dimensional index to a N dimensional index assuming a buffer wihtout padding.
//! Maps a 1 dimensional index to a N dimensional index assuming a buffer without padding.
template<std::size_t TidxDimOut>
struct MapIdxPitchBytes<TidxDimOut, 1u, std::enable_if_t<(TidxDimOut > 1u)>>
{
Expand All @@ -177,8 +178,8 @@ namespace alpaka
TElem tmp = idx[0u];
for(std::size_t d(0u); d < lastIdx; ++d)
{
idxNd[d] = static_cast<TElem>(tmp / pitch[d + 1]);
tmp %= pitch[d + 1];
idxNd[d] = static_cast<TElem>(tmp / pitch[d]);
tmp %= pitch[d];
}
idxNd[lastIdx] = tmp;

Expand All @@ -203,7 +204,7 @@ namespace alpaka
TElem idx1d = idx[lastDim];
for(std::size_t d(0u); d < lastDim; ++d)
{
idx1d = static_cast<TElem>(idx1d + pitch[d + 1] * idx[d]);
idx1d = static_cast<TElem>(idx1d + pitch[d] * idx[d]);
}
return {idx1d};
}
Expand Down Expand Up @@ -234,7 +235,7 @@ namespace alpaka
};
} // namespace detail

//! Maps a N dimensional index to a N dimensional position based on
//! Maps an N dimensional index to a N dimensional position based on
//! pitch in a buffer without padding or a byte buffer.
//!
//! \tparam TidxDimOut Dimension of the index vector to map to.
Expand Down
19 changes: 5 additions & 14 deletions include/alpaka/mem/buf/cpu/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,6 @@ namespace alpaka
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(static_cast<DstSize>(m_extentWidthBytes) <= m_dstPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT(static_cast<SrcSize>(m_extentWidthBytes) <= m_srcPitchBytes[TDim::value - 1u]);
}
}

Expand Down Expand Up @@ -100,10 +98,9 @@ namespace alpaka
#endif
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full size of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_srcPitchBytes));
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_srcPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -112,14 +109,8 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memcpy(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
reinterpret_cast<void const*>(
this->m_srcMemNative
+ (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost)
.foldrAll(std::plus<SrcSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes));
});
}
Expand Down
12 changes: 4 additions & 8 deletions include/alpaka/mem/buf/cpu/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace alpaka
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 1u]);
// ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 1u]);
}

#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
Expand Down Expand Up @@ -80,9 +80,8 @@ namespace alpaka
#endif
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full idx of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -91,10 +90,7 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memset(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes));
});
Expand Down
57 changes: 28 additions & 29 deletions include/alpaka/mem/buf/uniformCudaHip/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ namespace alpaka
, m_dstHeight(static_cast<Idx>(getHeight(viewDst)))
, m_srcHeight(static_cast<Idx>(getHeight(viewSrc)))
# endif
, m_dstPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewDst)[Dim<TViewDst>::value - 1u]))
, m_srcPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - 1u]))
, m_dstRowPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewDst)[0]))
, m_srcRowPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[0]))
, m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
{
Expand All @@ -212,8 +212,8 @@ namespace alpaka
ALPAKA_ASSERT(m_extentHeight <= m_dstHeight);
ALPAKA_ASSERT(m_extentWidth <= m_srcWidth);
ALPAKA_ASSERT(m_extentHeight <= m_srcHeight);
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_srcPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstRowPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_srcRowPitchBytes);
# endif
}

Expand All @@ -236,9 +236,9 @@ namespace alpaka
// Initiate the memory copy.
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::memcpy2DAsync(
m_dstMemNative,
m_dstPitchBytes,
m_dstRowPitchBytes,
m_srcMemNative,
m_srcPitchBytes,
m_srcRowPitchBytes,
m_extentWidthBytes,
static_cast<std::size_t>(m_extentHeight),
m_uniformMemCpyKind,
Expand All @@ -251,9 +251,10 @@ namespace alpaka
{
std::cout << __func__ << " ew: " << m_extentWidth << " eh: " << m_extentHeight
<< " ewb: " << m_extentWidthBytes << " ddev: " << m_iDstDevice << " dw: " << m_dstWidth
<< " dh: " << m_dstHeight << " dptr: " << m_dstMemNative << " dpitchb: " << m_dstPitchBytes
<< " sdev: " << m_iSrcDevice << " sw: " << m_srcWidth << " sh: " << m_srcHeight
<< " sptr: " << m_srcMemNative << " spitchb: " << m_srcPitchBytes << std::endl;
<< " dh: " << m_dstHeight << " dptr: " << m_dstMemNative
<< " dpitchb: " << m_dstRowPitchBytes << " sdev: " << m_iSrcDevice << " sw: " << m_srcWidth
<< " sh: " << m_srcHeight << " sptr: " << m_srcMemNative
<< " spitchb: " << m_srcRowPitchBytes << std::endl;
}
# endif

Expand All @@ -272,8 +273,8 @@ namespace alpaka
Idx m_dstHeight;
Idx m_srcHeight;
# endif
std::size_t m_dstPitchBytes;
std::size_t m_srcPitchBytes;
std::size_t m_dstRowPitchBytes;
std::size_t m_srcRowPitchBytes;

void* m_dstMemNative;
void const* m_srcMemNative;
Expand Down Expand Up @@ -308,12 +309,10 @@ namespace alpaka
, m_dstDepth(static_cast<Idx>(getDepth(viewDst)))
, m_srcDepth(static_cast<Idx>(getDepth(viewSrc)))
# endif
, m_dstPitchBytesX(static_cast<std::size_t>(getPitchesInBytes(viewDst)[Dim<TViewDst>::value - 1u]))
, m_srcPitchBytesX(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - 1u]))
, m_dstPitchBytesY(static_cast<std::size_t>(
getPitchesInBytes(viewDst)[Dim<TViewDst>::value - (2u % Dim<TViewDst>::value)]))
, m_srcPitchBytesY(static_cast<std::size_t>(
getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - (2u % Dim<TViewDst>::value)]))
, m_dstRowPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewDst)[1]))
, m_srcRowPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[1]))
, m_dstSlicePitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewDst)[0]))
, m_srcSlicePitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[0]))
, m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
{
Expand All @@ -324,8 +323,8 @@ namespace alpaka
ALPAKA_ASSERT(m_extentWidth <= m_srcWidth);
ALPAKA_ASSERT(m_extentHeight <= m_srcHeight);
ALPAKA_ASSERT(m_extentDepth <= m_srcDepth);
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_srcPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstRowPitchBytes);
ALPAKA_ASSERT(m_extentWidthBytes <= m_srcRowPitchBytes);
# endif
}

Expand Down Expand Up @@ -364,16 +363,16 @@ namespace alpaka
memCpy3DParms.srcPos = TApi::makePos(0, 0, 0); // Optional. Offset in bytes.
memCpy3DParms.srcPtr = TApi::makePitchedPtr(
const_cast<void*>(m_srcMemNative),
m_srcPitchBytesX,
m_srcRowPitchBytes,
static_cast<std::size_t>(m_srcWidth),
static_cast<std::size_t>(m_srcPitchBytesY / m_srcPitchBytesX));
static_cast<std::size_t>(m_srcSlicePitchBytes / m_srcRowPitchBytes));
memCpy3DParms.dstArray = nullptr; // Either dstArray or dstPtr.
memCpy3DParms.dstPos = TApi::makePos(0, 0, 0); // Optional. Offset in bytes.
memCpy3DParms.dstPtr = TApi::makePitchedPtr(
m_dstMemNative,
m_dstPitchBytesX,
m_dstRowPitchBytes,
static_cast<std::size_t>(m_dstWidth),
static_cast<std::size_t>(m_dstPitchBytesY / m_dstPitchBytesX));
static_cast<std::size_t>(m_dstSlicePitchBytes / m_dstRowPitchBytes));
memCpy3DParms.extent = TApi::makeExtent(
m_extentWidthBytes,
static_cast<std::size_t>(m_extentHeight),
Expand All @@ -388,9 +387,9 @@ namespace alpaka
std::cout << __func__ << " ew: " << m_extentWidth << " eh: " << m_extentHeight
<< " ed: " << m_extentDepth << " ewb: " << m_extentWidthBytes << " ddev: " << m_iDstDevice
<< " dw: " << m_dstWidth << " dh: " << m_dstHeight << " dd: " << m_dstDepth
<< " dptr: " << m_dstMemNative << " dpitchb: " << m_dstPitchBytes
<< " dptr: " << m_dstMemNative << " dpitchb: " << m_dstRowPitchBytes
<< " sdev: " << m_iSrcDevice << " sw: " << m_srcWidth << " sh: " << m_srcHeight
<< " sd: " << m_srcDepth << " sptr: " << m_srcMemNative << " spitchb: " << m_srcPitchBytes
<< " sd: " << m_srcDepth << " sptr: " << m_srcMemNative << " spitchb: " << m_srcRowPitchBytes
<< std::endl;
}
# endif
Expand All @@ -411,10 +410,10 @@ namespace alpaka
Idx m_dstDepth;
Idx m_srcDepth;
# endif
std::size_t m_dstPitchBytesX;
std::size_t m_srcPitchBytesX;
std::size_t m_dstPitchBytesY;
std::size_t m_srcPitchBytesY;
std::size_t m_dstRowPitchBytes;
std::size_t m_srcRowPitchBytes;
std::size_t m_dstSlicePitchBytes;
std::size_t m_srcSlicePitchBytes;

void* m_dstMemNative;
void const* m_srcMemNative;
Expand Down
Loading

0 comments on commit 14caad1

Please sign in to comment.