Skip to content

Commit

Permalink
Simplify pitch API
Browse files Browse the repository at this point in the history
* Add getPitchesInBytes and GetPitchesInBytes returning the pitches as an alpaka::Vec
* Deprecate getPitchBytes, GetPitchBytes and getPitchBytesVec
* Refactor BufUniformCudaHipRt to always store row pitch

Fixes: #2079
  • Loading branch information
bernhardmgruber committed Aug 29, 2023
1 parent 98351f3 commit c3ebc62
Show file tree
Hide file tree
Showing 22 changed files with 195 additions and 152 deletions.
8 changes: 4 additions & 4 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@ auto main() -> int
// padding between rows/planes of multidimensional memory allocations.
// Therefore the pitch (distance between consecutive rows/planes) may be
// greater than the space required for the data.
Idx const deviceBuffer1Pitch(alpaka::getPitchBytes<2u>(deviceBuffer1) / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchBytes<2u>(deviceBuffer2) / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchBytes<2u>(hostBuffer) / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchBytes<2u>(hostViewPlainPtr) / sizeof(Data));
Idx const deviceBuffer1Pitch(alpaka::getPitchesInBytes(deviceBuffer1)[2] / sizeof(Data));
Idx const deviceBuffer2Pitch(alpaka::getPitchesInBytes(deviceBuffer2)[2] / sizeof(Data));
Idx const hostBuffer1Pitch(alpaka::getPitchesInBytes(hostBuffer)[2] / sizeof(Data));
Idx const hostViewPlainPtrPitch(alpaka::getPitchesInBytes(hostViewPlainPtr)[2] / sizeof(Data));

// Test device Buffer
//
Expand Down
12 changes: 6 additions & 6 deletions example/randomCells2D/src/randomCells2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,16 +201,16 @@ auto main() -> int
RandomEngineVector<Acc>* const ptrBufAccRandV{alpaka::getPtrNative(bufAccRandV)};

InitRandomKernel initRandomKernel;
auto pitchBufAccRandS = alpaka::getPitchBytes<1u>(bufAccRandS);
auto pitchBufAccRandS = alpaka::getPitchesInBytes(bufAccRandS)[1];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandS, pitchBufAccRandS);
alpaka::wait(queue);

auto pitchBufAccRandV = alpaka::getPitchBytes<1u>(bufAccRandV);
auto pitchBufAccRandV = alpaka::getPitchesInBytes(bufAccRandV)[1];
alpaka::exec<Acc>(queue, workdiv, initRandomKernel, extent, ptrBufAccRandV, pitchBufAccRandV);
alpaka::wait(queue);

auto pitchHostS = alpaka::getPitchBytes<1u>(bufHostS);
auto pitchHostV = alpaka::getPitchBytes<1u>(bufHostV);
auto pitchHostS = alpaka::getPitchesInBytes(bufHostS)[1];
auto pitchHostV = alpaka::getPitchesInBytes(bufHostV)[1];

for(Idx y = 0; y < numY; ++y)
{
Expand All @@ -221,7 +221,7 @@ auto main() -> int
}
}

auto pitchBufAccS = alpaka::getPitchBytes<1u>(bufAccS);
auto pitchBufAccS = alpaka::getPitchesInBytes(bufAccS)[1];
alpaka::memcpy(queue, bufAccS, bufHostS);
RunTimestepKernelSingle runTimestepKernelSingle;
alpaka::exec<Acc>(
Expand All @@ -235,7 +235,7 @@ auto main() -> int
pitchBufAccS);
alpaka::memcpy(queue, bufHostS, bufAccS);

auto pitchBufAccV = alpaka::getPitchBytes<1u>(bufAccV);
auto pitchBufAccV = alpaka::getPitchesInBytes(bufAccV)[1];
alpaka::memcpy(queue, bufAccV, bufHostV);
RunTimestepKernelVector runTimestepKernelVector;
alpaka::exec<Acc>(
Expand Down
95 changes: 53 additions & 42 deletions include/alpaka/mem/buf/BufUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,29 +32,44 @@ namespace alpaka
template<typename TElem, typename TDim, typename TIdx>
class BufCpu;

namespace detail
{
    //! Dimension-dependent storage for a row pitch.
    //!
    //! Primary template: chosen whenever the partial specialization below does not apply,
    //! i.e. when TDim::value < 2. It declares no data members; its constructor accepts a
    //! pitch value and discards it, so a single-argument construction compiles for every TDim.
    template<typename TDim, typename TEnable = void>
    struct PitchHolder
    {
        explicit PitchHolder(std::size_t /* rowPitchInBytes */)
        {
        }
    };

    //! Partial specialization for TDim::value >= 2.
    //!
    //! A plain aggregate whose only member is the row pitch in bytes; it is initialized via
    //! aggregate initialization (e.g. PitchHolder<TDim>{pitch}).
    template<typename TDim>
    struct PitchHolder<TDim, std::enable_if_t<(TDim::value >= 2)>>
    {
        std::size_t m_rowPitchInBytes;
    };
} // namespace detail

//! The CUDA/HIP memory buffer.
template<typename TApi, typename TElem, typename TDim, typename TIdx>
class BufUniformCudaHipRt : public internal::ViewAccessOps<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
struct BufUniformCudaHipRt
: detail::PitchHolder<TDim>
, internal::ViewAccessOps<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
{
public:
static_assert(
!std::is_const_v<TElem>,
"The elem type of the buffer can not be const because the C++ Standard forbids containers of const "
"elements!");
static_assert(!std::is_const_v<TIdx>, "The idx type of the buffer can not be const!");
static_assert(!std::is_const_v<TElem>, "The elem type of the buffer must not be const");
static_assert(!std::is_const_v<TIdx>, "The idx type of the buffer must not be const!");

//! Constructor
template<typename TExtent, typename Deleter>
ALPAKA_FN_HOST BufUniformCudaHipRt(
DevUniformCudaHipRt<TApi> const& dev,
TElem* const pMem,
Deleter deleter,
std::size_t pitchBytes,
TExtent const& extent)
: m_dev(dev)
, m_extentElements(getExtentVecEnd<TDim>(extent))
TExtent const& extent,
std::size_t pitchBytes)
: detail::PitchHolder<TDim>{pitchBytes}
, m_dev(dev)
, m_extentElements(getExtents(extent))
, m_spMem(pMem, std::move(deleter))
, m_pitchBytes(pitchBytes)
{
ALPAKA_DEBUG_MINIMAL_LOG_SCOPE;

Expand All @@ -67,11 +82,9 @@ namespace alpaka
"The idx type of TExtent and the TIdx template parameter have to be identical!");
}

public:
DevUniformCudaHipRt<TApi> m_dev;
Vec<TDim, TIdx> m_extentElements;
std::shared_ptr<TElem> m_spMem;
std::size_t m_pitchBytes;
};

namespace trait
Expand Down Expand Up @@ -165,26 +178,22 @@ namespace alpaka
}
};

//! The BufUniformCudaHipRt pitch get trait specialization.
template<typename TApi, typename TElem, typename TDim, typename TIdx>
struct GetPitchBytes<DimInt<TDim::value - 1u>, BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
struct GetPitchesInBytes<BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>
{
ALPAKA_FN_HOST static auto getPitchBytes(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf) -> TIdx
ALPAKA_FN_HOST auto operator()(BufUniformCudaHipRt<TApi, TElem, TDim, TIdx> const& buf) const
{
constexpr auto idx = static_cast<TIdx>(TDim::value - 1u);
constexpr auto bufDim = TDim::value;
if constexpr(idx < bufDim - 1)
Vec<TDim, TIdx> v{};
if constexpr(TDim::value > 0)
{
return getExtent<idx>(buf)
* GetPitchBytes<DimInt<idx + 1>, BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>>::
getPitchBytes(buf);
if constexpr(TDim::value > 1)
v.back() = buf.m_rowPitchInBytes;
else
v.back() = buf.m_extentElements.back() * sizeof(TElem);
for(int i = static_cast<int>(TDim::value) - 2; i >= 0; i--)
v[i] = buf.m_extentElements[i] * v[i + 1];
}
else if constexpr(idx == bufDim - 1)
return static_cast<TIdx>(buf.m_pitchBytes);
else
return static_cast<TIdx>(sizeof(TElem));

ALPAKA_UNREACHABLE({});
return v;
}
};

Expand All @@ -201,24 +210,23 @@ namespace alpaka
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));

void* memPtr = nullptr;
std::size_t pitchBytes = 0u;
std::size_t rowPitchInBytes = 0u;
if(getExtentProduct(extent) != 0)
{
if constexpr(Dim::value == 0)
{
pitchBytes = sizeof(TElem);
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc(&memPtr, pitchBytes));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc(&memPtr, sizeof(TElem)));
}
else if constexpr(Dim::value == 1)
{
pitchBytes = static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem);
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc(&memPtr, pitchBytes));
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
TApi::malloc(&memPtr, static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem)));
}
else if constexpr(Dim::value == 2)
{
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::mallocPitch(
&memPtr,
&pitchBytes,
&rowPitchInBytes,
static_cast<std::size_t>(getWidth(extent)) * sizeof(TElem),
static_cast<std::size_t>(getHeight(extent))));
}
Expand All @@ -232,7 +240,7 @@ namespace alpaka
pitchedPtrVal.ptr = nullptr;
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::malloc3D(&pitchedPtrVal, extentVal));
memPtr = pitchedPtrVal.ptr;
pitchBytes = pitchedPtrVal.pitch;
rowPitchInBytes = pitchedPtrVal.pitch;
}
}
# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
Expand All @@ -243,14 +251,17 @@ namespace alpaka
std::cout << " eh: " << getHeight(extent);
if constexpr(Dim::value >= 3)
std::cout << " ed: " << getDepth(extent);
std::cout << " ptr: " << memPtr << " pitch: " << pitchBytes << std::endl;
std::cout << " ptr: " << memPtr;
if constexpr(Dim::value >= 2)
std::cout << " rowpitch: " << rowPitchInBytes;
std::cout << std::endl;
# endif
return {
dev,
reinterpret_cast<TElem*>(memPtr),
[](TElem* ptr) { ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::free(ptr)); },
pitchBytes,
extent};
extent,
rowPitchInBytes};
}
};

Expand Down Expand Up @@ -292,13 +303,13 @@ namespace alpaka
# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
std::cout << __func__ << " ew: " << width << " ptr: " << memPtr << std::endl;
# endif
return BufUniformCudaHipRt<TApi, TElem, TDim, TIdx>(
return {
dev,
reinterpret_cast<TElem*>(memPtr),
[queue = std::move(queue)](TElem* ptr)
{ ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK_NOEXCEPT(TApi::freeAsync(ptr, queue.getNativeHandle())); },
width * static_cast<TIdx>(sizeof(TElem)),
extent);
extent,
static_cast<std::size_t>(width) * sizeof(TElem)};
}
};

Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/mem/buf/cpu/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ namespace alpaka
, m_dstExtent(getExtents(viewDst))
, m_srcExtent(getExtents(viewSrc))
#endif
, m_dstPitchBytes(getPitchBytesVec(viewDst))
, m_srcPitchBytes(getPitchBytesVec(viewSrc))
, m_dstPitchBytes(getPitchesInBytes(viewDst))
, m_srcPitchBytes(getPitchesInBytes(viewSrc))
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
{
Expand Down
2 changes: 1 addition & 1 deletion include/alpaka/mem/buf/cpu/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace alpaka
#if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(view))
#endif
, m_dstPitchBytes(getPitchBytesVec(view))
, m_dstPitchBytes(getPitchesInBytes(view))
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/mem/buf/sycl/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ namespace alpaka::detail
, m_dstExtent(getExtents(viewDst))
, m_srcExtent(getExtents(viewSrc))
# endif
, m_dstPitchBytes(getPitchBytesVec(viewDst))
, m_srcPitchBytes(getPitchBytesVec(viewSrc))
, m_dstPitchBytes(getPitchesInBytes(viewDst))
, m_srcPitchBytes(getPitchesInBytes(viewSrc))
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<std::uint8_t const*>(getPtrNative(viewSrc)))
{
Expand Down
2 changes: 1 addition & 1 deletion include/alpaka/mem/buf/sycl/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace alpaka
, m_dstExtent(getExtents(view))
# endif

, m_dstPitchBytes(getPitchBytesVec(view))
, m_dstPitchBytes(getPitchesInBytes(view))
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))

{
Expand Down
12 changes: 6 additions & 6 deletions include/alpaka/mem/buf/uniformCudaHip/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ namespace alpaka
, m_dstHeight(static_cast<Idx>(getHeight(viewDst)))
, m_srcHeight(static_cast<Idx>(getHeight(viewSrc)))
# endif
, m_dstPitchBytes(static_cast<std::size_t>(getPitchBytes<Dim<TViewDst>::value - 1u>(viewDst)))
, m_srcPitchBytes(static_cast<std::size_t>(getPitchBytes<Dim<TViewSrc>::value - 1u>(viewSrc)))
, m_dstPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewDst)[Dim<TViewDst>::value - 1u]))
, m_srcPitchBytes(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - 1u]))
, m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
{
Expand Down Expand Up @@ -308,12 +308,12 @@ namespace alpaka
, m_dstDepth(static_cast<Idx>(getDepth(viewDst)))
, m_srcDepth(static_cast<Idx>(getDepth(viewSrc)))
# endif
, m_dstPitchBytesX(static_cast<std::size_t>(getPitchBytes<Dim<TViewDst>::value - 1u>(viewDst)))
, m_srcPitchBytesX(static_cast<std::size_t>(getPitchBytes<Dim<TViewSrc>::value - 1u>(viewSrc)))
, m_dstPitchBytesX(static_cast<std::size_t>(getPitchesInBytes(viewDst)[Dim<TViewDst>::value - 1u]))
, m_srcPitchBytesX(static_cast<std::size_t>(getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - 1u]))
, m_dstPitchBytesY(static_cast<std::size_t>(
getPitchBytes<Dim<TViewDst>::value - (2u % Dim<TViewDst>::value)>(viewDst)))
getPitchesInBytes(viewDst)[Dim<TViewDst>::value - (2u % Dim<TViewDst>::value)]))
, m_srcPitchBytesY(static_cast<std::size_t>(
getPitchBytes<Dim<TViewSrc>::value - (2u % Dim<TViewSrc>::value)>(viewSrc)))
getPitchesInBytes(viewSrc)[Dim<TViewSrc>::value - (2u % Dim<TViewDst>::value)]))
, m_dstMemNative(reinterpret_cast<void*>(getPtrNative(viewDst)))
, m_srcMemNative(reinterpret_cast<void const*>(getPtrNative(viewSrc)))
{
Expand Down
6 changes: 3 additions & 3 deletions include/alpaka/mem/buf/uniformCudaHip/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ namespace alpaka
auto const dstWidth = getWidth(view);
auto const dstHeight = getHeight(view);
# endif
auto const dstPitchBytes = static_cast<std::size_t>(getPitchBytes<Dim<TView>::value - 1u>(view));
auto const dstPitchBytes = static_cast<std::size_t>(getPitchesInBytes(view)[Dim<TView>::value - 1u]);
auto const dstNativePtr = reinterpret_cast<void*>(getPtrNative(view));
ALPAKA_ASSERT(extentWidth <= dstWidth);
ALPAKA_ASSERT(extentHeight <= dstHeight);
Expand Down Expand Up @@ -202,9 +202,9 @@ namespace alpaka
auto const dstHeight = getHeight(view);
auto const dstDepth = getDepth(view);
# endif
auto const dstPitchBytesX = static_cast<std::size_t>(getPitchBytes<Dim<TView>::value - 1u>(view));
auto const dstPitchBytesX = static_cast<std::size_t>(getPitchesInBytes(view)[Dim<TView>::value - 1u]);
auto const dstPitchBytesY
= static_cast<std::size_t>(getPitchBytes<Dim<TView>::value - (2u % Dim<TView>::value)>(view));
= static_cast<std::size_t>(getPitchesInBytes(view)[Dim<TView>::value - (2u % Dim<TView>::value)]);
auto const dstNativePtr = reinterpret_cast<void*>(getPtrNative(view));
ALPAKA_ASSERT(extentWidth <= dstWidth);
ALPAKA_ASSERT(extentHeight <= dstHeight);
Expand Down
Loading

0 comments on commit c3ebc62

Please sign in to comment.