Skip to content

Commit

Permalink
Fix SYCL pitch calculations
Browse files Browse the repository at this point in the history
These are changes missed as part of #2093.

Fixes: #2124
  • Loading branch information
bernhardmgruber committed Sep 4, 2023
1 parent 5900223 commit bb29998
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 31 deletions.
10 changes: 6 additions & 4 deletions include/alpaka/mem/buf/cpu/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ namespace alpaka
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_srcPitchBytes);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost
= subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost
= subVecBegin<DimMin1>(this->m_srcPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -116,8 +118,8 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memcpy(
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost).sum(),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes));
});
}
Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/mem/buf/cpu/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ namespace alpaka
if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
std::memset(
reinterpret_cast<void*>(this->m_dstMemNative),
this->m_dstMemNative,
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes));
}
Expand Down Expand Up @@ -159,7 +159,7 @@ namespace alpaka
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
printDebug();
#endif
std::memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem));
std::memset(m_dstMemNative, m_byte, sizeof(Elem));
}

std::uint8_t const m_byte;
Expand Down
25 changes: 9 additions & 16 deletions include/alpaka/mem/buf/sycl/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace alpaka::detail
template<typename TViewFwd>
TaskCopySyclBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
: m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(viewDst))
, m_srcExtent(getExtents(viewSrc))
Expand All @@ -53,8 +53,8 @@ namespace alpaka::detail
{
if constexpr(TDim::value > 0)
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
}
}

Expand Down Expand Up @@ -103,9 +103,8 @@ namespace alpaka::detail
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full size of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_srcPitchBytes));
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_srcPitchBytes));

// Record an event for each memcpy call
std::vector<sycl::event> events;
Expand All @@ -118,14 +117,8 @@ namespace alpaka::detail
[&](Vec<DimMin1, ExtentSize> const& idx)
{
events.push_back(queue.memcpy(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
reinterpret_cast<void const*>(
this->m_srcMemNative
+ (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost)
.foldrAll(std::plus<SrcSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
});
Expand Down Expand Up @@ -154,8 +147,8 @@ namespace alpaka::detail
if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
return queue.memcpy(
reinterpret_cast<void*>(this->m_dstMemNative),
reinterpret_cast<void const*>(this->m_srcMemNative),
this->m_dstMemNative,
this->m_srcMemNative,
sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()),
requirements);
}
Expand Down
15 changes: 6 additions & 9 deletions include/alpaka/mem/buf/sycl/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace alpaka
TaskSetSyclBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
: m_byte(byte)
, m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(view))
# endif
Expand All @@ -49,8 +49,9 @@ namespace alpaka
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))

{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
if constexpr(TDim::value > 1)
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 2]);
}

# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
Expand Down Expand Up @@ -93,8 +94,7 @@ namespace alpaka
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full idx of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));

// Record an event for each memcpy call
std::vector<sycl::event> events;
Expand All @@ -107,10 +107,7 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
events.push_back(queue.memset(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
Expand Down

0 comments on commit bb29998

Please sign in to comment.