Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix SYCL pitch calculations #2125

Merged
merged 1 commit into from
Sep 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions include/alpaka/mem/buf/cpu/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ namespace alpaka
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost = subVecBegin<DimMin1>(this->m_extent);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost = subVecBegin<DimMin1>(this->m_srcPitchBytes);
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost
= subVecBegin<DimMin1>(this->m_dstPitchBytes);
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost
= subVecBegin<DimMin1>(this->m_srcPitchBytes);

if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
Expand All @@ -116,8 +118,8 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
std::memcpy(
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost).sum(),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes));
});
}
Expand Down
4 changes: 2 additions & 2 deletions include/alpaka/mem/buf/cpu/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ namespace alpaka
if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
std::memset(
reinterpret_cast<void*>(this->m_dstMemNative),
this->m_dstMemNative,
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes));
}
Expand Down Expand Up @@ -159,7 +159,7 @@ namespace alpaka
#if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
printDebug();
#endif
std::memset(reinterpret_cast<void*>(m_dstMemNative), m_byte, sizeof(Elem));
std::memset(m_dstMemNative, m_byte, sizeof(Elem));
}

std::uint8_t const m_byte;
Expand Down
25 changes: 9 additions & 16 deletions include/alpaka/mem/buf/sycl/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace alpaka::detail
template<typename TViewFwd>
TaskCopySyclBase(TViewFwd&& viewDst, TViewSrc const& viewSrc, TExtent const& extent)
: m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(viewDst))
, m_srcExtent(getExtents(viewSrc))
Expand All @@ -53,8 +53,8 @@ namespace alpaka::detail
{
if constexpr(TDim::value > 0)
{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
ALPAKA_ASSERT((castVec<SrcSize>(m_extent) <= m_srcExtent).all());
}
}

Expand Down Expand Up @@ -103,9 +103,8 @@ namespace alpaka::detail
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full size of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_srcPitchBytes));
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, SrcSize> const srcPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_srcPitchBytes));

// Record an event for each memcpy call
std::vector<sycl::event> events;
Expand All @@ -118,14 +117,8 @@ namespace alpaka::detail
[&](Vec<DimMin1, ExtentSize> const& idx)
{
events.push_back(queue.memcpy(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
reinterpret_cast<void const*>(
this->m_srcMemNative
+ (castVec<SrcSize>(idx) * srcPitchBytesWithoutOutmost)
.foldrAll(std::plus<SrcSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_srcMemNative + (castVec<SrcSize>(idx) * srcPitchBytesWithoutInnermost).sum(),
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
});
Expand Down Expand Up @@ -154,8 +147,8 @@ namespace alpaka::detail
if(static_cast<std::size_t>(this->m_extent.prod()) != 0u)
{
return queue.memcpy(
reinterpret_cast<void*>(this->m_dstMemNative),
reinterpret_cast<void const*>(this->m_srcMemNative),
this->m_dstMemNative,
this->m_srcMemNative,
sizeof(Elem) * static_cast<std::size_t>(this->m_extent.prod()),
requirements);
}
Expand Down
15 changes: 6 additions & 9 deletions include/alpaka/mem/buf/sycl/Set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace alpaka
TaskSetSyclBase(TViewFwd&& view, std::uint8_t const& byte, TExtent const& extent)
: m_byte(byte)
, m_extent(getExtents(extent))
, m_extentWidthBytes(m_extent[TDim::value - 1u] * static_cast<ExtentSize>(sizeof(Elem)))
, m_extentWidthBytes(m_extent.back() * static_cast<ExtentSize>(sizeof(Elem)))
# if(!defined(NDEBUG)) || (ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL)
, m_dstExtent(getExtents(view))
# endif
Expand All @@ -49,8 +49,9 @@ namespace alpaka
, m_dstMemNative(reinterpret_cast<std::uint8_t*>(getPtrNative(view)))

{
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).foldrAll(std::logical_or<bool>()));
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 1u]);
ALPAKA_ASSERT((castVec<DstSize>(m_extent) <= m_dstExtent).all());
if constexpr(TDim::value > 1)
ALPAKA_ASSERT(m_extentWidthBytes <= m_dstPitchBytes[TDim::value - 2]);
}

# if ALPAKA_DEBUG >= ALPAKA_DEBUG_FULL
Expand Down Expand Up @@ -93,8 +94,7 @@ namespace alpaka
// [z, y, x] -> [z, y] because all elements with the innermost x dimension are handled within one
// iteration.
Vec<DimMin1, ExtentSize> const extentWithoutInnermost(subVecBegin<DimMin1>(this->m_extent));
// [z, y, x] -> [y, x] because the z pitch (the full idx of the buffer) is not required.
Vec<DimMin1, DstSize> const dstPitchBytesWithoutOutmost(subVecEnd<DimMin1>(this->m_dstPitchBytes));
Vec<DimMin1, DstSize> const dstPitchBytesWithoutInnermost(subVecBegin<DimMin1>(this->m_dstPitchBytes));

// Record an event for each memcpy call
std::vector<sycl::event> events;
Expand All @@ -107,10 +107,7 @@ namespace alpaka
[&](Vec<DimMin1, ExtentSize> const& idx)
{
events.push_back(queue.memset(
reinterpret_cast<void*>(
this->m_dstMemNative
+ (castVec<DstSize>(idx) * dstPitchBytesWithoutOutmost)
.foldrAll(std::plus<DstSize>())),
this->m_dstMemNative + (castVec<DstSize>(idx) * dstPitchBytesWithoutInnermost).sum(),
this->m_byte,
static_cast<std::size_t>(this->m_extentWidthBytes),
requirements));
Expand Down