Skip to content

Commit

Permalink
fix CUDA/HIP device wait for event
Browse files Browse the repository at this point in the history
fix #2060

The implementation that lets a device wait for an event was
incorrect.

**new implementation**

Collect all queues of a device within the device itself, as the CPU
device implementation already does, and let each stream wait for the event.
  • Loading branch information
psychocoderHPC authored and j-stephan committed Sep 1, 2023
1 parent 7e99f63 commit 0f93ef8
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 5 deletions.
24 changes: 22 additions & 2 deletions include/alpaka/dev/DevUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
#include "alpaka/core/Cuda.hpp"
#include "alpaka/core/Hip.hpp"
#include "alpaka/dev/Traits.hpp"
#include "alpaka/dev/common/QueueRegistry.hpp"
#include "alpaka/mem/buf/Traits.hpp"
#include "alpaka/platform/Traits.hpp"
#include "alpaka/queue/Properties.hpp"
#include "alpaka/queue/Traits.hpp"
#include "alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp"
#include "alpaka/traits/Traits.hpp"
#include "alpaka/wait/Traits.hpp"

Expand Down Expand Up @@ -56,8 +58,10 @@ namespace alpaka
{
friend struct trait::GetDevByIdx<PlatformUniformCudaHipRt<TApi>>;

using IDeviceQueue = uniform_cuda_hip::detail::QueueUniformCudaHipRtImpl<TApi>;

protected:
DevUniformCudaHipRt() = default;
//! Default constructor: creates the device object together with a freshly allocated queue registry.
//! NOTE(review): m_iDevice is not initialized by this constructor - confirm it is assigned before being read.
DevUniformCudaHipRt()
    : m_QueueRegistry{std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>()}
{
}

public:
ALPAKA_FN_HOST auto operator==(DevUniformCudaHipRt const& rhs) const -> bool
Expand All @@ -74,11 +78,27 @@ namespace alpaka
return m_iDevice;
}

//! Returns the queues that this device's queue registry reports as existing.
//! The result is returned by value, so later registrations do not affect it.
[[nodiscard]] ALPAKA_FN_HOST auto getAllQueues() const -> std::vector<std::shared_ptr<IDeviceQueue>>
{
    auto& registry = *m_QueueRegistry;
    return registry.getAllExistingQueues();
}

//! Adds the given queue to the queue registry owned by this device.
//! NOTE: Device wait operations only work correctly if every queue has been registered!
//!
//! @param spQueue shared pointer to the queue implementation to register
ALPAKA_FN_HOST auto registerQueue(std::shared_ptr<IDeviceQueue> spQueue) const -> void
{
    auto& registry = *m_QueueRegistry;
    registry.registerQueue(spQueue);
}

private:
DevUniformCudaHipRt(int iDevice) : m_iDevice(iDevice)
//! Creates the device object for the given device index and allocates its queue registry.
//!
//! @param iDevice device index stored in m_iDevice
DevUniformCudaHipRt(int iDevice)
    : m_iDevice{iDevice}
    , m_QueueRegistry{std::make_shared<alpaka::detail::QueueRegistry<IDeviceQueue>>()}
{
}
int m_iDevice;

std::shared_ptr<alpaka::detail::QueueRegistry<IDeviceQueue>> m_QueueRegistry;
};

namespace trait
Expand Down
4 changes: 3 additions & 1 deletion include/alpaka/dev/common/QueueRegistry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

namespace alpaka::detail
{
//! The CPU device implementation.
//! The CPU/GPU device queue registry implementation.
//!
//! @tparam TQueue queue implementation
template<typename TQueue>
struct QueueRegistry
{
Expand Down
9 changes: 8 additions & 1 deletion include/alpaka/event/EventUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,14 @@ namespace alpaka
// Set the current device.
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::setDevice(dev.getNativeHandle()));

ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(TApi::streamWaitEvent(nullptr, event.getNativeHandle(), 0));
// Get all the queues on the device at the time of invocation.
// All queues added afterwards are ignored.
auto vQueues = dev.getAllQueues();
for(auto&& spQueue : vQueues)
{
ALPAKA_UNIFORM_CUDA_HIP_RT_CHECK(
TApi::streamWaitEvent(spQueue->getNativeHandle(), event.getNativeHandle(), 0));
}
}
};
//! The CUDA/HIP RT event native handle trait specialization.
Expand Down
6 changes: 5 additions & 1 deletion include/alpaka/queue/cuda_hip/QueueUniformCudaHipRt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
#include "alpaka/core/Concepts.hpp"
#include "alpaka/core/Cuda.hpp"
#include "alpaka/core/Hip.hpp"
#include "alpaka/dev/DevUniformCudaHipRt.hpp"
#include "alpaka/dev/Traits.hpp"
#include "alpaka/event/Traits.hpp"
#include "alpaka/meta/DependentFalseType.hpp"
#include "alpaka/queue/Traits.hpp"
#include "alpaka/traits/Traits.hpp"
#include "alpaka/wait/Traits.hpp"

#include <condition_variable>
Expand All @@ -30,6 +30,9 @@ namespace alpaka
template<typename TApi>
class EventUniformCudaHipRt;

template<typename TApi>
class DevUniformCudaHipRt;

namespace uniform_cuda_hip::detail
{
//! The CUDA/HIP RT queue implementation.
Expand Down Expand Up @@ -94,6 +97,7 @@ namespace alpaka
//! Creates a queue on the given device.
//!
//! Allocates the shared queue implementation for `dev` and then registers it with the device.
//!
//! @param dev device the queue is created on and registered with
ALPAKA_FN_HOST QueueUniformCudaHipRt(DevUniformCudaHipRt<TApi> const& dev)
: m_spQueueImpl(std::make_shared<QueueUniformCudaHipRtImpl<TApi>>(dev))
{
// Register the implementation with the device so that the device can enumerate its queues.
dev.registerQueue(m_spQueueImpl);
}
ALPAKA_FN_HOST auto operator==(QueueUniformCudaHipRt const& rhs) const -> bool
{
Expand Down

0 comments on commit 0f93ef8

Please sign in to comment.