Skip to content

Commit

Permalink
fix load iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jan 26, 2025
1 parent 28503e8 commit 2bb84eb
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 41 deletions.
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ template <typename Policy,
bool ReadLeft>
struct AgentDifference
{
using LoadIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, InputIteratorT>::type;
using LoadIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, InputIteratorT>::type;

using BlockLoad = typename cub::BlockLoadType<Policy, LoadIt>::type;
using BlockStore = typename cub::BlockStoreType<Policy, OutputIteratorT, OutputT>::type;
Expand Down Expand Up @@ -119,7 +119,7 @@ struct AgentDifference
OffsetT num_items)
: temp_storage(temp_storage.Alias())
, input_it(input_it)
, load_it(THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(Policy(), input_it))
, load_it(THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(Policy(), input_it))
, first_tile_previous(first_tile_previous)
, result(result)
, difference_op(difference_op)
Expand Down
8 changes: 4 additions & 4 deletions cub/cub/agent/agent_merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ struct agent_t
using key_type = typename ::cuda::std::iterator_traits<KeysIt1>::value_type;
using item_type = typename ::cuda::std::iterator_traits<ItemsIt1>::value_type;

using keys_load_it1 = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, KeysIt1>::type;
using keys_load_it2 = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, KeysIt2>::type;
using items_load_it1 = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, ItemsIt1>::type;
using items_load_it2 = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, ItemsIt2>::type;
using keys_load_it1 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeysIt1>::type;
using keys_load_it2 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeysIt2>::type;
using items_load_it1 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, ItemsIt1>::type;
using items_load_it2 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, ItemsIt2>::type;

using block_load_keys1 = typename BlockLoadType<Policy, keys_load_it1>::type;
using block_load_keys2 = typename BlockLoadType<Policy, keys_load_it2>::type;
Expand Down
15 changes: 9 additions & 6 deletions cub/cub/agent/agent_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ struct AgentBlockSort

using BlockMergeSortT = BlockMergeSort<KeyT, Policy::BLOCK_THREADS, Policy::ITEMS_PER_THREAD, ValueT>;

using KeysLoadIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, KeyInputIteratorT>::type;
using ItemsLoadIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, ValueInputIteratorT>::type;
using KeysLoadIt =
typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeyInputIteratorT>::type;
using ItemsLoadIt =
typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, ValueInputIteratorT>::type;

using BlockLoadKeys = typename cub::BlockLoadType<Policy, KeysLoadIt>::type;
using BlockLoadItems = typename cub::BlockLoadType<Policy, ItemsLoadIt>::type;
Expand Down Expand Up @@ -438,10 +440,11 @@ struct AgentMerge
//---------------------------------------------------------------------
// Types and constants
//---------------------------------------------------------------------
using KeysLoadPingIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, KeyIteratorT>::type;
using ItemsLoadPingIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, ValueIteratorT>::type;
using KeysLoadPongIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, KeyT*>::type;
using ItemsLoadPongIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<Policy, ValueT*>::type;
using KeysLoadPingIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeyIteratorT>::type;
using ItemsLoadPingIt =
typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, ValueIteratorT>::type;
using KeysLoadPongIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeyT*>::type;
using ItemsLoadPongIt = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, ValueT*>::type;

using KeysOutputPongIt = KeyIteratorT;
using ItemsOutputPongIt = ValueIteratorT;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_sub_warp_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ public:

using WarpMergeSortT = WarpMergeSort<KeyT, PolicyT::ITEMS_PER_THREAD, PolicyT::WARP_THREADS, ValueT>;

using KeysLoadItT = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<PolicyT, const KeyT*>::type;
using ItemsLoadItT = typename THRUST_NS_QUALIFIER::cuda_cub::core::LoadIterator<PolicyT, const ValueT*>::type;
using KeysLoadItT = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<PolicyT, const KeyT*>::type;
using ItemsLoadItT = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<PolicyT, const ValueT*>::type;

using WarpLoadKeysT = cub::WarpLoad<KeyT, PolicyT::ITEMS_PER_THREAD, PolicyT::LOAD_ALGORITHM, PolicyT::WARP_THREADS>;
using WarpLoadItemsT =
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/dispatch/dispatch_merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ __launch_bounds__(
CompareOp>::type;
using MergePolicy = typename MergeAgent::policy;

using THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator;
using THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator;
using vsmem_helper_t = vsmem_helper_impl<MergeAgent>;
__shared__ typename vsmem_helper_t::static_temp_storage_t shared_temp_storage;
auto& temp_storage = vsmem_helper_t::get_temp_storage(shared_temp_storage, global_temp_storage);
Expand Down
19 changes: 10 additions & 9 deletions cub/cub/device/dispatch/kernels/merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@

THRUST_NAMESPACE_BEGIN

namespace cuda_cub::core
namespace cuda_cub::core::detail
{
// We must forward declare here because make_load_iterator.h pulls in non NVRTC compilable code
template <class PtxPlan, class It>
typename LoadIterator<PtxPlan, It>::type _CCCL_DEVICE _CCCL_FORCEINLINE make_load_iterator(PtxPlan const&, It it);
} // namespace cuda_cub::core
typename detail::LoadIterator<PtxPlan, It>::type _CCCL_DEVICE _CCCL_FORCEINLINE
make_load_iterator(PtxPlan const&, It it);
} // namespace cuda_cub::core::detail

THRUST_NAMESPACE_END

Expand Down Expand Up @@ -174,8 +175,8 @@ __launch_bounds__(
AgentBlockSortT agent(
ping,
temp_storage,
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), keys_in),
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), items_in),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), keys_in),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), items_in),
keys_count,
keys_out,
items_out,
Expand Down Expand Up @@ -280,10 +281,10 @@ __launch_bounds__(
AgentMergeT agent(
ping,
temp_storage,
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), keys_ping),
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), items_ping),
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), keys_pong),
THRUST_NS_QUALIFIER::cuda_cub::core::make_load_iterator(ActivePolicyT(), items_pong),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), keys_ping),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), items_ping),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), keys_pong),
THRUST_NS_QUALIFIER::cuda_cub::core::detail::make_load_iterator(ActivePolicyT(), items_pong),
keys_count,
keys_pong,
items_pong,
Expand Down
12 changes: 6 additions & 6 deletions thrust/thrust/system/cuda/detail/core/agent_launcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ struct AgentLauncher : Agent
, name(name_)
, grid(static_cast<unsigned int>((count + plan.items_per_tile - 1) / plan.items_per_tile))
, vshmem(nullptr)
, has_shmem((size_t) core::get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, has_shmem((size_t) get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, shmem_size(has_shmem ? plan.shared_memory_size : 0)
{
assert(count > 0);
Expand All @@ -137,7 +137,7 @@ struct AgentLauncher : Agent
, name(name_)
, grid(static_cast<unsigned int>((count + plan.items_per_tile - 1) / plan.items_per_tile))
, vshmem(vshmem)
, has_shmem((size_t) core::get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, has_shmem((size_t) get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, shmem_size(has_shmem ? plan.shared_memory_size : 0)
{
assert(count > 0);
Expand All @@ -150,7 +150,7 @@ struct AgentLauncher : Agent
, name(name_)
, grid(plan.grid_size)
, vshmem(nullptr)
, has_shmem((size_t) core::get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, has_shmem((size_t) get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, shmem_size(has_shmem ? plan.shared_memory_size : 0)
{
assert(plan.grid_size > 0);
Expand All @@ -163,19 +163,19 @@ struct AgentLauncher : Agent
, name(name_)
, grid(plan.grid_size)
, vshmem(vshmem)
, has_shmem((size_t) core::get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, has_shmem((size_t) get_max_shared_memory_per_block() >= (size_t) plan.shared_memory_size)
, shmem_size(has_shmem ? plan.shared_memory_size : 0)
{
assert(plan.grid_size > 0);
}

THRUST_RUNTIME_FUNCTION typename core::get_plan<Agent>::type static get_plan(cudaStream_t, void* d_ptr = 0)
THRUST_RUNTIME_FUNCTION typename get_plan<Agent>::type static get_plan(cudaStream_t, void* d_ptr = 0)
{
THRUST_UNUSED_VAR(d_ptr);
return get_agent_plan<Agent>(get_ptx_version());
}

THRUST_RUNTIME_FUNCTION typename core::get_plan<Agent>::type static get_plan()
THRUST_RUNTIME_FUNCTION typename detail::get_plan<Agent>::type static get_plan()
{
return get_agent_plan<Agent>(lowest_supported_sm_arch::ver);
}
Expand Down
4 changes: 2 additions & 2 deletions thrust/thrust/system/cuda/detail/core/load_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

THRUST_NAMESPACE_BEGIN

namespace cuda_cub::core
namespace cuda_cub::core::detail
{

// LoadIterator
Expand All @@ -52,6 +52,6 @@ struct LoadIterator
cub::CacheModifiedInputIterator<PtxPlan::LOAD_MODIFIER, value_type, size_type>,
It>;
}; // struct Iterator
} // namespace cuda_cub::core
} // namespace cuda_cub::core::detail

THRUST_NAMESPACE_END
4 changes: 2 additions & 2 deletions thrust/thrust/system/cuda/detail/core/make_load_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

THRUST_NAMESPACE_BEGIN

namespace cuda_cub::core
namespace cuda_cub::core::detail
{
template <class PtxPlan, class It>
typename LoadIterator<PtxPlan, It>::type _CCCL_DEVICE _CCCL_FORCEINLINE
Expand All @@ -55,6 +55,6 @@ typename LoadIterator<PtxPlan, It>::type _CCCL_DEVICE _CCCL_FORCEINLINE make_loa
return make_load_iterator_impl<PtxPlan>(it, typename is_contiguous_iterator<It>::type());
}

} // namespace cuda_cub::core
} // namespace cuda_cub::core::detail

THRUST_NAMESPACE_END
6 changes: 3 additions & 3 deletions thrust/thrust/system/cuda/detail/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ template <class, class>
struct Tuning;

template <class T>
struct Tuning<sm30, T>
struct Tuning<core::detail::sm30, T>
{
enum
{
Expand All @@ -129,7 +129,7 @@ struct Tuning<sm30, T>
}; // Tuning sm30

template <class T>
struct Tuning<sm35, T> : Tuning<sm30, T>
struct Tuning<core::detail::sm35, T> : Tuning<core::detail::sm30, T>
{
// ReducePolicy1B (GTX Titan: 228.7 GB/s @ 192M 1B items)
using ReducePolicy1B =
Expand Down Expand Up @@ -204,7 +204,7 @@ struct ReduceAgent
// ptx_plan type *must* only be used from device code
// Its use from host code will result in *undefined behaviour*
//
using ptx_plan = typename core::specialize_plan_msvc10_war<PtxPlan>::type::type;
using ptx_plan = typename core::detail::specialize_plan_msvc10_war<PtxPlan>::type::type;

using TempStorage = typename ptx_plan::TempStorage;
using Vector = typename ptx_plan::Vector;
Expand Down
8 changes: 4 additions & 4 deletions thrust/thrust/system/cuda/detail/reduce_by_key.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ template <class Arch, class Key, class Value>
struct Tuning;

template <class Key, class Value>
struct Tuning<sm30, Key, Value>
struct Tuning<core::detail::sm30, Key, Value>
{
enum
{
Expand All @@ -138,7 +138,7 @@ struct Tuning<sm30, Key, Value>
}; // Tuning sm30

template <class Key, class Value>
struct Tuning<sm35, Key, Value> : Tuning<sm30, Key, Value>
struct Tuning<core::detail::sm35, Key, Value> : Tuning<core::detail::sm30, Key, Value>
{
enum
{
Expand All @@ -162,7 +162,7 @@ struct Tuning<sm35, Key, Value> : Tuning<sm30, Key, Value>
}; // Tuning sm35

template <class Key, class Value>
struct Tuning<sm52, Key, Value> : Tuning<sm30, Key, Value>
struct Tuning<core::detail::sm52, Key, Value> : Tuning<core::detail::sm30, Key, Value>
{
enum
{
Expand Down Expand Up @@ -239,7 +239,7 @@ struct ReduceByKeyAgent
}; // union TempStorage
}; // struct PtxPlan

using ptx_plan = typename core::specialize_plan_msvc10_war<PtxPlan>::type::type;
using ptx_plan = typename core::detail::specialize_plan_msvc10_war<PtxPlan>::type::type;

using KeysLoadIt = typename ptx_plan::KeysLoadIt;
using ValuesLoadIt = typename ptx_plan::ValuesLoadIt;
Expand Down

0 comments on commit 2bb84eb

Please sign in to comment.