Skip to content

Commit

Permalink
addresses review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
elstehle committed Oct 8, 2024
1 parent 8750240 commit 8abd5e9
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 14 deletions.
1 change: 1 addition & 0 deletions cub/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ struct partition_distinct_output_t
* @tparam StreamingContextT
* Type providing the context information for the current partition, with the following member functions:
* input_offset() -> base offset for the input (and flags) iterator
* is_first_partition() -> [Select::Unique-only] whether this is the first partition
* num_previously_selected() -> base offset for the output iterator for selected items
* num_previously_rejected() -> base offset for the output iterator for rejected items (partition only)
* num_total_items() -> total number of items across all partitions (partition only)
Expand Down
10 changes: 4 additions & 6 deletions cub/cub/detail/choose_offset.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@

#include <cub/config.cuh>

#include "cuda/std/__type_traits/is_unsigned.h"

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
Expand Down Expand Up @@ -63,7 +61,7 @@ struct choose_offset
"NumItemsT must be an integral type, but not bool");

// Unsigned integer type for global offsets.
using type = typename ::cuda::std::conditional<sizeof(NumItemsT) <= 4, std::uint32_t, unsigned long long>::type;
using type = typename ::cuda::std::_If<sizeof(NumItemsT) <= 4, std::uint32_t, unsigned long long>::type;
};

/**
Expand All @@ -86,7 +84,7 @@ struct promote_small_offset
"NumItemsT must be an integral type, but not bool");

// Offset type: NumItemsT itself, except that types narrower than 4 bytes are promoted to std::int32_t.
using type = typename ::cuda::std::conditional<sizeof(NumItemsT) < 4, std::int32_t, NumItemsT>::type;
using type = typename ::cuda::std::_If<sizeof(NumItemsT) < 4, std::int32_t, NumItemsT>::type;
};

/**
Expand All @@ -113,10 +111,10 @@ struct choose_signed_offset
// uint32 -> int64, else
// LEQ 4B -> int32, else
// int64
using type = typename ::cuda::std::conditional<
using type = typename ::cuda::std::_If<
(::cuda::std::is_integral<NumItemsT>::value && ::cuda::std::is_unsigned<NumItemsT>::value),
::cuda::std::int64_t,
typename ::cuda::std::conditional<(sizeof(NumItemsT) <= 4), ::cuda::std::int32_t, ::cuda::std::int64_t>::type>::type;
typename ::cuda::std::_If<(sizeof(NumItemsT) <= 4), ::cuda::std::int32_t, ::cuda::std::int64_t>::type>::type;

/**
* Checks if the given num_items can be covered by the selected offset type. If not, returns cudaErrorInvalidValue,
Expand Down
6 changes: 3 additions & 3 deletions cub/cub/device/device_select.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1012,20 +1012,20 @@ struct DeviceSelect
}

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
template <typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT, typename NumItemsT>
template <typename InputIteratorT, typename OutputIteratorT, typename NumSelectedIteratorT>
CUB_DETAIL_RUNTIME_DEBUG_SYNC_IS_NOT_SUPPORTED CUB_RUNTIME_FUNCTION _CCCL_FORCEINLINE static cudaError_t Unique(
void* d_temp_storage,
size_t& temp_storage_bytes,
InputIteratorT d_in,
OutputIteratorT d_out,
NumSelectedIteratorT d_num_selected_out,
NumItemsT num_items,
::cuda::std::int64_t num_items,
cudaStream_t stream,
bool debug_synchronous)
{
CUB_DETAIL_RUNTIME_DEBUG_SYNC_USAGE_LOG

return Unique<InputIteratorT, OutputIteratorT, NumSelectedIteratorT, NumItemsT>(
return Unique<InputIteratorT, OutputIteratorT, NumSelectedIteratorT>(
d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, stream);
}
#endif // DOXYGEN_SHOULD_SKIP_THIS
Expand Down
7 changes: 3 additions & 4 deletions cub/cub/device/dispatch/dispatch_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ public:
return first_partition;
};

_CCCL_HOST_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_selected() const
_CCCL_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_selected() const
{
return first_partition ? TotalNumItemsT{0} : *d_num_selected_in;
};

_CCCL_HOST_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_rejected() const
_CCCL_DEVICE _CCCL_FORCEINLINE TotalNumItemsT num_previously_rejected() const
{
return first_partition ? TotalNumItemsT{0} : (total_previous_num_items - num_previously_selected());
};
Expand Down Expand Up @@ -450,8 +450,7 @@ struct DispatchSelectIf : SelectedPolicy
using num_total_items_t = OffsetT;

// Type used to provide streaming information about each partition's context
static constexpr per_partition_offset_t const partition_size =
cuda::std::numeric_limits<per_partition_offset_t>::max();
static constexpr per_partition_offset_t partition_size = ::cuda::std::numeric_limits<per_partition_offset_t>::max();

// If the values representable by OffsetT exceed the partition_size, we use a kernel template specialization that
// supports streaming (i.e., splitting the input into partitions of up to partition_size number of items)
Expand Down
2 changes: 1 addition & 1 deletion thrust/thrust/system/cuda/detail/copy_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ THRUST_RUNTIME_FUNCTION OutputIt copy_if(
void* temp_storage = static_cast<void*>(tmp.data().get());

// Run algorithm
dispatch64_t::dispatch(
status = dispatch64_t::dispatch(
policy, temp_storage, temp_storage_bytes, first, stencil, output, predicate, static_cast<std::int64_t>(num_items));
cuda_cub::throw_on_error(status, "copy_if failed on 2nd step");

Expand Down

0 comments on commit 8abd5e9

Please sign in to comment.