Skip to content

Commit

Permalink
Add new generate_limit tool to fix numeric_limits not working on custom types and rocprim::half (#604)
Browse files Browse the repository at this point in the history
  • Loading branch information
NB4444 authored Sep 17, 2024
1 parent 60fdf0a commit 45b1942
Show file tree
Hide file tree
Showing 20 changed files with 189 additions and 393 deletions.
16 changes: 4 additions & 12 deletions benchmark/benchmark_block_radix_rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,10 @@ void run_benchmark(benchmark::State& state, size_t N, const managed_seed& seed,
const unsigned int grid_size = ((N + items_per_block - 1) / items_per_block);
const unsigned int size = items_per_block * grid_size;

std::vector<T> input;
if ROCPRIM_IF_CONSTEXPR(std::is_floating_point<T>::value)
{
input = get_random_data<T>(size, static_cast<T>(-1000), static_cast<T>(1000), seed.get_0());
}
else
{
input = get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max(),
seed.get_0());
}
std::vector<T> input = get_random_data<T>(size,
generate_limits<T>::min(),
generate_limits<T>::max(),
seed.get_0());

T* d_input;
unsigned int* d_output;
Expand Down
19 changes: 6 additions & 13 deletions benchmark/benchmark_block_radix_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,19 +136,12 @@ void run_benchmark(benchmark::State& state,
constexpr auto items_per_block = BlockSize * ItemsPerThread;
const auto size = items_per_block * ((N + items_per_block - 1)/items_per_block);

std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = get_random_data<T>(size, static_cast<T>(-1000), static_cast<T>(1000), seed.get_0());
}
else
{
input = get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max(),
seed.get_0());
}
T * d_input;
std::vector<T> input = get_random_data<T>(size,
generate_limits<T>::min(),
generate_limits<T>::max(),
seed.get_0());

T* d_input;
T * d_output;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&d_input), size * sizeof(T)));
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&d_output), size * sizeof(T)));
Expand Down
20 changes: 5 additions & 15 deletions benchmark/benchmark_block_sort.parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,21 +234,11 @@ struct block_sort_benchmark : public config_autotune_interface
{
const auto size = items_per_block * ((N + items_per_block - 1) / items_per_block);

std::vector<KeyType> input;
if(std::is_floating_point<KeyType>::value)
{
input = get_random_data<KeyType>(size,
static_cast<KeyType>(-1000),
static_cast<KeyType>(1000),
seed.get_0());
}
else
{
input = get_random_data<KeyType>(size,
std::numeric_limits<KeyType>::min(),
std::numeric_limits<KeyType>::max(),
seed.get_0());
}
std::vector<KeyType> input = get_random_data<KeyType>(size,
generate_limits<KeyType>::min(),
generate_limits<KeyType>::max(),
seed.get_0());

KeyType* d_input;
KeyType* d_output;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&d_input), size * sizeof(KeyType)));
Expand Down
19 changes: 6 additions & 13 deletions benchmark/benchmark_device_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,19 +335,12 @@ void run_benchmark(benchmark::State& state,
const managed_seed& seed,
const hipStream_t stream)
{
const size_t grid_size = size / (BlockSize * ItemsPerThread);
std::vector<T> input;
if(std::is_floating_point<T>::value)
{
input = get_random_data<T>(size, static_cast<T>(-1000), static_cast<T>(1000), seed.get_0());
}
else
{
input = get_random_data<T>(size,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max(),
seed.get_0());
}
const size_t grid_size = size / (BlockSize * ItemsPerThread);
std::vector<T> input = get_random_data<T>(size,
generate_limits<T>::min(),
generate_limits<T>::max(),
seed.get_0());

T * d_input;
T * d_output;
HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&d_input), size * sizeof(T)));
Expand Down
40 changes: 10 additions & 30 deletions benchmark/benchmark_device_merge_sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,11 @@ struct device_merge_sort_benchmark : public config_autotune_interface
using key_type = Key;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys_output;
Expand Down Expand Up @@ -177,21 +167,11 @@ struct device_merge_sort_benchmark : public config_autotune_interface
using value_type = Value;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

std::vector<value_type> values_input(size);
std::iota(values_input.begin(), values_input.end(), 0);
Expand Down
41 changes: 11 additions & 30 deletions benchmark/benchmark_device_merge_sort_block_merge.parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,21 +88,11 @@ struct device_merge_sort_block_merge_benchmark : public config_autotune_interfac
using key_type = Key;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys;
Expand Down Expand Up @@ -231,21 +221,12 @@ struct device_merge_sort_block_merge_benchmark : public config_autotune_interfac
using value_type = Value;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

std::vector<value_type> values_input(size);
std::iota(values_input.begin(), values_input.end(), 0);

Expand Down
40 changes: 10 additions & 30 deletions benchmark/benchmark_device_merge_sort_block_sort.parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,21 +97,11 @@ struct device_merge_sort_block_sort_benchmark : public config_autotune_interface
using key_type = Key;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys_output;
Expand Down Expand Up @@ -195,21 +185,11 @@ struct device_merge_sort_block_sort_benchmark : public config_autotune_interface
using value_type = Value;

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

std::vector<value_type> values_input(size);
std::iota(values_input.begin(), values_input.end(), 0);
Expand Down
20 changes: 5 additions & 15 deletions benchmark/benchmark_device_nth_element.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,11 @@ struct device_nth_element_benchmark : public config_autotune_interface
}

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys_output;
Expand Down
20 changes: 5 additions & 15 deletions benchmark/benchmark_device_partial_sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,11 @@ struct device_partial_sort_benchmark : public config_autotune_interface
}

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys_new_data;
Expand Down
20 changes: 5 additions & 15 deletions benchmark/benchmark_device_partial_sort_copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,11 @@ struct device_partial_sort_copy_benchmark : public config_autotune_interface
}

// Generate data
std::vector<key_type> keys_input;
if(std::is_floating_point<key_type>::value)
{
keys_input = get_random_data<key_type>(size,
static_cast<key_type>(-1000),
static_cast<key_type>(1000),
seed.get_0());
}
else
{
keys_input = get_random_data<key_type>(size,
std::numeric_limits<key_type>::min(),
std::numeric_limits<key_type>::max(),
seed.get_0());
}
std::vector<key_type> keys_input
= get_random_data<key_type>(size,
generate_limits<key_type>::min(),
generate_limits<key_type>::max(),
seed.get_0());

key_type* d_keys_input;
key_type* d_keys_output;
Expand Down
Loading

0 comments on commit 45b1942

Please sign in to comment.