From 84565f5b20e4f0bb6027df641733521b9f87ea9f Mon Sep 17 00:00:00 2001 From: Elias Stehle <3958403+elstehle@users.noreply.github.com> Date: Sat, 3 Feb 2024 08:22:31 +0100 Subject: [PATCH] Ports `cub::DeviceMergeSort` tests to Catch2 (#1319) * adds catch2 tests for device-scope merge sort * adds comments and pre-populates output buffers * adds more variance to tests * [skip-tests] removes unused overload * addresses review comments * replaces fill with vector value init * avoids materializing ground truth data where possible * uses custom_type_t instead of thrust::tuple in tests --- cub/test/catch2_test_device_merge_sort.cu | 307 ++++++++++++++++++ ... catch2_test_device_merge_sort_common.cuh} | 83 +++-- ...catch2_test_device_merge_sort_iterators.cu | 224 +++++++++++++ .../catch2_test_device_merge_sort_vsmem.cu | 99 ++++++ cub/test/test_device_merge_sort.cu | 131 -------- cub/test/test_device_merge_sort.cuh | 248 -------------- 6 files changed, 668 insertions(+), 424 deletions(-) create mode 100644 cub/test/catch2_test_device_merge_sort.cu rename cub/test/{test_device_merge_sort_vsmem.cu => catch2_test_device_merge_sort_common.cuh} (52%) create mode 100644 cub/test/catch2_test_device_merge_sort_iterators.cu create mode 100644 cub/test/catch2_test_device_merge_sort_vsmem.cu delete mode 100644 cub/test/test_device_merge_sort.cu delete mode 100644 cub/test/test_device_merge_sort.cuh diff --git a/cub/test/catch2_test_device_merge_sort.cu b/cub/test/catch2_test_device_merge_sort.cu new file mode 100644 index 00000000000..40172c27a36 --- /dev/null +++ b/cub/test/catch2_test_device_merge_sort.cu @@ -0,0 +1,307 @@ +/****************************************************************************** + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "catch2_test_device_merge_sort_common.cuh" +#include "catch2_test_helper.h" +#include "catch2_test_launch_helper.h" + +// %PARAM% TEST_LAUNCH lid 0:1:2 + +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairs, sort_pairs); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairsCopy, sort_pairs_copy); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortPairs, stable_sort_pairs); + +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeys, sort_keys); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeysCopy, sort_keys_copy); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeys, stable_sort_keys); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeysCopy, stable_sort_keys_copy); + +using key_types = + c2h::type_list>; +using wide_key_types = c2h::type_list; + +using value_types = + c2h::type_list>; + +/** + * Function object that maps the targeted sorted rank of an item to a key. + + * E.g., `OffsetT` is `int32_t` and `KeyT` is `float`: + * [ 4, 2, 3, 1, 0] <= targeted key ranks + * [4.0, 2.0, 3.0, 1.0, 0.0] <= corresponding keys + */ +template +struct rank_to_key_op_t +{ + __device__ __host__ KeyT operator()(const OffsetT& val) + { + return static_cast(val); + } +}; + +template +struct rank_to_key_op_t> +{ + using custom_t = c2h::custom_type_t; + __device__ __host__ custom_t operator()(const OffsetT& val) + { + custom_t custom_val{}; + custom_val.key = val; + custom_val.val = val; + return custom_val; + } +}; + +/** + * Helps initialize custom_type_t from a zip-iterator combination of sort-key and value + */ +template +struct tuple_to_custom_op_t +{ + template + __device__ __host__ CustomT operator()(const thrust::tuple& val) + { + CustomT custom_val{}; + custom_val.key = static_cast(thrust::get<0>(val)); + custom_val.val = static_cast(thrust::get<1>(val)); + return custom_val; + } +}; + +/** + * Generates a shuffled array of key ranks. E.g., for a vector of size 5: [4, 2, 3, 1, 0] + */ +template +thrust::device_vector make_shuffled_key_ranks_vector(OffsetT num_items, c2h::seed_t seed) +{ + thrust::device_vector key_ranks(num_items); + thrust::sequence(key_ranks.begin(), key_ranks.end()); + thrust::shuffle( + key_ranks.begin(), key_ranks.end(), thrust::default_random_engine{static_cast(seed.get())}); + return key_ranks; +} + +CUB_TEST("DeviceMergeSort::SortKeysCopy works", "[merge][sort][device]", wide_key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + auto key_ranks = make_shuffled_key_ranks_vector(num_items, CUB_SEED(2)); + thrust::device_vector keys_in(num_items); + thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in.begin(), rank_to_key_op_t{}); + + // Perform sort + thrust::device_vector keys_out(num_items, static_cast(42)); + sort_keys_copy( + thrust::raw_pointer_cast(keys_in.data()), thrust::raw_pointer_cast(keys_out.data()), num_items, custom_less_op_t{}); + + // Verify results + auto key_ranks_it = thrust::make_counting_iterator(offset_t{}); + auto keys_expected_it = thrust::make_transform_iterator(key_ranks_it, rank_to_key_op_t{}); + bool results_equal = thrust::equal(keys_out.cbegin(), keys_out.cend(), keys_expected_it); + REQUIRE(results_equal == true); +} + +CUB_TEST("DeviceMergeSort::SortKeys works", "[merge][sort][device]", wide_key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + auto key_ranks = make_shuffled_key_ranks_vector(num_items, CUB_SEED(2)); + thrust::device_vector keys_in_out(num_items); + thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in_out.begin(), rank_to_key_op_t{}); + + // Perform sort + sort_keys(thrust::raw_pointer_cast(keys_in_out.data()), num_items, custom_less_op_t{}); + + // Verify results + auto key_ranks_it = thrust::make_counting_iterator(offset_t{}); + auto keys_expected_it = thrust::make_transform_iterator(key_ranks_it, rank_to_key_op_t{}); + bool results_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + REQUIRE(results_equal == true); +} + +CUB_TEST("DeviceMergeSort::StableSortKeysCopy works and performs a stable sort when there are a lot sort-keys that " + "compare equal", + "[merge][sort][device]") +{ + using key_t = c2h::custom_type_t; + using offset_t = std::size_t; + + // Prepare input (generate a items that compare equally to check for stability of sort) + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + thrust::device_vector key_ranks(num_items); + c2h::gen(CUB_SEED(2), key_ranks, offset_t{}, static_cast(128)); + thrust::device_vector keys_in(num_items); + auto key_value_it = thrust::make_counting_iterator(offset_t{}); + auto key_init_it = thrust::make_zip_iterator(key_ranks.begin(), key_value_it); + thrust::transform(key_init_it, key_init_it + num_items, keys_in.begin(), tuple_to_custom_op_t{}); + + // Perform sort + thrust::device_vector keys_out(num_items, rank_to_key_op_t{}(42)); + stable_sort_keys_copy( + thrust::raw_pointer_cast(keys_in.data()), thrust::raw_pointer_cast(keys_out.data()), num_items, custom_less_op_t{}); + + // Verify results + thrust::host_vector keys_expected(keys_in); + std::stable_sort(keys_expected.begin(), keys_expected.end(), custom_less_op_t{}); + + REQUIRE(keys_expected == keys_out); +} + +CUB_TEST("DeviceMergeSort::StableSortKeys works", "[merge][sort][device]") +{ + using key_t = c2h::custom_type_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + thrust::device_vector keys_in_out(num_items); + c2h::gen(CUB_SEED(2), keys_in_out); + + // Perform sort + stable_sort_keys(thrust::raw_pointer_cast(keys_in_out.data()), num_items, custom_less_op_t{}); + + // Verify results + thrust::host_vector keys_expected(keys_in_out); + std::stable_sort(keys_expected.begin(), keys_expected.end(), custom_less_op_t{}); + + REQUIRE(keys_expected == keys_in_out); +} + +CUB_TEST("DeviceMergeSort::SortPairsCopy works", "[merge][sort][device]", wide_key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + auto key_ranks = make_shuffled_key_ranks_vector(num_items, CUB_SEED(2)); + thrust::device_vector keys_in(num_items); + thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in.begin(), rank_to_key_op_t{}); + + // Perform sort + thrust::device_vector keys_out(num_items, static_cast(42)); + thrust::device_vector values_out(num_items, static_cast(42)); + sort_pairs_copy( + thrust::raw_pointer_cast(keys_in.data()), + thrust::raw_pointer_cast(key_ranks.data()), + thrust::raw_pointer_cast(keys_out.data()), + thrust::raw_pointer_cast(values_out.data()), + num_items, + custom_less_op_t{}); + + // Verify results + auto key_ranks_it = thrust::make_counting_iterator(offset_t{}); + auto keys_expected_it = thrust::make_transform_iterator(key_ranks_it, rank_to_key_op_t{}); + auto values_expected_it = thrust::make_counting_iterator(offset_t{}); + bool keys_equal = thrust::equal(keys_out.cbegin(), keys_out.cend(), keys_expected_it); + bool values_equal = thrust::equal(values_out.cbegin(), values_out.cend(), values_expected_it); + REQUIRE(keys_equal == true); + REQUIRE(values_equal == true); +} + +CUB_TEST("DeviceMergeSort::SortPairs works", "[merge][sort][device]", wide_key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + auto key_ranks = make_shuffled_key_ranks_vector(num_items, CUB_SEED(2)); + thrust::device_vector keys_in_out(num_items); + thrust::transform(key_ranks.begin(), key_ranks.end(), keys_in_out.begin(), rank_to_key_op_t{}); + + // Perform sort + sort_pairs(thrust::raw_pointer_cast(keys_in_out.data()), + thrust::raw_pointer_cast(key_ranks.data()), + num_items, + custom_less_op_t{}); + + // Verify results + auto key_ranks_it = thrust::make_counting_iterator(offset_t{}); + auto keys_expected_it = thrust::make_transform_iterator(key_ranks_it, rank_to_key_op_t{}); + auto values_expected_it = thrust::make_counting_iterator(offset_t{}); + bool keys_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + bool values_equal = thrust::equal(key_ranks.cbegin(), key_ranks.cend(), values_expected_it); + REQUIRE(keys_equal == true); + REQUIRE(values_equal == true); +} + +CUB_TEST( + "DeviceMergeSort::StableSortPairs works and performs a stable sort", "[merge][sort][device]", key_types, value_types) +{ + using key_t = typename c2h::get<0, TestType>; + using data_t = typename c2h::get<1, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000, 2000000})); + thrust::device_vector keys_in_out(num_items); + thrust::device_vector values_in_out(num_items); + c2h::gen(CUB_SEED(2), keys_in_out); + c2h::gen(CUB_SEED(1), values_in_out); + + // Prepare host data for verification + thrust::host_vector keys_expected(keys_in_out); + thrust::host_vector values_expected(values_in_out); + auto zipped_expected_it = thrust::make_zip_iterator(keys_expected.begin(), values_expected.begin()); + std::stable_sort(zipped_expected_it, zipped_expected_it + num_items, compare_first_lt_op_t{}); + + // Perform sort + stable_sort_pairs(thrust::raw_pointer_cast(keys_in_out.data()), + thrust::raw_pointer_cast(values_in_out.data()), + num_items, + custom_less_op_t{}); + + REQUIRE(keys_expected == keys_in_out); + REQUIRE(values_expected == values_in_out); +} diff --git a/cub/test/test_device_merge_sort_vsmem.cu b/cub/test/catch2_test_device_merge_sort_common.cuh similarity index 52% rename from cub/test/test_device_merge_sort_vsmem.cu rename to cub/test/catch2_test_device_merge_sort_common.cuh index 6ea43e3079f..a14492b05c4 100644 --- a/cub/test/test_device_merge_sort_vsmem.cu +++ b/cub/test/catch2_test_device_merge_sort_common.cuh @@ -1,5 +1,5 @@ /****************************************************************************** - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -25,53 +25,46 @@ * ******************************************************************************/ -/****************************************************************************** - * Test of DeviceMergeSort utilities using large user types (i.e., with vsmem utilities) - ******************************************************************************/ - -// Ensure printing of CUDA runtime errors to console -#define CUB_STDERR - -#include -#include // for std::bad_alloc +#pragma once -#include "test_device_merge_sort.cuh" -#include "test_util.h" - -int main(int argc, char** argv) +/** + * Custom comparator that simply uses `operator <` of the given type. + */ +struct custom_less_op_t { - CommandLineArgs args(argc, argv); - - // Initialize device - CubDebugExit(args.DeviceInit()); - - using DataType = int64_t; + template + __host__ __device__ bool operator()(const T& lhs, const T& rhs) + { + return lhs < rhs; + } +}; - thrust::default_random_engine rng; - for (unsigned int pow2 = 9; pow2 < 22; pow2 += 2) +/** + * Custom comparator that compares a tuple type's first element using `operator <`. + */ +struct compare_first_lt_op_t +{ + /** + * We need to be able to have two different types for lhs and rhs, as the call to std::stable_sort with a + * zip-iterator, will pass a thrust::tuple for lhs and a tuple_of_iterator_references for rhs. + */ + template + __host__ __device__ bool operator()(const LhsT& lhs, const RhsT& rhs) const { - try - { - const unsigned int num_items = 1 << pow2; - // Testing vsmem facility with a fallback policy - TestHelper::AllocateAndTest, DataType>(rng, num_items); - // Testing vsmem facility with virtual shared memory - TestHelper::AllocateAndTest, DataType>(rng, num_items); - } - catch (std::bad_alloc& e) - { - if (pow2 > 20) - { // Some cards don't have enough memory for large allocations, these - // can be skipped. - printf("Skipping large memory test. (num_items=2^%u): %s\n", pow2, e.what()); - } - else - { // For smaller problem sizes, treat as an error: - printf("Error (num_items=2^%u): %s", pow2, e.what()); - throw; - } - } + return thrust::get<0>(lhs) < thrust::get<0>(rhs); } +}; - return 0; -} +/** + * Function object to computes the modulo of a given value. Used within sort tests to reduce the value-range of sort + * keys and, hence, cause more ties between sort keys. + */ +template +struct mod_op_t +{ + T mod; + __host__ __device__ T operator()(T val) const + { + return val % mod; + } +}; diff --git a/cub/test/catch2_test_device_merge_sort_iterators.cu b/cub/test/catch2_test_device_merge_sort_iterators.cu new file mode 100644 index 00000000000..8cac6b01a56 --- /dev/null +++ b/cub/test/catch2_test_device_merge_sort_iterators.cu @@ -0,0 +1,224 @@ +/****************************************************************************** + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "catch2_test_device_merge_sort_common.cuh" +#include "catch2_test_helper.h" +#include "catch2_test_launch_helper.h" + +// %PARAM% TEST_LAUNCH lid 0:1:2 + +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairs, sort_pairs); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortPairsCopy, sort_pairs_copy); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortPairs, stable_sort_pairs); + +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeys, sort_keys); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::SortKeysCopy, sort_keys_copy); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeys, stable_sort_keys); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeysCopy, stable_sort_keys_copy); + +CUB_TEST("DeviceMergeSort::SortKeysCopy works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + auto keys_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_in_it = thrust::make_reverse_iterator(keys_counting_it + num_items); + + // Perform sort + thrust::device_vector keys_out(num_items, static_cast(42)); + sort_keys_copy(keys_in_it, keys_out.begin(), num_items, custom_less_op_t{}); + + // Verify results + auto keys_expected_it = keys_counting_it; + bool keys_equal = thrust::equal(keys_out.cbegin(), keys_out.cend(), keys_expected_it); + REQUIRE(keys_equal == true); +} + +CUB_TEST("DeviceMergeSort::StableSortKeysCopy works with iterators and is stable", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using offset_t = std::int32_t; + + // Prepare input (ensure we have multiple sort keys that compare equal to check stability) + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + auto sort_key_it = thrust::make_transform_iterator(thrust::make_counting_iterator(key_t{}), mod_op_t{128}); + auto key_idx_it = thrust::make_counting_iterator(offset_t{}); + auto keys_in_it = thrust::make_zip_iterator(sort_key_it, key_idx_it); + + // Perform sort + thrust::device_vector> keys_out( + num_items, thrust::tuple{static_cast(42), static_cast(42)}); + stable_sort_keys_copy(keys_in_it, keys_out.begin(), num_items, compare_first_lt_op_t{}); + + // Verify results + thrust::host_vector> keys_expected(num_items); + thrust::copy(keys_in_it, keys_in_it + num_items, keys_expected.begin()); + std::stable_sort(keys_expected.begin(), keys_expected.end(), compare_first_lt_op_t{}); + + REQUIRE(keys_expected == keys_out); +} + +CUB_TEST("DeviceMergeSort::SortKeys works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + thrust::device_vector keys_in_out(num_items); + thrust::sequence(keys_in_out.begin(), keys_in_out.end()); + auto keys_in_it = thrust::make_reverse_iterator(keys_in_out.end()); + + // Perform sort + sort_keys(keys_in_it, num_items, custom_less_op_t{}); + + // Verify results + auto keys_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_expected_it = thrust::make_reverse_iterator(keys_counting_it + num_items); + bool keys_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + REQUIRE(keys_equal == true); +} + +CUB_TEST("DeviceMergeSort::StableSortKeys works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + thrust::device_vector keys_in_out(num_items); + thrust::sequence(keys_in_out.begin(), keys_in_out.end()); + auto keys_in_it = thrust::make_reverse_iterator(keys_in_out.end()); + + // Perform sort + stable_sort_keys(keys_in_it, num_items, custom_less_op_t{}); + + // Verify results + auto keys_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_expected_it = thrust::make_reverse_iterator(keys_counting_it + num_items); + bool keys_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + REQUIRE(keys_equal == true); +} + +CUB_TEST("DeviceMergeSort::SortPairsCopy works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using data_t = std::uint64_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + auto key_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_in = thrust::make_reverse_iterator(key_counting_it + num_items); + auto values_in = thrust::make_counting_iterator(data_t{}) + num_items; + + // Perform sort + thrust::device_vector keys_out(num_items, static_cast(42)); + thrust::device_vector values_out(num_items, static_cast(42)); + sort_pairs_copy(keys_in, values_in, keys_out.begin(), values_out.begin(), num_items, custom_less_op_t{}); + + // Verify results + auto keys_expected_it = key_counting_it; + auto values_expected_it = thrust::make_reverse_iterator(values_in + num_items); + bool keys_equal = thrust::equal(keys_out.cbegin(), keys_out.cend(), keys_expected_it); + bool values_equal = thrust::equal(values_out.cbegin(), values_out.cend(), values_expected_it); + REQUIRE(keys_equal == true); + REQUIRE(values_equal == true); +} + +CUB_TEST("DeviceMergeSort::SortPairs works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using data_t = std::uint64_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + thrust::device_vector keys_in_out(num_items); + thrust::device_vector values_in_out(num_items); + thrust::sequence(keys_in_out.begin(), keys_in_out.end()); + thrust::sequence(values_in_out.begin(), values_in_out.end()); + thrust::reverse(values_in_out.begin(), values_in_out.end()); + auto keys_in_it = thrust::make_reverse_iterator(keys_in_out.end()); + + // Perform sort + sort_pairs(keys_in_it, values_in_out.begin(), num_items, custom_less_op_t{}); + + // Verify results + auto keys_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_expected_it = thrust::make_reverse_iterator(keys_counting_it + num_items); + auto values_expected_it = thrust::make_counting_iterator(data_t{}); + bool keys_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + bool values_equal = thrust::equal(values_in_out.cbegin(), values_in_out.cend(), values_expected_it); + REQUIRE(keys_equal == true); + REQUIRE(values_equal == true); +} + +CUB_TEST("DeviceMergeSort::StableSortPairs works with iterators", "[merge][sort][device]") +{ + using key_t = std::uint32_t; + using data_t = std::uint64_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 1000000)), values({500, 1000000})); + thrust::device_vector keys_in_out(num_items); + thrust::device_vector values_in_out(num_items); + thrust::sequence(keys_in_out.begin(), keys_in_out.end()); + thrust::sequence(values_in_out.begin(), values_in_out.end()); + thrust::reverse(values_in_out.begin(), values_in_out.end()); + auto keys_in_it = thrust::make_reverse_iterator(keys_in_out.end()); + + // Perform sort + stable_sort_pairs(keys_in_it, values_in_out.begin(), num_items, custom_less_op_t{}); + + // Verify results + auto keys_counting_it = thrust::make_counting_iterator(key_t{}); + auto keys_expected_it = thrust::make_reverse_iterator(keys_counting_it + num_items); + auto values_expected_it = thrust::make_counting_iterator(data_t{}); + bool keys_equal = thrust::equal(keys_in_out.cbegin(), keys_in_out.cend(), keys_expected_it); + bool values_equal = thrust::equal(values_in_out.cbegin(), values_in_out.cend(), values_expected_it); + REQUIRE(keys_equal == true); + REQUIRE(values_equal == true); +} diff --git a/cub/test/catch2_test_device_merge_sort_vsmem.cu b/cub/test/catch2_test_device_merge_sort_vsmem.cu new file mode 100644 index 00000000000..62a908e18a1 --- /dev/null +++ b/cub/test/catch2_test_device_merge_sort_vsmem.cu @@ -0,0 +1,99 @@ +/****************************************************************************** + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +#include +#include +#include +#include + +#include + +#include "catch2_test_device_merge_sort_common.cuh" +#include "catch2_test_helper.h" +#include "catch2_test_launch_helper.h" + +// %PARAM% TEST_LAUNCH lid 0:1:2 + +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortKeys, stable_sort_keys); +DECLARE_LAUNCH_WRAPPER(cub::DeviceMergeSort::StableSortPairs, stable_sort_pairs); + +using key_types = + c2h::type_list::type>, + c2h::custom_type_t::type>>; + +CUB_TEST("DeviceMergeSort::StableSortKeys works for large types", "[merge][sort][device]", key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 10000))); + thrust::device_vector keys_in_out(num_items); + c2h::gen(CUB_SEED(2), keys_in_out); + + // Prepare host data for verification + thrust::host_vector keys_expected(keys_in_out); + std::stable_sort(keys_expected.begin(), keys_expected.end(), custom_less_op_t{}); + + // Perform sort + stable_sort_keys(thrust::raw_pointer_cast(keys_in_out.data()), num_items, custom_less_op_t{}); + + // Verify results + REQUIRE(keys_expected == keys_in_out); +} + +CUB_TEST("DeviceMergeSort::StableSortPairs works for large types", "[merge][sort][device]", key_types) +{ + using key_t = typename c2h::get<0, TestType>; + using data_t = std::uint32_t; + using offset_t = std::int32_t; + + // Prepare input + const offset_t num_items = GENERATE_COPY(take(2, random(1, 10000))); + thrust::device_vector keys_in_out(num_items); + thrust::device_vector values_in_out(num_items); + c2h::gen(CUB_SEED(2), keys_in_out); + c2h::gen(CUB_SEED(1), values_in_out); + + // Prepare host data for verification + thrust::host_vector keys_expected(keys_in_out); + thrust::host_vector values_expected(values_in_out); + auto zipped_expected_it = thrust::make_zip_iterator(keys_expected.begin(), values_expected.begin()); + std::stable_sort(zipped_expected_it, zipped_expected_it + num_items, compare_first_lt_op_t{}); + + // Perform sort + stable_sort_pairs(thrust::raw_pointer_cast(keys_in_out.data()), + thrust::raw_pointer_cast(values_in_out.data()), + num_items, + custom_less_op_t{}); + + // Verify results + REQUIRE(keys_expected == keys_in_out); + REQUIRE(values_expected == values_in_out); +} diff --git a/cub/test/test_device_merge_sort.cu b/cub/test/test_device_merge_sort.cu deleted file mode 100644 index 757969fe7bb..00000000000 --- a/cub/test/test_device_merge_sort.cu +++ /dev/null @@ -1,131 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/****************************************************************************** - * Test of DeviceMergeSort utilities - ******************************************************************************/ - -// Ensure printing of CUDA runtime errors to console -#define CUB_STDERR - -#include - -#include -#include -#include - -#include -#include // for std::bad_alloc - -#include "test_device_merge_sort.cuh" -#include "test_util.h" - -using namespace cub; - -template -void Test(thrust::default_random_engine& rng, unsigned int num_items) -{ - TestHelper::template AllocateAndTest( - rng, num_items); - TestHelper::template AllocateAndTest( - rng, num_items); - TestHelper::template AllocateAndTest( - rng, num_items); -} - -template -void AllocateAndTestIterators(unsigned int num_items) -{ - thrust::device_vector d_keys(num_items); - thrust::device_vector d_values(num_items); - - thrust::sequence(d_keys.begin(), d_keys.end()); - thrust::sequence(d_values.begin(), d_values.end()); - - thrust::reverse(d_values.begin(), d_values.end()); - - using KeyIterator = typename thrust::device_vector::iterator; - thrust::reverse_iterator reverse_iter(d_keys.end()); - - size_t temp_size = 0; - cub::DeviceMergeSort::SortPairs( - nullptr, temp_size, reverse_iter, thrust::raw_pointer_cast(d_values.data()), num_items, CustomLess()); - - thrust::device_vector tmp(temp_size); - - cub::DeviceMergeSort::SortPairs( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - reverse_iter, - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess()); - - AssertTrue(CheckResult(d_values)); -} - -template -void Test(thrust::default_random_engine& rng) -{ - for (unsigned int pow2 = 9; pow2 < 22; pow2 += 2) - { - try - { - const unsigned int num_items = 1 << pow2; - AllocateAndTestIterators(num_items); - Test(rng, num_items); - } - catch (std::bad_alloc& e) - { - if (pow2 > 20) - { // Some cards don't have enough memory for large allocations, these - // can be skipped. - printf("Skipping large memory test. (num_items=2^%u): %s\n", pow2, e.what()); - } - else - { // For smaller problem sizes, treat as an error: - printf("Error (num_items=2^%u): %s", pow2, e.what()); - throw; - } - } - } -} - -int main(int argc, char** argv) -{ - CommandLineArgs args(argc, argv); - - // Initialize device - CubDebugExit(args.DeviceInit()); - - thrust::default_random_engine rng; - - Test(rng); - Test(rng); - - return 0; -} diff --git a/cub/test/test_device_merge_sort.cuh b/cub/test/test_device_merge_sort.cuh deleted file mode 100644 index 451b9e95faf..00000000000 --- a/cub/test/test_device_merge_sort.cuh +++ /dev/null @@ -1,248 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - ******************************************************************************/ - -/****************************************************************************** - * Common header for testing of DeviceMergeSort utilities - ******************************************************************************/ - -#pragma once - -// Ensure printing of CUDA runtime errors to console -#define CUB_STDERR - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "test_util.h" - -using namespace cub; - -struct CustomLess -{ - template - __device__ bool operator()(DataType& lhs, DataType& rhs) - { - return lhs < rhs; - } -}; - -template -bool CheckResult(thrust::device_vector& d_data) -{ - const bool is_sorted = thrust::is_sorted(d_data.begin(), d_data.end(), CustomLess()); - return is_sorted; -} - -template -struct ValueToKey -{ - __device__ __host__ KeyType operator()(const ValueType& val) - { - return val; - } -}; - -template -struct ValueToKey, ValueType> -{ - __device__ __host__ HugeDataType operator()(const ValueType& val) - { - return HugeDataType(val); - } -}; - -template -void Test(std::int64_t num_items, - thrust::default_random_engine& rng, - thrust::device_vector& d_keys, - thrust::device_vector& d_values) -{ - thrust::sequence(d_values.begin(), d_values.end()); - thrust::shuffle(d_values.begin(), d_values.end(), rng); - - thrust::transform(d_values.begin(), d_values.end(), d_keys.begin(), ValueToKey()); - - thrust::device_vector d_keys_before_sort(d_keys); - thrust::device_vector d_values_before_sort(d_values); - - thrust::device_vector d_keys_before_sort_copy(d_keys); - thrust::device_vector d_values_before_sort_copy(d_values); - - size_t temp_size = 0; - CubDebugExit(cub::DeviceMergeSort::SortPairs( - nullptr, - temp_size, - thrust::raw_pointer_cast(d_keys.data()), - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess())); - - thrust::device_vector tmp(temp_size); - - CubDebugExit(cub::DeviceMergeSort::SortPairs( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::raw_pointer_cast(d_keys.data()), - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess())); - - thrust::device_vector d_keys_after_sort_copy(d_keys); - thrust::device_vector d_values_after_sort_copy(d_values); - - AssertTrue(CheckResult(d_values)); - - CubDebugExit(cub::DeviceMergeSort::SortPairsCopy( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::raw_pointer_cast(d_keys_before_sort.data()), - thrust::raw_pointer_cast(d_values_before_sort.data()), - thrust::raw_pointer_cast(d_keys.data()), - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess())); - - AssertEquals(d_keys, d_keys_after_sort_copy); - AssertEquals(d_values, d_values_after_sort_copy); - AssertEquals(d_keys_before_sort, d_keys_before_sort_copy); - AssertEquals(d_values_before_sort, d_values_before_sort_copy); - - // At the moment stable sort is an alias to sort, so it's safe to use - // temp_size storage allocated before - CubDebugExit(cub::DeviceMergeSort::StableSortPairs( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::raw_pointer_cast(d_keys.data()), - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess())); - - AssertTrue(CheckResult(d_values)); - - CubDebugExit(cub::DeviceMergeSort::SortPairsCopy( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::constant_iterator(KeyType(42)), - thrust::counting_iterator(DataType(0)), - thrust::raw_pointer_cast(d_keys.data()), - thrust::raw_pointer_cast(d_values.data()), - num_items, - CustomLess())); - - thrust::sequence(d_values_before_sort.begin(), d_values_before_sort.end()); - - AssertEquals(d_values, d_values_before_sort); -} - -template -void TestKeys(std::int64_t num_items, - thrust::default_random_engine& rng, - thrust::device_vector& d_keys, - thrust::device_vector& d_values) -{ - thrust::sequence(d_values.begin(), d_values.end()); - thrust::shuffle(d_values.begin(), d_values.end(), rng); - - thrust::transform(d_values.begin(), d_values.end(), d_keys.begin(), ValueToKey()); - - thrust::device_vector d_before_sort(d_keys); - thrust::device_vector d_before_sort_copy(d_keys); - - size_t temp_size = 0; - cub::DeviceMergeSort::SortKeys(nullptr, temp_size, thrust::raw_pointer_cast(d_keys.data()), num_items, CustomLess()); - - thrust::device_vector tmp(temp_size); - - CubDebugExit(cub::DeviceMergeSort::SortKeys( - thrust::raw_pointer_cast(tmp.data()), temp_size, thrust::raw_pointer_cast(d_keys.data()), num_items, CustomLess())); - - thrust::device_vector d_after_sort(d_keys); - - AssertTrue(CheckResult(d_keys)); - - CubDebugExit(cub::DeviceMergeSort::SortKeysCopy( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::raw_pointer_cast(d_before_sort.data()), - thrust::raw_pointer_cast(d_keys.data()), - num_items, - CustomLess())); - - AssertTrue(d_keys == d_after_sort); - AssertTrue(d_before_sort == d_before_sort_copy); - - // At the moment stable sort is an alias to sort, so it's safe to use - // temp_size storage allocated before - CubDebugExit(cub::DeviceMergeSort::StableSortKeys( - thrust::raw_pointer_cast(tmp.data()), temp_size, thrust::raw_pointer_cast(d_keys.data()), num_items, CustomLess())); - - AssertTrue(CheckResult(d_keys)); - - thrust::fill(d_keys.begin(), d_keys.end(), KeyType{}); - CubDebugExit(cub::DeviceMergeSort::StableSortKeysCopy( - thrust::raw_pointer_cast(tmp.data()), - temp_size, - thrust::raw_pointer_cast(d_before_sort.data()), - thrust::raw_pointer_cast(d_keys.data()), - num_items, - CustomLess())); - - // AssertTrue(CheckResult(d_keys)); - AssertTrue(d_keys == d_after_sort); - AssertTrue(d_before_sort == d_before_sort_copy); -} - -template -struct TestHelper -{ - template - static void AllocateAndTest(thrust::default_random_engine& rng, unsigned int num_items) - { - thrust::device_vector d_keys(num_items); - thrust::device_vector d_values(num_items); - - Test(num_items, rng, d_keys, d_values); - TestKeys(num_items, rng, d_keys, d_values); - } -}; - -template <> -struct TestHelper -{ - template - static void AllocateAndTest(thrust::default_random_engine&, unsigned int) - {} -};