Skip to content

Commit

Permalink
Test large arrays in device radix sort (#1349)
Browse files Browse the repository at this point in the history
* Experiment with a naive large offset radix sort test.

* Add debug output to checked cuda allocator.

* debug timings

* Switch large memory test to doublebuffer API.

* Add c2h::cpu_timer utility.

* Remove old radix sort test.

* Add initial version of large array sort helper.

* Update CMake presets to no longer skip radix sort tests.

* Remove testing for signed offsets in radix sort.

* Address review comments.

* Silence unused variable warnings.
  • Loading branch information
alliepiper authored Feb 21, 2024
1 parent f0f2e87 commit 77431b8
Show file tree
Hide file tree
Showing 9 changed files with 700 additions and 2,390 deletions.
7 changes: 1 addition & 6 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,7 @@
{
"name": "cub-base",
"hidden": true,
"inherits": "base",
"filter": {
"exclude": {
"name": "device_radix_sort"
}
}
"inherits": "base"
},
{
"name": "cub-cpp11",
Expand Down
18 changes: 18 additions & 0 deletions cub/test/c2h/checked_allocator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@

#include <new>

// #define DEBUG_CHECKED_ALLOC_FAILURE

#ifdef DEBUG_CHECKED_ALLOC_FAILURE
# include <iostream>
#endif

namespace c2h
{
namespace detail
Expand All @@ -56,6 +62,18 @@ inline cudaError_t check_free_device_memory(std::size_t bytes)
constexpr std::size_t padding = 16 * 1024 * 1024; // 16 MiB
if (free_bytes < (bytes + padding))
{
#ifdef DEBUG_CHECKED_ALLOC_FAILURE
const double total_GiB = static_cast<double>(total_bytes) / (1024 * 1024 * 1024);
const double free_GiB = static_cast<double>(free_bytes) / (1024 * 1024 * 1024);
const double requested_GiB = static_cast<double>(bytes) / (1024 * 1024 * 1024);
const double padded_GiB = static_cast<double>(bytes + padding) / (1024 * 1024 * 1024);

std::cerr
<< "Total device mem: " << total_GiB << " GiB\n" //
<< "Free device mem: " << free_GiB << " GiB\n" //
<< "Requested device mem: " << requested_GiB << " GiB\n" //
<< "Padded device mem: " << padded_GiB << " GiB\n";
#endif
return cudaErrorMemoryAllocation;
}

Expand Down
70 changes: 70 additions & 0 deletions cub/test/c2h/cpu_timer.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/******************************************************************************
* Copyright (c) 2011-2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#pragma once

#include <chrono>
#include <iostream>
#include <string>

namespace c2h
{

/**
 * @brief Minimal host-side stopwatch for coarse timing of test phases.
 *
 * The stopwatch starts running when the object is constructed and can be
 * restarted with reset(). Only host time is measured; no CUDA API is called,
 * so callers that want to time asynchronous device work must synchronize
 * before querying the timer.
 */
class cpu_timer
{
  // steady_clock is guaranteed to be monotonic, which makes it the right clock
  // for measuring intervals. high_resolution_clock may be an alias of
  // system_clock, whose readings can jump when the wall clock is adjusted.
  using clock_type = std::chrono::steady_clock;

  clock_type::time_point m_start;

public:
  /// Starts the stopwatch at the moment of construction.
  cpu_timer()
      : m_start(clock_type::now())
  {}

  /// Restarts the stopwatch from the current instant.
  void reset()
  {
    m_start = clock_type::now();
  }

  /// @return Whole milliseconds elapsed since construction or the last reset()
  ///         (truncated toward zero by duration_cast, then narrowed to int).
  int elapsed_ms() const
  {
    const auto duration = clock_type::now() - m_start;
    const auto ms       = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
    return static_cast<int>(ms.count());
  }

  /// Writes "<label>: <seconds>s\n" to std::cout without modifying the timer.
  ///
  /// @param label Text printed in front of the elapsed time.
  void print_elapsed_seconds(const std::string& label) const
  {
    std::cout << label << ": " << (this->elapsed_ms() / 1000.f) << "s\n";
  }

  /// Prints the elapsed time as print_elapsed_seconds() does, then restarts the
  /// stopwatch so the next measurement begins from this call.
  ///
  /// @param label Text printed in front of the elapsed time.
  void print_elapsed_seconds_and_reset(const std::string& label)
  {
    this->print_elapsed_seconds(label);
    this->reset();
  }
};

}
Loading

0 comments on commit 77431b8

Please sign in to comment.