Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Massively improve test times in heterogeneous atomics tests #1719

Merged
merged 5 commits into from
May 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
//
// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: nvrtc, pre-sm-60
// UNSUPPORTED: windows && pre-sm-70

#include <cuda/atomic>
#include <cuda/std/cassert>

#include "common.h"

__host__ __device__ void validate_not_lock_free()
{
cuda::atomic<big_not_lockfree_type> test;
assert(!test.is_lock_free());
}

void kernel_invoker()
{
validate_pinned<cuda::atomic<signed char, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed short, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed int, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed long, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed long long, cuda::thread_scope_system>, arithmetic_atomic_testers>();

validate_pinned<cuda::atomic<unsigned char, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned short, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned int, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned long, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned long long, cuda::thread_scope_system>, bitwise_atomic_testers>();

validate_pinned<cuda::atomic<float>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<double>, arithmetic_atomic_testers>();

validate_pinned<cuda::atomic<big_not_lockfree_type, cuda::thread_scope_system>, basic_testers>();
}

int main(int arg, char** argv)
{
validate_not_lock_free();

NV_IF_TARGET(NV_IS_HOST, (kernel_invoker();))

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//===----------------------------------------------------------------------===//
//
// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: nvrtc, pre-sm-60
// UNSUPPORTED: windows && pre-sm-70

#include <cuda/std/atomic>
#include <cuda/std/cassert>

#include "common.h"

__host__ __device__ void validate_not_lock_free()
{
cuda::std::atomic<big_not_lockfree_type> test;
assert(!test.is_lock_free());
}

void kernel_invoker()
{
validate_pinned<cuda::std::atomic<signed char>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed short>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed int>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed long>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed long long>, arithmetic_atomic_testers>();

validate_pinned<cuda::std::atomic<unsigned char>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned short>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned int>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned long>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned long long>, bitwise_atomic_testers>();

validate_pinned<cuda::std::atomic<float>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<double>, arithmetic_atomic_testers>();

validate_pinned<cuda::std::atomic<big_not_lockfree_type>, basic_testers>();
}

int main(int arg, char** argv)
{
validate_not_lock_free();

NV_IF_TARGET(NV_IS_HOST, (kernel_invoker();))

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: nvrtc, pre-sm-60
// UNSUPPORTED: windows && pre-sm-70

#include <cuda/std/atomic>
#include <cuda/std/cassert>

#include "helpers.h"
#include "../helpers.h"

template <int Operand>
struct store_tester
Expand Down Expand Up @@ -186,55 +183,3 @@ class big_not_lockfree_type
private:
int array[128];
};

__host__ __device__ void validate_not_lock_free()
{
cuda::std::atomic<big_not_lockfree_type> test;
assert(!test.is_lock_free());
}

void kernel_invoker()
{
validate_pinned<cuda::std::atomic<signed char>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed short>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed int>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed long>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<signed long long>, arithmetic_atomic_testers>();

validate_pinned<cuda::std::atomic<unsigned char>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned short>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned int>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned long>, bitwise_atomic_testers>();
validate_pinned<cuda::std::atomic<unsigned long long>, bitwise_atomic_testers>();

validate_pinned<cuda::std::atomic<float>, arithmetic_atomic_testers>();
validate_pinned<cuda::std::atomic<double>, arithmetic_atomic_testers>();

validate_pinned<cuda::std::atomic<big_not_lockfree_type>, basic_testers>();

validate_pinned<cuda::atomic<signed char, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed short, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed int, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed long, cuda::thread_scope_system>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<signed long long, cuda::thread_scope_system>, arithmetic_atomic_testers>();

validate_pinned<cuda::atomic<unsigned char, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned short, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned int, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned long, cuda::thread_scope_system>, bitwise_atomic_testers>();
validate_pinned<cuda::atomic<unsigned long long, cuda::thread_scope_system>, bitwise_atomic_testers>();

validate_pinned<cuda::atomic<float>, arithmetic_atomic_testers>();
validate_pinned<cuda::atomic<double>, arithmetic_atomic_testers>();

validate_pinned<cuda::atomic<big_not_lockfree_type, cuda::thread_scope_system>, basic_testers>();
}

int main(int arg, char** argv)
{
validate_not_lock_free();

NV_IF_TARGET(NV_IS_HOST, (kernel_invoker();))

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cuda/std/atomic>
#include <cuda/std/cassert>

#include "helpers.h"
#include "../helpers.h"

struct clear
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cuda/atomic>
#include <cuda/std/cassert>

#include "helpers.h"
#include "../helpers.h"

template <int Operand>
struct store_tester
Expand Down Expand Up @@ -192,21 +192,8 @@ void kernel_invoker()
validate_pinned<unsigned long, bitwise_atomic_testers>();
validate_pinned<unsigned long long, bitwise_atomic_testers>();

#ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL
validate_pinned<signed char, arithmetic_atomic_testers>();
validate_pinned<signed short, arithmetic_atomic_testers>();
#endif
validate_pinned<signed int, arithmetic_atomic_testers>();
validate_pinned<signed long, arithmetic_atomic_testers>();
validate_pinned<signed long long, arithmetic_atomic_testers>();

#ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL
validate_pinned<unsigned char, bitwise_atomic_testers>();
validate_pinned<unsigned short, bitwise_atomic_testers>();
#endif
validate_pinned<unsigned int, bitwise_atomic_testers>();
validate_pinned<unsigned long, bitwise_atomic_testers>();
validate_pinned<unsigned long long, bitwise_atomic_testers>();
validate_pinned<float, arithmetic_atomic_testers>();
validate_pinned<double, arithmetic_atomic_testers>();
}

int main(int arg, char** argv)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cuda/std/atomic>
#include <cuda/std/cassert>

#include "helpers.h"
#include "../helpers.h"

template <int Operand>
struct store_tester
Expand Down Expand Up @@ -184,21 +184,8 @@ void kernel_invoker()
validate_pinned<unsigned long, bitwise_atomic_testers>();
validate_pinned<unsigned long long, bitwise_atomic_testers>();

#ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL
validate_pinned<signed char, arithmetic_atomic_testers>();
validate_pinned<signed short, arithmetic_atomic_testers>();
#endif
validate_pinned<signed int, arithmetic_atomic_testers>();
validate_pinned<signed long, arithmetic_atomic_testers>();
validate_pinned<signed long long, arithmetic_atomic_testers>();

#ifdef _LIBCUDACXX_ATOMIC_REF_SUPPORTS_SMALL_INTEGRAL
validate_pinned<unsigned char, bitwise_atomic_testers>();
validate_pinned<unsigned short, bitwise_atomic_testers>();
#endif
validate_pinned<unsigned int, bitwise_atomic_testers>();
validate_pinned<unsigned long, bitwise_atomic_testers>();
validate_pinned<unsigned long long, bitwise_atomic_testers>();
validate_pinned<float, arithmetic_atomic_testers>();
validate_pinned<double, arithmetic_atomic_testers>();
}

int main(int arg, char** argv)
Expand Down
88 changes: 51 additions & 37 deletions libcudacxx/test/libcudacxx/heterogeneous/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,90 +350,104 @@ struct initializer_validator
performer<T> validator;
};

template <typename T>
struct host_launcher
{
template <typename T, typename Tester>
template <typename Tester>
static initializer_validator<T> get_exec()
{
return initializer_validator<T>{host_initialize<Tester>, host_validate<Tester>};
}
};

template <typename T>
struct device_launcher
{
template <typename T, typename Tester>
template <typename Tester>
static initializer_validator<T> get_exec()
{
return initializer_validator<T>{device_initialize<Tester>, device_validate<Tester>};
}
};

template <typename T, typename... Testers, typename... Launchers, typename... Args>
void do_heterogeneous_test(type_list<Testers...>, type_list<Launchers...>, Args... args)
template <typename T, typename... Testers, typename... Launchers>
void do_heterogeneous_test(T* test_input, type_list<Testers...>, type_list<Launchers...>)
{
void* pointer = nullptr;
HETEROGENEOUS_SAFE_CALL(cudaMallocHost(&pointer, sizeof(T)));
T& object = *device_construct<T>(pointer, args...);

initializer_validator<T> performers[] = {{Launchers::template get_exec<T, Testers>()}...};
initializer_validator<T> performers[] = {{Launchers::template get_exec<Testers>()}...};

for (auto&& performer : performers)
{
performer.initializer(object);
performer.validator(object);
performer.initializer(*test_input);
performer.validator(*test_input);
}

HETEROGENEOUS_SAFE_CALL(cudaGetLastError());
HETEROGENEOUS_SAFE_CALL(cudaDeviceSynchronize());

sync_all();

device_destroy(&object);
HETEROGENEOUS_SAFE_CALL(cudaFreeHost(pointer));
}

template <size_t Idx>
using enable_if_permutations_remain = typename std::enable_if<Idx != 0, int>::type;
template <size_t Idx>
using enable_if_no_permutations_remain = typename std::enable_if<Idx == 0, int>::type;

template <size_t Idx,
typename T,
typename... Testers,
typename... Launchers,
typename... Args,
enable_if_permutations_remain<Idx> = 0>
void permute_tests(type_list<Testers...>, type_list<Launchers...>, Args... args)
template <size_t Idx, typename Fn, typename Launchers, enable_if_permutations_remain<Idx> = 0>
void permute_tests(const Fn& fn, Launchers launchers)
{
#ifdef DEBUG_TESTERS
printf("Testing permutation %zu of %zu\r\n", Idx, sizeof...(Testers));
printf("Testing permutation %zu of %zu\r\n", Idx, sizeof...(Launchers));
fflush(stdout);
#endif
do_heterogeneous_test<T>(type_list<Testers...>{}, type_list<Launchers...>{}, args...);
permute_tests<Idx - 1, T>(type_list<Testers...>{}, rotl<Launchers...>{}, args...);
fn(launchers);
permute_tests<Idx - 1>(fn, rotl<Launchers>{});
}

template <size_t Idx,
typename T,
typename... Testers,
typename... Launchers,
typename... Args,
enable_if_no_permutations_remain<Idx> = 0>
void permute_tests(type_list<Testers...>, type_list<Launchers...>, Args... args)
template <size_t Idx, typename Fn, typename Launchers, enable_if_no_permutations_remain<Idx> = 0>
void permute_tests(const Fn&, Launchers)
{}

template <typename T, typename... Testers, typename... Launchers, typename... Args>
void permute_tests(type_list<Testers...>, type_list<Launchers...>, Args... args)
template <typename Fn, typename... Launchers>
void permute_tests(const Fn& fn, type_list<Launchers...> launchers)
{
permute_tests<sizeof...(Testers), T>(type_list<Testers...>{}, type_list<Launchers...>{}, args...);
permute_tests<sizeof...(Launchers)>(fn, launchers);
}

template <typename Testers, typename InputCreator, typename InputDestructor>
struct test_wrapper
{
InputCreator creator;
InputDestructor destructor;

template <typename Launchers>
void operator()(Launchers) const
{
auto input = creator();
do_heterogeneous_test(input, Testers{}, Launchers{});
destructor(input);
}
};

template <typename T, typename... Testers, typename... Args>
void validate_device_dynamic(tester_list<Testers...>, Args... args)
void validate_device_dynamic(tester_list<Testers...> testers, Args... args)
{
auto test_input_creator = [args...]() -> T* {
void* pointer = nullptr;
HETEROGENEOUS_SAFE_CALL(cudaMallocHost(&pointer, sizeof(T)));
return device_construct<T>(pointer, args...);
};

auto test_input_destructor = [](T* test_input) {
device_destroy(test_input);
HETEROGENEOUS_SAFE_CALL(cudaFreeHost(test_input));
};

test_wrapper<tester_list<Testers...>, decltype(test_input_creator), decltype(test_input_destructor)> test_harness{
test_input_creator, test_input_destructor};

// ex: type_list<device_launcher, host_launcher, host_launcher>
using initial_launcher_list = append_n<sizeof...(Testers) - 1, type_list<device_launcher>, host_launcher>;
permute_tests<T>(type_list<Testers...>{}, initial_launcher_list{}, args...);
using initial_launcher_list = append_n<sizeof...(Testers) - 1, type_list<device_launcher<T>>, host_launcher<T>>;
permute_tests(test_harness, initial_launcher_list{});
}

#if __cplusplus >= 201402L
Expand Down
Loading