Skip to content

Commit 2ad4e4b

Browse files
authored
Merge branch 'main' into ptx-add-barrier-cluster
2 parents 1e5568b + 473fc9c commit 2ad4e4b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

57 files changed

+602
-392
lines changed

.github/workflows/dispatch-build-and-test.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ jobs:
2828
include: ${{ fromJSON(inputs.per_cuda_compiler_matrix) }}
2929
with:
3030
cpu: ${{ matrix.cpu }}
31-
test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}}
31+
test_name: ${{matrix.cpu}}/${{matrix.compiler.name}}${{matrix.compiler.version}}/C++${{matrix.std}} ${{matrix.extra_build_args}}
3232
build_script: './ci/build_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
33-
test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}}'
33+
test_script: './ci/test_${{ inputs.project_name }}.sh -cxx ${{matrix.compiler.exe}} -std ${{matrix.std}} "${{matrix.extra_build_args}}"'
3434
container_image: rapidsai/devcontainers:${{inputs.devcontainer_version}}-cpp-${{matrix.compiler.name}}${{matrix.compiler.version}}-cuda${{matrix.cuda}}-${{matrix.os}}
3535
run_tests: ${{ contains(matrix.jobs, 'test') && !contains(github.event.head_commit.message, 'skip-tests') && matrix.os != 'windows-2022' }}
3636

ci/matrix.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ pull_request:
6767
- {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc9, std: [11, 14, 17], jobs: ['build']}
6868
- {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: *gcc10, std: [11, 14, 17, 20], jobs: ['build']}
6969
- {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc11, std: [11, 14, 17, 20], jobs: ['build']}
70+
- {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build'], extra_build_args: '-cmake-options -DCMAKE_CUDA_ARCHITECTURES=90a'}
7071
- {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'amd64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build', 'test']}
7172
- {cuda: *cuda_newest, os: 'ubuntu22.04', cpu: 'arm64', compiler: *gcc12, std: [11, 14, 17, 20], jobs: ['build']}
7273
- {cuda: *cuda_newest, os: 'ubuntu20.04', cpu: 'amd64', compiler: *llvm9, std: [11, 14, 17], jobs: ['build']}

libcudacxx/include/cuda/std/detail/libcxx/include/__cccl/ptx_isa.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,12 @@
9797
// depending on PTX ISA. This permits checking for the feature in host code.
9898
// When __CUDA_MINIMUM_ARCH__ is available, we only enable the feature when the
9999
// hardware supports it.
100+
#if __cccl_ptx_isa >= 800
100101
#if (!defined(__CUDA_MINIMUM_ARCH__)) \
101-
|| (defined(__CUDA_MINIMUM_ARCH__) && 900 <= __CUDA_MINIMUM_ARCH__) && __cccl_ptx_isa >= 800
102+
|| (defined(__CUDA_MINIMUM_ARCH__) && 900 <= __CUDA_MINIMUM_ARCH__)
102103
# define __cccl_lib_local_barrier_arrive_tx
103104
# define __cccl_lib_experimental_ctk12_cp_async_exposure
104105
#endif
106+
#endif // __cccl_ptx_isa >= 800
105107

106108
#endif // __CCCL_PTX_ISA_H_

libcudacxx/include/cuda/std/detail/libcxx/include/__concepts/arithmetic.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "../__type_traits/is_arithmetic.h"
2727
#include "../__type_traits/is_floating_point.h"
2828
#include "../__type_traits/is_integral.h"
29+
#include "../__type_traits/is_signed.h"
2930
#include "../__type_traits/is_signed_integer.h"
3031
#include "../__type_traits/is_signed.h"
3132
#include "../__type_traits/is_unsigned_integer.h"
@@ -48,9 +49,6 @@ _LIBCUDACXX_CONCEPT unsigned_integral = integral<_Tp> && !signed_integral<_Tp>;
4849
template<class _Tp>
4950
_LIBCUDACXX_CONCEPT floating_point = _LIBCUDACXX_TRAIT(is_floating_point, _Tp);
5051

51-
// Concept helpers for the internal type traits for the fundamental types.
52-
template <class _Tp>
53-
_LIBCUDACXX_CONCEPT __libcpp_unsigned_integer = __libcpp_is_unsigned_integer<_Tp>::value;
5452
template <class _Tp>
5553
_LIBCUDACXX_CONCEPT __libcpp_signed_integer = __libcpp_is_signed_integer<_Tp>::value;
5654

libcudacxx/include/cuda/std/detail/libcxx/include/__cuda/barrier.h

-3
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,6 @@ _CUDA_VSTD::uint64_t * __try_get_barrier_handle<::cuda::thread_scope_block, _CUD
774774
// The user is still responsible for arriving and waiting on (or otherwise
775775
// synchronizing with) the barrier or pipeline barrier to see the results of
776776
// copies from other threads participating in the synchronization object.
777-
extern "C" _LIBCUDACXX_HOST_DEVICE void __cuda_ptx_mbarrier_complete_tx_is_not_supported_before_SM_90__();
778777
struct __memcpy_completion_impl {
779778

780779
template<typename _Group>
@@ -815,8 +814,6 @@ struct __memcpy_completion_impl {
815814
if (__group.thread_rank() == 0) {
816815
::cuda::device::barrier_expect_tx(__barrier, __size);
817816
}
818-
),(
819-
__cuda_ptx_mbarrier_complete_tx_is_not_supported_before_SM_90__();
820817
));
821818
#endif // __cccl_ptx_isa >= 800
822819
return async_contract_fulfillment::async;

0 commit comments

Comments
 (0)