Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.7]: PRs #3201, #3523, #3547, #3580 (#3536) #3600

Merged
merged 20 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
workflows:

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Merging consumers for duplicate producer '[C++17 GCC13] Build(amd64)' in 'CUB CTK12.5 nvcc GCC'

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Original consumers: [C++17 GCC13] DeviceLaunch(amd64, V100), [C++17 GCC13] HostLaunch(amd64, V100), [C++17 GCC13] GraphCapture(amd64, V100), [C++17 GCC13] TestGPU(amd64, V100)

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Duplicate consumers: [C++17 GCC13] SmallGMem(amd64, V100)

Check notice on line 1 in ci/matrix.yaml

View workflow job for this annotation

GitHub Actions / Build workflow from matrix

Merged consumers: [C++17 GCC13] DeviceLaunch(amd64, V100), [C++17 GCC13] HostLaunch(amd64, V100), [C++17 GCC13] GraphCapture(amd64, V100), [C++17 GCC13] TestGPU(amd64, V100), [C++17 GCC13] SmallGMem(amd64, V100)
# If any jobs appear here, they will be executed instead of `pull_request` for PRs.
# This is useful for limiting resource usage when a full matrix is not needed.
# The branch protection checks will fail when using this override workflow.
Expand All @@ -14,10 +14,13 @@
- {jobs: ['build'], std: 'all', ctk: '11.1', cxx: ['gcc6', 'gcc7', 'gcc8', 'gcc9', 'clang9', 'msvc2017']}
- {jobs: ['build'], std: 'all', ctk: '11.8', cxx: ['gcc11'], sm: '60;70;80;90'}
# Current CTK
- {jobs: ['build'], std: 'all', cxx: ['gcc7', 'gcc8', 'gcc9', 'gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang10', 'clang11', 'clang12', 'clang13', 'clang14', 'clang15', 'clang16']}
- {jobs: ['build'], std: 'all', cxx: ['intel', 'msvc2019']}
- {jobs: ['test'], std: 'all', cxx: ['gcc13', 'clang17', 'msvc2022']}
- {jobs: ['build'], std: [11, 17], cxx: ['gcc7', 'gcc8', 'gcc9']}
- {jobs: ['build'], std: [14, 20], cxx: ['gcc10', 'gcc11', 'gcc12']}
- {jobs: ['build'], std: [11, 14], cxx: ['clang10', 'clang11']}
- {jobs: ['build'], std: [17], cxx: ['clang12', 'clang13', 'clang14', 'clang15']}
- {jobs: ['build'], std: 'all', cxx: ['clang9', 'clang16']}
- {jobs: ['build'], std: 17, cxx: ['intel', 'msvc2019']}
- {jobs: ['test'], std: 'all', cxx: ['gcc', 'clang', 'msvc2022']}
# Modded builds:
- {jobs: ['build'], std: 'all', cxx: ['gcc', 'clang'], cpu: 'arm64'}
- {jobs: ['build'], std: 'all', cxx: ['gcc'], sm: '90a'}
Expand Down Expand Up @@ -219,13 +222,13 @@

# testing -> Runner with GPU is in a nv-gh-runners testing pool
gpus:
v100: { sm: 70 } # 32 GB, 40 runners
t4: { sm: 75, testing: true } # 16 GB, 8 runners
rtx2080: { sm: 75, testing: true } # 8 GB, 8 runners
rtxa6000: { sm: 86, testing: true } # 48 GB, 12 runners
l4: { sm: 89, testing: true } # 24 GB, 48 runners
rtx4090: { sm: 89, testing: true } # 24 GB, 10 runners
h100: { sm: 90 } # 80 GB, 16 runners
v100: { sm: 70 } # 32 GB, 40 runners
t4: { sm: 75 } # 16 GB, 10 runners
rtx2080: { sm: 75 } # 8 GB, 12 runners
rtxa6000: { sm: 86 } # 48 GB, 12 runners
l4: { sm: 89 } # 24 GB, 48 runners
rtx4090: { sm: 89 } # 24 GB, 10 runners
h100: { sm: 90 } # 80 GB, 16 runners

# Tags are used to define a `matrix job` in the workflow section.
#
Expand Down
33 changes: 18 additions & 15 deletions ci/test_pycuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,28 @@ print_environment_details

fail_if_no_gpu

readonly prefix="${BUILD_DIR}/python/"
export PYTHONPATH="${prefix}:${PYTHONPATH:-}"
begin_group "⚙️ Existing site-packages"
pip freeze
end_group "⚙️ Existing site-packages"

pushd ../python/cuda_cooperative >/dev/null
for module in cuda_parallel cuda_cooperative; do

run_command "⚙️ Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "🚀 Pytest cuda_cooperative" python -m pytest -v ./tests
pushd "../python/${module}" >/dev/null

popd >/dev/null
TEMP_VENV_DIR="/tmp/${module}_venv"
rm -rf "${TEMP_VENV_DIR}"
python -m venv "${TEMP_VENV_DIR}"
. "${TEMP_VENV_DIR}/bin/activate"
echo 'cuda-cccl @ file:///home/coder/cccl/python/cuda_cccl' > /tmp/cuda-cccl_constraints.txt
run_command "⚙️ Pip install ${module}" pip install -c /tmp/cuda-cccl_constraints.txt .[test]
begin_group "⚙️ ${module} site-packages"
pip freeze
end_group "⚙️ ${module} site-packages"
run_command "🚀 Pytest ${module}" python -m pytest -v ./tests
deactivate

pushd ../python/cuda_parallel >/dev/null
popd >/dev/null

# Temporarily install the package twice: the first installation populates the include directory,
# and the second installation lets the manifest discover those includes. Do not forget to remove
# the second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed.
run_command "⚙️ Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "⚙️ Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
run_command "🚀 Pytest cuda_parallel" python -m pytest -v ./tests

popd >/dev/null
done

print_time_summary
1 change: 1 addition & 0 deletions ci/update_version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" "
update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)"
update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)"

update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$major.$minor.$patch\""
update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""

Expand Down
2 changes: 1 addition & 1 deletion cudax/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ endif()

option(cudax_ENABLE_HEADER_TESTING "Test that CUDA Experimental's public headers compile." ON)
option(cudax_ENABLE_TESTING "Build CUDA Experimental's tests." ON)
option(cudax_ENABLE_SAMPLES "Build CUDA Experimental's samples." ON)
option(cudax_ENABLE_SAMPLES "Build CUDA Experimental's samples." OFF)

include(cmake/cudaxBuildCompilerTargets.cmake)
include(cmake/cudaxBuildTargetList.cmake)
Expand Down
21 changes: 12 additions & 9 deletions libcudacxx/include/cuda/std/__expected/bad_expected_access.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,6 @@ class bad_expected_access;
template <>
class bad_expected_access<void> : public ::std::exception
{
protected:
_CCCL_HIDE_FROM_ABI bad_expected_access() noexcept = default;
_CCCL_HIDE_FROM_ABI bad_expected_access(const bad_expected_access&) = default;
_CCCL_HIDE_FROM_ABI bad_expected_access(bad_expected_access&&) = default;
_CCCL_HIDE_FROM_ABI bad_expected_access& operator=(const bad_expected_access&) = default;
_CCCL_HIDE_FROM_ABI bad_expected_access& operator=(bad_expected_access&&) = default;
~bad_expected_access() noexcept override = default;

public:
// The way this has been designed (by using a class template below) means that we'll already
// have a profusion of these vtables in TUs, and the dynamic linker will already have a bunch
Expand All @@ -74,10 +66,21 @@ template <class _Err>
class bad_expected_access : public bad_expected_access<void>
{
public:
explicit bad_expected_access(_Err __e)
// Constructs the exception from an error value: `__e` is taken by value and
// moved into the `__unex_` member. The constructor is `explicit`, so an `_Err`
// does not implicitly convert to `bad_expected_access<_Err>`.
// When `_CCCL_CUDA_COMPILER_CLANG` is defined the constructor is additionally
// annotated with `_CCCL_HOST_DEVICE`.
# if defined(_CCCL_CUDA_COMPILER_CLANG) // Clang needs this or it breaks with device only types
_CCCL_HOST_DEVICE
# endif // _CCCL_CUDA_COMPILER_CLANG
_CCCL_HIDE_FROM_ABI explicit bad_expected_access(_Err __e)
: __unex_(_CUDA_VSTD::move(__e))
{}

// User-provided, non-throwing destructor. When `_CCCL_CUDA_COMPILER_CLANG` is
// defined it is additionally annotated with `_CCCL_HOST_DEVICE`.
//
// NOTE(review): the body invokes `__unex_.~_Err()` explicitly. The declaration
// of `__unex_` is not visible in this hunk; if it is an ordinary non-static data
// member (not a union member or manually managed storage), it is destroyed
// again automatically once this body finishes, which would run `_Err`'s
// destructor twice. Confirm against the member declaration, and if so an empty
// body is sufficient.
# if defined(_CCCL_CUDA_COMPILER_CLANG) // Clang needs this or it breaks with device only types
_CCCL_HOST_DEVICE
# endif // _CCCL_CUDA_COMPILER_CLANG
_CCCL_HIDE_FROM_ABI ~bad_expected_access() noexcept
{
__unex_.~_Err();
}

_LIBCUDACXX_HIDE_FROM_ABI _Err& error() & noexcept
{
return __unex_;
Expand Down
20 changes: 20 additions & 0 deletions libcudacxx/include/cuda/std/__expected/expected.h
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,7 @@ class expected : private __expected_move_assign<_Tp, _Err>
}

// [expected.object.eq], equality operators
_CCCL_EXEC_CHECK_DISABLE
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const expected& __y)
{
if (__x.__has_val_ != __y.has_value())
Expand All @@ -1097,12 +1098,14 @@ class expected : private __expected_move_assign<_Tp, _Err>
}

# if _CCCL_STD_VER < 2020
// Inequality for two `expected` objects of the same specialization, defined as
// the exact negation of the corresponding operator==.
// Only declared when `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
_CCCL_EXEC_CHECK_DISABLE
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const expected& __x, const expected& __y)
{
return !(__x == __y);
}
# endif // _CCCL_STD_VER < 2020

_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2, class _E2)
_LIBCUDACXX_REQUIRES((!_CCCL_TRAIT(is_void, _T2)))
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const expected<_T2, _E2>& __y)
Expand All @@ -1125,6 +1128,7 @@ class expected : private __expected_move_assign<_Tp, _Err>
}

# if _CCCL_STD_VER < 2020
_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2, class _E2)
_LIBCUDACXX_REQUIRES((!_CCCL_TRAIT(is_void, _T2)))
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const expected& __x, const expected<_T2, _E2>& __y)
Expand All @@ -1133,25 +1137,29 @@ class expected : private __expected_move_assign<_Tp, _Err>
}
# endif // _CCCL_STD_VER < 2020

// Compares an `expected` with a plain value `__v`.
// Returns true only when `__x` holds a value and that value compares equal to
// `__v`; `__has_val_` is tested first, so `__val_` is not read when it is false.
// The result of the underlying `==` is explicitly converted to `bool`.
// The requires-clause removes this overload when
// `__expected::__is_expected_nonvoid<_T2>` is true.
_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2)
_LIBCUDACXX_REQUIRES((!__expected::__is_expected_nonvoid<_T2>) )
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const _T2& __v)
{
return __x.__has_val_ && static_cast<bool>(__x.__union_.__val_ == __v);
}
# if _CCCL_STD_VER < 2020
// Reversed-operand form of the value comparison (`__v == __x`), declared only
// when `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true only when `__x` holds a value equal to `__v`. Note that the
// underlying comparison is still evaluated as `__val_ == __v`, i.e. with the
// stored value on the left-hand side.
_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2)
_LIBCUDACXX_REQUIRES((!__expected::__is_expected_nonvoid<_T2>) )
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const _T2& __v, const expected& __x)
{
return __x.__has_val_ && static_cast<bool>(__x.__union_.__val_ == __v);
}
// Inequality between an `expected` and a plain value `__v`, declared only when
// `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true when `__x` holds no value, or when its value compares unequal to
// `__v` using the value type's own `!=`. `__val_` is read only if `__has_val_`
// is true.
_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2)
_LIBCUDACXX_REQUIRES((!__expected::__is_expected_nonvoid<_T2>) )
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const expected& __x, const _T2& __v)
{
return !__x.__has_val_ || static_cast<bool>(__x.__union_.__val_ != __v);
}
_CCCL_EXEC_CHECK_DISABLE
_LIBCUDACXX_TEMPLATE(class _T2)
_LIBCUDACXX_REQUIRES((!__expected::__is_expected_nonvoid<_T2>) )
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const _T2& __v, const expected& __x)
Expand All @@ -1160,22 +1168,26 @@ class expected : private __expected_move_assign<_Tp, _Err>
}
# endif // _CCCL_STD_VER < 2020

// Compares an `expected` with an `unexpected<_E2>`.
// Returns true only when `__x` holds no value and its stored error compares
// equal to `__e.error()`; `__unex_` is read only if `__has_val_` is false.
// The result of the underlying `==` is explicitly converted to `bool`.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const unexpected<_E2>& __e)
{
return !__x.__has_val_ && static_cast<bool>(__x.__union_.__unex_ == __e.error());
}
# if _CCCL_STD_VER < 2020
// Reversed-operand form of the error comparison (`__e == __x`), declared only
// when `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true only when `__x` holds no value and its stored error equals
// `__e.error()`; the stored error stays on the left-hand side of the `==`.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const unexpected<_E2>& __e, const expected& __x)
{
return !__x.__has_val_ && static_cast<bool>(__x.__union_.__unex_ == __e.error());
}
// Inequality between an `expected` and an `unexpected<_E2>`, declared only when
// `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true when `__x` holds a value, or when its stored error compares
// unequal to `__e.error()` using the error type's own `!=`. `__unex_` is read
// only if `__has_val_` is false.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const expected& __x, const unexpected<_E2>& __e)
{
return __x.__has_val_ || static_cast<bool>(__x.__union_.__unex_ != __e.error());
}
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const unexpected<_E2>& __e, const expected& __x)
{
Expand Down Expand Up @@ -1916,6 +1928,7 @@ class expected<void, _Err> : private __expected_move_assign<void, _Err>
}

// [expected.void.eq], equality operators
_CCCL_EXEC_CHECK_DISABLE
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const expected& __y) noexcept
{
if (__x.__has_val_ != __y.has_value())
Expand All @@ -1928,12 +1941,14 @@ class expected<void, _Err> : private __expected_move_assign<void, _Err>
}
}
# if _CCCL_STD_VER < 2020
// Inequality for two `expected` objects of the same specialization, defined as
// the exact negation of the corresponding operator==. Declared `noexcept`.
// Only declared when `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
_CCCL_EXEC_CHECK_DISABLE
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator!=(const expected& __x, const expected& __y) noexcept
{
return !(__x == __y);
}
# endif

_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool
operator==(const expected& __x, const expected<void, _E2>& __y) noexcept
Expand All @@ -1948,6 +1963,7 @@ class expected<void, _Err> : private __expected_move_assign<void, _Err>
}
}
# if _CCCL_STD_VER < 2020
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool
operator!=(const expected& __x, const expected<void, _E2>& __y) noexcept
Expand All @@ -1956,22 +1972,26 @@ class expected<void, _Err> : private __expected_move_assign<void, _Err>
}
# endif

// Compares an `expected` with an `unexpected<_E2>`. Declared `noexcept`.
// Returns true only when `__x` holds no value and its stored error compares
// equal to `__y.error()`; `__unex_` is read only if `__has_val_` is false.
// The result of the underlying `==` is explicitly converted to `bool`.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const expected& __x, const unexpected<_E2>& __y) noexcept
{
return !__x.__has_val_ && static_cast<bool>(__x.__union_.__unex_ == __y.error());
}
# if _CCCL_STD_VER < 2020
// Reversed-operand form of the error comparison (`__y == __x`), declared only
// when `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true only when `__x` holds no value and its stored error equals
// `__y.error()`; the stored error stays on the left-hand side of the `==`.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
friend _LIBCUDACXX_HIDE_FROM_ABI constexpr bool operator==(const unexpected<_E2>& __y, const expected& __x) noexcept
{
return !__x.__has_val_ && static_cast<bool>(__x.__union_.__unex_ == __y.error());
}
// Inequality between an `expected` and an `unexpected<_E2>`, declared only when
// `_CCCL_STD_VER < 2020` (see the enclosing preprocessor guard).
// Returns true when `__x` holds a value, or when its stored error compares
// unequal to `__y.error()` using the error type's own `!=`. `__unex_` is read
// only if `__has_val_` is false.
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
_LIBCUDACXX_HIDE_FROM_ABI friend constexpr bool operator!=(const expected& __x, const unexpected<_E2>& __y) noexcept
{
return __x.__has_val_ || static_cast<bool>(__x.__union_.__unex_ != __y.error());
}
_CCCL_EXEC_CHECK_DISABLE
template <class _E2>
_LIBCUDACXX_HIDE_FROM_ABI friend constexpr bool operator!=(const unexpected<_E2>& __y, const expected& __x) noexcept
{
Expand Down
Loading
Loading