NVIDIA · miscco · Feb 5, 2025 · Jan 30, 2025 · Jan 30, 2025 · Jan 30, 2025
@@ -1,4 +1,4 @@
 workflows:
  # If any jobs appear here, they will be executed instead of `pull_request` for PRs.
  # This is useful for limiting resource usage when a full matrix is not needed.
  # The branch protection checks will fail when using this override workflow.
@@ -219,13 +219,13 @@
 
 # testing -> The runner with this GPU is in an nv-gh-runners testing pool
 gpus:
-  v100:     { sm: 70 }                # 32 GB,  40 runners
-  t4:       { sm: 75, testing: true } # 16 GB,   8 runners
-  rtx2080:  { sm: 75, testing: true } #  8 GB,   8 runners
-  rtxa6000: { sm: 86, testing: true } # 48 GB,  12 runners
-  l4:       { sm: 89, testing: true } # 24 GB,  48 runners
-  rtx4090:  { sm: 89, testing: true } # 24 GB,  10 runners
-  h100:     { sm: 90 }                # 80 GB,  16 runners
+  v100:     { sm: 70 } # 32 GB,  40 runners
+  t4:       { sm: 75 } # 16 GB,  10 runners
+  rtx2080:  { sm: 75 } #  8 GB,  12 runners
+  rtxa6000: { sm: 86 } # 48 GB,  12 runners
+  l4:       { sm: 89 } # 24 GB,  48 runners
+  rtx4090:  { sm: 89 } # 24 GB,  10 runners
+  h100:     { sm: 90 } # 80 GB,  16 runners
 
 # Tags are used to define a `matrix job` in the workflow section.
 #

diff --git a/ci/test_pycuda.sh b/ci/test_pycuda.sh
@@ -8,25 +8,28 @@ print_environment_details
 
 fail_if_no_gpu
 
-readonly prefix="${BUILD_DIR}/python/"
-export PYTHONPATH="${prefix}:${PYTHONPATH:-}"
+begin_group "⚙️ Existing site-packages"
+pip freeze
+end_group "⚙️ Existing site-packages"
 
-pushd ../python/cuda_cooperative >/dev/null
+for module in cuda_parallel cuda_cooperative; do
 
-run_command "⚙️  Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
-run_command "🚀  Pytest cuda_cooperative" python -m pytest -v ./tests
+  pushd "../python/${module}" >/dev/null
 
-popd >/dev/null
+  TEMP_VENV_DIR="/tmp/${module}_venv"
+  rm -rf "${TEMP_VENV_DIR}"
+  python -m venv "${TEMP_VENV_DIR}"
+  . "${TEMP_VENV_DIR}/bin/activate"
+  echo 'cuda-cccl @ file:///home/coder/cccl/python/cuda_cccl' > /tmp/cuda-cccl_constraints.txt
+  run_command "⚙️  Pip install ${module}" pip install -c /tmp/cuda-cccl_constraints.txt .[test]
+  begin_group "⚙️ ${module} site-packages"
+  pip freeze
+  end_group "⚙️ ${module} site-packages"
+  run_command "🚀  Pytest ${module}" python -m pytest -v ./tests
+  deactivate
 
-pushd ../python/cuda_parallel >/dev/null
+  popd >/dev/null
 
-# Temporarily install the package twice to populate include directory as part of the first installation
-# and to let manifest discover these includes during the second installation. Do not forget to remove the
-# second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed.
-run_command "⚙️  Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
-run_command "⚙️  Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test]
-run_command "🚀  Pytest cuda_parallel" python -m pytest -v ./tests
-
-popd >/dev/null
+done
 
 print_time_summary
@@ -103,6 +103,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" "
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)"
 update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)"
 
+update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$major.$minor.$patch\""
 update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\""
 

@@ -1385,19 +1385,19 @@ def configure_modules(self):
 
     def configure_substitutions(self):
         sub = self.config.substitutions
-        cxx_path = pipes.quote(self.cxx.path)
+        cxx_path = shlex.quote(self.cxx.path)
         # Configure compiler substitutions
         sub.append(('%cxx', cxx_path))
         sub.append(('%libcxx_src_root', self.libcudacxx_src_root))
         # Configure flags substitutions
-        flags_str = ' '.join([pipes.quote(f) for f in self.cxx.flags])
-        compile_flags_str = ' '.join([pipes.quote(f) for f in self.cxx.compile_flags])
-        link_flags_str = ' '.join([pipes.quote(f) for f in self.cxx.link_flags])
-        all_flags = '%s %s %s' % (flags_str, compile_flags_str, link_flags_str)
-        sub.append(('%flags', flags_str))
-        sub.append(('%compile_flags', compile_flags_str))
-        sub.append(('%link_flags', link_flags_str))
-        sub.append(('%all_flags', all_flags))
+        flags_str = " ".join([shlex.quote(f) for f in self.cxx.flags])
+        compile_flags_str = " ".join([shlex.quote(f) for f in self.cxx.compile_flags])
+        link_flags_str = " ".join([shlex.quote(f) for f in self.cxx.link_flags])
+        all_flags = "%s %s %s" % (flags_str, compile_flags_str, link_flags_str)
+        sub.append(("%flags", flags_str))
+        sub.append(("%compile_flags", compile_flags_str))
+        sub.append(("%link_flags", link_flags_str))
+        sub.append(("%all_flags", all_flags))
         if self.cxx.isVerifySupported():
             verify_str = ' ' + ' '.join(self.cxx.verify_flags) + ' '
             sub.append(('%verify', verify_str))
@@ -1422,11 +1422,11 @@ def configure_substitutions(self):
         # Configure run env substitution.
         sub.append(('%run', '%t.exe'))
         # Configure not program substitutions
-        not_py = os.path.join(self.libcudacxx_src_root, 'test', 'utils', 'not.py')
-        not_str = '%s %s ' % (pipes.quote(sys.executable), pipes.quote(not_py))
-        sub.append(('not ', not_str))
-        if self.get_lit_conf('libcudacxx_gdb'):
-            sub.append(('%libcxx_gdb', self.get_lit_conf('libcudacxx_gdb')))
+        not_py = os.path.join(self.libcudacxx_src_root, "test", "utils", "not.py")
+        not_str = "%s %s " % (shlex.quote(sys.executable), shlex.quote(not_py))
+        sub.append(("not ", not_str))
+        if self.get_lit_conf("libcudacxx_gdb"):
+            sub.append(("%libcxx_gdb", self.get_lit_conf("libcudacxx_gdb")))
 
     def can_use_deployment(self):
         # Check if the host is on an Apple platform using clang.

diff --git a/python/cuda_cooperative/.gitignore b/python/cuda_cooperative/.gitignore
@@ -1,3 +1,2 @@
-cuda/_include
 env
 *egg-info
diff --git a/python/cuda_cooperative/MANIFEST.in b/python/cuda_cooperative/MANIFEST.in
diff --git a/python/cuda_cooperative/README.md b/python/cuda_cooperative/README.md
@@ -7,6 +7,7 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html.
 ## Local development
 
 ```bash
+pip3 install -e ../cuda_cccl
 pip3 install -e .[test]
 pytest -v ./tests/
 ```
diff --git a/python/cuda_cooperative/pyproject.toml b/python/cuda_cooperative/pyproject.toml
@@ -1,7 +1,7 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 [build-system]
-requires = ["packaging", "setuptools>=61.0.0", "wheel"]
+requires = ["setuptools>=61.0.0"]
 build-backend = "setuptools.build_meta"
diff --git a/python/cuda_cooperative/setup.py b/python/cuda_cooperative/setup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 

diff --git a/python/cuda_parallel/.gitignore b/python/cuda_parallel/.gitignore
@@ -1,4 +1,3 @@
-cuda/_include
 env
 *egg-info
 *so
diff --git a/python/cuda_parallel/MANIFEST.in b/python/cuda_parallel/MANIFEST.in
diff --git a/python/cuda_parallel/README.md b/python/cuda_parallel/README.md
@@ -7,6 +7,7 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html.
 ## Local development
 
 ```bash
+pip3 install -e ../cuda_cccl
 pip3 install -e .[test]
 pytest -v ./tests/
 ```
diff --git a/python/cuda_parallel/pyproject.toml b/python/cuda_parallel/pyproject.toml
@@ -1,7 +1,7 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 [build-system]
-requires = ["packaging", "setuptools>=61.0.0", "wheel"]
+requires = ["setuptools>=61.0.0"]
 build-backend = "setuptools.build_meta"
diff --git a/python/cuda_parallel/setup.py b/python/cuda_parallel/setup.py
@@ -1,10 +1,9 @@
-# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED.
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-import os
-import shutil
 import subprocess
+from pathlib import Path
 
 from setuptools import Command, Extension, setup, find_packages, find_namespace_packages
 from setuptools.command.build_py import build_py
@@ -84,8 +83,8 @@ def build_extension(self, ext):
             '-DCMAKE_BUILD_TYPE=Release',
         ]
 
-        if not os.path.exists(self.build_temp):
-            os.makedirs(self.build_temp)
+        build_temp_path = Path(self.build_temp)
+        build_temp_path.mkdir(parents=True, exist_ok=True)
 
         subprocess.check_call(['cmake', cccl_path] +
                               cmake_args, cwd=self.build_temp)