From 3e1e6e08468b9eccc6347112326c7419f438237a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Thu, 16 Jan 2025 19:51:42 -0800 Subject: [PATCH] [FEA]: Introduce Python module with CCCL headers (#3201) * Add cccl/python/cuda_cccl directory and use from cuda_parallel, cuda_cooperative * Run `copy_cccl_headers_to_cuda_include()` before `setup()` * Create python/cuda_cccl/cuda/_include/__init__.py, then simply import cuda._include to find the include path. * Add cuda.cccl._version exactly as for cuda.cooperative and cuda.parallel * Bug fix: cuda/_include only exists after shutil.copytree() ran. * Use `f"cuda-cccl @ file://{cccl_path}/python/cuda_cccl"` in setup.py * Remove CustomBuildCommand, CustomWheelBuild in cuda_parallel/setup.py (they are equivalent to the default functions) * Replace := operator (needs Python 3.8+) * Fix oversights: remove `pip3 install ./cuda_cccl` lines from README.md * Restore original README.md: `pip3 install -e` now works on first pass. * cuda_cccl/README.md: FOR INTERNAL USE ONLY * Remove `$pymajor.$pyminor.` prefix in cuda_cccl _version.py (as suggested under https://github.com/NVIDIA/cccl/pull/3201#discussion_r1894035917) Command used: ci/update_version.sh 2 8 0 * Modernize pyproject.toml, setup.py Trigger for this change: * https://github.com/NVIDIA/cccl/pull/3201#discussion_r1894043178 * https://github.com/NVIDIA/cccl/pull/3201#discussion_r1894044996 * Install CCCL headers under cuda.cccl.include Trigger for this change: * https://github.com/NVIDIA/cccl/pull/3201#discussion_r1894048562 Unexpected accidental discovery: cuda.cooperative unit tests pass without CCCL headers entirely. * Factor out cuda_cccl/cuda/cccl/include_paths.py * Reuse cuda_cccl/cuda/cccl/include_paths.py from cuda_cooperative * Add missing Copyright notice. * Add missing __init__.py (cuda.cccl) * Add `"cuda.cccl"` to `autodoc.mock_imports` * Move cuda.cccl.include_paths into function where it is used. 
(Attempt to resolve Build and Verify Docs failure.) * Add # TODO: move this to a module-level import * Modernize cuda_cooperative/pyproject.toml, setup.py * Convert cuda_cooperative to use hatchling as build backend. * Revert "Convert cuda_cooperative to use hatchling as build backend." This reverts commit 61637d608da06fcf6851ef6197f88b5e7dbc3bbe. * Move numpy from [build-system] requires -> [project] dependencies * Move pyproject.toml [project] dependencies -> setup.py install_requires, to be able to use CCCL_PATH * Remove copy_license() and use license_files=["../../LICENSE"] instead. * Further modernize cuda_cccl/setup.py to use pathlib * Trivial simplifications in cuda_cccl/pyproject.toml * Further simplify cuda_cccl/pyproject.toml, setup.py: remove inconsequential code * Make cuda_cooperative/pyproject.toml more similar to cuda_cccl/pyproject.toml * Add taplo-pre-commit to .pre-commit-config.yaml * taplo-pre-commit auto-fixes * Use pathlib in cuda_cooperative/setup.py * CCCL_PYTHON_PATH in cuda_cooperative/setup.py * Modernize cuda_parallel/pyproject.toml, setup.py * Use pathlib in cuda_parallel/setup.py * Add `# TOML lint & format` comment. 
* Replace MANIFEST.in with `[tool.setuptools.package-data]` section in pyproject.toml * Use pathlib in cuda/cccl/include_paths.py * pre-commit autoupdate (EXCEPT clang-format, which was manually restored) * Fixes after git merge main * Resolve warning: AttributeError: '_Reduce' object has no attribute 'build_result' ``` =========================================================================== warnings summary =========================================================================== tests/test_reduce.py::test_reduce_non_contiguous /home/coder/cccl/python/devenv/lib/python3.12/site-packages/_pytest/unraisableexception.py:85: PytestUnraisableExceptionWarning: Exception ignored in: Traceback (most recent call last): File "/home/coder/cccl/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py", line 132, in __del__ bindings.cccl_device_reduce_cleanup(ctypes.byref(self.build_result)) ^^^^^^^^^^^^^^^^^ AttributeError: '_Reduce' object has no attribute 'build_result' warnings.warn(pytest.PytestUnraisableExceptionWarning(msg)) -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ============================================================= 1 passed, 93 deselected, 1 warning in 0.44s ============================================================== ``` * Move `copy_cccl_headers_to_cuda_cccl_include()` functionality to `class CustomBuildPy` * Introduce cuda_cooperative/constraints.txt * Also add cuda_parallel/constraints.txt * Add `--constraint constraints.txt` in ci/test_python.sh * Update Copyright dates * Switch to https://github.com/ComPWA/taplo-pre-commit (the other repo has been archived by the owner on Jul 1, 2024) For completeness: The other repo took a long time to install into the pre-commit cache; so long it lead to timeouts in the CCCL CI. * Remove unused cuda_parallel jinja2 dependency (noticed by chance). * Remove constraints.txt files, advertise running `pip install cuda-cccl` first instead. 
* Make cuda_cooperative, cuda_parallel testing completely independent. * Run only test_python.sh [skip-rapids][skip-matx][skip-docs][skip-vdc] * Try using another runner (because V100 runners seem to be stuck) [skip-rapids][skip-matx][skip-docs][skip-vdc] * Fix sign-compare warning (#3408) [skip-rapids][skip-matx][skip-docs][skip-vdc] * Revert "Try using another runner (because V100 runners seem to be stuck) [skip-rapids][skip-matx][skip-docs][skip-vdc]" This reverts commit ea33a218ed77a075156cd1b332047202adb25aa2. Error message: https://github.com/NVIDIA/cccl/pull/3201#issuecomment-2594012971 * Try using A100 runner (because V100 runners still seem to be stuck) [skip-rapids][skip-matx][skip-docs][skip-vdc] * Also show cuda-cooperative site-packages, cuda-parallel site-packages (after pip install) [skip-rapids][skip-matx][skip-docs][skip-vdc] * Try using l4 runner (because V100 runners still seem to be stuck) [skip-rapids][skip-matx][skip-docs][skip-vdc] * Restore original ci/matrix.yaml [skip-rapids] * Use for loop in test_python.sh to avoid code duplication. * Run only test_python.sh [skip-rapids][skip-matx][skip-docs][skip-vdc][skip pre-commit.ci] * Comment out taplo-lint in pre-commit config [skip-rapids][skip-matx][skip-docs][skip-vdc] * Revert "Run only test_python.sh [skip-rapids][skip-matx][skip-docs][skip-vdc][skip pre-commit.ci]" This reverts commit ec206fd8b50a6a293e00a5825b579e125010b13d. 
* Implement suggestion by @shwina (https://github.com/NVIDIA/cccl/pull/3201#pullrequestreview-2556918460) * Address feedback by @leofang --------- Co-authored-by: Bernhard Manfred Gruber --- .pre-commit-config.yaml | 11 +++ ci/test_python.sh | 33 ++++--- ci/update_version.sh | 2 + docs/repo.toml | 1 + python/cuda_cccl/.gitignore | 2 + python/cuda_cccl/README.md | 3 + python/cuda_cccl/cuda/cccl/__init__.py | 8 ++ python/cuda_cccl/cuda/cccl/_version.py | 7 ++ python/cuda_cccl/cuda/cccl/include_paths.py | 63 ++++++++++++ python/cuda_cccl/pyproject.toml | 29 ++++++ python/cuda_cccl/setup.py | 51 ++++++++++ python/cuda_cooperative/.gitignore | 1 - python/cuda_cooperative/MANIFEST.in | 1 - python/cuda_cooperative/README.md | 1 + .../cuda/cooperative/experimental/_nvrtc.py | 46 ++------- python/cuda_cooperative/pyproject.toml | 34 ++++++- python/cuda_cooperative/setup.py | 88 +---------------- python/cuda_parallel/.gitignore | 1 - python/cuda_parallel/MANIFEST.in | 1 - python/cuda_parallel/README.md | 1 + .../cuda/parallel/experimental/_bindings.py | 42 +------- .../experimental/algorithms/reduce.py | 5 + python/cuda_parallel/pyproject.toml | 32 +++++-- python/cuda_parallel/setup.py | 96 +++---------------- 24 files changed, 288 insertions(+), 271 deletions(-) create mode 100644 python/cuda_cccl/.gitignore create mode 100644 python/cuda_cccl/README.md create mode 100644 python/cuda_cccl/cuda/cccl/__init__.py create mode 100644 python/cuda_cccl/cuda/cccl/_version.py create mode 100644 python/cuda_cccl/cuda/cccl/include_paths.py create mode 100644 python/cuda_cccl/pyproject.toml create mode 100644 python/cuda_cccl/setup.py delete mode 100644 python/cuda_cooperative/MANIFEST.in delete mode 100644 python/cuda_parallel/MANIFEST.in diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d317e931e78..e61d2f349ea 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -43,6 +43,17 @@ repos: hooks: - id: ruff # linter - id: ruff-format # formatter + + # 
TOML lint & format + - repo: https://github.com/ComPWA/taplo-pre-commit + rev: v0.9.3 + hooks: + # See https://github.com/NVIDIA/cccl/issues/3426 + # - id: taplo-lint + # exclude: "^docs/" + - id: taplo-format + exclude: "^docs/" + - repo: https://github.com/codespell-project/codespell rev: v2.3.0 hooks: diff --git a/ci/test_python.sh b/ci/test_python.sh index bd66cc57716..34900fdb8e0 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -8,25 +8,28 @@ print_environment_details fail_if_no_gpu -readonly prefix="${BUILD_DIR}/python/" -export PYTHONPATH="${prefix}:${PYTHONPATH:-}" +begin_group "⚙️ Existing site-packages" +pip freeze +end_group "⚙️ Existing site-packages" -pushd ../python/cuda_cooperative >/dev/null +for module in cuda_parallel cuda_cooperative; do -run_command "⚙️ Pip install cuda_cooperative" pip install --force-reinstall --upgrade --target "${prefix}" .[test] -run_command "🚀 Pytest cuda_cooperative" python -m pytest -v ./tests + pushd "../python/${module}" >/dev/null -popd >/dev/null + TEMP_VENV_DIR="/tmp/${module}_venv" + rm -rf "${TEMP_VENV_DIR}" + python -m venv "${TEMP_VENV_DIR}" + . "${TEMP_VENV_DIR}/bin/activate" + echo 'cuda-cccl @ file:///home/coder/cccl/python/cuda_cccl' > /tmp/cuda-cccl_constraints.txt + run_command "⚙️ Pip install ${module}" pip install -c /tmp/cuda-cccl_constraints.txt .[test] + begin_group "⚙️ ${module} site-packages" + pip freeze + end_group "⚙️ ${module} site-packages" + run_command "🚀 Pytest ${module}" python -m pytest -v ./tests + deactivate -pushd ../python/cuda_parallel >/dev/null + popd >/dev/null -# Temporarily install the package twice to populate include directory as part of the first installation -# and to let manifest discover these includes during the second installation. Do not forget to remove the -# second installation after https://github.com/NVIDIA/cccl/issues/2281 is addressed. 
-run_command "⚙️ Pip install cuda_parallel once" pip install --force-reinstall --upgrade --target "${prefix}" .[test] -run_command "⚙️ Pip install cuda_parallel twice" pip install --force-reinstall --upgrade --target "${prefix}" .[test] -run_command "🚀 Pytest cuda_parallel" python -m pytest -v ./tests - -popd >/dev/null +done print_time_summary diff --git a/ci/update_version.sh b/ci/update_version.sh index c43303449bb..6a25a837d50 100755 --- a/ci/update_version.sh +++ b/ci/update_version.sh @@ -37,6 +37,7 @@ CUB_CMAKE_VERSION_FILE="lib/cmake/cub/cub-config-version.cmake" LIBCUDACXX_CMAKE_VERSION_FILE="lib/cmake/libcudacxx/libcudacxx-config-version.cmake" THRUST_CMAKE_VERSION_FILE="lib/cmake/thrust/thrust-config-version.cmake" CUDAX_CMAKE_VERSION_FILE="lib/cmake/cudax/cudax-config-version.cmake" +CUDA_CCCL_VERSION_FILE="python/cuda_cccl/cuda/cccl/_version.py" CUDA_COOPERATIVE_VERSION_FILE="python/cuda_cooperative/cuda/cooperative/_version.py" CUDA_PARALLEL_VERSION_FILE="python/cuda_parallel/cuda/parallel/_version.py" @@ -110,6 +111,7 @@ update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MAJOR \([0-9]\+\))" " update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_MINOR \([0-9]\+\))" "set(cudax_VERSION_MINOR $minor)" update_file "$CUDAX_CMAKE_VERSION_FILE" "set(cudax_VERSION_PATCH \([0-9]\+\))" "set(cudax_VERSION_PATCH $patch)" +update_file "$CUDA_CCCL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$major.$minor.$patch\"" update_file "$CUDA_COOPERATIVE_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\"" update_file "$CUDA_PARALLEL_VERSION_FILE" "^__version__ = \"\([0-9.]\+\)\"" "__version__ = \"$pymajor.$pyminor.$major.$minor.$patch\"" diff --git a/docs/repo.toml b/docs/repo.toml index e949beb6e7c..999d62a8f20 100644 --- a/docs/repo.toml +++ b/docs/repo.toml @@ -347,6 +347,7 @@ autodoc.mock_imports = [ "numba", "pynvjitlink", "cuda.bindings", + "cuda.cccl", "llvmlite", "numpy", ] diff 
--git a/python/cuda_cccl/.gitignore b/python/cuda_cccl/.gitignore new file mode 100644 index 00000000000..24ec757199f --- /dev/null +++ b/python/cuda_cccl/.gitignore @@ -0,0 +1,2 @@ +cuda/cccl/include +*egg-info diff --git a/python/cuda_cccl/README.md b/python/cuda_cccl/README.md new file mode 100644 index 00000000000..37f020b6df6 --- /dev/null +++ b/python/cuda_cccl/README.md @@ -0,0 +1,3 @@ +## Note + +This package is currently FOR INTERNAL USE ONLY and not meant to be used/installed explicitly. diff --git a/python/cuda_cccl/cuda/cccl/__init__.py b/python/cuda_cccl/cuda/cccl/__init__.py new file mode 100644 index 00000000000..5288f071942 --- /dev/null +++ b/python/cuda_cccl/cuda/cccl/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from cuda.cccl._version import __version__ +from cuda.cccl.include_paths import get_include_paths + +__all__ = ["__version__", "get_include_paths"] diff --git a/python/cuda_cccl/cuda/cccl/_version.py b/python/cuda_cccl/cuda/cccl/_version.py new file mode 100644 index 00000000000..ec7c29a266e --- /dev/null +++ b/python/cuda_cccl/cuda/cccl/_version.py @@ -0,0 +1,7 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This file is generated by ci/update_version.sh +# Do not edit this file manually. +__version__ = "2.8.0" diff --git a/python/cuda_cccl/cuda/cccl/include_paths.py b/python/cuda_cccl/cuda/cccl/include_paths.py new file mode 100644 index 00000000000..da8246b9195 --- /dev/null +++ b/python/cuda_cccl/cuda/cccl/include_paths.py @@ -0,0 +1,63 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import shutil +from dataclasses import dataclass +from functools import lru_cache +from pathlib import Path +from typing import Optional + + +def _get_cuda_path() -> Optional[Path]: + cuda_path = os.environ.get("CUDA_PATH") + if cuda_path: + cuda_path = Path(cuda_path) + if cuda_path.exists(): + return cuda_path + + nvcc_path = shutil.which("nvcc") + if nvcc_path: + return Path(nvcc_path).parent.parent + + default_path = Path("/usr/local/cuda") + if default_path.exists(): + return default_path + + return None + + +@dataclass +class IncludePaths: + cuda: Optional[Path] + libcudacxx: Optional[Path] + cub: Optional[Path] + thrust: Optional[Path] + + def as_tuple(self): + # Note: higher-level ... lower-level order: + return (self.thrust, self.cub, self.libcudacxx, self.cuda) + + +@lru_cache() +def get_include_paths() -> IncludePaths: + # TODO: once docs env supports Python >= 3.9, we + # can move this to a module-level import. + from importlib.resources import as_file, files + + cuda_incl = None + cuda_path = _get_cuda_path() + if cuda_path is not None: + cuda_incl = cuda_path / "include" + + with as_file(files("cuda.cccl.include")) as f: + cccl_incl = Path(f) + assert cccl_incl.exists() + + return IncludePaths( + cuda=cuda_incl, + libcudacxx=cccl_incl / "libcudacxx", + cub=cccl_incl, + thrust=cccl_incl, + ) diff --git a/python/cuda_cccl/pyproject.toml b/python/cuda_cccl/pyproject.toml new file mode 100644 index 00000000000..ada06301a4c --- /dev/null +++ b/python/cuda_cccl/pyproject.toml @@ -0,0 +1,29 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
+# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +[build-system] +requires = ["setuptools>=61.0.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "cuda-cccl" +description = "Experimental Package with CCCL headers to support JIT compilation" +authors = [{ name = "NVIDIA Corporation" }] +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Environment :: GPU :: NVIDIA CUDA", + "License :: OSI Approved :: Apache Software License", +] +requires-python = ">=3.9" +dynamic = ["version", "readme"] + +[project.urls] +Homepage = "https://github.com/NVIDIA/cccl" + +[tool.setuptools.dynamic] +version = { attr = "cuda.cccl._version.__version__" } +readme = { file = ["README.md"], content-type = "text/markdown" } + +[tool.setuptools.package-data] +cuda = ["cccl/include/**/*"] diff --git a/python/cuda_cccl/setup.py b/python/cuda_cccl/setup.py new file mode 100644 index 00000000000..f6e5e3fa033 --- /dev/null +++ b/python/cuda_cccl/setup.py @@ -0,0 +1,51 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import shutil +from pathlib import Path + +from setuptools import setup +from setuptools.command.build_py import build_py + +PROJECT_PATH = Path(__file__).resolve().parent +CCCL_PATH = PROJECT_PATH.parents[1] + + +class CustomBuildPy(build_py): + """Copy CCCL headers BEFORE super().run() + + Note that the CCCL headers cannot be referenced directly: + setuptools (and pyproject.toml) does not support relative paths that + reference files outside the package directory (like ../../). + This is a restriction designed to avoid inadvertently packaging files + that are outside the source tree. 
+ """ + + def run(self): + cccl_headers = [ + ("cub", "cub"), + ("libcudacxx", "include"), + ("thrust", "thrust"), + ] + + inc_path = PROJECT_PATH / "cuda" / "cccl" / "include" + inc_path.mkdir(parents=True, exist_ok=True) + + for proj_dir, header_dir in cccl_headers: + src_path = CCCL_PATH / proj_dir / header_dir + dst_path = inc_path / proj_dir + if dst_path.exists(): + shutil.rmtree(dst_path) + shutil.copytree(src_path, dst_path) + + init_py_path = inc_path / "__init__.py" + init_py_path.write_text("# Intentionally empty.\n") + + super().run() + + +setup( + license_files=["../../LICENSE"], + cmdclass={"build_py": CustomBuildPy}, +) diff --git a/python/cuda_cooperative/.gitignore b/python/cuda_cooperative/.gitignore index 15c09b246c1..a9904c10554 100644 --- a/python/cuda_cooperative/.gitignore +++ b/python/cuda_cooperative/.gitignore @@ -1,3 +1,2 @@ -cuda/_include env *egg-info diff --git a/python/cuda_cooperative/MANIFEST.in b/python/cuda_cooperative/MANIFEST.in deleted file mode 100644 index 848cbfe2e81..00000000000 --- a/python/cuda_cooperative/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -recursive-include cuda/_include * diff --git a/python/cuda_cooperative/README.md b/python/cuda_cooperative/README.md index c202d1d6c17..673e130bbe0 100644 --- a/python/cuda_cooperative/README.md +++ b/python/cuda_cooperative/README.md @@ -7,6 +7,7 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html. 
## Local development ```bash +pip3 install -e ../cuda_cccl pip3 install -e .[test] pytest -v ./tests/ ``` diff --git a/python/cuda_cooperative/cuda/cooperative/experimental/_nvrtc.py b/python/cuda_cooperative/cuda/cooperative/experimental/_nvrtc.py index 1e86dd45dfe..a1d269fd987 100644 --- a/python/cuda_cooperative/cuda/cooperative/experimental/_nvrtc.py +++ b/python/cuda_cooperative/cuda/cooperative/experimental/_nvrtc.py @@ -3,9 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import functools -import importlib.resources as pkg_resources -import os -import shutil from cuda.bindings import nvrtc from cuda.cooperative.experimental._caching import disk_cache @@ -20,22 +17,6 @@ def CHECK_NVRTC(err, prog): raise RuntimeError(f"NVRTC error: {log.decode('ascii')}") -def get_cuda_path(): - cuda_path = os.environ.get("CUDA_PATH", "") - if os.path.exists(cuda_path): - return cuda_path - - nvcc_path = shutil.which("nvcc") - if nvcc_path is not None: - return os.path.dirname(os.path.dirname(nvcc_path)) - - default_path = "/usr/local/cuda" - if os.path.exists(default_path): - return default_path - - return None - - # cpp is the C++ source code # cc = 800 for Ampere, 900 Hopper, etc # rdc is true or false @@ -47,24 +28,15 @@ def compile_impl(cpp, cc, rdc, code, nvrtc_path, nvrtc_version): check_in("rdc", rdc, [True, False]) check_in("code", code, ["lto", "ptx"]) - with pkg_resources.path("cuda", "_include") as include_path: - # Using `.parent` for compatibility with pip install --editable: - include_path = pkg_resources.files("cuda.cooperative").parent.joinpath( - "_include" - ) - cub_path = include_path - thrust_path = include_path - libcudacxx_path = os.path.join(include_path, "libcudacxx") - cuda_include_path = os.path.join(get_cuda_path(), "include") - - opts = [ - b"--std=c++17", - bytes(f"--include-path={cub_path}", encoding="ascii"), - bytes(f"--include-path={thrust_path}", encoding="ascii"), - bytes(f"--include-path={libcudacxx_path}", 
encoding="ascii"), - bytes(f"--include-path={cuda_include_path}", encoding="ascii"), - bytes(f"--gpu-architecture=compute_{cc}", encoding="ascii"), - ] + opts = [b"--std=c++17"] + + # TODO: move this to a module-level import (after docs env modernization). + from cuda.cccl import get_include_paths + + for path in get_include_paths().as_tuple(): + if path is not None: + opts += [f"--include-path={path}".encode("ascii")] + opts += [f"--gpu-architecture=compute_{cc}".encode("ascii")] if rdc: opts += [b"--relocatable-device-code=true"] diff --git a/python/cuda_cooperative/pyproject.toml b/python/cuda_cooperative/pyproject.toml index 017c0be1e56..788e1e6d5d8 100644 --- a/python/cuda_cooperative/pyproject.toml +++ b/python/cuda_cooperative/pyproject.toml @@ -1,11 +1,41 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception [build-system] -requires = ["packaging", "setuptools>=61.0.0", "wheel"] +requires = ["setuptools>=61.0.0"] build-backend = "setuptools.build_meta" +[project] +name = "cuda-cooperative" +description = "Experimental Core Library for CUDA Python" +authors = [{ name = "NVIDIA Corporation" }] +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Environment :: GPU :: NVIDIA CUDA", + "License :: OSI Approved :: Apache Software License", +] +requires-python = ">=3.9" +dependencies = [ + "cuda-cccl", + "numpy", + "numba>=0.60.0", + "pynvjitlink-cu12>=0.2.4", + "cuda-python==12.*", + "jinja2", +] +dynamic = ["version", "readme"] + +[project.optional-dependencies] +test = ["pytest", "pytest-xdist"] + +[project.urls] +Homepage = "https://developer.nvidia.com/" + +[tool.setuptools.dynamic] +version = { attr = "cuda.cooperative._version.__version__" } +readme = { file = ["README.md"], content-type = "text/markdown" } + [tool.ruff] extend = "../../pyproject.toml" diff --git 
a/python/cuda_cooperative/setup.py b/python/cuda_cooperative/setup.py index 5f954086cfe..b8dd6502515 100644 --- a/python/cuda_cooperative/setup.py +++ b/python/cuda_cooperative/setup.py @@ -1,91 +1,9 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. # # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import os -import shutil - -from setuptools import Command, find_namespace_packages, setup -from setuptools.command.build_py import build_py -from wheel.bdist_wheel import bdist_wheel - -project_path = os.path.abspath(os.path.dirname(__file__)) -cccl_path = os.path.abspath(os.path.join(project_path, "..", "..")) -cccl_headers = [["cub", "cub"], ["libcudacxx", "include"], ["thrust", "thrust"]] -__version__ = None -with open(os.path.join(project_path, "cuda", "cooperative", "_version.py")) as f: - exec(f.read()) -assert __version__ is not None -ver = __version__ -del __version__ - - -with open("README.md") as f: - long_description = f.read() - - -class CustomBuildCommand(build_py): - def run(self): - self.run_command("package_cccl") - build_py.run(self) - - -class CustomWheelBuild(bdist_wheel): - def run(self): - self.run_command("package_cccl") - super().run() - - -class PackageCCCLCommand(Command): - description = "Generate additional files" - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - for proj_dir, header_dir in cccl_headers: - src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir)) - dst_path = os.path.join(project_path, "cuda", "_include", proj_dir) - if os.path.exists(dst_path): - shutil.rmtree(dst_path) - shutil.copytree(src_path, dst_path) - +from setuptools import setup setup( - name="cuda-cooperative", - version=ver, - description="Experimental Core Library for CUDA Python", - long_description=long_description, - 
long_description_content_type="text/markdown", - author="NVIDIA Corporation", - classifiers=[ - "Programming Language :: Python :: 3 :: Only", - "Environment :: GPU :: NVIDIA CUDA", - ], - packages=find_namespace_packages(include=["cuda.*"]), - python_requires=">=3.9", - install_requires=[ - "numba>=0.60.0", - "pynvjitlink-cu12>=0.2.4", - "cuda-python", - "jinja2", - ], - extras_require={ - "test": [ - "pytest", - "pytest-xdist", - ] - }, - cmdclass={ - "package_cccl": PackageCCCLCommand, - "build_py": CustomBuildCommand, - "bdist_wheel": CustomWheelBuild, - }, - include_package_data=True, - license="Apache-2.0 with LLVM exception", - license_files=("../../LICENSE",), + license_files=["../../LICENSE"], ) diff --git a/python/cuda_parallel/.gitignore b/python/cuda_parallel/.gitignore index 8e0d030ff6a..7fc9da1604e 100644 --- a/python/cuda_parallel/.gitignore +++ b/python/cuda_parallel/.gitignore @@ -1,4 +1,3 @@ -cuda/_include env *egg-info *so diff --git a/python/cuda_parallel/MANIFEST.in b/python/cuda_parallel/MANIFEST.in deleted file mode 100644 index 848cbfe2e81..00000000000 --- a/python/cuda_parallel/MANIFEST.in +++ /dev/null @@ -1 +0,0 @@ -recursive-include cuda/_include * diff --git a/python/cuda_parallel/README.md b/python/cuda_parallel/README.md index 98a3a3c92d0..1dad4b0f03e 100644 --- a/python/cuda_parallel/README.md +++ b/python/cuda_parallel/README.md @@ -7,6 +7,7 @@ Please visit the documentation here: https://nvidia.github.io/cccl/python.html. 
## Local development ```bash +pip3 install -e ../cuda_cccl pip3 install -e .[test] pytest -v ./tests/ ``` diff --git a/python/cuda_parallel/cuda/parallel/experimental/_bindings.py b/python/cuda_parallel/cuda/parallel/experimental/_bindings.py index c19ceebbf3e..ffc35ee2a87 100644 --- a/python/cuda_parallel/cuda/parallel/experimental/_bindings.py +++ b/python/cuda_parallel/cuda/parallel/experimental/_bindings.py @@ -4,28 +4,12 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import ctypes -import os -import shutil from functools import lru_cache -from typing import List, Optional +from typing import List -from . import _cccl as cccl - - -def _get_cuda_path() -> Optional[str]: - cuda_path = os.environ.get("CUDA_PATH", "") - if os.path.exists(cuda_path): - return cuda_path - - nvcc_path = shutil.which("nvcc") - if nvcc_path is not None: - return os.path.dirname(os.path.dirname(nvcc_path)) - - default_path = "/usr/local/cuda" - if os.path.exists(default_path): - return default_path +from cuda.cccl import get_include_paths # type: ignore[import-not-found] - return None +from . import _cccl as cccl @lru_cache() @@ -55,27 +39,9 @@ def get_bindings() -> ctypes.CDLL: @lru_cache() def get_paths() -> List[bytes]: - # TODO: once docs env supports Python >= 3.9, we - # can move this to a module-level import. 
- from importlib.resources import as_file, files - - with as_file(files("cuda.parallel")) as f: - # Using `.parent` for compatibility with pip install --editable: - cub_include_path = str(f.parent / "_include") - thrust_include_path = cub_include_path - libcudacxx_include_path = str(os.path.join(cub_include_path, "libcudacxx")) - cuda_include_path = None - cuda_path = _get_cuda_path() - if cuda_path is not None: - cuda_include_path = str(os.path.join(cuda_path, "include")) paths = [ f"-I{path}".encode() - for path in ( - cub_include_path, - thrust_include_path, - libcudacxx_include_path, - cuda_include_path, - ) + for path in get_include_paths().as_tuple() if path is not None ] return paths diff --git a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py index 10a9cf12051..5e731bc4c50 100644 --- a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py +++ b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py @@ -56,6 +56,9 @@ def __init__( op: Callable, h_init: np.ndarray | GpuStruct, ): + # Referenced from __del__: + self.build_result = None + d_in_cccl = cccl.to_cccl_iter(d_in) self._ctor_d_in_cccl_type_enum_name = cccl.type_enum_as_name( d_in_cccl.value_type.type.value @@ -130,6 +133,8 @@ def __call__( return temp_storage_bytes.value def __del__(self): + if self.build_result is None: + return bindings = get_bindings() bindings.cccl_device_reduce_cleanup(ctypes.byref(self.build_result)) diff --git a/python/cuda_parallel/pyproject.toml b/python/cuda_parallel/pyproject.toml index c73736e496a..e7d2b9f0081 100644 --- a/python/cuda_parallel/pyproject.toml +++ b/python/cuda_parallel/pyproject.toml @@ -1,19 +1,39 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
# # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception [build-system] -requires = ["packaging", "setuptools>=61.0.0", "wheel"] +requires = ["setuptools>=61.0.0"] build-backend = "setuptools.build_meta" +[project] +name = "cuda-parallel" +description = "Experimental Core Library for CUDA Python" +authors = [{ name = "NVIDIA Corporation" }] +classifiers = [ + "Programming Language :: Python :: 3 :: Only", + "Environment :: GPU :: NVIDIA CUDA", + "License :: OSI Approved :: Apache Software License", +] +requires-python = ">=3.9" +dependencies = ["cuda-cccl", "numba>=0.60.0", "cuda-python==12.*"] +dynamic = ["version", "readme"] + +[project.optional-dependencies] +test = ["pytest", "pytest-xdist", "cupy-cuda12x", "typing_extensions"] + +[project.urls] +Homepage = "https://developer.nvidia.com/" + +[tool.setuptools.dynamic] +version = { attr = "cuda.parallel._version.__version__" } +readme = { file = ["README.md"], content-type = "text/markdown" } + [tool.mypy] python_version = "3.10" [[tool.mypy.overrides]] -module = [ - "numba.*", - "llvmlite" -] +module = ["numba.*", "llvmlite"] ignore_missing_imports = true follow_imports = "skip" diff --git a/python/cuda_parallel/setup.py b/python/cuda_parallel/setup.py index bb7cbb3ac44..c5c9fcd3c32 100644 --- a/python/cuda_parallel/setup.py +++ b/python/cuda_parallel/setup.py @@ -1,61 +1,15 @@ -# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. 
# # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -import os -import shutil import subprocess +from pathlib import Path -from setuptools import Command, Extension, find_namespace_packages, setup +from setuptools import Extension, setup from setuptools.command.build_ext import build_ext -from setuptools.command.build_py import build_py -from wheel.bdist_wheel import bdist_wheel -project_path = os.path.abspath(os.path.dirname(__file__)) -cccl_path = os.path.abspath(os.path.join(project_path, "..", "..")) -cccl_headers = [["cub", "cub"], ["libcudacxx", "include"], ["thrust", "thrust"]] -__version__ = None -with open(os.path.join(project_path, "cuda", "parallel", "_version.py")) as f: - exec(f.read()) -assert __version__ is not None -ver = __version__ -del __version__ - - -with open("README.md") as f: - long_description = f.read() - - -class CustomBuildCommand(build_py): - def run(self): - self.run_command("package_cccl") - build_py.run(self) - - -class CustomWheelBuild(bdist_wheel): - def run(self): - self.run_command("package_cccl") - super().run() - - -class PackageCCCLCommand(Command): - description = "Generate additional files" - user_options = [] - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - for proj_dir, header_dir in cccl_headers: - src_path = os.path.abspath(os.path.join(cccl_path, proj_dir, header_dir)) - # TODO Extract cccl headers into a standalone package - dst_path = os.path.join(project_path, "cuda", "_include", proj_dir) - if os.path.exists(dst_path): - shutil.rmtree(dst_path) - shutil.copytree(src_path, dst_path) +CCCL_PYTHON_PATH = Path(__file__).resolve().parents[1] +CCCL_PATH = CCCL_PYTHON_PATH.parent class CMakeExtension(Extension): @@ -69,53 +23,27 @@ def run(self): self.build_extension(ext) def build_extension(self, ext): - extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + extdir = Path(self.get_ext_fullpath(ext.name)).resolve().parent cmake_args = [ 
"-DCCCL_ENABLE_C=YES", - "-DCCCL_C_PARALLEL_LIBRARY_OUTPUT_DIRECTORY=" + extdir, + f"-DCCCL_C_PARALLEL_LIBRARY_OUTPUT_DIRECTORY={extdir}", "-DCMAKE_BUILD_TYPE=Release", ] - if not os.path.exists(self.build_temp): - os.makedirs(self.build_temp) + build_temp_path = Path(self.build_temp) + build_temp_path.mkdir(parents=True, exist_ok=True) - subprocess.check_call(["cmake", cccl_path] + cmake_args, cwd=self.build_temp) + subprocess.check_call(["cmake", CCCL_PATH] + cmake_args, cwd=build_temp_path) subprocess.check_call( ["cmake", "--build", ".", "--target", "cccl.c.parallel"], - cwd=self.build_temp, + cwd=build_temp_path, ) setup( - name="cuda-parallel", - version=ver, - description="Experimental Core Library for CUDA Python", - long_description=long_description, - long_description_content_type="text/markdown", - author="NVIDIA Corporation", - classifiers=[ - "Programming Language :: Python :: 3 :: Only", - "Environment :: GPU :: NVIDIA CUDA", - ], - packages=find_namespace_packages(include=["cuda.*"]), - python_requires=">=3.9", - # TODO: typing_extensions required for Python 3.7 docs env - install_requires=["numba>=0.60.0", "cuda-python", "jinja2", "typing_extensions"], - extras_require={ - "test": [ - "pytest", - "pytest-xdist", - "cupy-cuda12x", - ] - }, + license_files=["../../LICENSE"], cmdclass={ - "package_cccl": PackageCCCLCommand, - "build_py": CustomBuildCommand, - "bdist_wheel": CustomWheelBuild, "build_ext": BuildCMakeExtension, }, ext_modules=[CMakeExtension("cuda.parallel.experimental.cccl.c")], - include_package_data=True, - license="Apache-2.0 with LLVM exception", - license_files=("../../LICENSE",), )