diff --git a/cuda_core/cuda/core/experimental/_graph.py b/cuda_core/cuda/core/experimental/_graph.py
index 1acf38176..894b41047 100644
--- a/cuda_core/cuda/core/experimental/_graph.py
+++ b/cuda_core/cuda/core/experimental/_graph.py
@@ -476,7 +476,7 @@ def create_conditional_handle(self, default_value=None) -> driver.CUgraphConditi
             default_value = 0
         flags = 0
 
-        status, _, graph, _, _ = handle_return(driver.cuStreamGetCaptureInfo(self._mnff.stream.handle))
+        status, _, graph, *_, _ = handle_return(driver.cuStreamGetCaptureInfo(self._mnff.stream.handle))
         if status != driver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_ACTIVE:
             raise RuntimeError("Cannot create a conditional handle when graph is not being built")
 
@@ -486,20 +486,22 @@ def create_conditional_handle(self, default_value=None) -> driver.CUgraphConditi
 
     def _cond_with_params(self, node_params) -> GraphBuilder:
         # Get current capture info to ensure we're in a valid state
-        status, _, graph, dependencies, num_dependencies = handle_return(
+        status, _, graph, *deps_info, num_dependencies = handle_return(
             driver.cuStreamGetCaptureInfo(self._mnff.stream.handle)
         )
         if status != driver.CUstreamCaptureStatus.CU_STREAM_CAPTURE_STATUS_ACTIVE:
             raise RuntimeError("Cannot add conditional node when not actively capturing")
 
         # Add the conditional node to the graph
-        node = handle_return(driver.cuGraphAddNode(graph, dependencies, num_dependencies, node_params))
+        deps_info_update = [
+            [handle_return(driver.cuGraphAddNode(graph, *deps_info, num_dependencies, node_params))]
+        ] + [None] * (len(deps_info) - 1)
 
         # Update the stream's capture dependencies
         handle_return(
             driver.cuStreamUpdateCaptureDependencies(
                 self._mnff.stream.handle,
-                [node],  # dependencies
+                *deps_info_update,  # dependencies, edgeData
                 1,  # numDependencies
                 driver.CUstreamUpdateCaptureDependencies_flags.CU_STREAM_SET_CAPTURE_DEPENDENCIES,
             )
@@ -677,17 +679,23 @@ def add_child(self, child_graph: GraphBuilder):
             raise ValueError("Parent graph is not being built.")
 
         stream_handle = self._mnff.stream.handle
-        _, _, graph_out, dependencies_out, num_dependencies_out = handle_return(
+        _, _, graph_out, *deps_info_out, num_dependencies_out = handle_return(
             driver.cuStreamGetCaptureInfo(stream_handle)
         )
 
-        child_node = handle_return(
-            driver.cuGraphAddChildGraphNode(graph_out, dependencies_out, num_dependencies_out, child_graph._mnff.graph)
-        )
+        deps_info_update = [
+            [
+                handle_return(
+                    driver.cuGraphAddChildGraphNode(
+                        graph_out, deps_info_out[0], num_dependencies_out, child_graph._mnff.graph
+                    )
+                )
+            ]
+        ] + [None] * (len(deps_info_out) - 1)
         handle_return(
             driver.cuStreamUpdateCaptureDependencies(
                 stream_handle,
-                [child_node],
+                *deps_info_update,  # dependencies, edgeData
                 1,
                 driver.CUstreamUpdateCaptureDependencies_flags.CU_STREAM_SET_CAPTURE_DEPENDENCIES,
             )
diff --git a/cuda_core/tests/conftest.py b/cuda_core/tests/conftest.py
index 34c87e185..77eab1a59 100644
--- a/cuda_core/tests/conftest.py
+++ b/cuda_core/tests/conftest.py
@@ -1,7 +1,7 @@
 # Copyright 2024 NVIDIA Corporation. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-import os
+import helpers
 
 try:
     from cuda.bindings import driver
@@ -65,5 +65,4 @@ def pop_all_contexts():
     return pop_all_contexts
 
 
-# TODO: make the fixture more sophisticated using path finder
-skipif_need_cuda_headers = pytest.mark.skipif(os.environ.get("CUDA_PATH") is None, reason="need CUDA header")
+skipif_need_cuda_headers = pytest.mark.skipif(helpers.CUDA_INCLUDE_PATH is None, reason="need CUDA header")
diff --git a/cuda_core/tests/helpers.py b/cuda_core/tests/helpers.py
new file mode 100644
index 000000000..3cc6e4846
--- /dev/null
+++ b/cuda_core/tests/helpers.py
@@ -0,0 +1,16 @@
+# Copyright 2025 NVIDIA Corporation. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+CUDA_PATH = os.environ.get("CUDA_PATH")
+CUDA_INCLUDE_PATH = None
+CCCL_INCLUDE_PATHS = None
+if CUDA_PATH is not None:
+    path = os.path.join(CUDA_PATH, "include")
+    if os.path.isdir(path):
+        CUDA_INCLUDE_PATH = path
+        CCCL_INCLUDE_PATHS = (path,)
+        path = os.path.join(path, "cccl")
+        if os.path.isdir(path):
+            CCCL_INCLUDE_PATHS = (path,) + CCCL_INCLUDE_PATHS
diff --git a/cuda_core/tests/test_event.py b/cuda_core/tests/test_event.py
index dcdbe0b92..827fa22b4 100644
--- a/cuda_core/tests/test_event.py
+++ b/cuda_core/tests/test_event.py
@@ -6,6 +6,7 @@
 import platform
 import time
 
+import helpers
 import numpy as np
 import pytest
 from conftest import skipif_need_cuda_headers
@@ -149,7 +150,7 @@ def test_error_timing_incomplete():
     program_options = ProgramOptions(
         std="c++17",
         arch=f"sm_{arch}",
-        include_path=str(pathlib.Path(os.environ["CUDA_PATH"]) / pathlib.Path("include")),
+        include_path=helpers.CCCL_INCLUDE_PATHS,
     )
     prog = Program(code, code_type="c++", options=program_options)
     mod = prog.compile(target_type="cubin")
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py
index 507bc32c3..9febcb794 100644
--- a/cuda_core/tests/test_launcher.py
+++ b/cuda_core/tests/test_launcher.py
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import ctypes
-import os
-import pathlib
+
+import helpers
 
 try:
     import cupy as cp
@@ -107,7 +107,7 @@ def test_launch_invalid_values(init_cuda):
     (ctypes.c_float, "float", 3.14),
     (ctypes.c_double, "double", 2.718),
 )
-if os.environ.get("CUDA_PATH"):
+if helpers.CCCL_INCLUDE_PATHS is not None:
     PARAMS += (
         (np.float16, "half", 0.78),
         (np.complex64, "cuda::std::complex<float>", 1 + 2j),
@@ -141,8 +141,7 @@ def test_launch_scalar_argument(python_type, cpp_type, init_value):
 
     # Compile and force instantiation for this type
    arch = "".join(f"{i}" for i in dev.compute_capability)
-    if os.environ.get("CUDA_PATH"):
-        include_path = str(pathlib.Path(os.environ["CUDA_PATH"]) / pathlib.Path("include"))
+    if helpers.CCCL_INCLUDE_PATHS is not None:
         code = (
             r"""
 #include <cuda_fp16.h>
@@ -150,9 +149,7 @@ def test_launch_scalar_argument(python_type, cpp_type, init_value):
 """
             + code
         )
-    else:
-        include_path = None
-    pro_opts = ProgramOptions(std="c++11", arch=f"sm_{arch}", include_path=include_path)
+    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}", include_path=helpers.CCCL_INCLUDE_PATHS)
     prog = Program(code, code_type="c++", options=pro_opts)
     ker_name = f"write_scalar<{cpp_type}>"
     mod = prog.compile("cubin", name_expressions=(ker_name,))
@@ -186,8 +183,7 @@ def test_cooperative_launch():
 
     # Compile and force instantiation for this type
     arch = "".join(f"{i}" for i in dev.compute_capability)
-    include_path = str(pathlib.Path(os.environ["CUDA_PATH"]) / pathlib.Path("include"))
-    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}", include_path=include_path)
+    pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}", include_path=helpers.CCCL_INCLUDE_PATHS)
     prog = Program(code, code_type="c++", options=pro_opts)
     ker = prog.compile("cubin").get_kernel("test_grid_sync")