diff --git a/python/cuda_parallel/cuda/parallel/experimental/_cccl.py b/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
index e231f721238..955274d66e4 100644
--- a/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
+++ b/python/cuda_parallel/cuda/parallel/experimental/_cccl.py
@@ -11,7 +11,7 @@
 import numpy as np
 from numba import cuda, types
 
-from ._utils.protocols import get_dtype, is_contiguous
+from ._utils.cai import get_dtype, is_contiguous
 from .iterators._iterators import IteratorBase
 from .typing import DeviceArrayLike, GpuStruct
 
diff --git a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
index f0b73f2b51d..109939d759a 100644
--- a/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
+++ b/python/cuda_parallel/cuda/parallel/experimental/algorithms/reduce.py
@@ -89,13 +89,7 @@ def __init__(
             raise ValueError("Error building reduce")
 
     def __call__(
-        self,
-        temp_storage,
-        d_in,
-        d_out,
-        num_items: int,
-        h_init: np.ndarray | GpuStruct,
-        stream=None,
+        self, temp_storage, d_in, d_out, num_items: int, h_init: np.ndarray | GpuStruct
     ):
         d_in_cccl = cccl.to_cccl_iter(d_in)
         if d_in_cccl.type.value == cccl.IteratorKind.ITERATOR:
@@ -110,7 +104,6 @@ def __call__(
             self._ctor_d_in_cccl_type_enum_name,
             cccl.type_enum_as_name(d_in_cccl.value_type.type.value),
         )
-        _dtype_validation(self._ctor_d_out_dtype, protocols.get_dtype(d_out))
+        _dtype_validation(self._ctor_d_out_dtype, cai.get_dtype(d_out))
         _dtype_validation(self._ctor_init_dtype, h_init.dtype)
-        stream_handle = protocols.validate_and_get_stream(stream)
         bindings = get_bindings()
@@ -132,7 +125,7 @@ def __call__(
             ctypes.c_ulonglong(num_items),
             self.op_wrapper.handle(),
             cccl.to_cccl_value(h_init),
-            stream_handle,
+            None,
         )
         if error != enums.CUDA_SUCCESS:
             raise ValueError("Error reducing")
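
For context: after this change, `__call__` no longer accepts a `stream` argument, and the reduction always launches on the default stream. Below is a minimal usage sketch of the two-phase call pattern against the post-change signature. It assumes the public `algorithms.reduce_into` entry point and uses CuPy for device allocation; everything outside the diff (the operator, array names, and the return of the temp-storage size from the first call) is illustrative, not part of this patch.

```python
# Sketch of the post-change call pattern (no `stream` argument).
# Assumes the `cuda.parallel.experimental.algorithms` entry point and CuPy.
import cupy as cp
import numpy as np

from cuda.parallel.experimental import algorithms


def add_op(a, b):
    return a + b


dtype = np.int32
h_init = np.array([0], dtype=dtype)            # host-side initial value
d_input = cp.array([1, 2, 3, 4], dtype=dtype)  # device input
d_output = cp.empty(1, dtype=dtype)            # device output (single element)

reducer = algorithms.reduce_into(d_input, d_output, add_op, h_init)

# First call with temp_storage=None queries the temporary-storage size.
temp_storage_bytes = reducer(None, d_input, d_output, len(d_input), h_init)
d_temp_storage = cp.empty(temp_storage_bytes, dtype=np.uint8)

# Second call performs the reduction; with this patch it runs on the
# default stream (the binding receives None where stream_handle used to go).
reducer(d_temp_storage, d_input, d_output, len(d_input), h_init)

assert d_output.get()[0] == 10
```

Callers that were passing `stream=` will need to drop that keyword until stream support is reintroduced.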