Add type annotations and mypy checks for cuda.parallel (#3180)
* Refactor the source layout for cuda.parallel

* Add initial type annotations

* Update pre-commit config

* More typing

* Fix bad merge

* Fix TYPE_CHECKING and numpy annotations

* Type bindings.py correctly

* Address review feedback

---------

Co-authored-by: Ashwin Srinath <[email protected]>
shwina authored Dec 19, 2024
1 parent 6adaca3 commit 7b19adb
Showing 7 changed files with 109 additions and 89 deletions.
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
@@ -58,5 +58,16 @@ repos:
CITATION.md
)
- repo: https://github.com/pre-commit/mirrors-mypy
  rev: 'v1.13.0'
  hooks:
  - id: mypy
    additional_dependencies: [types-cachetools, numpy]
    args: ["--config-file=python/cuda_parallel/pyproject.toml",
           "python/cuda_parallel/cuda/"]
    pass_filenames: false


default_language_version:
  python: python3
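
For a quick local run of the same check outside of pre-commit, mypy's programmatic API can be called with the hook's arguments. This is a minimal sketch that assumes mypy (and the additional dependencies listed above) are installed in the current environment.

from mypy import api

# Reuse the config file and target directory from the pre-commit hook above.
stdout, stderr, exit_status = api.run(
    ["--config-file=python/cuda_parallel/pyproject.toml", "python/cuda_parallel/cuda/"]
)
print(stdout or stderr)
print("mypy exit status:", exit_status)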
Empty file.
63 changes: 29 additions & 34 deletions python/cuda_parallel/cuda/parallel/experimental/_bindings.py
@@ -5,13 +5,15 @@

import os
import shutil
import importlib
from importlib.resources import files, as_file
import ctypes
from functools import lru_cache
from typing import List, Optional

from . import _cccl as cccl


def _get_cuda_path():
def _get_cuda_path() -> Optional[str]:
cuda_path = os.environ.get("CUDA_PATH", "")
if os.path.exists(cuda_path):
return cuda_path
@@ -27,18 +29,10 @@ def _get_cuda_path():
return None


_bindings = None
_paths = None


def get_bindings():
global _bindings
if _bindings is not None:
return _bindings
include_path = importlib.resources.files("cuda.parallel.experimental").joinpath(
"cccl"
)
cccl_c_path = os.path.join(include_path, "libcccl.c.parallel.so")
@lru_cache()
def get_bindings() -> ctypes.CDLL:
with as_file(files("cuda.parallel.experimental")) as f:
cccl_c_path = str(f / "cccl" / "libcccl.c.parallel.so")
_bindings = ctypes.CDLL(cccl_c_path)
_bindings.cccl_device_reduce.restype = ctypes.c_int
_bindings.cccl_device_reduce.argtypes = [
@@ -56,23 +50,24 @@ def get_bindings():
return _bindings
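
As context for the declarations above, here is a minimal, self-contained sketch of the same ctypes pattern: load a shared library once, then set restype/argtypes so calls are type-checked at the boundary. The C math library stands in for libcccl.c.parallel.so purely for illustration.

import ctypes
import ctypes.util

# Load a shared library and declare a prototype, as get_bindings does for
# cccl_device_reduce; libm is only an illustrative stand-in.
libm = ctypes.CDLL(ctypes.util.find_library("m"))
libm.cos.restype = ctypes.c_double
libm.cos.argtypes = [ctypes.c_double]
print(libm.cos(0.0))  # 1.0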


def get_paths():
global _paths
if _paths is not None:
return _paths
# Using `.parent` for compatibility with pip install --editable:
include_path = importlib.resources.files("cuda.parallel").parent.joinpath(
"_include"
)
include_path_str = str(include_path)
include_option = "-I" + include_path_str
cub_path = include_option.encode()
thrust_path = cub_path
libcudacxx_path_str = str(os.path.join(include_path, "libcudacxx"))
libcudacxx_option = "-I" + libcudacxx_path_str
libcudacxx_path = libcudacxx_option.encode()
cuda_include_str = os.path.join(_get_cuda_path(), "include")
cuda_include_option = "-I" + cuda_include_str
cuda_include_path = cuda_include_option.encode()
_paths = cub_path, thrust_path, libcudacxx_path, cuda_include_path
return _paths
@lru_cache()
def get_paths() -> List[bytes]:
with as_file(files("cuda.parallel")) as f:
# Using `.parent` for compatibility with pip install --editable:
cub_include_path = str(f.parent / "_include")
thrust_include_path = cub_include_path
libcudacxx_include_path = str(os.path.join(cub_include_path, "libcudacxx"))
cuda_include_path = None
if cuda_path := _get_cuda_path():
cuda_include_path = str(os.path.join(cuda_path, "include"))
paths = [
f"-I{path}".encode()
for path in (
cub_include_path,
thrust_include_path,
libcudacxx_include_path,
cuda_include_path,
)
if path is not None
]
return paths
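
Both get_bindings and get_paths now rely on functools.lru_cache instead of module-level globals. A minimal sketch of that memoization pattern, with a hypothetical loader standing in for the real path construction, looks like this.

from functools import lru_cache

@lru_cache()
def _load_once() -> tuple:
    # Hypothetical stand-in for building the include-path flags.
    print("computed once")
    return (b"-I/path/to/cub", b"-I/path/to/libcudacxx")

flags_a = _load_once()
flags_b = _load_once()  # served from the cache; "computed once" is not printed again
assert flags_a is flags_b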
44 changes: 22 additions & 22 deletions python/cuda_parallel/cuda/parallel/experimental/_cccl.py
@@ -6,8 +6,11 @@
import numba
import functools
import ctypes
import numpy as np
from numba import types, cuda

from .iterators._iterators import IteratorBase


# MUST match `cccl_type_enum` in c/include/cccl/c/types.h
class TypeEnum(ctypes.c_int):
@@ -91,29 +94,29 @@ class Value(ctypes.Structure):


_TYPE_TO_ENUM = {
types.int8: TypeEnum.INT8,
types.int16: TypeEnum.INT16,
types.int32: TypeEnum.INT32,
types.int64: TypeEnum.INT64,
types.uint8: TypeEnum.UINT8,
types.uint16: TypeEnum.UINT16,
types.uint32: TypeEnum.UINT32,
types.uint64: TypeEnum.UINT64,
types.float32: TypeEnum.FLOAT32,
types.float64: TypeEnum.FLOAT64,
types.int8: TypeEnum(TypeEnum.INT8),
types.int16: TypeEnum(TypeEnum.INT16),
types.int32: TypeEnum(TypeEnum.INT32),
types.int64: TypeEnum(TypeEnum.INT64),
types.uint8: TypeEnum(TypeEnum.UINT8),
types.uint16: TypeEnum(TypeEnum.UINT16),
types.uint32: TypeEnum(TypeEnum.UINT32),
types.uint64: TypeEnum(TypeEnum.UINT64),
types.float32: TypeEnum(TypeEnum.FLOAT32),
types.float64: TypeEnum(TypeEnum.FLOAT64),
}


def _type_to_enum(numba_type):
def _type_to_enum(numba_type: types.Type) -> TypeEnum:
if numba_type in _TYPE_TO_ENUM:
return _TYPE_TO_ENUM[numba_type]
return TypeEnum.STORAGE
return TypeEnum(TypeEnum.STORAGE)


# TODO: replace with functools.cache once our docs build environment
# is upgraded to at least Python 3.9
@functools.lru_cache(maxsize=None)
def _numba_type_to_info(numba_type):
def _numba_type_to_info(numba_type: types.Type) -> TypeInfo:
context = cuda.descriptor.cuda_target.target_context
value_type = context.get_value_type(numba_type)
size = value_type.get_abi_size(context.target_data)
@@ -122,12 +125,12 @@ def _numba_type_to_info(numba_type):


@functools.lru_cache(maxsize=None)
def _numpy_type_to_info(numpy_type):
def _numpy_type_to_info(numpy_type: np.dtype) -> TypeInfo:
numba_type = numba.from_dtype(numpy_type)
return _numba_type_to_info(numba_type)


def _device_array_to_cccl_iter(array):
def _device_array_to_cccl_iter(array) -> Iterator:
info = _numpy_type_to_info(array.dtype)
return Iterator(
info.size,
Expand All @@ -143,7 +146,7 @@ def _device_array_to_cccl_iter(array):
)


def _iterator_to_cccl_iter(it):
def _iterator_to_cccl_iter(it: IteratorBase) -> Iterator:
context = cuda.descriptor.cuda_target.target_context
numba_type = it.numba_type
size = context.get_value_type(numba_type).get_abi_size(context.target_data)
@@ -180,8 +183,7 @@ def _iterator_to_cccl_iter(it):
)


def type_enum_as_name(enum_value):
assert isinstance(enum_value, int)
def type_enum_as_name(enum_value: int) -> str:
return (
"int8",
"int16",
@@ -197,14 +199,12 @@ def type_enum_as_name(enum_value):
)[enum_value]


def to_cccl_iter(array_or_iterator):
from cuda.parallel.experimental.iterators._iterators import IteratorBase

def to_cccl_iter(array_or_iterator) -> Iterator:
if isinstance(array_or_iterator, IteratorBase):
return _iterator_to_cccl_iter(array_or_iterator)
return _device_array_to_cccl_iter(array_or_iterator)


def host_array_to_value(array):
def host_array_to_value(array: np.ndarray) -> Value:
info = _numpy_type_to_info(array.dtype)
return Value(info, array.ctypes.data)
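
The _TYPE_TO_ENUM mapping above now stores TypeEnum instances rather than bare class attributes. A minimal sketch of why that helps the annotations, using illustrative values rather than the real cccl_type_enum ones:

import ctypes

class TypeEnum(ctypes.c_int):
    # Class attributes are plain ints; the values here are illustrative only.
    INT8 = 0
    FLOAT32 = 8

def enum_for(name: str) -> TypeEnum:
    # Wrapping with TypeEnum(...) returns an instance, which is what a
    # "-> TypeEnum" annotation promises to mypy.
    return TypeEnum({"int8": TypeEnum.INT8, "float32": TypeEnum.FLOAT32}[name])

print(enum_for("float32").value)  # 8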
@@ -3,24 +3,26 @@
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import numba
import ctypes
import numba
import numpy as np
from numba import cuda
from numba.cuda.cudadrv import enums
from typing import Callable

from .. import _cccl as cccl
from .._bindings import get_paths, get_bindings


class _Op:
def __init__(self, dtype, op):
def __init__(self, dtype: np.dtype, op: Callable):
value_type = numba.from_dtype(dtype)
self.ltoir, _ = cuda.compile(
op, sig=value_type(value_type, value_type), output="ltoir"
)
self.name = op.__name__.encode("utf-8")

def handle(self):
def handle(self) -> cccl.Op:
return cccl.Op(
cccl.OpKind.STATELESS,
self.name,
@@ -39,7 +41,7 @@ def _dtype_validation(dt1, dt2):

class _Reduce:
# TODO: constructor shouldn't require concrete `d_in`, `d_out`:
def __init__(self, d_in, d_out, op, h_init):
def __init__(self, d_in, d_out, op: Callable, h_init: np.ndarray):
d_in_cccl = cccl.to_cccl_iter(d_in)
self._ctor_d_in_cccl_type_enum_name = cccl.type_enum_as_name(
d_in_cccl.value_type.type.value
@@ -70,7 +72,7 @@ def __init__(self, d_in, d_out, op, h_init):
if error != enums.CUDA_SUCCESS:
raise ValueError("Error building reduce")

def __call__(self, temp_storage, d_in, d_out, num_items, h_init):
def __call__(self, temp_storage, d_in, d_out, num_items: int, h_init: np.ndarray):
d_in_cccl = cccl.to_cccl_iter(d_in)
if d_in_cccl.type.value == cccl.IteratorKind.ITERATOR:
assert num_items is not None
@@ -119,7 +121,7 @@ def __del__(self):

# TODO Figure out `sum` without operator and initial value
# TODO Accept stream
def reduce_into(d_in, d_out, op, h_init):
def reduce_into(d_in, d_out, op: Callable, h_init: np.ndarray):
"""Computes a device-wide reduction using the specified binary ``op`` functor and initial value ``init``.
Example:
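A minimal usage sketch based only on the signatures shown in this diff; the import path of reduce_into and the convention of passing temp_storage=None first to query the scratch-space size are assumptions, not details confirmed here.

import numpy as np
from numba import cuda
from cuda.parallel.experimental import reduce_into  # import path is an assumption

def add(a, b):
    return a + b

d_in = cuda.to_device(np.arange(10, dtype=np.int32))
d_out = cuda.device_array(1, dtype=np.int32)
h_init = np.array([0], dtype=np.int32)

reducer = reduce_into(d_in, d_out, add, h_init)         # build phase
nbytes = reducer(None, d_in, d_out, d_in.size, h_init)  # query temp storage size (assumed convention)
temp = cuda.device_array(nbytes, dtype=np.uint8)
reducer(temp, d_in, d_out, d_in.size, h_init)           # run the reduction
print(d_out.copy_to_host()[0])                          # 45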
