Skip to content

Commit 7b7addd

Browse files
authored
Merge pull request #17 from fjarri/dynamic-mem
Rename `local_mem` to `cu_dynamic_local_mem`
2 parents dbf2ea1 + 9cfcebf commit 7b7addd

7 files changed

+51
-14
lines changed

docs/history.rst

+13
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,19 @@ Version history
22
===============
33

44

5+
0.5.0 (unreleased)
6+
------------------
7+
8+
Changed
9+
^^^^^^^
10+
11+
* ``local_mem`` keyword parameter of kernel calls renamed to ``cu_dynamic_local_mem``. (PR_17_)
12+
13+
14+
.. _PR_17: https://github.com/fjarri/grunnur/pull/17
15+
16+
17+
518
0.4.0 (25 Jul 2024)
619
-------------------
720

grunnur/adapter_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,6 @@ def __call__(
404404
self,
405405
queue_adapter: QueueAdapter,
406406
*args: BufferAdapter | numpy.generic,
407-
local_mem: int = 0,
407+
cu_dynamic_local_mem: int = 0,
408408
) -> Any:
409409
pass

grunnur/adapter_cuda.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,7 @@ def __call__(
701701
self,
702702
queue_adapter: QueueAdapter,
703703
*args: BufferAdapter | numpy.generic,
704-
local_mem: int = 0,
704+
cu_dynamic_local_mem: int = 0,
705705
) -> None:
706706
# Will be checked in the upper levels
707707
assert isinstance(queue_adapter, CuQueueAdapter) # noqa: S101
@@ -724,5 +724,5 @@ def __call__(
724724
grid=self._grid,
725725
block=self._block,
726726
stream=queue_adapter._pycuda_stream, # noqa: SLF001
727-
shared=local_mem,
727+
shared=cu_dynamic_local_mem,
728728
)

grunnur/adapter_opencl.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -632,11 +632,13 @@ def __call__(
632632
self,
633633
queue_adapter: QueueAdapter,
634634
*args: BufferAdapter | numpy.generic,
635-
local_mem: int = 0,
635+
cu_dynamic_local_mem: int = 0,
636636
) -> pyopencl.Event:
637-
# Local memory size is passed via regular kernel arguments in OpenCL.
638-
# Should be checked in `PreparedKernel`.
639-
assert local_mem == 0 # noqa: S101
637+
if cu_dynamic_local_mem != 0:
638+
raise ValueError(
639+
"`cu_dynamic_local_mem` must be zero for OpenCL kernels; "
640+
"dynamic local memory allocation is not supported"
641+
)
640642

641643
# We have to keep the signature more general because of the base class,
642644
# but the upper levels will ensure this is the case.

grunnur/program.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ def __call__(
314314
self,
315315
queue: Queue | MultiQueue,
316316
*args: MultiArray | Array | Buffer | numpy.generic,
317-
local_mem: int = 0,
317+
cu_dynamic_local_mem: int = 0,
318318
) -> Any:
319319
"""
320320
Enqueues the kernel on the devices in the given queue.
@@ -332,8 +332,10 @@ def __call__(
332332
If an argument is an integer-keyed ``dict``, its values corresponding to the
333333
device indices the kernel is executed on will be passed as kernel arguments.
334334
335+
:param cu_dynamic_local_mem: **CUDA only.** The size of dynamically allocated local
336+
(shared in CUDA terms) memory, in bytes. That is, the size of
337+
``extern __shared__`` arrays in CUDA kernels.
335338
:param args: kernel arguments.
336-
:param kwds: backend-specific keyword parameters.
337339
:returns: a list of ``Event`` objects for enqueued kernels in case of PyOpenCL.
338340
"""
339341
if isinstance(queue, Queue):
@@ -357,7 +359,11 @@ def __call__(
357359
single_queue = queue.queues[device]
358360

359361
pkernel = self._prepared_kernel_adapters[device]
360-
ret_val = pkernel(single_queue._queue_adapter, *kernel_args, local_mem=local_mem) # noqa: SLF001
362+
ret_val = pkernel(
363+
single_queue._queue_adapter, # noqa: SLF001
364+
*kernel_args,
365+
cu_dynamic_local_mem=cu_dynamic_local_mem,
366+
)
361367
ret_vals.append(ret_val)
362368

363369
return ret_vals
@@ -455,11 +461,11 @@ def __call__(
455461
global_size: Sequence[int] | Mapping[BoundDevice, Sequence[int]],
456462
local_size: Sequence[int] | None | Mapping[BoundDevice, Sequence[int] | None] = None,
457463
*args: MultiArray | Array | Buffer | numpy.generic,
458-
local_mem: int = 0,
464+
cu_dynamic_local_mem: int = 0,
459465
) -> Any:
460466
"""
461467
A shortcut for :py:meth:`Kernel.prepare` and subsequent :py:meth:`PreparedKernel.__call__`.
462468
See their doc entries for details.
463469
"""
464470
pkernel = self.prepare(global_size, local_size)
465-
return pkernel(queue, *args, local_mem=local_mem)
471+
return pkernel(queue, *args, cu_dynamic_local_mem=cu_dynamic_local_mem)

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "grunnur"
3-
version = "0.4.0"
3+
version = "0.5.0.dev"
44
description = "Uniform API for PyOpenCL and PyCUDA."
55
authors = [
66
{name = "Bogdan Opanchuk", email = "[email protected]"},

tests/test_program.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def test_compile(mock_or_real_context, no_prelude):
8787
assert (res == ref).all()
8888

8989
# Explicit local_size
90-
res2_dev = Array.from_host(queue, a) # Array.empty(queue, length, numpy.int32)
90+
res2_dev = Array.empty(context.device, [length], numpy.int32)
9191
program.kernel.multiply(queue, [length], [length // 2], res2_dev, a_dev, b_dev, c)
9292
res2 = res2_dev.get(queue)
9393
if not mocked:
@@ -518,3 +518,19 @@ def test_builtin_globals(mock_backend_pycuda):
518518

519519
assert "max_total_local_size = 1024" in program.sources[context.devices[0]].source
520520
assert "max_total_local_size = 512" in program.sources[context.devices[1]].source
521+
522+
523+
def test_cu_dynamic_local_mem(mock_context):
524+
src = MockDefTemplate(kernels=[MockKernel("test", [numpy.int32])])
525+
program = Program([mock_context.device], src)
526+
queue = Queue(mock_context.device)
527+
528+
if mock_context.api.id == opencl_api_id():
529+
message = (
530+
"`cu_dynamic_local_mem` must be zero for OpenCL kernels; "
531+
"dynamic local memory allocation is not supported"
532+
)
533+
with pytest.raises(ValueError, match=message):
534+
program.kernel.test(queue, [100], [100], numpy.int32(1), cu_dynamic_local_mem=100)
535+
else:
536+
program.kernel.test(queue, [100], [100], numpy.int32(1), cu_dynamic_local_mem=100)

0 commit comments

Comments
 (0)