Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cuda.parallel: Add documentation for the current iterators along with examples and tests #3311

Merged
merged 5 commits into from
Jan 9, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add tests demonstrating usage of different iterators
NaderAlAwar committed Jan 9, 2025
commit 899b099bddfac75f3618b9493fb0c640d346feac
158 changes: 149 additions & 9 deletions python/cuda_parallel/tests/test_reduce_api.py
Original file line number Diff line number Diff line change
@@ -2,17 +2,13 @@
#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# example-begin imports
import cupy as cp
import numpy as np

import cuda.parallel.experimental.algorithms as algorithms

# example-end imports


def test_device_reduce():
# example-begin reduce-min
import cupy as cp
import numpy as np

import cuda.parallel.experimental.algorithms as algorithms

def min_op(a, b):
return a if a < b else b

@@ -37,3 +33,147 @@ def min_op(a, b):
expected_output = 0
assert (d_output == expected_output).all()
# example-end reduce-min


def test_cache_modified_input_iterator():
    """Sum-reduce a CuPy array read through a ``CacheModifiedInputIterator``.

    The reduced value is checked against the same sum computed on the host
    from the original Python list.
    """
    # example-begin cache-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    values = [8, 6, 7, 5, 3, 0, 9]
    d_input = cp.array(values, dtype=np.int32)  # CuPy array wrapped by the iterator

    iterator = iterators.CacheModifiedInputIterator(
        d_input, modifier="stream"
    )  # input sequence
    h_init = np.array([0], dtype=np.int32)  # initial value for the reduction
    d_output = cp.empty(1, dtype=np.int32)  # storage for output (allocated once)

    # Instantiate reduction, determine storage requirements, and allocate storage.
    # Passing None as the temp storage makes the call return the required size.
    reduce_into = algorithms.reduce_into(iterator, d_output, add_op, h_init)
    temp_storage_size = reduce_into(None, iterator, d_output, len(values), h_init)
    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)

    # Run reduction
    reduce_into(d_temp_storage, iterator, d_output, len(values), h_init)

    # Host-side reference result
    expected_output = functools.reduce(lambda a, b: a + b, values)
    assert (d_output == expected_output).all()
    # example-end cache-iterator


def test_constant_iterator():
# example-begin constant-iterator
import functools

import cupy as cp
import numpy as np

import cuda.parallel.experimental.algorithms as algorithms
import cuda.parallel.experimental.iterators as iterators

def add_op(a, b):
return a + b

value = 10
num_items = 3

# Create the iterator
constant_it = iterators.ConstantIterator(np.int32(value))
h_init = np.array([0], dtype=np.int32)
d_output = cp.empty(1, dtype=np.int32)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment above this code block only describes what the first line is doing; perhaps the following is clearer? (Ditto in other places.)

Suggested change
# Create the iterator
constant_it = iterators.ConstantIterator(np.int32(value))
h_init = np.array([0], dtype=np.int32)
d_output = cp.empty(1, dtype=np.int32)
constant_it = iterators.ConstantIterator(np.int32(value)) # input sequence
h_init = np.array([0], dtype=np.int32) # initial value for the reduction
d_output = cp.empty(1, dtype=np.int32) # storage for output

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed


# Instantiate reduction, determine storage requirements, and allocate storage
reduce_into = algorithms.reduce_into(constant_it, d_output, add_op, h_init)
temp_storage_size = reduce_into(None, constant_it, d_output, num_items, h_init)
d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)

# Run reduction
reduce_into(d_temp_storage, constant_it, d_output, num_items, h_init)

expected_output = functools.reduce(lambda a, b: a + b, [value] * num_items)
assert (d_output == expected_output).all()
# example-end constant-iterator


def test_counting_iterator():
    """Sum a short run of consecutive integers produced by ``CountingIterator``.

    The reduced value is checked against the sum of the matching ``range``
    computed on the host.
    """
    # example-begin counting-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    start = 10
    count = 3

    counting_it = iterators.CountingIterator(np.int32(start))  # input sequence
    init = np.array([0], dtype=np.int32)  # initial value for the reduction
    d_result = cp.empty(1, dtype=np.int32)  # storage for output

    # Build the reducer; calling it with None as temp storage returns the
    # size of the scratch buffer that has to be allocated.
    reducer = algorithms.reduce_into(counting_it, d_result, add_op, init)
    scratch_size = reducer(None, counting_it, d_result, count, init)
    d_scratch = cp.empty(scratch_size, dtype=np.uint8)

    # Run reduction
    reducer(d_scratch, counting_it, d_result, count, init)

    # Host-side reference: fold the same binary op over start .. start+count-1
    expected = functools.reduce(add_op, range(start, start + count))
    assert (d_result == expected).all()
    # example-end counting-iterator


def test_transform_iterator():
    """Sum the squares of consecutive integers via a ``TransformIterator``.

    The transform iterator wraps a ``CountingIterator`` and applies
    ``square_op``; the reduced value is checked against the same sum of
    squares computed on the host.
    """
    # example-begin transform-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    def square_op(a):
        return a ** 2

    start = 10
    count = 3

    # Compose: a CountingIterator supplies the values, square_op transforms them
    counting_it = iterators.CountingIterator(np.int32(start))
    squares_it = iterators.TransformIterator(counting_it, square_op)  # input sequence
    init = np.array([0], dtype=np.int32)  # initial value for the reduction
    d_result = cp.empty(1, dtype=np.int32)  # storage for output

    # Build the reducer; calling it with None as temp storage returns the
    # size of the scratch buffer that has to be allocated.
    reducer = algorithms.reduce_into(squares_it, d_result, add_op, init)
    scratch_size = reducer(None, squares_it, d_result, count, init)
    d_scratch = cp.empty(scratch_size, dtype=np.uint8)

    # Run reduction
    reducer(d_scratch, squares_it, d_result, count, init)

    # Host-side reference: square each value, then fold with the same binary op
    host_squares = [square_op(a) for a in range(start, start + count)]
    expected = functools.reduce(add_op, host_squares)
    assert (d_result == expected).all()
    # example-end transform-iterator