Add tests demonstrating usage of different iterators

NVIDIA · NaderAlAwar · Jan 9, 2025
commit 899b099bddfac75f3618b9493fb0c640d346feac
@@ -2,17 +2,13 @@
 #
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# example-begin imports
-import cupy as cp
-import numpy as np
-
-import cuda.parallel.experimental.algorithms as algorithms
-
-# example-end imports
-
-
 def test_device_reduce():
     # example-begin reduce-min
+    import cupy as cp
+    import numpy as np
+
+    import cuda.parallel.experimental.algorithms as algorithms
+
     def min_op(a, b):
         return a if a < b else b
 
@@ -37,3 +33,147 @@ def min_op(a, b):
     expected_output = 0
     assert (d_output == expected_output).all()
     # example-end reduce-min
+
+
+def test_cache_modified_input_iterator():
+    # example-begin cache-iterator
+    import functools
+
+    import cupy as cp
+    import numpy as np
+
+    import cuda.parallel.experimental.algorithms as algorithms
+    import cuda.parallel.experimental.iterators as iterators
+
+    def add_op(a, b):
+        return a + b
+
+    values = [8, 6, 7, 5, 3, 0, 9]
+    d_input = cp.array(values, dtype=np.int32)  # input sequence, as a CuPy array
+
+    # Create the iterator over d_input, requesting the "stream" modifier
+    iterator = iterators.CacheModifiedInputIterator(d_input, modifier="stream")
+    # Host-side initial value and single-element output buffer
+    h_init = np.array([0], dtype=np.int32)  # initial value for the reduction
+    d_output = cp.empty(1, dtype=np.int32)  # storage for output
+
+    # Instantiate reduction, determine storage requirements, and allocate storage
+    reduce_into = algorithms.reduce_into(iterator, d_output, add_op, h_init)
+    temp_storage_size = reduce_into(None, iterator, d_output, len(values), h_init)
+    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)
+
+    # Run reduction
+    reduce_into(d_temp_storage, iterator, d_output, len(values), h_init)
+
+    expected_output = functools.reduce(lambda a, b: a + b, values)
+    assert (d_output == expected_output).all()
+    # example-end cache-iterator
+
+
+def test_constant_iterator():
+    # example-begin constant-iterator
+    import functools
+
+    import cupy as cp
+    import numpy as np
+
+    import cuda.parallel.experimental.algorithms as algorithms
+    import cuda.parallel.experimental.iterators as iterators
+
+    def add_op(a, b):
+        return a + b
+
+    value = 10
+    num_items = 3
+
+    # Create the iterator
+    constant_it = iterators.ConstantIterator(np.int32(value))  # input sequence
+    h_init = np.array([0], dtype=np.int32)  # initial value for the reduction
+    d_output = cp.empty(1, dtype=np.int32)  # storage for output
+
+    # Instantiate reduction, determine storage requirements, and allocate storage
+    reduce_into = algorithms.reduce_into(constant_it, d_output, add_op, h_init)
+    temp_storage_size = reduce_into(None, constant_it, d_output, num_items, h_init)
+    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)
+
+    # Run reduction
+    reduce_into(d_temp_storage, constant_it, d_output, num_items, h_init)
+
+    # The test expects the iterator to supply `value` for each of the num_items items
+    expected_output = functools.reduce(lambda a, b: a + b, [value] * num_items)
+    assert (d_output == expected_output).all()
+    # example-end constant-iterator
+
+
+def test_counting_iterator():
+    # example-begin counting-iterator
+    import functools
+
+    import cupy as cp
+    import numpy as np
+
+    import cuda.parallel.experimental.algorithms as algorithms
+    import cuda.parallel.experimental.iterators as iterators
+
+    def add_op(a, b):
+        return a + b
+
+    first_item = 10  # first value of the sequence the test expects
+    num_items = 3  # number of items passed to the reduction
+
+    # Create the iterator
+    first_it = iterators.CountingIterator(np.int32(first_item))  # input sequence
+    h_init = np.array([0], dtype=np.int32)  # initial value for the reduction
+    d_output = cp.empty(1, dtype=np.int32)  # storage for output
+
+    # Instantiate reduction, determine storage requirements, and allocate storage
+    reduce_into = algorithms.reduce_into(first_it, d_output, add_op, h_init)
+    temp_storage_size = reduce_into(None, first_it, d_output, num_items, h_init)
+    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)
+
+    # Run reduction
+    reduce_into(d_temp_storage, first_it, d_output, num_items, h_init)
+
+    expected_output = functools.reduce(lambda a, b: a + b, range(first_item, first_item + num_items))
+    assert (d_output == expected_output).all()
+    # example-end counting-iterator
+
+
+def test_transform_iterator():
+    # example-begin transform-iterator
+    import functools
+
+    import cupy as cp
+    import numpy as np
+
+    import cuda.parallel.experimental.algorithms as algorithms
+    import cuda.parallel.experimental.iterators as iterators
+
+    def add_op(a, b):
+        return a + b
+
+    def square_op(a):
+        return a ** 2
+
+    first_item = 10  # first value of the underlying counting sequence
+    num_items = 3  # number of items passed to the reduction
+
+    # Create the iterator by composing square_op with a CountingIterator
+    transform_it = iterators.TransformIterator(
+        iterators.CountingIterator(np.int32(first_item)), square_op
+    )
+    h_init = np.array([0], dtype=np.int32)  # initial value for the reduction
+    d_output = cp.empty(1, dtype=np.int32)  # storage for output
+
+    # Instantiate reduction, determine storage requirements, and allocate storage
+    reduce_into = algorithms.reduce_into(transform_it, d_output, add_op, h_init)
+    temp_storage_size = reduce_into(None, transform_it, d_output, num_items, h_init)
+    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)
+
+    # Run reduction
+    reduce_into(d_temp_storage, transform_it, d_output, num_items, h_init)
+
+    expected_output = functools.reduce(
+        lambda a, b: a + b, [a ** 2 for a in range(first_item, first_item + num_items)]
+    )
+    assert (d_output == expected_output).all()
+    # example-end transform-iterator