Skip to content

Commit e2380e5

Browse files
oleksandr-pavlyk authored and davebayer committed
Add test_reduce_api.py::test_reduce_struct_type_minmax (NVIDIA#3938)
* Add test_reduce_api.py::test_reduce_struct_type_minmax. This adds an example of using a struct that holds minimum and maximum values to compute the smallest and largest values of an array using a single call to cub::DeviceReduce. It also demonstrates the use of TransformIterator to map input array values as v -> MinMax(v, v) to make the binary reduction operator commutative, rendering the data-parallel algorithm applicable. Since this is done in registers, each element of the input array is only accessed once. * Refined docstring text * Change reduce-minmax to compute minimum/maximum absolute values of an array * Replace embedding_op with transform_op, add comment
1 parent cdfa1b6 commit e2380e5

File tree

2 files changed

+63
-5
lines changed

2 files changed

+63
-5
lines changed

python/cuda_parallel/cuda/parallel/experimental/struct.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,15 +38,15 @@ def gpu_struct(this: type) -> Type[GpuStruct]:
3838
3939
Example:
4040
The code snippet below shows how to use `gpu_struct` to define
41-
a `Pixel` type (composed of `r`, `g` and `b` values), and perform
42-
a reduction on an array of `Pixel` objects to identify the one
43-
with the largest `g` component:
41+
a `MinMax` type (composed of `min_val` and `max_val` values), and perform
42+
a reduction on an input array of floating point values to compute its
43+
smallest and largest absolute values:
4444
4545
.. literalinclude:: ../../python/cuda_parallel/tests/test_reduce_api.py
4646
:language: python
4747
:dedent:
48-
:start-after: example-begin reduce-struct
49-
:end-before: example-end reduce-struct
48+
:start-after: example-begin reduce-minmax
49+
:end-before: example-end reduce-minmax
5050
"""
5151
# Implementation-wise, @gpu_struct creates and registers a
5252
# corresponding numba type to the given type, so that it can be

python/cuda_parallel/tests/test_reduce_api.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,3 +213,61 @@ def max_g_value(x, y):
213213

214214
np.testing.assert_equal(expected["g"], d_out.get()["g"])
215215
# example-end reduce-struct
216+
217+
218+
def test_reduce_struct_type_minmax():
219+
# example-begin reduce-minmax
220+
import cupy as cp
221+
import numpy as np
222+
223+
import cuda.parallel.experimental.algorithms as algorithms
224+
import cuda.parallel.experimental.iterators as iterators
225+
from cuda.parallel.experimental.struct import gpu_struct
226+
227+
@gpu_struct
228+
class MinMax:
229+
min_val: np.float64
230+
max_val: np.float64
231+
232+
def minmax_op(v1: MinMax, v2: MinMax):
233+
c_min = min(v1.min_val, v2.min_val)
234+
c_max = max(v1.max_val, v2.max_val)
235+
return MinMax(c_min, c_max)
236+
237+
def transform_op(v):
238+
av = abs(v)
239+
return MinMax(av, av)
240+
241+
nelems = 4096
242+
243+
d_in = cp.random.randn(nelems)
244+
# input values must be transformed to MinMax structures
245+
# on the fly to map the computation onto a data-parallel reduction
246+
# algorithm that requires commutative binary operation
247+
# with both operands having the same type.
248+
tr_it = iterators.TransformIterator(d_in, transform_op)
249+
250+
d_out = cp.empty(tuple(), dtype=MinMax.dtype)
251+
252+
# initial value set with identity elements of
253+
# minimum and maximum operators
254+
h_init = MinMax(np.inf, -np.inf)
255+
256+
# get algorithm object
257+
cccl_sum = algorithms.reduce_into(tr_it, d_out, minmax_op, h_init)
258+
259+
# allocate the needed temporary storage
260+
tmp_sz = cccl_sum(None, tr_it, d_out, nelems, h_init)
261+
tmp_storage = cp.empty(tmp_sz, dtype=cp.uint8)
262+
263+
# invoke the reduction algorithm
264+
cccl_sum(tmp_storage, tr_it, d_out, nelems, h_init)
265+
266+
# copy the values computed on the device to the host
267+
actual = d_out.get()
268+
269+
h = np.abs(d_in.get())
270+
expected = np.asarray([(h.min(), h.max())], dtype=MinMax.dtype)
271+
272+
assert actual == expected
273+
# example-end reduce-minmax

0 commit comments

Comments
 (0)