#
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
44
5- # example-begin imports
6- import cupy as cp
7- import numpy as np
8-
9- import cuda .parallel .experimental .algorithms as algorithms
10-
11- # example-end imports
12-
135
146def test_device_reduce ():
157 # example-begin reduce-min
8+ import cupy as cp
9+ import numpy as np
10+
11+ import cuda .parallel .experimental .algorithms as algorithms
12+
1613 def min_op (a , b ):
1714 return a if a < b else b
1815
@@ -37,3 +34,147 @@ def min_op(a, b):
3734 expected_output = 0
3835 assert (d_output == expected_output ).all ()
3936 # example-end reduce-min
37+
38+
def test_cache_modified_input_iterator():
    """Sum a device array wrapped in a ``CacheModifiedInputIterator``.

    A CuPy ``int32`` array is wrapped in an iterator created with
    ``modifier="stream"``, reduced with ``add_op`` through ``reduce_into``,
    and the single-element device result is compared against the same sum
    computed on the host.
    """
    # example-begin cache-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    values = [8, 6, 7, 5, 3, 0, 9]
    d_input = cp.array(values, dtype=np.int32)

    iterator = iterators.CacheModifiedInputIterator(
        d_input, modifier="stream"
    )  # Input sequence
    h_init = np.array([0], dtype=np.int32)  # Initial value for the reduction
    # Output storage is allocated exactly once, matching the other iterator
    # examples in this file.
    d_output = cp.empty(1, dtype=np.int32)  # Storage for output

    # Instantiate reduction, determine storage requirements, and allocate storage
    reduce_into = algorithms.reduce_into(iterator, d_output, add_op, h_init)
    temp_storage_size = reduce_into(None, iterator, d_output, len(values), h_init)
    d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)

    # Run reduction
    reduce_into(d_temp_storage, iterator, d_output, len(values), h_init)

    expected_output = functools.reduce(lambda a, b: a + b, values)
    assert (d_output == expected_output).all()
    # example-end cache-iterator
73+
74+
def test_constant_iterator():
    """Reduce a ``ConstantIterator`` with addition and verify the total.

    The iterator is built from a single ``int32`` value; ``num_items`` items
    are reduced with ``add_op`` and the device result is compared against the
    sum of ``num_items`` copies of that value computed on the host.
    """
    # example-begin constant-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    num_items = 3
    value = 10

    d_output = cp.empty(1, dtype=np.int32)  # Storage for output
    h_init = np.array([0], dtype=np.int32)  # Initial value for the reduction
    input_it = iterators.ConstantIterator(np.int32(value))  # Input sequence

    # Instantiate the reduction for these arguments
    reducer = algorithms.reduce_into(input_it, d_output, add_op, h_init)

    # Calling without temporary storage yields the required storage size,
    # which is then used to allocate a byte buffer on the device
    temp_storage_bytes = reducer(None, input_it, d_output, num_items, h_init)
    d_temp_storage = cp.empty(temp_storage_bytes, dtype=np.uint8)

    # Run reduction
    reducer(d_temp_storage, input_it, d_output, num_items, h_init)

    # Host-side reference: the same value repeated num_items times
    host_items = [value] * num_items
    expected_output = functools.reduce(lambda acc, item: acc + item, host_items)
    matches = d_output == expected_output
    assert matches.all()
    # example-end constant-iterator
106+
107+
def test_counting_iterator():
    """Reduce a ``CountingIterator`` with addition and verify the total.

    The iterator is built from an ``int32`` starting value; ``num_items``
    items are reduced with ``add_op`` and the device result is compared
    against the sum of ``range(first_item, first_item + num_items)``
    computed on the host.
    """
    # example-begin counting-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    num_items = 3
    first_item = 10

    d_output = cp.empty(1, dtype=np.int32)  # Storage for output
    h_init = np.array([0], dtype=np.int32)  # Initial value for the reduction
    counting_it = iterators.CountingIterator(np.int32(first_item))  # Input sequence

    # Instantiate the reduction for these arguments
    reducer = algorithms.reduce_into(counting_it, d_output, add_op, h_init)

    # Calling without temporary storage yields the required storage size,
    # which is then used to allocate a byte buffer on the device
    temp_storage_bytes = reducer(None, counting_it, d_output, num_items, h_init)
    d_temp_storage = cp.empty(temp_storage_bytes, dtype=np.uint8)

    # Run reduction
    reducer(d_temp_storage, counting_it, d_output, num_items, h_init)

    # Host-side reference: consecutive integers starting at first_item
    host_items = range(first_item, first_item + num_items)
    expected_output = functools.reduce(lambda acc, item: acc + item, host_items)
    matches = d_output == expected_output
    assert matches.all()
    # example-end counting-iterator
141+
142+
def test_transform_iterator():
    """Reduce a ``TransformIterator`` of squared counts and verify the total.

    A ``CountingIterator`` starting at ``first_item`` is wrapped in a
    ``TransformIterator`` that applies ``square_op``; ``num_items`` items are
    reduced with ``add_op`` and the device result is compared against the sum
    of the corresponding squares computed on the host.
    """
    # example-begin transform-iterator
    import functools

    import cupy as cp
    import numpy as np

    import cuda.parallel.experimental.algorithms as algorithms
    import cuda.parallel.experimental.iterators as iterators

    def add_op(a, b):
        return a + b

    def square_op(a):
        return a**2

    num_items = 3
    first_item = 10

    d_output = cp.empty(1, dtype=np.int32)  # Storage for output
    h_init = np.array([0], dtype=np.int32)  # Initial value for the reduction

    # Input sequence: a counting iterator with square_op applied to it
    counting_it = iterators.CountingIterator(np.int32(first_item))
    squared_it = iterators.TransformIterator(counting_it, square_op)

    # Instantiate the reduction for these arguments
    reducer = algorithms.reduce_into(squared_it, d_output, add_op, h_init)

    # Calling without temporary storage yields the required storage size,
    # which is then used to allocate a byte buffer on the device
    temp_storage_bytes = reducer(None, squared_it, d_output, num_items, h_init)
    d_temp_storage = cp.empty(temp_storage_bytes, dtype=np.uint8)

    # Run reduction
    reducer(d_temp_storage, squared_it, d_output, num_items, h_init)

    # Host-side reference: squares of the same consecutive integers
    host_squares = [a**2 for a in range(first_item, first_item + num_items)]
    expected_output = functools.reduce(lambda acc, item: acc + item, host_squares)
    matches = d_output == expected_output
    assert matches.all()
    # example-end transform-iterator
0 commit comments