Skip to content

Commit

Permalink
[FEA]: Validate cuda.parallel type matching in build and execution (#…
Browse files Browse the repository at this point in the history
…2429)

* Brute force experiment: Which tests fail after adding an `assert False`?

* Do not include our own string.h file (#2444)

That might conflict with the host standard library

* Add `_dtype_validation()` in python/cuda_parallel/cuda/parallel/experimental/__init__.py and fix bug in python/cuda_parallel/tests/test_reduce_api.py

* Add `test_device_reduce_dtype_mismatch()`.  Capture `dtype`s only in ctor (not entire arrays).

* Undo change in .gitignore

* Move `min_op()` back into `test_device_reduce_success()` to unbreak sphinx documentation. Also fix existing typo.

* Move `test_device_reduce_dtype_mismatch()` from test_reduce_api.py to test_reduce.py

* Add TODO POINTER vs ITERATOR

---------

Co-authored-by: Michael Schellenberger Costa <[email protected]>
  • Loading branch information
rwgk and miscco authored Oct 4, 2024
1 parent 1cfe171 commit e8d57c3
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 5 deletions.
13 changes: 12 additions & 1 deletion python/cuda_parallel/cuda/parallel/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,16 @@ class _CCCLDeviceReduceBuildResult(ctypes.Structure):
("reduction_kernel", ctypes.c_void_p)]


def _dtype_validation(dt1, dt2):
if dt1 != dt2:
raise TypeError(f"dtype mismatch: __init__={dt1}, __call__={dt2}")


class _Reduce:
def __init__(self, d_in, d_out, op, init):
self._ctor_d_in_dtype = d_in.dtype
self._ctor_d_out_dtype = d_out.dtype
self._ctor_init_dtype = init.dtype
cc_major, cc_minor = cuda.get_current_device().compute_capability
cub_path, thrust_path, libcudacxx_path, cuda_include_path = _get_paths()
bindings = _get_bindings()
Expand All @@ -212,7 +220,10 @@ def __init__(self, d_in, d_out, op, init):
raise ValueError('Error building reduce')

def __call__(self, temp_storage, d_in, d_out, init):
# TODO Assert that types match the ones used in the constructor
# TODO validate POINTER vs ITERATOR when iterator support is added
_dtype_validation(self._ctor_d_in_dtype, d_in.dtype)
_dtype_validation(self._ctor_d_out_dtype, d_out.dtype)
_dtype_validation(self._ctor_init_dtype, init.dtype)
bindings = _get_bindings()
if temp_storage is None:
temp_storage_bytes = ctypes.c_size_t()
Expand Down
17 changes: 17 additions & 0 deletions python/cuda_parallel/tests/test_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,20 @@ def op(a, b):
result = d_output.copy_to_host()[0]
expected = numpy.sum(h_input, initial=h_init[0])
assert result == pytest.approx(expected)


def test_device_reduce_dtype_mismatch():
    """Check that calling a reduction with a dtype other than the build dtype fails.

    The reduction is built with the int32 input, output and init arrays.
    Each loop iteration replaces exactly one of the three call arguments
    with its int64 counterpart and expects the resulting ``TypeError``.
    """

    def min_op(a, b):
        return a if a < b else b

    dtypes = [numpy.int32, numpy.int64]
    h_inits = [numpy.array([], dt) for dt in dtypes]
    h_inputs = [numpy.array([], dt) for dt in dtypes]
    d_outputs = [cuda.device_array(1, dt) for dt in dtypes]
    d_inputs = [cuda.to_device(h_inp) for h_inp in h_inputs]

    reduce_into = cudax.reduce_into(d_inputs[0], d_outputs[0], min_op, h_inits[0])

    for ix in range(3):
        with pytest.raises(TypeError, match=r"^dtype mismatch: __init__=int32, __call__=int64$"):
            # int(ix == k) is 1 only for the argument at position ix, so that
            # argument alone uses the int64 array; the other two stay int32.
            reduce_into(None, d_inputs[int(ix == 0)], d_outputs[int(ix == 1)], h_inits[int(ix == 2)])
8 changes: 4 additions & 4 deletions python/cuda_parallel/tests/test_reduce_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@

def test_device_reduce():
# example-begin reduce-min
def op(a, b):
def min_op(a, b):
return a if a < b else b

dtype = numpy.int32
h_init = numpy.array([42], dtype)
h_input = numpy.array([8, 6, 7, 5, 3, 0, 9])
h_input = numpy.array([8, 6, 7, 5, 3, 0, 9], dtype)
d_output = cuda.device_array(1, dtype)
d_input = cuda.to_device(h_input)

# Instantiate reduction for the given operator and initial value
reduce_into = cudax.reduce_into(d_output, d_output, op, h_init)
reduce_into = cudax.reduce_into(d_output, d_output, min_op, h_init)

# Deterrmine temporary device storage requirements
# Determine temporary device storage requirements
temp_storage_size = reduce_into(None, d_input, d_output, h_init)

# Allocate temporary storage
Expand Down

0 comments on commit e8d57c3

Please sign in to comment.