From 8e231d2a9a9b17d25848bf85d4b7e222e142953f Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Sat, 14 Oct 2023 23:53:03 +0200 Subject: [PATCH 01/13] I tried many ways, thought I found one, but did not. --- tests/test_buffer_mapping.py | 234 +++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 tests/test_buffer_mapping.py diff --git a/tests/test_buffer_mapping.py b/tests/test_buffer_mapping.py new file mode 100644 index 00000000..2bc2ac24 --- /dev/null +++ b/tests/test_buffer_mapping.py @@ -0,0 +1,234 @@ +"""This part is somewhat tricky so we dedicate a test module on it. + +The tricky part for implementing the buffer data mapping, as specified by +the WebGPU API, is that its not trivial (at all) to provide an array-like object +for which numpy views can be created, and which we can invalidate in such +a way that the views become un-usable. + +We spend the first part of this module demonstrating what techniques +do NOT work. In the second part we demonstrate the working of the used +technique. In the third part we test the buffer mapping API itself. + +""" + +import ctypes + +import numpy as np + +from testutils import run_tests +import pytest + + +ALLOW_SEGFAULTS = False + + +# %%%%%%%%%% 1. What does NOT work + + +def test_fail_array_interface_with_ctypes_array(): + # Create an array-like object using __array_interface__, + # with a ctypes array to hold the data, via a real address pointer. + # We cannot invalidate the base array-like object. + + class ArrayLike: + def __init__(self): + size = 100 + offset = 0 + itemsize = 1 + self._data = (ctypes.c_uint8 * size)() + + readonly = False + typestr = " Date: Mon, 16 Oct 2023 12:11:54 +0200 Subject: [PATCH 02/13] Remove test file again --- tests/test_buffer_mapping.py | 234 ----------------------------------- 1 file changed, 234 deletions(-) delete mode 100644 tests/test_buffer_mapping.py diff --git a/tests/test_buffer_mapping.py b/tests/test_buffer_mapping.py deleted file mode 100644 index 2bc2ac24..00000000 --- a/tests/test_buffer_mapping.py +++ /dev/null @@ -1,234 +0,0 @@ -"""This part is somewhat tricky so we dedicate a test module on it. - -The tricky part for implementing the buffer data mapping, as specified by -the WebGPU API, is that its not trivial (at all) to provide an array-like object -for which numpy views can be created, and which we can invalidate in such -a way that the views become un-usable. - -We spend the first part of this module demonstrating what techniques -do NOT work. In the second part we demonstrate the working of the used -technique. In the third part we test the buffer mapping API itself. - -""" - -import ctypes - -import numpy as np - -from testutils import run_tests -import pytest - - -ALLOW_SEGFAULTS = False - - -# %%%%%%%%%% 1. What does NOT work - - -def test_fail_array_interface_with_ctypes_array(): - # Create an array-like object using __array_interface__, - # with a ctypes array to hold the data, via a real address pointer. - # We cannot invalidate the base array-like object. - - class ArrayLike: - def __init__(self): - size = 100 - offset = 0 - itemsize = 1 - self._data = (ctypes.c_uint8 * size)() - - readonly = False - typestr = " Date: Mon, 16 Oct 2023 14:47:54 +0200 Subject: [PATCH 03/13] New buffer mapping API --- codegen/apipatcher.py | 4 + wgpu/backends/rs.py | 157 ++++++++++++++++--------------- wgpu/base.py | 143 ++++++++++++++++++++-------- wgpu/resources/codegen_report.md | 10 +- 4 files changed, 196 insertions(+), 118 deletions(-) diff --git a/codegen/apipatcher.py b/codegen/apipatcher.py index b376ee27..115b8462 100644 --- a/codegen/apipatcher.py +++ b/codegen/apipatcher.py @@ -303,6 +303,8 @@ def get_method_def(self, classname, methodname): name_idl = self.name2idl(methodname) if methodname.endswith("_async") and name_idl not in functions: name_idl = self.name2idl(methodname.replace("_async", "")) + elif name_idl not in functions and name_idl + "Async" in functions: + name_idl += "Async" idl_line = functions[name_idl] # Construct preamble @@ -369,6 +371,8 @@ def method_is_known(self, classname, methodname): name_idl = self.name2idl(methodname) if "_async" in methodname and name_idl not in functions: name_idl = self.name2idl(methodname.replace("_async", "")) + elif name_idl not in functions and name_idl + "Async" in functions: + name_idl += "Async" return name_idl if name_idl in functions else None def get_class_names(self): diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 8ad0e39a..3784b241 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -810,31 +810,7 @@ def create_buffer( usage: "flags.BufferUsage", mapped_at_creation: bool = False, ): - size = int(size) - if mapped_at_creation: - raise ValueError( - "In wgpu-py, mapped_at_creation must be False. Use create_buffer_with_data() instead." - ) - return self._create_buffer(label, size, usage, False) - - def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage"): - # Get a memoryview of the data - m, src_address = get_memoryview_and_address(data) - m = m.cast("B", shape=(m.nbytes,)) - size = m.nbytes - - # Create the buffer (usage does not have to be MAP_READ or MAP_WRITE) - buffer = self._create_buffer(label, size, usage, True) - - # Copy the data to the mapped memory - # H: void * f(WGPUBuffer buffer, size_t offset, size_t size) - dst_ptr = libf.wgpuBufferGetMappedRange(buffer._internal, 0, size) - dst_address = int(ffi.cast("intptr_t", dst_ptr)) - dst_m = get_memoryview_from_address(dst_address, size) - dst_m[:] = m # nicer than ctypes.memmove(dst_address, src_address, m.nbytes) - - buffer._unmap() - return buffer + return self._create_buffer(label, int(size), usage, bool(mapped_at_creation)) def _create_buffer(self, label, size, usage, mapped_at_creation): # Create a buffer object @@ -852,7 +828,11 @@ def _create_buffer(self, label, size, usage, mapped_at_creation): # Note that there is wgpuBufferGetSize and wgpuBufferGetUsage, # but we already know these, so they are kindof useless? # Return wrapped buffer - return GPUBuffer(label, id, self, size, usage) + b = GPUBuffer(label, id, self, size, usage) + + if mapped_at_creation: + b._map_state = enums.BufferMapState.mapped + return b def create_texture( self, @@ -1519,12 +1499,30 @@ def _destroy(self): class GPUBuffer(base.GPUBuffer, GPUObjectBase): - def map_read(self): - size = self.size - - # Prepare - status = 99 - data = memoryview((ctypes.c_uint8 * size)()).cast("B") + def _check_range(self, offset, size): + offset = 0 if offset is None else int(offset) + size = (self.size - offset) if size is None else int(size) + if offset < 0: + raise ValueError("Mapped offset must not be smaller than zero.") + if size < 1: + raise ValueError("Mapped size must be larger than zero.") + if offset + size >= self.size: + raise ValueError("Mapped range must not extend beyond total buffer size.") + + def map(self, mode, offset=0, size=None): + # Check mode + mode = mode.upper() if isinstance(mode, str) else mode + if mode in (enums.MapMode.READ, "READ"): + map_mode = lib.WGPUMapMode_Read + elif mode in (enums.MapMode.WRITE, "WRITE"): + map_mode = lib.WGPUMapMode_Write + + # Check offset and size + offset, size = self._check_range(offset, size) + + # Can we even map? + if self._map_state != enums.BufferMapState.unmapped: + raise RuntimeError("Can only map a buffer if its currently unmapped.") @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)") def callback(status_, user_data_p): @@ -1535,76 +1533,87 @@ def callback(status_, user_data_p): self._map_state = enums.BufferMapState.pending # H: void f(WGPUBuffer buffer, WGPUMapModeFlags mode, size_t offset, size_t size, WGPUBufferMapCallback callback, void * userdata) libf.wgpuBufferMapAsync( - self._internal, lib.WGPUMapMode_Read, 0, size, callback, ffi.NULL + self._internal, map_mode, offset, size, callback, ffi.NULL ) # Let it do some cycles - # H: void f(WGPUInstance instance) - # libf.wgpuInstanceProcessEvents(get_wgpu_instance()) # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex) libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL) if status != 0: # no-cover - raise RuntimeError(f"Could not read buffer data ({status}).") + raise RuntimeError(f"Could not map buffer ({status}).") self._map_state = enums.BufferMapState.mapped + self._mapped_range = offset, offset + size - # Copy data + async def map_async(self, mode, offset=0, size=None): + return self.map() # for now + + def unmap(self): + # Can we even unmap? + if self._map_state != enums.BufferMapState.mapped: + raise RuntimeError("Can only unmap a buffer if its currently mapped.") + # H: void f(WGPUBuffer buffer) + libf.wgpuBufferUnmap(self._internal) + self._map_state = enums.BufferMapState.unmapped + self._mapped_range = None + + def read_mapped(self, offset=0, size=None): + # Can we even read? + if self._map_state != enums.BufferMapState.mapped: + raise RuntimeError("Can only read from a buffer if its mapped.") + + # Check offset and size + offset, size = self._check_range(offset, size) + if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]: + raise ValueError( + "The range for buffer reading is not contained in the currently mapped range." + ) + + # Get mapped memoryview. # H: void * f(WGPUBuffer buffer, size_t offset, size_t size) src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size) src_address = int(ffi.cast("intptr_t", src_ptr)) src_m = get_memoryview_from_address(src_address, size) + + # Copy the data. The memoryview created above becomes invalid when the buffer + # is unmapped, so we don't want to pass that memory to the user. + data = memoryview((ctypes.c_uint8 * size)()).cast("B") data[:] = src_m - self._unmap() return data - def map_write(self, data): - size = self.size + def write_mapped(self, data, offset=0, size=None): + # Can we even write? + if self._map_state != enums.BufferMapState.mapped: + raise RuntimeError("Can only write to a buffer if its mapped.") + # Cast data to a memoryview. This also works for e.g. numpy arrays, + # and the resulting memoryview will be a view on the data. data = memoryview(data).cast("B") - if data.nbytes != self.size: # pragma: no cover + + # Check offset and size + if size is None: + size = data.nbytes + offset, size = self._check_range(offset, size) + if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]: raise ValueError( - "Data passed to map_write() does not have the correct size." + "The range for buffer writing is not contained in the currently mapped range." ) - # Prepare - status = 99 - - @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)") - def callback(status_, user_data_p): - nonlocal status - status = status_ - - # Map it - self._map_state = enums.BufferMapState.pending - # H: void f(WGPUBuffer buffer, WGPUMapModeFlags mode, size_t offset, size_t size, WGPUBufferMapCallback callback, void * userdata) - libf.wgpuBufferMapAsync( - self._internal, lib.WGPUMapMode_Write, 0, size, callback, ffi.NULL - ) - - # Let it do some cycles - # H: void f(WGPUInstance instance) - # libf.wgpuInstanceProcessEvents(get_wgpu_instance()) - # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex) - libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL) - - if status != 0: # no-cover - raise RuntimeError(f"Could not read buffer data ({status}).") - self._map_state = enums.BufferMapState.mapped + # Check data size and given size. If the latter was given, it should match! + if data.nbytes != size: # pragma: no cover + raise ValueError( + "Data passed to GPUBuffer.write_mapped() does not match the given size." + ) - # Copy data + # Get mapped memoryview # H: void * f(WGPUBuffer buffer, size_t offset, size_t size) src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size) src_address = int(ffi.cast("intptr_t", src_ptr)) src_m = get_memoryview_from_address(src_address, size) - src_m[:] = data - self._unmap() - - def _unmap(self): - # H: void f(WGPUBuffer buffer) - libf.wgpuBufferUnmap(self._internal) - self._map_state = enums.BufferMapState.unmapped + # Copy data + src_m[:] = data def destroy(self): self._destroy() # no-cover diff --git a/wgpu/base.py b/wgpu/base.py index 7896a22b..56e95e43 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -445,23 +445,36 @@ def create_buffer( label (str): A human readable label. Optional. size (int): The size of the buffer in bytes. usage (flags.BufferUsage): The ways in which this buffer will be used. - mapped_at_creation (bool): Must be False, use create_buffer_with_data() instead. + mapped_at_creation (bool): Whether the buffer is initially mapped. """ raise NotImplementedError() - @apidiff.add("replaces WebGPU's mapping API") + @apidiff.add("Convenience function") def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage"): """Create a `GPUBuffer` object initialized with the given data. + This is a convenience function that creates a mapped buffer, + writes the given data to it, and then unmaps the buffer. + Arguments: label (str): A human readable label. Optional. data: Any object supporting the Python buffer protocol (this includes bytes, bytearray, ctypes arrays, numpy arrays, etc.). usage (flags.BufferUsage): The ways in which this buffer will be used. - Also see `GPUQueue.write_buffer()` and `GPUQueue.read_buffer()`. + Also see `GPUBuffer.write_mapped()` and `GPUQueue.write_buffer()`. """ - raise NotImplementedError() + # This function was originally created to support the workflow + # of initializing a buffer with data when we did not support + # buffer mapping. Now that we do have buffer mapping it is not + # strictly necessary, but it's still quite useful and feels + # more Pythonic than having to write the boilerplate code below. + buf = self.create_buffer( + label=label, size=size, usage=usage, mapped_at_creation=True + ) + buf.write_mapped(data) + buf.unmap() + return buf # IDL: GPUTexture createTexture(GPUTextureDescriptor descriptor); def create_texture( @@ -904,11 +917,9 @@ class GPUBuffer(GPUObjectBase): the buffer, subject to alignment restrictions depending on the operation. - Create a buffer using `GPUDevice.create_buffer()`, - `GPUDevice.create_buffer_mapped()` or `GPUDevice.create_buffer_mapped_async()`. + Create a buffer using `GPUDevice.create_buffer()`. - One can sync data in a buffer by mapping it (or by creating a mapped - buffer) and then setting/getting the values in the mapped memoryview. + One can sync data in a buffer by mapping it and then getting and setting data. Alternatively, one can tell the GPU (via the command encoder) to copy data between buffers and textures. """ @@ -948,36 +959,78 @@ def map_state(self): # This means that the mapped memory is reclaimed (i.e. invalid) # when unmap is called, and that whatever object we expose the # memory with to the user, must be set to a state where it can no - # longer be used. I currently can't think of a good way to do this. + # longer be used. There does not seem to be a good way to do this. # - # So instead, we can use mapping internally to allow reading and - # writing but not expose it via the public API. The only - # disadvantage (AFAIK) is that there could be use-cases where a - # memory copy could be avoided when using mapping. + # In our Python API we do make use of the same map/unmap mechanism, + # but reading and writing data goes via method calls instead of via + # an array-like object that exposes the shared memory. - # IDL: undefined destroy(); - def destroy(self): - """An application that no longer requires a buffer can choose - to destroy it. Note that this is automatically called when the - Python object is cleaned up by the garbadge collector. + # IDL: Promise mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size); + def map(self, mode, offset=0, size=None): + """Maps the given range of the GPUBuffer. + + When this call returns, the buffer content is ready to be + accessed with ``read_mapped`` or ``write_mapped``. Don't forget + to ``unmap()`` when done. + + Arguments: + mode (enum): The mapping mode, either mapmode.READ or mapmode.WRITE. + offset (str): the buffer offset in bytes. Default 0. + size (int): the size to read. Default until the end. """ raise NotImplementedError() - @apidiff.add("Alternative to mapping API") - def map_read(self): - """Map the buffer and read the data from it, then unmap. - Return a memoryview object. Requires the buffer usage to include MAP_READ. + # IDL: Promise mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size); + async def map_async(self, mode, offset=0, size=None): + """Alternative version of map().""" + raise NotImplementedError() + + # IDL: undefined unmap(); + def unmap(self): + """Unmaps the buffer. - See `GPUQueue.read_buffer()` for a simpler alternative. + Unmaps the mapped range of the GPUBuffer and makes it’s contents + available for use by the GPU again. """ raise NotImplementedError() - @apidiff.add("Alternative to mapping API") - def map_write(self, data): - """Map the buffer and write the data to it, then unmap. - Return a memoryview object. Requires the buffer usage to include MAP_WRITE. + @apidiff.add("Replacement for get_mapped_range") + def read_mapped(self, offset=0, size=None): + """Read mapped buffer data. + + This method must only be called when the buffer is in a mapped state. + This is the Python alternative to WebGPU's ``getMappedRange``. + Returns a memoryview that is a copy of the mapped data (it won't + become invalid when the buffer is ummapped). - See `WGPUQueue.write_buffer()` for a simpler alternative. + Arguments: + offset (str): the buffer offset in bytes. Default 0. Must be at least + as large as the offset specified in ``map()``. + size (int): the size to read. Default until the end. The resuling range + must fit into the range specified in ``map()``. + + Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. + """ + raise NotImplementedError() + + @apidiff.add("Replacement for get_mapped_range") + def write_mapped(self, data, offset=0, size=None): + """Read mapped buffer data. + + This method must only be called when the buffer is in a mapped state. + This is the Python alternative to WebGPU's ``getMappedRange``. + Since the data can also be a view into a larger array, this method + allows updating the buffer with minimal data copying. + + Arguments: + data (buffer-like): The data to write to the buffer, in the form of + e.g. a bytes object, memoryview, or numpy array. + offset (str): the buffer offset in bytes. Default 0. Must be at least + as large as the offset specified in ``map()``. + size (int): the size to read. Default until the end. The resuling range + must fit into the range specified in ``map()``. + + Also see `GPUBuffer.read_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. """ raise NotImplementedError() @@ -986,14 +1039,26 @@ def map_write(self, data): def get_mapped_range(self, offset=0, size=None): raise NotImplementedError("The Python API differs from WebGPU here") - # IDL: undefined unmap(); - @apidiff.hide - def unmap(self): - raise NotImplementedError("The Python API differs from WebGPU here") + @apidiff.add("Deprecated but still here to raise a warning") + def map_read(self, offset=None, size=None, iter=None): + """Deprecated.""" + raise DeprecationWarning( + "map_read() is deprecated, use map() and read_mapped() instead." + ) - # IDL: Promise mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size); - @apidiff.hide - async def map_async(self, mode, offset=0, size=None): + @apidiff.add("Deprecated but still here to raise a warning") + def map_write(self, data): + """Deprecated.""" + raise DeprecationWarning( + "map_read() is deprecated, use map() and write_mapped() instead." + ) + + # IDL: undefined destroy(); + def destroy(self): + """An application that no longer requires a buffer can choose + to destroy it. Note that this is automatically called when the + Python object is cleaned up by the garbadge collector. + """ raise NotImplementedError() @@ -1117,7 +1182,7 @@ def __init__(self, label, internal, device, texture, size): self._texture = texture self._size = size - @apidiff.add("Need to know size e.g. for texture view provided by canvas.") + @apidiff.add("Need to know size e.g. for texture view provided by canvas") @property def size(self): """The texture size (as a 3-tuple).""" @@ -1689,12 +1754,12 @@ def write_buffer(self, buffer, buffer_offset, data, data_offset=0, size=None): This maps the data to a temporary buffer and then copies that buffer to the given buffer. The given buffer's usage must include COPY_DST. - Also see `GPUDevice.create_buffer_with_data()` and `GPUBuffer.map_write()`. + Also see `GPUBuffer.map()`. """ raise NotImplementedError() - @apidiff.add("replaces WebGPU's mapping API") + @apidiff.add("For symmetry with queue.write_buffer") def read_buffer(self, buffer, buffer_offset=0, size=None): """Takes the data contents of the buffer and return them as a memoryview. @@ -1707,7 +1772,7 @@ def read_buffer(self, buffer, buffer_offset=0, size=None): and then maps that buffer to read the data. The given buffer's usage must include COPY_SRC. - Also see `GPUBuffer.map_read()`. + Also see `GPUBuffer.map()`. """ raise NotImplementedError() diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 01516948..c8f76fbe 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -12,14 +12,14 @@ * Diffs for GPU: add print_report, change get_preferred_canvas_format, change request_adapter, change request_adapter_async * Diffs for GPUCanvasContext: add get_preferred_format, add present * Diffs for GPUDevice: add adapter, add create_buffer_with_data, hide import_external_texture, hide pop_error_scope, hide push_error_scope -* Diffs for GPUBuffer: add map_read, add map_write, hide get_mapped_range, hide map_async, hide unmap +* Diffs for GPUBuffer: add map_read, add map_write, add read_mapped, add write_mapped, hide get_mapped_range * Diffs for GPUTexture: add size * Diffs for GPUTextureView: add size, add texture * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture -* Validated 39 classes, 111 methods, 44 properties +* Validated 39 classes, 114 methods, 44 properties ### Patching API for backends/rs.py * Diffs for GPUAdapter: add request_device_tracing -* Validated 39 classes, 99 methods, 0 properties +* Validated 39 classes, 101 methods, 0 properties ## Validating rs.py * Enum field TextureFormat.rgb10a2uint missing in wgpu.h * Enum PipelineErrorReason missing in wgpu.h @@ -28,6 +28,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum field DeviceLostReason.unknown missing in wgpu.h * Wrote 232 enum mappings and 47 struct-field mappings to rs_mappings.py -* Validated 91 C function calls -* Not using 115 C functions +* Validated 86 C function calls +* Not using 116 C functions * Validated 72 C structs From 42c765d6a829a6d28e9c652d0f6b90d84bf241b0 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Mon, 16 Oct 2023 17:32:46 +0200 Subject: [PATCH 04/13] Allow read_mapped to return mapped data without making a copy --- wgpu/backends/rs.py | 38 +++++++++++++++++++++++++++++--------- wgpu/base.py | 16 +++++++++++++++- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 3784b241..39d42309 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1499,6 +1499,14 @@ def _destroy(self): class GPUBuffer(base.GPUBuffer, GPUObjectBase): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._mapped_range = 0, 0 + self._mapped_memoryviews = [] + if self._map_state == enums.BufferMapState.mapped: + self._mapped_range = 0, self.size + def _check_range(self, offset, size): offset = 0 if offset is None else int(offset) size = (self.size - offset) if size is None else int(size) @@ -1544,6 +1552,7 @@ def callback(status_, user_data_p): raise RuntimeError(f"Could not map buffer ({status}).") self._map_state = enums.BufferMapState.mapped self._mapped_range = offset, offset + size + self._mapped_memoryviews = [] async def map_async(self, mode, offset=0, size=None): return self.map() # for now @@ -1555,9 +1564,15 @@ def unmap(self): # H: void f(WGPUBuffer buffer) libf.wgpuBufferUnmap(self._internal) self._map_state = enums.BufferMapState.unmapped - self._mapped_range = None - - def read_mapped(self, offset=0, size=None): + self._mapped_range = 0, 0 + mapped_memoryviews = self._mapped_memoryviews + self._mapped_memoryviews = [] + # Release the mapped memoryview objects. These objects + # themselves become unusable, but any views on them do not. + for m in mapped_memoryviews: + m.release() + + def read_mapped(self, offset=0, size=None, *, copy=True): # Can we even read? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only read from a buffer if its mapped.") @@ -1575,12 +1590,17 @@ def read_mapped(self, offset=0, size=None): src_address = int(ffi.cast("intptr_t", src_ptr)) src_m = get_memoryview_from_address(src_address, size) - # Copy the data. The memoryview created above becomes invalid when the buffer - # is unmapped, so we don't want to pass that memory to the user. - data = memoryview((ctypes.c_uint8 * size)()).cast("B") - data[:] = src_m - - return data + if copy: + # Copy the data. The memoryview created above becomes invalid when the buffer + # is unmapped, so we don't want to pass that memory to the user. + data = memoryview((ctypes.c_uint8 * size)()).cast("B") + data[:] = src_m + return data + else: + # Return view on the actually mapped data + self._mapped_memoryviews.append(src_m) + return src_m + # todo: cast to B? def write_mapped(self, data, offset=0, size=None): # Can we even write? diff --git a/wgpu/base.py b/wgpu/base.py index 56e95e43..0bf636fd 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -469,6 +469,12 @@ def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage") # buffer mapping. Now that we do have buffer mapping it is not # strictly necessary, but it's still quite useful and feels # more Pythonic than having to write the boilerplate code below. + + # Create a view of known type + data = memoryview(data).cast("B") + size = data.nbytes + + # Create the buffer and write data buf = self.create_buffer( label=label, size=size, usage=usage, mapped_at_creation=True ) @@ -995,7 +1001,7 @@ def unmap(self): raise NotImplementedError() @apidiff.add("Replacement for get_mapped_range") - def read_mapped(self, offset=0, size=None): + def read_mapped(self, offset=0, size=None, *, copy=True): """Read mapped buffer data. This method must only be called when the buffer is in a mapped state. @@ -1008,6 +1014,14 @@ def read_mapped(self, offset=0, size=None): as large as the offset specified in ``map()``. size (int): the size to read. Default until the end. The resuling range must fit into the range specified in ``map()``. + copy (boool): whether a copy of the data is given. Default True. + If False, the returned memoryview represents the mapped data + directly, and is released when the buffer is unmapped. + WARNING: views of the returned data (e.g. memoryview objects or + numpy arrays) can still be used after the base memory is released, + which can result in corrupted data and segfaults. Therefore, when + setting copy to False, make *very* sure the memory is not accessed + after the buffer is unmapped. Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. """ From 9f1cde894a4eb6a3584bed3e3092289f5507e4a1 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 00:04:21 +0200 Subject: [PATCH 05/13] also release memviews on buffer destroy --- wgpu/backends/rs.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 39d42309..676065d8 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1565,12 +1565,17 @@ def unmap(self): libf.wgpuBufferUnmap(self._internal) self._map_state = enums.BufferMapState.unmapped self._mapped_range = 0, 0 - mapped_memoryviews = self._mapped_memoryviews - self._mapped_memoryviews = [] + self._release_memoryviews() + + def _release_memoryviews(self): # Release the mapped memoryview objects. These objects # themselves become unusable, but any views on them do not. - for m in mapped_memoryviews: - m.release() + for m in self._mapped_memoryviews: + try: + m.release() + except Exception: + pass + self._mapped_memoryviews = [] def read_mapped(self, offset=0, size=None, *, copy=True): # Can we even read? @@ -1639,6 +1644,7 @@ def destroy(self): self._destroy() # no-cover def _destroy(self): + self._release_memoryviews() if self._internal is not None and lib is not None: self._internal, internal = None, self._internal # H: void f(WGPUBuffer buffer) From f9da63923e46587e41318d37d633d54a053c696b Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 00:24:19 +0200 Subject: [PATCH 06/13] Fix current tests --- tests/test_rs_basics.py | 15 ++++++--------- wgpu/backends/rs.py | 27 ++++++++++++++++----------- wgpu/base.py | 4 ++-- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py index e60a04fe..a903a5bf 100644 --- a/tests/test_rs_basics.py +++ b/tests/test_rs_basics.py @@ -216,12 +216,6 @@ def test_buffer_init3(): device = wgpu.utils.get_default_device() data1 = b"abcdefghijkl" - # First fail - with raises(ValueError): - device.create_buffer( - mapped_at_creation=True, size=len(data1), usage=wgpu.BufferUsage.COPY_DST - ) - # Create buffer buf = device.create_buffer( size=len(data1), usage=wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.COPY_SRC @@ -556,7 +550,9 @@ def test_buffer_map_read_and_write(): # Upload data1 = b"abcdefghijkl" - buf1.map_write(data1) + buf1.map("write") + buf1.write_mapped(data1) + buf1.unmap() # Copy command_encoder = device.create_command_encoder() @@ -564,8 +560,9 @@ def test_buffer_map_read_and_write(): device.queue.submit([command_encoder.finish()]) # Download - data2 = buf2.map_read() - + buf2.map("read") + data2 = buf2.read_mapped() + buf2.unmap() assert data1 == data2 diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 676065d8..d3f1a54b 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -823,16 +823,17 @@ def _create_buffer(self, label, size, usage, mapped_at_creation): mappedAtCreation=mapped_at_creation, # not used: nextInChain ) + map_state = ( + enums.BufferMapState.mapped + if mapped_at_creation + else enums.BufferMapState.unmapped + ) # H: WGPUBuffer f(WGPUDevice device, WGPUBufferDescriptor const * descriptor) id = libf.wgpuDeviceCreateBuffer(self._internal, struct) # Note that there is wgpuBufferGetSize and wgpuBufferGetUsage, # but we already know these, so they are kindof useless? # Return wrapped buffer - b = GPUBuffer(label, id, self, size, usage) - - if mapped_at_creation: - b._map_state = enums.BufferMapState.mapped - return b + return GPUBuffer(label, id, self, size, usage, map_state) def create_texture( self, @@ -1514,15 +1515,16 @@ def _check_range(self, offset, size): raise ValueError("Mapped offset must not be smaller than zero.") if size < 1: raise ValueError("Mapped size must be larger than zero.") - if offset + size >= self.size: + if offset + size > self.size: raise ValueError("Mapped range must not extend beyond total buffer size.") + return offset, size def map(self, mode, offset=0, size=None): # Check mode mode = mode.upper() if isinstance(mode, str) else mode - if mode in (enums.MapMode.READ, "READ"): + if mode in (flags.MapMode.READ, "READ"): map_mode = lib.WGPUMapMode_Read - elif mode in (enums.MapMode.WRITE, "WRITE"): + elif mode in (flags.MapMode.WRITE, "WRITE"): map_mode = lib.WGPUMapMode_Write # Check offset and size @@ -1532,6 +1534,8 @@ def map(self, mode, offset=0, size=None): if self._map_state != enums.BufferMapState.unmapped: raise RuntimeError("Can only map a buffer if its currently unmapped.") + status = 999 + @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)") def callback(status_, user_data_p): nonlocal status @@ -1605,7 +1609,6 @@ def read_mapped(self, offset=0, size=None, *, copy=True): # Return view on the actually mapped data self._mapped_memoryviews.append(src_m) return src_m - # todo: cast to B? def write_mapped(self, data, offset=0, size=None): # Can we even write? @@ -2489,7 +2492,8 @@ def read_buffer(self, buffer, buffer_offset=0, size=None): self.submit([command_buffer]) # Download from mappable buffer - data = tmp_buffer.map_read() + tmp_buffer.map(flags.MapMode.READ) + data = tmp_buffer.read_mapped() tmp_buffer.destroy() return data @@ -2586,7 +2590,8 @@ def read_texture(self, source, data_layout, size): self.submit([command_buffer]) # Download from mappable buffer - data = tmp_buffer.map_read() + tmp_buffer.map(flags.MapMode.READ) + data = tmp_buffer.read_mapped() tmp_buffer.destroy() # Fix data strides if necessary diff --git a/wgpu/base.py b/wgpu/base.py index 0bf636fd..725cbd8b 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -930,11 +930,11 @@ class GPUBuffer(GPUObjectBase): copy data between buffers and textures. """ - def __init__(self, label, internal, device, size, usage): + def __init__(self, label, internal, device, size, usage, map_state): super().__init__(label, internal, device) self._size = size self._usage = usage - self._map_state = enums.BufferMapState.unmapped + self._map_state = map_state # IDL: readonly attribute GPUSize64Out size; @property From c02d98295280e82e79976316a88dc9312e7a4892 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 11:48:20 +0200 Subject: [PATCH 07/13] more buffer tests, fix buffer read issue --- tests/test_rs_basics.py | 128 +++++++++++++++++++++++++------ wgpu/backends/rs.py | 65 ++++++++++------ wgpu/base.py | 12 +-- wgpu/resources/codegen_report.md | 2 +- 4 files changed, 153 insertions(+), 54 deletions(-) diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py index a903a5bf..44442d58 100644 --- a/tests/test_rs_basics.py +++ b/tests/test_rs_basics.py @@ -14,6 +14,9 @@ from pytest import mark, raises +is_win = sys.platform.startswith("win") + + def test_get_wgpu_version(): version = wgpu.backends.rs.__version__ commit_sha = wgpu.backends.rs.__commit_sha__ @@ -145,9 +148,7 @@ def test_rs_tracer(): @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") -@mark.skipif( - is_ci and sys.platform == "win32", reason="Cannot use SpirV shader on dx12" -) +@mark.skipif(is_ci and is_win, reason="Cannot use SpirV shader on dx12") def test_shader_module_creation_spirv(): device = wgpu.utils.get_default_device() @@ -181,41 +182,78 @@ def test_buffer_init1(): device = wgpu.utils.get_default_device() data1 = b"abcdefghijkl" - # Create buffer + # Create buffer. COPY_SRC is needed to read the buffer via the queue. buf = device.create_buffer_with_data(data=data1, usage=wgpu.BufferUsage.COPY_SRC) # Download from buffer to CPU data2 = device.queue.read_buffer(buf) assert data1 == data2 + # --- also read via mapped data -# @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") -# def test_buffer_init2(): -# # Initializing a buffer as mapped, to directly set the data -# -# device = wgpu.utils.get_default_device() -# data1 = b"abcdefghijkl" -# -# # Create buffer -# buf, data2 = device.create_buffer_mapped( -# size=len(data1), usage=wgpu.BufferUsage.MAP_READ -# ) -# data2[:] = data1 -# buf.unmap() -# -# # Download from buffer to CPU -# data3 = buf.map(wgpu.MapMode.READ).tobytes() -# buf.unmap() -# assert data1 == data3 + # Create buffer. MAP_READ is needed to read the buffer via the queue. + buf = device.create_buffer_with_data(data=data1, usage=wgpu.BufferUsage.MAP_READ) + + wgpu.backends.rs.libf.wgpuDevicePoll( + buf._device._internal, True, wgpu.backends.rs.ffi.NULL + ) + + # Download from buffer to CPU + buf.map(wgpu.MapMode.READ) + wgpu.backends.rs.libf.wgpuDevicePoll( + buf._device._internal, True, wgpu.backends.rs.ffi.NULL + ) + + data2 = buf.read_mapped() + buf.unmap() + print(data2.tobytes()) + assert data1 == data2 + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_buffer_init2(): + # Initializing a buffer as mapped, to directly set the data + + device = wgpu.utils.get_default_device() + data1 = b"abcdefghijkl" + + # Create buffer. + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.COPY_SRC, mapped_at_creation=True + ) + buf.write_mapped(data1) + buf.unmap() + + # Download from buffer to CPU + data2 = device.queue.read_buffer(buf) + assert data1 == data2 + + # --- also read via mapped data + + # Create buffer. + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.MAP_READ, mapped_at_creation=True + ) + buf.write_mapped(data1) + buf.unmap() + + # Download from buffer to CPU + buf.map("read") + data2 = buf.read_mapped() + buf.unmap() + print(data2.tobytes()) + assert data1 == data2 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") def test_buffer_init3(): - # Initializing an empty buffer, then writing to it + # Initializing an empty buffer, then writing to it, then reading back device = wgpu.utils.get_default_device() data1 = b"abcdefghijkl" + # Option 1: write via queue (i.e. temp buffer), read via queue + # Create buffer buf = device.create_buffer( size=len(data1), usage=wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.COPY_SRC @@ -225,8 +263,48 @@ def test_buffer_init3(): device.queue.write_buffer(buf, 0, data1) # Download from buffer to CPU - data3 = device.queue.read_buffer(buf) - assert data1 == data3 + data2 = device.queue.read_buffer(buf) + assert data1 == data2 + + # Option 2: Write via mapped data, read via queue + + # Create buffer + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC + ) + + # Write data to it + buf.map("write") + buf.write_mapped(data1) + buf.unmap() + + # Download from buffer to CPU + data2 = device.queue.read_buffer(buf) + assert data1 == data2 + + # Option 3: Write via queue, read via mapped data + + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST + ) + + # Write data to it + device.queue.write_buffer(buf, 0, data1) + + # Download from buffer to CPU + buf.map("read") + data2 = buf.read_mapped() + buf.unmap() + assert data1 == data2 + + # Option 4: Write via mapped data, read via mapped data + + # Not actually an option + with raises(wgpu.GPUValidationError): + buf = device.create_buffer( + size=len(data1), + usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.MAP_WRITE, + ) @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index d3f1a54b..45a0d663 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1500,13 +1500,13 @@ def _destroy(self): class GPUBuffer(base.GPUBuffer, GPUObjectBase): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, label, internal, device, size, usage, map_state): + super().__init__(label, internal, device, size, usage, map_state) - self._mapped_range = 0, 0 + self._mapped_status = 0, 0, 0 self._mapped_memoryviews = [] if self._map_state == enums.BufferMapState.mapped: - self._mapped_range = 0, self.size + self._mapped_status = 0, self.size, 0 def _check_range(self, offset, size): offset = 0 if offset is None else int(offset) @@ -1520,12 +1520,27 @@ def _check_range(self, offset, size): return offset, size def map(self, mode, offset=0, size=None): + sync_on_read = True + # Check mode - mode = mode.upper() if isinstance(mode, str) else mode - if mode in (flags.MapMode.READ, "READ"): - map_mode = lib.WGPUMapMode_Read - elif mode in (flags.MapMode.WRITE, "WRITE"): - map_mode = lib.WGPUMapMode_Write + if isinstance(mode, str): + if mode.upper() == "READ_NOSYNC": # for internal use + mode = flags.MapMode.READ + sync_on_read = False + elif mode.upper() == "READ": + mode = flags.MapMode.READ + elif mode.upper() == "WRITE": + mode = flags.MapMode.WRITE + else: + raise ValueError("Map mode must be READ or WRITE") + if isinstance(mode, int): + map_mode = 0 + if mode & flags.MapMode.READ: + map_mode |= lib.WGPUMapMode_Read + if mode & flags.MapMode.WRITE: + map_mode |= lib.WGPUMapMode_Write + else: # pragma: no cover + raise TypeError("Map mode should be flag (int) or str.") # Check offset and size offset, size = self._check_range(offset, size) @@ -1534,6 +1549,13 @@ def map(self, mode, offset=0, size=None): if self._map_state != enums.BufferMapState.unmapped: raise RuntimeError("Can only map a buffer if its currently unmapped.") + # Sync up when reading, otherwise the memory may be all zeros. + # See https://github.com/gfx-rs/wgpu-native/issues/305 + if sync_on_read and map_mode & lib.WGPUMapMode_Read: + if self._mapped_status[2] == 0 and self._usage & flags.BufferUsage.MAP_READ: + encoder = self._device.create_command_encoder() + self._device.queue.submit([encoder.finish()]) + status = 999 @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)") @@ -1555,20 +1577,19 @@ def callback(status_, user_data_p): if status != 0: # no-cover raise RuntimeError(f"Could not map buffer ({status}).") self._map_state = enums.BufferMapState.mapped - self._mapped_range = offset, offset + size + self._mapped_status = offset, offset + size, mode self._mapped_memoryviews = [] async def map_async(self, mode, offset=0, size=None): - return self.map() # for now + return self.map(mode, offset, size) # for now def unmap(self): - # Can we even unmap? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only unmap a buffer if its currently mapped.") # H: void f(WGPUBuffer buffer) libf.wgpuBufferUnmap(self._internal) self._map_state = enums.BufferMapState.unmapped - self._mapped_range = 0, 0 + self._mapped_status = 0, 0, 0 self._release_memoryviews() def _release_memoryviews(self): @@ -1581,14 +1602,14 @@ def _release_memoryviews(self): pass self._mapped_memoryviews = [] - def read_mapped(self, offset=0, size=None, *, copy=True): + def read_mapped(self, buffer_offset=0, size=None, *, copy=True): # Can we even read? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only read from a buffer if its mapped.") # Check offset and size - offset, size = self._check_range(offset, size) - if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]: + offset, size = self._check_range(buffer_offset, size) + if offset < self._mapped_status[0] or (offset + size) > self._mapped_status[1]: raise ValueError( "The range for buffer reading is not contained in the currently mapped range." ) @@ -1610,7 +1631,7 @@ def read_mapped(self, offset=0, size=None, *, copy=True): self._mapped_memoryviews.append(src_m) return src_m - def write_mapped(self, data, offset=0, size=None): + def write_mapped(self, data, buffer_offset=0, size=None): # Can we even write? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only write to a buffer if its mapped.") @@ -1622,8 +1643,8 @@ def write_mapped(self, data, offset=0, size=None): # Check offset and size if size is None: size = data.nbytes - offset, size = self._check_range(offset, size) - if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]: + offset, size = self._check_range(buffer_offset, size) + if offset < self._mapped_status[0] or (offset + size) > self._mapped_status[1]: raise ValueError( "The range for buffer writing is not contained in the currently mapped range." ) @@ -2466,7 +2487,7 @@ def write_buffer(self, buffer, buffer_offset, data, data_offset=0, size=None): def read_buffer(self, buffer, buffer_offset=0, size=None): # Note that write_buffer probably does a very similar thing - # using a temporaty buffer. But write_buffer is official API + # using a temporary buffer. But write_buffer is official API # so it's a single call, while here we must create the temporary # buffer and do the copying ourselves. @@ -2492,7 +2513,7 @@ def read_buffer(self, buffer, buffer_offset=0, size=None): self.submit([command_buffer]) # Download from mappable buffer - tmp_buffer.map(flags.MapMode.READ) + tmp_buffer.map("READ_NOSYNC") data = tmp_buffer.read_mapped() tmp_buffer.destroy() @@ -2590,7 +2611,7 @@ def read_texture(self, source, data_layout, size): self.submit([command_buffer]) # Download from mappable buffer - tmp_buffer.map(flags.MapMode.READ) + tmp_buffer.map("READ_NOSYNC") data = tmp_buffer.read_mapped() tmp_buffer.destroy() diff --git a/wgpu/base.py b/wgpu/base.py index 725cbd8b..9751f15b 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -1001,7 +1001,7 @@ def unmap(self): raise NotImplementedError() @apidiff.add("Replacement for get_mapped_range") - def read_mapped(self, offset=0, size=None, *, copy=True): + def read_mapped(self, buffer_offset=0, size=None, *, copy=True): """Read mapped buffer data. This method must only be called when the buffer is in a mapped state. @@ -1010,8 +1010,8 @@ def read_mapped(self, offset=0, size=None, *, copy=True): become invalid when the buffer is ummapped). Arguments: - offset (str): the buffer offset in bytes. Default 0. Must be at least - as large as the offset specified in ``map()``. + buffer_offset (int): the buffer offset in bytes. Default 0. Must be at + least as large as the offset specified in ``map()``. size (int): the size to read. Default until the end. The resuling range must fit into the range specified in ``map()``. copy (boool): whether a copy of the data is given. Default True. @@ -1023,12 +1023,12 @@ def read_mapped(self, offset=0, size=None, *, copy=True): setting copy to False, make *very* sure the memory is not accessed after the buffer is unmapped. - Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. + Also see `GPUBuffer.write_mapped()`, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. """ raise NotImplementedError() @apidiff.add("Replacement for get_mapped_range") - def write_mapped(self, data, offset=0, size=None): + def write_mapped(self, data, buffer_offset=0, size=None): """Read mapped buffer data. This method must only be called when the buffer is in a mapped state. @@ -1039,7 +1039,7 @@ def write_mapped(self, data, offset=0, size=None): Arguments: data (buffer-like): The data to write to the buffer, in the form of e.g. a bytes object, memoryview, or numpy array. - offset (str): the buffer offset in bytes. Default 0. Must be at least + buffer_offset (int): the buffer offset in bytes. Default 0. Must be at least as large as the offset specified in ``map()``. size (int): the size to read. Default until the end. The resuling range must fit into the range specified in ``map()``. diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index c8f76fbe..0f442d71 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -19,7 +19,7 @@ * Validated 39 classes, 114 methods, 44 properties ### Patching API for backends/rs.py * Diffs for GPUAdapter: add request_device_tracing -* Validated 39 classes, 101 methods, 0 properties +* Validated 39 classes, 103 methods, 0 properties ## Validating rs.py * Enum field TextureFormat.rgb10a2uint missing in wgpu.h * Enum PipelineErrorReason missing in wgpu.h From 9bc94686eb917fdc6dd7af6cab0b7f31e32d200e Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 12:59:24 +0200 Subject: [PATCH 08/13] More tests and fixes --- tests/test_rs_basics.py | 183 +++++++++++++++++++++++++++++++++++++++- wgpu/backends/rs.py | 41 +++++++-- wgpu/base.py | 25 +++--- 3 files changed, 229 insertions(+), 20 deletions(-) diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py index 44442d58..05b6c4ec 100644 --- a/tests/test_rs_basics.py +++ b/tests/test_rs_basics.py @@ -307,6 +307,187 @@ def test_buffer_init3(): ) +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_consequitive_writes1(): + # The inefficient way + + device = wgpu.utils.get_default_device() + + # Create buffer + buf = device.create_buffer( + size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC + ) + + # Write in parts + for i in range(4): + buf.map("write") + buf.write_mapped(f"{i+1}".encode() * 8, i * 8) + buf.unmap() + + # Download from buffer to CPU + data = device.queue.read_buffer(buf) + assert data == b"11111111222222223333333344444444" + + # Also in parts + for i in range(4): + data = device.queue.read_buffer(buf, i * 8, size=8) + assert data == f"{i+1}".encode() * 8 + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_consequitive_writes2(): + # The efficient way + + device = wgpu.utils.get_default_device() + + # Create buffer + buf = device.create_buffer( + size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC + ) + + # Write in parts + buf.map("write") + for i in range(4): + buf.write_mapped(f"{i+1}".encode() * 8, i * 8) + buf.unmap() + + # Download from buffer to CPU + data = device.queue.read_buffer(buf) + assert data == b"11111111222222223333333344444444" + + # Also in parts + for i in range(4): + data = device.queue.read_buffer(buf, i * 8, size=8) + assert data == f"{i+1}".encode() * 8 + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_consequitive_reads(): + device = wgpu.utils.get_default_device() + + # Create buffer + buf = device.create_buffer( + size=32, usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST + ) + + # Write using the queue. Do in parts, to touch those offsets too + for i in range(4): + device.queue.write_buffer(buf, i * 8, f"{i+1}".encode() * 8) + + # Read in parts, the inefficient way + for i in range(4): + buf.map("read") + data = buf.read_mapped(i * 8, 8) + assert data == f"{i+1}".encode() * 8 + buf.unmap() + + # Read in parts, the efficient way + buf.map("read") + for i in range(4): + data = buf.read_mapped(i * 8, 8) + assert data == f"{i+1}".encode() * 8 + buf.unmap() + + +@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") +def test_buffer_mapping_fails(): + device = wgpu.utils.get_default_device() + data = b"12345678" + + # Create buffer + buf = device.create_buffer( + size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC + ) + + with raises(RuntimeError): + buf.write_mapped(data) # Not mapped + with raises(RuntimeError): + buf.read_mapped() # Not mapped + + with raises(ValueError): + buf.map("boo") # Invalid map mode + + buf.map("write", 0, 28) + + with raises(RuntimeError): + buf.map("write") # Cannot map twice + + with raises(RuntimeError): + buf.map("read") # Cannot map twice + + with raises(RuntimeError): + buf.read_mapped() # Not mapped in read mode + + # Ok + buf.write_mapped(data) + buf.write_mapped(data, 0) + buf.write_mapped(data, 8) + buf.write_mapped(data, 16) + + # Fail + with raises(ValueError): + buf.write_mapped(data, -1) # not neg + with raises(ValueError): + buf.write_mapped(data, -8) # not neg + with raises(ValueError): + buf.write_mapped(data, 6) # not multilpe of eight + + # Ok + buf.write_mapped(b"1" * 4) + buf.write_mapped(b"1" * 8) + buf.write_mapped(b"1" * 28) + buf.write_mapped(b"1" * 12, 0) + buf.write_mapped(b"1" * 12, 8) + + with raises(ValueError): + buf.write_mapped(b"") # not empty + with raises(ValueError): + buf.write_mapped(b"1" * 64) # too large for buffer + with raises(ValueError): + buf.write_mapped(b"1" * 32) # too large for mapped range + with raises(ValueError): + buf.write_mapped(b"1" * 3) # not multiple of 4 + with raises(ValueError): + buf.write_mapped(b"1" * 6) # not multiple of 4 + with raises(ValueError): + buf.write_mapped(b"1" * 9) # not multiple of 4 + + # Can unmap multiple times though! + buf.unmap() + + with raises(RuntimeError): + buf.unmap() # Cannot unmap when not mapped + + # Create buffer in read mode ... + + buf = device.create_buffer( + size=32, usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST + ) + + with raises(RuntimeError): + buf.write_mapped(data) # not mapped + + buf.map("read", 8, 20) + + with raises(RuntimeError): + buf.map("read") # Cannot map twice + + with raises(RuntimeError): + buf.map("write") # Cannot map twice + + with raises(RuntimeError): + buf.write_mapped(data) # not mapped in write mode\ + + # Ok + assert len(buf.read_mapped()) == 20 + + # Fail + with raises(ValueError): + buf.read_mapped(64) # read beyond buffer size + with raises(ValueError): + buf.read_mapped(32) # read beyond mapped range + + @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") def test_clear_buffer(): data0 = b"111111112222222233333333" @@ -580,7 +761,7 @@ def test_write_buffer2(): device.queue.write_buffer(buf4, 0, data1) # We swipe the data. You could also think that we passed something into - # write_buffer without holding a referene to it. Anyway, write_buffer + # write_buffer without holding a reference to it. Anyway, write_buffer # seems to copy the data at the moment it is called. for i in range(len(data1)): data1[i] = 1 diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 45a0d663..3ddfc040 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1505,21 +1505,38 @@ def __init__(self, label, internal, device, size, usage, map_state): self._mapped_status = 0, 0, 0 self._mapped_memoryviews = [] + # If mapped at creation, set to write mode (no point in reading zeros) if self._map_state == enums.BufferMapState.mapped: - self._mapped_status = 0, self.size, 0 + self._mapped_status = 0, self.size, flags.MapMode.WRITE def _check_range(self, offset, size): - offset = 0 if offset is None else int(offset) - size = (self.size - offset) if size is None else int(size) + # Apply defaults + if offset is None: + offset = 0 + if self._mapped_status[2] != 0: + offset = self._mapped_status[0] + else: + offset = int(offset) + if size is None: + size = self.size - offset + if self._mapped_status[2] != 0: + size = self._mapped_status[1] - offset + else: + size = int(size) + # Checks if offset < 0: raise ValueError("Mapped offset must not be smaller than zero.") + if offset % 8: + raise ValueError("Mapped offset must be a multiple of 8.") if size < 1: raise ValueError("Mapped size must be larger than zero.") + if size % 4: + raise ValueError("Mapped offset must be a multiple of 4.") if offset + size > self.size: raise ValueError("Mapped range must not extend beyond total buffer size.") return offset, size - def map(self, mode, offset=0, size=None): + def map(self, mode, offset=None, size=None): sync_on_read = True # Check mode @@ -1602,10 +1619,14 @@ def _release_memoryviews(self): pass self._mapped_memoryviews = [] - def read_mapped(self, buffer_offset=0, size=None, *, copy=True): + def read_mapped(self, buffer_offset=None, size=None, *, copy=True): # Can we even read? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only read from a buffer if its mapped.") + elif not (self._mapped_status[2] & flags.MapMode.READ): + raise RuntimeError( + "Can only read from a buffer if its mapped in read mode." + ) # Check offset and size offset, size = self._check_range(buffer_offset, size) @@ -1616,7 +1637,7 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True): # Get mapped memoryview. # H: void * f(WGPUBuffer buffer, size_t offset, size_t size) - src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size) + src_ptr = libf.wgpuBufferGetMappedRange(self._internal, offset, size) src_address = int(ffi.cast("intptr_t", src_ptr)) src_m = get_memoryview_from_address(src_address, size) @@ -1631,10 +1652,14 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True): self._mapped_memoryviews.append(src_m) return src_m - def write_mapped(self, data, buffer_offset=0, size=None): + def write_mapped(self, data, buffer_offset=None, size=None): # Can we even write? if self._map_state != enums.BufferMapState.mapped: raise RuntimeError("Can only write to a buffer if its mapped.") + elif not (self._mapped_status[2] & flags.MapMode.WRITE): + raise RuntimeError( + "Can only write from a buffer if its mapped in write mode." + ) # Cast data to a memoryview. This also works for e.g. numpy arrays, # and the resulting memoryview will be a view on the data. @@ -1657,7 +1682,7 @@ def write_mapped(self, data, buffer_offset=0, size=None): # Get mapped memoryview # H: void * f(WGPUBuffer buffer, size_t offset, size_t size) - src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size) + src_ptr = libf.wgpuBufferGetMappedRange(self._internal, offset, size) src_address = int(ffi.cast("intptr_t", src_ptr)) src_m = get_memoryview_from_address(src_address, size) diff --git a/wgpu/base.py b/wgpu/base.py index 9751f15b..654c0dff 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -972,7 +972,7 @@ def map_state(self): # an array-like object that exposes the shared memory. # IDL: Promise mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size); - def map(self, mode, offset=0, size=None): + def map(self, mode, offset=None, size=None): """Maps the given range of the GPUBuffer. When this call returns, the buffer content is ready to be @@ -1001,7 +1001,7 @@ def unmap(self): raise NotImplementedError() @apidiff.add("Replacement for get_mapped_range") - def read_mapped(self, buffer_offset=0, size=None, *, copy=True): + def read_mapped(self, buffer_offset=None, size=None, *, copy=True): """Read mapped buffer data. This method must only be called when the buffer is in a mapped state. @@ -1010,10 +1010,11 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True): become invalid when the buffer is ummapped). Arguments: - buffer_offset (int): the buffer offset in bytes. Default 0. Must be at - least as large as the offset specified in ``map()``. - size (int): the size to read. Default until the end. The resuling range - must fit into the range specified in ``map()``. + buffer_offset (int): the buffer offset in bytes. Must be at + least as large as the offset specified in ``map()``. The default + is the offset of the mapped range. + size (int): the size to read. The resuling range must fit into the range + specified in ``map()``. The default is as large as the mapped range allows. copy (boool): whether a copy of the data is given. Default True. If False, the returned memoryview represents the mapped data directly, and is released when the buffer is unmapped. @@ -1028,7 +1029,7 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True): raise NotImplementedError() @apidiff.add("Replacement for get_mapped_range") - def write_mapped(self, data, buffer_offset=0, size=None): + def write_mapped(self, data, buffer_offset=None, size=None): """Read mapped buffer data. This method must only be called when the buffer is in a mapped state. @@ -1039,10 +1040,12 @@ def write_mapped(self, data, buffer_offset=0, size=None): Arguments: data (buffer-like): The data to write to the buffer, in the form of e.g. a bytes object, memoryview, or numpy array. - buffer_offset (int): the buffer offset in bytes. Default 0. Must be at least - as large as the offset specified in ``map()``. - size (int): the size to read. Default until the end. The resuling range - must fit into the range specified in ``map()``. + buffer_offset (int): the buffer offset in bytes. Must be at least + as large as the offset specified in ``map()``. The default + is the offset of the mapped range. + size (int): the size to read. The default is the size of + the data, so this argument can typically be ignored. The + resuling range must fit into the range specified in ``map()``. Also see `GPUBuffer.read_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`. """ From 1fbf6fd0684867569a1f19d9261acf52f8b18e62 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 13:19:03 +0200 Subject: [PATCH 09/13] Tests and fixes for copyless buffer reads --- tests/test_rs_basics.py | 62 +++++++++++++++++++++++++++++++++++++++-- wgpu/backends/rs.py | 9 +++--- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py index 05b6c4ec..6299bfa1 100644 --- a/tests/test_rs_basics.py +++ b/tests/test_rs_basics.py @@ -476,16 +476,72 @@ def test_buffer_mapping_fails(): buf.map("write") # Cannot map twice with raises(RuntimeError): - buf.write_mapped(data) # not mapped in write mode\ + buf.write_mapped(data) # not mapped in write mode # Ok assert len(buf.read_mapped()) == 20 # Fail with raises(ValueError): - buf.read_mapped(64) # read beyond buffer size + buf.read_mapped(0, 64) # read beyond buffer size with raises(ValueError): - buf.read_mapped(32) # read beyond mapped range + buf.read_mapped(0, 32) # read beyond mapped range + + buf.unmap() + + with raises(RuntimeError): + buf.unmap() # Cannot unmap when not mapped + + +def test_buffer_read_no_copy(): + device = wgpu.utils.get_default_device() + data1 = b"12345678" * 2 + + buf = device.create_buffer( + size=len(data1), usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST + ) + + # Write data to it + device.queue.write_buffer(buf, 0, data1) + + # Download from buffer to CPU + buf.map("read") + data2 = buf.read_mapped(copy=False) + data3 = buf.read_mapped(0, 8, copy=False) + data4 = buf.read_mapped(8, 8, copy=False) + + assert data2 == data1 + assert data3 == data1[:8] + assert data4 == data1[8:] + + # Can access the arrays + _ = data2[0], data3[0], data4[0] + + # But cannot write to memory intended for reading + with raises(TypeError): + data2[0] = 1 + with raises(TypeError): + data3[0] = 1 + with raises(TypeError): + data4[0] = 1 + + buf.unmap() + + # The memoryview is invalidated when the buffer unmapped. + # Note that this unfortunately does *not* hold for views on these arrays. + with raises(ValueError): + data2[0] + with raises(ValueError): + data3[0] + with raises(ValueError): + data4[0] + + with raises(ValueError): + data2[0] = 1 # cannot write to memory intended for reading + with raises(ValueError): + data3[0] = 1 + with raises(ValueError): + data4[0] = 1 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib") diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 3ddfc040..c5ae3750 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1615,7 +1615,7 @@ def _release_memoryviews(self): for m in self._mapped_memoryviews: try: m.release() - except Exception: + except Exception: # no-cover pass self._mapped_memoryviews = [] @@ -1649,8 +1649,9 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True): return data else: # Return view on the actually mapped data - self._mapped_memoryviews.append(src_m) - return src_m + data = src_m.toreadonly() + self._mapped_memoryviews.append(data) + return data def write_mapped(self, data, buffer_offset=None, size=None): # Can we even write? @@ -1675,7 +1676,7 @@ def write_mapped(self, data, buffer_offset=None, size=None): ) # Check data size and given size. If the latter was given, it should match! - if data.nbytes != size: # pragma: no cover + if data.nbytes != size: # no-cover raise ValueError( "Data passed to GPUBuffer.write_mapped() does not match the given size." ) From 0f432622deb57a43f976643cdba0863ae71c865a Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 13:20:14 +0200 Subject: [PATCH 10/13] codegen --- wgpu/backends/rs.py | 2 +- wgpu/base.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index c5ae3750..0daf27fb 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1536,7 +1536,7 @@ def _check_range(self, offset, size): raise ValueError("Mapped range must not extend beyond total buffer size.") return offset, size - def map(self, mode, offset=None, size=None): + def map(self, mode, offset=0, size=None): sync_on_read = True # Check mode diff --git a/wgpu/base.py b/wgpu/base.py index 654c0dff..eeca32c3 100644 --- a/wgpu/base.py +++ b/wgpu/base.py @@ -972,7 +972,7 @@ def map_state(self): # an array-like object that exposes the shared memory. # IDL: Promise mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size); - def map(self, mode, offset=None, size=None): + def map(self, mode, offset=0, size=None): """Maps the given range of the GPUBuffer. When this call returns, the buffer content is ready to be From 2b4b16a96c8338dbea8282308e032d8916a2dec3 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 13:28:49 +0200 Subject: [PATCH 11/13] 3.7 compat --- wgpu/backends/rs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py index 0daf27fb..4d5ab36c 100644 --- a/wgpu/backends/rs.py +++ b/wgpu/backends/rs.py @@ -1649,7 +1649,9 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True): return data else: # Return view on the actually mapped data - data = src_m.toreadonly() + data = src_m + if hasattr(data, "toreadonly"): # Py 3.8+ + data = data.toreadonly() self._mapped_memoryviews.append(data) return data From 8d8ff8a5f51932c595b4b36db12b11c8abbe3e86 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 13:50:25 +0200 Subject: [PATCH 12/13] Add changelog --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d99ea2d..08a33a2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,33 @@ Changed: * Update to wgpu-native 0.17.2.1. No changes are needed in downstream code. +### [v0.11.0] - t.b.d. + +We have revised the buffer mapping API, making it more similar to the +WebGPU spec, and providing more flexible and performant ways to set +buffer data. + +Added: + +* The `GPUBuffer` has new methods `map()`, `map_async()`, `unmap()`. These have been + part of the WebGPU spec for a long time, but we had an alternative API, until now. +* The `GPUBuffer` has new methods `read_mapped()` and `write_mapped()`. These are not + present in the WebGPU spec; they are the Pythonic alternative to `getMappedRange()`. + +Changed: + +* Can create a buffer that is initially mapped: `device.create_buffer(..., mapped_at_creation=True)` is enabled again. + +Removed: + +* The `GPUBuffer` methods `map_read()`and `map_write()` are deprecated, in favor of `map()`, `unmap()`, `read_mapped()` and `write_mapped()`. + +For the record, these are not changed: + +* The convenient `device.create_buffer_with_data()` (not part of the WebGPU spec) is also available. +* The `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()` methods are unchanged. + + ### [v0.10.0] - 09-10-2023 In this release the API is aligned with the latest webgpu.idl, and From 3b5e03ac149374c8e7f5bd7fe61045088c31a643 Mon Sep 17 00:00:00 2001 From: Almar Klein Date: Tue, 17 Oct 2023 14:07:44 +0200 Subject: [PATCH 13/13] fix --- tests/test_rs_basics.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py index 6299bfa1..660dc604 100644 --- a/tests/test_rs_basics.py +++ b/tests/test_rs_basics.py @@ -518,12 +518,13 @@ def test_buffer_read_no_copy(): _ = data2[0], data3[0], data4[0] # But cannot write to memory intended for reading - with raises(TypeError): - data2[0] = 1 - with raises(TypeError): - data3[0] = 1 - with raises(TypeError): - data4[0] = 1 + if sys.version_info >= (3, 8): # no memoryview.toreadonly on 3.7 and below + with raises(TypeError): + data2[0] = 1 + with raises(TypeError): + data3[0] = 1 + with raises(TypeError): + data4[0] = 1 buf.unmap() @@ -537,7 +538,7 @@ def test_buffer_read_no_copy(): data4[0] with raises(ValueError): - data2[0] = 1 # cannot write to memory intended for reading + data2[0] = 1 with raises(ValueError): data3[0] = 1 with raises(ValueError):