From 8e231d2a9a9b17d25848bf85d4b7e222e142953f Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Sat, 14 Oct 2023 23:53:03 +0200
Subject: [PATCH 01/13] I tried many ways, thought I found one, but did not.

---
 tests/test_buffer_mapping.py | 234 +++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 tests/test_buffer_mapping.py

diff --git a/tests/test_buffer_mapping.py b/tests/test_buffer_mapping.py
new file mode 100644
index 00000000..2bc2ac24
--- /dev/null
+++ b/tests/test_buffer_mapping.py
@@ -0,0 +1,234 @@
+"""This part is somewhat tricky so we dedicate a test module on it.
+
+The tricky part for implementing the buffer data mapping, as specified by
+the WebGPU API, is that its not trivial (at all) to provide an array-like object
+for which numpy views can be created, and which we can invalidate in such
+a way that the views become un-usable.
+
+We spend the first part of this module demonstrating what techniques
+do NOT work. In the second part we demonstrate the working of the used
+technique. In the third part we test the buffer mapping API itself.
+
+"""
+
+import ctypes
+
+import numpy as np
+
+from testutils import run_tests
+import pytest
+
+
+ALLOW_SEGFAULTS = False
+
+
+# %%%%%%%%%%  1. What does NOT work
+
+
+def test_fail_array_interface_with_ctypes_array():
+    # Create an array-like object using __array_interface__,
+    # with a ctypes array to hold the data, via a real address pointer.
+    # We cannot invalidate the base array-like object.
+
+    class ArrayLike:
+        def __init__(self):
+            size = 100
+            offset = 0
+            itemsize = 1
+            self._data = (ctypes.c_uint8 * size)()
+
+            readonly = False
+            typestr = "<b1"
+            ptr = ctypes.addressof(self._data)
+            ptr += offset * itemsize
+
+            self.__array_interface__ = dict(
+                version=3,
+                shape=(size,),
+                typestr=typestr,
+                descr=[("", typestr)],
+                data=(ptr, readonly),
+                strides=None,
+            )
+
+        def __len__(self):
+            shape = self.__array_interface__["shape"]
+            return shape[0] if shape else 0
+
+    a = ArrayLike()
+
+    # Create numpy view
+    b = np.asarray(a)
+    assert b.base is a
+
+    b.dtype = np.int16
+    b.shape = 25, 2
+
+    # Set some data
+    b[:, 1] = 1
+
+    # Yes, this changes the original
+    assert list(a._data)[:8] == [0, 0, 1, 0, 0, 0, 1, 0]
+
+    # Try to invalidate the array
+    a.__array_interface__["shape"] = ()
+    a.__array_interface__["data"] = None
+
+    # Can still set the data
+    b[:, 1] = 2
+
+    # And it has changed the data. If the data would have been really
+    # released (by the process) this'd likely caused a segfault.
+    assert list(a._data)[:8] == [0, 0, 2, 0, 0, 0, 2, 0]
+
+
+def test_fail_but_arrays_can_be_resized_so_how_does_that_work():
+    # Arrays can be resized, so how does that work with views?
+    # Arrays cannot be resized when they have views on them.
+
+    # Create an array
+    a = np.array([1, 2, 3, 4], np.int32)
+
+    # We can resize it
+    a.resize((6,))
+
+    # Create a view on it
+    b = a.view()
+    assert b.base is a
+
+    # Cannot resize now, because it has a view - it checks back references!
+    with pytest.raises(ValueError):
+        a.resize((8,))
+
+    # For context, it is possible to override the behavior, but when
+    # you now try to use the arrays, it will segfault at some point.
+    # a.resize((4, ), refcheck=False)
+
+
+def test_fail_make_array_readonly():
+    # Arrays can be set readonly, how does that affect views on that array?
+    # It does not :)
+
+    # Create an array
+    a = np.array([1, 2, 3, 4], np.int32)
+
+    # And a view on it
+    b = a.view()
+    assert b.base is a
+
+    # Can modify the view
+    a[0] = 11
+    b[1] = 12
+    assert list(a[0:2]) == [11, 12]
+
+    # Now set the base array to readonly
+    a.flags.writeable = False
+
+    # Indeed cannot set array
+    with pytest.raises(ValueError):
+        a[0] = 13
+
+    # But can still set the view!
+    b[1] = 14
+    assert list(a[0:2]) == [11, 14]
+
+
+def test_fail_memoryview_can_be_released():
+    # Memoryviews can be released, how does that affect the views?
+    # It does not really affect them.
+
+    # Create a memoryview
+    a = memoryview((ctypes.c_uint8 * 100)()).cast("B")
+
+    # Create a view on it, via a cast
+    b = a.cast("h")
+
+    # Create another view on it, now a numpy array
+    c = np.frombuffer(a, dtype=np.int16)
+    # assert c.base is a  -- apparently not, but we check in a moment that its actually a view
+
+    # And again, but via a different API
+    d = np.asarray(a)
+    d.dtype = np.int16
+    # assert d.base is a  -- nope
+
+    # Can change the data, so c and d are indeed views
+    a[0] = 1
+    b[1] = 2
+    c[2] = 3
+    d[3] = 4
+    assert a.tolist()[:8] == [1, 0, 2, 0, 3, 0, 4, 0]
+
+    # Now release the memory
+    a.release()
+
+    # It works on the memoryview itself!
+    with pytest.raises(ValueError):
+        a[0] = 11
+
+    if ALLOW_SEGFAULTS:
+        # But not on the memoryview view
+        b[1] = 12
+
+        # And also not on the np array views
+        c[2] = 13
+        d[3] = 14
+
+
+def test_cannot_subclass_memoryview():
+    # So any solutions in that direction can be discarted.
+    with pytest.raises(TypeError):
+
+        class ArrayLike(memoryview):
+            pass
+
+
+def test_fail_memoryview_wrapped_in_array_interface():
+    # An array_interface can either point to a memory address or wrap
+    # a buffer-like object. I'd swear that I had this setup working in
+    # the sense that the views could not write after the memoryview was
+    # released, but I cannot reproduce it, so I must have seen something
+    # else.
+
+    class ArrayLike:
+        def __init__(self, size):
+            self._data = memoryview((ctypes.c_uint8 * size)()).cast("B")
+            self.__array_interface__ = dict(
+                version=3,
+                shape=(size,),
+                typestr="<b1",
+                descr=[("", "<b1")],
+                data=self._data,
+                strides=None,
+            )
+
+        def release(self):
+            self.__array_interface__["data"].release()
+
+    a = ArrayLike(100)
+
+    b = np.asarray(a).view(dtype=np.int16)
+
+    # Can change via the view
+    b[0] = 7
+    assert a._data.tolist()[:2] == [7, 0]
+
+    # Can invalidate
+    a.release()
+
+    # But can still write :(
+    if ALLOW_SEGFAULTS:
+        b[0] = 8
+
+
+# %%%%%%%%%%  2. This works
+
+
+# Euhm, I'm starting to feel there is no solution ...
+
+
+# %%%%%%%%%%  3. Buffer mapping API
+
+
+if __name__ == "__main__":
+    run_tests(globals())

From 102b51d80d067f3151eb839e949bd9bfe8989aad Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Mon, 16 Oct 2023 12:11:54 +0200
Subject: [PATCH 02/13] Remove test file again

---
 tests/test_buffer_mapping.py | 234 -----------------------------------
 1 file changed, 234 deletions(-)
 delete mode 100644 tests/test_buffer_mapping.py

diff --git a/tests/test_buffer_mapping.py b/tests/test_buffer_mapping.py
deleted file mode 100644
index 2bc2ac24..00000000
--- a/tests/test_buffer_mapping.py
+++ /dev/null
@@ -1,234 +0,0 @@
-"""This part is somewhat tricky so we dedicate a test module on it.
-
-The tricky part for implementing the buffer data mapping, as specified by
-the WebGPU API, is that its not trivial (at all) to provide an array-like object
-for which numpy views can be created, and which we can invalidate in such
-a way that the views become un-usable.
-
-We spend the first part of this module demonstrating what techniques
-do NOT work. In the second part we demonstrate the working of the used
-technique. In the third part we test the buffer mapping API itself.
-
-"""
-
-import ctypes
-
-import numpy as np
-
-from testutils import run_tests
-import pytest
-
-
-ALLOW_SEGFAULTS = False
-
-
-# %%%%%%%%%%  1. What does NOT work
-
-
-def test_fail_array_interface_with_ctypes_array():
-    # Create an array-like object using __array_interface__,
-    # with a ctypes array to hold the data, via a real address pointer.
-    # We cannot invalidate the base array-like object.
-
-    class ArrayLike:
-        def __init__(self):
-            size = 100
-            offset = 0
-            itemsize = 1
-            self._data = (ctypes.c_uint8 * size)()
-
-            readonly = False
-            typestr = "<b1"
-            ptr = ctypes.addressof(self._data)
-            ptr += offset * itemsize
-
-            self.__array_interface__ = dict(
-                version=3,
-                shape=(size,),
-                typestr=typestr,
-                descr=[("", typestr)],
-                data=(ptr, readonly),
-                strides=None,
-            )
-
-        def __len__(self):
-            shape = self.__array_interface__["shape"]
-            return shape[0] if shape else 0
-
-    a = ArrayLike()
-
-    # Create numpy view
-    b = np.asarray(a)
-    assert b.base is a
-
-    b.dtype = np.int16
-    b.shape = 25, 2
-
-    # Set some data
-    b[:, 1] = 1
-
-    # Yes, this changes the original
-    assert list(a._data)[:8] == [0, 0, 1, 0, 0, 0, 1, 0]
-
-    # Try to invalidate the array
-    a.__array_interface__["shape"] = ()
-    a.__array_interface__["data"] = None
-
-    # Can still set the data
-    b[:, 1] = 2
-
-    # And it has changed the data. If the data would have been really
-    # released (by the process) this'd likely caused a segfault.
-    assert list(a._data)[:8] == [0, 0, 2, 0, 0, 0, 2, 0]
-
-
-def test_fail_but_arrays_can_be_resized_so_how_does_that_work():
-    # Arrays can be resized, so how does that work with views?
-    # Arrays cannot be resized when they have views on them.
-
-    # Create an array
-    a = np.array([1, 2, 3, 4], np.int32)
-
-    # We can resize it
-    a.resize((6,))
-
-    # Create a view on it
-    b = a.view()
-    assert b.base is a
-
-    # Cannot resize now, because it has a view - it checks back references!
-    with pytest.raises(ValueError):
-        a.resize((8,))
-
-    # For context, it is possible to override the behavior, but when
-    # you now try to use the arrays, it will segfault at some point.
-    # a.resize((4, ), refcheck=False)
-
-
-def test_fail_make_array_readonly():
-    # Arrays can be set readonly, how does that affect views on that array?
-    # It does not :)
-
-    # Create an array
-    a = np.array([1, 2, 3, 4], np.int32)
-
-    # And a view on it
-    b = a.view()
-    assert b.base is a
-
-    # Can modify the view
-    a[0] = 11
-    b[1] = 12
-    assert list(a[0:2]) == [11, 12]
-
-    # Now set the base array to readonly
-    a.flags.writeable = False
-
-    # Indeed cannot set array
-    with pytest.raises(ValueError):
-        a[0] = 13
-
-    # But can still set the view!
-    b[1] = 14
-    assert list(a[0:2]) == [11, 14]
-
-
-def test_fail_memoryview_can_be_released():
-    # Memoryviews can be released, how does that affect the views?
-    # It does not really affect them.
-
-    # Create a memoryview
-    a = memoryview((ctypes.c_uint8 * 100)()).cast("B")
-
-    # Create a view on it, via a cast
-    b = a.cast("h")
-
-    # Create another view on it, now a numpy array
-    c = np.frombuffer(a, dtype=np.int16)
-    # assert c.base is a  -- apparently not, but we check in a moment that its actually a view
-
-    # And again, but via a different API
-    d = np.asarray(a)
-    d.dtype = np.int16
-    # assert d.base is a  -- nope
-
-    # Can change the data, so c and d are indeed views
-    a[0] = 1
-    b[1] = 2
-    c[2] = 3
-    d[3] = 4
-    assert a.tolist()[:8] == [1, 0, 2, 0, 3, 0, 4, 0]
-
-    # Now release the memory
-    a.release()
-
-    # It works on the memoryview itself!
-    with pytest.raises(ValueError):
-        a[0] = 11
-
-    if ALLOW_SEGFAULTS:
-        # But not on the memoryview view
-        b[1] = 12
-
-        # And also not on the np array views
-        c[2] = 13
-        d[3] = 14
-
-
-def test_cannot_subclass_memoryview():
-    # So any solutions in that direction can be discarted.
-    with pytest.raises(TypeError):
-
-        class ArrayLike(memoryview):
-            pass
-
-
-def test_fail_memoryview_wrapped_in_array_interface():
-    # An array_interface can either point to a memory address or wrap
-    # a buffer-like object. I'd swear that I had this setup working in
-    # the sense that the views could not write after the memoryview was
-    # released, but I cannot reproduce it, so I must have seen something
-    # else.
-
-    class ArrayLike:
-        def __init__(self, size):
-            self._data = memoryview((ctypes.c_uint8 * size)()).cast("B")
-            self.__array_interface__ = dict(
-                version=3,
-                shape=(size,),
-                typestr="<b1",
-                descr=[("", "<b1")],
-                data=self._data,
-                strides=None,
-            )
-
-        def release(self):
-            self.__array_interface__["data"].release()
-
-    a = ArrayLike(100)
-
-    b = np.asarray(a).view(dtype=np.int16)
-
-    # Can change via the view
-    b[0] = 7
-    assert a._data.tolist()[:2] == [7, 0]
-
-    # Can invalidate
-    a.release()
-
-    # But can still write :(
-    if ALLOW_SEGFAULTS:
-        b[0] = 8
-
-
-# %%%%%%%%%%  2. This works
-
-
-# Euhm, I'm starting to feel there is no solution ...
-
-
-# %%%%%%%%%%  3. Buffer mapping API
-
-
-if __name__ == "__main__":
-    run_tests(globals())

From 0a09ac580f2829f47b4103e2dbd951a6864882d4 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Mon, 16 Oct 2023 14:47:54 +0200
Subject: [PATCH 03/13] New buffer mapping API

---
 codegen/apipatcher.py            |   4 +
 wgpu/backends/rs.py              | 157 ++++++++++++++++---------------
 wgpu/base.py                     | 143 ++++++++++++++++++++--------
 wgpu/resources/codegen_report.md |  10 +-
 4 files changed, 196 insertions(+), 118 deletions(-)

diff --git a/codegen/apipatcher.py b/codegen/apipatcher.py
index b376ee27..115b8462 100644
--- a/codegen/apipatcher.py
+++ b/codegen/apipatcher.py
@@ -303,6 +303,8 @@ def get_method_def(self, classname, methodname):
         name_idl = self.name2idl(methodname)
         if methodname.endswith("_async") and name_idl not in functions:
             name_idl = self.name2idl(methodname.replace("_async", ""))
+        elif name_idl not in functions and name_idl + "Async" in functions:
+            name_idl += "Async"
         idl_line = functions[name_idl]
 
         # Construct preamble
@@ -369,6 +371,8 @@ def method_is_known(self, classname, methodname):
         name_idl = self.name2idl(methodname)
         if "_async" in methodname and name_idl not in functions:
             name_idl = self.name2idl(methodname.replace("_async", ""))
+        elif name_idl not in functions and name_idl + "Async" in functions:
+            name_idl += "Async"
         return name_idl if name_idl in functions else None
 
     def get_class_names(self):
diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 8ad0e39a..3784b241 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -810,31 +810,7 @@ def create_buffer(
         usage: "flags.BufferUsage",
         mapped_at_creation: bool = False,
     ):
-        size = int(size)
-        if mapped_at_creation:
-            raise ValueError(
-                "In wgpu-py, mapped_at_creation must be False. Use create_buffer_with_data() instead."
-            )
-        return self._create_buffer(label, size, usage, False)
-
-    def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage"):
-        # Get a memoryview of the data
-        m, src_address = get_memoryview_and_address(data)
-        m = m.cast("B", shape=(m.nbytes,))
-        size = m.nbytes
-
-        # Create the buffer (usage does not have to be MAP_READ or MAP_WRITE)
-        buffer = self._create_buffer(label, size, usage, True)
-
-        # Copy the data to the mapped memory
-        # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
-        dst_ptr = libf.wgpuBufferGetMappedRange(buffer._internal, 0, size)
-        dst_address = int(ffi.cast("intptr_t", dst_ptr))
-        dst_m = get_memoryview_from_address(dst_address, size)
-        dst_m[:] = m  # nicer than ctypes.memmove(dst_address, src_address, m.nbytes)
-
-        buffer._unmap()
-        return buffer
+        return self._create_buffer(label, int(size), usage, bool(mapped_at_creation))
 
     def _create_buffer(self, label, size, usage, mapped_at_creation):
         # Create a buffer object
@@ -852,7 +828,11 @@ def _create_buffer(self, label, size, usage, mapped_at_creation):
         # Note that there is wgpuBufferGetSize and wgpuBufferGetUsage,
         # but we already know these, so they are kindof useless?
         # Return wrapped buffer
-        return GPUBuffer(label, id, self, size, usage)
+        b = GPUBuffer(label, id, self, size, usage)
+
+        if mapped_at_creation:
+            b._map_state = enums.BufferMapState.mapped
+        return b
 
     def create_texture(
         self,
@@ -1519,12 +1499,30 @@ def _destroy(self):
 
 
 class GPUBuffer(base.GPUBuffer, GPUObjectBase):
-    def map_read(self):
-        size = self.size
-
-        # Prepare
-        status = 99
-        data = memoryview((ctypes.c_uint8 * size)()).cast("B")
+    def _check_range(self, offset, size):
+        offset = 0 if offset is None else int(offset)
+        size = (self.size - offset) if size is None else int(size)
+        if offset < 0:
+            raise ValueError("Mapped offset must not be smaller than zero.")
+        if size < 1:
+            raise ValueError("Mapped size must be larger than zero.")
+        if offset + size >= self.size:
+            raise ValueError("Mapped range must not extend beyond total buffer size.")
+
+    def map(self, mode, offset=0, size=None):
+        # Check mode
+        mode = mode.upper() if isinstance(mode, str) else mode
+        if mode in (enums.MapMode.READ, "READ"):
+            map_mode = lib.WGPUMapMode_Read
+        elif mode in (enums.MapMode.WRITE, "WRITE"):
+            map_mode = lib.WGPUMapMode_Write
+
+        # Check offset and size
+        offset, size = self._check_range(offset, size)
+
+        # Can we even map?
+        if self._map_state != enums.BufferMapState.unmapped:
+            raise RuntimeError("Can only map a buffer if its currently unmapped.")
 
         @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)")
         def callback(status_, user_data_p):
@@ -1535,76 +1533,87 @@ def callback(status_, user_data_p):
         self._map_state = enums.BufferMapState.pending
         # H: void f(WGPUBuffer buffer, WGPUMapModeFlags mode, size_t offset, size_t size, WGPUBufferMapCallback callback, void * userdata)
         libf.wgpuBufferMapAsync(
-            self._internal, lib.WGPUMapMode_Read, 0, size, callback, ffi.NULL
+            self._internal, map_mode, offset, size, callback, ffi.NULL
         )
 
         # Let it do some cycles
-        # H: void f(WGPUInstance instance)
-        # libf.wgpuInstanceProcessEvents(get_wgpu_instance())
         # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex)
         libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL)
 
         if status != 0:  # no-cover
-            raise RuntimeError(f"Could not read buffer data ({status}).")
+            raise RuntimeError(f"Could not map buffer ({status}).")
         self._map_state = enums.BufferMapState.mapped
+        self._mapped_range = offset, offset + size
 
-        # Copy data
+    async def map_async(self, mode, offset=0, size=None):
+        return self.map()  # for now
+
+    def unmap(self):
+        # Can we even unmap?
+        if self._map_state != enums.BufferMapState.mapped:
+            raise RuntimeError("Can only unmap a buffer if its currently mapped.")
+        # H: void f(WGPUBuffer buffer)
+        libf.wgpuBufferUnmap(self._internal)
+        self._map_state = enums.BufferMapState.unmapped
+        self._mapped_range = None
+
+    def read_mapped(self, offset=0, size=None):
+        # Can we even read?
+        if self._map_state != enums.BufferMapState.mapped:
+            raise RuntimeError("Can only read from a buffer if its mapped.")
+
+        # Check offset and size
+        offset, size = self._check_range(offset, size)
+        if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]:
+            raise ValueError(
+                "The range for buffer reading is not contained in the currently mapped range."
+            )
+
+        # Get mapped memoryview.
         # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
         src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size)
         src_address = int(ffi.cast("intptr_t", src_ptr))
         src_m = get_memoryview_from_address(src_address, size)
+
+        # Copy the data. The memoryview created above becomes invalid when the buffer
+        # is unmapped, so we don't want to pass that memory to the user.
+        data = memoryview((ctypes.c_uint8 * size)()).cast("B")
         data[:] = src_m
 
-        self._unmap()
         return data
 
-    def map_write(self, data):
-        size = self.size
+    def write_mapped(self, data, offset=0, size=None):
+        # Can we even write?
+        if self._map_state != enums.BufferMapState.mapped:
+            raise RuntimeError("Can only write to a buffer if its mapped.")
 
+        # Cast data to a memoryview. This also works for e.g. numpy arrays,
+        # and the resulting memoryview will be a view on the data.
         data = memoryview(data).cast("B")
-        if data.nbytes != self.size:  # pragma: no cover
+
+        # Check offset and size
+        if size is None:
+            size = data.nbytes
+        offset, size = self._check_range(offset, size)
+        if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]:
             raise ValueError(
-                "Data passed to map_write() does not have the correct size."
+                "The range for buffer writing is not contained in the currently mapped range."
             )
 
-        # Prepare
-        status = 99
-
-        @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)")
-        def callback(status_, user_data_p):
-            nonlocal status
-            status = status_
-
-        # Map it
-        self._map_state = enums.BufferMapState.pending
-        # H: void f(WGPUBuffer buffer, WGPUMapModeFlags mode, size_t offset, size_t size, WGPUBufferMapCallback callback, void * userdata)
-        libf.wgpuBufferMapAsync(
-            self._internal, lib.WGPUMapMode_Write, 0, size, callback, ffi.NULL
-        )
-
-        # Let it do some cycles
-        # H: void f(WGPUInstance instance)
-        # libf.wgpuInstanceProcessEvents(get_wgpu_instance())
-        # H: bool f(WGPUDevice device, bool wait, WGPUWrappedSubmissionIndex const * wrappedSubmissionIndex)
-        libf.wgpuDevicePoll(self._device._internal, True, ffi.NULL)
-
-        if status != 0:  # no-cover
-            raise RuntimeError(f"Could not read buffer data ({status}).")
-        self._map_state = enums.BufferMapState.mapped
+        # Check data size and given size. If the latter was given, it should match!
+        if data.nbytes != size:  # pragma: no cover
+            raise ValueError(
+                "Data passed to GPUBuffer.write_mapped() does not match the given size."
+            )
 
-        # Copy data
+        # Get mapped memoryview
         # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
         src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size)
         src_address = int(ffi.cast("intptr_t", src_ptr))
         src_m = get_memoryview_from_address(src_address, size)
-        src_m[:] = data
 
-        self._unmap()
-
-    def _unmap(self):
-        # H: void f(WGPUBuffer buffer)
-        libf.wgpuBufferUnmap(self._internal)
-        self._map_state = enums.BufferMapState.unmapped
+        # Copy data
+        src_m[:] = data
 
     def destroy(self):
         self._destroy()  # no-cover
diff --git a/wgpu/base.py b/wgpu/base.py
index 7896a22b..56e95e43 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -445,23 +445,36 @@ def create_buffer(
             label (str): A human readable label. Optional.
             size (int): The size of the buffer in bytes.
             usage (flags.BufferUsage): The ways in which this buffer will be used.
-            mapped_at_creation (bool): Must be False, use create_buffer_with_data() instead.
+            mapped_at_creation (bool): Whether the buffer is initially mapped.
         """
         raise NotImplementedError()
 
-    @apidiff.add("replaces WebGPU's mapping API")
+    @apidiff.add("Convenience function")
     def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage"):
         """Create a `GPUBuffer` object initialized with the given data.
 
+        This is a convenience function that creates a mapped buffer,
+        writes the given data to it, and then unmaps the buffer.
+
         Arguments:
             label (str): A human readable label. Optional.
             data: Any object supporting the Python buffer protocol (this
                 includes bytes, bytearray, ctypes arrays, numpy arrays, etc.).
             usage (flags.BufferUsage): The ways in which this buffer will be used.
 
-        Also see `GPUQueue.write_buffer()` and `GPUQueue.read_buffer()`.
+        Also see `GPUBuffer.write_mapped()` and `GPUQueue.write_buffer()`.
         """
-        raise NotImplementedError()
+        # This function was originally created to support the workflow
+        # of initializing a buffer with data when we did not support
+        # buffer mapping. Now that we do have buffer mapping it is not
+        # strictly necessary, but it's still quite useful and feels
+        # more Pythonic than having to write the boilerplate code below.
+        buf = self.create_buffer(
+            label=label, size=size, usage=usage, mapped_at_creation=True
+        )
+        buf.write_mapped(data)
+        buf.unmap()
+        return buf
 
     # IDL: GPUTexture createTexture(GPUTextureDescriptor descriptor);
     def create_texture(
@@ -904,11 +917,9 @@ class GPUBuffer(GPUObjectBase):
     the buffer, subject to alignment restrictions depending on the
     operation.
 
-    Create a buffer using `GPUDevice.create_buffer()`,
-    `GPUDevice.create_buffer_mapped()` or `GPUDevice.create_buffer_mapped_async()`.
+    Create a buffer using `GPUDevice.create_buffer()`.
 
-    One can sync data in a buffer by mapping it (or by creating a mapped
-    buffer) and then setting/getting the values in the mapped memoryview.
+    One can sync data in a buffer by mapping it and then getting and setting data.
     Alternatively, one can tell the GPU (via the command encoder) to
     copy data between buffers and textures.
     """
@@ -948,36 +959,78 @@ def map_state(self):
     # This means that the mapped memory is reclaimed (i.e. invalid)
     # when unmap is called, and that whatever object we expose the
     # memory with to the user, must be set to a state where it can no
-    # longer be used. I currently can't think of a good way to do this.
+    # longer be used. There does not seem to be a good way to do this.
     #
-    # So instead, we can use mapping internally to allow reading and
-    # writing but not expose it via the public API. The only
-    # disadvantage (AFAIK) is that there could be use-cases where a
-    # memory copy could be avoided when using mapping.
+    # In our Python API we do make use of the same map/unmap mechanism,
+    # but reading and writing data goes via method calls instead of via
+    # an array-like object that exposes the shared memory.
 
-    # IDL: undefined destroy();
-    def destroy(self):
-        """An application that no longer requires a buffer can choose
-        to destroy it. Note that this is automatically called when the
-        Python object is cleaned up by the garbadge collector.
+    # IDL: Promise<undefined> mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size);
+    def map(self, mode, offset=0, size=None):
+        """Maps the given range of the GPUBuffer.
+
+        When this call returns, the buffer content is ready to be
+        accessed with ``read_mapped`` or ``write_mapped``. Don't forget
+        to ``unmap()`` when done.
+
+        Arguments:
+            mode (enum): The mapping mode, either mapmode.READ or mapmode.WRITE.
+            offset (str): the buffer offset in bytes. Default 0.
+            size (int): the size to read. Default until the end.
         """
         raise NotImplementedError()
 
-    @apidiff.add("Alternative to mapping API")
-    def map_read(self):
-        """Map the buffer and read the data from it, then unmap.
-        Return a memoryview object. Requires the buffer usage to include MAP_READ.
+    # IDL: Promise<undefined> mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size);
+    async def map_async(self, mode, offset=0, size=None):
+        """Alternative version of map()."""
+        raise NotImplementedError()
+
+    # IDL: undefined unmap();
+    def unmap(self):
+        """Unmaps the buffer.
 
-        See `GPUQueue.read_buffer()` for a simpler alternative.
+        Unmaps the mapped range of the GPUBuffer and makes it’s contents
+        available for use by the GPU again.
         """
         raise NotImplementedError()
 
-    @apidiff.add("Alternative to mapping API")
-    def map_write(self, data):
-        """Map the buffer and write the data to it, then unmap.
-        Return a memoryview object. Requires the buffer usage to include MAP_WRITE.
+    @apidiff.add("Replacement for get_mapped_range")
+    def read_mapped(self, offset=0, size=None):
+        """Read mapped buffer data.
+
+        This method must only be called when the buffer is in a mapped state.
+        This is the Python alternative to WebGPU's ``getMappedRange``.
+        Returns a memoryview that is a copy of the mapped data (it won't
+        become invalid when the buffer is ummapped).
 
-        See `WGPUQueue.write_buffer()` for a simpler alternative.
+        Arguments:
+            offset (str): the buffer offset in bytes. Default 0. Must be at least
+                as large as the offset specified in ``map()``.
+            size (int): the size to read. Default until the end. The resuling range
+                must fit into the range specified in ``map()``.
+
+        Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
+        """
+        raise NotImplementedError()
+
+    @apidiff.add("Replacement for get_mapped_range")
+    def write_mapped(self, data, offset=0, size=None):
+        """Read mapped buffer data.
+
+        This method must only be called when the buffer is in a mapped state.
+        This is the Python alternative to WebGPU's ``getMappedRange``.
+        Since the data can also be a view into a larger array, this method
+        allows updating the buffer with minimal data copying.
+
+        Arguments:
+            data (buffer-like): The data to write to the buffer, in the form of
+                e.g. a bytes object, memoryview, or numpy array.
+            offset (str): the buffer offset in bytes. Default 0. Must be at least
+                as large as the offset specified in ``map()``.
+            size (int): the size to read. Default until the end. The resuling range
+                must fit into the range specified in ``map()``.
+
+        Also see `GPUBuffer.read_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
         """
         raise NotImplementedError()
 
@@ -986,14 +1039,26 @@ def map_write(self, data):
     def get_mapped_range(self, offset=0, size=None):
         raise NotImplementedError("The Python API differs from WebGPU here")
 
-    # IDL: undefined unmap();
-    @apidiff.hide
-    def unmap(self):
-        raise NotImplementedError("The Python API differs from WebGPU here")
+    @apidiff.add("Deprecated but still here to raise a warning")
+    def map_read(self, offset=None, size=None, iter=None):
+        """Deprecated."""
+        raise DeprecationWarning(
+            "map_read() is deprecated, use map() and read_mapped() instead."
+        )
 
-    # IDL: Promise<undefined> mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size);
-    @apidiff.hide
-    async def map_async(self, mode, offset=0, size=None):
+    @apidiff.add("Deprecated but still here to raise a warning")
+    def map_write(self, data):
+        """Deprecated."""
+        raise DeprecationWarning(
+            "map_read() is deprecated, use map() and write_mapped() instead."
+        )
+
+    # IDL: undefined destroy();
+    def destroy(self):
+        """An application that no longer requires a buffer can choose
+        to destroy it. Note that this is automatically called when the
+        Python object is cleaned up by the garbadge collector.
+        """
         raise NotImplementedError()
 
 
@@ -1117,7 +1182,7 @@ def __init__(self, label, internal, device, texture, size):
         self._texture = texture
         self._size = size
 
-    @apidiff.add("Need to know size e.g. for texture view provided by canvas.")
+    @apidiff.add("Need to know size e.g. for texture view provided by canvas")
     @property
     def size(self):
         """The texture size (as a 3-tuple)."""
@@ -1689,12 +1754,12 @@ def write_buffer(self, buffer, buffer_offset, data, data_offset=0, size=None):
         This maps the data to a temporary buffer and then copies that buffer
         to the given buffer. The given buffer's usage must include COPY_DST.
 
-        Also see `GPUDevice.create_buffer_with_data()` and `GPUBuffer.map_write()`.
+        Also see `GPUBuffer.map()`.
 
         """
         raise NotImplementedError()
 
-    @apidiff.add("replaces WebGPU's mapping API")
+    @apidiff.add("For symmetry with queue.write_buffer")
     def read_buffer(self, buffer, buffer_offset=0, size=None):
         """Takes the data contents of the buffer and return them as a memoryview.
 
@@ -1707,7 +1772,7 @@ def read_buffer(self, buffer, buffer_offset=0, size=None):
         and then maps that buffer to read the data. The given buffer's
         usage must include COPY_SRC.
 
-        Also see `GPUBuffer.map_read()`.
+        Also see `GPUBuffer.map()`.
         """
         raise NotImplementedError()
 
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md
index 01516948..c8f76fbe 100644
--- a/wgpu/resources/codegen_report.md
+++ b/wgpu/resources/codegen_report.md
@@ -12,14 +12,14 @@
 * Diffs for GPU: add print_report, change get_preferred_canvas_format, change request_adapter, change request_adapter_async
 * Diffs for GPUCanvasContext: add get_preferred_format, add present
 * Diffs for GPUDevice: add adapter, add create_buffer_with_data, hide import_external_texture, hide pop_error_scope, hide push_error_scope
-* Diffs for GPUBuffer: add map_read, add map_write, hide get_mapped_range, hide map_async, hide unmap
+* Diffs for GPUBuffer: add map_read, add map_write, add read_mapped, add write_mapped, hide get_mapped_range
 * Diffs for GPUTexture: add size
 * Diffs for GPUTextureView: add size, add texture
 * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture
-* Validated 39 classes, 111 methods, 44 properties
+* Validated 39 classes, 114 methods, 44 properties
 ### Patching API for backends/rs.py
 * Diffs for GPUAdapter: add request_device_tracing
-* Validated 39 classes, 99 methods, 0 properties
+* Validated 39 classes, 101 methods, 0 properties
 ## Validating rs.py
 * Enum field TextureFormat.rgb10a2uint missing in wgpu.h
 * Enum PipelineErrorReason missing in wgpu.h
@@ -28,6 +28,6 @@
 * Enum CanvasAlphaMode missing in wgpu.h
 * Enum field DeviceLostReason.unknown missing in wgpu.h
 * Wrote 232 enum mappings and 47 struct-field mappings to rs_mappings.py
-* Validated 91 C function calls
-* Not using 115 C functions
+* Validated 86 C function calls
+* Not using 116 C functions
 * Validated 72 C structs

From 42c765d6a829a6d28e9c652d0f6b90d84bf241b0 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Mon, 16 Oct 2023 17:32:46 +0200
Subject: [PATCH 04/13] Allow read_mapped to return mapped data without making
 a copy

---
 wgpu/backends/rs.py | 38 +++++++++++++++++++++++++++++---------
 wgpu/base.py        | 16 +++++++++++++++-
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 3784b241..39d42309 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1499,6 +1499,14 @@ def _destroy(self):
 
 
 class GPUBuffer(base.GPUBuffer, GPUObjectBase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self._mapped_range = 0, 0
+        self._mapped_memoryviews = []
+        if self._map_state == enums.BufferMapState.mapped:
+            self._mapped_range = 0, self.size
+
     def _check_range(self, offset, size):
         offset = 0 if offset is None else int(offset)
         size = (self.size - offset) if size is None else int(size)
@@ -1544,6 +1552,7 @@ def callback(status_, user_data_p):
             raise RuntimeError(f"Could not map buffer ({status}).")
         self._map_state = enums.BufferMapState.mapped
         self._mapped_range = offset, offset + size
+        self._mapped_memoryviews = []
 
     async def map_async(self, mode, offset=0, size=None):
         return self.map()  # for now
@@ -1555,9 +1564,15 @@ def unmap(self):
         # H: void f(WGPUBuffer buffer)
         libf.wgpuBufferUnmap(self._internal)
         self._map_state = enums.BufferMapState.unmapped
-        self._mapped_range = None
-
-    def read_mapped(self, offset=0, size=None):
+        self._mapped_range = 0, 0
+        mapped_memoryviews = self._mapped_memoryviews
+        self._mapped_memoryviews = []
+        # Release the mapped memoryview objects. These objects
+        # themselves become unusable, but any views on them do not.
+        for m in mapped_memoryviews:
+            m.release()
+
+    def read_mapped(self, offset=0, size=None, *, copy=True):
         # Can we even read?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only read from a buffer if its mapped.")
@@ -1575,12 +1590,17 @@ def read_mapped(self, offset=0, size=None):
         src_address = int(ffi.cast("intptr_t", src_ptr))
         src_m = get_memoryview_from_address(src_address, size)
 
-        # Copy the data. The memoryview created above becomes invalid when the buffer
-        # is unmapped, so we don't want to pass that memory to the user.
-        data = memoryview((ctypes.c_uint8 * size)()).cast("B")
-        data[:] = src_m
-
-        return data
+        if copy:
+            # Copy the data. The memoryview created above becomes invalid when the buffer
+            # is unmapped, so we don't want to pass that memory to the user.
+            data = memoryview((ctypes.c_uint8 * size)()).cast("B")
+            data[:] = src_m
+            return data
+        else:
+            # Return view on the actually mapped data
+            self._mapped_memoryviews.append(src_m)
+            return src_m
+            # todo: cast to B?
 
     def write_mapped(self, data, offset=0, size=None):
         # Can we even write?
diff --git a/wgpu/base.py b/wgpu/base.py
index 56e95e43..0bf636fd 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -469,6 +469,12 @@ def create_buffer_with_data(self, *, label="", data, usage: "flags.BufferUsage")
         # buffer mapping. Now that we do have buffer mapping it is not
         # strictly necessary, but it's still quite useful and feels
         # more Pythonic than having to write the boilerplate code below.
+
+        # Create a view of known type
+        data = memoryview(data).cast("B")
+        size = data.nbytes
+
+        # Create the buffer and write data
         buf = self.create_buffer(
             label=label, size=size, usage=usage, mapped_at_creation=True
         )
@@ -995,7 +1001,7 @@ def unmap(self):
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def read_mapped(self, offset=0, size=None):
+    def read_mapped(self, offset=0, size=None, *, copy=True):
         """Read mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1008,6 +1014,14 @@ def read_mapped(self, offset=0, size=None):
                 as large as the offset specified in ``map()``.
             size (int): the size to read. Default until the end. The resuling range
                 must fit into the range specified in ``map()``.
+            copy (boool): whether a copy of the data is given. Default True.
+                If False, the returned memoryview represents the mapped data
+                directly, and is released when the buffer is unmapped.
+                WARNING: views of the returned data (e.g. memoryview objects or
+                numpy arrays) can still be used after the base memory is released,
+                which can result in corrupted data and segfaults. Therefore, when
+                setting copy to False, make *very* sure the memory is not accessed
+                after the buffer is unmapped.
 
         Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
         """

From 9f1cde894a4eb6a3584bed3e3092289f5507e4a1 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 00:04:21 +0200
Subject: [PATCH 05/13] also release memviews on buffer destroy

---
 wgpu/backends/rs.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 39d42309..676065d8 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1565,12 +1565,17 @@ def unmap(self):
         libf.wgpuBufferUnmap(self._internal)
         self._map_state = enums.BufferMapState.unmapped
         self._mapped_range = 0, 0
-        mapped_memoryviews = self._mapped_memoryviews
-        self._mapped_memoryviews = []
+        self._release_memoryviews()
+
+    def _release_memoryviews(self):
         # Release the mapped memoryview objects. These objects
         # themselves become unusable, but any views on them do not.
-        for m in mapped_memoryviews:
-            m.release()
+        for m in self._mapped_memoryviews:
+            try:
+                m.release()
+            except Exception:
+                pass
+        self._mapped_memoryviews = []
 
     def read_mapped(self, offset=0, size=None, *, copy=True):
         # Can we even read?
@@ -1639,6 +1644,7 @@ def destroy(self):
         self._destroy()  # no-cover
 
     def _destroy(self):
+        self._release_memoryviews()
         if self._internal is not None and lib is not None:
             self._internal, internal = None, self._internal
             # H: void f(WGPUBuffer buffer)

From f9da63923e46587e41318d37d633d54a053c696b Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 00:24:19 +0200
Subject: [PATCH 06/13] Fix current tests

---
 tests/test_rs_basics.py | 15 ++++++---------
 wgpu/backends/rs.py     | 27 ++++++++++++++++-----------
 wgpu/base.py            |  4 ++--
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py
index e60a04fe..a903a5bf 100644
--- a/tests/test_rs_basics.py
+++ b/tests/test_rs_basics.py
@@ -216,12 +216,6 @@ def test_buffer_init3():
     device = wgpu.utils.get_default_device()
     data1 = b"abcdefghijkl"
 
-    # First fail
-    with raises(ValueError):
-        device.create_buffer(
-            mapped_at_creation=True, size=len(data1), usage=wgpu.BufferUsage.COPY_DST
-        )
-
     # Create buffer
     buf = device.create_buffer(
         size=len(data1), usage=wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.COPY_SRC
@@ -556,7 +550,9 @@ def test_buffer_map_read_and_write():
 
     # Upload
     data1 = b"abcdefghijkl"
-    buf1.map_write(data1)
+    buf1.map("write")
+    buf1.write_mapped(data1)
+    buf1.unmap()
 
     # Copy
     command_encoder = device.create_command_encoder()
@@ -564,8 +560,9 @@ def test_buffer_map_read_and_write():
     device.queue.submit([command_encoder.finish()])
 
     # Download
-    data2 = buf2.map_read()
-
+    buf2.map("read")
+    data2 = buf2.read_mapped()
+    buf2.unmap()
     assert data1 == data2
 
 
diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 676065d8..d3f1a54b 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -823,16 +823,17 @@ def _create_buffer(self, label, size, usage, mapped_at_creation):
             mappedAtCreation=mapped_at_creation,
             # not used: nextInChain
         )
+        map_state = (
+            enums.BufferMapState.mapped
+            if mapped_at_creation
+            else enums.BufferMapState.unmapped
+        )
         # H: WGPUBuffer f(WGPUDevice device, WGPUBufferDescriptor const * descriptor)
         id = libf.wgpuDeviceCreateBuffer(self._internal, struct)
         # Note that there is wgpuBufferGetSize and wgpuBufferGetUsage,
         # but we already know these, so they are kindof useless?
         # Return wrapped buffer
-        b = GPUBuffer(label, id, self, size, usage)
-
-        if mapped_at_creation:
-            b._map_state = enums.BufferMapState.mapped
-        return b
+        return GPUBuffer(label, id, self, size, usage, map_state)
 
     def create_texture(
         self,
@@ -1514,15 +1515,16 @@ def _check_range(self, offset, size):
             raise ValueError("Mapped offset must not be smaller than zero.")
         if size < 1:
             raise ValueError("Mapped size must be larger than zero.")
-        if offset + size >= self.size:
+        if offset + size > self.size:
             raise ValueError("Mapped range must not extend beyond total buffer size.")
+        return offset, size
 
     def map(self, mode, offset=0, size=None):
         # Check mode
         mode = mode.upper() if isinstance(mode, str) else mode
-        if mode in (enums.MapMode.READ, "READ"):
+        if mode in (flags.MapMode.READ, "READ"):
             map_mode = lib.WGPUMapMode_Read
-        elif mode in (enums.MapMode.WRITE, "WRITE"):
+        elif mode in (flags.MapMode.WRITE, "WRITE"):
             map_mode = lib.WGPUMapMode_Write
 
         # Check offset and size
@@ -1532,6 +1534,8 @@ def map(self, mode, offset=0, size=None):
         if self._map_state != enums.BufferMapState.unmapped:
             raise RuntimeError("Can only map a buffer if its currently unmapped.")
 
+        status = 999
+
         @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)")
         def callback(status_, user_data_p):
             nonlocal status
@@ -1605,7 +1609,6 @@ def read_mapped(self, offset=0, size=None, *, copy=True):
             # Return view on the actually mapped data
             self._mapped_memoryviews.append(src_m)
             return src_m
-            # todo: cast to B?
 
     def write_mapped(self, data, offset=0, size=None):
         # Can we even write?
@@ -2489,7 +2492,8 @@ def read_buffer(self, buffer, buffer_offset=0, size=None):
         self.submit([command_buffer])
 
         # Download from mappable buffer
-        data = tmp_buffer.map_read()
+        tmp_buffer.map(flags.MapMode.READ)
+        data = tmp_buffer.read_mapped()
         tmp_buffer.destroy()
 
         return data
@@ -2586,7 +2590,8 @@ def read_texture(self, source, data_layout, size):
         self.submit([command_buffer])
 
         # Download from mappable buffer
-        data = tmp_buffer.map_read()
+        tmp_buffer.map(flags.MapMode.READ)
+        data = tmp_buffer.read_mapped()
         tmp_buffer.destroy()
 
         # Fix data strides if necessary
diff --git a/wgpu/base.py b/wgpu/base.py
index 0bf636fd..725cbd8b 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -930,11 +930,11 @@ class GPUBuffer(GPUObjectBase):
     copy data between buffers and textures.
     """
 
-    def __init__(self, label, internal, device, size, usage):
+    def __init__(self, label, internal, device, size, usage, map_state):
         super().__init__(label, internal, device)
         self._size = size
         self._usage = usage
-        self._map_state = enums.BufferMapState.unmapped
+        self._map_state = map_state
 
     # IDL: readonly attribute GPUSize64Out size;
     @property

From c02d98295280e82e79976316a88dc9312e7a4892 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 11:48:20 +0200
Subject: [PATCH 07/13] more buffer tests, fix buffer read issue

---
 tests/test_rs_basics.py          | 128 +++++++++++++++++++++++++------
 wgpu/backends/rs.py              |  65 ++++++++++------
 wgpu/base.py                     |  12 +--
 wgpu/resources/codegen_report.md |   2 +-
 4 files changed, 153 insertions(+), 54 deletions(-)

diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py
index a903a5bf..44442d58 100644
--- a/tests/test_rs_basics.py
+++ b/tests/test_rs_basics.py
@@ -14,6 +14,9 @@
 from pytest import mark, raises
 
 
+is_win = sys.platform.startswith("win")
+
+
 def test_get_wgpu_version():
     version = wgpu.backends.rs.__version__
     commit_sha = wgpu.backends.rs.__commit_sha__
@@ -145,9 +148,7 @@ def test_rs_tracer():
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
-@mark.skipif(
-    is_ci and sys.platform == "win32", reason="Cannot use SpirV shader on dx12"
-)
+@mark.skipif(is_ci and is_win, reason="Cannot use SpirV shader on dx12")
 def test_shader_module_creation_spirv():
     device = wgpu.utils.get_default_device()
 
@@ -181,41 +182,78 @@ def test_buffer_init1():
     device = wgpu.utils.get_default_device()
     data1 = b"abcdefghijkl"
 
-    # Create buffer
+    # Create buffer. COPY_SRC is needed to read the buffer via the queue.
     buf = device.create_buffer_with_data(data=data1, usage=wgpu.BufferUsage.COPY_SRC)
 
     # Download from buffer to CPU
     data2 = device.queue.read_buffer(buf)
     assert data1 == data2
 
+    # ---  also read via mapped data
 
-# @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
-# def test_buffer_init2():
-#     # Initializing a buffer as mapped, to directly set the data
-#
-#     device = wgpu.utils.get_default_device()
-#     data1 = b"abcdefghijkl"
-#
-#     # Create buffer
-#     buf, data2 = device.create_buffer_mapped(
-#         size=len(data1), usage=wgpu.BufferUsage.MAP_READ
-#     )
-#     data2[:] = data1
-#     buf.unmap()
-#
-#     # Download from buffer to CPU
-#     data3 = buf.map(wgpu.MapMode.READ).tobytes()
-#     buf.unmap()
-#     assert data1 == data3
+    # Create buffer. MAP_READ is needed to read the buffer via the queue.
+    buf = device.create_buffer_with_data(data=data1, usage=wgpu.BufferUsage.MAP_READ)
+
+    wgpu.backends.rs.libf.wgpuDevicePoll(
+        buf._device._internal, True, wgpu.backends.rs.ffi.NULL
+    )
+
+    # Download from buffer to CPU
+    buf.map(wgpu.MapMode.READ)
+    wgpu.backends.rs.libf.wgpuDevicePoll(
+        buf._device._internal, True, wgpu.backends.rs.ffi.NULL
+    )
+
+    data2 = buf.read_mapped()
+    buf.unmap()
+    print(data2.tobytes())
+    assert data1 == data2
+
+
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_buffer_init2():
+    # Initializing a buffer as mapped, to directly set the data
+
+    device = wgpu.utils.get_default_device()
+    data1 = b"abcdefghijkl"
+
+    # Create buffer.
+    buf = device.create_buffer(
+        size=len(data1), usage=wgpu.BufferUsage.COPY_SRC, mapped_at_creation=True
+    )
+    buf.write_mapped(data1)
+    buf.unmap()
+
+    # Download from buffer to CPU
+    data2 = device.queue.read_buffer(buf)
+    assert data1 == data2
+
+    # --- also read via mapped data
+
+    # Create buffer.
+    buf = device.create_buffer(
+        size=len(data1), usage=wgpu.BufferUsage.MAP_READ, mapped_at_creation=True
+    )
+    buf.write_mapped(data1)
+    buf.unmap()
+
+    # Download from buffer to CPU
+    buf.map("read")
+    data2 = buf.read_mapped()
+    buf.unmap()
+    print(data2.tobytes())
+    assert data1 == data2
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
 def test_buffer_init3():
-    # Initializing an empty buffer, then writing to it
+    # Initializing an empty buffer, then writing to it, then reading back
 
     device = wgpu.utils.get_default_device()
     data1 = b"abcdefghijkl"
 
+    # Option 1: write via queue (i.e. temp buffer), read via queue
+
     # Create buffer
     buf = device.create_buffer(
         size=len(data1), usage=wgpu.BufferUsage.COPY_DST | wgpu.BufferUsage.COPY_SRC
@@ -225,8 +263,48 @@ def test_buffer_init3():
     device.queue.write_buffer(buf, 0, data1)
 
     # Download from buffer to CPU
-    data3 = device.queue.read_buffer(buf)
-    assert data1 == data3
+    data2 = device.queue.read_buffer(buf)
+    assert data1 == data2
+
+    # Option 2: Write via mapped data, read via queue
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=len(data1), usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC
+    )
+
+    # Write data to it
+    buf.map("write")
+    buf.write_mapped(data1)
+    buf.unmap()
+
+    # Download from buffer to CPU
+    data2 = device.queue.read_buffer(buf)
+    assert data1 == data2
+
+    # Option 3: Write via queue, read via mapped data
+
+    buf = device.create_buffer(
+        size=len(data1), usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST
+    )
+
+    # Write data to it
+    device.queue.write_buffer(buf, 0, data1)
+
+    # Download from buffer to CPU
+    buf.map("read")
+    data2 = buf.read_mapped()
+    buf.unmap()
+    assert data1 == data2
+
+    # Option 4: Write via mapped data, read via mapped data
+
+    # Not actually an option
+    with raises(wgpu.GPUValidationError):
+        buf = device.create_buffer(
+            size=len(data1),
+            usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.MAP_WRITE,
+        )
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index d3f1a54b..45a0d663 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1500,13 +1500,13 @@ def _destroy(self):
 
 
 class GPUBuffer(base.GPUBuffer, GPUObjectBase):
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, label, internal, device, size, usage, map_state):
+        super().__init__(label, internal, device, size, usage, map_state)
 
-        self._mapped_range = 0, 0
+        self._mapped_status = 0, 0, 0
         self._mapped_memoryviews = []
         if self._map_state == enums.BufferMapState.mapped:
-            self._mapped_range = 0, self.size
+            self._mapped_status = 0, self.size, 0
 
     def _check_range(self, offset, size):
         offset = 0 if offset is None else int(offset)
@@ -1520,12 +1520,27 @@ def _check_range(self, offset, size):
         return offset, size
 
     def map(self, mode, offset=0, size=None):
+        sync_on_read = True
+
         # Check mode
-        mode = mode.upper() if isinstance(mode, str) else mode
-        if mode in (flags.MapMode.READ, "READ"):
-            map_mode = lib.WGPUMapMode_Read
-        elif mode in (flags.MapMode.WRITE, "WRITE"):
-            map_mode = lib.WGPUMapMode_Write
+        if isinstance(mode, str):
+            if mode.upper() == "READ_NOSYNC":  # for internal use
+                mode = flags.MapMode.READ
+                sync_on_read = False
+            elif mode.upper() == "READ":
+                mode = flags.MapMode.READ
+            elif mode.upper() == "WRITE":
+                mode = flags.MapMode.WRITE
+            else:
+                raise ValueError("Map mode must be READ or WRITE")
+        if isinstance(mode, int):
+            map_mode = 0
+            if mode & flags.MapMode.READ:
+                map_mode |= lib.WGPUMapMode_Read
+            if mode & flags.MapMode.WRITE:
+                map_mode |= lib.WGPUMapMode_Write
+        else:  # pragma: no cover
+            raise TypeError("Map mode should be flag (int) or str.")
 
         # Check offset and size
         offset, size = self._check_range(offset, size)
@@ -1534,6 +1549,13 @@ def map(self, mode, offset=0, size=None):
         if self._map_state != enums.BufferMapState.unmapped:
             raise RuntimeError("Can only map a buffer if its currently unmapped.")
 
+        # Sync up when reading, otherwise the memory may be all zeros.
+        # See https://github.com/gfx-rs/wgpu-native/issues/305
+        if sync_on_read and map_mode & lib.WGPUMapMode_Read:
+            if self._mapped_status[2] == 0 and self._usage & flags.BufferUsage.MAP_READ:
+                encoder = self._device.create_command_encoder()
+                self._device.queue.submit([encoder.finish()])
+
         status = 999
 
         @ffi.callback("void(WGPUBufferMapAsyncStatus, void*)")
@@ -1555,20 +1577,19 @@ def callback(status_, user_data_p):
         if status != 0:  # no-cover
             raise RuntimeError(f"Could not map buffer ({status}).")
         self._map_state = enums.BufferMapState.mapped
-        self._mapped_range = offset, offset + size
+        self._mapped_status = offset, offset + size, mode
         self._mapped_memoryviews = []
 
     async def map_async(self, mode, offset=0, size=None):
-        return self.map()  # for now
+        return self.map(mode, offset, size)  # for now
 
     def unmap(self):
-        # Can we even unmap?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only unmap a buffer if its currently mapped.")
         # H: void f(WGPUBuffer buffer)
         libf.wgpuBufferUnmap(self._internal)
         self._map_state = enums.BufferMapState.unmapped
-        self._mapped_range = 0, 0
+        self._mapped_status = 0, 0, 0
         self._release_memoryviews()
 
     def _release_memoryviews(self):
@@ -1581,14 +1602,14 @@ def _release_memoryviews(self):
                 pass
         self._mapped_memoryviews = []
 
-    def read_mapped(self, offset=0, size=None, *, copy=True):
+    def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
         # Can we even read?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only read from a buffer if its mapped.")
 
         # Check offset and size
-        offset, size = self._check_range(offset, size)
-        if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]:
+        offset, size = self._check_range(buffer_offset, size)
+        if offset < self._mapped_status[0] or (offset + size) > self._mapped_status[1]:
             raise ValueError(
                 "The range for buffer reading is not contained in the currently mapped range."
             )
@@ -1610,7 +1631,7 @@ def read_mapped(self, offset=0, size=None, *, copy=True):
             self._mapped_memoryviews.append(src_m)
             return src_m
 
-    def write_mapped(self, data, offset=0, size=None):
+    def write_mapped(self, data, buffer_offset=0, size=None):
         # Can we even write?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only write to a buffer if its mapped.")
@@ -1622,8 +1643,8 @@ def write_mapped(self, data, offset=0, size=None):
         # Check offset and size
         if size is None:
             size = data.nbytes
-        offset, size = self._check_range(offset, size)
-        if offset < self._mapped_range[0] or (offset + size) > self._mapped_range[1]:
+        offset, size = self._check_range(buffer_offset, size)
+        if offset < self._mapped_status[0] or (offset + size) > self._mapped_status[1]:
             raise ValueError(
                 "The range for buffer writing is not contained in the currently mapped range."
             )
@@ -2466,7 +2487,7 @@ def write_buffer(self, buffer, buffer_offset, data, data_offset=0, size=None):
 
     def read_buffer(self, buffer, buffer_offset=0, size=None):
         # Note that write_buffer probably does a very similar thing
-        # using a temporaty buffer. But write_buffer is official API
+        # using a temporary buffer. But write_buffer is official API
         # so it's a single call, while here we must create the temporary
         # buffer and do the copying ourselves.
 
@@ -2492,7 +2513,7 @@ def read_buffer(self, buffer, buffer_offset=0, size=None):
         self.submit([command_buffer])
 
         # Download from mappable buffer
-        tmp_buffer.map(flags.MapMode.READ)
+        tmp_buffer.map("READ_NOSYNC")
         data = tmp_buffer.read_mapped()
         tmp_buffer.destroy()
 
@@ -2590,7 +2611,7 @@ def read_texture(self, source, data_layout, size):
         self.submit([command_buffer])
 
         # Download from mappable buffer
-        tmp_buffer.map(flags.MapMode.READ)
+        tmp_buffer.map("READ_NOSYNC")
         data = tmp_buffer.read_mapped()
         tmp_buffer.destroy()
 
diff --git a/wgpu/base.py b/wgpu/base.py
index 725cbd8b..9751f15b 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -1001,7 +1001,7 @@ def unmap(self):
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def read_mapped(self, offset=0, size=None, *, copy=True):
+    def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
         """Read mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1010,8 +1010,8 @@ def read_mapped(self, offset=0, size=None, *, copy=True):
         become invalid when the buffer is ummapped).
 
         Arguments:
-            offset (str): the buffer offset in bytes. Default 0. Must be at least
-                as large as the offset specified in ``map()``.
+            buffer_offset (int): the buffer offset in bytes. Default 0. Must be at
+                least as large as the offset specified in ``map()``.
             size (int): the size to read. Default until the end. The resuling range
                 must fit into the range specified in ``map()``.
             copy (boool): whether a copy of the data is given. Default True.
@@ -1023,12 +1023,12 @@ def read_mapped(self, offset=0, size=None, *, copy=True):
                 setting copy to False, make *very* sure the memory is not accessed
                 after the buffer is unmapped.
 
-        Also see `GPUBuffer.write_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
+        Also see `GPUBuffer.write_mapped()`, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
         """
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def write_mapped(self, data, offset=0, size=None):
+    def write_mapped(self, data, buffer_offset=0, size=None):
         """Read mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1039,7 +1039,7 @@ def write_mapped(self, data, offset=0, size=None):
         Arguments:
             data (buffer-like): The data to write to the buffer, in the form of
                 e.g. a bytes object, memoryview, or numpy array.
-            offset (str): the buffer offset in bytes. Default 0. Must be at least
+            buffer_offset (int): the buffer offset in bytes. Default 0. Must be at least
                 as large as the offset specified in ``map()``.
             size (int): the size to read. Default until the end. The resuling range
                 must fit into the range specified in ``map()``.
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md
index c8f76fbe..0f442d71 100644
--- a/wgpu/resources/codegen_report.md
+++ b/wgpu/resources/codegen_report.md
@@ -19,7 +19,7 @@
 * Validated 39 classes, 114 methods, 44 properties
 ### Patching API for backends/rs.py
 * Diffs for GPUAdapter: add request_device_tracing
-* Validated 39 classes, 101 methods, 0 properties
+* Validated 39 classes, 103 methods, 0 properties
 ## Validating rs.py
 * Enum field TextureFormat.rgb10a2uint missing in wgpu.h
 * Enum PipelineErrorReason missing in wgpu.h

From 9bc94686eb917fdc6dd7af6cab0b7f31e32d200e Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 12:59:24 +0200
Subject: [PATCH 08/13] More tests and fixes

---
 tests/test_rs_basics.py | 183 +++++++++++++++++++++++++++++++++++++++-
 wgpu/backends/rs.py     |  41 +++++++--
 wgpu/base.py            |  25 +++---
 3 files changed, 229 insertions(+), 20 deletions(-)

diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py
index 44442d58..05b6c4ec 100644
--- a/tests/test_rs_basics.py
+++ b/tests/test_rs_basics.py
@@ -307,6 +307,187 @@ def test_buffer_init3():
         )
 
 
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_consequitive_writes1():
+    # The inefficient way
+
+    device = wgpu.utils.get_default_device()
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC
+    )
+
+    # Write in parts
+    for i in range(4):
+        buf.map("write")
+        buf.write_mapped(f"{i+1}".encode() * 8, i * 8)
+        buf.unmap()
+
+    # Download from buffer to CPU
+    data = device.queue.read_buffer(buf)
+    assert data == b"11111111222222223333333344444444"
+
+    # Also in parts
+    for i in range(4):
+        data = device.queue.read_buffer(buf, i * 8, size=8)
+        assert data == f"{i+1}".encode() * 8
+
+
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_consequitive_writes2():
+    # The efficient way
+
+    device = wgpu.utils.get_default_device()
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC
+    )
+
+    # Write in parts
+    buf.map("write")
+    for i in range(4):
+        buf.write_mapped(f"{i+1}".encode() * 8, i * 8)
+    buf.unmap()
+
+    # Download from buffer to CPU
+    data = device.queue.read_buffer(buf)
+    assert data == b"11111111222222223333333344444444"
+
+    # Also in parts
+    for i in range(4):
+        data = device.queue.read_buffer(buf, i * 8, size=8)
+        assert data == f"{i+1}".encode() * 8
+
+
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_consequitive_reads():
+    device = wgpu.utils.get_default_device()
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=32, usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST
+    )
+
+    # Write using the queue. Do in parts, to touch those offsets too
+    for i in range(4):
+        device.queue.write_buffer(buf, i * 8, f"{i+1}".encode() * 8)
+
+    # Read in parts, the inefficient way
+    for i in range(4):
+        buf.map("read")
+        data = buf.read_mapped(i * 8, 8)
+        assert data == f"{i+1}".encode() * 8
+        buf.unmap()
+
+    # Read in parts, the efficient way
+    buf.map("read")
+    for i in range(4):
+        data = buf.read_mapped(i * 8, 8)
+        assert data == f"{i+1}".encode() * 8
+    buf.unmap()
+
+
+@mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
+def test_buffer_mapping_fails():
+    device = wgpu.utils.get_default_device()
+    data = b"12345678"
+
+    # Create buffer
+    buf = device.create_buffer(
+        size=32, usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC
+    )
+
+    with raises(RuntimeError):
+        buf.write_mapped(data)  # Not mapped
+    with raises(RuntimeError):
+        buf.read_mapped()  # Not mapped
+
+    with raises(ValueError):
+        buf.map("boo")  # Invalid map mode
+
+    buf.map("write", 0, 28)
+
+    with raises(RuntimeError):
+        buf.map("write")  # Cannot map twice
+
+    with raises(RuntimeError):
+        buf.map("read")  # Cannot map twice
+
+    with raises(RuntimeError):
+        buf.read_mapped()  # Not mapped in read mode
+
+    # Ok
+    buf.write_mapped(data)
+    buf.write_mapped(data, 0)
+    buf.write_mapped(data, 8)
+    buf.write_mapped(data, 16)
+
+    # Fail
+    with raises(ValueError):
+        buf.write_mapped(data, -1)  # not neg
+    with raises(ValueError):
+        buf.write_mapped(data, -8)  # not neg
+    with raises(ValueError):
+        buf.write_mapped(data, 6)  # not multilpe of eight
+
+    # Ok
+    buf.write_mapped(b"1" * 4)
+    buf.write_mapped(b"1" * 8)
+    buf.write_mapped(b"1" * 28)
+    buf.write_mapped(b"1" * 12, 0)
+    buf.write_mapped(b"1" * 12, 8)
+
+    with raises(ValueError):
+        buf.write_mapped(b"")  # not empty
+    with raises(ValueError):
+        buf.write_mapped(b"1" * 64)  # too large for buffer
+    with raises(ValueError):
+        buf.write_mapped(b"1" * 32)  # too large for mapped range
+    with raises(ValueError):
+        buf.write_mapped(b"1" * 3)  # not multiple of 4
+    with raises(ValueError):
+        buf.write_mapped(b"1" * 6)  # not multiple of 4
+    with raises(ValueError):
+        buf.write_mapped(b"1" * 9)  # not multiple of 4
+
+    # Can unmap multiple times though!
+    buf.unmap()
+
+    with raises(RuntimeError):
+        buf.unmap()  # Cannot unmap when not mapped
+
+    # Create buffer in read mode ...
+
+    buf = device.create_buffer(
+        size=32, usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST
+    )
+
+    with raises(RuntimeError):
+        buf.write_mapped(data)  # not mapped
+
+    buf.map("read", 8, 20)
+
+    with raises(RuntimeError):
+        buf.map("read")  # Cannot map twice
+
+    with raises(RuntimeError):
+        buf.map("write")  # Cannot map twice
+
+    with raises(RuntimeError):
+        buf.write_mapped(data)  # not mapped in write mode\
+
+    # Ok
+    assert len(buf.read_mapped()) == 20
+
+    # Fail
+    with raises(ValueError):
+        buf.read_mapped(64)  # read beyond buffer size
+    with raises(ValueError):
+        buf.read_mapped(32)  # read beyond mapped range
+
+
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
 def test_clear_buffer():
     data0 = b"111111112222222233333333"
@@ -580,7 +761,7 @@ def test_write_buffer2():
     device.queue.write_buffer(buf4, 0, data1)
 
     # We swipe the data. You could also think that we passed something into
-    # write_buffer without holding a referene to it. Anyway, write_buffer
+    # write_buffer without holding a reference to it. Anyway, write_buffer
     # seems to copy the data at the moment it is called.
     for i in range(len(data1)):
         data1[i] = 1
diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 45a0d663..3ddfc040 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1505,21 +1505,38 @@ def __init__(self, label, internal, device, size, usage, map_state):
 
         self._mapped_status = 0, 0, 0
         self._mapped_memoryviews = []
+        # If mapped at creation, set to write mode (no point in reading zeros)
         if self._map_state == enums.BufferMapState.mapped:
-            self._mapped_status = 0, self.size, 0
+            self._mapped_status = 0, self.size, flags.MapMode.WRITE
 
     def _check_range(self, offset, size):
-        offset = 0 if offset is None else int(offset)
-        size = (self.size - offset) if size is None else int(size)
+        # Apply defaults
+        if offset is None:
+            offset = 0
+            if self._mapped_status[2] != 0:
+                offset = self._mapped_status[0]
+        else:
+            offset = int(offset)
+        if size is None:
+            size = self.size - offset
+            if self._mapped_status[2] != 0:
+                size = self._mapped_status[1] - offset
+        else:
+            size = int(size)
+        # Checks
         if offset < 0:
             raise ValueError("Mapped offset must not be smaller than zero.")
+        if offset % 8:
+            raise ValueError("Mapped offset must be a multiple of 8.")
         if size < 1:
             raise ValueError("Mapped size must be larger than zero.")
+        if size % 4:
+            raise ValueError("Mapped offset must be a multiple of 4.")
         if offset + size > self.size:
             raise ValueError("Mapped range must not extend beyond total buffer size.")
         return offset, size
 
-    def map(self, mode, offset=0, size=None):
+    def map(self, mode, offset=None, size=None):
         sync_on_read = True
 
         # Check mode
@@ -1602,10 +1619,14 @@ def _release_memoryviews(self):
                 pass
         self._mapped_memoryviews = []
 
-    def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
+    def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
         # Can we even read?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only read from a buffer if its mapped.")
+        elif not (self._mapped_status[2] & flags.MapMode.READ):
+            raise RuntimeError(
+                "Can only read from a buffer if its mapped in read mode."
+            )
 
         # Check offset and size
         offset, size = self._check_range(buffer_offset, size)
@@ -1616,7 +1637,7 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
 
         # Get mapped memoryview.
         # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
-        src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size)
+        src_ptr = libf.wgpuBufferGetMappedRange(self._internal, offset, size)
         src_address = int(ffi.cast("intptr_t", src_ptr))
         src_m = get_memoryview_from_address(src_address, size)
 
@@ -1631,10 +1652,14 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
             self._mapped_memoryviews.append(src_m)
             return src_m
 
-    def write_mapped(self, data, buffer_offset=0, size=None):
+    def write_mapped(self, data, buffer_offset=None, size=None):
         # Can we even write?
         if self._map_state != enums.BufferMapState.mapped:
             raise RuntimeError("Can only write to a buffer if its mapped.")
+        elif not (self._mapped_status[2] & flags.MapMode.WRITE):
+            raise RuntimeError(
+                "Can only write from a buffer if its mapped in write mode."
+            )
 
         # Cast data to a memoryview. This also works for e.g. numpy arrays,
         # and the resulting memoryview will be a view on the data.
@@ -1657,7 +1682,7 @@ def write_mapped(self, data, buffer_offset=0, size=None):
 
         # Get mapped memoryview
         # H: void * f(WGPUBuffer buffer, size_t offset, size_t size)
-        src_ptr = libf.wgpuBufferGetMappedRange(self._internal, 0, size)
+        src_ptr = libf.wgpuBufferGetMappedRange(self._internal, offset, size)
         src_address = int(ffi.cast("intptr_t", src_ptr))
         src_m = get_memoryview_from_address(src_address, size)
 
diff --git a/wgpu/base.py b/wgpu/base.py
index 9751f15b..654c0dff 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -972,7 +972,7 @@ def map_state(self):
     # an array-like object that exposes the shared memory.
 
     # IDL: Promise<undefined> mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size);
-    def map(self, mode, offset=0, size=None):
+    def map(self, mode, offset=None, size=None):
         """Maps the given range of the GPUBuffer.
 
         When this call returns, the buffer content is ready to be
@@ -1001,7 +1001,7 @@ def unmap(self):
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
+    def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
         """Read mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1010,10 +1010,11 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
         become invalid when the buffer is ummapped).
 
         Arguments:
-            buffer_offset (int): the buffer offset in bytes. Default 0. Must be at
-                least as large as the offset specified in ``map()``.
-            size (int): the size to read. Default until the end. The resuling range
-                must fit into the range specified in ``map()``.
+            buffer_offset (int): the buffer offset in bytes. Must be at
+                least as large as the offset specified in ``map()``. The default
+                is the offset of the mapped range.
+            size (int): the size to read. The resuling range must fit into the range
+                specified in ``map()``. The default is as large as the mapped range allows.
             copy (boool): whether a copy of the data is given. Default True.
                 If False, the returned memoryview represents the mapped data
                 directly, and is released when the buffer is unmapped.
@@ -1028,7 +1029,7 @@ def read_mapped(self, buffer_offset=0, size=None, *, copy=True):
         raise NotImplementedError()
 
     @apidiff.add("Replacement for get_mapped_range")
-    def write_mapped(self, data, buffer_offset=0, size=None):
+    def write_mapped(self, data, buffer_offset=None, size=None):
         """Read mapped buffer data.
 
         This method must only be called when the buffer is in a mapped state.
@@ -1039,10 +1040,12 @@ def write_mapped(self, data, buffer_offset=0, size=None):
         Arguments:
             data (buffer-like): The data to write to the buffer, in the form of
                 e.g. a bytes object, memoryview, or numpy array.
-            buffer_offset (int): the buffer offset in bytes. Default 0. Must be at least
-                as large as the offset specified in ``map()``.
-            size (int): the size to read. Default until the end. The resuling range
-                must fit into the range specified in ``map()``.
+            buffer_offset (int): the buffer offset in bytes. Must be at least
+                as large as the offset specified in ``map()``. The default
+                is the offset of the mapped range.
+            size (int): the size to read. The default is the size of
+                the data, so this argument can typically be ignored. The
+                resuling range must fit into the range specified in ``map()``.
 
         Also see `GPUBuffer.read_mapped, `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()`.
         """

From 1fbf6fd0684867569a1f19d9261acf52f8b18e62 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 13:19:03 +0200
Subject: [PATCH 09/13] Tests and fixes for copyless buffer reads

---
 tests/test_rs_basics.py | 62 +++++++++++++++++++++++++++++++++++++++--
 wgpu/backends/rs.py     |  9 +++---
 2 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py
index 05b6c4ec..6299bfa1 100644
--- a/tests/test_rs_basics.py
+++ b/tests/test_rs_basics.py
@@ -476,16 +476,72 @@ def test_buffer_mapping_fails():
         buf.map("write")  # Cannot map twice
 
     with raises(RuntimeError):
-        buf.write_mapped(data)  # not mapped in write mode\
+        buf.write_mapped(data)  # not mapped in write mode
 
     # Ok
     assert len(buf.read_mapped()) == 20
 
     # Fail
     with raises(ValueError):
-        buf.read_mapped(64)  # read beyond buffer size
+        buf.read_mapped(0, 64)  # read beyond buffer size
     with raises(ValueError):
-        buf.read_mapped(32)  # read beyond mapped range
+        buf.read_mapped(0, 32)  # read beyond mapped range
+
+    buf.unmap()
+
+    with raises(RuntimeError):
+        buf.unmap()  # Cannot unmap when not mapped
+
+
+def test_buffer_read_no_copy():
+    device = wgpu.utils.get_default_device()
+    data1 = b"12345678" * 2
+
+    buf = device.create_buffer(
+        size=len(data1), usage=wgpu.BufferUsage.MAP_READ | wgpu.BufferUsage.COPY_DST
+    )
+
+    # Write data to it
+    device.queue.write_buffer(buf, 0, data1)
+
+    # Download from buffer to CPU
+    buf.map("read")
+    data2 = buf.read_mapped(copy=False)
+    data3 = buf.read_mapped(0, 8, copy=False)
+    data4 = buf.read_mapped(8, 8, copy=False)
+
+    assert data2 == data1
+    assert data3 == data1[:8]
+    assert data4 == data1[8:]
+
+    # Can access the arrays
+    _ = data2[0], data3[0], data4[0]
+
+    # But cannot write to memory intended for reading
+    with raises(TypeError):
+        data2[0] = 1
+    with raises(TypeError):
+        data3[0] = 1
+    with raises(TypeError):
+        data4[0] = 1
+
+    buf.unmap()
+
+    # The memoryview is invalidated when the buffer unmapped.
+    # Note that this unfortunately does *not* hold for views on these arrays.
+    with raises(ValueError):
+        data2[0]
+    with raises(ValueError):
+        data3[0]
+    with raises(ValueError):
+        data4[0]
+
+    with raises(ValueError):
+        data2[0] = 1  # cannot write to memory intended for reading
+    with raises(ValueError):
+        data3[0] = 1
+    with raises(ValueError):
+        data4[0] = 1
 
 
 @mark.skipif(not can_use_wgpu_lib, reason="Needs wgpu lib")
diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 3ddfc040..c5ae3750 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1615,7 +1615,7 @@ def _release_memoryviews(self):
         for m in self._mapped_memoryviews:
             try:
                 m.release()
-            except Exception:
+            except Exception:  # no-cover
                 pass
         self._mapped_memoryviews = []
 
@@ -1649,8 +1649,9 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
             return data
         else:
             # Return view on the actually mapped data
-            self._mapped_memoryviews.append(src_m)
-            return src_m
+            data = src_m.toreadonly()
+            self._mapped_memoryviews.append(data)
+            return data
 
     def write_mapped(self, data, buffer_offset=None, size=None):
         # Can we even write?
@@ -1675,7 +1676,7 @@ def write_mapped(self, data, buffer_offset=None, size=None):
             )
 
         # Check data size and given size. If the latter was given, it should match!
-        if data.nbytes != size:  # pragma: no cover
+        if data.nbytes != size:  # no-cover
             raise ValueError(
                 "Data passed to GPUBuffer.write_mapped() does not match the given size."
             )

From 0f432622deb57a43f976643cdba0863ae71c865a Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 13:20:14 +0200
Subject: [PATCH 10/13] codegen

---
 wgpu/backends/rs.py | 2 +-
 wgpu/base.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index c5ae3750..0daf27fb 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1536,7 +1536,7 @@ def _check_range(self, offset, size):
             raise ValueError("Mapped range must not extend beyond total buffer size.")
         return offset, size
 
-    def map(self, mode, offset=None, size=None):
+    def map(self, mode, offset=0, size=None):
         sync_on_read = True
 
         # Check mode
diff --git a/wgpu/base.py b/wgpu/base.py
index 654c0dff..eeca32c3 100644
--- a/wgpu/base.py
+++ b/wgpu/base.py
@@ -972,7 +972,7 @@ def map_state(self):
     # an array-like object that exposes the shared memory.
 
     # IDL: Promise<undefined> mapAsync(GPUMapModeFlags mode, optional GPUSize64 offset = 0, optional GPUSize64 size);
-    def map(self, mode, offset=None, size=None):
+    def map(self, mode, offset=0, size=None):
         """Maps the given range of the GPUBuffer.
 
         When this call returns, the buffer content is ready to be

From 2b4b16a96c8338dbea8282308e032d8916a2dec3 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 13:28:49 +0200
Subject: [PATCH 11/13] 3.7 compat

---
 wgpu/backends/rs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/wgpu/backends/rs.py b/wgpu/backends/rs.py
index 0daf27fb..4d5ab36c 100644
--- a/wgpu/backends/rs.py
+++ b/wgpu/backends/rs.py
@@ -1649,7 +1649,9 @@ def read_mapped(self, buffer_offset=None, size=None, *, copy=True):
             return data
         else:
             # Return view on the actually mapped data
-            data = src_m.toreadonly()
+            data = src_m
+            if hasattr(data, "toreadonly"):  # Py 3.8+
+                data = data.toreadonly()
             self._mapped_memoryviews.append(data)
             return data
 

From 8d8ff8a5f51932c595b4b36db12b11c8abbe3e86 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 13:50:25 +0200
Subject: [PATCH 12/13] Add changelog

---
 CHANGELOG.md | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d99ea2d..08a33a2f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,33 @@ Changed:
 * Update to wgpu-native 0.17.2.1. No changes are needed in downstream code.
 
 
+### [v0.11.0] - t.b.d.
+
+We have revised the buffer mapping API, making it more similar to the
+WebGPU spec, and providing more flexible and performant ways to set
+buffer data.
+
+Added:
+
+* The `GPUBuffer` has new methods `map()`, `map_async()`, `unmap()`. These have been
+  part of the WebGPU spec for a long time, but we had an alternative API, until now.
+* The `GPUBuffer` has new methods `read_mapped()` and `write_mapped()`. These are not
+  present in the WebGPU spec; they are the Pythonic alternative to `getMappedRange()`.
+
+Changed:
+
+* Can create a buffer that is initially mapped: `device.create_buffer(..., mapped_at_creation=True)` is enabled again.
+
+Removed:
+
+* The `GPUBuffer` methods `map_read()`and `map_write()` are deprecated, in favor of `map()`, `unmap()`, `read_mapped()` and `write_mapped()`.
+
+For the record, these are not changed:
+
+* The convenient `device.create_buffer_with_data()` (not part of the WebGPU spec) is also available.
+* The `GPUQueue.read_buffer()` and `GPUQueue.write_buffer()` methods are unchanged.
+
+
 ### [v0.10.0] - 09-10-2023
 
 In this release the API is aligned with the latest webgpu.idl, and

From 3b5e03ac149374c8e7f5bd7fe61045088c31a643 Mon Sep 17 00:00:00 2001
From: Almar Klein <almar@almarklein.org>
Date: Tue, 17 Oct 2023 14:07:44 +0200
Subject: [PATCH 13/13] fix

---
 tests/test_rs_basics.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/test_rs_basics.py b/tests/test_rs_basics.py
index 6299bfa1..660dc604 100644
--- a/tests/test_rs_basics.py
+++ b/tests/test_rs_basics.py
@@ -518,12 +518,13 @@ def test_buffer_read_no_copy():
     _ = data2[0], data3[0], data4[0]
 
     # But cannot write to memory intended for reading
-    with raises(TypeError):
-        data2[0] = 1
-    with raises(TypeError):
-        data3[0] = 1
-    with raises(TypeError):
-        data4[0] = 1
+    if sys.version_info >= (3, 8):  # no memoryview.toreadonly on 3.7 and below
+        with raises(TypeError):
+            data2[0] = 1
+        with raises(TypeError):
+            data3[0] = 1
+        with raises(TypeError):
+            data4[0] = 1
 
     buf.unmap()
 
@@ -537,7 +538,7 @@ def test_buffer_read_no_copy():
         data4[0]
 
     with raises(ValueError):
-        data2[0] = 1  # cannot write to memory intended for reading
+        data2[0] = 1
     with raises(ValueError):
         data3[0] = 1
     with raises(ValueError):