
Commit e910b48

[wgpu] add convenience functions for deferring mapping/callbacks
1 parent 2996c92 commit e910b48

12 files changed (+531 / -40 lines)

Lines changed: 261 additions & 0 deletions
@@ -0,0 +1,261 @@
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering::SeqCst};
use std::sync::Arc;

/// Helper to create a small mappable buffer for READ tests.
fn make_read_buffer(device: &wgpu::Device, size: u64) -> wgpu::Buffer {
    device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("read buffer"),
        size,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    })
}

/// map_buffer_on_submit defers mapping until submit, then invokes the callback after polling.
#[test]
fn encoder_map_buffer_on_submit_defers_until_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    let fired = Arc::new(AtomicBool::new(false));
    let fired_cl = Arc::clone(&fired);

    let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
        label: Some("encoder"),
    });

    // Register deferred map.
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 0..4, move |_| {
        fired_cl.store(true, SeqCst);
    });
    // Include a trivial command that uses the buffer.
    encoder.clear_buffer(&buffer, 0, None);

    // Polling before submit should not trigger the callback.
    _ = device.poll(wgpu::PollType::Poll);
    assert!(!fired.load(SeqCst));

    // Submit and wait; callback should fire.
    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);
    assert!(fired.load(SeqCst));
}

/// Empty ranges panic immediately when registering the deferred map.
#[test]
#[should_panic = "buffer slices can not be empty"]
fn encoder_map_buffer_on_submit_empty_range_panics_immediately() {
    let (device, _queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    let encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

    // This panics inside map_buffer_on_submit (range_to_offset_size).
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 8..8, |_| {});
}

/// Out-of-bounds ranges panic during submit (when the deferred map executes).
#[test]
#[should_panic = "is out of range for buffer of size"]
fn encoder_map_buffer_on_submit_out_of_bounds_panics_on_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    // 12..24 overflows the 16-byte buffer (size=12, end=24).
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 12..24, |_| {});
    encoder.clear_buffer(&buffer, 0, None);

    // Panic happens inside submit when executing deferred actions.
    queue.submit([encoder.finish()]);
}

/// If the buffer is already mapped when the deferred mapping executes, it panics during submit.
#[test]
#[should_panic = "Buffer with 'read buffer' label is still mapped"]
fn encoder_map_buffer_on_submit_panics_if_already_mapped_on_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    // Start a mapping now so the buffer is considered mapped.
    buffer.slice(0..4).map_async(wgpu::MapMode::Read, |_| {});

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    // Deferred mapping of an already-mapped buffer will panic when executed on submit or be rejected by submit.
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 0..4, |_| {});
    // Include any trivial work; using the same buffer ensures core validation catches the mapped hazard.
    encoder.clear_buffer(&buffer, 0, None);

    queue.submit([encoder.finish()]);
}

/// on_submitted_work_done is deferred until submit.
#[test]
fn encoder_on_submitted_work_done_defers_until_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());

    let fired = Arc::new(AtomicBool::new(false));
    let fired_cl = Arc::clone(&fired);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

    encoder.on_submitted_work_done(move || {
        fired_cl.store(true, SeqCst);
    });

    // Include a trivial command so the command buffer isn't completely empty.
    let dummy = make_read_buffer(&device, 4);
    encoder.clear_buffer(&dummy, 0, None);

    // Without submission, polling shouldn't invoke the callback.
    _ = device.poll(wgpu::PollType::Poll);
    assert!(!fired.load(SeqCst));

    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);
    assert!(fired.load(SeqCst));
}

/// Both kinds of deferred callbacks are enqueued and eventually invoked.
#[test]
fn encoder_both_callbacks_fire_after_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    let map_fired = Arc::new(AtomicBool::new(false));
    let map_fired_cl = Arc::clone(&map_fired);
    let queue_fired = Arc::new(AtomicBool::new(false));
    let queue_fired_cl = Arc::clone(&queue_fired);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 0..4, move |_| {
        map_fired_cl.store(true, SeqCst);
    });
    encoder.on_submitted_work_done(move || {
        queue_fired_cl.store(true, SeqCst);
    });
    encoder.clear_buffer(&buffer, 0, None);

    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);

    assert!(map_fired.load(SeqCst));
    assert!(queue_fired.load(SeqCst));
}

/// Registering multiple deferred mappings works; all callbacks fire after submit.
#[test]
fn encoder_multiple_map_buffer_on_submit_callbacks_fire() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer1 = make_read_buffer(&device, 32);
    let buffer2 = make_read_buffer(&device, 32);

    let counter = Arc::new(AtomicU32::new(0));
    let c1 = Arc::clone(&counter);
    let c2 = Arc::clone(&counter);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.map_buffer_on_submit(&buffer1, wgpu::MapMode::Read, 0..4, move |_| {
        c1.fetch_add(1, SeqCst);
    });
    encoder.map_buffer_on_submit(&buffer2, wgpu::MapMode::Read, 8..12, move |_| {
        c2.fetch_add(1, SeqCst);
    });
    encoder.clear_buffer(&buffer1, 0, None);

    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);

    assert_eq!(counter.load(SeqCst), 2);
}

/// Mapping with a buffer lacking MAP_* usage should panic when executed on submit.
#[test]
#[should_panic]
fn encoder_map_buffer_on_submit_panics_if_usage_invalid_on_submit() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let unmappable = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("unmappable buffer"),
        size: 16,
        usage: wgpu::BufferUsages::COPY_DST, // No MAP_READ or MAP_WRITE
        mapped_at_creation: false,
    });

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.map_buffer_on_submit(&unmappable, wgpu::MapMode::Read, 0..4, |_| {});

    // Add unrelated work so the submission isn't empty.
    let dummy = make_read_buffer(&device, 4);
    encoder.clear_buffer(&dummy, 0, None);

    // Panic expected when deferred mapping executes.
    queue.submit([encoder.finish()]);
}

/// Deferred map callbacks run before on_submitted_work_done for the same submission.
#[test]
fn encoder_deferred_map_runs_before_on_submitted_work_done() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 16);

    #[derive(Default)]
    struct Order {
        map_order: AtomicU32,
        queue_order: AtomicU32,
        counter: AtomicU32,
    }
    let order = Arc::new(Order::default());
    let o_map = Arc::clone(&order);
    let o_queue = Arc::clone(&order);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.map_buffer_on_submit(&buffer, wgpu::MapMode::Read, 0..4, move |_| {
        let v = o_map.counter.fetch_add(1, SeqCst);
        o_map.map_order.store(v, SeqCst);
    });
    encoder.on_submitted_work_done(move || {
        let v = o_queue.counter.fetch_add(1, SeqCst);
        o_queue.queue_order.store(v, SeqCst);
    });
    encoder.clear_buffer(&buffer, 0, None);

    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);

    assert_eq!(order.counter.load(SeqCst), 2);
    assert_eq!(order.map_order.load(SeqCst), 0);
    assert_eq!(order.queue_order.load(SeqCst), 1);
}

/// Multiple on_submitted_work_done callbacks registered on encoder all fire after submit.
#[test]
fn encoder_multiple_on_submitted_callbacks_fire() {
    let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default());
    let buffer = make_read_buffer(&device, 4);

    let counter = Arc::new(AtomicU32::new(0));
    let c1 = Arc::clone(&counter);
    let c2 = Arc::clone(&counter);

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.on_submitted_work_done(move || {
        c1.fetch_add(1, SeqCst);
    });
    encoder.on_submitted_work_done(move || {
        c2.fetch_add(1, SeqCst);
    });
    encoder.clear_buffer(&buffer, 0, None);

    queue.submit([encoder.finish()]);
    _ = device.poll(wgpu::PollType::Wait);

    assert_eq!(counter.load(SeqCst), 2);
}
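
The tests above observe the deferred callbacks with atomics; in application code a channel works just as well. The sketch below is illustrative only: the `submit_and_wait` helper is hypothetical, `device`, `queue`, and the recorded `encoder` are assumed to come from the caller, and `on_submitted_work_done` is used exactly as exercised by the tests.

use std::sync::mpsc;

/// Hypothetical helper: submit already-recorded work and block until the GPU has finished it.
fn submit_and_wait(device: &wgpu::Device, queue: &wgpu::Queue, mut encoder: wgpu::CommandEncoder) {
    let (tx, rx) = mpsc::channel();

    // Deferred: the callback is only queued once this encoder is submitted.
    encoder.on_submitted_work_done(move || {
        // The receiver may already be gone; ignore the send error.
        let _ = tx.send(());
    });

    queue.submit([encoder.finish()]);

    // Drive callbacks; on native, Wait blocks until the submission completes.
    let _ = device.poll(wgpu::PollType::Wait);
    rx.recv().expect("work-done signal");
}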

tests/tests/wgpu-validation/api/mod.rs

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 mod binding_arrays;
 mod buffer;
 mod buffer_slice;
+mod command_buffer_actions;
 mod device;
 mod external_texture;
 mod instance;

wgpu/src/api/buffer.rs

Lines changed: 55 additions & 23 deletions
@@ -302,20 +302,28 @@ impl Buffer {
         self.usage
     }
 
-    /// Map the buffer to host (CPU) memory, making it available for reading or writing
-    /// via [`get_mapped_range()`](Self::get_mapped_range).
-    /// It is available once the `callback` is called with an [`Ok`] response.
+    /// Map the buffer to host (CPU) memory, making it available for reading or writing via
+    /// [`get_mapped_range()`](Self::get_mapped_range). The buffer becomes accessible once the
+    /// `callback` is invoked with [`Ok`].
     ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    /// Use this when you want to map the buffer immediately. If you need to submit GPU work that
+    /// uses the buffer before mapping it, use `map_buffer_on_submit` on
+    /// [`CommandEncoder`][CEmbos], [`CommandBuffer`][CBmbos], [`RenderPass`][RPmbos], or
+    /// [`ComputePass`][CPmbos] to schedule the mapping after submission. This avoids extra calls to
+    /// [`Buffer::map_async()`] or [`BufferSlice::map_async()`] and lets you initiate mapping from a
+    /// more convenient place.
     ///
-    /// The callback will be called on the thread that first calls the above functions after the GPU work
-    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
+    /// For the callback to run, either [`queue.submit(..)`][q::s], [`instance.poll_all(..)`][i::p_a],
+    /// or [`device.poll(..)`][d::p] must be called elsewhere in the runtime, possibly integrated into
+    /// an event loop or run on a separate thread.
     ///
-    /// As long as a buffer is mapped, it is not available for use by any other commands;
-    /// at all times, either the GPU or the CPU has exclusive access to the contents of the buffer.
+    /// The callback runs on the thread that first calls one of the above functions after the GPU work
+    /// completes. There are no restrictions on the code you can run in the callback; however, on native
+    /// the polling call will not return until the callback finishes, so keep callbacks short (set flags,
+    /// send messages, etc.).
+    ///
+    /// While a buffer is mapped, it cannot be used by other commands; at any time, either the GPU or
+    /// the CPU has exclusive access to the buffer’s contents.
     ///
     /// This can also be performed using [`BufferSlice::map_async()`].
     ///
@@ -326,6 +334,14 @@ impl Buffer {
     /// - If `bounds` is outside of the bounds of `self`.
     /// - If `bounds` has a length less than 1.
    /// - If the start and end of `bounds` are not be aligned to [`MAP_ALIGNMENT`].
+    ///
+    /// [CEmbos]: CommandEncoder::map_buffer_on_submit
+    /// [CBmbos]: CommandBuffer::map_buffer_on_submit
+    /// [RPmbos]: RenderPass::map_buffer_on_submit
+    /// [CPmbos]: ComputePass::map_buffer_on_submit
+    /// [q::s]: Queue::submit
+    /// [i::p_a]: Instance::poll_all
+    /// [d::p]: Device::poll
     pub fn map_async<S: RangeBounds<BufferAddress>>(
         &self,
         mode: MapMode,
@@ -461,20 +477,28 @@ impl<'a> BufferSlice<'a> {
         }
     }
 
-    /// Map the buffer to host (CPU) memory, making it available for reading or writing
-    /// via [`get_mapped_range()`](Self::get_mapped_range).
-    /// It is available once the `callback` is called with an [`Ok`] response.
+    /// Map the buffer to host (CPU) memory, making it available for reading or writing via
+    /// [`get_mapped_range()`](Self::get_mapped_range). The buffer becomes accessible once the
+    /// `callback` is invoked with [`Ok`].
     ///
-    /// For the callback to complete, either `queue.submit(..)`, `instance.poll_all(..)`, or `device.poll(..)`
-    /// must be called elsewhere in the runtime, possibly integrated into an event loop or run on a separate thread.
+    /// Use this when you want to map the buffer immediately. If you need to submit GPU work that
+    /// uses the buffer before mapping it, use `map_buffer_on_submit` on
+    /// [`CommandEncoder`][CEmbos], [`CommandBuffer`][CBmbos], [`RenderPass`][RPmbos], or
+    /// [`ComputePass`][CPmbos] to schedule the mapping after submission. This avoids extra calls to
+    /// [`Buffer::map_async()`] or [`BufferSlice::map_async()`] and lets you initiate mapping from a
+    /// more convenient place.
     ///
-    /// The callback will be called on the thread that first calls the above functions after the GPU work
-    /// has completed. There are no restrictions on the code you can run in the callback, however on native the
-    /// call to the function will not complete until the callback returns, so prefer keeping callbacks short
-    /// and used to set flags, send messages, etc.
+    /// For the callback to run, either [`queue.submit(..)`][q::s], [`instance.poll_all(..)`][i::p_a],
+    /// or [`device.poll(..)`][d::p] must be called elsewhere in the runtime, possibly integrated into
+    /// an event loop or run on a separate thread.
     ///
-    /// As long as a buffer is mapped, it is not available for use by any other commands;
-    /// at all times, either the GPU or the CPU has exclusive access to the contents of the buffer.
+    /// The callback runs on the thread that first calls one of the above functions after the GPU work
+    /// completes. There are no restrictions on the code you can run in the callback; however, on native
+    /// the polling call will not return until the callback finishes, so keep callbacks short (set flags,
+    /// send messages, etc.).
+    ///
+    /// While a buffer is mapped, it cannot be used by other commands; at any time, either the GPU or
+    /// the CPU has exclusive access to the buffer’s contents.
     ///
     /// This can also be performed using [`Buffer::map_async()`].
     ///
@@ -483,6 +507,14 @@ impl<'a> BufferSlice<'a> {
     /// - If the buffer is already mapped.
     /// - If the buffer’s [`BufferUsages`] do not allow the requested [`MapMode`].
     /// - If the endpoints of this slice are not aligned to [`MAP_ALIGNMENT`] within the buffer.
+    ///
+    /// [CEmbos]: CommandEncoder::map_buffer_on_submit
+    /// [CBmbos]: CommandBuffer::map_buffer_on_submit
+    /// [RPmbos]: RenderPass::map_buffer_on_submit
+    /// [CPmbos]: ComputePass::map_buffer_on_submit
+    /// [q::s]: Queue::submit
+    /// [i::p_a]: Instance::poll_all
+    /// [d::p]: Device::poll
     pub fn map_async(
         &self,
         mode: MapMode,
@@ -856,7 +888,7 @@ fn check_buffer_bounds(
 }
 
 #[track_caller]
-fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
+pub(crate) fn range_to_offset_size<S: RangeBounds<BufferAddress>>(
    bounds: S,
    whole_size: BufferAddress,
) -> (BufferAddress, BufferSize) {
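
As a usage sketch of the pattern these docs point to: register the mapping on the encoder before submission instead of calling `map_async` afterwards. The `read_back` helper below is hypothetical; it assumes a caller-supplied `device`, `queue`, and a `src` buffer created with `COPY_SRC`, and it ignores the map callback in favour of waiting with `PollType::Wait`, as the tests in this commit do.

/// Hypothetical helper: copy `src` into a readback buffer, defer the mapping to submit,
/// and return the bytes. `size` is assumed to be a valid copy size for `src`.
fn read_back(device: &wgpu::Device, queue: &wgpu::Queue, src: &wgpu::Buffer, size: u64) -> Vec<u8> {
    let readback = device.create_buffer(&wgpu::BufferDescriptor {
        label: Some("readback"),
        size,
        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
        mapped_at_creation: false,
    });

    let mut encoder =
        device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
    encoder.copy_buffer_to_buffer(src, 0, &readback, 0, size);
    // Deferred: the map request is issued automatically when this encoder is submitted,
    // so no separate map_async call is needed after queue.submit.
    encoder.map_buffer_on_submit(&readback, wgpu::MapMode::Read, 0..size, |_| {});

    queue.submit([encoder.finish()]);
    // Wait until the submission (and therefore the deferred mapping) has completed.
    let _ = device.poll(wgpu::PollType::Wait);

    let data = readback.get_mapped_range(0..size).to_vec();
    readback.unmap();
    data
}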
