Skip to content

Commit 335c9ab

Browse files
fix(dx12): align tex. <-> buf. copies via intermediate buffer if !UnrestrictedBufferTextureCopyPitchSupported
1 parent 29d2458 commit 335c9ab

File tree

6 files changed

+167
-24
lines changed

6 files changed

+167
-24
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913).
5959

6060
Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950).
6161

62+
### Bug Fixes
63+
64+
#### DX12
65+
66+
- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).
67+
6268
## v26.0.1 (2025-07-10)
6369

6470
### Bug Fixes

tests/tests/wgpu-gpu/regression/issue_6827.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
1919
.expect_fail(FailureCase::backend_adapter(
2020
wgpu::Backends::METAL,
2121
"Apple Paravirtual device", // CI on M1
22-
))
23-
.expect_fail(
24-
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
25-
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
26-
FailureCase::backend(wgpu::Backends::DX12)
27-
.flaky()
28-
.validation_error(
29-
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
30-
)
31-
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
32-
),
22+
)),
3323
)
3424
.run_async(|ctx| async move { run_test(ctx, true).await });
3525

wgpu-hal/src/dx12/adapter.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,7 @@ impl super::Adapter {
317317
suballocation_supported: !info.name.contains("Iris(R) Xe"),
318318
shader_model,
319319
max_sampler_descriptor_heap_size,
320-
_unrestricted_buffer_texture_copy_pitch_supported:
321-
unrestricted_buffer_texture_copy_pitch_supported,
320+
unrestricted_buffer_texture_copy_pitch_supported,
322321
};
323322

324323
// Theoretically vram limited, but in practice 2^20 is the limit

wgpu-hal/src/dx12/command.rs

Lines changed: 140 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
dxgi::{name::ObjectExt, result::HResult as _},
1515
},
1616
dx12::borrow_interface_temporarily,
17-
AccelerationStructureEntries,
17+
AccelerationStructureEntries, CommandEncoder as _,
1818
};
1919

2020
fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
@@ -312,6 +312,78 @@ impl super::CommandEncoder {
312312
}
313313
}
314314
}
315+
316+
unsafe fn buf_tex_intermediate<T>(
317+
&mut self,
318+
region: crate::BufferTextureCopy,
319+
tex_fmt: wgt::TextureFormat,
320+
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
321+
) -> Option<(T, super::Buffer)> {
322+
let size = {
323+
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
324+
tex_fmt,
325+
region.texture_base.aspect.map(),
326+
&region.size.into(),
327+
);
328+
copy_info.unwrap().bytes_in_copy
329+
};
330+
331+
let size = wgt::BufferSize::new(size)?;
332+
333+
let buffer = {
334+
let (resource, allocation) =
335+
super::suballocation::DeviceAllocationContext::from(&*self)
336+
.create_buffer(&crate::BufferDescriptor {
337+
label: None,
338+
size: size.get(),
339+
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
340+
memory_flags: crate::MemoryFlags::empty(),
341+
})
342+
.expect(concat!(
343+
"internal error: ",
344+
"failed to allocate intermediate buffer ",
345+
"for offset alignment"
346+
));
347+
super::Buffer {
348+
resource,
349+
size: size.get(),
350+
allocation,
351+
}
352+
};
353+
354+
let mut region = region;
355+
region.buffer_layout.offset = 0;
356+
357+
unsafe {
358+
self.transition_buffers(
359+
[crate::BufferBarrier {
360+
buffer: &buffer,
361+
usage: crate::StateTransition {
362+
from: wgt::BufferUses::empty(),
363+
to: wgt::BufferUses::COPY_DST,
364+
},
365+
}]
366+
.into_iter(),
367+
)
368+
};
369+
370+
let t = copy_op(self, &buffer, size, region);
371+
372+
unsafe {
373+
self.transition_buffers(
374+
[crate::BufferBarrier {
375+
buffer: &buffer,
376+
usage: crate::StateTransition {
377+
from: wgt::BufferUses::COPY_DST,
378+
to: wgt::BufferUses::COPY_SRC,
379+
},
380+
}]
381+
.into_iter(),
382+
)
383+
};
384+
385+
Some((t, buffer))
386+
}
315387
}
316388

317389
impl crate::CommandEncoder for super::CommandEncoder {
@@ -612,31 +684,61 @@ impl crate::CommandEncoder for super::CommandEncoder {
612684
) where
613685
T: Iterator<Item = crate::BufferTextureCopy>,
614686
{
615-
for r in regions {
687+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
688+
689+
for naive_copy_region in regions {
690+
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
691+
let (final_copy_region, src) = if is_offset_aligned {
692+
(naive_copy_region, src)
693+
} else {
694+
let Some((intermediate_to_dst_region, intermediate_buf)) = (unsafe {
695+
let src_offset = naive_copy_region.buffer_layout.offset;
696+
self.buf_tex_intermediate(
697+
naive_copy_region,
698+
dst.format,
699+
|this, buf, size, intermediate_to_dst_region| {
700+
let layout = crate::BufferCopy {
701+
src_offset,
702+
dst_offset: 0,
703+
size,
704+
};
705+
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
706+
intermediate_to_dst_region
707+
},
708+
)
709+
}) else {
710+
continue;
711+
};
712+
self.intermediate_copy_bufs.push(intermediate_buf);
713+
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
714+
(intermediate_to_dst_region, intermediate_buf)
715+
};
716+
616717
let list = self.list.as_ref().unwrap();
617718

618719
let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
619720
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
620721
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
621722
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
622-
PlacedFootprint: r.to_subresource_footprint(dst.format),
723+
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
623724
},
624725
};
625726
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
626727
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
627728
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
628729
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
629-
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
730+
SubresourceIndex: dst
731+
.calc_subresource_for_copy(&final_copy_region.texture_base),
630732
},
631733
};
632734

633-
let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
735+
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
634736
unsafe {
635737
list.CopyTextureRegion(
636738
&dst_location,
637-
r.texture_base.origin.x,
638-
r.texture_base.origin.y,
639-
r.texture_base.origin.z,
739+
final_copy_region.texture_base.origin.x,
740+
final_copy_region.texture_base.origin.y,
741+
final_copy_region.texture_base.origin.z,
640742
&src_location,
641743
Some(&src_box),
642744
)
@@ -680,8 +782,37 @@ impl crate::CommandEncoder for super::CommandEncoder {
680782
};
681783
};
682784

785+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
786+
683787
for r in regions {
684-
copy_aligned(this, src, dst, r);
788+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
789+
if is_offset_aligned {
790+
copy_aligned(self, src, dst, r)
791+
} else {
792+
let orig_offset = r.buffer_layout.offset;
793+
let Some((intermediate_to_dst_region, src)) = (unsafe {
794+
self.buf_tex_intermediate(
795+
r,
796+
src.format,
797+
|this, buf, size, intermediate_region| {
798+
copy_aligned(this, src, buf, intermediate_region);
799+
crate::BufferCopy {
800+
src_offset: orig_offset,
801+
dst_offset: 0,
802+
size,
803+
}
804+
},
805+
)
806+
}) else {
807+
continue;
808+
};
809+
810+
unsafe {
811+
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
812+
}
813+
814+
self.intermediate_copy_bufs.push(src);
815+
};
685816
}
686817
}
687818

wgpu-hal/src/dx12/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ impl crate::Device for super::Device {
778778
mem_allocator: self.mem_allocator.clone(),
779779
rtv_pool: Arc::clone(&self.rtv_pool),
780780
temp_rtv_handles: Vec::new(),
781+
intermediate_copy_bufs: Vec::new(),
781782
null_rtv_handle: self.null_rtv_handle,
782783
list: None,
783784
free_lists: Vec::new(),

wgpu-hal/src/dx12/mod.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ use windows::{
9595
core::{Free, Interface},
9696
Win32::{
9797
Foundation,
98-
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
98+
Graphics::{
99+
Direct3D,
100+
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
101+
DirectComposition, Dxgi,
102+
},
99103
System::Threading,
100104
},
101105
};
@@ -581,7 +585,17 @@ struct PrivateCapabilities {
581585
suballocation_supported: bool,
582586
shader_model: naga::back::hlsl::ShaderModel,
583587
max_sampler_descriptor_heap_size: u32,
584-
_unrestricted_buffer_texture_copy_pitch_supported: bool,
588+
unrestricted_buffer_texture_copy_pitch_supported: bool,
589+
}
590+
591+
impl PrivateCapabilities {
592+
fn texture_data_placement_alignment(&self) -> u64 {
593+
if self.unrestricted_buffer_texture_copy_pitch_supported {
594+
4
595+
} else {
596+
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
597+
}
598+
}
585599
}
586600

587601
#[derive(Default)]
@@ -817,6 +831,8 @@ pub struct CommandEncoder {
817831
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
818832
temp_rtv_handles: Vec<descriptor::Handle>,
819833

834+
intermediate_copy_bufs: Vec<Buffer>,
835+
820836
null_rtv_handle: descriptor::Handle,
821837
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
822838
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,

0 commit comments

Comments
 (0)