From 964d88289f9cb680fc8a04c04f02e220b22a1657 Mon Sep 17 00:00:00 2001 From: krol Date: Sat, 19 Dec 2020 11:32:17 +0100 Subject: [PATCH 1/2] Fixed FXC validation error: uav store write mask must match store value mask, write mask is 7 and store value mask is 0. --- .../SampleCore/Shaders/util/GenerateGrassStrawsCS.hlsl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/SampleCore/Shaders/util/GenerateGrassStrawsCS.hlsl b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/SampleCore/Shaders/util/GenerateGrassStrawsCS.hlsl index 911bd15b6..9af105f63 100644 --- a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/SampleCore/Shaders/util/GenerateGrassStrawsCS.hlsl +++ b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/SampleCore/Shaders/util/GenerateGrassStrawsCS.hlsl @@ -194,6 +194,9 @@ void main(uint2 DTid : SV_DispatchThreadID) { VertexPositionNormalTextureTangent vertex; vertex.position = 0; + vertex.normal = 0; + vertex.textureCoordinate = 0; + vertex.tangent = 0; uint threadID = DTid.x + DTid.y * cb.p.maxPatchDim.x; uint baseVertexID = threadID * N_GRASS_VERTICES; From c50a1b8ed49f79d9ce758a2ba0d24e9e26881bac Mon Sep 17 00:00:00 2001 From: krol Date: Sat, 19 Dec 2020 12:26:19 +0100 Subject: [PATCH 2/2] Hemisphere samples array is uploaded only on recreation of AO samples. The array was previously uploaded to the GPU on every frame, and since the array is quite large (87031808 bytes) it caused unnecessary CPU overhead (8ms of CPU time on i7 4790K 4.4GHz 32GB RAM 2070 RTX). On simpler scenes this was the main bottleneck, e.g. in the default scene with grass disabled (#define RENDER_GRASS_GEOMETRY 0) the framerate tripled (~100 to ~300fps) with GPU timings unaffected. TODO: - The array is unnecessarily triple-buffered and has to be uploaded three times on consecutive frames. 
Only a single upload is required since the content is unchanged. - To reduce its size, the precision of the array can be reduced to half float or even 8-bit SNORM. --- .../RTAO/RTAO.cpp | 6 +++++- .../RTAO/RTAO.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.cpp b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.cpp index b02d73630..646f72e44 100644 --- a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.cpp +++ b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.cpp @@ -448,6 +448,7 @@ void RTAO::CreateSamplesRNG() m_samplesGPUBuffer[i].value = XMFLOAT2(p.x * 0.5f + 0.5f, p.y * 0.5f + 0.5f); m_hemisphereSamplesGPUBuffer[i].value = p; } + m_numAOSamplesUploadFrames = m_deviceResources->GetBackBufferCount(); } void RTAO::GetRayGenParameters(bool* isCheckerboardSamplingEnabled, bool* checkerboardLoadEvenPixels) @@ -547,8 +548,11 @@ void RTAO::Run( } // Copy dynamic buffers to GPU. 
+ UpdateConstantBuffer(frameIndex); + + if (m_numAOSamplesUploadFrames) { - UpdateConstantBuffer(frameIndex); + m_numAOSamplesUploadFrames--; m_hemisphereSamplesGPUBuffer.CopyStagingToGpu(frameIndex); } diff --git a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.h b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.h index 7bc67fba0..0e17cc347 100644 --- a/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.h +++ b/Samples/Desktop/D3D12Raytracing/src/D3D12RaytracingRealTimeDenoisedAmbientOcclusion/RTAO/RTAO.h @@ -108,6 +108,7 @@ class RTAO StructuredBuffer m_hemisphereSamplesGPUBuffer; BOOL m_isRecreateAOSamplesRequested = true; + UINT m_numAOSamplesUploadFrames = 0; UINT m_numAORayGeometryHits; bool m_checkerboardGenerateRaysForEvenPixels = false;