diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..f20b91b --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2f4c7fb --- /dev/null +++ b/README.md @@ -0,0 +1,38 @@ +# FidelityFX Denoiser + +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+## Overview
+
+FidelityFX Denoiser will contain a collection of highly optimized denoiser implementations for specific use cases.
+
+## FFX Reflection Denoiser
+
+The reflection denoiser includes a high-performance spatio-temporal denoiser specialized for reflection denoising.
+The preferred use case of this denoiser is within applications requiring denoised radiance values generated by some stochastic reflection implementation.
+Examples of stochastic reflection implementations:
+- Stochastic Screen Space Reflections
+- Stochastic Raytraced Reflections
+
+### Links
+
+- ffx-reflection-dnsr contains the [Reflection Denoiser](https://github.com/GPUOpen-Effects/FidelityFX-Denoiser/tree/master/ffx-reflection-dnsr)
+- Visit [FidelityFX SSSR](https://github.com/GPUOpen-Effects/FidelityFX-SSSR/tree/master/sample) to see the reflection denoiser in action.
+ +## FFX Shadow Denoiser (Future Update) +A denoiser optimized for raytraced shadows will be added here in a future update. If you are a developer working on a raytracing title then please contact your AMD representative for early access. \ No newline at end of file diff --git a/docs/FFX_Denoiser_Reflection_Technology.pdf b/docs/FFX_Denoiser_Reflection_Technology.pdf new file mode 100644 index 0000000..dbd37c0 Binary files /dev/null and b/docs/FFX_Denoiser_Reflection_Technology.pdf differ diff --git a/ffx-reflection-dnsr/ffx_denoiser_reflections_blur.h b/ffx-reflection-dnsr/ffx_denoiser_reflections_blur.h new file mode 100644 index 0000000..b0a08d0 --- /dev/null +++ b/ffx-reflection-dnsr/ffx_denoiser_reflections_blur.h @@ -0,0 +1,142 @@ +/********************************************************************** +Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#ifndef FFX_DNSR_REFLECTIONS_BLUR
+#define FFX_DNSR_REFLECTIONS_BLUR
+
+#include "ffx_denoiser_reflections_common.h"
+
+// Weight of tap (x, y) of the 5x5 kernel: the 1D taps { 6, 4, 1 } are looked up by |x| and |y|
+// and their product is divided by 256. x and y must therefore stay within [-2, 2].
+min16float FFX_DNSR_Reflections_GaussianWeight(int x, int y) {
+    uint taps[] = { 6, 4, 1 };
+    return min16float(taps[abs(x)] * taps[abs(y)]) / 256.0;
+}
+
+// Blurs the radiance around group_thread_id (a groupshared-memory coordinate) over a 5x5 window.
+// Each tap is weighted by the kernel weight and by how close its roughness is to center_roughness.
+min16float3 FFX_DNSR_Reflections_Resolve(int2 group_thread_id, min16float center_roughness, min16float roughness_sigma_min, min16float roughness_sigma_max) {
+    const int radius = 2;
+    min16float3 radiance_sum = 0.0;
+    min16float weight_sum = 0.0;
+
+    for (int dy = -radius; dy <= radius; ++dy) {
+        for (int dx = -radius; dx <= radius; ++dx) {
+            min16float3 tap_radiance;
+            min16float tap_roughness;
+            FFX_DNSR_Reflections_LoadFromGroupSharedMemory(group_thread_id + int2(dx, dy), tap_radiance, tap_roughness);
+
+            min16float tap_weight =
+                FFX_DNSR_Reflections_GaussianWeight(dx, dy) *
+                FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeightFP16(center_roughness, tap_roughness, roughness_sigma_min, roughness_sigma_max);
+            radiance_sum += tap_weight * tap_radiance;
+            weight_sum += tap_weight;
+        }
+    }
+
+    // Normalize; the lower bound guards against dividing by a vanishing total weight.
+    radiance_sum /= max(weight_sum, 0.0001);
+    return min16float3(radiance_sum);
+}
+
+// Loads radiance and roughness of the pixel at dispatch_thread_id + offset.
+void FFX_DNSR_Reflections_LoadWithOffset(int2 dispatch_thread_id, int2 offset, out min16float3 radiance, out min16float roughness) {
+    int2 pixel_coords = dispatch_thread_id + offset;
+    radiance = FFX_DNSR_Reflections_LoadRadianceFP16(pixel_coords);
+    roughness = FFX_DNSR_Reflections_LoadRoughnessFP16(pixel_coords);
+}
+
+// Stores radiance and roughness into groupshared memory at group_thread_id + offset.
+void FFX_DNSR_Reflections_StoreWithOffset(int2 group_thread_id, int2 offset, min16float3 radiance, min16float roughness) {
+    FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id + offset, radiance, roughness);
+}
+
+// Fills the groupshared region read by FFX_DNSR_Reflections_Resolve: the group's own pixels (X)
+// plus the right (A), bottom (B) and corner (C) bands, starting 2 pixels up/left of the group.
+//
+//   XXA
+//   XXA
+//   BBC
+//
+// Every thread issues three loads; threads that have no A/C or B texel to fetch all
+// read the same location (offset -group_thread_id) and skip the matching store.
+void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id) {
+    const bool stores_right_or_corner = group_thread_id.x < 4 || group_thread_id.y >= 4; // A & C
+    const bool stores_bottom = group_thread_id.y < 4; // B
+
+    int2 offset_0 = -group_thread_id; // map all threads to the same memory location to guarantee cache hits.
+    if (group_thread_id.x < 4) {
+        offset_0 = int2(8, 0);
+    }
+    else if (group_thread_id.y >= 4) {
+        offset_0 = int2(4, 4);
+    }
+
+    int2 offset_1 = -group_thread_id; // map all threads to the same memory location to guarantee cache hits.
+    if (stores_bottom) {
+        offset_1 = int2(0, 8);
+    }
+
+    min16float3 radiance_0, radiance_1, radiance_2;
+    min16float roughness_0, roughness_1, roughness_2;
+
+    dispatch_thread_id -= 2;
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, int2(0, 0), radiance_0, roughness_0); // X
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_0, radiance_1, roughness_1); // A & C
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_1, radiance_2, roughness_2); // B
+
+    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, int2(0, 0), radiance_0, roughness_0); // X
+    if (stores_right_or_corner) {
+        FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_0, radiance_1, roughness_1); // A & C
+    }
+    if (stores_bottom) {
+        FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_1, radiance_2, roughness_2); // B
+    }
+}
+
+// Entry point of the blur pass for one thread: blurs the pixel at dispatch_thread_id
+// if its tile is flagged in the tile meta data and the pixel is glossy but not a mirror.
+void FFX_DNSR_Reflections_Blur(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size) {
+    // First check if we have to denoise or if a simple copy is enough
+    uint tile_index = WaveReadLaneFirst(FFX_DNSR_Reflections_GetTileMetaDataIndex(dispatch_thread_id, screen_size.x));
+    bool tile_needs_denoiser = FFX_DNSR_Reflections_LoadTileMetaDataMask(tile_index);
+
+    [branch]
+    if (tile_needs_denoiser) {
+        FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id);
+        GroupMemoryBarrierWithGroupSync();
+
+        int2 center_coords = group_thread_id + 2; // Center threads in groupshared memory
+        min16float3 center_radiance;
+        min16float center_roughness;
+        FFX_DNSR_Reflections_LoadFromGroupSharedMemory(center_coords, center_radiance, center_roughness);
+
+        bool is_blur_candidate = FFX_DNSR_Reflections_IsGlossyReflection(center_roughness) && !FFX_DNSR_Reflections_IsMirrorReflection(center_roughness);
+        if (!is_blur_candidate) {
+            return;
+        }
+
+        min16float3 blurred_radiance = FFX_DNSR_Reflections_Resolve(center_coords, center_roughness, g_roughness_sigma_min, g_roughness_sigma_max);
+        FFX_DNSR_Reflections_StoreDenoisedReflectionResult(dispatch_thread_id, blurred_radiance);
+    }
+}
+#endif //FFX_DNSR_REFLECTIONS_BLUR
diff --git a/ffx-reflection-dnsr/ffx_denoiser_reflections_classify_tiles.h b/ffx-reflection-dnsr/ffx_denoiser_reflections_classify_tiles.h
new file mode 100644
index 0000000..b448684
--- /dev/null
+++ b/ffx-reflection-dnsr/ffx_denoiser_reflections_classify_tiles.h
@@ -0,0 +1,110 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+********************************************************************/
+
+#ifndef FFX_DNSR_REFLECTIONS_CLASSIFY_TILES
+#define FFX_DNSR_REFLECTIONS_CLASSIFY_TILES
+
+#include "ffx_denoiser_reflections_common.h"
+
+// Returns true if this pixel keeps its own ray when samples_per_quad rays are traced per 2x2 pixel quad.
+bool FFX_DNSR_Reflections_IsBaseRay(uint2 dispatch_thread_id, uint samples_per_quad) {
+    switch (samples_per_quad) {
+    case 1:
+        return ((dispatch_thread_id.x & 1) | (dispatch_thread_id.y & 1)) == 0; // Deactivates 3 out of 4 rays
+    case 2:
+        return (dispatch_thread_id.x & 1) == (dispatch_thread_id.y & 1); // Deactivates 2 out of 4 rays. Keeps diagonal.
+    default: // case 4:
+        return true;
+    }
+}
+
+// Number of waves in the thread group that contain at least one pixel which needs the denoiser.
+groupshared uint g_FFX_DNSR_TileCount;
+
+// Classifies one pixel: decides whether it shoots a ray, appends the rays of the wave to the ray list and stores
+// the tile's meta data mask. Uses group barriers and wave intrinsics, so every thread of the group must call it.
+void FFX_DNSR_Reflections_ClassifyTiles(uint2 dispatch_thread_id, uint2 group_thread_id, float roughness, uint2 screen_size, uint samples_per_quad, bool enable_temporal_variance_guided_tracing) {
+    g_FFX_DNSR_TileCount = 0;
+    bool is_first_lane_of_wave = WaveIsFirstLane();
+
+    // First we figure out on a per thread basis if we need to shoot a reflection ray.
+    // Disable offscreen pixels
+    bool needs_ray = !(dispatch_thread_id.x >= screen_size.x || dispatch_thread_id.y >= screen_size.y);
+    // Don't shoot a ray on very rough surfaces.
+    needs_ray = needs_ray && FFX_DNSR_Reflections_IsGlossyReflection(roughness);
+    // Also we don't need to run the denoiser on mirror reflections.
+    bool needs_denoiser = needs_ray && !FFX_DNSR_Reflections_IsMirrorReflection(roughness);
+    // Decide which ray to keep
+    bool is_base_ray = FFX_DNSR_Reflections_IsBaseRay(dispatch_thread_id, samples_per_quad);
+    needs_ray = needs_ray && (!needs_denoiser || is_base_ray); // Make sure to not deactivate mirror reflection rays.
+
+    if (enable_temporal_variance_guided_tracing && needs_denoiser && !needs_ray) {
+        uint lane_mask = FFX_DNSR_Reflections_GetBitMaskFromPixelPosition(dispatch_thread_id);
+        uint base_mask_index = FFX_DNSR_Reflections_GetTemporalVarianceIndex(dispatch_thread_id & (~0b111), screen_size.x);
+        base_mask_index = WaveReadLaneFirst(base_mask_index);
+        uint temporal_variance_mask_upper = FFX_DNSR_Reflections_LoadTemporalVarianceMask(base_mask_index);
+        uint temporal_variance_mask_lower = FFX_DNSR_Reflections_LoadTemporalVarianceMask(base_mask_index + 1);
+        uint temporal_variance_mask = group_thread_id.y < 4 ? temporal_variance_mask_upper : temporal_variance_mask_lower;
+        bool has_temporal_variance = (temporal_variance_mask & lane_mask) != 0;
+        needs_ray = needs_ray || has_temporal_variance;
+    }
+
+    GroupMemoryBarrierWithGroupSync(); // Wait until g_FFX_DNSR_TileCount is cleared - allow some computations before and after
+
+    // Now we know for each thread if it needs to shoot a ray and whether or not a denoiser pass has to run on this pixel.
+    // Next we have to figure out for which pixels that ray is creating the values for. Thus, if we have to copy its value horizontal, vertical or across.
+    bool require_copy = !needs_ray && needs_denoiser; // Our pixel only requires a copy if we want to run a denoiser on it but don't want to shoot a ray for it.
+    // Read the neighbors' flags in uniform control flow first: with short-circuiting && (HLSL 2021) a read nested in the expressions below could target an inactive lane.
+    bool neighbor_x_requires_copy = WaveReadLaneAt(require_copy, WaveGetLaneIndex() ^ 0b01); // QuadReadAcrossX
+    bool neighbor_y_requires_copy = WaveReadLaneAt(require_copy, WaveGetLaneIndex() ^ 0b10); // QuadReadAcrossY
+    bool neighbor_diagonal_requires_copy = WaveReadLaneAt(require_copy, WaveGetLaneIndex() ^ 0b11); // QuadReadAcrossDiagonal
+    bool copy_horizontal = (samples_per_quad != 4) && is_base_ray && neighbor_x_requires_copy;
+    bool copy_vertical = (samples_per_quad == 1) && is_base_ray && neighbor_y_requires_copy;
+    bool copy_diagonal = (samples_per_quad == 1) && is_base_ray && neighbor_diagonal_requires_copy;
+
+    // Thus, we need to compact the rays and append them all at once to the ray list.
+    uint local_ray_index_in_wave = WavePrefixCountBits(needs_ray);
+    uint wave_ray_count = WaveActiveCountBits(needs_ray);
+    uint base_ray_index = 0; // Only the first lane receives the real value; it is broadcast below.
+    if (is_first_lane_of_wave) {
+        FFX_DNSR_Reflections_IncrementRayCounter(wave_ray_count, base_ray_index);
+    }
+    base_ray_index = WaveReadLaneFirst(base_ray_index);
+    if (needs_ray) {
+        int ray_index = base_ray_index + local_ray_index_in_wave;
+        FFX_DNSR_Reflections_StoreRay(ray_index, dispatch_thread_id, copy_horizontal, copy_vertical, copy_diagonal);
+    }
+
+    // Write tile meta data masks
+    bool wave_needs_denoiser = WaveActiveAnyTrue(needs_denoiser);
+    if (WaveIsFirstLane() && wave_needs_denoiser) {
+        InterlockedAdd(g_FFX_DNSR_TileCount, 1);
+    }
+
+    GroupMemoryBarrierWithGroupSync(); // Wait until all waves wrote into g_FFX_DNSR_TileCount
+
+    if (all(group_thread_id == 0)) {
+        uint tile_meta_data_index = FFX_DNSR_Reflections_GetTileMetaDataIndex(WaveReadLaneFirst(dispatch_thread_id), screen_size.x);
+        FFX_DNSR_Reflections_StoreTileMetaDataMask(tile_meta_data_index, g_FFX_DNSR_TileCount);
+    }
+}
+
+#endif //FFX_DNSR_REFLECTIONS_CLASSIFY_TILES
\ No newline at end of file
diff --git a/ffx-reflection-dnsr/ffx_denoiser_reflections_common.h b/ffx-reflection-dnsr/ffx_denoiser_reflections_common.h
new file mode 100644
index 0000000..32ee0e2
--- /dev/null
+++ b/ffx-reflection-dnsr/ffx_denoiser_reflections_common.h
@@ -0,0 +1,101 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#ifndef FFX_DNSR_REFLECTIONS_COMMON
+#define FFX_DNSR_REFLECTIONS_COMMON
+
+// Divides value by divisor and rounds the result up. divisor must not be 0.
+uint FFX_DNSR_Reflections_RoundedDivide(uint value, uint divisor) {
+    return (value + divisor - 1) / divisor;
+}
+// Row-major index of the 8x8 pixel tile that contains pixel_pos.
+uint FFX_DNSR_Reflections_GetTileMetaDataIndex(uint2 pixel_pos, uint screen_width) {
+    uint2 tile_index = uint2(pixel_pos.x / 8, pixel_pos.y / 8);
+    return tile_index.y * FFX_DNSR_Reflections_RoundedDivide(screen_width, 8) + tile_index.x;
+}
+// Index of the temporal variance mask for pixel_pos: two consecutive entries per 8x8 tile, one per 8x4 half.
+uint FFX_DNSR_Reflections_GetTemporalVarianceIndex(uint2 pixel_pos, uint screen_width) {
+    uint2 tile_index = uint2(pixel_pos.x / 8, pixel_pos.y / 8);
+    uint flattened = tile_index.y * FFX_DNSR_Reflections_RoundedDivide(screen_width, 8) + tile_index.x;
+    return 2 * flattened + ((pixel_pos.y % 8) / 4); // Position upper and lower half next to each other
+}
+// Single-bit mask for pixel_pos inside its 8x4 half tile; the bit index is row * 8 + column (0..31).
+uint FFX_DNSR_Reflections_GetBitMaskFromPixelPosition(uint2 pixel_pos) {
+    uint lane_index = (pixel_pos.y % 4) * 8 + (pixel_pos.x % 8);
+    return (1u << lane_index);
+}
+// Edge-stopping weight: the cosine between the two normals, clamped at 0 and raised to the power sigma.
+float FFX_DNSR_Reflections_GetEdgeStoppingNormalWeight(float3 normal_p, float3 normal_q, float sigma) {
+    return pow(max(dot(normal_p, normal_q), 0.0), sigma);
+}
+// Edge-stopping weight that falls from 1 to 0 as |roughness_p - roughness_q| goes from sigma_min to sigma_max.
+float FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeight(float roughness_p, float roughness_q, float sigma_min, float sigma_max) {
+    return 1.0 - smoothstep(sigma_min, sigma_max, abs(roughness_p - roughness_q));
+}
+// min16float variant of FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeight.
+min16float FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeightFP16(min16float roughness_p, min16float roughness_q, min16float sigma_min, min16float sigma_max) {
+    return 1.0 - smoothstep(sigma_min, sigma_max, abs(roughness_p - roughness_q));
+}
+
+// Roughness weight to prevent ghosting on pure mirror reflections
+float FFX_DNSR_Reflections_GetRoughnessAccumulationWeight(float roughness) {
+    float near_singular_roughness = 0.00001;
+    return smoothstep(0.0, near_singular_roughness, roughness);
+}
+// Unnormalized Gaussian falloff exp(-0.5 * ((x - m) / sigma)^2). sigma must not be 0.
+float FFX_DNSR_Reflections_Gaussian(float x, float m, float sigma) {
+    float a = abs(x - m) / sigma; // scalar distance; abs() states the intent directly
+    a *= a;
+    return exp(-0.5 * a);
+}
+// Weighted sum of the color channels with weights (0.299, 0.587, 0.114), never smaller than 0.00001.
+float FFX_DNSR_Reflections_Luminance(float3 color) {
+    return max(dot(color, float3(0.299, 0.587, 0.114)), 0.00001);
+}
+// Returns the `bits` bits of src that start at bit `off`. A width of 32 or more selects all remaining bits.
+uint FFX_DNSR_Reflections_BitfieldExtract(uint src, uint off, uint bits) {
+    uint mask = bits < 32u ? (1u << bits) - 1u : 0xffffffffu; // a shift by >= 32 is out of range
+    return (src >> off) & mask;
+}
+// Replaces the lowest `bits` bits of src with those of ins. A width of 32 or more returns ins.
+uint FFX_DNSR_Reflections_BitfieldInsert(uint src, uint ins, uint bits) {
+    uint mask = bits < 32u ? (1u << bits) - 1u : 0xffffffffu; // a shift by >= 32 is out of range
+    return (ins & mask) | (src & (~mask));
+}
+
+// LANE TO 8x8 MAPPING
+// ===================
+// 00 01 08 09 10 11 18 19
+// 02 03 0a 0b 12 13 1a 1b
+// 04 05 0c 0d 14 15 1c 1d
+// 06 07 0e 0f 16 17 1e 1f
+// 20 21 28 29 30 31 38 39
+// 22 23 2a 2b 32 33 3a 3b
+// 24 25 2c 2d 34 35 3c 3d
+// 26 27 2e 2f 36 37 3e 3f
+uint2 FFX_DNSR_Reflections_RemapLane8x8(uint lane) {
+    return uint2(FFX_DNSR_Reflections_BitfieldInsert(FFX_DNSR_Reflections_BitfieldExtract(lane, 2u, 3u), lane, 1u)
+        , FFX_DNSR_Reflections_BitfieldInsert(FFX_DNSR_Reflections_BitfieldExtract(lane, 3u, 3u)
+            , FFX_DNSR_Reflections_BitfieldExtract(lane, 1u, 2u), 2u));
+}
+
+#endif // FFX_DNSR_REFLECTIONS_COMMON
\ No newline at end of file
diff --git a/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_spatial.h b/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_spatial.h
new file mode 100644
index 0000000..0b422af
--- /dev/null
+++ b/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_spatial.h
@@ -0,0 +1,157 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+********************************************************************/
+
+#ifndef FFX_DNSR_REFLECTIONS_RESOLVE_SPATIAL
+#define FFX_DNSR_REFLECTIONS_RESOLVE_SPATIAL
+
+#include "ffx_denoiser_reflections_common.h"
+
+// Loads radiance, normal and depth of the pixel at dispatch_thread_id + offset.
+void FFX_DNSR_Reflections_LoadWithOffset(int2 dispatch_thread_id, int2 offset, out min16float3 radiance, out min16float3 normal, out float depth) {
+    dispatch_thread_id += offset;
+    radiance = FFX_DNSR_Reflections_LoadRadianceFP16(dispatch_thread_id);
+    normal = FFX_DNSR_Reflections_LoadNormalFP16(dispatch_thread_id);
+    depth = FFX_DNSR_Reflections_LoadDepth(dispatch_thread_id);
+}
+
+// Stores radiance, normal and depth into groupshared memory at group_thread_id + offset.
+void FFX_DNSR_Reflections_StoreWithOffset(int2 group_thread_id, int2 offset, min16float3 radiance, min16float3 normal, float depth) {
+    group_thread_id += offset;
+    FFX_DNSR_Reflections_StoreInGroupSharedMemory(group_thread_id, radiance, normal, depth);
+}
+
+// Loads a 16x16 pixel region, starting 4 pixels up/left of dispatch_thread_id, into groupshared
+// memory. Every thread copies four pixels that are 8 pixels apart, one per quadrant:
+//
+//   XA
+//   BC
+//
+// samples_per_quad and screenWidth are not used; they are kept so existing callers still compile.
+void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id, uint samples_per_quad, uint screenWidth) {
+    const int2 offset_0 = int2(0, 0); // X
+    const int2 offset_1 = int2(8, 0); // A
+    const int2 offset_2 = int2(0, 8); // B
+    const int2 offset_3 = int2(8, 8); // C
+
+    min16float3 radiance_0, radiance_1, radiance_2, radiance_3;
+    min16float3 normal_0, normal_1, normal_2, normal_3;
+    float depth_0, depth_1, depth_2, depth_3;
+
+    dispatch_thread_id -= 4; // 1 + 3 => additional band + left band
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_0, radiance_0, normal_0, depth_0); // X
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_1, radiance_1, normal_1, depth_1); // A
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_2, radiance_2, normal_2, depth_2); // B
+    FFX_DNSR_Reflections_LoadWithOffset(dispatch_thread_id, offset_3, radiance_3, normal_3, depth_3); // C
+
+    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_0, radiance_0, normal_0, depth_0); // X
+    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_1, radiance_1, normal_1, depth_1); // A
+    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_2, radiance_2, normal_2, depth_2); // B
+    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_3, radiance_3, normal_3, depth_3); // C
+}
+
+// Weighted average of the center radiance (weight 1) and 15 neighbors read from groupshared memory.
+// A neighbor's weight is its normal similarity (exponent 64) times a Gaussian of its depth difference (sigma = depth_sigma).
+// group_thread_id is the groupshared coordinate of the center; the offsets reach up to 3 texels in every direction.
+min16float3 FFX_DNSR_Reflections_Resolve(int2 group_thread_id, min16float3 center_radiance, min16float3 center_normal, float depth_sigma, float center_depth) {
+    float3 accumulated_radiance = center_radiance;
+    float accumulated_weight = 1;
+
+    const float normal_sigma = 64.0;
+
+    // First 15 numbers of Halton(2,3) stretched to [-3,3]
+    const int2 reuse_offsets[] = {
+        0, 1,
+        -2, 1,
+        2, -3,
+        -3, 0,
+        1, 2,
+        -1, -2,
+        3, 0,
+        -3, 3,
+        0, -3,
+        -1, -1,
+        2, 1,
+        -2, -2,
+        1, 0,
+        0, 2,
+        3, -1
+    };
+    const uint sample_count = 15;
+
+    // Mirror the pattern per axis by the pixel's parity so that neighboring pixels sample different texels.
+    // This must be an int2: a scalar int silently kept only the x component and ignored the y parity.
+    int2 mirror = 2 * (group_thread_id & 0b1) - 1;
+
+    for (uint i = 0; i < sample_count; ++i) {
+        int2 new_idx = group_thread_id + mirror * reuse_offsets[i];
+        min16float3 normal = FFX_DNSR_Reflections_LoadNormalFromGroupSharedMemory(new_idx);
+        float depth = FFX_DNSR_Reflections_LoadDepthFromGroupSharedMemory(new_idx);
+        min16float3 radiance = FFX_DNSR_Reflections_LoadRadianceFromGroupSharedMemory(new_idx);
+        float weight = 1
+            * FFX_DNSR_Reflections_GetEdgeStoppingNormalWeight((float3)center_normal, (float3)normal, normal_sigma)
+            * FFX_DNSR_Reflections_Gaussian(center_depth, depth, depth_sigma)
+            ;
+
+        // Accumulate all contributions.
+        accumulated_weight += weight;
+        accumulated_radiance += weight * radiance.xyz;
+    }
+
+    accumulated_radiance /= max(accumulated_weight, 0.00001);
+    return (min16float3)accumulated_radiance;
+}
+
+// Entry point of the spatial resolve pass for one thread. Pixels of tiles flagged in the tile meta data are
+// filtered (non-glossy and mirror pixels keep their radiance); pixels of all other tiles are copied through.
+void FFX_DNSR_Reflections_ResolveSpatial(int2 dispatch_thread_id, int2 group_thread_id, uint samples_per_quad, uint2 screen_size) {
+    // First check if we have to denoise or if a simple copy is enough
+    uint tile_meta_data_index = FFX_DNSR_Reflections_GetTileMetaDataIndex(dispatch_thread_id, screen_size.x);
+    tile_meta_data_index = WaveReadLaneFirst(tile_meta_data_index);
+    bool needs_denoiser = FFX_DNSR_Reflections_LoadTileMetaDataMask(tile_meta_data_index);
+
+    [branch]
+    if (needs_denoiser) {
+        float center_roughness = FFX_DNSR_Reflections_LoadRoughness(dispatch_thread_id);
+        FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id, samples_per_quad, screen_size.x);
+        GroupMemoryBarrierWithGroupSync();
+
+        group_thread_id += 4; // Center threads in groupshared memory
+        min16float3 center_radiance = FFX_DNSR_Reflections_LoadRadianceFromGroupSharedMemory(group_thread_id);
+
+        if (!FFX_DNSR_Reflections_IsGlossyReflection(center_roughness) || FFX_DNSR_Reflections_IsMirrorReflection(center_roughness)) {
+            FFX_DNSR_Reflections_StoreSpatiallyDenoisedReflections(dispatch_thread_id, center_radiance);
+            return;
+        }
+
+        min16float3 center_normal = FFX_DNSR_Reflections_LoadNormalFromGroupSharedMemory(group_thread_id);
+        float center_depth = FFX_DNSR_Reflections_LoadDepthFromGroupSharedMemory(group_thread_id);
+
+        min16float3 resolved_radiance = FFX_DNSR_Reflections_Resolve(group_thread_id, center_radiance, center_normal, g_depth_sigma, center_depth);
+        FFX_DNSR_Reflections_StoreSpatiallyDenoisedReflections(dispatch_thread_id, resolved_radiance);
+    }
+    else {
+        min16float3 radiance = FFX_DNSR_Reflections_LoadRadianceFP16(dispatch_thread_id);
+        FFX_DNSR_Reflections_StoreSpatiallyDenoisedReflections(dispatch_thread_id, radiance);
+    }
+}
+
+#endif //FFX_DNSR_REFLECTIONS_RESOLVE_SPATIAL
diff --git a/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_temporal.h b/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_temporal.h
new file mode 100644
index 0000000..11cb415
--- /dev/null
+++ b/ffx-reflection-dnsr/ffx_denoiser_reflections_resolve_temporal.h
@@ -0,0 +1,245 @@
+/**********************************************************************
+Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +********************************************************************/ + +#ifndef FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL +#define FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL + +#include "ffx_denoiser_reflections_common.h" + +// From "Temporal Reprojection Anti-Aliasing" +// https://github.com/playdeadgames/temporal +/********************************************************************** +Copyright (c) [2015] [Playdead] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+********************************************************************/
+// Clips prev_sample against the color box [aabb_min, aabb_max]: a sample outside the box is moved along
+// the line to the box center until it reaches the box; a sample inside the box is returned unchanged.
+float3 FFX_DNSR_Reflections_ClipAABB(float3 aabb_min, float3 aabb_max, float3 prev_sample) {
+    // Main idea behind clipping - it prevents clustering when neighbor color space
+    // is distant from history sample
+
+    // Here we find intersection between color vector and aabb color box
+
+    // Note: only clips towards aabb center. The half extent is padded by 0.001,
+    // so the division below stays well defined for a zero-sized box.
+    float3 box_center = 0.5 * (aabb_max + aabb_min);
+    float3 box_half_extent = 0.5 * (aabb_max - aabb_min) + 0.001;
+
+    // Offset of the sample from the center, as a color vector and in units of the half extent
+    float3 center_to_sample = prev_sample - box_center;
+    float3 offset_in_extents = abs(center_to_sample / box_half_extent);
+    float largest_offset = max(max(offset_in_extents.x, offset_in_extents.y), offset_in_extents.z);
+
+    if (largest_offset > 1.0) {
+        return box_center + center_to_sample / largest_offset; // clip towards color vector
+    }
+    return prev_sample; // point is inside aabb
+}
+
+// Estimates spatial reflection radiance standard deviation: the per-channel sample standard deviation
+// of the spatially denoised reflections in the 3x3 neighborhood of dispatch_thread_id.
+float3 FFX_DNSR_Reflections_EstimateLocalNeighborhood(int2 dispatch_thread_id) {
+    const int radius = 1;
+    float sample_count = (radius * 2.0 + 1.0) * (radius * 2.0 + 1.0);
+
+    float3 sum = 0.0;
+    float3 sum_of_squares = 0.0;
+
+    for (int dx = -radius; dx <= radius; dx++) {
+        for (int dy = -radius; dy <= radius; dy++) {
+            float3 texel = FFX_DNSR_Reflections_LoadSpatiallyDenoisedReflections(dispatch_thread_id + int2(dx, dy));
+            sum += texel;
+            sum_of_squares += texel * texel;
+        }
+    }
+
+    // Sample variance (divided by count - 1), clamped so the square root never sees a negative value.
+    float3 variance = (sum_of_squares - sum * sum / sample_count) / (sample_count - 1.0);
+    return sqrt(max(variance, 0.0));
+}
+
+// Reflector position reprojection: the history UV is the current UV minus the motion vector.
+// dispatch_thread_id is not used.
+float2 FFX_DNSR_Reflections_GetSurfaceReprojection(int2 dispatch_thread_id, float2 uv, float2 motion_vector) {
+    return uv - motion_vector;
+}
+
+// Hit position reprojection: extends the view ray through the reflecting surface by reflected_ray_length
+// and returns the xy screen position of that point in the previous frame.
+float2 FFX_DNSR_Reflections_GetHitPositionReprojection(int2 dispatch_thread_id, float2 uv, float reflected_ray_length) {
+    float depth = FFX_DNSR_Reflections_LoadDepth(dispatch_thread_id);
+    float3 view_ray = FFX_DNSR_Reflections_ScreenSpaceToViewSpace(float3(uv, depth));
+
+    // We start out with reconstructing the ray length in view space.
+    // This includes the portion from the camera to the reflecting surface as well as the portion from the surface to the hit position.
+    float surface_distance = length(view_ray);
+    float total_length = surface_distance + reflected_ray_length;
+
+    // We then perform a parallax correction by shooting a ray
+    // of the same length "straight through" the reflecting surface
+    // and reprojecting the tip of that ray to the previous frame.
+    float3 straight_ray = (view_ray / surface_distance) * total_length; // normalize(view_ray), then rescale
+    // This is the "fake" hit position if we would follow the ray straight through the surface.
+    float3 world_hit_position = FFX_DNSR_Reflections_ViewSpaceToWorldSpace(float4(straight_ray, 1));
+    float3 prev_hit_position = FFX_DNSR_Reflections_WorldSpaceToScreenSpacePrevious(world_hit_position);
+    return prev_hit_position.xy;
+}
+
+// Loads the history radiance at uv, clips it to [radiance_min, radiance_max] and writes it to `radiance`.
+// Returns the accumulation speed for that history sample, saturated to [0, 1].
+float FFX_DNSR_Reflections_SampleHistory(float2 uv, uint2 screen_size, float3 normal, float roughness, float3 radiance_min, float3 radiance_max, float roughness_sigma_min, float roughness_sigma_max, float temporalStabilityFactor, out float3 radiance) {
+    const float normal_sigma = 8.0;
+    int2 history_coords = int2(screen_size * uv);
+
+    radiance = FFX_DNSR_Reflections_ClipAABB(radiance_min, radiance_max, FFX_DNSR_Reflections_LoadRadianceHistory(history_coords));
+    float3 history_normal = FFX_DNSR_Reflections_LoadNormalHistory(history_coords);
+    float history_roughness = FFX_DNSR_Reflections_LoadRoughnessHistory(history_coords);
+
+    // The stability factor is scaled down where the history's normal or roughness differs
+    // from the current frame, and by the roughness accumulation weight of the current pixel.
+    float normal_weight = FFX_DNSR_Reflections_GetEdgeStoppingNormalWeight(normal, history_normal, normal_sigma);
+    float roughness_weight = FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeight(roughness, history_roughness, roughness_sigma_min, roughness_sigma_max);
+    float accumulation_weight = FFX_DNSR_Reflections_GetRoughnessAccumulationWeight(roughness);
+
+    return saturate(temporalStabilityFactor * normal_weight * roughness_weight * accumulation_weight);
+}
+
+float FFX_DNSR_Reflections_ComputeTemporalVariance(float3 history_radiance, float3 radiance) {
+    // Check temporal variance.
+ float history_luminance = FFX_DNSR_Reflections_Luminance(history_radiance); + float luminance = FFX_DNSR_Reflections_Luminance(radiance); + return abs(history_luminance - luminance) / max(max(history_luminance, luminance), 0.00001); +} + +groupshared uint g_FFX_DNSR_Reflections_TemporalVarianceMask[2]; + +void FFX_DNSR_Reflections_WriteTemporalVarianceMask(uint mask_write_index, uint has_temporal_variance_mask) { + // All lanes write to the same index, so we combine the masks within the wave and do a single write + const uint s_has_temporal_variance_mask = WaveActiveBitOr(has_temporal_variance_mask); + if (WaveIsFirstLane()) { + FFX_DNSR_Reflections_StoreTemporalVarianceMask(mask_write_index, s_has_temporal_variance_mask); + } +} + +void FFX_DNSR_Reflections_WriteTemporalVariance(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size, bool has_temporal_variance) { + uint mask_write_index = FFX_DNSR_Reflections_GetTemporalVarianceIndex(dispatch_thread_id, screen_size.x); + uint lane_mask = FFX_DNSR_Reflections_GetBitMaskFromPixelPosition(dispatch_thread_id); + uint has_temporal_variance_mask = has_temporal_variance ? lane_mask : 0; + + if (WaveGetLaneCount() == 32) { + FFX_DNSR_Reflections_WriteTemporalVarianceMask(mask_write_index, has_temporal_variance_mask); + } + else if (WaveGetLaneCount() == 64) { // The lower 32 lanes write to a different index than the upper 32 lanes. 
+ if (WaveGetLaneIndex() < 32) { + FFX_DNSR_Reflections_WriteTemporalVarianceMask(mask_write_index, has_temporal_variance_mask); // Write lower + } + else { + FFX_DNSR_Reflections_WriteTemporalVarianceMask(mask_write_index, has_temporal_variance_mask); // Write upper + } + } + else { // Use groupshared memory for all other wave sizes + uint mask_index = group_thread_id.y / 4; + g_FFX_DNSR_Reflections_TemporalVarianceMask[mask_index] = 0; + GroupMemoryBarrierWithGroupSync(); + InterlockedOr(g_FFX_DNSR_Reflections_TemporalVarianceMask[mask_index], has_temporal_variance_mask); + GroupMemoryBarrierWithGroupSync(); + + if (all(group_thread_id == 0)) { + FFX_DNSR_Reflections_StoreTemporalVarianceMask(mask_write_index, g_FFX_DNSR_Reflections_TemporalVarianceMask[0]); + FFX_DNSR_Reflections_StoreTemporalVarianceMask(mask_write_index + 1, g_FFX_DNSR_Reflections_TemporalVarianceMask[1]); + } + } +} + +void FFX_DNSR_Reflections_ResolveTemporal(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size, float temporal_stability_factor, float temporal_variance_threshold) { + + // First check if we have to denoise or if a simple copy is enough + uint tile_meta_data_index = FFX_DNSR_Reflections_GetTileMetaDataIndex(dispatch_thread_id, screen_size.x); + tile_meta_data_index = WaveReadLaneFirst(tile_meta_data_index); + bool needs_denoiser = FFX_DNSR_Reflections_LoadTileMetaDataMask(tile_meta_data_index); + + bool has_temporal_variance = false; + + [branch] + if (needs_denoiser) { + float roughness = FFX_DNSR_Reflections_LoadRoughness(dispatch_thread_id); + if (!FFX_DNSR_Reflections_IsGlossyReflection(roughness) || FFX_DNSR_Reflections_IsMirrorReflection(roughness)) { + return; + } + + float2 uv = float2(dispatch_thread_id.x + 0.5, dispatch_thread_id.y + 0.5) / screen_size; + + float3 normal = FFX_DNSR_Reflections_LoadNormal(dispatch_thread_id); + float3 radiance = FFX_DNSR_Reflections_LoadSpatiallyDenoisedReflections(dispatch_thread_id); + float3 radiance_history = 
FFX_DNSR_Reflections_LoadRadianceHistory(dispatch_thread_id); + float ray_length = FFX_DNSR_Reflections_LoadRayLength(dispatch_thread_id); + + // And clip it to the local neighborhood + float2 motion_vector = FFX_DNSR_Reflections_LoadMotionVector(dispatch_thread_id); + float3 color_std = FFX_DNSR_Reflections_EstimateLocalNeighborhood(dispatch_thread_id); + color_std *= 2.2; + + float3 radiance_min = radiance.xyz - color_std; + float3 radiance_max = radiance + color_std; + + // Reproject point on the reflecting surface + float2 surface_reprojection_uv = FFX_DNSR_Reflections_GetSurfaceReprojection(dispatch_thread_id, uv, motion_vector); + + // Reproject hit point + float2 hit_reprojection_uv = FFX_DNSR_Reflections_GetHitPositionReprojection(dispatch_thread_id, uv, ray_length); + + float2 reprojection_uv; + reprojection_uv = (roughness < 0.05) ? hit_reprojection_uv : surface_reprojection_uv; + + float3 reprojection = 0; + float weight = 0; + if (all(reprojection_uv > 0.0) && all(reprojection_uv < 1.0)) { + weight = FFX_DNSR_Reflections_SampleHistory(reprojection_uv, screen_size, normal, roughness, radiance_min, radiance_max, g_roughness_sigma_min, g_roughness_sigma_max, temporal_stability_factor, reprojection); + } + + radiance = lerp(radiance, reprojection, weight); + has_temporal_variance = FFX_DNSR_Reflections_ComputeTemporalVariance(radiance_history, radiance) > temporal_variance_threshold; + + FFX_DNSR_Reflections_StoreTemporallyDenoisedReflections(dispatch_thread_id, radiance); + } + + FFX_DNSR_Reflections_WriteTemporalVariance(dispatch_thread_id, group_thread_id, screen_size, has_temporal_variance); +} + +#endif //FFX_DNSR_REFLECTIONS_RESOLVE_TEMPORAL