Skip to content

Commit

Permalink
FidelityFX Denoiser v1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
rys committed Nov 23, 2020
0 parents commit 670c76e
Show file tree
Hide file tree
Showing 8 changed files with 812 additions and 0 deletions.
19 changes: 19 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
38 changes: 38 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# FidelityFX Denoiser

Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

## Overview

FidelityFX Denoiser will contain a collection of highly optimized denoiser implementations for specific use cases.

## FFX Reflection Denoiser

The reflection denoiser includes a high-performance spatio-temporal denoiser specialized for reflection denoising.
The preferred use case of this denoiser is within applications requiring denoised radiance values generated by some stochastic reflection implementation.
Examples of stochastic reflections:
- Stochastic Screen Space Reflections
- Stochastic Raytraced Reflections

### Links

- ffx-reflection-dnsr contains the [Reflection Denoiser](https://github.com/GPUOpen-Effects/FidelityFX-Denoiser/tree/master/ffx-reflection-dnsr)
- Visit [FidelityFX SSSR](https://github.com/GPUOpen-Effects/FidelityFX-SSSR/tree/master/sample) to see the reflection denoiser in action.

## FFX Shadow Denoiser (Future Update)
A denoiser optimized for raytraced shadows will be added here in a future update. If you are a developer working on a raytracing title then please contact your AMD representative for early access.
Binary file added docs/FFX_Denoiser_Reflection_Technology.pdf
Binary file not shown.
142 changes: 142 additions & 0 deletions ffx-reflection-dnsr/ffx_denoiser_reflections_blur.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/**********************************************************************
Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
********************************************************************/

#ifndef FFX_DNSR_REFLECTIONS_BLUR
#define FFX_DNSR_REFLECTIONS_BLUR

#include "ffx_denoiser_reflections_common.h"

// Returns the weight of the tap at offset (x, y) from the kernel center.
// The weight is separable: each axis contributes 6, 4 or 1 for a distance of
// 0, 1 or 2 texels, and the product is divided by 256. The full 1D kernel
// (1, 4, 6, 4, 1) sums to 16, so the 5x5 weights sum to 1.
// Only offsets with |x| <= 2 and |y| <= 2 index inside the table.
min16float FFX_DNSR_Reflections_GaussianWeight(int x, int y) {
    uint axis_taps[] = { 6, 4, 1 };
    uint tap_x = axis_taps[abs(x)];
    uint tap_y = axis_taps[abs(y)];
    return min16float(tap_x * tap_y) / 256.0;
}

// Filters the 5x5 neighborhood around group_thread_id in group shared memory.
// Every tap is weighted by the spatial kernel weight multiplied by the
// roughness edge-stopping weight computed against center_roughness.
// Returns the weighted radiance sum divided by the total weight; the divisor
// is clamped to 0.0001 so an all-zero weight sum does not divide by zero.
min16float3 FFX_DNSR_Reflections_Resolve(int2 group_thread_id, min16float center_roughness, min16float roughness_sigma_min, min16float roughness_sigma_max) {
    const int radius = 2;

    min16float3 accumulated_radiance = 0.0;
    min16float accumulated_weight = 0.0;

    for (int dy = -radius; dy <= radius; ++dy) {
        for (int dx = -radius; dx <= radius; ++dx) {
            // Fetch the neighboring sample from group shared memory.
            min16float3 tap_radiance;
            min16float tap_roughness;
            FFX_DNSR_Reflections_LoadFromGroupSharedMemory(group_thread_id + int2(dx, dy), tap_radiance, tap_roughness);

            min16float spatial_weight = FFX_DNSR_Reflections_GaussianWeight(dx, dy);
            min16float roughness_weight = FFX_DNSR_Reflections_GetEdgeStoppingRoughnessWeightFP16(center_roughness, tap_roughness, roughness_sigma_min, roughness_sigma_max);
            min16float tap_weight = spatial_weight * roughness_weight;

            accumulated_radiance += tap_weight * tap_radiance;
            accumulated_weight += tap_weight;
        }
    }

    accumulated_radiance /= max(accumulated_weight, 0.0001);
    return min16float3(accumulated_radiance);
}

// Loads the FP16 radiance and roughness stored at dispatch_thread_id + offset.
void FFX_DNSR_Reflections_LoadWithOffset(int2 dispatch_thread_id, int2 offset, out min16float3 radiance, out min16float roughness) {
    const int2 source_coords = dispatch_thread_id + offset;
    radiance = FFX_DNSR_Reflections_LoadRadianceFP16(source_coords);
    roughness = FFX_DNSR_Reflections_LoadRoughnessFP16(source_coords);
}

// Writes radiance and roughness into group shared memory at group_thread_id + offset.
void FFX_DNSR_Reflections_StoreWithOffset(int2 group_thread_id, int2 offset, min16float3 radiance, min16float roughness) {
    const int2 target_coords = group_thread_id + offset;
    FFX_DNSR_Reflections_StoreInGroupSharedMemory(target_coords, radiance, roughness);
}

// Fills group shared memory with the radiance/roughness samples the blur reads.
// Every thread loads up to three samples so that the group's own region plus
// its border is covered. The regions are laid out as
//
//   XXA
//   XXA
//   BBC
//
// X: one sample per thread at the thread's own position.
// A: written by threads with x < 4, shifted by (8, 0).
// C: written by threads with x >= 4 and y >= 4, shifted by (4, 4).
// B: written by threads with y < 4, shifted by (0, 8).
// NOTE(review): the constants 4 and 8 presumably assume an 8x8 thread group - confirm against the dispatching shader.
void FFX_DNSR_Reflections_InitializeGroupSharedMemory(int2 dispatch_thread_id, int2 group_thread_id) {
    const bool in_left_half = group_thread_id.x < 4;
    const bool in_upper_half = group_thread_id.y < 4;

    // Threads that have nothing to contribute to a region are all redirected to
    // the same memory location to guarantee cache hits; their loaded values are
    // never stored.
    const int2 shared_dummy_offset = -group_thread_id;

    // Offset used for regions A and C.
    int2 offset_a_c = shared_dummy_offset;
    if (in_left_half) {
        offset_a_c = int2(8, 0);
    }
    else if (!in_upper_half) {
        offset_a_c = int2(4, 4);
    }

    // Offset used for region B.
    int2 offset_b = in_upper_half ? int2(0, 8) : shared_dummy_offset;

    min16float3 radiance_x;
    min16float roughness_x;

    min16float3 radiance_a_c;
    min16float roughness_a_c;

    min16float3 radiance_b;
    min16float roughness_b;

    // Shift the load origin by two texels; the blur later shifts its group
    // shared memory coordinates by +2 to compensate.
    const int2 load_origin = dispatch_thread_id - 2;
    FFX_DNSR_Reflections_LoadWithOffset(load_origin, int2(0, 0), radiance_x, roughness_x); // X
    FFX_DNSR_Reflections_LoadWithOffset(load_origin, offset_a_c, radiance_a_c, roughness_a_c); // A & C
    FFX_DNSR_Reflections_LoadWithOffset(load_origin, offset_b, radiance_b, roughness_b); // B

    FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, int2(0, 0), radiance_x, roughness_x); // X
    if (in_left_half || !in_upper_half) {
        FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_a_c, radiance_a_c, roughness_a_c); // A & C
    }
    if (in_upper_half) {
        FFX_DNSR_Reflections_StoreWithOffset(group_thread_id, offset_b, radiance_b, roughness_b); // B
    }
}

// Entry point of the blur pass.
// Reads the meta data mask of the tile containing dispatch_thread_id and, when
// it is set, stages the neighborhood in group shared memory, filters it with
// FFX_DNSR_Reflections_Resolve and stores the result for this pixel.
// Nothing is stored when the mask is not set, or when the center sample is
// either not a glossy reflection or is a mirror reflection.
void FFX_DNSR_Reflections_Blur(int2 dispatch_thread_id, int2 group_thread_id, uint2 screen_size) {

    // The tile index is taken from the first active lane so the lookup is uniform across the wave.
    uint meta_data_index = WaveReadLaneFirst(FFX_DNSR_Reflections_GetTileMetaDataIndex(dispatch_thread_id, screen_size.x));
    bool tile_needs_denoiser = FFX_DNSR_Reflections_LoadTileMetaDataMask(meta_data_index);

    [branch]
    if (tile_needs_denoiser) {
        FFX_DNSR_Reflections_InitializeGroupSharedMemory(dispatch_thread_id, group_thread_id);
        GroupMemoryBarrierWithGroupSync();

        // Group shared memory was filled starting two texels before the group,
        // so shift by 2 to address this thread's own sample.
        int2 centered_thread_id = group_thread_id + 2;

        min16float3 center_radiance;
        min16float center_roughness;
        FFX_DNSR_Reflections_LoadFromGroupSharedMemory(centered_thread_id, center_radiance, center_roughness);

        // Only glossy, non-mirror samples are filtered and written.
        if (FFX_DNSR_Reflections_IsGlossyReflection(center_roughness) && !FFX_DNSR_Reflections_IsMirrorReflection(center_roughness)) {
            min16float3 filtered_radiance = FFX_DNSR_Reflections_Resolve(centered_thread_id, center_roughness, g_roughness_sigma_min, g_roughness_sigma_max);
            FFX_DNSR_Reflections_StoreDenoisedReflectionResult(dispatch_thread_id, filtered_radiance);
        }
    }
}
#endif //FFX_DNSR_REFLECTIONS_BLUR
110 changes: 110 additions & 0 deletions ffx-reflection-dnsr/ffx_denoiser_reflections_classify_tiles.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**********************************************************************
Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
********************************************************************/

#ifndef FFX_DNSR_REFLECTIONS_CLASSIFY_TILES
#define FFX_DNSR_REFLECTIONS_CLASSIFY_TILES

#include "ffx_denoiser_reflections_common.h"

// Decides whether the pixel at dispatch_thread_id keeps its ray for the given
// number of samples per 2x2 quad:
//   1 - only the pixel with even x and even y keeps its ray (3 of 4 deactivated),
//   2 - the pixels with equal x/y parity keep their rays (the diagonal),
//   any other value (intended: 4) - every pixel keeps its ray.
bool FFX_DNSR_Reflections_IsBaseRay(uint2 dispatch_thread_id, uint samples_per_quad) {
    const uint parity_x = dispatch_thread_id.x & 1;
    const uint parity_y = dispatch_thread_id.y & 1;

    if (samples_per_quad == 1) {
        return (parity_x | parity_y) == 0;
    }
    if (samples_per_quad == 2) {
        return parity_x == parity_y;
    }
    return true;
}

// Per-group counter used by FFX_DNSR_Reflections_ClassifyTiles: cleared at the
// start, incremented once by every wave that contains at least one pixel
// needing the denoiser, and finally stored as the tile's meta data mask.
groupshared uint g_FFX_DNSR_TileCount;

// Classifies one pixel of a tile: decides whether it shoots a reflection ray
// and whether it needs the denoiser, appends the surviving rays of each wave to
// the ray list in one compacted batch, and writes the tile's meta data mask.
//
// dispatch_thread_id  - pixel coordinate handled by this thread.
// group_thread_id     - coordinate of this thread inside its group.
// roughness           - surface roughness of the pixel.
// screen_size         - render target size; pixels outside of it never shoot rays.
// samples_per_quad    - 1, 2 or 4 rays per 2x2 quad (see FFX_DNSR_Reflections_IsBaseRay).
// enable_temporal_variance_guided_tracing - when set, pixels that were thinned out
//                       are re-enabled if their bit is set in the temporal variance mask.
//
// Contains group barriers, so it has to be reached by every thread of the group.
// NOTE(review): the quad lookups via lane index ^ 1/2/3 presumably rely on the lanes
// of a wave being arranged in 2x2 quad order - confirm against the dispatching shader.
void FFX_DNSR_Reflections_ClassifyTiles(uint2 dispatch_thread_id, uint2 group_thread_id, float roughness, uint2 screen_size, uint samples_per_quad, bool enable_temporal_variance_guided_tracing) {
    g_FFX_DNSR_TileCount = 0;

    bool is_first_lane_of_wave = WaveIsFirstLane();

    // First we figure out on a per thread basis if we need to shoot a reflection ray.
    // Disable offscreen pixels
    bool needs_ray = !(dispatch_thread_id.x >= screen_size.x || dispatch_thread_id.y >= screen_size.y);

    // Don't shoot a ray on very rough surfaces.
    needs_ray = needs_ray && FFX_DNSR_Reflections_IsGlossyReflection(roughness);

    // Also we don't need to run the denoiser on mirror reflections.
    bool needs_denoiser = needs_ray && !FFX_DNSR_Reflections_IsMirrorReflection(roughness);

    // Decide which ray to keep
    bool is_base_ray = FFX_DNSR_Reflections_IsBaseRay(dispatch_thread_id, samples_per_quad);
    needs_ray = needs_ray && (!needs_denoiser || is_base_ray); // Make sure to not deactivate mirror reflection rays.

    if (enable_temporal_variance_guided_tracing && needs_denoiser && !needs_ray) {
        uint lane_mask = FFX_DNSR_Reflections_GetBitMaskFromPixelPosition(dispatch_thread_id);
        // Index of the mask belonging to the 8x8 aligned origin of this pixel.
        uint base_mask_index = FFX_DNSR_Reflections_GetTemporalVarianceIndex(dispatch_thread_id & (~0b111), screen_size.x);
        base_mask_index = WaveReadLaneFirst(base_mask_index);

        // Two consecutive masks are loaded; rows 0-3 of the group use the first, rows 4-7 the second.
        uint temporal_variance_mask_upper = FFX_DNSR_Reflections_LoadTemporalVarianceMask(base_mask_index);
        uint temporal_variance_mask_lower = FFX_DNSR_Reflections_LoadTemporalVarianceMask(base_mask_index + 1);
        uint temporal_variance_mask = group_thread_id.y < 4 ? temporal_variance_mask_upper : temporal_variance_mask_lower;

        bool has_temporal_variance = (temporal_variance_mask & lane_mask) != 0;
        needs_ray = needs_ray || has_temporal_variance;
    }

    GroupMemoryBarrierWithGroupSync(); // Wait until g_FFX_DNSR_TileCount is cleared - allow some computations before and after

    // Now we know for each thread if it needs to shoot a ray and whether or not a denoiser pass has to run on this pixel.

    // Next we have to figure out for which pixels that ray is creating the values for. Thus, if we have to copy its value horizontal, vertical or across.
    bool require_copy = !needs_ray && needs_denoiser; // Our pixel only requires a copy if we want to run a denoiser on it but don't want to shoot a ray for it.

    // Read the neighbors' flags unconditionally, with every lane participating.
    // If these reads were the right-hand operands of '&&', a compiler that
    // short-circuits scalar logical operators (HLSL 2021) would only execute
    // them on base-ray lanes, and the lanes being read from would be inactive.
    uint lane_index = WaveGetLaneIndex();
    bool horizontal_neighbor_requires_copy = WaveReadLaneAt(require_copy, lane_index ^ 0b01); // QuadReadAcrossX
    bool vertical_neighbor_requires_copy = WaveReadLaneAt(require_copy, lane_index ^ 0b10); // QuadReadAcrossY
    bool diagonal_neighbor_requires_copy = WaveReadLaneAt(require_copy, lane_index ^ 0b11); // QuadReadAcrossDiagonal

    bool copy_horizontal = (samples_per_quad != 4) && is_base_ray && horizontal_neighbor_requires_copy;
    bool copy_vertical = (samples_per_quad == 1) && is_base_ray && vertical_neighbor_requires_copy;
    bool copy_diagonal = (samples_per_quad == 1) && is_base_ray && diagonal_neighbor_requires_copy;

    // Thus, we need to compact the rays and append them all at once to the ray list.
    uint local_ray_index_in_wave = WavePrefixCountBits(needs_ray);
    uint wave_ray_count = WaveActiveCountBits(needs_ray);
    // Only the first lane receives a value from the counter; initialize the
    // variable so the other lanes never pass an uninitialized value along.
    uint base_ray_index = 0;
    if (is_first_lane_of_wave) {
        FFX_DNSR_Reflections_IncrementRayCounter(wave_ray_count, base_ray_index);
    }
    base_ray_index = WaveReadLaneFirst(base_ray_index);
    if (needs_ray) {
        int ray_index = base_ray_index + local_ray_index_in_wave;
        FFX_DNSR_Reflections_StoreRay(ray_index, dispatch_thread_id, copy_horizontal, copy_vertical, copy_diagonal);
    }

    // Write tile meta data masks
    bool wave_needs_denoiser = WaveActiveAnyTrue(needs_denoiser);
    if (WaveIsFirstLane() && wave_needs_denoiser) {
        InterlockedAdd(g_FFX_DNSR_TileCount, 1);
    }

    GroupMemoryBarrierWithGroupSync(); // Wait until all waves wrote into g_FFX_DNSR_TileCount

    // A single thread per group publishes the result for the whole tile.
    if (all(group_thread_id == 0)) {
        uint tile_meta_data_index = FFX_DNSR_Reflections_GetTileMetaDataIndex(WaveReadLaneFirst(dispatch_thread_id), screen_size.x);
        FFX_DNSR_Reflections_StoreTileMetaDataMask(tile_meta_data_index, g_FFX_DNSR_TileCount);
    }
}

#endif //FFX_DNSR_REFLECTIONS_CLASSIFY_TILES
Loading

0 comments on commit 670c76e

Please sign in to comment.