diff --git a/README.md b/README.md index 508f49d..4bbaf30 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,32 @@ # FidelityFX SPD Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# Changelist v2.0 + +- Added support for cube and array textures. SpdDownsample and SpdDownsampleH shader functions now take index of texture slice as an additional parameter. For regular texture use 0. +- Added support for updating only sub-rectangle of the texture. Additional, optional parameter workGroupOffset added to shader functions SpdDownsample and SpdDownsampleH. +- Added C function SpdSetup that helps to setup constants to be passed as a constant buffer. 
+- The global atomic counter is automatically reset to 0 by the shader at the end, so you do not need to clear it before every use, just once after creation + # Single Pass Downsampler - SPD FidelityFX Single Pass Downsampler (SPD) provides an RDNA-optimized solution for generating up to 12 MIP levels of a texture. +- Generates up to 12 MIP levels (maximum source texture size is 4096x4096) per slice. +- Supports Texture2DArrays / CubeTextures: downsamples all slices within one single dispatch call. +- Single compute dispatch. +- User defined 2x2 reduction function. +- User controlled border handling. +- Supports various image formats. +- HLSL and GLSL versions available. +- Rapid Packed Math support. +- Optionally uses subgroup operations / SM6+ wave operations, which can provide faster performance. +- Supports downsampling of a sub-rectangle from the source texture: useful for atlas textures in which only a known region got updated # Sample Build Instructions 1. Clone submodules by running 'git submodule update --init --recursive' (so you get the Cauldron framework too) 2. Run sample/build/GenerateSolutions.bat -3. open solution, build + run + have fun 😊 +3. Open solution, build + run + have fun 😊 # SPD Files You can find them in ffx-spd @@ -21,15 +38,26 @@ Downsampler - PS: computes each mip in a separate pixel shader pass - Multipass CS: computes each mip in a separate compute shader pass - SPD CS: uses the SPD library, computes all mips (up to a source texture of size 4096²) in a single pass -- SPD CS linear sampler: uses the SPD library and for sampling the source texture a linear sampler -SPD Versions -- NO-WaveOps: uses only LDS to share the data between threads +SPD Load Versions +- Load: uses a load to fetch from the source texture +- Linear Sampler: uses a sampler to fetch from the source texture. Sampler must meet the user defined reduction function. 
+ +SPD WaveOps Versions +- No-WaveOps: uses only LDS to share the data between threads - WaveOps: uses Intrinsics and LDS to share the data between threads -SPD Non-Packed / Packed Version +SPD Non-Packed / Packed Versions - Non-Packed: uses fp32 - Packed: uses fp16, reduced register pressure # Recommendations -We recommend to use the WapeOps path when supported. If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well. \ No newline at end of file +We recommend using the WaveOps path when supported. If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well. +If you compute the average for each 2x2 quad, we also recommend using a linear sampler to fetch from the source texture instead of four separate loads. + +# Known issues +Please use driver 20.8.3 or newer. There is a known issue on DX12 when using the SPD No-WaveOps Packed version. +It may appear as "Access violation reading location ..." during CreateComputePipelineState, with top of the stack +pointing to amdxc64.dll. +To work around this issue, you may advise players to update their graphics driver, or skip compiling the affected version and use +a different SPD version, e.g. a Non-Packed version. 
\ No newline at end of file diff --git a/docs/FidelityFX_SPD.pdf b/docs/FidelityFX_SPD.pdf index 68f343b..bc80ecc 100644 Binary files a/docs/FidelityFX_SPD.pdf and b/docs/FidelityFX_SPD.pdf differ diff --git a/ffx-spd/ffx_spd.h b/ffx-spd/ffx_spd.h index 0a6b13d..e0b58b8 100644 --- a/ffx-spd/ffx_spd.h +++ b/ffx-spd/ffx_spd.h @@ -1,7 +1,7 @@ //_____________________________________________________________/\_______________________________________________________________ //============================================================================================================================== // -// [FFX SPD] Single Pass Downsampler 1.0 +// [FFX SPD] Single Pass Downsampler 2.0 // //============================================================================================================================== // LICENSE @@ -20,17 +20,56 @@ // WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// //------------------------------------------------------------------------------------------------------------------------------ - +// CHANGELIST v2.0 +// =============== +// - Added support for cube and array textures. SpdDownsample and SpdDownsampleH shader functions now take index of texture slice +// as an additional parameter. For regular texture use 0. +// - Added support for updating only sub-rectangle of the texture. Additional, optional parameter workGroupOffset added to shader +// functions SpdDownsample and SpdDownsampleH. +// - Added C function SpdSetup that helps to setup constants to be passed as a constant buffer. 
+// - The global atomic counter is automatically reset to 0 by the shader at the end, so you do not need to clear it before every +// use, just once after creation +// //------------------------------------------------------------------------------------------------------------------------------ // INTEGRATION SUMMARY FOR CPU // =========================== // // you need to provide as constants: // // number of mip levels to be computed (maximum is 12) // // number of total thread groups: ((widthInPixels+63)>>6) * ((heightInPixels+63)>>6) +// // workGroupOffset -> by default 0, if you only downsample a rectangle within the source texture use SpdSetup function to calculate correct offset // ... // // Dispatch the shader such that each thread group works on a 64x64 sub-tile of the source image -// vkCmdDispatch(cmdBuf,(widthInPixels+63)>>6,(heightInPixels+63)>>6,1); +// // for Cube Textures or Texture2DArray, use the z dimension +// vkCmdDispatch(cmdBuf,(widthInPixels+63)>>6,(heightInPixels+63)>>6, slices); + +// // you can also use the SpdSetup function: +// //on top of your cpp file: +// #define A_CPU +// #include "ffx_a.h" +// #include "ffx_spd.h" +// // before your dispatch call, use SpdSetup function to get your constants +// varAU2(dispatchThreadGroupCountXY); // output variable +// varAU2(workGroupOffset); // output variable, these constants are required if Left and Top are not 0,0 +// varAU2(numWorkGroupsAndMips); // output variable +// // input information about your source texture: +// // left and top of the rectangle within your texture you want to downsample +// // width and height of the rectangle you want to downsample +// // if complete source texture should get downsampled: left = 0, top = 0, width = sourceTexture.width, height = sourceTexture.height +// varAU4(rectInfo) = initAU4(0, 0, m_Texture.GetWidth(), m_Texture.GetHeight()); // left, top, width, height +// SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); +// 
... +// // constants: +// data.numWorkGroupsPerSlice = numWorkGroupsAndMips[0]; +// data.mips = numWorkGroupsAndMips[1]; +// data.workGroupOffset[0] = workGroupOffset[0]; +// data.workGroupOffset[1] = workGroupOffset[1]; +// ... +// uint32_t dispatchX = dispatchThreadGroupCountXY[0]; +// uint32_t dispatchY = dispatchThreadGroupCountXY[1]; +// uint32_t dispatchZ = m_CubeTexture.GetArraySize(); // slices - for 2D Texture this is 1, for cube texture 6 +// vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); //------------------------------------------------------------------------------------------------------------------------------ // INTEGRATION SUMMARY FOR GPU @@ -39,37 +78,44 @@ // [SAMPLER] - if you want to use a sampler with linear filtering for loading the source image // follow additionally the instructions marked with [SAMPLER] // add following define: -// #SPD_LINEAR_SAMPLER +// #define SPD_LINEAR_SAMPLER // this is recommended, as using one sample() with linear filter to reduce 2x2 is faster // than 4x load() plus manual averaging // // Setup layout. Example below for VK_FORMAT_R16G16B16A16_SFLOAT. 
-// // Note: If you use UNORM/SRGB format, you need to convert to linear space +// // Note: If you use SRGB format for UAV load() and store() (if it's supported), you need to convert to and from linear space // // when using UAV load() and store() -// // conversion to linear (load function): x*x -// // conversion from linear (store function): sqrt() +// // approximate conversion to linear (load function): x*x +// // approximate conversion from linear (store function): sqrt() +// // or use more accurate functions from ffx_a.h: AFromSrgbF1(value) and AToSrgbF1(value) +// // Recommendation: use UNORM format instead of SRGB for UAV access, and SRGB for SRV access +// // look in the sample app to see how it's done // // source image +// // if cube texture use image2DArray / Texture2DArray and adapt your load/store/sample calls // GLSL: layout(set=0,binding=0,rgba16f)uniform image2D imgSrc; // [SAMPLER]: layout(set=0,binding=0)uniform texture2D imgSrc; // HLSL: [[vk::binding(0)]] Texture2D imgSrc :register(u0); -// // destination -> 12 is the maximum number of mips supported by DS +// // destination -> 12 is the maximum number of mips supported by SPD // GLSL: layout(set=0,binding=1,rgba16f) uniform coherent image2D imgDst[12]; // HLSL: [[vk::binding(1)]] globallycoherent RWTexture2D imgDst[12] :register(u1); // // global atomic counter - MUST be initialized to 0 +// // SPD resets the counter back after each run by calling SpdResetAtomicCounter(slice) +// // if you have more than 1 slice (== if you downsample a cube texture or a texture2Darray) +// // you have an array of counters: counter[6] -> if you have 6 slices for example // // GLSL: -// layout(std430, set=0, binding=2) coherent buffer globalAtomicBuffer +// layout(std430, set=0, binding=2) coherent buffer SpdGlobalAtomicBuffer // { // uint counter; -// } globalAtomic; +// } spdGlobalAtomic; // // HLSL: -// struct globalAtomicBuffer +// struct SpdGlobalAtomicBuffer // { // uint counter; // }; -// [[vk::binding(2)]] 
globallycoherent RWStructuredBuffer globalAtomic; +// [[vk::binding(2)]] globallycoherent RWStructuredBuffer spdGlobalAtomic; // // [SAMPLER] add sampler // GLSL: layout(set=0, binding=3) uniform sampler srcSampler; @@ -79,15 +125,19 @@ // // or calculate within shader // // [SAMPLER] when using sampler add inverse source image size // // GLSL: -// layout(push_constant) uniform pushConstants { +// layout(push_constant) uniform SpdConstants { // uint mips; // needed to opt out earlier if mips are < 12 // uint numWorkGroups; // number of total thread groups, so numWorkGroupsX * numWorkGroupsY * 1 +// // it is important to NOT take the number of slices (z dimension) into account here +// // as each slice has its own counter! +// vec2 workGroupOffset; // optional - use SpdSetup() function to calculate correct workgroup offset // } spdConstants; // // HLSL: // [[vk::push_constant]] // cbuffer spdConstants { -// uint mips; -// uint numWorkGroups; +// uint mips; +// uint numWorkGroups; +// float2 workGroupOffset; // optional // }; // ... @@ -105,18 +155,18 @@ // ... 
// // Define LDS variables -// shared AF4 spd_intermediate[16][16]; // HLSL: groupshared -// shared AU1 spd_counter; // HLSL: groupshared +// shared AF4 spdIntermediate[16][16]; // HLSL: groupshared +// shared AU1 spdCounter; // HLSL: groupshared // // PACKED version -// shared AH4 spd_intermediate[16][16]; // HLSL: groupshared +// shared AH4 spdIntermediate[16][16]; // HLSL: groupshared // // Note: You can also use -// shared AF1 spd_intermediateR[16][16]; -// shared AF1 spd_intermediateG[16][16]; -// shared AF1 spd_intermediateB[16][16]; -// shared AF1 spd_intermediateA[16][16]; +// shared AF1 spdIntermediateR[16][16]; +// shared AF1 spdIntermediateG[16][16]; +// shared AF1 spdIntermediateB[16][16]; +// shared AF1 spdIntermediateA[16][16]; // // or for Packed version: -// shared AH2 spd_intermediateRG[16][16]; -// shared AH2 spd_intermediateBA[16][16]; +// shared AH2 spdIntermediateRG[16][16]; +// shared AH2 spdIntermediateBA[16][16]; // // This is potentially faster // // Adapt your load and store functions accordingly @@ -135,17 +185,19 @@ // // conversion to linear (load function): x*x // // conversion from linear (store function): sqrt() +// AU1 slice parameter is for Cube textures and texture2DArray +// if downsampling Texture2D you can ignore this parameter, otherwise use it to access correct slice // // Load from source image -// GLSL: AF4 SpdLoadSourceImage(ASU2 p){return imageLoad(imgSrc, p);} -// HLSL: AF4 SpdLoadSourceImage(ASU2 tex){return imgSrc[tex];} +// GLSL: AF4 SpdLoadSourceImage(ASU2 p, AU1 slice){return imageLoad(imgSrc, p);} +// HLSL: AF4 SpdLoadSourceImage(ASU2 tex, AU1 slice){return imgSrc[tex];} // [SAMPLER] don't forget to add the define #SPD_LINEAR_SAMPLER :) // GLSL: -// AF4 SpdLoadSourceImage(ASU2 p){ +// AF4 SpdLoadSourceImage(ASU2 p, AU1 slice){ // AF2 textureCoord = p * invInputSize + invInputSize; // return texture(sampler2D(imgSrc, srcSampler), textureCoord); // } // HLSL: -// AF4 SpdLoadSourceImage(ASU2 p){ +// AF4 
SpdLoadSourceImage(ASU2 p, AU1 slice){ // AF2 textureCoord = p * invInputSize + invInputSize; // return imgSrc.SampleLevel(srcSampler, textureCoord, 0); // } @@ -153,28 +205,34 @@ // // SpdLoad() takes a 32-bit signed integer 2D coordinate and loads color. // // Loads the 5th mip level, each value is computed by a different thread group // // last thread group will access all its elements and compute the subsequent mips -// GLSL: AF4 SpdLoad(ASU2 p){return imageLoad(imgDst[5],p);} -// HLSL: AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} +// // reminder: if non-power-of-2 textures, add border controls if you do not want to read zeros past the border +// GLSL: AF4 SpdLoad(ASU2 p, AU1 slice){return imageLoad(imgDst[5],p);} +// HLSL: AF4 SpdLoad(ASU2 tex, AU1 slice){return imgDst[5][tex];} // Define the store function -// GLSL: void SpdStore(ASU2 p, AF4 value, AU1 mip){imageStore(imgDst[mip], p, value);} -// HLSL: void SpdStore(ASU2 pix, AF4 value, AU1 index){imgDst[index][pix] = value;} +// GLSL: void SpdStore(ASU2 p, AF4 value, AU1 mip, AU1 slice){imageStore(imgDst[mip], p, value);} +// HLSL: void SpdStore(ASU2 pix, AF4 value, AU1 mip, AU1 slice){imgDst[mip][pix] = value;} // // Define the atomic counter increase function +// // each slice only reads and stores to its specific slice counter +// // so, if you have several slices it's +// // InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); // // GLSL: -// void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -// AU1 SpdGetAtomicCounter() {return spd_counter;} +// void SpdIncreaseAtomicCounter(AU1 slice){spdCounter = atomicAdd(spdGlobalAtomic.counter, 1);} +// AU1 SpdGetAtomicCounter() {return spdCounter;} +// void SpdResetAtomicCounter(AU1 slice){spdGlobalAtomic.counter[slice] = 0;} // // HLSL: -// void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -// AU1 SpdGetAtomicCounter(){return spd_counter;} +// void SpdIncreaseAtomicCounter(AU1 
slice){InterlockedAdd(spdGlobalAtomic[0].counter, 1, spdCounter);} +// AU1 SpdGetAtomicCounter(){return spdCounter;} +// void SpdResetAtomicCounter(AU1 slice){spdGlobalAtomic[0].counter[slice] = 0;} // // Define the LDS load and store functions // // GLSL: -// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spd_intermediate[x][y] = value;} +// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spdIntermediate[x][y];} +// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spdIntermediate[x][y] = value;} // // HLSL: -// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spd_intermediate[x][y] = value;} +// AF4 SpdLoadIntermediate(AU1 x, AU1 y){return spdIntermediate[x][y];} +// void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){spdIntermediate[x][y] = value;} // // Define your reduction function: takes as input the four 2x2 values and returns 1 output value // Example below: computes the average value @@ -182,16 +240,16 @@ // // PACKED VERSION // Load from source image -// GLSL: AH4 SpdLoadSourceImageH(ASU2 p){return AH4(imageLoad(imgSrc, p));} -// HLSL: AH4 SpdLoadSourceImageH(ASU2 tex){return AH4(imgSrc[tex]);} +// GLSL: AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice){return AH4(imageLoad(imgSrc, p));} +// HLSL: AH4 SpdLoadSourceImageH(ASU2 tex, AU1 slice){return AH4(imgSrc[tex]);} // [SAMPLER] // GLSL: -// AH4 SpdLoadSourceImageH(ASU2 p){ +// AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice){ // AF2 textureCoord = p * invInputSize + invInputSize; // return AH4(texture(sampler2D(imgSrc, srcSampler), textureCoord)); // } // HLSL: -// AH4 SpdLoadSourceImageH(ASU2 p){ +// AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice){ // AF2 textureCoord = p * invInputSize + invInputSize; // return AH4(imgSrc.SampleLevel(srcSampler, textureCoord, 0)); // } @@ -199,28 +257,28 @@ // // SpdLoadH() takes a 32-bit signed integer 2D coordinate and loads color. 
// // Loads the 5th mip level, each value is computed by a different thread group // // last thread group will access all its elements and compute the subsequent mips -// GLSL: AH4 SpdLoadH(ASU2 p){return AH4(imageLoad(imgDst[5],p));} -// HLSL: AH4 SpdLoadH(ASU2 tex){return AH4(imgDst[5][tex]);} +// GLSL: AH4 SpdLoadH(ASU2 p, AU1 slice){return AH4(imageLoad(imgDst[5],p));} +// HLSL: AH4 SpdLoadH(ASU2 tex, AU1 slice){return AH4(imgDst[5][tex]);} // Define the store function -// GLSL: void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imageStore(imgDst[mip], p, AF4(value));} -// HLSL: void SpdStoreH(ASU2 pix, AH4 value, AU1 index){imgDst[index][pix] = AF4(value);} +// GLSL: void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice){imageStore(imgDst[mip], p, AF4(value));} +// HLSL: void SpdStoreH(ASU2 pix, AH4 value, AU1 index, AU1 slice){imgDst[index][pix] = AF4(value);} // // Define the atomic counter increase function // // GLSL: -// void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -// AU1 SpdGetAtomicCounter() {return spd_counter;} +// void SpdIncreaseAtomicCounter(AU1 slice){spdCounter = atomicAdd(spdGlobalAtomic.counter, 1);} +// AU1 SpdGetAtomicCounter() {return spdCounter;} // // HLSL: -// void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -// AU1 SpdGetAtomicCounter(){return spd_counter;} +// void SpdIncreaseAtomicCounter(AU1 slice){InterlockedAdd(spdGlobalAtomic[0].counter, 1, spdCounter);} +// AU1 SpdGetAtomicCounter(){return spdCounter;} -// // Define the lds load and store functions +// // Define the LDS load and store functions // // GLSL: -// AH4 SpdLoadIntermediateH(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){spd_intermediate[x][y] = value;} +// AH4 SpdLoadIntermediateH(AU1 x, AU1 y){return spdIntermediate[x][y];} +// void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){spdIntermediate[x][y] = value;} // // HLSL: -// AH4 
SpdLoadIntermediate(AU1 x, AU1 y){return spd_intermediate[x][y];} -// void SpdStoreIntermediate(AU1 x, AU1 y, AH4 value){spd_intermediate[x][y] = value;} +// AH4 SpdLoadIntermediateH(AU1 x, AU1 y){return spdIntermediate[x][y];} +// void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){spdIntermediate[x][y] = value;} // // Define your reduction function: takes as input the four 2x2 values and returns 1 output value // Example below: computes the average value @@ -240,42 +298,80 @@ // layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; // void main(){ // // Call the downsampling function +// // gl_WorkGroupID.z should be 0 if you only downsample a Texture2D! // SpdDownsample(AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), -// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups)); +// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups), AU1(gl_WorkGroupID.z)); // // // PACKED: // SpdDownsampleH(AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), -// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups)); +// AU1(spdConstants.mips), AU1(spdConstants.numWorkGroups), AU1(gl_WorkGroupID.z)); // ... // // HLSL: // [numthreads(256,1,1)] // void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) { // SpdDownsample(AU2(WorkGroupId.xy), AU1(LocalThreadIndex), -// AU1(mips), AU1(numWorkGroups)); +// AU1(mips), AU1(numWorkGroups), AU1(WorkGroupId.z)); // // // PACKED: // SpdDownsampleH(AU2(WorkGroupId.xy), AU1(LocalThreadIndex), -// AU1(mips), AU1(numWorkGroups)); +// AU1(mips), AU1(numWorkGroups), AU1(WorkGroupId.z)); // ... 
// //------------------------------------------------------------------------------------------------------------------------------ +//============================================================================================================================== +// SPD Setup +//============================================================================================================================== +#ifdef A_CPU +A_STATIC void SpdSetup( +outAU2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy +outAU2 workGroupOffset, // GPU side: pass in as constant +outAU2 numWorkGroupsAndMips, // GPU side: pass in as constant +inAU4 rectInfo, // left, top, width, height +ASU1 mips // optional: if -1, calculate based on rect width and height +){ + workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left + workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top + + AU1 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width + AU1 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height + + dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; + dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; + + numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); + + if (mips >= 0) { + numWorkGroupsAndMips[1] = AU1(mips); + } else { // calculate based on rect width and height + AU1 resolution = AMaxU1(rectInfo[2], rectInfo[3]); + numWorkGroupsAndMips[1] = AU1((AMinF1(AFloorF1(ALog2F1(AF1(resolution))), AF1(12)))); + } +} - +A_STATIC void SpdSetup( + outAU2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy + outAU2 workGroupOffset, // GPU side: pass in as constant + outAU2 numWorkGroupsAndMips, // GPU side: pass in as constant + inAU4 rectInfo // left, top, width, height +) { + SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); +} +#endif // #ifdef A_CPU 
//============================================================================================================================== // NON-PACKED VERSION //============================================================================================================================== - +#ifdef A_GPU #ifdef SPD_PACKED_ONLY // Avoid compiler error - AF4 SpdLoadSourceImage(ASU2 p){return AF4(0.0,0.0,0.0,0.0);} - AF4 SpdLoad(ASU2 p){return AF4(0.0,0.0,0.0,0.0);} - void SpdStore(ASU2 p, AF4 value, AU1 mip){} + AF4 SpdLoadSourceImage(ASU2 p, AU1 slice){return AF4(0.0,0.0,0.0,0.0);} + AF4 SpdLoad(ASU2 p, AU1 slice){return AF4(0.0,0.0,0.0,0.0);} + void SpdStore(ASU2 p, AF4 value, AU1 mip, AU1 slice){} AF4 SpdLoadIntermediate(AU1 x, AU1 y){return AF4(0.0,0.0,0.0,0.0);} void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){} AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return AF4(0.0,0.0,0.0,0.0);} -#endif +#endif // #ifdef SPD_PACKED_ONLY //_____________________________________________________________/\_______________________________________________________________ #if defined(A_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) @@ -292,12 +388,12 @@ void SpdWorkgroupShuffleBarrier() { } // Only last active workgroup should proceed -bool SpdExitWorkgroup(AU1 numWorkGroups, AU1 localInvocationIndex) +bool SpdExitWorkgroup(AU1 numWorkGroups, AU1 localInvocationIndex, AU1 slice) { // global atomic counter if (localInvocationIndex == 0) { - SpdIncreaseAtomicCounter(); + SpdIncreaseAtomicCounter(slice); } SpdWorkgroupShuffleBarrier(); return (SpdGetAtomicCounter() != (numWorkGroups - 1)); @@ -306,7 +402,7 @@ bool SpdExitWorkgroup(AU1 numWorkGroups, AU1 localInvocationIndex) //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// User defined: AF4 DSReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3); 
+// User defined: AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3); AF4 SpdReduceQuad(AF4 v) { @@ -326,6 +422,8 @@ AF4 SpdReduceQuad(AF4 v) return SpdReduce4(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ // works for DX11 AF4 v0 = v; AF4 v1; @@ -346,7 +444,7 @@ AF4 SpdReduceQuad(AF4 v) return SpdReduce4(v0, v1, v2, v3); */ #endif - return AF4_x(0.0); + return v; } AF4 SpdReduceIntermediate(AU2 i0, AU2 i1, AU2 i2, AU2 i3) @@ -358,69 +456,71 @@ AF4 SpdReduceIntermediate(AU2 i0, AU2 i1, AU2 i2, AU2 i3) return SpdReduce4(v0, v1, v2, v3); } -AF4 SpdReduceLoad4(AU2 i0, AU2 i1, AU2 i2, AU2 i3) +AF4 SpdReduceLoad4(AU2 i0, AU2 i1, AU2 i2, AU2 i3, AU1 slice) { - AF4 v0 = SpdLoad(ASU2(i0)); - AF4 v1 = SpdLoad(ASU2(i1)); - AF4 v2 = SpdLoad(ASU2(i2)); - AF4 v3 = SpdLoad(ASU2(i3)); + AF4 v0 = SpdLoad(ASU2(i0), slice); + AF4 v1 = SpdLoad(ASU2(i1), slice); + AF4 v2 = SpdLoad(ASU2(i2), slice); + AF4 v3 = SpdLoad(ASU2(i3), slice); return SpdReduce4(v0, v1, v2, v3); } -AF4 SpdReduceLoad4(AU2 base) +AF4 SpdReduceLoad4(AU2 base, AU1 slice) { return SpdReduceLoad4( AU2(base + AU2(0, 0)), AU2(base + AU2(0, 1)), AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); + AU2(base + AU2(1, 1)), + slice); } -AF4 SpdReduceLoadSourceImage4(AU2 i0, AU2 i1, AU2 i2, AU2 i3) +AF4 SpdReduceLoadSourceImage4(AU2 i0, AU2 i1, AU2 i2, AU2 i3, AU1 slice) { - AF4 v0 = SpdLoadSourceImage(ASU2(i0)); - AF4 v1 = SpdLoadSourceImage(ASU2(i1)); - AF4 v2 = SpdLoadSourceImage(ASU2(i2)); - AF4 v3 = SpdLoadSourceImage(ASU2(i3)); + AF4 v0 = SpdLoadSourceImage(ASU2(i0), slice); + AF4 v1 = SpdLoadSourceImage(ASU2(i1), slice); + AF4 v2 = SpdLoadSourceImage(ASU2(i2), slice); + AF4 v3 = SpdLoadSourceImage(ASU2(i3), slice); return SpdReduce4(v0, v1, v2, v3); } -AF4 SpdReduceLoadSourceImage4(AU2 base) +AF4 SpdReduceLoadSourceImage(AU2 base, AU1 slice) { #ifdef 
SPD_LINEAR_SAMPLER - return SpdLoadSourceImage(ASU2(base)); + return SpdLoadSourceImage(ASU2(base), slice); #else return SpdReduceLoadSourceImage4( AU2(base + AU2(0, 0)), AU2(base + AU2(0, 1)), AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); + AU2(base + AU2(1, 1)), + slice); #endif } -void SpdDownsampleMips_0_1_Intrinsics(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMips_0_1_Intrinsics(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { AF4 v[4]; ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[0], 0); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[1], 0); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[2], 0); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[3], 0); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); if (mip <= 1) return; @@ -433,50 +533,50 @@ void SpdDownsampleMips_0_1_Intrinsics(AU1 x, AU1 y, AU2 workGroupID, AU1 localIn if ((localInvocationIndex % 4) == 0) { SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x/2, y/2), v[0], 1); + ASU2(x/2, y/2), v[0], 1, slice); SpdStoreIntermediate( x/2, y/2, v[0]); SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x/2 + 8, y/2), v[1], 1); + ASU2(x/2 + 8, y/2), v[1], 1, slice); SpdStoreIntermediate( x/2 + 8, 
y/2, v[1]); SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x/2, y/2 + 8), v[2], 1); + ASU2(x/2, y/2 + 8), v[2], 1, slice); SpdStoreIntermediate( x/2, y/2 + 8, v[2]); SpdStore(ASU2(workGroupID.xy * 16) + - ASU2(x/2 + 8, y/2 + 8), v[3], 1); + ASU2(x/2 + 8, y/2 + 8), v[3], 1, slice); SpdStoreIntermediate( x/2 + 8, y/2 + 8, v[3]); } } -void SpdDownsampleMips_0_1_LDS(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMips_0_1_LDS(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { AF4 v[4]; ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[0], 0); + v[0] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[0], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[1], 0); + v[1] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[1], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[2], 0); + v[2] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[2], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4(tex); - SpdStore(pix, v[3], 0); + v[3] = SpdReduceLoadSourceImage(tex, slice); + SpdStore(pix, v[3], 0, slice); if (mip <= 1) return; @@ -493,7 +593,7 @@ void SpdDownsampleMips_0_1_LDS(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocatio AU2(x * 2 + 0, y * 2 + 1), AU2(x * 2 + 1, y * 2 + 1) ); - SpdStore(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1); + SpdStore(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); } SpdWorkgroupShuffleBarrier(); } @@ -507,28 
+607,28 @@ void SpdDownsampleMips_0_1_LDS(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocatio } } -void SpdDownsampleMips_0_1(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMips_0_1(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS - SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip); + SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); #else - SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip); + SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); #endif } -void SpdDownsampleMip_2(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_2(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 64) { AF4 v = SpdReduceIntermediate( - AU2(x * 2 + 0 + 0, y * 2 + 0), - AU2(x * 2 + 0 + 1, y * 2 + 0), - AU2(x * 2 + 0 + 0, y * 2 + 1), - AU2(x * 2 + 0 + 1, y * 2 + 1) + AU2(x * 2 + 0, y * 2 + 0), + AU2(x * 2 + 1, y * 2 + 0), + AU2(x * 2 + 0, y * 2 + 1), + AU2(x * 2 + 1, y * 2 + 1) ); - SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip); + SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip, slice); // store to LDS, try to reduce bank conflicts // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -545,13 +645,13 @@ void SpdDownsampleMip_2(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x/2, y/2), v, mip); + SpdStore(ASU2(workGroupID.xy * 8) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediate(x + (y/2) % 2, y, v); } #endif } -void SpdDownsampleMip_3(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_3(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) 
{ #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 16) @@ -566,7 +666,7 @@ void SpdDownsampleMip_3(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU2(x * 4 + 0 + 1, y * 4 + 2), AU2(x * 4 + 2 + 1, y * 4 + 2) ); - SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip); + SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip, slice); // store to LDS // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -588,14 +688,14 @@ void SpdDownsampleMip_3(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x/2, y/2), v, mip); + SpdStore(ASU2(workGroupID.xy * 4) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediate(x * 2 + y/2, y * 2, v); } } #endif } -void SpdDownsampleMip_4(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_4(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 4) @@ -609,7 +709,7 @@ void SpdDownsampleMip_4(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), AU2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) ); - SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip); + SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip, slice); // store to LDS // x x x x 0 ... // 0 ... 
@@ -623,14 +723,14 @@ void SpdDownsampleMip_4(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x/2, y/2), v, mip); + SpdStore(ASU2(workGroupID.xy * 2) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediate(x / 2 + y, 0, v); } } #endif } -void SpdDownsampleMip_5(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_5(AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 1) @@ -643,7 +743,7 @@ void SpdDownsampleMip_5(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU2(2, 0), AU2(3, 0) ); - SpdStore(ASU2(workGroupID.xy), v, mip); + SpdStore(ASU2(workGroupID.xy), v, mip, slice); } #else if (localInvocationIndex < 4) @@ -653,82 +753,96 @@ void SpdDownsampleMip_5(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStore(ASU2(workGroupID.xy), v, mip); + SpdStore(ASU2(workGroupID.xy), v, mip, slice); } } #endif } -void SpdDownsampleMips_6_7(AU1 x, AU1 y, AU1 mips) +void SpdDownsampleMips_6_7(AU1 x, AU1 y, AU1 mips, AU1 slice) { ASU2 tex = ASU2(x * 4 + 0, y * 4 + 0); ASU2 pix = ASU2(x * 2 + 0, y * 2 + 0); - AF4 v0 = SpdReduceLoad4(tex); - SpdStore(pix, v0, 6); + AF4 v0 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v0, 6, slice); tex = ASU2(x * 4 + 2, y * 4 + 0); pix = ASU2(x * 2 + 1, y * 2 + 0); - AF4 v1 = SpdReduceLoad4(tex); - SpdStore(pix, v1, 6); + AF4 v1 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v1, 6, slice); tex = ASU2(x * 4 + 0, y * 4 + 2); pix = ASU2(x * 2 + 0, y * 2 + 1); - AF4 v2 = SpdReduceLoad4(tex); - SpdStore(pix, v2, 6); + AF4 v2 = SpdReduceLoad4(tex, slice); + SpdStore(pix, v2, 6, slice); tex = ASU2(x * 4 + 2, y * 4 + 2); pix = ASU2(x * 2 + 1, y * 2 + 1); - AF4 v3 = SpdReduceLoad4(tex); - SpdStore(pix, v3, 6); + AF4 v3 = SpdReduceLoad4(tex, slice); + 
SpdStore(pix, v3, 6, slice); if (mips <= 7) return; // no barrier needed, working on values only from the same thread AF4 v = SpdReduce4(v0, v1, v2, v3); - SpdStore(ASU2(x, y), v, 7); + SpdStore(ASU2(x, y), v, 7, slice); SpdStoreIntermediate(x, y, v); } -void SpdDownsampleNextFour(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips) +void SpdDownsampleNextFour(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips, AU1 slice) { if (mips <= baseMip) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip); + SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); if (mips <= baseMip + 1) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1); + SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); if (mips <= baseMip + 2) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2); + SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); if (mips <= baseMip + 3) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5(x, y, workGroupID, localInvocationIndex, baseMip + 3); + SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); } void SpdDownsample( AU2 workGroupID, AU1 localInvocationIndex, AU1 mips, - AU1 numWorkGroups + AU1 numWorkGroups, + AU1 slice ) { AU2 sub_xy = ARmpRed8x8(localInvocationIndex % 64); AU1 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); AU1 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips); + SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); - SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips); + SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); if (mips <= 6) return; - if 
(SpdExitWorkgroup(numWorkGroups, localInvocationIndex)) return; + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) return; + + SpdResetAtomicCounter(slice); // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. - SpdDownsampleMips_6_7(x, y, mips); + SpdDownsampleMips_6_7(x, y, mips, slice); + + SpdDownsampleNextFour(x, y, AU2(0,0), localInvocationIndex, 8, mips, slice); +} - SpdDownsampleNextFour(x, y, AU2(0,0), localInvocationIndex, 8, mips); +void SpdDownsample( + AU2 workGroupID, + AU1 localInvocationIndex, + AU1 mips, + AU1 numWorkGroups, + AU1 slice, + AU2 workGroupOffset +) { + SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -738,7 +852,7 @@ void SpdDownsample( // PACKED VERSION //============================================================================================================================== -#ifdef A_HALF // A_HALF +#ifdef A_HALF #ifdef A_GLSL #extension GL_EXT_shader_subgroup_extended_types_float16:require @@ -762,6 +876,8 @@ AH4 SpdReduceQuadH(AH4 v) return SpdReduce4H(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics + // the AMD shader intrinsics are available in AMD GPU Services (AGS) library: + // https://gpuopen.com/amd-gpu-services-ags-library/ // works for DX11 AH4 v0 = v; AH4 v1; @@ -795,69 +911,71 @@ AH4 SpdReduceIntermediateH(AU2 i0, AU2 i1, AU2 i2, AU2 i3) return SpdReduce4H(v0, v1, v2, v3); } -AH4 SpdReduceLoad4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3) +AH4 SpdReduceLoad4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3, AU1 slice) { - AH4 v0 = SpdLoadH(ASU2(i0)); - AH4 v1 = SpdLoadH(ASU2(i1)); - AH4 v2 = SpdLoadH(ASU2(i2)); - AH4 v3 = SpdLoadH(ASU2(i3)); + AH4 v0 = SpdLoadH(ASU2(i0), slice); + AH4 v1 = SpdLoadH(ASU2(i1), slice); + AH4 v2 = SpdLoadH(ASU2(i2), slice); + AH4 
v3 = SpdLoadH(ASU2(i3), slice); return SpdReduce4H(v0, v1, v2, v3); } -AH4 SpdReduceLoad4H(AU2 base) +AH4 SpdReduceLoad4H(AU2 base, AU1 slice) { return SpdReduceLoad4H( AU2(base + AU2(0, 0)), AU2(base + AU2(0, 1)), AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); + AU2(base + AU2(1, 1)), + slice); } -AH4 SpdReduceLoadSourceImage4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3) +AH4 SpdReduceLoadSourceImage4H(AU2 i0, AU2 i1, AU2 i2, AU2 i3, AU1 slice) { - AH4 v0 = SpdLoadSourceImageH(ASU2(i0)); - AH4 v1 = SpdLoadSourceImageH(ASU2(i1)); - AH4 v2 = SpdLoadSourceImageH(ASU2(i2)); - AH4 v3 = SpdLoadSourceImageH(ASU2(i3)); + AH4 v0 = SpdLoadSourceImageH(ASU2(i0), slice); + AH4 v1 = SpdLoadSourceImageH(ASU2(i1), slice); + AH4 v2 = SpdLoadSourceImageH(ASU2(i2), slice); + AH4 v3 = SpdLoadSourceImageH(ASU2(i3), slice); return SpdReduce4H(v0, v1, v2, v3); } -AH4 SpdReduceLoadSourceImage4H(AU2 base) +AH4 SpdReduceLoadSourceImageH(AU2 base, AU1 slice) { #ifdef SPD_LINEAR_SAMPLER - return SpdLoadSourceImageH(ASU2(base)); + return SpdLoadSourceImageH(ASU2(base), slice); #else return SpdReduceLoadSourceImage4H( AU2(base + AU2(0, 0)), AU2(base + AU2(0, 1)), AU2(base + AU2(1, 0)), - AU2(base + AU2(1, 1))); + AU2(base + AU2(1, 1)), + slice); #endif } -void SpdDownsampleMips_0_1_IntrinsicsH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips) +void SpdDownsampleMips_0_1_IntrinsicsH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips, AU1 slice) { AH4 v[4]; ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[0], 0); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[1], 0); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); tex 
= ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[2], 0); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[3], 0); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); if (mips <= 1) return; @@ -869,43 +987,43 @@ void SpdDownsampleMips_0_1_IntrinsicsH(AU1 x, AU1 y, AU2 workGroupID, AU1 localI if ((localInvocationIndex % 4) == 0) { - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2, y/2), v[0], 1); + SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2, y/2), v[0], 1, slice); SpdStoreIntermediateH(x/2, y/2, v[0]); - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2 + 8, y/2), v[1], 1); + SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2 + 8, y/2), v[1], 1, slice); SpdStoreIntermediateH(x/2 + 8, y/2, v[1]); - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2, y/2 + 8), v[2], 1); + SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2, y/2 + 8), v[2], 1, slice); SpdStoreIntermediateH(x/2, y/2 + 8, v[2]); - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2 + 8, y/2 + 8), v[3], 1); + SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x/2 + 8, y/2 + 8), v[3], 1, slice); SpdStoreIntermediateH(x/2 + 8, y/2 + 8, v[3]); } } -void SpdDownsampleMips_0_1_LDSH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips) +void SpdDownsampleMips_0_1_LDSH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips, AU1 slice) { AH4 v[4]; ASU2 tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2); ASU2 pix = ASU2(workGroupID.xy * 32) + ASU2(x, y); - v[0] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[0], 0); + v[0] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[0], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2); 
pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y); - v[1] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[1], 0); + v[1] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[1], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x, y + 16); - v[2] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[2], 0); + v[2] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[2], 0, slice); tex = ASU2(workGroupID.xy * 64) + ASU2(x * 2 + 32, y * 2 + 32); pix = ASU2(workGroupID.xy * 32) + ASU2(x + 16, y + 16); - v[3] = SpdReduceLoadSourceImage4H(tex); - SpdStoreH(pix, v[3], 0); + v[3] = SpdReduceLoadSourceImageH(tex, slice); + SpdStoreH(pix, v[3], 0, slice); if (mips <= 1) return; @@ -922,7 +1040,7 @@ void SpdDownsampleMips_0_1_LDSH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocati AU2(x * 2 + 0, y * 2 + 1), AU2(x * 2 + 1, y * 2 + 1) ); - SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1); + SpdStoreH(ASU2(workGroupID.xy * 16) + ASU2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); } SpdWorkgroupShuffleBarrier(); } @@ -936,28 +1054,28 @@ void SpdDownsampleMips_0_1_LDSH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocati } } -void SpdDownsampleMips_0_1H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips) +void SpdDownsampleMips_0_1H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mips, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS - SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips); + SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); #else - SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips); + SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); #endif } -void SpdDownsampleMip_2H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_2H(AU1 x, AU1 y, AU2 workGroupID, AU1 
localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 64) { AH4 v = SpdReduceIntermediateH( - AU2(x * 2 + 0 + 0, y * 2 + 0), - AU2(x * 2 + 0 + 1, y * 2 + 0), - AU2(x * 2 + 0 + 0, y * 2 + 1), - AU2(x * 2 + 0 + 1, y * 2 + 1) + AU2(x * 2 + 0, y * 2 + 0), + AU2(x * 2 + 1, y * 2 + 0), + AU2(x * 2 + 0, y * 2 + 1), + AU2(x * 2 + 1, y * 2 + 1) ); - SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x, y), v, mip, slice); // store to LDS, try to reduce bank conflicts // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -974,13 +1092,13 @@ void SpdDownsampleMip_2H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x/2, y/2), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 8) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediateH(x + (y/2) % 2, y, v); } #endif } -void SpdDownsampleMip_3H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_3H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 16) @@ -995,7 +1113,7 @@ void SpdDownsampleMip_3H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex AU2(x * 4 + 0 + 1, y * 4 + 2), AU2(x * 4 + 2 + 1, y * 4 + 2) ); - SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x, y), v, mip, slice); // store to LDS // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -1017,14 +1135,14 @@ void SpdDownsampleMip_3H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x/2, y/2), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 4) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediateH(x * 2 + y/2, y * 2, v); } } #endif } 
-void SpdDownsampleMip_4H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_4H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 4) @@ -1038,7 +1156,7 @@ void SpdDownsampleMip_4H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex AU2(x * 8 + 0 + 1 + y * 2, y * 8 + 4), AU2(x * 8 + 4 + 1 + y * 2, y * 8 + 4) ); - SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x, y), v, mip, slice); // store to LDS // x x x x 0 ... // 0 ... @@ -1052,14 +1170,14 @@ void SpdDownsampleMip_4H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x/2, y/2), v, mip); + SpdStoreH(ASU2(workGroupID.xy * 2) + ASU2(x/2, y/2), v, mip, slice); SpdStoreIntermediateH(x / 2 + y, 0, v); } } #endif } -void SpdDownsampleMip_5H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 mip) +void SpdDownsampleMip_5H(AU2 workGroupID, AU1 localInvocationIndex, AU1 mip, AU1 slice) { #ifdef SPD_NO_WAVE_OPERATIONS if (localInvocationIndex < 1) @@ -1072,7 +1190,7 @@ void SpdDownsampleMip_5H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex AU2(2, 0), AU2(3, 0) ); - SpdStoreH(ASU2(workGroupID.xy), v, mip); + SpdStoreH(ASU2(workGroupID.xy), v, mip, slice); } #else if (localInvocationIndex < 4) @@ -1082,83 +1200,98 @@ void SpdDownsampleMip_5H(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex // quad index 0 stores result if (localInvocationIndex % 4 == 0) { - SpdStoreH(ASU2(workGroupID.xy), v, mip); + SpdStoreH(ASU2(workGroupID.xy), v, mip, slice); } } #endif } -void SpdDownsampleMips_6_7H(AU1 x, AU1 y, AU1 mips) +void SpdDownsampleMips_6_7H(AU1 x, AU1 y, AU1 mips, AU1 slice) { ASU2 tex = ASU2(x * 4 + 0, y * 4 + 0); ASU2 pix = ASU2(x * 2 + 0, y * 2 + 0); - AH4 v0 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v0, 
6); + AH4 v0 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v0, 6, slice); tex = ASU2(x * 4 + 2, y * 4 + 0); pix = ASU2(x * 2 + 1, y * 2 + 0); - AH4 v1 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v1, 6); + AH4 v1 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v1, 6, slice); tex = ASU2(x * 4 + 0, y * 4 + 2); pix = ASU2(x * 2 + 0, y * 2 + 1); - AH4 v2 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v2, 6); + AH4 v2 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v2, 6, slice); tex = ASU2(x * 4 + 2, y * 4 + 2); pix = ASU2(x * 2 + 1, y * 2 + 1); - AH4 v3 = SpdReduceLoad4H(tex); - SpdStoreH(pix, v3, 6); + AH4 v3 = SpdReduceLoad4H(tex, slice); + SpdStoreH(pix, v3, 6, slice); if (mips < 8) return; // no barrier needed, working on values only from the same thread AH4 v = SpdReduce4H(v0, v1, v2, v3); - SpdStoreH(ASU2(x, y), v, 7); + SpdStoreH(ASU2(x, y), v, 7, slice); SpdStoreIntermediateH(x, y, v); } -void SpdDownsampleNextFourH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips) +void SpdDownsampleNextFourH(AU1 x, AU1 y, AU2 workGroupID, AU1 localInvocationIndex, AU1 baseMip, AU1 mips, AU1 slice) { if (mips <= baseMip) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip); + SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); if (mips <= baseMip + 1) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1); + SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); if (mips <= baseMip + 2) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2); + SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); if (mips <= baseMip + 3) return; SpdWorkgroupShuffleBarrier(); - SpdDownsampleMip_5H(x, y, workGroupID, localInvocationIndex, baseMip + 3); + SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, 
slice); } void SpdDownsampleH( AU2 workGroupID, AU1 localInvocationIndex, AU1 mips, - AU1 numWorkGroups + AU1 numWorkGroups, + AU1 slice ) { AU2 sub_xy = ARmpRed8x8(localInvocationIndex % 64); AU1 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); AU1 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); - SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips); + SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); - SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips); + SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); if (mips < 7) return; - if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex)) return; + if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) return; + + SpdResetAtomicCounter(slice); // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. - SpdDownsampleMips_6_7H(x, y, mips); + SpdDownsampleMips_6_7H(x, y, mips, slice); - SpdDownsampleNextFourH(x, y, AU2(0,0), localInvocationIndex, 8, mips); + SpdDownsampleNextFourH(x, y, AU2(0,0), localInvocationIndex, 8, mips, slice); +} + +void SpdDownsampleH( + AU2 workGroupID, + AU1 localInvocationIndex, + AU1 mips, + AU1 numWorkGroups, + AU1 slice, + AU2 workGroupOffset +) { + SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); } -#endif \ No newline at end of file +#endif // #ifdef A_HALF +#endif // #ifdef A_GPU \ No newline at end of file diff --git a/sample/CMakeLists.txt b/sample/CMakeLists.txt index 95eed3e..21e356b 100644 --- a/sample/CMakeLists.txt +++ b/sample/CMakeLists.txt @@ -3,6 +3,8 @@ set(CMAKE_GENERATOR_PLATFORM x64) project (SPDSample_${GFX_API}) +set(FFX_SPD_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) + # ouput exe to bin directory SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin) foreach( OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES} ) @@ -16,7 +18,7 @@ 
add_subdirectory(libs/cauldron) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${PROJECT_NAME}) if(GFX_API STREQUAL DX12) - add_subdirectory(src/DX12) + add_subdirectory(src/DX12) elseif(GFX_API STREQUAL VK) find_package(Vulkan REQUIRED) add_subdirectory(src/VK) diff --git a/sample/README.md b/sample/README.md index b009b55..8a3f428 100644 --- a/sample/README.md +++ b/sample/README.md @@ -3,13 +3,13 @@ Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. Permission # SPD Sample -A sample using the FidelityFX Singlepass Downsampler (SPD) library. +A sample using the FidelityFX Single Pass Downsampler (SPD) library. # Build Instructions 1. Clone submodules by running 'git submodule update --init --recursive' (so you get the Cauldron framework too) 2. Run sample/build/GenerateSolutions.bat -3. open solution, build + run + have fun 😊 +3. Open solution, build + run + have fun 😊 # SPD Files You can find them in ../ffx-spd @@ -21,15 +21,24 @@ Downsampler - PS: computes each mip in a separate pixel shader pass - Multipass CS: computes each mip in a separate compute shader pass - SPD CS: uses the SPD library, computes all mips (up to a source texture of size 4096²) in a single pass -- SPD CS linear sampler: uses the SPD library and for sampling the source texture a linear sampler -SPD Versions -- NO-WaveOps: uses only LDS to share the data between threads +SPD Load Versions +- Load: uses a load to fetch from the source texture +- Linear Sampler: uses a sampler to fetch from the source texture. Sampler must meet the user defined reduction function. + +SPD WaveOps Versions +- No-WaveOps: uses only LDS to share the data between threads - WaveOps: uses Intrinsics and LDS to share the data between threads -SPD Non-Packed / Packed Version +SPD Non-Packed / Packed Versions - Non-Packed: uses fp32 - Packed: uses fp16, reduced register pressure # Recommendations -We recommend to use the WapeOps path when supported. 
If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well. \ No newline at end of file +We recommend to use the WaveOps path when supported. If higher precision is not needed, you can enable the packed mode - it has less register pressure and can run a bit faster as well. +If you compute the average for each 2x2 quad, we also recommend to use a linear sampler to fetch from the source texture instead of four separate loads. + +# Known issues +Please use driver 20.8.3 or newer. There is a known issue on DX12 when using the SPD No-WaveOps Packed version. +It may appear as "Access violation reading location ..." during CreateComputePipelineState, with top of the stack pointing to amdxc64.dll. +To workaround this issue, you may advise players to update their graphics driver or don't compile and use a different SPD version, e.g. a Non-Packed version. \ No newline at end of file diff --git a/sample/libs/cauldron b/sample/libs/cauldron index fd91cd7..050b274 160000 --- a/sample/libs/cauldron +++ b/sample/libs/cauldron @@ -1 +1 @@ -Subproject commit fd91cd744d014505daef1780dceee49fd62ce953 +Subproject commit 050b274df95777d688686d017a6926a515a58b30 diff --git a/sample/src/Common/SpdSample.json b/sample/src/Common/SpdSample.json new file mode 100644 index 0000000..07ec87e --- /dev/null +++ b/sample/src/Common/SpdSample.json @@ -0,0 +1,41 @@ +{ + "globals": { + "CpuValidationLayerEnabled": false, + "GpuValidationLayerEnabled": false, + "fullScreen": false, + "width": 1920, + "height": 1080, + "activeScene": 0, + "benchmark": false, + "stablePowerState": false, + "downsampler": 2, + "spdLoad": 0, + "spdWaveOps": 1, + "spdPacked": 0 + }, + "scenes": [ + { + "name": "DamagedHelmet", + "directory": "..\\media\\cauldron-media\\DamagedHelmet\\GLTF\\", + "filename": "DamagedHelmet.gltf", + "TAA": true, + "toneMapper": 0, + "iblFactor": 2, + "emmisiveFactor": 1, + "intensity": 50, + "exposure": 1, + "camera": { 
+ "defaultFrom": [ 0, 0, 3.5 ], + "defaultTo": [ 0, 0, 0 ] + }, + "BenchmarkSettings": { + "timeStep": 1, + "timeStart": 0, + "timeEnd": 10000, + "exitWhenTimeEnds": true, + "resultsFilename": "FidelityFXSpd.csv", + "warmUpFrames": 200 + } + } + ] +} diff --git a/sample/src/DX12/CMakeLists.txt b/sample/src/DX12/CMakeLists.txt index 24ade70..3620699 100644 --- a/sample/src/DX12/CMakeLists.txt +++ b/sample/src/DX12/CMakeLists.txt @@ -1,20 +1,21 @@ project (SPDSample_DX12) include(${CMAKE_HOME_DIRECTORY}/common.cmake) + +add_compile_options(/MP) + set(sources CSDownsampler.cpp CSDownsampler.h PSDownsampler.cpp PSDownsampler.h - SPD_CS.cpp - SPD_CS.h - SPD_CS_Linear_Sampler.cpp - SPD_CS_Linear_Sampler.h - SPD_Versions.cpp - SPD_Versions.h - SPD_Sample.cpp - SPD_Sample.h - SPD_Renderer.cpp - SPD_Renderer.h + SPDCS.cpp + SPDCS.h + SPDVersions.cpp + SPDVersions.h + SPDSample.cpp + SPDSample.h + SPDRenderer.cpp + SPDRenderer.h stdafx.cpp stdafx.h) set(Shaders_src @@ -22,14 +23,20 @@ set(Shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../../../ffx-spd/ffx_spd.h ${CMAKE_CURRENT_SOURCE_DIR}/CSDownsampler.hlsl ${CMAKE_CURRENT_SOURCE_DIR}/PSDownsampler.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration_Linear_Sampler.hlsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegration.hlsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegrationLinearSampler.hlsl +) +set(Common_src + ${CMAKE_CURRENT_SOURCE_DIR}/../Common/SpdSample.json ) source_group("Sources" FILES ${sources}) source_group("Shaders" FILES ${Shaders_src}) +source_group("Common" FILES ${Common_src}) + # prevent VS from processing/compiling these files set_source_files_properties(${Shaders_src} PROPERTIES VS_TOOL_OVERRIDE "Text") +set_source_files_properties(${Common_src} PROPERTIES VS_TOOL_OVERRIDE "Text") function(copyCommand list dest) foreach(fullFileName ${list}) @@ -49,8 +56,9 @@ endfunction() # copy shaders and media to Bin # include("${CMAKE_HOME_DIRECTORY}/src/Common/Shaders/CMakeList.txt") 
copyCommand("${Shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibDX) +copyCommand("${Common_src}" ${CMAKE_HOME_DIRECTORY}/bin) -add_executable(${PROJECT_NAME} WIN32 ${sources} ${Shaders_src}) +add_executable(${PROJECT_NAME} WIN32 ${sources} ${Shaders_src} ${Common_src}) target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_DX12 ImGUI amd_ags DXC d3dcompiler D3D12) target_include_directories (${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../ffx-spd) set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin") diff --git a/sample/src/DX12/CSDownsampler.cpp b/sample/src/DX12/CSDownsampler.cpp index ed1a7c7..abe7222 100644 --- a/sample/src/DX12/CSDownsampler.cpp +++ b/sample/src/DX12/CSDownsampler.cpp @@ -33,19 +33,18 @@ namespace CAULDRON_DX12 { void CSDownsampler::OnCreate( Device *pDevice, + UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, - DynamicBufferRing *pConstantBufferRing, - DXGI_FORMAT outFormat + DynamicBufferRing *pConstantBufferRing ) { m_pDevice = pDevice; m_pResourceViewHeaps = pResourceViewHeaps; m_pConstantBufferRing = pConstantBufferRing; - m_outFormat = outFormat; D3D12_SHADER_BYTECODE shaderByteCode = {}; DefineList defines; - CompileShaderFromFile("CSDownsampler.hlsl", &defines, "main", "cs_6_2", 0, &shaderByteCode); + CompileShaderFromFile("CSDownsampler.hlsl", &defines, "main", "-T cs_6_0 /Zi /Zss", &shaderByteCode); // Create root signature // @@ -91,7 +90,7 @@ namespace CAULDRON_DX12 // deny uneccessary access to certain pipeline stages descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - ID3DBlob *pOutBlob, *pErrorBlob = NULL; + ID3DBlob* pOutBlob, * pErrorBlob = NULL; ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); ThrowIfFailed( pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRootSignature)) @@ -104,74 +103,53 @@ 
namespace CAULDRON_DX12 } //{ - D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; - descPso.CS = shaderByteCode; - descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - descPso.pRootSignature = m_pRootSignature; - descPso.NodeMask = 0; + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = m_pRootSignature; + descPso.NodeMask = 0; - ThrowIfFailed(pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&m_pPipeline))); + ThrowIfFailed(pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&m_pPipeline))); //} - // Allocate descriptors for the mip chain + m_cubeTexture.InitFromFile(pDevice, pUploadHeap, "..\\media\\envmaps\\papermill\\specular.dds", true, 1.0f, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + pUploadHeap->FlushAndFinish(); + + // Allocate and create descriptors for the mip chain // - for (int i = 0; i < CS_MAX_MIP_LEVELS; i++) + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount() - 1; mip++) { - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[i].m_constBuffer); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[i].m_SRV); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[i].m_RTV); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[mip].m_constBuffer); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[mip].m_SRV); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[mip].m_UAV); + + m_cubeTexture.CreateSRV(0, &m_mip[mip].m_SRV, mip); + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc; + uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // can't create SRGB UAV + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uavDesc.Texture2DArray.ArraySize = m_cubeTexture.GetArraySize(); + uavDesc.Texture2DArray.FirstArraySlice = 0; + uavDesc.Texture2DArray.MipSlice = mip + 1; + uavDesc.Texture2DArray.PlaneSlice = 0; + + m_cubeTexture.CreateUAV(0, NULL, &m_mip[mip].m_UAV, 
&uavDesc); } - } - void CSDownsampler::OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mipCount) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mipCount; - m_pInput = pInput; - - m_result.InitRenderTarget( - m_pDevice, - "CSDownsampler::m_result", - &CD3DX12_RESOURCE_DESC::Tex2D( - m_outFormat, - m_Width >> 1, - m_Height >> 1, - 1, - mipCount, - 1, - 0, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); - - // Create views for the mip chain - // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - // source - // - if (i == 0) + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount(); mip++) { - pInput->CreateSRV(0, &m_mip[i].m_SRV, 0); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_imGUISRV[slice * m_cubeTexture.GetMipCount() + mip]); + m_cubeTexture.CreateSRV(0, &m_imGUISRV[slice * m_cubeTexture.GetMipCount() + mip], mip, 1, slice); } - else - { - m_result.CreateSRV(0, &m_mip[i].m_SRV, i - 1); - } - - // destination - // - m_result.CreateUAV(0, &m_mip[i].m_RTV, i); } } - void CSDownsampler::OnDestroyWindowSizeDependentResources() - { - m_result.OnDestroy(); - } - void CSDownsampler::OnDestroy() { + m_cubeTexture.OnDestroy(); + if (m_pPipeline != NULL) { m_pPipeline->Release(); @@ -185,62 +163,81 @@ namespace CAULDRON_DX12 } } - void CSDownsampler::Draw(ID3D12GraphicsCommandList* pCommandList) + void CSDownsampler::Draw(ID3D12GraphicsCommandList *pCommandList) { UserMarker marker(pCommandList, "CSDownsampler"); // downsample // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, i)); - - D3D12_GPU_VIRTUAL_ADDRESS cbHandle; - uint32_t* pConstMem; - m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownscale), (void**)&pConstMem, &cbHandle); - cbDownscale constants; - constants.outWidth = (m_Width >> (i + 1)); - constants.outHeight = (m_Height >> (i + 1)); - constants.invWidth = 1.0f / (float)(m_Width >> i); - constants.invHeight = 1.0f / (float)(m_Height >> i); - memcpy(pConstMem, &constants, sizeof(cbDownscale)); - - // Bind Descriptor heaps and the root signature - // - ID3D12DescriptorHeap *pDescriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; - pCommandList->SetDescriptorHeaps(2, pDescriptorHeaps); - pCommandList->SetComputeRootSignature(m_pRootSignature); - - // Bind Descriptor the descriptor sets - // - int params = 0; - pCommandList->SetComputeRootConstantBufferView(params++, cbHandle); - pCommandList->SetComputeRootDescriptorTable(params++, m_mip[i].m_RTV.GetGPU()); - pCommandList->SetComputeRootDescriptorTable(params++, m_mip[i].m_SRV.GetGPU()); - - // Bind Pipeline - // - pCommandList->SetPipelineState(m_pPipeline); - - // Dispatch - // - uint32_t dispatchX = ((m_Width >> (i + 1)) + 7) / 8; - uint32_t dispatchY = ((m_Height >> (i + 1)) + 7) / 8; - uint32_t dispatchZ = 1; - pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ); - - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, i)); + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) + { + pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, i + 1)); + + D3D12_GPU_VIRTUAL_ADDRESS cbHandle; + uint32_t* pConstMem; + 
m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownsample), (void**)&pConstMem, &cbHandle); + cbDownsample constants; + constants.outWidth = (m_cubeTexture.GetWidth() >> (i + 1)); + constants.outHeight = (m_cubeTexture.GetHeight() >> (i + 1)); + constants.invWidth = 1.0f / (float)(m_cubeTexture.GetWidth() >> i); + constants.invHeight = 1.0f / (float)(m_cubeTexture.GetHeight() >> i); + constants.slice = slice; + memcpy(pConstMem, &constants, sizeof(cbDownsample)); + + // Bind Descriptor heaps and the root signature + // + ID3D12DescriptorHeap* pDescriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; + pCommandList->SetDescriptorHeaps(2, pDescriptorHeaps); + pCommandList->SetComputeRootSignature(m_pRootSignature); + + // Bind Descriptor the descriptor sets + // + int params = 0; + pCommandList->SetComputeRootConstantBufferView(params++, cbHandle); + pCommandList->SetComputeRootDescriptorTable(params++, m_mip[i].m_UAV.GetGPU()); + pCommandList->SetComputeRootDescriptorTable(params++, m_mip[i].m_SRV.GetGPU()); + + // Bind Pipeline + // + pCommandList->SetPipelineState(m_pPipeline); + + // Dispatch + // + uint32_t dispatchX = ((m_cubeTexture.GetWidth() >> (i + 1)) + 7) / 8; + uint32_t dispatchY = ((m_cubeTexture.GetHeight() >> (i + 1)) + 7) / 8; + uint32_t dispatchZ = 1; + pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ); + + pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, i + 1)); + } } } - void CSDownsampler::Gui() + void CSDownsampler::GUI(int *pSlice) { bool opened = true; - ImGui::Begin("Downsample", &opened); + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); - for (int i = 0; i < m_mipCount; i++) + if (ImGui::CollapsingHeader("CS Multipass", ImGuiTreeNodeFlags_DefaultOpen)) { - 
ImGui::Image((ImTextureID)&m_mip[i].m_SRV, ImVec2(320, 180)); + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)&m_imGUISRV[*pSlice * m_cubeTexture.GetMipCount() + i], ImVec2(static_cast(512 >> i), static_cast(512 >> i))); + } } ImGui::End(); diff --git a/sample/src/DX12/CSDownsampler.h b/sample/src/DX12/CSDownsampler.h index 4a64086..c6be9be 100644 --- a/sample/src/DX12/CSDownsampler.h +++ b/sample/src/DX12/CSDownsampler.h @@ -28,48 +28,42 @@ namespace CAULDRON_DX12 class CSDownsampler { public: - void OnCreate(Device *pDevice, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, DXGI_FORMAT outFormat); + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing); void OnDestroy(); - void OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); + void Draw(ID3D12GraphicsCommandList *pCommandList); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int *pSlice); - void Draw(ID3D12GraphicsCommandList* pCommandList); - Texture *GetTexture() { return &m_result; } - CBV_SRV_UAV GetTextureView(int i) { return m_mip[i].m_SRV; } - void Gui(); - - struct cbDownscale + struct cbDownsample { uint32_t outWidth, outHeight; float invWidth, invHeight; + uint32_t slice; + uint32_t padding[3]; }; private: - Device* m_pDevice = nullptr; - DXGI_FORMAT m_outFormat; + Device *m_pDevice = nullptr; - Texture *m_pInput; - Texture m_result; + Texture m_cubeTexture; struct Pass { CBV_SRV_UAV m_constBuffer; // dimension - CBV_SRV_UAV m_RTV; //dest -> more like UAV x) + CBV_SRV_UAV m_UAV; //dest CBV_SRV_UAV m_SRV; //src }; Pass 
m_mip[CS_MAX_MIP_LEVELS]; - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; - ID3D12RootSignature *m_pRootSignature; - ID3D12PipelineState *m_pPipeline = NULL; + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; + DynamicBufferRing *m_pConstantBufferRing = nullptr; + ID3D12RootSignature *m_pRootSignature = nullptr; + ID3D12PipelineState *m_pPipeline = nullptr; - SAMPLER m_Sampler; + SAMPLER m_sampler; - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; + CBV_SRV_UAV m_imGUISRV[CS_MAX_MIP_LEVELS * 6]; }; } \ No newline at end of file diff --git a/sample/src/DX12/CSDownsampler.hlsl b/sample/src/DX12/CSDownsampler.hlsl index 442b178..8e54c00 100644 --- a/sample/src/DX12/CSDownsampler.hlsl +++ b/sample/src/DX12/CSDownsampler.hlsl @@ -22,16 +22,17 @@ //-------------------------------------------------------------------------------------- cbuffer cbPerFrame : register(b0) { - uint2 u_outSize; + uint2 u_outSize; float2 u_invSize; + uint u_slice; } //-------------------------------------------------------------------------------------- // Texture definitions //-------------------------------------------------------------------------------------- -RWTexture2D outputTex : register(u0); -Texture2D inputTex : register(t0); -SamplerState samLinearMirror : register(s0); +RWTexture2DArray outputTex : register(u0); +Texture2DArray inputTex : register(t0); +SamplerState samLinear : register(s0); // Main function //-------------------------------------------------------------------------------------- @@ -43,5 +44,10 @@ void main(uint3 LocalThreadId : SV_GroupThreadID, uint3 WorkGroupId : SV_GroupID return; float2 samplerTexCoord = u_invSize * float2(DispatchId.xy) * 2.0 + u_invSize; - outputTex[DispatchId.xy] = inputTex.SampleLevel(samLinearMirror, samplerTexCoord, 0); + float4 result = inputTex.SampleLevel(samLinear, float3(samplerTexCoord, u_slice), 0); + result.r = max(min(result.r * (12.92), (0.0031308)), (1.055) * pow(result.r, 
(0.41666)) - (0.055)); + result.g = max(min(result.g * (12.92), (0.0031308)), (1.055) * pow(result.g, (0.41666)) - (0.055)); + result.b = max(min(result.b * (12.92), (0.0031308)), (1.055) * pow(result.b, (0.41666)) - (0.055)); + + outputTex[int3(DispatchId.xy, u_slice)] = result; } \ No newline at end of file diff --git a/sample/src/DX12/PSDownsampler.cpp b/sample/src/DX12/PSDownsampler.cpp index 06dcf05..2bde4f0 100644 --- a/sample/src/DX12/PSDownsampler.cpp +++ b/sample/src/DX12/PSDownsampler.cpp @@ -32,17 +32,16 @@ namespace CAULDRON_DX12 { void PSDownsampler::OnCreate( Device *pDevice, + UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, - StaticBufferPool *pStaticBufferPool, - DXGI_FORMAT outFormat + StaticBufferPool *pStaticBufferPool ) { m_pDevice = pDevice; m_pStaticBufferPool = pStaticBufferPool; m_pResourceViewHeaps = pResourceViewHeaps; m_pConstantBufferRing = pConstantBufferRing; - m_outFormat = outFormat; // Use helper class to create the fullscreen pass // @@ -62,99 +61,104 @@ namespace CAULDRON_DX12 SamplerDesc.RegisterSpace = 0; SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - m_downscale.OnCreate(pDevice, "PSDownsampler.hlsl", m_pResourceViewHeaps, - m_pStaticBufferPool, 1, 1, &SamplerDesc, m_outFormat); + m_cubeTexture.InitFromFile(pDevice, pUploadHeap, "..\\media\\envmaps\\papermill\\specular.dds", true, 1.0f, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + pUploadHeap->FlushAndFinish(); + + m_downsample.OnCreate(pDevice, "PSDownsampler.hlsl", m_pResourceViewHeaps, + m_pStaticBufferPool, 1, 1, &SamplerDesc, m_cubeTexture.GetFormat()); // Allocate descriptors for the mip chain // - for (int i = 0; i < DOWNSAMPLEPS_MAX_MIP_LEVELS; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[i].m_SRV); - m_pResourceViewHeaps->AllocRTVDescriptor(1, &m_mip[i].m_RTV); - } - - } - - void 
PSDownsampler::OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mipCount) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mipCount; - m_pInput = pInput; + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) + { + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_mip[slice * (m_cubeTexture.GetMipCount() - 1) + i].m_SRV); + m_pResourceViewHeaps->AllocRTVDescriptor(1, &m_mip[slice * (m_cubeTexture.GetMipCount() - 1) + i].m_RTV); - m_result.InitRenderTarget(m_pDevice, "PSDownsampler::m_result", &CD3DX12_RESOURCE_DESC::Tex2D(m_outFormat, m_Width >> 1, m_Height >> 1, 1, mipCount, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + m_cubeTexture.CreateSRV(0, &m_mip[slice * (m_cubeTexture.GetMipCount() - 1) + i].m_SRV, i, 1, slice); + m_cubeTexture.CreateRTV(0, &m_mip[slice * (m_cubeTexture.GetMipCount() - 1) + i].m_RTV, i + 1, 1, slice); + } + } - // Create views for the mip chain - // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - // source - // - if (i == 0) - { - pInput->CreateSRV(0, &m_mip[i].m_SRV, 0); - } - else + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount(); mip++) { - m_result.CreateSRV(0, &m_mip[i].m_SRV, i - 1); + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_imGUISRV[slice * m_cubeTexture.GetMipCount() + mip]); + m_cubeTexture.CreateSRV(0, &m_imGUISRV[slice * m_cubeTexture.GetMipCount() + mip], mip, 1, slice); } - - // destination - // - m_result.CreateRTV(0, &m_mip[i].m_RTV, i); } } - void PSDownsampler::OnDestroyWindowSizeDependentResources() - { - m_result.OnDestroy(); - } - void PSDownsampler::OnDestroy() { - m_downscale.OnDestroy(); + m_cubeTexture.OnDestroy(); + m_downsample.OnDestroy(); } - void PSDownsampler::Draw(ID3D12GraphicsCommandList* pCommandList) + void PSDownsampler::Draw(ID3D12GraphicsCommandList *pCommandList) { UserMarker marker(pCommandList, 
"PSDownsampler"); // downsample // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - pCommandList->OMSetRenderTargets(1, &m_mip[i].m_RTV.GetCPU(), true, NULL); - SetViewportAndScissor(pCommandList, 0, 0, m_Width >> (i + 1), m_Height >> (i + 1)); - - cbDownscale *data; - D3D12_GPU_VIRTUAL_ADDRESS constantBuffer; - m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownscale), (void **)&data, &constantBuffer); - data->outWidth = (float)(m_Width >> (i + 1)); - data->outHeight = (float)(m_Height >> (i + 1)); - data->invWidth = 1.0f / (float)(m_Width >> i); - data->invHeight = 1.0f / (float)(m_Height >> i); - - if (i > 0) + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) { - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, i - 1)); + pCommandList->ResourceBarrier(1, + &CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, + D3D12_RESOURCE_STATE_RENDER_TARGET, + slice * m_cubeTexture.GetMipCount() + i + 1)); + + pCommandList->OMSetRenderTargets(1, &m_mip[slice * (m_cubeTexture.GetMipCount() - 1) + i].m_RTV.GetCPU(), true, NULL); + SetViewportAndScissor(pCommandList, 0, 0, m_cubeTexture.GetWidth() >> (i + 1), m_cubeTexture.GetHeight() >> (i + 1)); + + cbDownsample* data; + D3D12_GPU_VIRTUAL_ADDRESS constantBuffer; + m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownsample), (void**)&data, &constantBuffer); + data->outWidth = (float)(m_cubeTexture.GetWidth() >> (i + 1)); + data->outHeight = (float)(m_cubeTexture.GetHeight() >> (i + 1)); + data->invWidth = 1.0f / (float)(m_cubeTexture.GetWidth() >> i); + data->invHeight = 1.0f / (float)(m_cubeTexture.GetHeight() >> i); + data->slice = slice; + + m_downsample.Draw(pCommandList, 1, &m_mip[slice * 
(m_cubeTexture.GetMipCount() - 1) + i].m_SRV, constantBuffer); + + pCommandList->ResourceBarrier(1, + &CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), + D3D12_RESOURCE_STATE_RENDER_TARGET, + D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, + slice * m_cubeTexture.GetMipCount() + i + 1)); } - - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, i)); - - m_downscale.Draw(pCommandList, 1, &m_mip[i].m_SRV, constantBuffer); } - - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, m_mipCount - 1)); } - void PSDownsampler::Gui() + void PSDownsampler::GUI(int *pSlice) { bool opened = true; - ImGui::Begin("Downsample", &opened); + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); - for (int i = 0; i < m_mipCount; i++) + if (ImGui::CollapsingHeader("PS", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Image((ImTextureID)&m_mip[i].m_SRV, ImVec2(320, 180)); + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)&m_imGUISRV[*pSlice * m_cubeTexture.GetMipCount() + i], ImVec2(static_cast(512 >> i), static_cast(512 >> i))); + } } ImGui::End(); diff --git a/sample/src/DX12/PSDownsampler.h b/sample/src/DX12/PSDownsampler.h index 71e98b7..e39ebb9 100644 --- a/sample/src/DX12/PSDownsampler.h +++ b/sample/src/DX12/PSDownsampler.h @@ -29,29 +29,25 @@ namespace CAULDRON_DX12 class PSDownsampler { public: - void OnCreate(Device *pDevice, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, 
StaticBufferPool *pStaticBufferPool, DXGI_FORMAT outFormat); + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, StaticBufferPool *pStaticBufferPool); void OnDestroy(); - void OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); + void Draw(ID3D12GraphicsCommandList *pCommandList); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int *pSlice); - void Draw(ID3D12GraphicsCommandList* pCommandList); - Texture *GetTexture() { return &m_result; } - CBV_SRV_UAV GetTextureView(int i) { return m_mip[i].m_SRV; } - void Gui(); - - struct cbDownscale + struct cbDownsample { float outWidth, outHeight; float invWidth, invHeight; + int slice; + int padding[3]; }; private: - Device* m_pDevice = nullptr; - DXGI_FORMAT m_outFormat; + Device *m_pDevice = nullptr; - Texture *m_pInput; - Texture m_result; + Texture m_cubeTexture; struct Pass { @@ -59,16 +55,14 @@ namespace CAULDRON_DX12 CBV_SRV_UAV m_SRV; //src }; - Pass m_mip[PS_MAX_MIP_LEVELS]; + Pass m_mip[PS_MAX_MIP_LEVELS * 6]; - StaticBufferPool *m_pStaticBufferPool; - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; + StaticBufferPool *m_pStaticBufferPool = nullptr; + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; + DynamicBufferRing *m_pConstantBufferRing = nullptr; - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; + PostProcPS m_downsample; - PostProcPS m_downscale; + CBV_SRV_UAV m_imGUISRV[PS_MAX_MIP_LEVELS * 6]; }; } \ No newline at end of file diff --git a/sample/src/DX12/PSDownsampler.hlsl b/sample/src/DX12/PSDownsampler.hlsl index 4bb1185..020e1e3 100644 --- a/sample/src/DX12/PSDownsampler.hlsl +++ b/sample/src/DX12/PSDownsampler.hlsl @@ -38,7 +38,7 @@ struct VERTEX // Texture definitions //-------------------------------------------------------------------------------------- Texture2D 
inputTex :register(t0); -SamplerState samLinearMirror :register(s0); +SamplerState samLinear :register(s0); //-------------------------------------------------------------------------------------- // Main function @@ -46,7 +46,8 @@ SamplerState samLinearMirror :register(s0); float4 mainPS(VERTEX Input) : SV_Target { + // as compute shader solution float2 texCoord = Input.vTexcoord * u_outSize; texCoord = texCoord * u_invSize * 2.0; - return inputTex.Sample(samLinearMirror, texCoord); + return inputTex.Sample(samLinear, texCoord); } diff --git a/sample/src/DX12/SPDCS.cpp b/sample/src/DX12/SPDCS.cpp new file mode 100644 index 0000000..95a2d63 --- /dev/null +++ b/sample/src/DX12/SPDCS.cpp @@ -0,0 +1,443 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "stdafx.h" +#include "base\Device.h" +#include "base\DynamicBufferRing.h" +#include "base\StaticBufferPool.h" +#include "base\UploadHeap.h" +#include "base\Texture.h" +#include "base\Imgui.h" +#include "base\Helper.h" +#include "Base\ShaderCompilerHelper.h" + +#include "SPDCS.h" + +#define A_CPU +#include "ffx_a.h" +#include "ffx_spd.h" + +namespace CAULDRON_DX12 +{ + void SPDCS::OnCreate( + Device *pDevice, + UploadHeap *pUploadHeap, + ResourceViewHeaps *pResourceViewHeaps, + DynamicBufferRing *pConstantBufferRing, + SPDLoad spdLoad, + SPDWaveOps spdWaveOps, + SPDPacked spdPacked + ) + { + m_pDevice = pDevice; + m_pResourceViewHeaps = pResourceViewHeaps; + m_pConstantBufferRing = pConstantBufferRing; + + m_spdLoad = spdLoad; + m_spdWaveOps = spdWaveOps; + m_spdPacked = spdPacked; + + D3D12_SHADER_BYTECODE shaderByteCode = {}; + DefineList defines; + + if (m_spdWaveOps == SPDWaveOps::SPDNoWaveOps) { + defines["SPD_NO_WAVE_OPERATIONS"] = 1; + } + if (m_spdPacked == SPDPacked::SPDPacked) { + defines["A_HALF"] = 1; + defines["SPD_PACKED_ONLY"] = 1; + } + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + CompileShaderFromFile("SPDIntegrationLinearSampler.hlsl", &defines, "main", "-T cs_6_2 /Zi /Zss", &shaderByteCode); + } + else { + CompileShaderFromFile("SPDIntegration.hlsl", &defines, "main", "-T cs_6_2 /Zi /Zss", &shaderByteCode); + } + + // Create root signature + // Spd Load version + if (m_spdLoad == SPDLoad::SPDLoad) + { + CD3DX12_DESCRIPTOR_RANGE DescRange[5]; + CD3DX12_ROOT_PARAMETER RTSlot[5]; + + // we'll always have a constant buffer + int parameterCount = 0; + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); + RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); + + // UAV table + global counter buffer + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL); + + // output mips 
+ DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[2], D3D12_SHADER_VISIBILITY_ALL); + + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, SPD_MAX_MIP_LEVELS + 1, 3); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[3], D3D12_SHADER_VISIBILITY_ALL); + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // SRV table + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[4], D3D12_SHADER_VISIBILITY_ALL); + } + + // the root signature contains 4 slots to be used + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = RTSlot; + descRootSignature.NumStaticSamplers = 0; + descRootSignature.pStaticSamplers = NULL; + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + D3D12_STATIC_SAMPLER_DESC SamplerDesc = {}; + SamplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + SamplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + SamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + SamplerDesc.MinLOD = 0.0f; + SamplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + SamplerDesc.MipLODBias = 0; + SamplerDesc.MaxAnisotropy = 1; + SamplerDesc.ShaderRegister = 0; + SamplerDesc.RegisterSpace = 0; + SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + descRootSignature.NumStaticSamplers = 1; + descRootSignature.pStaticSamplers = &SamplerDesc; + } + + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob, * pErrorBlob = NULL; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, 
D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRootSignature)) + ); + SetName(m_pRootSignature, std::string("PostProcCS::") + "SPD_CS"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + + // SPD Linear Sampler version + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + CD3DX12_DESCRIPTOR_RANGE DescRange[5]; + CD3DX12_ROOT_PARAMETER RTSlot[5]; + + // we'll always have a constant buffer + int parameterCount = 0; + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); + RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); + + // UAV table + global counter buffer + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL); + + // output mips + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 2); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[2], D3D12_SHADER_VISIBILITY_ALL); + + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, SPD_MAX_MIP_LEVELS + 1, 3); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[3], D3D12_SHADER_VISIBILITY_ALL); + + // SRV table + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); + RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[4], D3D12_SHADER_VISIBILITY_ALL); + + D3D12_STATIC_SAMPLER_DESC SamplerDesc = {}; + SamplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + SamplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + SamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + SamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + SamplerDesc.MinLOD = 0.0f; + SamplerDesc.MaxLOD = 
D3D12_FLOAT32_MAX; + SamplerDesc.MipLODBias = 0; + SamplerDesc.MaxAnisotropy = 1; + SamplerDesc.ShaderRegister = 0; + SamplerDesc.RegisterSpace = 0; + SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // the root signature contains 4 slots to be used + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = RTSlot; + descRootSignature.NumStaticSamplers = 1; + descRootSignature.pStaticSamplers = &SamplerDesc; + + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob* pOutBlob, * pErrorBlob = NULL; + ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); + ThrowIfFailed( + pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRootSignature)) + ); + SetName(m_pRootSignature, std::string("PostProcCS::") + "SPD_CS"); + + pOutBlob->Release(); + if (pErrorBlob) + pErrorBlob->Release(); + } + + { + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = m_pRootSignature; + descPso.NodeMask = 0; + + ThrowIfFailed(pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&m_pPipeline))); + } + + m_cubeTexture.InitFromFile(pDevice, pUploadHeap, "..\\media\\envmaps\\papermill\\specular.dds", true, 1.0f, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + pUploadHeap->FlushAndFinish(); + + // Allocate descriptors for the mip chain + // + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_constBuffer); + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_sourceSRV); + m_cubeTexture.CreateSRV(0, &m_sourceSRV, 0, m_cubeTexture.GetArraySize(), 0); + } + + uint32_t numUAVs = 
m_cubeTexture.GetMipCount(); + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // we need one UAV less because source texture will be bound as SRV and not as UAV + numUAVs = m_cubeTexture.GetMipCount() - 1; + } + + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(numUAVs, m_UAV); + + // Create views for the mip chain + // + // destination + // + for (uint32_t i = 0; i < numUAVs; i++) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc; + uavDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; // can't create SRGB UAV + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uavDesc.Texture2DArray.ArraySize = m_cubeTexture.GetArraySize(); + uavDesc.Texture2DArray.FirstArraySlice = 0; + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + uavDesc.Texture2DArray.MipSlice = i + 1; + } + else { + uavDesc.Texture2DArray.MipSlice = i; + } + uavDesc.Texture2DArray.PlaneSlice = 0; + + m_cubeTexture.CreateUAV(i, NULL, m_UAV, &uavDesc); + } + + // for GUI + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) + { + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount(); mip++) + { + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SRV[slice * m_cubeTexture.GetMipCount() + mip]); + m_cubeTexture.CreateSRV(0, &m_SRV[slice * m_cubeTexture.GetMipCount() + mip], mip, 1, slice); + } + } + + m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_globalCounter); + m_globalCounterBuffer.InitBuffer(m_pDevice, "SPD_CS::m_globalCounterBuffer", + &CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t) * m_cubeTexture.GetArraySize(), // 6 slices + D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), + sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + m_globalCounterBuffer.CreateBufferUAV(0, NULL, &m_globalCounter); + } + + void SPDCS::OnDestroy() + { + m_globalCounterBuffer.OnDestroy(); + m_cubeTexture.OnDestroy(); + + if (m_pPipeline != NULL) + { + m_pPipeline->Release(); + m_pPipeline = NULL; + } + + if (m_pRootSignature != NULL) + { + m_pRootSignature->Release(); + 
m_pRootSignature = NULL; + } + } + + void SPDCS::Draw(ID3D12GraphicsCommandList2 *pCommandList) + { + UserMarker marker(pCommandList, "SPDCS"); + + varAU2(dispatchThreadGroupCountXY); + varAU2(workGroupOffset); // needed if Left and Top are not 0,0 + varAU2(numWorkGroupsAndMips); + varAU4(rectInfo) = initAU4(0, 0, m_cubeTexture.GetWidth(), m_cubeTexture.GetHeight()); // left, top, width, height + SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + + // downsample + uint32_t dispatchX = dispatchThreadGroupCountXY[0]; + uint32_t dispatchY = dispatchThreadGroupCountXY[1]; + uint32_t dispatchZ = m_cubeTexture.GetArraySize(); + + D3D12_GPU_VIRTUAL_ADDRESS cbHandle; + uint32_t* pConstMem; + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + m_pConstantBufferRing->AllocConstantBuffer(sizeof(SpdLinearSamplerConstants), (void**)&pConstMem, &cbHandle); + SpdLinearSamplerConstants constants; + constants.numWorkGroupsPerSlice = numWorkGroupsAndMips[0]; + constants.mips = numWorkGroupsAndMips[1]; + constants.workGroupOffset[0] = workGroupOffset[0]; + constants.workGroupOffset[1] = workGroupOffset[1]; + constants.invInputSize[0] = 1.0f / m_cubeTexture.GetWidth(); + constants.invInputSize[1] = 1.0f / m_cubeTexture.GetHeight(); + memcpy(pConstMem, &constants, sizeof(SpdLinearSamplerConstants)); + } + else { + m_pConstantBufferRing->AllocConstantBuffer(sizeof(SpdConstants), (void**)&pConstMem, &cbHandle); + SpdConstants constants; + constants.numWorkGroupsPerSlice = numWorkGroupsAndMips[0]; + constants.mips = numWorkGroupsAndMips[1]; + constants.workGroupOffset[0] = workGroupOffset[0]; + constants.workGroupOffset[1] = workGroupOffset[1]; + memcpy(pConstMem, &constants, sizeof(SpdConstants)); + } + + // Bind Descriptor heaps and the root signature + // + ID3D12DescriptorHeap *pDescriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; + pCommandList->SetDescriptorHeaps(2, pDescriptorHeaps); + 
pCommandList->SetComputeRootSignature(m_pRootSignature); + + // Bind the descriptor sets + // + int params = 0; + pCommandList->SetComputeRootConstantBufferView(params++, cbHandle); + pCommandList->SetComputeRootDescriptorTable(params++, m_globalCounter.GetGPU()); + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + pCommandList->SetComputeRootDescriptorTable(params++, m_UAV[0].GetGPU(5)); + } + else { + pCommandList->SetComputeRootDescriptorTable(params++, m_UAV[0].GetGPU(6)); + } + // bind UAVs + pCommandList->SetComputeRootDescriptorTable(params++, m_UAV[0].GetGPU()); + + // bind SRV + if (m_spdLoad == SPDLoad::SPDLinearSampler) { + pCommandList->SetComputeRootDescriptorTable(params++, m_sourceSRV.GetGPU()); + } + // Bind Pipeline + // + pCommandList->SetPipelineState(m_pPipeline); + + // set counter to 0 + pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0)); + + D3D12_WRITEBUFFERIMMEDIATE_PARAMETER pParams[6]; + for (int i = 0; i < 6; i++) + { + pParams[i] = { m_globalCounterBuffer.GetResource()->GetGPUVirtualAddress() + sizeof(uint32_t) * i, 0 }; + } + pCommandList->WriteBufferImmediate(6, pParams, NULL); // 6 counters (one per slice), each initialized to 0 + + D3D12_RESOURCE_BARRIER resourceBarriers[2] = { + CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0), + CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) + }; + pCommandList->ResourceBarrier(2, resourceBarriers); + + // Dispatch + // + pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ); + pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_cubeTexture.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); + } + + void SPDCS::GUI(int *pSlice) + { + bool opened = true; + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); + + std::string downsampleHeader = "SPD CS"; + if (m_spdLoad == SPDLoad::SPDLoad) { + downsampleHeader += " Load"; + } + else { + downsampleHeader += " Linear Sampler"; + } + + if (m_spdWaveOps == SPDWaveOps::SPDWaveOps) + { + downsampleHeader += " WaveOps"; + } + else { + downsampleHeader += " No WaveOps"; + } + + if (m_spdPacked == SPDPacked::SPDNonPacked) + { + downsampleHeader += " Non Packed"; + } + else { + downsampleHeader += " Packed"; + } + + if (ImGui::CollapsingHeader(downsampleHeader.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) + { + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)&m_SRV[*pSlice * m_cubeTexture.GetMipCount() + i], ImVec2(static_cast<float>(512 >> i), static_cast<float>(512 >> i))); + } + } + + ImGui::End(); + } +} \ No newline at end of file diff --git a/sample/src/DX12/SPDCS.h b/sample/src/DX12/SPDCS.h new file mode 100644 index 0000000..78c9f71 --- /dev/null +++ b/sample/src/DX12/SPDCS.h @@ -0,0 +1,95 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+#pragma once + +#include "Base/DynamicBufferRing.h" +#include "Base/Texture.h" + +namespace CAULDRON_DX12 +{ +#define SPD_MAX_MIP_LEVELS 12 + + enum class SPDWaveOps + { + SPDNoWaveOps, + SPDWaveOps, + }; + + enum class SPDPacked + { + SPDNonPacked, + SPDPacked, + }; + + enum class SPDLoad + { + SPDLoad, + SPDLinearSampler, + }; + + class SPDCS + { + public: + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, + SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked); + void OnDestroy(); + + void Draw(ID3D12GraphicsCommandList2 *pCommandList); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int *pSlice); + + struct SpdConstants + { + int mips; + int numWorkGroupsPerSlice; + int workGroupOffset[2]; + }; + + struct SpdLinearSamplerConstants + { + int mips; + int numWorkGroupsPerSlice; + int workGroupOffset[2]; + float invInputSize[2]; + float padding[2]; + }; + + private: + Device *m_pDevice = nullptr; + + Texture m_cubeTexture; + + CBV_SRV_UAV m_constBuffer; // dimension + CBV_SRV_UAV m_UAV[SPD_MAX_MIP_LEVELS + 1]; //src + dest mips + CBV_SRV_UAV m_SRV[SPD_MAX_MIP_LEVELS * 6]; // for display of mips using imGUI + CBV_SRV_UAV m_sourceSRV; // src + + CBV_SRV_UAV m_globalCounter; + Texture m_globalCounterBuffer; + + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; + DynamicBufferRing *m_pConstantBufferRing = nullptr; + ID3D12RootSignature *m_pRootSignature = nullptr; + ID3D12PipelineState *m_pPipeline = nullptr; + + SPDLoad m_spdLoad; + SPDWaveOps m_spdWaveOps; + SPDPacked m_spdPacked; + }; +} \ No newline at end of file diff --git a/sample/src/DX12/SPDIntegration.hlsl b/sample/src/DX12/SPDIntegration.hlsl new file mode 100644 index 0000000..6dfaae9 --- /dev/null +++ b/sample/src/DX12/SPDIntegration.hlsl @@ -0,0 +1,189 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +// when using amd shader intrinsics +// #include "ags_shader_intrinsics_dx12.h" + +//-------------------------------------------------------------------------------------- +// Constant Buffer +//-------------------------------------------------------------------------------------- +cbuffer spdConstants : register(b0) +{ + uint mips; + uint numWorkGroups; + uint2 workGroupOffset; +} + +//-------------------------------------------------------------------------------------- +// Texture definitions +//-------------------------------------------------------------------------------------- +RWTexture2DArray<float4> imgDst[13] : register(u3); // don't access MIP [6] +globallycoherent RWTexture2DArray<float4> imgDst6 : register(u2); + +//-------------------------------------------------------------------------------------- +// Buffer definitions - global atomic counter +//-------------------------------------------------------------------------------------- +struct SpdGlobalAtomicBuffer +{ + uint counter[6]; +}; +globallycoherent RWStructuredBuffer<SpdGlobalAtomicBuffer> spdGlobalAtomic :register(u1); + +#define A_GPU +#define A_HLSL + +#include "ffx_a.h" + +groupshared AU1 spdCounter; + +#ifndef SPD_PACKED_ONLY +groupshared AF1 spdIntermediateR[16][16]; +groupshared AF1 spdIntermediateG[16][16]; +groupshared AF1 spdIntermediateB[16][16]; +groupshared AF1 spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(AF2 tex, AU1 slice) +{ + return imgDst[0][float3(tex, slice)]; +} +AF4 SpdLoad(ASU2 tex, AU1 slice) +{ + return imgDst6[uint3(tex, slice)]; +} +void SpdStore(ASU2 pix, AF4 outValue, AU1 index, AU1 slice) +{ + if (index == 5) + { + imgDst6[uint3(pix, slice)] = outValue; + return; + } + imgDst[index + 1][uint3(pix, slice)] = outValue; +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AF4 
SpdLoadIntermediate(AU1 x, AU1 y) +{ + return AF4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} +#endif + +// define fetch and store functions Packed +#ifdef A_HALF +groupshared AH2 spdIntermediateRG[16][16]; +groupshared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(AF2 tex, AU1 slice) +{ + return AH4(imgDst[0][float3(tex, slice)]); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imgDst6[uint3(p, slice)]); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imgDst6[uint3(p, slice)] = AF4(value); + return; + } + imgDst[mip + 1][uint3(p, slice)] = AF4(value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AH4 SpdLoadIntermediateH(AU1 x, AU1 y) +{ + return AH4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25); +} +#endif + +#include "ffx_spd.h" + +// Main function +//-------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------- +[numthreads(256, 1, 1)] +void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ +#ifndef A_HALF + SpdDownsample( + 
AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(mips), + AU1(numWorkGroups), + AU1(WorkGroupId.z), + AU2(workGroupOffset)); +#else + SpdDownsampleH( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(mips), + AU1(numWorkGroups), + AU1(WorkGroupId.z), + AU2(workGroupOffset)); +#endif + } \ No newline at end of file diff --git a/sample/src/DX12/SPDIntegrationLinearSampler.hlsl b/sample/src/DX12/SPDIntegrationLinearSampler.hlsl new file mode 100644 index 0000000..9492e35 --- /dev/null +++ b/sample/src/DX12/SPDIntegrationLinearSampler.hlsl @@ -0,0 +1,200 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +// when using amd shader intrinsics +// #include "ags_shader_intrinsics_dx12.h" + +//-------------------------------------------------------------------------------------- +// Constant Buffer +//-------------------------------------------------------------------------------------- +cbuffer spdConstants : register(b0) +{ + uint mips; + uint numWorkGroups; + uint2 workGroupOffset; + float2 invInputSize; +} + +//-------------------------------------------------------------------------------------- +// Texture definitions +//-------------------------------------------------------------------------------------- +globallycoherent RWTexture2DArray<float4> imgDst5 : register(u2); +RWTexture2DArray<float4> imgDst[12] : register(u3); // do not access MIP [5] +Texture2DArray imgSrc : register(t0); +SamplerState srcSampler : register(s0); + +//-------------------------------------------------------------------------------------- +// Buffer definitions - global atomic counter +//-------------------------------------------------------------------------------------- +struct SpdGlobalAtomicBuffer +{ + uint counter[6]; +}; +globallycoherent RWStructuredBuffer<SpdGlobalAtomicBuffer> spdGlobalAtomic :register(u1); + +#define A_GPU +#define A_HLSL + +#include "ffx_a.h" + +groupshared AU1 spdCounter; + +#ifndef SPD_PACKED_ONLY +groupshared AF1 spdIntermediateR[16][16]; +groupshared AF1 spdIntermediateG[16][16]; +groupshared AF1 spdIntermediateB[16][16]; +groupshared AF1 spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * invInputSize + invInputSize; + AF4 result = imgSrc.SampleLevel(srcSampler, float3(textureCoord, slice), 0); + result = AF4(AToSrgbF1(result.x), AToSrgbF1(result.y), AToSrgbF1(result.z), result.w); + return result; +} +AF4 SpdLoad(ASU2 tex, AU1 slice) +{ + return imgDst5[uint3(tex, slice)]; +} +void SpdStore(ASU2 pix, AF4 outValue, AU1 index, AU1 slice) +{ + if (index == 5) + { + imgDst5[uint3(pix, slice)] = outValue; + return; + } + imgDst[index][uint3(pix, 
slice)] = outValue; +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AF4 SpdLoadIntermediate(AU1 x, AU1 y) +{ + return AF4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} +#endif + +// define fetch and store functions Packed +#ifdef A_HALF +groupshared AH2 spdIntermediateRG[16][16]; +groupshared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * invInputSize + invInputSize; + AF4 result = imgSrc.SampleLevel(srcSampler, float3(textureCoord, slice), 0); + result = AF4(AToSrgbF1(result.x), AToSrgbF1(result.y), AToSrgbF1(result.z), result.w); + return AH4(result); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imgDst5[uint3(p, slice)]); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imgDst5[uint3(p, slice)] = AF4(value); + return; + } + imgDst[mip][uint3(p, slice)] = AF4(value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AH4 SpdLoadIntermediateH(AU1 x, AU1 y) +{ + return AH4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = 
value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25); +} +#endif + +#define SPD_LINEAR_SAMPLER + +#include "ffx_spd.h" + +// Main function +//-------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------- +[numthreads(256, 1, 1)] +void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ +#ifndef A_HALF + SpdDownsample( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(mips), + AU1(numWorkGroups), + AU1(WorkGroupId.z), + AU2(workGroupOffset)); +#else + SpdDownsampleH( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(mips), + AU1(numWorkGroups), + AU1(WorkGroupId.z), + AU2(workGroupOffset)); +#endif + } \ No newline at end of file diff --git a/sample/src/DX12/SPD_Renderer.cpp b/sample/src/DX12/SPDRenderer.cpp similarity index 78% rename from sample/src/DX12/SPD_Renderer.cpp rename to sample/src/DX12/SPDRenderer.cpp index 2f29311..5c4e0e9 100644 --- a/sample/src/DX12/SPD_Renderer.cpp +++ b/sample/src/DX12/SPDRenderer.cpp @@ -19,19 +19,16 @@ #include "stdafx.h" -#include "SPD_Renderer.h" +#include "SPDRenderer.h" //-------------------------------------------------------------------------------------- // // OnCreate // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) +void SPDRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) { m_pDevice = pDevice; - - m_format = DXGI_FORMAT_R16G16B16A16_FLOAT; - // Initialize helpers // Create all the heaps for the resources views @@ -45,15 +42,15 @@ void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) // Create a commandlist ring for the Direct queue uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); + 
m_commandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); // Create a 'dynamic' constant buffer const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_resourceViewHeaps); + m_constantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_resourceViewHeaps); // Create a 'static' pool for vertices, indices and constant buffers const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + m_vidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); // initialize the GPU time stamps module m_GPUTimer.OnCreate(pDevice, backBufferCount); @@ -61,7 +58,7 @@ void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) // Quick helper to upload resources, it has it's own commandList and uses suballocation. // for 4K textures we'll need 100Megs const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) + m_uploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) // Create the depth buffer view m_resourceViewHeaps.AllocDSVDescriptor(1, &m_depthBufferDSV); @@ -73,20 +70,20 @@ void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) m_shadowMap.CreateDSV(0, &m_ShadowMapDSV); m_shadowMap.CreateSRV(0, &m_ShadowMapSRV); - m_skyDome.OnCreate(pDevice, &m_UploadHeap, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 4); - m_skyDomeProc.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, m_format, 4); - m_wireframe.OnCreate(pDevice, 
&m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); - m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); + m_skyDome.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_skyDomeProc.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_wireframe.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool); - m_PSDownsampler.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, m_format); - m_CSDownsampler.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, m_format); - m_SPD_Versions.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, m_format); + m_PSDownsampler.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool); + m_CSDownsampler.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing); + m_SPDVersions.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing); // Create tonemapping pass - m_toneMapping.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, pSwapChain->GetFormat()); + m_toneMapping.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, pSwapChain->GetFormat()); // Initialize UI rendering resources - m_ImGUI.OnCreate(pDevice, &m_UploadHeap, &m_resourceViewHeaps, &m_ConstantBufferRing, pSwapChain->GetFormat()); + m_imGUI.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, pSwapChain->GetFormat()); 
m_resourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTV); m_resourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTVMSAA); @@ -95,8 +92,8 @@ void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) // Make sure upload heap has finished uploading before continuing #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif } @@ -105,14 +102,14 @@ void SPD_Renderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) // OnDestroy // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnDestroy() +void SPDRenderer::OnDestroy() { m_toneMapping.OnDestroy(); - m_ImGUI.OnDestroy(); + m_imGUI.OnDestroy(); m_PSDownsampler.OnDestroy(); m_CSDownsampler.OnDestroy(); - m_SPD_Versions.OnDestroy(); + m_SPDVersions.OnDestroy(); m_wireframeBox.OnDestroy(); m_wireframe.OnDestroy(); @@ -120,12 +117,12 @@ void SPD_Renderer::OnDestroy() m_skyDome.OnDestroy(); m_shadowMap.OnDestroy(); - m_UploadHeap.OnDestroy(); + m_uploadHeap.OnDestroy(); m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); + m_vidMemBufferPool.OnDestroy(); + m_constantBufferRing.OnDestroy(); m_resourceViewHeaps.OnDestroy(); - m_CommandListRing.OnDestroy(); + m_commandListRing.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -133,10 +130,10 @@ void SPD_Renderer::OnDestroy() // OnCreateWindowSizeDependentResources // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) +void SPDRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) { - m_Width = Width; - m_Height = Height; + m_width = Width; + m_height = Height; // 
Set the viewport // @@ -144,7 +141,7 @@ void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, u // Create scissor rectangle // - m_RectScissor = { 0, 0, (LONG)Width, (LONG)Height }; + m_rectScissor = { 0, 0, (LONG)Width, (LONG)Height }; // Create depth buffer // @@ -163,17 +160,6 @@ void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, u m_HDR.InitRenderTarget(m_pDevice, "HDR", &RDesc, D3D12_RESOURCE_STATE_RENDER_TARGET); m_HDR.CreateSRV(0, &m_HDRSRV); m_HDR.CreateRTV(0, &m_HDRRTV); - - // update downscaling effect - // - { - int resolution = max(m_Width, m_Height); - int mipLevel = (static_cast(min(1.0f + floor(log2(resolution)), 12)) - 1); - - m_PSDownsampler.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, mipLevel); - m_CSDownsampler.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, mipLevel); - m_SPD_Versions.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR); - } } //-------------------------------------------------------------------------------------- @@ -181,12 +167,8 @@ void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, u // OnDestroyWindowSizeDependentResources // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnDestroyWindowSizeDependentResources() +void SPDRenderer::OnDestroyWindowSizeDependentResources() { - m_PSDownsampler.OnDestroyWindowSizeDependentResources(); - m_CSDownsampler.OnDestroyWindowSizeDependentResources(); - m_SPD_Versions.OnDestroyWindowSizeDependentResources(); - m_HDR.OnDestroy(); m_HDRMSAA.OnDestroy(); m_depthBuffer.OnDestroy(); @@ -198,7 +180,7 @@ void SPD_Renderer::OnDestroyWindowSizeDependentResources() // LoadScene // //-------------------------------------------------------------------------------------- -int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) +int SPDRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) { // show loading 
progress // @@ -220,7 +202,7 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_pGltfLoader->Load"); m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_uploadHeap, &m_vidMemBufferPool, &m_constantBufferRing); } else if (stage == 6) { @@ -235,13 +217,13 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfDepth->OnCreate"); //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( + m_pGltfDepth = new GltfDepthPass(); + m_pGltfDepth->OnCreate( m_pDevice, - &m_UploadHeap, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers ); } @@ -250,19 +232,21 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfPBR->OnCreate"); // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( + m_pGltfPBR = new GltfPbrPass(); + m_pGltfPBR->OnCreate( m_pDevice, - &m_UploadHeap, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_skyDome, false, + false, m_HDRMSAA.GetFormat(), DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, + DXGI_FORMAT_UNKNOWN, 4 ); } @@ -271,30 +255,30 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfBBox->OnCreate"); // just a bounding box pass that will draw boundingboxes instead of the geometry itself - m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( + m_pGltfBBox = new GltfBBoxPass(); + m_pGltfBBox->OnCreate( m_pDevice, - &m_UploadHeap, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, 
- &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_wireframe ); #if (USE_VID_MEM==true) // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); #endif } else if (stage == 10) { Profile p("Flush"); - m_UploadHeap.FlushAndFinish(); + m_uploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) //once everything is uploaded we dont need he upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + m_vidMemBufferPool.FreeUploadHeap(); #endif // tell caller that we are done loading the map return -1; @@ -309,27 +293,27 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) // UnloadScene // //-------------------------------------------------------------------------------------- -void SPD_Renderer::UnloadScene() +void SPDRenderer::UnloadScene() { - if (m_gltfPBR) + if (m_pGltfPBR) { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; + m_pGltfPBR->OnDestroy(); + delete m_pGltfPBR; + m_pGltfPBR = NULL; } - if (m_gltfDepth) + if (m_pGltfDepth) { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; + m_pGltfDepth->OnDestroy(); + delete m_pGltfDepth; + m_pGltfDepth = NULL; } - if (m_gltfBBox) + if (m_pGltfBBox) { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; + m_pGltfBBox->OnDestroy(); + delete m_pGltfBBox; + m_pGltfBBox = NULL; } if (m_pGLTFTexturesAndBuffers) @@ -346,7 +330,7 @@ void SPD_Renderer::UnloadScene() // OnRender // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) +void SPDRenderer::OnRender(State *pState, SwapChain *pSwapChain) { // Timing values // @@ -355,8 +339,8 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // Let our resource managers do some house keeping 
// - m_ConstantBufferRing.OnBeginFrame(); - m_GPUTimer.OnBeginFrame(gpuTicksPerSecond, &m_TimeStamps); + m_constantBufferRing.OnBeginFrame(); + m_GPUTimer.OnBeginFrame(gpuTicksPerSecond, &m_timeStamps); // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- // @@ -395,7 +379,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // command buffer calls // - ID3D12GraphicsCommandList2* pCmdLst1 = m_CommandListRing.GetNewCommandList(); + ID3D12GraphicsCommandList2* pCmdLst1 = m_commandListRing.GetNewCommandList(); m_GPUTimer.GetTimeStamp(pCmdLst1, "Begin Frame"); @@ -417,7 +401,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // Render to shadow map atlas for spot lights ------------------------------------------ // - if (m_gltfDepth && pPerFrame != NULL) + if (m_pGltfDepth && pPerFrame != NULL) { uint32_t shadowMapIndex = 0; for (uint32_t i = 0; i < pPerFrame->lightCount; i++) @@ -433,10 +417,10 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) SetViewportAndScissor(pCmdLst1, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); pCmdLst1->OMSetRenderTargets(0, NULL, true, &m_ShadowMapDSV.GetCPU()); - GltfDepthPass::per_frame *cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); + GltfDepthPass::per_frame *cbDepthPerFrame = m_pGltfDepth->SetPerFrameConstants(); cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - m_gltfDepth->Draw(pCmdLst1); + m_pGltfDepth->Draw(pCmdLst1); m_GPUTimer.GetTimeStamp(pCmdLst1, "Shadow map"); shadowMapIndex++; @@ -448,7 +432,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_shadowMap.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); pCmdLst1->RSSetViewports(1, &m_viewPort); - pCmdLst1->RSSetScissorRects(1, 
&m_RectScissor); + pCmdLst1->RSSetScissorRects(1, &m_rectScissor); pCmdLst1->OMSetRenderTargets(1, &m_HDRRTVMSAA.GetCPU(), true, &m_depthBufferDSV.GetCPU()); if (pPerFrame != NULL) @@ -479,19 +463,19 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // Render scene to color buffer // - if (m_gltfPBR && pPerFrame != NULL) + if (m_pGltfPBR && pPerFrame != NULL) { //set per frame constant buffer values - m_gltfPBR->Draw(pCmdLst1, &m_ShadowMapSRV); + m_pGltfPBR->Draw(pCmdLst1, &m_ShadowMapSRV); } // draw object's bounding boxes // - if (m_gltfBBox && pPerFrame != NULL) + if (m_pGltfBBox && pPerFrame != NULL) { if (pState->bDrawBoundingBoxes) { - m_gltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); + m_pGltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); m_GPUTimer.GetTimeStamp(pCmdLst1, "Bounding Box"); } @@ -549,19 +533,15 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) { case Downsampler::PS: m_PSDownsampler.Draw(pCmdLst1); - m_PSDownsampler.Gui(); + m_PSDownsampler.GUI(&pState->downsamplerImGUISlice); break; - case Downsampler::Multipass_CS: + case Downsampler::MultipassCS: m_CSDownsampler.Draw(pCmdLst1); - m_CSDownsampler.Gui(); - break; - case Downsampler::SPD_CS: - m_SPD_Versions.Dispatch(pCmdLst1, pState->spdVersion, pState->spdPacked); - m_SPD_Versions.Gui(pState->spdVersion, pState->spdPacked); + m_CSDownsampler.GUI(&pState->downsamplerImGUISlice); break; - case Downsampler::SPD_CS_Linear_Sampler: - m_SPD_Versions.DispatchLinearSamplerVersion(pCmdLst1, pState->spdVersion, pState->spdPacked); - m_SPD_Versions.GuiLinearSamplerVersion(pState->spdVersion, pState->spdPacked); + case Downsampler::SPDCS: + m_SPDVersions.Dispatch(pCmdLst1, pState->spdLoad, pState->spdWaveOps, pState->spdPacked); + m_SPDVersions.GUI(pState->spdLoad, pState->spdWaveOps, pState->spdPacked, &pState->downsamplerImGUISlice); break; } @@ -576,15 +556,15 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // 
pSwapChain->WaitForSwapChain(); m_pDevice->GPUFlush(); - m_CommandListRing.OnBeginFrame(); + m_commandListRing.OnBeginFrame(); - ID3D12GraphicsCommandList* pCmdLst2 = m_CommandListRing.GetNewCommandList(); + ID3D12GraphicsCommandList* pCmdLst2 = m_commandListRing.GetNewCommandList(); // Tonemapping ------------------------------------------------------------------------ // { pCmdLst2->RSSetViewports(1, &m_viewPort); - pCmdLst2->RSSetScissorRects(1, &m_RectScissor); + pCmdLst2->RSSetScissorRects(1, &m_rectScissor); pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); m_toneMapping.Draw(pCmdLst2, &m_HDRSRV, pState->exposure, pState->toneMapper); @@ -595,10 +575,10 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // { pCmdLst2->RSSetViewports(1, &m_viewPort); - pCmdLst2->RSSetScissorRects(1, &m_RectScissor); + pCmdLst2->RSSetScissorRects(1, &m_rectScissor); pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); - m_ImGUI.Draw(pCmdLst2); + m_imGUI.Draw(pCmdLst2); m_GPUTimer.GetTimeStamp(pCmdLst2, "ImGUI Rendering"); } diff --git a/sample/src/DX12/SPD_Renderer.h b/sample/src/DX12/SPDRenderer.h similarity index 64% rename from sample/src/DX12/SPD_Renderer.h rename to sample/src/DX12/SPDRenderer.h index a5251aa..08bdb1a 100644 --- a/sample/src/DX12/SPD_Renderer.h +++ b/sample/src/DX12/SPDRenderer.h @@ -19,7 +19,7 @@ #pragma once #include "CSDownsampler.h" -#include "SPD_Versions.h" +#include "SPDVersions.h" #include "PSDownsampler.h" static const int backBufferCount = 3; @@ -35,12 +35,12 @@ using namespace CAULDRON_DX12; enum class Downsampler { PS, - Multipass_CS, - SPD_CS, - SPD_CS_Linear_Sampler, + MultipassCS, + SPDCS, + SPDCSLinearSampler, }; -class SPD_Renderer +class SPDRenderer { public: struct Spotlight @@ -52,27 +52,34 @@ class SPD_Renderer struct State { - float time; - Camera camera; + float time; + Camera camera; - float exposure; - float iblFactor; - float emmisiveFactor; + float 
exposure; + float iblFactor; + float emmisiveFactor; - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; + int toneMapper; + int skyDomeType; + bool bDrawBoundingBoxes; - uint32_t spotlightCount; - Spotlight spotlight[4]; - bool bDrawLightFrustum; + bool useTAA; - Downsampler downsampler; - SPD_Version spdVersion; - SPD_Packed spdPacked; + bool isBenchmarking; + + uint32_t spotlightCount; + Spotlight spotlight[4]; + bool bDrawLightFrustum; + + Downsampler downsampler; + SPDLoad spdLoad; + SPDWaveOps spdWaveOps; + SPDPacked spdPacked; + + int downsamplerImGUISlice; }; - void OnCreate(Device* pDevice, SwapChain *pSwapChain); + void OnCreate(Device *pDevice, SwapChain *pSwapChain); void OnDestroy(); void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); @@ -81,32 +88,32 @@ class SPD_Renderer int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); void UnloadScene(); - const std::vector &GetTimingValues() { return m_TimeStamps; } + const std::vector &GetTimingValues() { return m_timeStamps; } void OnRender(State *pState, SwapChain *pSwapChain); private: - Device *m_pDevice; + Device *m_pDevice = nullptr; - uint32_t m_Width; - uint32_t m_Height; + uint32_t m_width; + uint32_t m_height; D3D12_VIEWPORT m_viewPort; - D3D12_RECT m_RectScissor; + D3D12_RECT m_rectScissor; // Initialize helper classes ResourceViewHeaps m_resourceViewHeaps; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - CommandListRing m_CommandListRing; + UploadHeap m_uploadHeap; + DynamicBufferRing m_constantBufferRing; + StaticBufferPool m_vidMemBufferPool; + CommandListRing m_commandListRing; GPUTimestamps m_GPUTimer; //gltf passes - GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; - GltfPbrPass *m_gltfPBR; - GltfDepthPass *m_gltfDepth; - GltfBBoxPass *m_gltfBBox; + GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers = nullptr; + GltfPbrPass *m_pGltfPBR = nullptr; + GltfDepthPass 
*m_pGltfDepth = nullptr; + GltfBBoxPass *m_pGltfBBox = nullptr; // effects SkyDome m_skyDome; @@ -116,10 +123,10 @@ class SPD_Renderer // Downsampling PSDownsampler m_PSDownsampler; CSDownsampler m_CSDownsampler; - SPD_Versions m_SPD_Versions; + SPDVersions m_SPDVersions; // GUI - ImGUI m_ImGUI; + ImGUI m_imGUI; // Temporary render targets @@ -139,14 +146,13 @@ class SPD_Renderer // Resolved RT Texture m_HDR; CBV_SRV_UAV m_HDRSRV; + CBV_SRV_UAV m_HDRUAV; RTV m_HDRRTV; // widgets Wireframe m_wireframe; WireframeBox m_wireframeBox; - std::vector m_TimeStamps; - - DXGI_FORMAT m_format; + std::vector m_timeStamps; }; diff --git a/sample/src/DX12/SPDSample.cpp b/sample/src/DX12/SPDSample.cpp new file mode 100644 index 0000000..813682f --- /dev/null +++ b/sample/src/DX12/SPDSample.cpp @@ -0,0 +1,496 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "stdafx.h" + +#include "SPDSample.h" + +SPDSample::SPDSample(LPCSTR name) : FrameworkWindows(name) +{ + m_lastFrameTime = MillisecondsNow(); + m_time = 0; + m_bPlay = true; + + m_pGltfLoader = NULL; +} + +//-------------------------------------------------------------------------------------- +// +// OnParseCommandLine +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t *pWidth, uint32_t *pHeight, bool *pbFullScreen) +{ + // set some default values + *pWidth = 1920; + *pHeight = 1080; + *pbFullScreen = false; + m_state.isBenchmarking = true; + m_isCpuValidationLayerEnabled = false; + m_isGpuValidationLayerEnabled = false; + m_stablePowerState = false; + + //read globals + auto process = [&](json jData) + { + *pWidth = jData.value("width", *pWidth); + *pHeight = jData.value("height", *pHeight); + *pbFullScreen = jData.value("fullScreen", *pbFullScreen); + m_isCpuValidationLayerEnabled = jData.value("CpuValidationLayerEnabled", m_isCpuValidationLayerEnabled); + m_isGpuValidationLayerEnabled = jData.value("GpuValidationLayerEnabled", m_isGpuValidationLayerEnabled); + m_state.isBenchmarking = jData.value("benchmark", m_state.isBenchmarking); + m_state.downsampler = jData.value("downsampler", m_state.downsampler); + m_state.spdLoad = jData.value("spdLoad", m_state.spdLoad); + m_state.spdWaveOps = jData.value("spdWaveOps", m_state.spdWaveOps); + m_state.spdPacked = jData.value("spdPacked", m_state.spdPacked); + }; + + //read json globals from commandline + // + try + { + if (strlen(lpCmdLine) > 0) + { + auto j3 = json::parse(lpCmdLine); + process(j3); + } + } + catch (json::parse_error) + { + Trace("Error parsing commandline\n"); + exit(0); + } + + // read config file (and override values from commandline if so) + // + { + std::ifstream f("SpdSample.json"); + if (!f) + { + MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); + 
exit(0); + } + + try + { + f >> m_jsonConfigFile; + } + catch (json::parse_error) + { + MessageBox(NULL, "Error parsing GLTFSample.json!\n", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + } + + json globals = m_jsonConfigFile["globals"]; + process(globals); +} + + +//-------------------------------------------------------------------------------------- +// +// OnCreate +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnCreate(HWND hWnd) +{ + // Create Device + // + m_device.OnCreate("FFX_SPD_Sample", "Cauldron", m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, hWnd); + m_device.CreatePipelineCache(); + + //init the shader compiler + InitDirectXCompiler(); + CreateShaderCache(); + + // Create Swapchain + // + uint32_t dwNumberOfBackBuffers = 2; + m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do that for each GPU + // + m_pNode = new SPDRenderer(); + m_pNode->OnCreate(&m_device, &m_swapChain); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void *)hWnd); + + // Init Camera, looking at the origin + // + m_roll = 0.0f; + m_pitch = 0.0f; + m_distance = 3.5f; + + // init GUI state + m_state.toneMapper = 0; + m_state.skyDomeType = 0; + m_state.exposure = 1.0f; + m_state.iblFactor = 2.0f; + m_state.emmisiveFactor = 1.0f; + m_state.bDrawLightFrustum = false; + m_state.bDrawBoundingBoxes = false; + m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); + + m_state.spotlightCount = 1; + + m_state.spotlight[0].intensity = 10.0f; + m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); + m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); + + m_state.downsamplerImGUISlice = 0; +} + +//-------------------------------------------------------------------------------------- 
+// +// OnDestroy +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnDestroy() +{ + ImGUI_Shutdown(); + + m_device.GPUFlush(); + + // Fullscreen state should always be false before exiting the app. + m_swapChain.SetFullScreen(false); + + m_pNode->UnloadScene(); + m_pNode->OnDestroyWindowSizeDependentResources(); + m_pNode->OnDestroy(); + + delete m_pNode; + + m_swapChain.OnDestroyWindowSizeDependentResources(); + m_swapChain.OnDestroy(); + + //shut down the shader compiler + DestroyShaderCache(&m_device); + + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } + + m_device.OnDestroy(); +} + +//-------------------------------------------------------------------------------------- +// +// OnEvent +// +//-------------------------------------------------------------------------------------- +bool SPDSample::OnEvent(MSG msg) +{ + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; + + return true; +} + +//-------------------------------------------------------------------------------------- +// +// SetFullScreen +// +//-------------------------------------------------------------------------------------- +void SPDSample::SetFullScreen(bool fullscreen) +{ + m_device.GPUFlush(); + + m_swapChain.SetFullScreen(fullscreen); +} + +//-------------------------------------------------------------------------------------- +// +// OnResize +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnResize(uint32_t width, uint32_t height) +{ + if (m_Width != width || m_Height != height) + { + // Flush GPU + // + m_device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && m_Height > 0) + { + if (m_pNode != NULL) + { + m_pNode->OnDestroyWindowSizeDependentResources(); + } + m_swapChain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + + // if resizing 
but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); + if (m_pNode != NULL) + { + m_pNode->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + } + } + } + m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); +} + +void SPDSample::BuildUI() +{ + ImGuiStyle& style = ImGui::GetStyle(); + style.FrameBorderSize = 1.0f; + + bool opened = true; + ImGui::Begin("Stats", &opened); + + if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Resolution : %ix%i", m_Width, m_Height); + } + + if (ImGui::CollapsingHeader("Downsampler", ImGuiTreeNodeFlags_DefaultOpen)) + { + // Downsample settings + const char* downsampleItemNames[] = + { + "PS", + "Multipass CS", + "SPD CS", + }; + ImGui::Combo("Downsampler Options", (int*)&m_state.downsampler, downsampleItemNames, _countof(downsampleItemNames)); + + // SPD Version + // Use load or linear sample to fetch data from source texture + const char* spdLoadItemNames[] = + { + "Load", + "Linear Sampler", + }; + ImGui::Combo("SPD Load / Linear Sampler", (int*)&m_state.spdLoad, spdLoadItemNames, _countof(spdLoadItemNames)); + + // enable the usage of wave operations + const char* spdWaveOpsItemNames[] = + { + "No-WaveOps", + "WaveOps", + }; + ImGui::Combo("SPD Version", (int*)&m_state.spdWaveOps, spdWaveOpsItemNames, _countof(spdWaveOpsItemNames)); + + // Non-Packed or Packed Version + const char* spdPackedItemNames[] = + { + "Non-Packed", + "Packed", + }; + ImGui::Combo("SPD Non-Packed / Packed Version", (int*)&m_state.spdPacked, spdPackedItemNames, _countof(spdPackedItemNames)); + } + + if (ImGui::CollapsingHeader("Lighting", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::SliderFloat("exposure", &m_state.exposure, 0.0f, 2.0f); + ImGui::SliderFloat("emmisive", &m_state.emmisiveFactor, 1.0f, 1000.0f, NULL, 1.0f); + 
ImGui::SliderFloat("iblFactor", &m_state.iblFactor, 0.0f, 2.0f); + } + + const char* tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; + ImGui::Combo("tone mapper", &m_state.toneMapper, tonemappers, _countof(tonemappers)); + + const char* skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; + ImGui::Combo("SkyDome", &m_state.skyDomeType, skyDomeType, _countof(skyDomeType)); + + const char* cameraControl[] = { "WASD", "Orbit" }; + static int cameraControlSelected = 1; + ImGui::Combo("Camera", &cameraControlSelected, cameraControl, _countof(cameraControl)); + + if (ImGui::CollapsingHeader("Profiler", ImGuiTreeNodeFlags_DefaultOpen)) + { + std::vector timeStamps = m_pNode->GetTimingValues(); + if (timeStamps.size() > 0) + { + for (uint32_t i = 1; i < timeStamps.size(); i++) + { + ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); + } + + //scrolling data and average computing + static float values[128]; + values[127] = timeStamps.back().m_microseconds; + for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } + ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 0.0f, 30000.0f, ImVec2(0, 80)); + + } + } + + ImGui::End(); + + // If the mouse was not used by the GUI then it's for the camera + // + ImGuiIO& io = ImGui::GetIO(); + if (io.WantCaptureMouse == false) + { + if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) + { + m_roll -= io.MouseDelta.x / 100.f; + m_pitch += io.MouseDelta.y / 100.f; + } + + // Choose camera movement depending on setting + // + + if (cameraControlSelected == 0) + { + // WASD + // + m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); + } + else if (cameraControlSelected == 1) + { + // Orbiting + // + m_distance -= (float)io.MouseWheel / 3.0f; + m_distance = std::max(m_distance, 0.1f); + + bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); + + 
m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? io.MouseDelta.y / 100.0f : 0.0f, m_distance); + } + } +} + +//-------------------------------------------------------------------------------------- +// +// OnRender, updates the state from the UI, animates, transforms and renders the scene +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnRender() +{ + // Get timings + // + double timeNow = MillisecondsNow(); + float deltaTime = (float)(timeNow - m_lastFrameTime); + m_lastFrameTime = timeNow; + + // Build UI and set the scene state. Note that the rendering of the UI happens later. + // + ImGUI_UpdateIO(); + ImGui::NewFrame(); + + static int loadingStage = 0; + if (loadingStage >= 0) + { + // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns -1 + // This is done so we can display a progress bar when the scene is loading + if (m_pGltfLoader == NULL) + { + m_pGltfLoader = new GLTFCommon(); + m_pGltfLoader->Load("..\\media\\DamagedHelmet\\glTF\\", "DamagedHelmet.gltf"); + loadingStage = 0; + + // set benchmarking state if enabled + // + json scene = m_jsonConfigFile["scenes"][0]; + + // set default camera + // + json camera = scene["camera"]; + XMVECTOR from = GetVector(GetElementJsonArray(camera, "defaultFrom", { 0.0, 0.0, 10.0 })); + XMVECTOR to = GetVector(GetElementJsonArray(camera, "defaultTo", { 0.0, 0.0, 0.0 })); + m_state.camera.LookAt(from, to); + m_roll = m_state.camera.GetYaw(); + m_pitch = m_state.camera.GetPitch(); + m_distance = m_state.camera.GetDistance(); + + // set benchmarking state if enabled + + if (m_state.isBenchmarking) + { + BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); + } + } + loadingStage = m_pNode->LoadScene(m_pGltfLoader, loadingStage); + if (loadingStage == 0) + { + m_time = 0; + m_loadingScene = false; + } + } + else if (m_pGltfLoader && 
m_state.isBenchmarking) + { + // benchmarking takes control of the time, and exits the app when the animation is done + std::vector timeStamps = m_pNode->GetTimingValues(); + + const std::string* pFilename; + m_time = BenchmarkLoop(timeStamps, &m_state.camera, &pFilename); + + BuildUI(); + } + else + { + BuildUI(); + } + + // Set animation time + // + if (m_bPlay) + { + m_time += (float)m_deltaTime / 1000.0f; + } + + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } + + m_state.time = m_time; + + // Do Render frame using AFR + // + m_pNode->OnRender(&m_state, &m_swapChain); + + m_swapChain.Present(); +} + + +//-------------------------------------------------------------------------------------- +// +// WinMain +// +//-------------------------------------------------------------------------------------- +int WINAPI WinMain(HINSTANCE hInstance, + HINSTANCE hPrevInstance, + LPSTR lpCmdLine, + int nCmdShow) +{ + LPCSTR Name = "FFX SPD SampleDX12 v2.0"; + + // create new DX sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new SPDSample(Name)); +} diff --git a/sample/src/VK/SPD_Sample.h b/sample/src/DX12/SPDSample.h similarity index 57% rename from sample/src/VK/SPD_Sample.h rename to sample/src/DX12/SPDSample.h index f23f7e3..92ed977 100644 --- a/sample/src/VK/SPD_Sample.h +++ b/sample/src/DX12/SPDSample.h @@ -18,7 +18,7 @@ // THE SOFTWARE. #pragma once -#include "SPD_Renderer.h" +#include "SPDRenderer.h" // // This is the main class, it manages the state of the sample and does all the high level work without touching the GPU directly. 
@@ -35,33 +35,45 @@ // - uses the SampleRenderer to update all the state to the GPU and do the rendering // -class SPD_Sample : public FrameworkWindows +class SPDSample : public FrameworkWindows { public: - SPD_Sample(LPCSTR name); + SPDSample(LPCSTR name); + void OnParseCommandLine(LPSTR lpCmdLine, uint32_t *pWidth, uint32_t *pHeight, bool *pbFullScreen); void OnCreate(HWND hWnd); void OnDestroy(); + void BuildUI(); void OnRender(); bool OnEvent(MSG msg); void OnResize(uint32_t Width, uint32_t Height); void SetFullScreen(bool fullscreen); private: - Device m_device; - SwapChain m_swapChain; + Device m_device; + SwapChain m_swapChain; + + GLTFCommon *m_pGltfLoader = nullptr; + bool m_loadingScene = false; + + SPDRenderer *m_pNode = nullptr; + SPDRenderer::State m_state; + + float m_distance; + float m_roll; + float m_pitch; + + float m_time; // WallClock in seconds. + double m_deltaTime; // The elapsed time in milliseconds since the previous frame. + double m_lastFrameTime; - GLTFCommon *m_pGltfLoader; + // json config file + json m_jsonConfigFile; + std::vector m_sceneNames; + int m_activeScene; + int m_activeCamera; + bool m_stablePowerState; + bool m_isCpuValidationLayerEnabled; + bool m_isGpuValidationLayerEnabled; - SPD_Renderer *m_Node; - SPD_Renderer::State m_state; - - float m_distance; - float m_roll; - float m_pitch; - - float m_time; // WallClock in seconds. - double m_lastFrameTime; - float m_timeStep = 0; - - bool m_bPlay; -}; \ No newline at end of file + bool m_bPlay; +}; diff --git a/sample/src/DX12/SPDVersions.cpp b/sample/src/DX12/SPDVersions.cpp new file mode 100644 index 0000000..7c85709 --- /dev/null +++ b/sample/src/DX12/SPDVersions.cpp @@ -0,0 +1,194 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "stdafx.h" +#include "base/DynamicBufferRing.h" +#include "base/StaticBufferPool.h" +#include "base/UploadHeap.h" +#include "base/Texture.h" +#include "base/Helper.h" +#include "SPDVersions.h" + +namespace CAULDRON_DX12 +{ + void SPDVersions::OnCreate( + Device *pDevice, + UploadHeap *pUploadHeap, + ResourceViewHeaps *pResourceViewHeaps, + DynamicBufferRing *pConstantBufferRing + ) + { + m_pDevice = pDevice; + + m_spd_WaveOps_NonPacked.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLoad, SPDWaveOps::SPDWaveOps, SPDPacked::SPDNonPacked); + m_spd_WaveOps_Packed.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLoad, SPDWaveOps::SPDWaveOps, SPDPacked::SPDPacked); + m_spd_No_WaveOps_NonPacked.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLoad, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDNonPacked); + m_spd_No_WaveOps_Packed.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLoad, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDPacked); + + m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDWaveOps, SPDPacked::SPDNonPacked); + m_spd_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDWaveOps, SPDPacked::SPDPacked); + m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDNonPacked); + m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, pConstantBufferRing, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDPacked); + } + + uint32_t SPDVersions::GetMaxMIPLevelCount(uint32_t Width, uint32_t Height) + { + uint32_t resolution = max(Width, Height); + return 
(static_cast(min(floor(log2(resolution)), 12))); + } + + void SPDVersions::OnDestroy() + { + m_spd_WaveOps_NonPacked.OnDestroy(); + m_spd_WaveOps_Packed.OnDestroy(); + m_spd_No_WaveOps_NonPacked.OnDestroy(); + m_spd_No_WaveOps_Packed.OnDestroy(); + + m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); + m_spd_WaveOps_Packed_Linear_Sampler.OnDestroy(); + m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); + m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroy(); + } + + void SPDVersions::Dispatch(ID3D12GraphicsCommandList2 *pCommandList, SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked) + { + switch (spdLoad) + { + case SPDLoad::SPDLoad: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked.Draw(pCommandList); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed.Draw(pCommandList); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked.Draw(pCommandList); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed.Draw(pCommandList); + break; + } + } + break; + } + case SPDLoad::SPDLinearSampler: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked_Linear_Sampler.Draw(pCommandList); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed_Linear_Sampler.Draw(pCommandList); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked_Linear_Sampler.Draw(pCommandList); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed_Linear_Sampler.Draw(pCommandList); + break; + } + } + break; + } + } + } + + void SPDVersions::GUI(SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked, int *pSlice) + { + switch (spdLoad) + { + case SPDLoad::SPDLoad: + { + switch (spdWaveOps) + { + case 
SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed.GUI(pSlice); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed.GUI(pSlice); + break; + } + } + break; + } + case SPDLoad::SPDLinearSampler: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked_Linear_Sampler.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed_Linear_Sampler.GUI(pSlice); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked_Linear_Sampler.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed_Linear_Sampler.GUI(pSlice); + break; + } + } + break; + } + } + } +} \ No newline at end of file diff --git a/sample/src/DX12/SPDVersions.h b/sample/src/DX12/SPDVersions.h new file mode 100644 index 0000000..52ba9aa --- /dev/null +++ b/sample/src/DX12/SPDVersions.h @@ -0,0 +1,56 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once +#include "SPDCS.h" + +namespace CAULDRON_DX12 +{ + class SPDVersions + { + public: + void OnCreate( + Device *pDevice, + UploadHeap *pUploadHeap, + ResourceViewHeaps *pResourceViewHeaps, + DynamicBufferRing *pConstantBufferRing + ); + void OnDestroy(); + + void Dispatch(ID3D12GraphicsCommandList2 *pCommandList, SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked); + void GUI(SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked, int *pSlice); + + private: + Device *m_pDevice = nullptr; + + SPDCS m_spd_WaveOps_NonPacked; + SPDCS m_spd_No_WaveOps_NonPacked; + + SPDCS m_spd_WaveOps_Packed; + SPDCS m_spd_No_WaveOps_Packed; + + SPDCS m_spd_WaveOps_NonPacked_Linear_Sampler; + SPDCS m_spd_No_WaveOps_NonPacked_Linear_Sampler; + + SPDCS m_spd_WaveOps_Packed_Linear_Sampler; + SPDCS m_spd_No_WaveOps_Packed_Linear_Sampler; + + uint32_t GetMaxMIPLevelCount(uint32_t Width, uint32_t Height); + }; +} \ No newline at end of file diff --git a/sample/src/DX12/SPD_CS.cpp b/sample/src/DX12/SPD_CS.cpp deleted file mode 100644 index 1eb92c2..0000000 --- a/sample/src/DX12/SPD_CS.cpp +++ /dev/null @@ -1,268 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" -#include "base\Device.h" -#include "base\DynamicBufferRing.h" -#include "base\StaticBufferPool.h" -#include "base\UploadHeap.h" -#include "base\Texture.h" -#include "base\Imgui.h" -#include "base\Helper.h" -#include "Base\ShaderCompilerHelper.h" - -#include "SPD_CS.h" - -namespace CAULDRON_DX12 -{ - void SPD_CS::OnCreate( - Device *pDevice, - ResourceViewHeaps *pResourceViewHeaps, - DynamicBufferRing *pConstantBufferRing, - DXGI_FORMAT outFormat, - bool fallback, - bool packed - ) - { - m_pDevice = pDevice; - m_pResourceViewHeaps = pResourceViewHeaps; - m_pConstantBufferRing = pConstantBufferRing; - m_outFormat = outFormat; - - D3D12_SHADER_BYTECODE shaderByteCode = {}; - DefineList defines; - - if (fallback) { - defines["SPD_NO_WAVE_OPERATIONS"] = std::to_string(1); - } - if (packed) { - defines["A_HALF"] = std::to_string(1); - defines["SPD_PACKED_ONLY"] = std::to_string(1); - } - - CompileShaderFromFile("SPD_Integration.hlsl", &defines, "main", "cs_6_0", 0, &shaderByteCode); - - // Create root signature - // - { - CD3DX12_DESCRIPTOR_RANGE DescRange[4]; - CD3DX12_ROOT_PARAMETER RTSlot[4]; - - // we'll always have a constant buffer - int parameterCount = 0; - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); - RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); - - // SRV table - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL); - - // UAV table + global counter buffer - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[2], D3D12_SHADER_VISIBILITY_ALL); - - // output mips - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, SPD_MAX_MIP_LEVELS, 2); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[3], D3D12_SHADER_VISIBILITY_ALL); - - // when using AMD shader 
intrinsics - /*if (!fallback) - { - //*** add AMD Intrinsic Resource *** - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, AGS_DX12_SHADER_INSTRINSICS_SPACE_ID); // u0 - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[4], D3D12_SHADER_VISIBILITY_ALL); - }*/ - - // the root signature contains 4 slots to be used - CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); - descRootSignature.NumParameters = parameterCount; - descRootSignature.pParameters = RTSlot; - descRootSignature.NumStaticSamplers = 0; - descRootSignature.pStaticSamplers = NULL; - - // deny uneccessary access to certain pipeline stages - descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - - ID3DBlob *pOutBlob, *pErrorBlob = NULL; - ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); - ThrowIfFailed( - pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRootSignature)) - ); - SetName(m_pRootSignature, std::string("PostProcCS::") + "SPD_CS"); - - pOutBlob->Release(); - if (pErrorBlob) - pErrorBlob->Release(); - } - - { - D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; - descPso.CS = shaderByteCode; - descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - descPso.pRootSignature = m_pRootSignature; - descPso.NodeMask = 0; - - ThrowIfFailed(pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&m_pPipeline))); - } - - // Allocate descriptors for the mip chain - // - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_constBuffer); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_sourceSRV); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(SPD_MAX_MIP_LEVELS, m_UAV); - for (int i = 0; i < SPD_MAX_MIP_LEVELS; i++) - { - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SRV[i]); - } - - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_globalCounter); - } - - void 
SPD_CS::OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mipCount) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mipCount; - m_pInput = pInput; - - m_result.InitRenderTarget( - m_pDevice, - "SPD_CS::m_result", - &CD3DX12_RESOURCE_DESC::Tex2D( - m_outFormat, - m_Width >> 1, - m_Height >> 1, - 1, - mipCount, - 1, - 0, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - - // Create views for the mip chain - // - - // source - // - pInput->CreateSRV(0, &m_sourceSRV, 0); - - // destination - // - for (int i = 0; i < m_mipCount; i++) - { - m_result.CreateUAV(i, m_UAV, i); - m_result.CreateSRV(0, &m_SRV[i], i); - } - - m_globalCounterBuffer.InitBuffer(m_pDevice, "SPD_CS::m_globalCounterBuffer", - &CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - m_globalCounterBuffer.CreateBufferUAV(0, NULL, &m_globalCounter); - } - - void SPD_CS::OnDestroyWindowSizeDependentResources() - { - m_globalCounterBuffer.OnDestroy(); - m_result.OnDestroy(); - } - - void SPD_CS::OnDestroy() - { - if (m_pPipeline != NULL) - { - m_pPipeline->Release(); - m_pPipeline = NULL; - } - - if (m_pRootSignature != NULL) - { - m_pRootSignature->Release(); - m_pRootSignature = NULL; - } - } - - void SPD_CS::Draw(ID3D12GraphicsCommandList2* pCommandList) - { - UserMarker marker(pCommandList, "SPD_CS"); - - // downsample - uint32_t dispatchX = (((m_Width + 63) >> (6))); - uint32_t dispatchY = (((m_Height + 63) >> (6))); - uint32_t dispatchZ = 1; - - D3D12_GPU_VIRTUAL_ADDRESS cbHandle; - uint32_t* pConstMem; - m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownscale), (void**)&pConstMem, &cbHandle); - cbDownscale constants; - constants.mips = m_mipCount; - constants.numWorkGroups = dispatchX * dispatchY * dispatchZ; - memcpy(pConstMem, &constants, 
sizeof(cbDownscale)); - - D3D12_RANGE range = { 0, sizeof(uint32_t) }; - - // Bind Descriptor heaps and the root signature - // - ID3D12DescriptorHeap *pDescriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; - pCommandList->SetDescriptorHeaps(2, pDescriptorHeaps); - pCommandList->SetComputeRootSignature(m_pRootSignature); - - // Bind Descriptor the descriptor sets - // - int params = 0; - pCommandList->SetComputeRootConstantBufferView(params++, cbHandle); - pCommandList->SetComputeRootDescriptorTable(params++, m_sourceSRV.GetGPU()); - pCommandList->SetComputeRootDescriptorTable(params++, m_globalCounter.GetGPU()); - pCommandList->SetComputeRootDescriptorTable(params++, m_UAV[0].GetGPU()); - - // Bind Pipeline - // - pCommandList->SetPipelineState(m_pPipeline); - - // set counter to 0 - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0)); - - D3D12_WRITEBUFFERIMMEDIATE_PARAMETER pParams = { m_globalCounterBuffer.GetResource()->GetGPUVirtualAddress(), 0 }; - pCommandList->WriteBufferImmediate(1, &pParams, NULL); - - D3D12_RESOURCE_BARRIER resourceBarriers[2] = { - CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) - }; - pCommandList->ResourceBarrier(2, resourceBarriers); - - // Dispatch - // - pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ); - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); - } - - void 
SPD_CS::Gui() - { - bool opened = true; - ImGui::Begin("Downsample", &opened); - - ImGui::Image((ImTextureID)&m_sourceSRV, ImVec2(320, 180)); - for (int i = 0; i < m_mipCount; i++) - { - ImGui::Image((ImTextureID)&m_SRV[i], ImVec2(320, 180)); - } - - ImGui::End(); - } -} \ No newline at end of file diff --git a/sample/src/DX12/SPD_CS.h b/sample/src/DX12/SPD_CS.h deleted file mode 100644 index 674c5c4..0000000 --- a/sample/src/DX12/SPD_CS.h +++ /dev/null @@ -1,73 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
-#pragma once - -#include "Base/DynamicBufferRing.h" -#include "Base/Texture.h" - -namespace CAULDRON_DX12 -{ -#define SPD_MAX_MIP_LEVELS 12 - - class SPD_CS - { - public: - void OnCreate(Device *pDevice, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, DXGI_FORMAT outFormat, bool fallback, bool packed); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); - - void Draw(ID3D12GraphicsCommandList2* pCommandList); - Texture *GetTexture() { return &m_result; } - CBV_SRV_UAV GetTextureView(int i) { if (i == 0) { return m_sourceSRV; } else { return m_SRV[i]; } } - void Gui(); - - struct cbDownscale - { - int mips; - int numWorkGroups; - int padding[2]; - }; - - private: - Device* m_pDevice = nullptr; - DXGI_FORMAT m_outFormat; - - Texture *m_pInput; - Texture m_result; - - CBV_SRV_UAV m_constBuffer; // dimension - CBV_SRV_UAV m_UAV[SPD_MAX_MIP_LEVELS]; //dest - CBV_SRV_UAV m_SRV[SPD_MAX_MIP_LEVELS]; // for display of mips using imGUI - CBV_SRV_UAV m_sourceSRV; //src - - CBV_SRV_UAV m_globalCounter; - Texture m_globalCounterBuffer; - - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; - ID3D12RootSignature *m_pRootSignature; - ID3D12PipelineState *m_pPipeline = NULL; - - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; - }; -} \ No newline at end of file diff --git a/sample/src/DX12/SPD_CS_Linear_Sampler.cpp b/sample/src/DX12/SPD_CS_Linear_Sampler.cpp deleted file mode 100644 index 6e99db3..0000000 --- a/sample/src/DX12/SPD_CS_Linear_Sampler.cpp +++ /dev/null @@ -1,285 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" -#include "base\Device.h" -#include "base\DynamicBufferRing.h" -#include "base\StaticBufferPool.h" -#include "base\UploadHeap.h" -#include "base\Texture.h" -#include "base\Imgui.h" -#include "base\Helper.h" -#include "Base\ShaderCompilerHelper.h" - -#include "SPD_CS_Linear_Sampler.h" - -namespace CAULDRON_DX12 -{ - void SPD_CS_Linear_Sampler::OnCreate( - Device *pDevice, - ResourceViewHeaps *pResourceViewHeaps, - DynamicBufferRing *pConstantBufferRing, - DXGI_FORMAT outFormat, - bool fallback, - bool packed - ) - { - m_pDevice = pDevice; - m_pResourceViewHeaps = pResourceViewHeaps; - m_pConstantBufferRing = pConstantBufferRing; - m_outFormat = outFormat; - - D3D12_SHADER_BYTECODE shaderByteCode = {}; - DefineList defines; - - if (fallback) { - defines["SPD_NO_WAVE_OPERATIONS"] = std::to_string(1); - } - if (packed) { - defines["A_HALF"] = std::to_string(1); - defines["SPD_PACKED_ONLY"] = std::to_string(1); - } - - CompileShaderFromFile("SPD_Integration_Linear_Sampler.hlsl", &defines, "main", "cs_6_0", 0, &shaderByteCode); - - // Create root signature - // - { - CD3DX12_DESCRIPTOR_RANGE DescRange[4]; - CD3DX12_ROOT_PARAMETER RTSlot[4]; - - // we'll always have a constant buffer - int parameterCount = 0; - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); - RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); - - // SRV table - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[1], D3D12_SHADER_VISIBILITY_ALL); - - // UAV table + global counter buffer (== also an UAV)? 
- DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 1); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[2], D3D12_SHADER_VISIBILITY_ALL); - - // output mips - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, SPD_MAX_MIP_LEVELS, 2); - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[3], D3D12_SHADER_VISIBILITY_ALL); - - // when using AMD shader intrinsics - /*if (!fallback) - { - //*** add AMD Intrinsic Resource *** - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, AGS_DX12_SHADER_INSTRINSICS_SPACE_ID); // u0 - RTSlot[parameterCount++].InitAsDescriptorTable(1, &DescRange[4], D3D12_SHADER_VISIBILITY_ALL); - }*/ - - D3D12_STATIC_SAMPLER_DESC SamplerDesc = {}; - SamplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; - SamplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - SamplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - SamplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - SamplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - SamplerDesc.BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - SamplerDesc.MinLOD = 0.0f; - SamplerDesc.MaxLOD = D3D12_FLOAT32_MAX; - SamplerDesc.MipLODBias = 0; - SamplerDesc.MaxAnisotropy = 1; - SamplerDesc.ShaderRegister = 0; - SamplerDesc.RegisterSpace = 0; - SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - // the root signature contains 4 slots to be used - CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); - descRootSignature.NumParameters = parameterCount; - descRootSignature.pParameters = RTSlot; - descRootSignature.NumStaticSamplers = 1; // numStaticSamplers; - descRootSignature.pStaticSamplers = &SamplerDesc; //pStaticSamplers; - - // deny uneccessary access to certain pipeline stages - descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - - ID3DBlob *pOutBlob, *pErrorBlob = NULL; - ThrowIfFailed(D3D12SerializeRootSignature(&descRootSignature, 
D3D_ROOT_SIGNATURE_VERSION_1, &pOutBlob, &pErrorBlob)); - ThrowIfFailed( - pDevice->GetDevice()->CreateRootSignature(0, pOutBlob->GetBufferPointer(), pOutBlob->GetBufferSize(), IID_PPV_ARGS(&m_pRootSignature)) - ); - SetName(m_pRootSignature, std::string("PostProcCS::") + "SPD_CS"); - - pOutBlob->Release(); - if (pErrorBlob) - pErrorBlob->Release(); - } - - { - D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; - descPso.CS = shaderByteCode; - descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - descPso.pRootSignature = m_pRootSignature; - descPso.NodeMask = 0; - - ThrowIfFailed(pDevice->GetDevice()->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&m_pPipeline))); - } - - // Allocate descriptors for the mip chain - // - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_constBuffer); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_sourceSRV); - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(SPD_MAX_MIP_LEVELS, m_UAV); - for (int i = 0; i < SPD_MAX_MIP_LEVELS; i++) - { - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_SRV[i]); - } - - m_pResourceViewHeaps->AllocCBV_SRV_UAVDescriptor(1, &m_globalCounter); - } - - void SPD_CS_Linear_Sampler::OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mipCount) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mipCount; - m_pInput = pInput; - - m_result.InitRenderTarget( - m_pDevice, - "SPD_CS::m_result", - &CD3DX12_RESOURCE_DESC::Tex2D( - m_outFormat, - m_Width >> 1, - m_Height >> 1, - 1, - mipCount, - 1, - 0, - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - - // Create views for the mip chain - // - - // source - // - pInput->CreateSRV(0, &m_sourceSRV, 0); - - // destination - // - for (int i = 0; i < m_mipCount; i++) - { - m_result.CreateUAV(i, m_UAV, i); - m_result.CreateSRV(0, &m_SRV[i], i); - } - - m_globalCounterBuffer.InitBuffer(m_pDevice, 
"SPD_CS::m_globalCounterBuffer", - &CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint32_t), D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), - sizeof(uint32_t), D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - m_globalCounterBuffer.CreateBufferUAV(0, NULL, &m_globalCounter); - } - - void SPD_CS_Linear_Sampler::OnDestroyWindowSizeDependentResources() - { - m_globalCounterBuffer.OnDestroy(); - m_result.OnDestroy(); - } - - void SPD_CS_Linear_Sampler::OnDestroy() - { - if (m_pPipeline != NULL) - { - m_pPipeline->Release(); - m_pPipeline = NULL; - } - - if (m_pRootSignature != NULL) - { - m_pRootSignature->Release(); - m_pRootSignature = NULL; - } - } - - void SPD_CS_Linear_Sampler::Draw(ID3D12GraphicsCommandList2* pCommandList) - { - UserMarker marker(pCommandList, "SPD_CS_Linear_Sampler"); - - // downsample - uint32_t dispatchX = (((m_Width + 63) >> (6))); - uint32_t dispatchY = (((m_Height + 63) >> (6))); - uint32_t dispatchZ = 1; - - D3D12_GPU_VIRTUAL_ADDRESS cbHandle; - uint32_t* pConstMem; - m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownscale), (void**)&pConstMem, &cbHandle); - cbDownscale constants; - constants.mips = m_mipCount; - constants.numWorkGroups = dispatchX * dispatchY * dispatchZ; - constants.invInputSize[0] = 1.0f / m_Width; - constants.invInputSize[1] = 1.0f / m_Height; - memcpy(pConstMem, &constants, sizeof(cbDownscale)); - - D3D12_RANGE range = { 0, sizeof(uint32_t) }; - - // Bind Descriptor heaps and the root signature - // - ID3D12DescriptorHeap *pDescriptorHeaps[] = { m_pResourceViewHeaps->GetCBV_SRV_UAVHeap(), m_pResourceViewHeaps->GetSamplerHeap() }; - pCommandList->SetDescriptorHeaps(2, pDescriptorHeaps); - pCommandList->SetComputeRootSignature(m_pRootSignature); - - // Bind Descriptor the descriptor sets - // - int params = 0; - pCommandList->SetComputeRootConstantBufferView(params++, cbHandle); - pCommandList->SetComputeRootDescriptorTable(params++, m_sourceSRV.GetGPU()); - pCommandList->SetComputeRootDescriptorTable(params++, 
m_globalCounter.GetGPU()); - pCommandList->SetComputeRootDescriptorTable(params++, m_UAV[0].GetGPU()); - - // Bind Pipeline - // - pCommandList->SetPipelineState(m_pPipeline); - - // set counter to 0 - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST, 0)); - - D3D12_WRITEBUFFERIMMEDIATE_PARAMETER pParams = { m_globalCounterBuffer.GetResource()->GetGPUVirtualAddress(), 0 }; - pCommandList->WriteBufferImmediate(1, &pParams, NULL); - - D3D12_RESOURCE_BARRIER resourceBarriers[2] = { - CD3DX12_RESOURCE_BARRIER::Transition(m_globalCounterBuffer.GetResource(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0), - CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS) - }; - pCommandList->ResourceBarrier(2, resourceBarriers); - - // Dispatch - // - pCommandList->Dispatch(dispatchX, dispatchY, dispatchZ); - pCommandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_result.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE | D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); - } - - void SPD_CS_Linear_Sampler::Gui() - { - bool opened = true; - ImGui::Begin("Downsample", &opened); - - ImGui::Image((ImTextureID)&m_sourceSRV, ImVec2(320, 180)); - for (int i = 0; i < m_mipCount; i++) - { - ImGui::Image((ImTextureID)&m_SRV[i], ImVec2(320, 180)); - } - - ImGui::End(); - } -} \ No newline at end of file diff --git a/sample/src/DX12/SPD_CS_Linear_Sampler.h b/sample/src/DX12/SPD_CS_Linear_Sampler.h deleted file mode 100644 index 1dc1a3c..0000000 --- a/sample/src/DX12/SPD_CS_Linear_Sampler.h +++ /dev/null @@ -1,73 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
-#pragma once - -#include "Base/DynamicBufferRing.h" -#include "Base/Texture.h" - -namespace CAULDRON_DX12 -{ -#define SPD_MAX_MIP_LEVELS 12 - - class SPD_CS_Linear_Sampler - { - public: - void OnCreate(Device *pDevice, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, DXGI_FORMAT outFormat, bool fallback, bool packed); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); - - void Draw(ID3D12GraphicsCommandList2* pCommandList); - Texture *GetTexture() { return &m_result; } - CBV_SRV_UAV GetTextureView(int i) { if (i == 0) { return m_sourceSRV; } else { return m_SRV[i]; } } - void Gui(); - - struct cbDownscale - { - int mips; - int numWorkGroups; - float invInputSize[2]; - }; - - private: - Device* m_pDevice = nullptr; - DXGI_FORMAT m_outFormat; - - Texture *m_pInput; - Texture m_result; - - CBV_SRV_UAV m_constBuffer; // dimension - CBV_SRV_UAV m_UAV[SPD_MAX_MIP_LEVELS]; //dest - CBV_SRV_UAV m_SRV[SPD_MAX_MIP_LEVELS]; // for display of mips using imGUI - CBV_SRV_UAV m_sourceSRV; //src - - CBV_SRV_UAV m_globalCounter; - Texture m_globalCounterBuffer; - - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; - ID3D12RootSignature *m_pRootSignature; - ID3D12PipelineState *m_pPipeline = NULL; - - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; - }; -} \ No newline at end of file diff --git a/sample/src/DX12/SPD_Integration.hlsl b/sample/src/DX12/SPD_Integration.hlsl deleted file mode 100644 index 9f57ed3..0000000 --- a/sample/src/DX12/SPD_Integration.hlsl +++ /dev/null @@ -1,120 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -// when using amd shader intrinscs -// #include "ags_shader_intrinsics_dx12.h" - -//-------------------------------------------------------------------------------------- -// Constant Buffer -//-------------------------------------------------------------------------------------- -cbuffer spdConstants : register(b0) -{ - uint mips; - uint numWorkGroups; -} - -//-------------------------------------------------------------------------------------- -// Texture definitions -//-------------------------------------------------------------------------------------- -/*globallycoherent*/ RWTexture2D imgDst[12] : register(u2); -Texture2D imgSrc : register(t0); - -//-------------------------------------------------------------------------------------- -// Buffer definitions - global atomic counter -//-------------------------------------------------------------------------------------- -struct globalAtomicBuffer -{ - uint counter; -}; -globallycoherent RWStructuredBuffer globalAtomic :register(u1); - -#define A_GPU -#define A_HLSL - -#include "ffx_a.h" - -groupshared AU1 spd_counter; - -#ifndef SPD_PACKED_ONLY -groupshared AF1 spd_intermediateR[16][16]; -groupshared AF1 spd_intermediateG[16][16]; -groupshared AF1 spd_intermediateB[16][16]; -groupshared AF1 spd_intermediateA[16][16]; -AF4 SpdLoadSourceImage(AF2 tex){return imgSrc[tex];} -AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} -void SpdStore(ASU2 pix, AF4 outValue, AU1 index){imgDst[index][pix] = outValue;} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ - return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - spd_intermediateB[x][y], - spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = value.z; - spd_intermediateA[x][y] = value.w;} -AF4 
SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} -#endif - -// define fetch and store functions Packed -#ifdef A_HALF -groupshared AH2 spd_intermediateRG[16][16]; -groupshared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(AF2 tex){return AH4(imgSrc[tex]);} -AH4 SpdLoadH(ASU2 p){return AH4(imgDst[5][p]);} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imgDst[mip][p] = AF4(value);} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ - return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} -void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);} -#endif - -#include "ffx_spd.h" - -// Main function -//-------------------------------------------------------------------------------------- -//-------------------------------------------------------------------------------------- -[numthreads(256, 1, 1)] -void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) -{ -#ifndef A_HALF - SpdDownsample( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#else - SpdDownsampleH( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#endif - } \ No newline at end of file diff --git a/sample/src/DX12/SPD_Integration_Linear_Sampler.hlsl b/sample/src/DX12/SPD_Integration_Linear_Sampler.hlsl deleted file mode 100644 index ca9c59b..0000000 --- a/sample/src/DX12/SPD_Integration_Linear_Sampler.hlsl +++ /dev/null @@ -1,133 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -// when using amd shader intrinscs -// #include "ags_shader_intrinsics_dx12.h" - -//-------------------------------------------------------------------------------------- -// Constant Buffer -//-------------------------------------------------------------------------------------- -cbuffer spdConstants : register(b0) -{ - uint mips; - uint numWorkGroups; - // [SAMPLER] - float2 invInputSize; -} - -//-------------------------------------------------------------------------------------- -// Texture definitions -//-------------------------------------------------------------------------------------- -/*globallycoherent*/ RWTexture2D imgDst[12] : register(u2); -Texture2D imgSrc : register(t0); -SamplerState srcSampler : register(s0); - -//-------------------------------------------------------------------------------------- -// Buffer definitions - global atomic counter -//-------------------------------------------------------------------------------------- -struct globalAtomicBuffer -{ - uint counter; -}; -globallycoherent RWStructuredBuffer globalAtomic :register(u1); - -#define A_GPU -#define A_HLSL - -#include "ffx_a.h" - -groupshared AU1 spd_counter; - -#ifndef SPD_PACKED_ONLY -groupshared AF1 spd_intermediateR[16][16]; -groupshared AF1 spd_intermediateG[16][16]; -groupshared AF1 spd_intermediateB[16][16]; -groupshared AF1 spd_intermediateA[16][16]; -//AF4 DSLoadSourceImage(AF2 tex){return imgSrc[tex];} -// [SAMPLER] -AF4 SpdLoadSourceImage(ASU2 p){ - AF2 textureCoord = p * invInputSize + invInputSize; - return imgSrc.SampleLevel(srcSampler, textureCoord, 0); -} -AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} -void SpdStore(ASU2 pix, AF4 outValue, AU1 index){imgDst[index][pix] = outValue;} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ - return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - spd_intermediateB[x][y], - 
spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = value.z; - spd_intermediateA[x][y] = value.w;} -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} -#endif - -// define fetch and store functions Packed -#ifdef A_HALF -groupshared AH2 spd_intermediateRG[16][16]; -groupshared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(ASU2 p){ - AF2 textureCoord = p * invInputSize + invInputSize; - return AH4(imgSrc.SampleLevel(srcSampler, textureCoord, 0)); -} -AH4 SpdLoadH(ASU2 p){return AH4(imgDst[5][p]);} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imgDst[mip][p] = AF4(value);} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ - return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} -void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);} -#endif - -#define SPD_LINEAR_SAMPLER - -#include "ffx_spd.h" - -// Main function -//-------------------------------------------------------------------------------------- -//-------------------------------------------------------------------------------------- -[numthreads(256, 1, 1)] -void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) -{ -#ifndef A_HALF - SpdDownsample( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#else - SpdDownsampleH( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#endif - } \ No newline at end of file diff --git a/sample/src/DX12/SPD_Sample.cpp b/sample/src/DX12/SPD_Sample.cpp deleted file 
mode 100644 index 7f71f53..0000000 --- a/sample/src/DX12/SPD_Sample.cpp +++ /dev/null @@ -1,377 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" - -#include "SPD_Sample.h" - -const bool VALIDATION_ENABLED = true; - -SPD_Sample::SPD_Sample(LPCSTR name) : FrameworkWindows(name) -{ - m_lastFrameTime = MillisecondsNow(); - m_time = 0; - m_bPlay = true; - - m_pGltfLoader = NULL; -} - -//-------------------------------------------------------------------------------------- -// -// OnCreate -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnCreate(HWND hWnd) -{ - // Create Device - // - m_device.OnCreate("FFX_SPD_Sample", "Cauldron", VALIDATION_ENABLED, hWnd); - m_device.CreatePipelineCache(); - - //init the shader compiler - CreateShaderCache(); - - // Create Swapchain - // - uint32_t dwNumberOfBackBuffers = 2; - m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SPD_Renderer(); - m_Node->OnCreate(&m_device, &m_swapChain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_roll = 0.0f; - m_pitch = 0.0f; - m_distance = 3.5f; - - // init GUI state - m_state.toneMapper = 0; - m_state.skyDomeType = 0; - m_state.exposure = 1.0f; - m_state.iblFactor = 2.0f; - m_state.emmisiveFactor = 1.0f; - m_state.bDrawLightFrustum = false; - m_state.bDrawBoundingBoxes = false; - m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); - - m_state.spotlightCount = 1; - - m_state.spotlight[0].intensity = 10.0f; - m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); - m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); - - m_state.downsampler = Downsampler::SPD_CS; - m_state.spdVersion = SPD_Version::SPD_WaveOps; - m_state.spdPacked = SPD_Packed::SPD_Non_Packed; -} - 
-//-------------------------------------------------------------------------------------- -// -// OnDestroy -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnDestroy() -{ - ImGUI_Shutdown(); - - m_device.GPUFlush(); - - // Fullscreen state should always be false before exiting the app. - m_swapChain.SetFullScreen(false); - - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); - - delete m_Node; - - m_swapChain.OnDestroyWindowSizeDependentResources(); - m_swapChain.OnDestroy(); - - //shut down the shader compiler - DestroyShaderCache(&m_device); - - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } - - m_device.OnDestroy(); -} - -//-------------------------------------------------------------------------------------- -// -// OnEvent -// -//-------------------------------------------------------------------------------------- -bool SPD_Sample::OnEvent(MSG msg) -{ - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return true; - - return true; -} - -//-------------------------------------------------------------------------------------- -// -// SetFullScreen -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::SetFullScreen(bool fullscreen) -{ - m_device.GPUFlush(); - - m_swapChain.SetFullScreen(fullscreen); -} - -//-------------------------------------------------------------------------------------- -// -// OnResize -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnResize(uint32_t width, uint32_t height) -{ - if (m_Width != width || m_Height != height) - { - // Flush GPU - // - m_device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - m_Node->OnDestroyWindowSizeDependentResources(); - m_swapChain.OnDestroyWindowSizeDependentResources(); - } - 
- m_Width = width; - m_Height = height; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false, DISPLAYMODE_SDR); - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); - } - } - m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); -} - -//-------------------------------------------------------------------------------------- -// -// OnRender, updates the state from the UI, animates, transforms and renders the scene -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnRender() -{ - // Get timings - // - double timeNow = MillisecondsNow(); - m_deltaTime = timeNow - m_lastFrameTime; - m_lastFrameTime = timeNow; - - // Build UI and set the scene state. Note that the rendering of the UI happens later. - // - ImGUI_UpdateIO(); - ImGui::NewFrame(); - - static int loadingStage = 0; - if (loadingStage >= 0) - { - // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns -1 - // This is done so we can display a progress bar when the scene is loading - if (m_pGltfLoader == NULL) - { - m_pGltfLoader = new GLTFCommon(); - m_pGltfLoader->Load("..\\media\\DamagedHelmet\\glTF\\", "DamagedHelmet.gltf"); - loadingStage = 0; - } - loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); - } - else - { - ImGuiStyle& style = ImGui::GetStyle(); - style.FrameBorderSize = 1.0f; - - bool opened = true; - ImGui::Begin("Stats", &opened); - - if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Text("Resolution : %ix%i", m_Width, m_Height); - } - - if (ImGui::CollapsingHeader("Downsampler", ImGuiTreeNodeFlags_DefaultOpen)) - { - // Downsample settings - const char* downsampleItemNames[] = - { - "PS", - "Multipass CS", - "SPD CS", - "SPD CS Linear Sampler", - }; - 
ImGui::Combo("Downsampler Options", (int*)&m_state.downsampler, downsampleItemNames, _countof(downsampleItemNames)); - - // Downsample settings - const char* spdVersionItemNames[] = - { - "No-WaveOps", - "WaveOps", - }; - ImGui::Combo("SPD Version", (int*)&m_state.spdVersion, spdVersionItemNames, _countof(spdVersionItemNames)); - - // NON-PACKED or PACKED Version - const char* spdPackedNames[] = - { - "Non-Packed", - "Packed", - }; - ImGui::Combo("SPD Non-Packed / Packed Version", (int*)&m_state.spdPacked, spdPackedNames, _countof(spdPackedNames)); - } - - if (ImGui::CollapsingHeader("Lighting", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::SliderFloat("exposure", &m_state.exposure, 0.0f, 2.0f); - ImGui::SliderFloat("emmisive", &m_state.emmisiveFactor, 1.0f, 1000.0f, NULL, 1.0f); - ImGui::SliderFloat("iblFactor", &m_state.iblFactor, 0.0f, 2.0f); - } - - const char * tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; - ImGui::Combo("tone mapper", &m_state.toneMapper, tonemappers, _countof(tonemappers)); - - const char * skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; - ImGui::Combo("SkyDome", &m_state.skyDomeType, skyDomeType, _countof(skyDomeType)); - - const char * cameraControl[] = { "WASD", "Orbit" }; - static int cameraControlSelected = 1; - ImGui::Combo("Camera", &cameraControlSelected, cameraControl, _countof(cameraControl)); - - if (ImGui::CollapsingHeader("Profiler", ImGuiTreeNodeFlags_DefaultOpen)) - { - std::vector timeStamps = m_Node->GetTimingValues(); - if (timeStamps.size() > 0) - { - for (uint32_t i = 1; i < timeStamps.size(); i++) - { - float DeltaTime = ((float)(timeStamps[i].m_microseconds - timeStamps[i - 1].m_microseconds)); - ImGui::Text("%-17s: %7.1f us", timeStamps[i].m_label.c_str(), DeltaTime); - } - - //scrolling data and average computing - static float values[128]; - values[127] = (float)(timeStamps.back().m_microseconds - timeStamps.front().m_microseconds); - float 
average = values[0]; - for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; average += values[i]; } - average /= 128; - - ImGui::Text("%-17s: %7.1f us", "TotalGPUTime", average); - ImGui::PlotLines("", values, 128, 0, "", 0.0f, 30000.0f, ImVec2(0, 80)); - } - } - - ImGui::End(); - - // If the mouse was not used by the GUI then it's for the camera - // - ImGuiIO& io = ImGui::GetIO(); - if (io.WantCaptureMouse == false) - { - if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) - { - m_roll -= io.MouseDelta.x / 100.f; - m_pitch += io.MouseDelta.y / 100.f; - } - - // Choose camera movement depending on setting - // - - if (cameraControlSelected == 0) - { - // WASD - // - m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); - } - else if (cameraControlSelected == 1) - { - // Orbiting - // - m_distance -= (float)io.MouseWheel / 3.0f; - m_distance = std::max(m_distance, 0.1f); - - bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); - - m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? 
io.MouseDelta.y / 100.0f : 0.0f, m_distance ); - } - } - } - - // Set animation time - // - if (m_bPlay) - { - m_time += (float)m_deltaTime / 1000.0f; - } - - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } - - m_state.time = m_time; - - // Do Render frame using AFR - // - m_Node->OnRender(&m_state, &m_swapChain); - - m_swapChain.Present(); -} - - -//-------------------------------------------------------------------------------------- -// -// WinMain -// -//-------------------------------------------------------------------------------------- -int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) -{ - LPCSTR Name = "FFX SPD SampleDX12 v1.0"; - uint32_t Width = 1920; - uint32_t Height = 1080; - - // create new DX sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, Width, Height, new SPD_Sample(Name)); -} diff --git a/sample/src/DX12/SPD_Versions.cpp b/sample/src/DX12/SPD_Versions.cpp deleted file mode 100644 index aba0ce8..0000000 --- a/sample/src/DX12/SPD_Versions.cpp +++ /dev/null @@ -1,206 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include "stdafx.h" -#include "base/DynamicBufferRing.h" -#include "base/StaticBufferPool.h" -#include "base/UploadHeap.h" -#include "base/Texture.h" -#include "base/Helper.h" -#include "SPD_Versions.h" - -namespace CAULDRON_DX12 -{ - void SPD_Versions::OnCreate( - Device *pDevice, - ResourceViewHeaps *pResourceViewHeaps, - DynamicBufferRing *pConstantBufferRing, - DXGI_FORMAT outFormat - ) - { - m_pDevice = pDevice; - - m_spd_WaveOps_NonPacked.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, false, false); - m_spd_WaveOps_Packed.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, false, true); - m_spd_No_WaveOps_NonPacked.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, true, false); - m_spd_No_WaveOps_Packed.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, true, true); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, false, false); - m_spd_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, false, true); - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, true, false); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, pConstantBufferRing, outFormat, true, true); - } - - int SPD_Versions::GetMaxMipLevelCount(int Width, int Height) - { - int resolution = max(Width, Height); - return (static_cast(min(1.0f 
+ floor(log2(resolution)), 12)) - 1); - } - - void SPD_Versions::OnCreateWindowSizeDependentResources(int Width, int Height, Texture *pInput) - { - m_spd_WaveOps_NonPacked.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_WaveOps_Packed.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_NonPacked.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_Packed.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_WaveOps_Packed_Linear_Sampler.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreateWindowSizeDependentResources(Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - } - - void SPD_Versions::OnDestroyWindowSizeDependentResources() - { - m_spd_WaveOps_NonPacked.OnDestroyWindowSizeDependentResources(); - m_spd_WaveOps_Packed.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_NonPacked.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_Packed.OnDestroyWindowSizeDependentResources(); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - m_spd_WaveOps_Packed_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - } - - void SPD_Versions::OnDestroy() - { - m_spd_WaveOps_NonPacked.OnDestroy(); - m_spd_WaveOps_Packed.OnDestroy(); - 
m_spd_No_WaveOps_NonPacked.OnDestroy(); - m_spd_No_WaveOps_Packed.OnDestroy(); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); - m_spd_WaveOps_Packed_Linear_Sampler.OnDestroy(); - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroy(); - } - - void SPD_Versions::Dispatch(ID3D12GraphicsCommandList2* pCommandList, SPD_Version spdVersion, SPD_Packed spdPacked) - { - switch (spdVersion) - { - case SPD_Version::SPD_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked.Draw(pCommandList); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed.Draw(pCommandList); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked.Draw(pCommandList); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed.Draw(pCommandList); - break; - } - } - } - - void SPD_Versions::DispatchLinearSamplerVersion(ID3D12GraphicsCommandList2* pCommandList, SPD_Version spdVersion, SPD_Packed spdPacked) - { - switch (spdVersion) - { - case SPD_Version::SPD_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked_Linear_Sampler.Draw(pCommandList); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed_Linear_Sampler.Draw(pCommandList); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked_Linear_Sampler.Draw(pCommandList); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed_Linear_Sampler.Draw(pCommandList); - break; - } - } - } - - void SPD_Versions::Gui(SPD_Version spdVersion, SPD_Packed spdPacked) - { - switch (spdVersion) - { - case SPD_Version::SPD_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed.Gui(); - break; - } - break; - 
case SPD_Version::SPD_No_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed.Gui(); - break; - } - } - } - - void SPD_Versions::GuiLinearSamplerVersion(SPD_Version spdVersion, SPD_Packed spdPacked) - { - switch (spdVersion) - { - case SPD_Version::SPD_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked_Linear_Sampler.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed_Linear_Sampler.Gui(); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (spdPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked_Linear_Sampler.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed_Linear_Sampler.Gui(); - break; - } - } - } -} \ No newline at end of file diff --git a/sample/src/DX12/SPD_Versions.h b/sample/src/DX12/SPD_Versions.h deleted file mode 100644 index bd1c1dc..0000000 --- a/sample/src/DX12/SPD_Versions.h +++ /dev/null @@ -1,75 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma once -#include "SPD_CS.h" -#include "SPD_CS_Linear_Sampler.h" - -namespace CAULDRON_DX12 -{ - enum class SPD_Version - { - SPD_No_WaveOps, - SPD_WaveOps, - }; - - enum class SPD_Packed - { - SPD_Non_Packed, - SPD_Packed, - }; - - class SPD_Versions - { - public: - void OnCreate( - Device *pDevice, - ResourceViewHeaps *pResourceViewHeaps, - DynamicBufferRing *pConstantBufferRing, - DXGI_FORMAT outFormat - ); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(int Width, int Height, Texture *pInput); - void OnDestroyWindowSizeDependentResources(); - - void Dispatch(ID3D12GraphicsCommandList2* pCommandList, SPD_Version dsVersion, SPD_Packed dsPacked); - void Gui(SPD_Version dsVersion, SPD_Packed dsPacked); - - void DispatchLinearSamplerVersion(ID3D12GraphicsCommandList2* pCommandList, SPD_Version dsVersion, SPD_Packed dsPacked); - void GuiLinearSamplerVersion(SPD_Version dsVersion, SPD_Packed dsPacked); - - private: - Device *m_pDevice; - - SPD_CS m_spd_WaveOps_NonPacked; - SPD_CS m_spd_No_WaveOps_NonPacked; - - SPD_CS m_spd_WaveOps_Packed; - SPD_CS m_spd_No_WaveOps_Packed; - - SPD_CS_Linear_Sampler m_spd_WaveOps_NonPacked_Linear_Sampler; - SPD_CS_Linear_Sampler m_spd_No_WaveOps_NonPacked_Linear_Sampler; - - SPD_CS_Linear_Sampler m_spd_WaveOps_Packed_Linear_Sampler; - SPD_CS_Linear_Sampler m_spd_No_WaveOps_Packed_Linear_Sampler; - - int GetMaxMipLevelCount(int Width, int Height); - }; -} \ No newline at end of file diff --git a/sample/src/VK/CMakeLists.txt b/sample/src/VK/CMakeLists.txt index e68ff55..376e046 100644 --- a/sample/src/VK/CMakeLists.txt +++ b/sample/src/VK/CMakeLists.txt @@ -1,20 +1,21 @@ -project (SPDSample_VK) -include(${CMAKE_HOME_DIRECTORY}/common.cmake) 
+project(${PROJECT_NAME}) +include(${CMAKE_CURRENT_SOURCE_DIR}/../../common.cmake) + +add_compile_options(/MP) + set(sources CSDownsampler.cpp CSDownsampler.h PSDownsampler.cpp PSDownsampler.h - SPD_CS.cpp - SPD_CS.h - SPD_CS_Linear_Sampler.cpp - SPD_CS_Linear_Sampler.h - SPD_Sample.cpp - SPD_Sample.h - SPD_Renderer.cpp - SPD_Renderer.h - SPD_Versions.cpp - SPD_Versions.h + SPDCS.cpp + SPDCS.h + SPDSample.cpp + SPDSample.h + SPDRenderer.cpp + SPDRenderer.h + SPDVersions.cpp + SPDVersions.h stdafx.cpp stdafx.h) set(Shaders_src @@ -22,37 +23,44 @@ set(Shaders_src ${CMAKE_CURRENT_SOURCE_DIR}/../../../ffx-spd/ffx_spd.h ${CMAKE_CURRENT_SOURCE_DIR}/CSDownsampler.glsl ${CMAKE_CURRENT_SOURCE_DIR}/PSDownsampler.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration.hlsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration_Linear_Sampler.glsl - ${CMAKE_CURRENT_SOURCE_DIR}/SPD_Integration_Linear_Sampler.hlsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegration.glsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegration.hlsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegrationLinearSampler.glsl + ${CMAKE_CURRENT_SOURCE_DIR}/SPDIntegrationLinearSampler.hlsl +) +set(Common_src + ${CMAKE_CURRENT_SOURCE_DIR}/../Common/SpdSample.json ) source_group("Sources" FILES ${sources}) source_group("Shaders" FILES ${Shaders_src}) +source_group("Common" FILES ${Common_src}) + # prevent VS from processing/compiling these files set_source_files_properties(${Shaders_src} PROPERTIES VS_TOOL_OVERRIDE "Text") +set_source_files_properties(${Common_src} PROPERTIES VS_TOOL_OVERRIDE "Text") function(copyCommand list dest) foreach(fullFileName ${list}) - get_filename_component(file ${fullFileName} NAME) - message("Generating custom command for ${fullFileName}") - add_custom_command( - OUTPUT ${dest}/${file} - PRE_BUILD - COMMAND cmake -E make_directory ${dest} - COMMAND cmake -E copy ${fullFileName} ${dest} - MAIN_DEPENDENCY ${fullFileName} - COMMENT "Updating ${file} into ${dest}" - ) + 
get_filename_component(file ${fullFileName} NAME) + message("Generating custom command for ${fullFileName}") + add_custom_command( + OUTPUT ${dest}/${file} + PRE_BUILD + COMMAND cmake -E make_directory ${dest} + COMMAND cmake -E copy ${fullFileName} ${dest} + MAIN_DEPENDENCY ${fullFileName} + COMMENT "Updating ${file} into ${dest}" + ) endforeach() endfunction() # copy shaders and media to Bin # include("${CMAKE_HOME_DIRECTORY}/src/Common/Shaders/CMakeList.txt") copyCommand("${Shaders_src}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) - -add_executable(${PROJECT_NAME} WIN32 ${sources} ${Shaders_src}) +copyCommand("${Common_src}" ${CMAKE_HOME_DIRECTORY}/bin) + +add_executable(${PROJECT_NAME} WIN32 ${sources} ${Shaders_src} ${Common_src}) target_link_libraries (${PROJECT_NAME} LINK_PUBLIC Cauldron_VK ImGUI Vulkan::Vulkan) target_include_directories (${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../ffx-spd) set_target_properties(${PROJECT_NAME} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY "${CMAKE_HOME_DIRECTORY}/bin") diff --git a/sample/src/VK/CSDownsampler.cpp b/sample/src/VK/CSDownsampler.cpp index 1a3ee50..b2604e7 100644 --- a/sample/src/VK/CSDownsampler.cpp +++ b/sample/src/VK/CSDownsampler.cpp @@ -28,14 +28,13 @@ namespace CAULDRON_VK { void CSDownsampler::OnCreate( - Device* pDevice, - ResourceViewHeaps* pResourceViewHeaps, - VkFormat outFormat + Device *pDevice, + UploadHeap *pUploadHeap, + ResourceViewHeaps *pResourceViewHeaps ) { m_pDevice = pDevice; m_pResourceViewHeaps = pResourceViewHeaps; - m_outFormat = outFormat; // create the descriptor set layout // the shader needs @@ -91,12 +90,14 @@ namespace CAULDRON_VK assert(res == VK_SUCCESS); } - // Do this stuff by yourself due to special requirements: push constants + m_cubeTexture.InitFromFile(pDevice, pUploadHeap , "..\\media\\envmaps\\papermill\\specular.dds", true, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT); + pUploadHeap->FlushAndFinish(); + 
VkPipelineShaderStageCreateInfo computeShader; DefineList defines; VkResult res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, - "CSDownsampler.glsl", "main", &defines, &computeShader); + "CSDownsampler.glsl", "main", "", &defines, &computeShader); assert(res == VK_SUCCESS); // Create pipeline layout @@ -108,7 +109,7 @@ namespace CAULDRON_VK // push constants: input size, inverse output size VkPushConstantRange pushConstantRange = {}; pushConstantRange.offset = 0; - pushConstantRange.size = sizeof(PushConstantsCSSimple); + pushConstantRange.size = sizeof(cbDownsample); pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; pPipelineLayoutCreateInfo.pushConstantRangeCount = 1; pPipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; @@ -139,78 +140,12 @@ namespace CAULDRON_VK { m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_mip[i].m_descriptorSet); } - } - - void CSDownsampler::OnCreateWindowSizeDependentResources( - VkCommandBuffer cmd_buf, - uint32_t Width, - uint32_t Height, - Texture* pInput, - int mips - ) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mips; - - VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = NULL; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = m_outFormat; - image_info.extent.width = m_Width >> 1; - image_info.extent.height = m_Height >> 1; - image_info.extent.depth = 1; - image_info.mipLevels = mips; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = NULL; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.usage = (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT); - image_info.flags = 0; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - 
m_result.Init(m_pDevice, &image_info, "DownsampleMipCS"); - - // transition layout undefined to general layout? - VkImageMemoryBarrier imageMemoryBarrier = {}; - imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - imageMemoryBarrier.pNext = NULL; - imageMemoryBarrier.srcAccessMask = 0; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - imageMemoryBarrier.subresourceRange.baseMipLevel = 0; - imageMemoryBarrier.subresourceRange.levelCount = m_mipCount; - imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - imageMemoryBarrier.image = m_result.Resource(); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); - // Create views for the mip chain - // - for (int i = 0; i < m_mipCount; i++) + // populate descriptor sets + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) { - // source ----------- - // - if (i == 0) - { - pInput->CreateSRV(&m_mip[i].m_SRV, 0); - } - else - { - m_result.CreateSRV(&m_mip[i].m_SRV, i - 1); - } - - // destination ----------- - m_result.CreateRTV(&m_mip[i].m_RTV, i); + m_cubeTexture.CreateSRV(&m_mip[i].m_SRV, i); // texture2DArray + m_cubeTexture.CreateRTV(&m_mip[i].m_UAV, i + 1); // texture2DArray // Create and initialize the Descriptor Sets (all of them use the same Descriptor Layout) // Create and initialize descriptor set for sampled image @@ -247,7 +182,7 @@ namespace CAULDRON_VK // Create and initialize descriptor set 
for storage image VkDescriptorImageInfo desc_storage_image = {}; desc_storage_image.sampler = VK_NULL_HANDLE; - desc_storage_image.imageView = m_mip[i].m_RTV; + desc_storage_image.imageView = m_mip[i].m_UAV; desc_storage_image.imageLayout = VK_IMAGE_LAYOUT_GENERAL; writes[2] = {}; @@ -262,30 +197,67 @@ namespace CAULDRON_VK vkUpdateDescriptorSets(m_pDevice->GetDevice(), (uint32_t)writes.size(), writes.data(), 0, NULL); } - } - void CSDownsampler::OnDestroyWindowSizeDependentResources() - { - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].m_SRV, NULL); - vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].m_RTV, NULL); + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount(); mip++) + { + VkImageViewUsageCreateInfo imageViewUsageInfo = {}; + imageViewUsageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + imageViewUsageInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.pNext = &imageViewUsageInfo; + info.image = m_cubeTexture.Resource(); + info.viewType = VK_IMAGE_VIEW_TYPE_2D; + info.subresourceRange.baseArrayLayer = slice; + info.subresourceRange.layerCount = 1; + + switch (m_cubeTexture.GetFormat()) + { + case VK_FORMAT_B8G8R8A8_UNORM: info.format = VK_FORMAT_B8G8R8A8_SRGB; break; + case VK_FORMAT_R8G8B8A8_UNORM: info.format = VK_FORMAT_R8G8B8A8_SRGB; break; + case VK_FORMAT_BC1_RGB_UNORM_BLOCK: info.format = VK_FORMAT_BC1_RGB_SRGB_BLOCK; break; + case VK_FORMAT_BC2_UNORM_BLOCK: info.format = VK_FORMAT_BC2_SRGB_BLOCK; break; + case VK_FORMAT_BC3_UNORM_BLOCK: info.format = VK_FORMAT_BC3_SRGB_BLOCK; break; + default: info.format = m_cubeTexture.GetFormat(); + } + + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = mip; + info.subresourceRange.levelCount = 1; + + VkResult res = 
vkCreateImageView(m_pDevice->GetDevice(), &info, NULL, + &m_imGUISRV[slice * m_cubeTexture.GetMipCount() + mip]); + assert(res == VK_SUCCESS); + } } - - m_result.OnDestroy(); } void CSDownsampler::OnDestroy() { + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() * 6; i++) + { + vkDestroyImageView(m_pDevice->GetDevice(), m_imGUISRV[i], NULL); + } + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) + { + vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].m_SRV, NULL); + vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].m_UAV, NULL); + } + for (int i = 0; i < CS_MAX_MIP_LEVELS; i++) { m_pResourceViewHeaps->FreeDescriptor(m_mip[i].m_descriptorSet); } - vkDestroyPipeline(m_pDevice->GetDevice(), m_pipeline, nullptr); - vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_pipelineLayout, nullptr); + vkDestroyPipeline(m_pDevice->GetDevice(), m_pipeline, NULL); + vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_pipelineLayout, NULL); vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_descriptorSetLayout, NULL); vkDestroySampler(m_pDevice->GetDevice(), m_sampler, nullptr); + m_cubeTexture.OnDestroy(); } void CSDownsampler::Draw(VkCommandBuffer cmd_buf) @@ -295,20 +267,21 @@ namespace CAULDRON_VK // transition layout undefined to general layout? 
VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier = {}; imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; imageMemoryBarrier.pNext = NULL; imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; imageMemoryBarrier.subresourceRange.baseMipLevel = 0; - imageMemoryBarrier.subresourceRange.levelCount = m_mipCount; + imageMemoryBarrier.subresourceRange.levelCount = 1; imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - imageMemoryBarrier.image = m_result.Resource(); + imageMemoryBarrier.subresourceRange.layerCount = 6; + imageMemoryBarrier.image = m_cubeTexture.Resource(); // transition general layout if detination image to shader read only for source image vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, @@ -320,60 +293,99 @@ namespace CAULDRON_VK // vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline); - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - uint32_t dispatchX = ((m_Width >> (i + 1)) + 7) / 8; - uint32_t dispatchY = ((m_Height >> (i + 1)) + 7) / 8; - uint32_t dispatchZ = 1; - - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_mip[i].m_descriptorSet, 0, nullptr); - - // Bind push constants - // - 
PushConstantsCSSimple data; - data.outputSize[0] = (float)(m_Width >> (i + 1)); - data.outputSize[1] = (float)(m_Height >> (i + 1)); - data.invInputSize[0] = 1.0f / (float)(m_Width >> i); - data.invInputSize[1] = 1.0f / (float)(m_Height >> i); - vkCmdPushConstants(cmd_buf, m_pipelineLayout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstantsCSSimple), (void*)&data); - - // Draw - // - vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); - - VkImageMemoryBarrier imageMemoryBarrier = {}; - imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - imageMemoryBarrier.pNext = NULL; - imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - imageMemoryBarrier.subresourceRange.baseMipLevel = i; - imageMemoryBarrier.subresourceRange.levelCount = 1; - imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - imageMemoryBarrier.image = m_result.Resource(); + VkImageMemoryBarrier imageMemoryBarrierArray = {}; + imageMemoryBarrierArray.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrierArray.pNext = NULL; + imageMemoryBarrierArray.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrierArray.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrierArray.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrierArray.newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrierArray.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrierArray.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrierArray.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + 
imageMemoryBarrierArray.subresourceRange.baseMipLevel = 1; + imageMemoryBarrierArray.subresourceRange.levelCount = m_cubeTexture.GetMipCount() - 1; + imageMemoryBarrierArray.subresourceRange.baseArrayLayer = 0; + imageMemoryBarrierArray.subresourceRange.layerCount = 6; + imageMemoryBarrierArray.image = m_cubeTexture.Resource(); // transition general layout if destination image to shader read only for source image vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); + 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrierArray); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) + { + uint32_t dispatchX = ((m_cubeTexture.GetWidth() >> (i + 1)) + 7) / 8; + uint32_t dispatchY = ((m_cubeTexture.GetHeight() >> (i + 1)) + 7) / 8; + uint32_t dispatchZ = 1; + + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_mip[i].m_descriptorSet, 0, nullptr); + + // Bind push constants + // + cbDownsample data; + data.outputSize[0] = (float)(m_cubeTexture.GetWidth() >> (i + 1)); + data.outputSize[1] = (float)(m_cubeTexture.GetHeight() >> (i + 1)); + data.invInputSize[0] = 1.0f / (float)(m_cubeTexture.GetWidth() >> i); + data.invInputSize[1] = 1.0f / (float)(m_cubeTexture.GetHeight() >> i); + data.slice = slice; + vkCmdPushConstants(cmd_buf, m_pipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(cbDownsample), (void*)&data); + + // Draw + // + vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); + + VkImageMemoryBarrier imageMemoryBarrier = {}; + imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier.pNext = NULL; + imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + 
imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageMemoryBarrier.subresourceRange.baseMipLevel = i + 1; + imageMemoryBarrier.subresourceRange.levelCount = 1; + imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; + imageMemoryBarrier.subresourceRange.layerCount = 6; + imageMemoryBarrier.image = m_cubeTexture.Resource(); + + // transition general layout if destination image to shader read only for source image + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); + } } SetPerfMarkerEnd(cmd_buf); } - void CSDownsampler::Gui() + void CSDownsampler::GUI(int* pSlice) { bool opened = true; - ImGui::Begin("Downsample", &opened); + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); - for (int i = 0; i < m_mipCount; i++) + if (ImGui::CollapsingHeader("CS Multipass", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Image((ImTextureID)m_mip[i].m_SRV, ImVec2(320, 180)); + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)m_imGUISRV[*pSlice * m_cubeTexture.GetMipCount() + i], ImVec2(static_cast(512 >> i), static_cast(512 >> i))); + } } ImGui::End(); diff --git a/sample/src/VK/CSDownsampler.glsl b/sample/src/VK/CSDownsampler.glsl index f4408c7..8121021 100644 --- a/sample/src/VK/CSDownsampler.glsl +++ b/sample/src/VK/CSDownsampler.glsl @@ -28,18 +28,20 @@ layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; //-------------------------------------------------------------------------------------- // Push Constants 
//-------------------------------------------------------------------------------------- -layout(push_constant) uniform pushConstants { +layout(push_constant) uniform pushConstants +{ vec2 u_outputTextureSize; vec2 u_inputInvTextureSize; + int u_slice; } myPerMip; //-------------------------------------------------------------------------------------- // Texture definitions //-------------------------------------------------------------------------------------- -layout(set=0, binding=0) uniform texture2D inputTexture; +layout(set=0, binding=0) uniform texture2DArray inputTexture; layout(set=0, binding=1) uniform sampler inputSampler; -layout(set=0, binding=2, rgba16f) uniform writeonly image2D outputTexture; +layout(set=0, binding=2, rgba16f) uniform writeonly image2DArray outputTexture; // Main function //-------------------------------------------------------------------------------------- @@ -52,5 +54,8 @@ void main() ivec2 pixel_coord = ivec2(gl_GlobalInvocationID.xy); vec2 texcoord = myPerMip.u_inputInvTextureSize.xy * gl_GlobalInvocationID.xy * 2.0f + myPerMip.u_inputInvTextureSize.xy; - imageStore(outputTexture, pixel_coord, texture(sampler2D(inputTexture, inputSampler), texcoord)); + imageStore(outputTexture, + ivec3(pixel_coord, myPerMip.u_slice), + texture(sampler2DArray(inputTexture, inputSampler), vec3(texcoord, myPerMip.u_slice)) + ); } \ No newline at end of file diff --git a/sample/src/VK/CSDownsampler.h b/sample/src/VK/CSDownsampler.h index 3a86856..c638c85 100644 --- a/sample/src/VK/CSDownsampler.h +++ b/sample/src/VK/CSDownsampler.h @@ -28,49 +28,45 @@ namespace CAULDRON_VK class CSDownsampler { public: - void OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, VkFormat outFormat); + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps); void OnDestroy(); - void OnCreateWindowSizeDependentResources(VkCommandBuffer cmd_buf, uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void 
OnDestroyWindowSizeDependentResources(); - void Draw(VkCommandBuffer cmd_buf); - Texture *GetTexture() { return &m_result; } - VkImageView GetTextureView(int i) { return m_mip[i].m_SRV; } - void Gui(); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int *pSlice); - struct PushConstantsCSSimple + struct cbDownsample { float outputSize[2]; float invInputSize[2]; + uint32_t slice; + uint32_t padding[3]; }; private: - Device *m_pDevice; - VkFormat m_outFormat; + Device *m_pDevice = nullptr; - Texture m_result; + Texture m_cubeTexture; struct Pass { - VkImageView m_RTV; - VkImageView m_SRV; + VkImageView m_UAV; + VkImageView m_SRV; VkDescriptorSet m_descriptorSet; }; - Pass m_mip[CS_MAX_MIP_LEVELS]; + // for each mip for each array slice + Pass m_mip[CS_MAX_MIP_LEVELS] = {}; - ResourceViewHeaps *m_pResourceViewHeaps; + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; - VkDescriptorSetLayout m_descriptorSetLayout; + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_pipeline = VK_NULL_HANDLE; - VkPipelineLayout m_pipelineLayout; - VkPipeline m_pipeline; + VkSampler m_sampler = VK_NULL_HANDLE; - VkSampler m_sampler; + VkImageView m_imGUISRV[CS_MAX_MIP_LEVELS * 6] = {}; }; } diff --git a/sample/src/VK/PSDownsampler.cpp b/sample/src/VK/PSDownsampler.cpp index 12faf02..6640b10 100644 --- a/sample/src/VK/PSDownsampler.cpp +++ b/sample/src/VK/PSDownsampler.cpp @@ -32,18 +32,17 @@ namespace CAULDRON_VK { void PSDownsampler::OnCreate( - Device* pDevice, + Device *pDevice, + UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *pConstantBufferRing, - StaticBufferPool *pStaticBufferPool, - VkFormat outFormat + StaticBufferPool *pStaticBufferPool ) { m_pDevice = pDevice; m_pStaticBufferPool = pStaticBufferPool; m_pResourceViewHeaps = pResourceViewHeaps; m_pConstantBufferRing = pConstantBufferRing; - 
m_outFormat = outFormat; // Create Descriptor Set Layout, the shader needs a uniform dynamic buffer and a texture + sampler // The Descriptor Sets will be created and initialized once we know the input to the shader, that happens in OnCreateWindowSizeDependentResources() @@ -71,9 +70,58 @@ namespace CAULDRON_VK assert(res == VK_SUCCESS); } + m_cubeTexture.InitFromFile(pDevice, pUploadHeap, "..\\media\\envmaps\\papermill\\specular.dds", true, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); + pUploadHeap->FlushAndFinish(); + // In Render pass // - m_in = SimpleColorWriteRenderPass(pDevice->GetDevice(), VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + // color RT + VkAttachmentDescription attachments[1]; + attachments[0].format = m_cubeTexture.GetFormat(); + attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; + attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; // we don't care about the previous contents, this is for a full screen pass with no blending + attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + attachments[0].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + attachments[0].flags = 0; + + VkAttachmentReference color_reference = { 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL }; + + VkSubpassDescription subpass = {}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.flags = 0; + subpass.inputAttachmentCount = 0; + subpass.pInputAttachments = NULL; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &color_reference; + subpass.pResolveAttachments = NULL; + subpass.pDepthStencilAttachment = NULL; + subpass.preserveAttachmentCount = 0; + subpass.pPreserveAttachments = NULL; + + VkSubpassDependency dep = 
{}; + dep.dependencyFlags = 0; + dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + dep.dstSubpass = VK_SUBPASS_EXTERNAL; + dep.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dep.srcSubpass = 0; + + VkRenderPassCreateInfo rp_info = {}; + rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + rp_info.pNext = NULL; + rp_info.attachmentCount = 1; + rp_info.pAttachments = attachments; + rp_info.subpassCount = 1; + rp_info.pSubpasses = &subpass; + rp_info.dependencyCount = 1; + rp_info.pDependencies = &dep; + + VkResult res = vkCreateRenderPass(pDevice->GetDevice(), &rp_info, NULL, &m_in); + assert(res == VK_SUCCESS); // The sampler we want to use for downsampling, all linear // @@ -95,68 +143,46 @@ namespace CAULDRON_VK // Use helper class to create the fullscreen pass // - m_downscale.OnCreate(pDevice, m_in, "PSDownsampler.glsl", pStaticBufferPool, pConstantBufferRing, m_descriptorSetLayout); + m_downsample.OnCreate(pDevice, m_in, "PSDownsampler.glsl", "main", "", pStaticBufferPool, pConstantBufferRing, m_descriptorSetLayout); // Allocate descriptors for the mip chain // - for (int i = 0; i < DOWNSAMPLEPS_MAX_MIP_LEVELS; i++) + for (int i = 0; i < DOWNSAMPLEPS_MAX_MIP_LEVELS * 6; i++) { - m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_mip[i].descriptorSet); + m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_mip[i].m_descriptorSet); } - } - void PSDownsampler::OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mipCount) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mipCount; - - VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = NULL; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = 
m_outFormat; - image_info.extent.width = m_Width >> 1; - image_info.extent.height = m_Height >> 1; - image_info.extent.depth = 1; - image_info.mipLevels = mipCount; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = NULL; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.usage = (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT); //TODO - image_info.flags = 0; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - m_result.Init(m_pDevice, &image_info, "DownsampleMip"); - - // Create views for the mip chain - // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - // source ----------- - // - if (i == 0) - { - pInput->CreateSRV(&m_mip[i].m_SRV, 0); - } - else + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount() - 1; mip++) { - m_result.CreateSRV(&m_mip[i].m_SRV, i - 1); - } - // Create and initialize the Descriptor Sets (all of them use the same Descriptor Layout) - m_pConstantBufferRing->SetDescriptorSet(0, sizeof(DownSamplePS::cbDownscale), m_mip[i].descriptorSet); - SetDescriptorSet(m_pDevice->GetDevice(), 1, m_mip[i].m_SRV, &m_sampler, m_mip[i].descriptorSet); + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.image = m_cubeTexture.Resource(); + info.viewType = VK_IMAGE_VIEW_TYPE_2D; + info.subresourceRange.baseArrayLayer = slice; + info.subresourceRange.layerCount = 1; + info.format = m_cubeTexture.GetFormat(); + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = mip; + info.subresourceRange.levelCount = 1; + + VkResult res = vkCreateImageView(m_pDevice->GetDevice(), &info, NULL, + &m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_SRV); + assert(res == VK_SUCCESS); - // destination ----------- - 
// - m_result.CreateRTV(&m_mip[i].RTV, i); + // Create and initialize the Descriptor Sets (all of them use the same Descriptor Layout) + m_pConstantBufferRing->SetDescriptorSet(0, sizeof(DownSamplePS::cbDownscale), m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_descriptorSet); + SetDescriptorSet(m_pDevice->GetDevice(), 1, m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_SRV, &m_sampler, m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_descriptorSet); - // Create framebuffer - { - VkImageView attachments[1] = { m_mip[i].RTV }; + info.subresourceRange.baseMipLevel = mip + 1; + + res = vkCreateImageView(m_pDevice->GetDevice(), &info, NULL, + &m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_RTV); + assert(res == VK_SUCCESS); + + VkImageView attachments[1] = { m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_RTV }; VkFramebufferCreateInfo fb_info = {}; fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; @@ -164,37 +190,37 @@ namespace CAULDRON_VK fb_info.renderPass = m_in; fb_info.attachmentCount = 1; fb_info.pAttachments = attachments; - fb_info.width = m_Width >> (i + 1); - fb_info.height = m_Height >> (i + 1); + fb_info.width = m_cubeTexture.GetWidth() >> (mip + 1); + fb_info.height = m_cubeTexture.GetHeight() >> (mip + 1); fb_info.layers = 1; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_mip[i].frameBuffer); + res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_mip[slice * m_cubeTexture.GetMipCount() + mip].m_frameBuffer); assert(res == VK_SUCCESS); } } } - void PSDownsampler::OnDestroyWindowSizeDependentResources() + void PSDownsampler::OnDestroy() { - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { - vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].m_SRV, NULL); - vkDestroyImageView(m_pDevice->GetDevice(), m_mip[i].RTV, NULL); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_mip[i].frameBuffer, NULL); + for (uint32_t i = 0; i < 
m_cubeTexture.GetMipCount() - 1; i++) + { + vkDestroyImageView(m_pDevice->GetDevice(), m_mip[slice * m_cubeTexture.GetMipCount() + i].m_SRV, NULL); + vkDestroyImageView(m_pDevice->GetDevice(), m_mip[slice * m_cubeTexture.GetMipCount() + i].m_RTV, NULL); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_mip[slice * m_cubeTexture.GetMipCount() + i].m_frameBuffer, NULL); + } } - m_result.OnDestroy(); - } + m_cubeTexture.OnDestroy(); - void PSDownsampler::OnDestroy() - { - for (int i = 0; i < DOWNSAMPLEPS_MAX_MIP_LEVELS; i++) + for (int i = 0; i < DOWNSAMPLEPS_MAX_MIP_LEVELS * 6; i++) { - m_pResourceViewHeaps->FreeDescriptor(m_mip[i].descriptorSet); + m_pResourceViewHeaps->FreeDescriptor(m_mip[i].m_descriptorSet); } - m_downscale.OnDestroy(); + m_downsample.OnDestroy(); vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_descriptorSetLayout, NULL); - vkDestroySampler(m_pDevice->GetDevice(), m_sampler, nullptr); + vkDestroySampler(m_pDevice->GetDevice(), m_sampler, NULL); vkDestroyRenderPass(m_pDevice->GetDevice(), m_in, NULL); } @@ -205,49 +231,68 @@ namespace CAULDRON_VK // downsample // - for (int i = 0; i < m_mipCount; i++) + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) { + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() - 1; i++) + { - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = m_in; - rp_begin.framebuffer = m_mip[i].frameBuffer; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_Width >> (i + 1); - rp_begin.renderArea.extent.height = m_Height >> (i + 1); - rp_begin.clearValueCount = 0; - rp_begin.pClearValues = NULL; - vkCmdBeginRenderPass(cmd_buf, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - SetViewportAndScissor(cmd_buf, 0, 0, m_Width >> (i + 1), m_Height >> (i + 1)); - - cbDownscale *data; - VkDescriptorBufferInfo constantBuffer; - 
m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownscale), (void **)&data, &constantBuffer); - data->outWidth = (float)(m_Width >> (i + 1)); - data->outHeight = (float)(m_Height >> (i + 1)); - data->invWidth = 1.0f / (float)(m_Width >> i); - data->invHeight = 1.0f / (float)(m_Height >> i); - - m_downscale.Draw(cmd_buf, constantBuffer, m_mip[i].descriptorSet); - - vkCmdEndRenderPass(cmd_buf); + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_in; + rp_begin.framebuffer = m_mip[slice * m_cubeTexture.GetMipCount() + i].m_frameBuffer; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_cubeTexture.GetWidth() >> (i + 1); + rp_begin.renderArea.extent.height = m_cubeTexture.GetHeight() >> (i + 1); + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = NULL; + vkCmdBeginRenderPass(cmd_buf, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + SetViewportAndScissor(cmd_buf, 0, 0, m_cubeTexture.GetWidth() >> (i + 1), m_cubeTexture.GetHeight() >> (i + 1)); + + cbDownsample* data; + VkDescriptorBufferInfo constantBuffer; + m_pConstantBufferRing->AllocConstantBuffer(sizeof(cbDownsample), (void**)&data, &constantBuffer); + data->outWidth = (float)(m_cubeTexture.GetWidth() >> (i + 1)); + data->outHeight = (float)(m_cubeTexture.GetHeight() >> (i + 1)); + data->invWidth = 1.0f / (float)(m_cubeTexture.GetWidth() >> i); + data->invHeight = 1.0f / (float)(m_cubeTexture.GetHeight() >> i); + data->slice = slice; + + m_downsample.Draw(cmd_buf, constantBuffer, m_mip[slice * m_cubeTexture.GetMipCount() + i].m_descriptorSet); + + vkCmdEndRenderPass(cmd_buf); + } } SetPerfMarkerEnd(cmd_buf); } - void PSDownsampler::Gui() + void PSDownsampler::GUI(int* pSlice) { bool opened = true; - ImGui::Begin("Downsample", &opened); + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); - for (int i = 0; i < m_mipCount; i++) + if 
(ImGui::CollapsingHeader("PS", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Image((ImTextureID)m_mip[i].m_SRV, ImVec2(320, 180)); + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)m_mip[*pSlice * m_cubeTexture.GetMipCount() + i].m_SRV, ImVec2(static_cast(512 >> i), static_cast(512 >> i))); + } } ImGui::End(); } -} +} \ No newline at end of file diff --git a/sample/src/VK/PSDownsampler.h b/sample/src/VK/PSDownsampler.h index c70297b..effdf73 100644 --- a/sample/src/VK/PSDownsampler.h +++ b/sample/src/VK/PSDownsampler.h @@ -29,54 +29,47 @@ namespace CAULDRON_VK class PSDownsampler { public: - void OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *m_pConstantBufferRing, StaticBufferPool *pStaticBufferPool, VkFormat outFormat); + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, DynamicBufferRing *m_pConstantBufferRing, StaticBufferPool *pStaticBufferPool); void OnDestroy(); - void OnCreateWindowSizeDependentResources(uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); - void Draw(VkCommandBuffer cmd_buf); - Texture *GetTexture() { return &m_result; } - VkImageView GetTextureView(int i) { return m_mip[i].m_SRV; } - void Gui(); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int *pSlice); - struct cbDownscale + struct cbDownsample { float outWidth, outHeight; float invWidth, invHeight; + uint32_t slice; + uint32_t padding[3]; }; private: - Device *m_pDevice; - VkFormat m_outFormat; + Device *m_pDevice = nullptr; - Texture m_result; + Texture m_cubeTexture; struct Pass { - VkImageView RTV; //dest VkImageView m_SRV; //src - VkFramebuffer frameBuffer; - VkDescriptorSet 
descriptorSet; + VkImageView m_RTV; //dst + VkFramebuffer m_frameBuffer; + VkDescriptorSet m_descriptorSet; }; - Pass m_mip[PS_MAX_MIP_LEVELS]; - - StaticBufferPool *m_pStaticBufferPool; - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; + Pass m_mip[PS_MAX_MIP_LEVELS * 6] = {}; - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; + StaticBufferPool *m_pStaticBufferPool = nullptr; + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; + DynamicBufferRing *m_pConstantBufferRing = nullptr; - VkDescriptorSetLayout m_descriptorSetLayout; + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; - PostProcPS m_downscale; + PostProcPS m_downsample; - VkRenderPass m_in; + VkRenderPass m_in = VK_NULL_HANDLE; - VkSampler m_sampler; + VkSampler m_sampler = VK_NULL_HANDLE; }; } diff --git a/sample/src/VK/SPDCS.cpp b/sample/src/VK/SPDCS.cpp new file mode 100644 index 0000000..24ab345 --- /dev/null +++ b/sample/src/VK/SPDCS.cpp @@ -0,0 +1,631 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "stdafx.h" +#include "Base\Device.h" +#include "Base\ShaderCompilerHelper.h" +#include "Base\ExtDebugMarkers.h" +#include "Base\Imgui.h" + +#include "SPDCS.h" + +#define A_CPU +#include "ffx_a.h" +#include "ffx_spd.h" + +namespace CAULDRON_VK +{ + void SPDCS::OnCreate( + Device *pDevice, + UploadHeap *pUploadHeap, + ResourceViewHeaps *pResourceViewHeaps, + SPDLoad spdLoad, + SPDWaveOps spdWaveOps, + SPDPacked spdPacked + ) + { + m_pDevice = pDevice; + m_pResourceViewHeaps = pResourceViewHeaps; + + m_spdLoad = spdLoad; + m_spdWaveOps = spdWaveOps; + m_spdPacked = spdPacked; + + uint32_t bindingCount = 3; + + // create the descriptor set layout + // the shader needs + // image: source image + destination mips + // global atomic counter: storage buffer + { + VkDescriptorSetLayoutBinding layoutBindings[5]; + layoutBindings[0].binding = 0; + layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + layoutBindings[0].descriptorCount = SPD_MAX_MIP_LEVELS + 1; + layoutBindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBindings[0].pImmutableSamplers = NULL; + + layoutBindings[1].binding = 1; + layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + layoutBindings[1].descriptorCount = 1; + layoutBindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBindings[1].pImmutableSamplers = NULL; + + layoutBindings[2].binding = 2; + layoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + layoutBindings[2].descriptorCount = 1; + layoutBindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBindings[2].pImmutableSamplers = NULL; + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + bindingCount = 5; + + // bind source texture as sampled 
image and sampler + layoutBindings[3].binding = 3; + layoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + layoutBindings[3].descriptorCount = 1; + layoutBindings[3].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBindings[3].pImmutableSamplers = NULL; + + layoutBindings[4].binding = 4; + layoutBindings[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + layoutBindings[4].descriptorCount = 1; + layoutBindings[4].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + layoutBindings[4].pImmutableSamplers = NULL; + } + + VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo = {}; + bindingFlagsInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO; + bindingFlagsInfo.bindingCount = bindingCount; + std::vector bindingFlags(bindingCount); + bindingFlags[0] = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT; + bindingFlagsInfo.pBindingFlags = bindingFlags.data(); + + VkDescriptorSetLayoutCreateInfo descriptor_layout = {}; + descriptor_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptor_layout.pNext = &bindingFlagsInfo; + descriptor_layout.bindingCount = bindingCount; + descriptor_layout.pBindings = layoutBindings; + + VkResult res = vkCreateDescriptorSetLayout(pDevice->GetDevice(), &descriptor_layout, NULL, &m_descriptorSetLayout); + assert(res == VK_SUCCESS); + } + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // The sampler we want to use, needs to match the SPD Reduction function in the shader + // linear sampler: + // -> AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} + // point sampler: + // -> AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return v3;} + { + VkSamplerCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + info.magFilter = VK_FILTER_LINEAR; + info.minFilter = VK_FILTER_LINEAR; + info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + info.addressModeV = 
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + info.minLod = -1000; + info.maxLod = 1000; + info.maxAnisotropy = 1.0f; + VkResult res = vkCreateSampler(pDevice->GetDevice(), &info, NULL, &m_sampler); + assert(res == VK_SUCCESS); + } + } + + m_cubeTexture.InitFromFile(pDevice, pUploadHeap, "..\\media\\envmaps\\papermill\\specular.dds", true, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT); + pUploadHeap->FlushAndFinish(); + + // Create global atomic counter + { + VkBufferCreateInfo bufferInfo = {}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.flags = 0; + bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + bufferInfo.queueFamilyIndexCount = 0; + bufferInfo.pQueueFamilyIndices = NULL; + bufferInfo.size = sizeof(int) * m_cubeTexture.GetArraySize(); // number of slices + bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + + VmaAllocationCreateInfo bufferAllocCreateInfo = {}; + bufferAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; + bufferAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; + bufferAllocCreateInfo.pUserData = "SpdGlobalAtomicCounter"; + VmaAllocationInfo bufferAllocInfo = {}; + vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferInfo, &bufferAllocCreateInfo, &m_globalCounter, + &m_globalCounterAllocation, &bufferAllocInfo); + + // initialize global atomic counter to 0 + uint32_t pCounter[6]; // one counter per slice + vmaMapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation, (void**)&pCounter); + for (uint32_t i = 0; i < m_cubeTexture.GetArraySize(); i++) + { + pCounter[i] = 0; + } + vmaUnmapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation); + } + + VkPipelineShaderStageCreateInfo computeShader; + DefineList defines; + + if (m_spdWaveOps == SPDWaveOps::SPDNoWaveOps) { + defines["SPD_NO_WAVE_OPERATIONS"] = 1; + } + if (m_spdPacked == SPDPacked::SPDPacked) { + defines["A_HALF"] = 1; 
+ defines["SPD_PACKED_ONLY"] = 1; + } + + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + VkResult res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, + "SPDIntegrationLinearSampler.hlsl", "main", "-T cs_6_0", &defines, &computeShader); + assert(res == VK_SUCCESS); + } + else { + VkResult res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, + "SPDIntegration.hlsl", "main", "-T cs_6_0", &defines, &computeShader); + assert(res == VK_SUCCESS); + } + + // Create pipeline layout + // + VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {}; + pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pPipelineLayoutCreateInfo.pNext = NULL; + + // push constants + VkPushConstantRange pushConstantRange = {}; + pushConstantRange.offset = 0; + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + pushConstantRange.size = sizeof(SpdLinearSamplerConstants); + } + else { + pushConstantRange.size = sizeof(SpdConstants); + } + pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pPipelineLayoutCreateInfo.pushConstantRangeCount = 1; + pPipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; + + pPipelineLayoutCreateInfo.setLayoutCount = 1; + pPipelineLayoutCreateInfo.pSetLayouts = &m_descriptorSetLayout; + + VkResult res = vkCreatePipelineLayout(pDevice->GetDevice(), &pPipelineLayoutCreateInfo, NULL, &m_pipelineLayout); + assert(res == VK_SUCCESS); + + // Create pipeline + // + VkComputePipelineCreateInfo pipeline = {}; + pipeline.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline.pNext = NULL; + pipeline.flags = 0; + pipeline.layout = m_pipelineLayout; + pipeline.stage = computeShader; + pipeline.basePipelineHandle = VK_NULL_HANDLE; + pipeline.basePipelineIndex = 0; + + res = vkCreateComputePipelines(pDevice->GetDevice(), pDevice->GetPipelineCache(), 1, &pipeline, NULL, &m_pipeline); + assert(res == VK_SUCCESS); + + 
m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_descriptorSet); + + // Create and initialize descriptor set for storage image + // std::vector desc_storage_images(SPD_MAX_MIP_LEVELS + 1); + + uint32_t numUAVs = m_cubeTexture.GetMipCount(); + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // we need one UAV less because source texture will be bound as SRV and not as UAV + numUAVs = m_cubeTexture.GetMipCount() - 1; + } + + std::vector desc_storage_images(numUAVs); + for (uint32_t i = 0; i < numUAVs; i++) + { + // destination ----------- + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // first UAV is MIP 1 + m_cubeTexture.CreateRTV(&m_UAV[i], i + 1); + } + else { + // first UAV is source texture, MIP 0 + m_cubeTexture.CreateRTV(&m_UAV[i], i); + } + + desc_storage_images[i] = {}; + desc_storage_images[i].sampler = VK_NULL_HANDLE; + desc_storage_images[i].imageView = m_UAV[i]; + desc_storage_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + // update descriptors + // SPD Load version + if (m_spdLoad == SPDLoad::SPDLoad) + { + VkWriteDescriptorSet writes[3]; + writes[0] = {}; + writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[0].pNext = NULL; + writes[0].dstSet = m_descriptorSet; + writes[0].descriptorCount = numUAVs; + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[0].pImageInfo = desc_storage_images.data(); + writes[0].dstBinding = 0; + writes[0].dstArrayElement = 0; + + writes[1] = {}; + writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[1].pNext = NULL; + writes[1].dstSet = m_descriptorSet; + writes[1].descriptorCount = 1; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[1].pImageInfo = &desc_storage_images[6]; + writes[1].dstBinding = 1; + writes[1].dstArrayElement = 0; + + VkDescriptorBufferInfo desc_buffer = {}; + desc_buffer.buffer = m_globalCounter; + desc_buffer.offset = 0; + desc_buffer.range = sizeof(int) * m_cubeTexture.GetArraySize(); // number of 
slices + + writes[2] = {}; + writes[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[2].pNext = NULL; + writes[2].dstSet = m_descriptorSet; + writes[2].descriptorCount = 1; + writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[2].pBufferInfo = &desc_buffer; + writes[2].dstBinding = 2; + writes[2].dstArrayElement = 0; + + vkUpdateDescriptorSets(m_pDevice->GetDevice(), 3, writes, 0, NULL); + } + + // update descriptors + // SPD Linear Sampler version + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + VkWriteDescriptorSet writes[5]; + writes[0] = {}; + writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[0].pNext = NULL; + writes[0].dstSet = m_descriptorSet; + writes[0].descriptorCount = numUAVs; + writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[0].pImageInfo = desc_storage_images.data(); + writes[0].dstBinding = 0; + writes[0].dstArrayElement = 0; + + writes[1] = {}; + writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[1].pNext = NULL; + writes[1].dstSet = m_descriptorSet; + writes[1].descriptorCount = 1; + writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + writes[1].pImageInfo = &desc_storage_images[5]; + writes[1].dstBinding = 1; + writes[1].dstArrayElement = 0; + + VkDescriptorBufferInfo desc_buffer = {}; + desc_buffer.buffer = m_globalCounter; + desc_buffer.offset = 0; + desc_buffer.range = sizeof(int) * m_cubeTexture.GetArraySize(); // number of slices + + writes[2] = {}; + writes[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[2].pNext = NULL; + writes[2].dstSet = m_descriptorSet; + writes[2].descriptorCount = 1; + writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + writes[2].pBufferInfo = &desc_buffer; + writes[2].dstBinding = 2; + writes[2].dstArrayElement = 0; + + m_cubeTexture.CreateSRV(&m_sourceSRV, 0); + + VkDescriptorImageInfo desc_sampled_image = {}; + desc_sampled_image.sampler = VK_NULL_HANDLE; + desc_sampled_image.imageView = 
m_sourceSRV; + desc_sampled_image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + writes[3] = {}; + writes[3].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[3].pNext = NULL; + writes[3].dstSet = m_descriptorSet; + writes[3].descriptorCount = 1; + writes[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + writes[3].pImageInfo = &desc_sampled_image; + writes[3].dstBinding = 3; + writes[3].dstArrayElement = 0; + + // Create and initialize descriptor set for sampler + VkDescriptorImageInfo desc_sampler = {}; + desc_sampler.sampler = m_sampler; + + writes[4] = {}; + writes[4].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writes[4].pNext = NULL; + writes[4].dstSet = m_descriptorSet; + writes[4].descriptorCount = 1; + writes[4].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + writes[4].pImageInfo = &desc_sampler; + writes[4].dstBinding = 4; + writes[4].dstArrayElement = 0; + + vkUpdateDescriptorSets(m_pDevice->GetDevice(), 5, writes, 0, NULL); + } + + for (uint32_t slice = 0; slice < m_cubeTexture.GetArraySize(); slice++) + { + for (uint32_t mip = 0; mip < m_cubeTexture.GetMipCount(); mip++) + { + VkImageViewUsageCreateInfo imageViewUsageInfo = {}; + imageViewUsageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO; + imageViewUsageInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT; + + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.pNext = &imageViewUsageInfo; + info.image = m_cubeTexture.Resource(); + info.viewType = VK_IMAGE_VIEW_TYPE_2D; + info.subresourceRange.baseArrayLayer = slice; + info.subresourceRange.layerCount = 1; + + switch (m_cubeTexture.GetFormat()) + { + case VK_FORMAT_B8G8R8A8_UNORM: info.format = VK_FORMAT_B8G8R8A8_SRGB; break; + case VK_FORMAT_R8G8B8A8_UNORM: info.format = VK_FORMAT_R8G8B8A8_SRGB; break; + case VK_FORMAT_BC1_RGB_UNORM_BLOCK: info.format = VK_FORMAT_BC1_RGB_SRGB_BLOCK; break; + case VK_FORMAT_BC2_UNORM_BLOCK: info.format = VK_FORMAT_BC2_SRGB_BLOCK; break; + case 
VK_FORMAT_BC3_UNORM_BLOCK: info.format = VK_FORMAT_BC3_SRGB_BLOCK; break; + default: info.format = m_cubeTexture.GetFormat(); + } + + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = mip; + info.subresourceRange.levelCount = 1; + + VkResult res = vkCreateImageView(m_pDevice->GetDevice(), &info, NULL, + &m_SRV[slice * m_cubeTexture.GetMipCount() + mip]); + assert(res == VK_SUCCESS); + } + } + } + + void SPDCS::OnDestroy() + { + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount() * m_cubeTexture.GetArraySize(); i++) + { + vkDestroyImageView(m_pDevice->GetDevice(), m_SRV[i], NULL); + } + + uint32_t numUAVs = m_cubeTexture.GetMipCount(); + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + // we needed one UAV less because source texture is bound as SRV and not as UAV + numUAVs = m_cubeTexture.GetMipCount() - 1; + + // also destroy SRV and sampler + vkDestroyImageView(m_pDevice->GetDevice(), m_sourceSRV, NULL); + vkDestroySampler(m_pDevice->GetDevice(), m_sampler, NULL); + } + + for (uint32_t i = 0; i < numUAVs; i++) + { + vkDestroyImageView(m_pDevice->GetDevice(), m_UAV[i], NULL); + } + + m_cubeTexture.OnDestroy(); + + m_pResourceViewHeaps->FreeDescriptor(m_descriptorSet); + + vmaDestroyBuffer(m_pDevice->GetAllocator(), m_globalCounter, m_globalCounterAllocation); + + vkDestroyPipeline(m_pDevice->GetDevice(), m_pipeline, nullptr); + vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_pipelineLayout, nullptr); + vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_descriptorSetLayout, NULL); + } + + void SPDCS::Draw(VkCommandBuffer cmd_buf) + { + // downsample + // + varAU2(dispatchThreadGroupCountXY); + varAU2(workGroupOffset); // needed if Left and Top are not 0,0 + varAU2(numWorkGroupsAndMips); + varAU4(rectInfo) = initAU4(0, 0, m_cubeTexture.GetWidth(), m_cubeTexture.GetHeight()); // left, top, width, height + SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + + 
VkImageMemoryBarrier imageMemoryBarrier[2]; + + uint32_t numBarriers = 1; + imageMemoryBarrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier[0].pNext = NULL; + imageMemoryBarrier[0].srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + imageMemoryBarrier[0].subresourceRange.baseMipLevel = 1; + imageMemoryBarrier[0].subresourceRange.levelCount = m_cubeTexture.GetMipCount() - 1; + } + else { + imageMemoryBarrier[0].subresourceRange.baseMipLevel = 0; + imageMemoryBarrier[0].subresourceRange.levelCount = m_cubeTexture.GetMipCount(); + } + imageMemoryBarrier[0].subresourceRange.baseArrayLayer = 0; + imageMemoryBarrier[0].subresourceRange.layerCount = m_cubeTexture.GetArraySize(); + imageMemoryBarrier[0].image = m_cubeTexture.Resource(); + + if (m_spdLoad == SPDLoad::SPDLinearSampler) { + numBarriers = 2; + imageMemoryBarrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier[1].pNext = NULL; + imageMemoryBarrier[1].srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier[1].dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + imageMemoryBarrier[1].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageMemoryBarrier[1].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageMemoryBarrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[1].subresourceRange.aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT; + imageMemoryBarrier[1].subresourceRange.baseMipLevel = 0; + imageMemoryBarrier[1].subresourceRange.levelCount = 1; + imageMemoryBarrier[1].subresourceRange.baseArrayLayer = 0; + imageMemoryBarrier[1].subresourceRange.layerCount = m_cubeTexture.GetArraySize(); + imageMemoryBarrier[1].image = m_cubeTexture.Resource(); + } + + // transition general layout + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, numBarriers, imageMemoryBarrier); + + SetPerfMarkerBegin(cmd_buf, "SPDCS"); + + // Bind Pipeline + // + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline); + + // should be / 64 + uint32_t dispatchX = dispatchThreadGroupCountXY[0]; + uint32_t dispatchY = dispatchThreadGroupCountXY[1]; + uint32_t dispatchZ = m_cubeTexture.GetArraySize(); // slices + + // single pass for storage buffer? + //uint32_t uniformOffsets[1] = { (uint32_t)constantBuffer.offset }; + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descriptorSet, 0, nullptr); + + // Bind push constants + // + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + SpdLinearSamplerConstants data; + data.numWorkGroupsPerSlice = numWorkGroupsAndMips[0]; + data.mips = numWorkGroupsAndMips[1]; + data.workGroupOffset[0] = workGroupOffset[0]; + data.workGroupOffset[1] = workGroupOffset[1]; + data.invInputSize[0] = 1.0f / m_cubeTexture.GetWidth(); + data.invInputSize[1] = 1.0f / m_cubeTexture.GetHeight(); + vkCmdPushConstants(cmd_buf, m_pipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(SpdLinearSamplerConstants), (void*)&data); + } + else { + SpdConstants data; + data.numWorkGroupsPerSlice = numWorkGroupsAndMips[0]; + data.mips = numWorkGroupsAndMips[1]; + data.workGroupOffset[0] = workGroupOffset[0]; + data.workGroupOffset[1] = workGroupOffset[1]; + vkCmdPushConstants(cmd_buf, m_pipelineLayout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, 
sizeof(SpdConstants), (void*)&data); + } + + // Draw + // + vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); + + imageMemoryBarrier[0] = {}; + imageMemoryBarrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemoryBarrier[0].pNext = NULL; + imageMemoryBarrier[0].srcAccessMask = 0; + imageMemoryBarrier[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; + imageMemoryBarrier[0].oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemoryBarrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageMemoryBarrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemoryBarrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + if (m_spdLoad == SPDLoad::SPDLinearSampler) + { + imageMemoryBarrier[0].subresourceRange.baseMipLevel = 1; + imageMemoryBarrier[0].subresourceRange.levelCount = m_cubeTexture.GetMipCount() - 1; + } + else { + imageMemoryBarrier[0].subresourceRange.baseMipLevel = 0; + imageMemoryBarrier[0].subresourceRange.levelCount = m_cubeTexture.GetMipCount(); + } + imageMemoryBarrier[0].subresourceRange.baseArrayLayer = 0; + imageMemoryBarrier[0].subresourceRange.layerCount = m_cubeTexture.GetArraySize(); + imageMemoryBarrier[0].image = m_cubeTexture.Resource(); + + // transition general layout if detination image to shader read only for source image + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, 0, nullptr, 0, nullptr, 1, imageMemoryBarrier); + + SetPerfMarkerEnd(cmd_buf); + } + + void SPDCS::GUI(int* pSlice) + { + bool opened = true; + std::string header = "Downsample"; + ImGui::Begin(header.c_str(), &opened); + + std::string downsampleHeader = "SPD CS"; + if (m_spdLoad == SPDLoad::SPDLoad) { + downsampleHeader += " Load"; + } + else { + downsampleHeader += " Linear Sampler"; + } + + if (m_spdWaveOps == SPDWaveOps::SPDWaveOps) + { + downsampleHeader += " WaveOps"; 
+ } + else { + downsampleHeader += " No WaveOps"; + } + + if (m_spdPacked == SPDPacked::SPDNonPacked) + { + downsampleHeader += " Non Packed"; + } + else { + downsampleHeader += " Packed"; + } + + if (ImGui::CollapsingHeader(downsampleHeader.c_str(), ImGuiTreeNodeFlags_DefaultOpen)) + { + const char* sliceItemNames[] = + { + "Slice 0", + "Slice 1", + "Slice 2", + "Slice 3", + "Slice 4", + "Slice 5" + }; + ImGui::Combo("Slice of Cube Texture", pSlice, sliceItemNames, _countof(sliceItemNames)); + + for (uint32_t i = 0; i < m_cubeTexture.GetMipCount(); i++) + { + ImGui::Image((ImTextureID)m_SRV[*pSlice * m_cubeTexture.GetMipCount() + i], ImVec2(static_cast(512 >> i), static_cast(512 >> i))); + } + } + + ImGui::End(); + } +} \ No newline at end of file diff --git a/sample/src/VK/SPDCS.h b/sample/src/VK/SPDCS.h new file mode 100644 index 0000000..2420f14 --- /dev/null +++ b/sample/src/VK/SPDCS.h @@ -0,0 +1,100 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +#pragma once + +#include "Base/StaticBufferPool.h" +#include "Base/Texture.h" +#include "Base/DynamicBufferRing.h" + +namespace CAULDRON_VK +{ +#define SPD_MAX_MIP_LEVELS 12 + + enum class SPDWaveOps + { + SPDNoWaveOps, + SPDWaveOps, + }; + + enum class SPDPacked + { + SPDNonPacked, + SPDPacked, + }; + + enum class SPDLoad + { + SPDLoad, + SPDLinearSampler, + }; + + class SPDCS + { + public: + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps, + SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked); + void OnDestroy(); + + void Draw(VkCommandBuffer cmd_buf); + Texture *GetTexture() { return &m_cubeTexture; } + void GUI(int* pSlice); + + struct SpdConstants + { + int mips; + int numWorkGroupsPerSlice; + int workGroupOffset[2]; + }; + + struct SpdLinearSamplerConstants + { + int mips; + int numWorkGroupsPerSlice; + int workGroupOffset[2]; + float invInputSize[2]; + float padding[2]; + }; + + private: + Device *m_pDevice = nullptr; + + Texture m_cubeTexture; + + VkImageView m_UAV[SPD_MAX_MIP_LEVELS + 1] = {}; // source + destinations (mips) + VkImageView m_SRV[SPD_MAX_MIP_LEVELS * 6] = {}; // for display of MIPS using imGUI + VkImageView m_sourceSRV = VK_NULL_HANDLE; // source when linear sampler is used + VkSampler m_sampler = VK_NULL_HANDLE; // linear sampler + VkDescriptorSet m_descriptorSet = VK_NULL_HANDLE; + + ResourceViewHeaps *m_pResourceViewHeaps = nullptr; + DynamicBufferRing *m_pConstantBufferRing = nullptr; + + VkDescriptorSetLayout m_descriptorSetLayout = VK_NULL_HANDLE; + + VkPipelineLayout m_pipelineLayout = VK_NULL_HANDLE; + VkPipeline m_pipeline = VK_NULL_HANDLE; + + VkBuffer m_globalCounter = VK_NULL_HANDLE; + VmaAllocation 
m_globalCounterAllocation; + + SPDLoad m_spdLoad; + SPDWaveOps m_spdWaveOps; + SPDPacked m_spdPacked; + }; +} \ No newline at end of file diff --git a/sample/src/VK/SPD_Integration_Linear_Sampler.glsl b/sample/src/VK/SPDIntegration.glsl similarity index 50% rename from sample/src/VK/SPD_Integration_Linear_Sampler.glsl rename to sample/src/VK/SPDIntegration.glsl index 379c7fa..7768032 100644 --- a/sample/src/VK/SPD_Integration_Linear_Sampler.glsl +++ b/sample/src/VK/SPDIntegration.glsl @@ -29,93 +29,142 @@ layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; //-------------------------------------------------------------------------------------- // Push Constants //-------------------------------------------------------------------------------------- -layout(push_constant) uniform pushConstants { +layout(push_constant) uniform SpdConstants +{ uint mips; uint numWorkGroups; - // [SAMPLER] - vec2 invInputSize; + ivec2 workGroupOffset; } spdConstants; //-------------------------------------------------------------------------------------- // Texture definitions //-------------------------------------------------------------------------------------- -//layout(set=0, binding=0, rgba16f) uniform image2D imgSrc; -// [SAMPLER] image2D -> texture2D -layout(set=0, binding=0) uniform texture2D imgSrc; -layout(set=0, binding=1, rgba16f) coherent uniform image2D imgDst[12]; -// [SAMPLER] add sampler -layout(set=0, binding=3) uniform sampler srcSampler; +layout(set=0, binding=0, rgba16f) uniform image2DArray imgDst[13]; // don't access mip [6] +layout(set=0, binding=1, rgba16f) coherent uniform image2DArray imgDst6; + //-------------------------------------------------------------------------------------- // Buffer definitions - global atomic counter //-------------------------------------------------------------------------------------- -layout(std430, binding=2) coherent buffer globalAtomicBuffer +layout(std430, binding=2) coherent buffer spdGlobalAtomicBuffer { - 
uint counter; -} globalAtomic; + uint counter[6]; +} spdGlobalAtomic; #define A_GPU #define A_GLSL #include "ffx_a.h" -shared AU1 spd_counter; +shared AU1 spdCounter; // define fetch and store functions Non-Packed #ifndef SPD_PACKED_ONLY -shared AF1 spd_intermediateR[16][16]; -shared AF1 spd_intermediateG[16][16]; -shared AF1 spd_intermediateB[16][16]; -shared AF1 spd_intermediateA[16][16]; -//AF4 SPDLoadSourceImage(ASU2 p){return imageLoad(imgSrc, p);} -// [SAMPLER] use sampler for accessing source image -AF4 SpdLoadSourceImage(ASU2 p){ - AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; - return texture(sampler2D(imgSrc, srcSampler), textureCoord); +shared AF1 spdIntermediateR[16][16]; +shared AF1 spdIntermediateG[16][16]; +shared AF1 spdIntermediateB[16][16]; +shared AF1 spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(ASU2 p, AU1 slice) +{ + return imageLoad(imgDst[0], ivec3(p,slice)); +} +AF4 SpdLoad(ASU2 p, AU1 slice) +{ + return imageLoad(imgDst6,ivec3(p,slice)); +} +void SpdStore(ASU2 p, AF4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imageStore(imgDst6, ivec3(p,slice), value); + return; } -AF4 SpdLoad(ASU2 p){return imageLoad(imgDst[5],p);} -void SpdStore(ASU2 p, AF4 value, AU1 mip){imageStore(imgDst[mip], p, value);} -void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ + imageStore(imgDst[mip+1], ivec3(p,slice), value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + spdCounter = atomicAdd(spdGlobalAtomic.counter[slice], 1); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic.counter[slice] = 0; +} +AF4 SpdLoadIntermediate(AU1 x, AU1 y) +{ return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - spd_intermediateB[x][y], - spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - 
spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = value.z; - spd_intermediateA[x][y] = value.w;} -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} #endif // define fetch and store functions Packed #ifdef A_HALF -shared AH2 spd_intermediateRG[16][16]; -shared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(ASU2 p){ - AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; - return AH4(texture(sampler2D(imgSrc, srcSampler), textureCoord)); +shared AH2 spdIntermediateRG[16][16]; +shared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice) +{ + return AH4(imageLoad(imgDst[0], ivec3(p,slice))); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imageLoad(imgDst6, ivec3(p,slice))); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imageStore(imgDst6, ivec3(p,slice), AF4(value)); + return; } -AH4 SpdLoadH(ASU2 p){return AH4(imageLoad(imgDst[5],p));} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imageStore(imgDst[mip], p, AF4(value));} -void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ + imageStore(imgDst[mip+1], ivec3(p,slice), AF4(value)); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + spdCounter = atomicAdd(spdGlobalAtomic.counter[slice], 1); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic.counter[slice] = 0; +} +AH4 
SpdLoadIntermediateH(AU1 x, AU1 y) +{ return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y);} void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25f);} + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25f); +} #endif -#define SPD_LINEAR_SAMPLER - #include "ffx_spd.h" // Main function @@ -128,12 +177,16 @@ void main() AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), AU1(spdConstants.mips), - AU1(spdConstants.numWorkGroups)); + AU1(spdConstants.numWorkGroups), + AU1(gl_WorkGroupID.z), + AU2(spdConstants.workGroupOffset)); #else SpdDownsampleH( AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), AU1(spdConstants.mips), - AU1(spdConstants.numWorkGroups)); + AU1(spdConstants.numWorkGroups), + AU1(gl_WorkGroupID.z), + AU2(spdConstants.workGroupOffset)); #endif } \ No newline at end of file diff --git a/sample/src/VK/SPDIntegration.hlsl b/sample/src/VK/SPDIntegration.hlsl new file mode 100644 index 0000000..543dc94 --- /dev/null +++ b/sample/src/VK/SPDIntegration.hlsl @@ -0,0 +1,189 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//-------------------------------------------------------------------------------------- +// Push Constants +//-------------------------------------------------------------------------------------- +struct SpdConstants +{ + uint mips; + uint numWorkGroups; + uint2 workGroupOffset; +}; + +[[vk::push_constant]] +ConstantBuffer spdConstants; +//-------------------------------------------------------------------------------------- +// Texture definitions +//-------------------------------------------------------------------------------------- +[[vk::binding(0)]] RWTexture2DArray imgDst[13] :register(u0); // don't access mip [6] +[[vk::binding(1)]] globallycoherent RWTexture2DArray imgDst6 :register(u1); + +//-------------------------------------------------------------------------------------- +// Buffer definitions - global atomic counter +//-------------------------------------------------------------------------------------- +struct SpdGlobalAtomicBuffer +{ + uint counter[6]; +}; +[[vk::binding(2)]] globallycoherent RWStructuredBuffer spdGlobalAtomic; + +#define A_GPU +#define A_HLSL + +#include "ffx_a.h" + +groupshared AU1 spdCounter; + +// define fetch and store functions +#ifndef SPD_PACKED_ONLY +groupshared AF1 spdIntermediateR[16][16]; +groupshared AF1 spdIntermediateG[16][16]; +groupshared AF1 spdIntermediateB[16][16]; +groupshared AF1 spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(ASU2 tex, AU1 slice) +{ + return imgDst[0][int3(tex,slice)]; +} +AF4 SpdLoad(ASU2 tex, AU1 slice) +{ + return imgDst6[int3(tex,slice)]; +} +void SpdStore(ASU2 pix, AF4 outValue, AU1 index, AU1 slice) +{ + if (index == 5) + { + imgDst6[int3(pix, slice)] = outValue; + return; + } + imgDst[index + 1][int3(pix, slice)] = outValue; +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 
0; +} +AF4 SpdLoadIntermediate(AU1 x, AU1 y) +{ + return AF4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} +#endif + +// define fetch and store functions Packed +#ifdef A_HALF +groupshared AH2 spdIntermediateRG[16][16]; +groupshared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(ASU2 tex, AU1 slice) +{ + return AH4(imgDst[0][int3(tex,slice)]); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imgDst6[int3(p,slice)]); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imgDst6[int3(p, slice)] = AF4(value); + return; + } + imgDst[mip + 1][int3(p, slice)] = AF4(value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AH4 SpdLoadIntermediateH(AU1 x, AU1 y) +{ + return AH4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25); +} +#endif + +#include "ffx_spd.h" + +// Main function +//-------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------- +[numthreads(256,1,1)] +void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ +#ifndef A_HALF + SpdDownsample( + 
AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(WorkGroupId.z), + AU2(spdConstants.workGroupOffset)); +#else + SpdDownsampleH( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(WorkGroupId.z), + AU2(spdConstants.workGroupOffset)); +#endif +} \ No newline at end of file diff --git a/sample/src/VK/SPDIntegrationLinearSampler.glsl b/sample/src/VK/SPDIntegrationLinearSampler.glsl new file mode 100644 index 0000000..97c5b1e --- /dev/null +++ b/sample/src/VK/SPDIntegrationLinearSampler.glsl @@ -0,0 +1,200 @@ +#version 450 +#extension GL_GOOGLE_include_directive : enable +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable +#extension GL_ARB_compute_shader : enable +#extension GL_ARB_shader_group_vote : enable + +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + +//-------------------------------------------------------------------------------------- +// Push Constants +//-------------------------------------------------------------------------------------- +layout(push_constant) uniform SpdConstants +{ + uint mips; + uint numWorkGroups; + ivec2 workGroupOffset; + vec2 invInputSize; +} spdConstants; + +//-------------------------------------------------------------------------------------- +// Texture definitions +//-------------------------------------------------------------------------------------- +layout(set=0, binding=3) uniform texture2DArray imgSrc; +layout(set=0, binding=0, rgba16f) uniform image2DArray imgDst[12]; // don't access MIP [5] +layout(set=0, binding=1, rgba16f) coherent uniform image2DArray imgDst5; +layout(set=0, binding=4) uniform sampler srcSampler; +//-------------------------------------------------------------------------------------- +// Buffer definitions - global atomic counter +//-------------------------------------------------------------------------------------- +layout(std430, binding=2) coherent buffer SpdGlobalAtomicBuffer +{ + uint counter[6]; +} spdGlobalAtomic; + +#define A_GPU +#define A_GLSL + +#include "ffx_a.h" + +shared AU1 spdCounter; + +// define fetch and store functions Non-Packed +#ifndef SPD_PACKED_ONLY +shared AF1 spdIntermediateR[16][16]; +shared AF1 spdIntermediateG[16][16]; +shared AF1 spdIntermediateB[16][16]; +shared AF1 spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; + return texture(sampler2DArray(imgSrc, srcSampler), 
vec3(textureCoord,slice)); +} +AF4 SpdLoad(ASU2 p, AU1 slice) +{ + return imageLoad(imgDst5,ivec3(p,slice)); + } +void SpdStore(ASU2 p, AF4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imageStore(imgDst5, ivec3(p,slice), value); + return; + } + imageStore(imgDst[mip], ivec3(p,slice), value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + spdCounter = atomicAdd(spdGlobalAtomic.counter[slice], 1); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic.counter[slice] = 0; +} +AF4 SpdLoadIntermediate(AU1 x, AU1 y) +{ + return AF4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} +#endif + +// define fetch and store functions Packed +#ifdef A_HALF +shared AH2 spdIntermediateRG[16][16]; +shared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; + return AH4(texture(sampler2DArray(imgSrc, srcSampler), vec3(textureCoord, slice))); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imageLoad(imgDst5,ivec3(p, slice))); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imageStore(imgDst5, ivec3(p,slice), AF4(value)); + return; + } + imageStore(imgDst[mip], ivec3(p,slice), AF4(value)); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + spdCounter = atomicAdd(spdGlobalAtomic.counter[slice], 1); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic.counter[slice] = 0; +} +AH4 SpdLoadIntermediateH(AU1 x, AU1 y) +{ + return AH4( + spdIntermediateRG[x][y].x, + 
spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25f); +} +#endif + +#define SPD_LINEAR_SAMPLER + +#include "ffx_spd.h" + +// Main function +//-------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------- +void main() +{ +#ifndef A_HALF + SpdDownsample( + AU2(gl_WorkGroupID.xy), + AU1(gl_LocalInvocationIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(gl_WorkGroupID.z), + AU2(spdConstants.workGroupOffset)); +#else + SpdDownsampleH( + AU2(gl_WorkGroupID.xy), + AU1(gl_LocalInvocationIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(gl_WorkGroupID.z), + AU2(spdConstants.workGroupOffset)); +#endif +} \ No newline at end of file diff --git a/sample/src/VK/SPDIntegrationLinearSampler.hlsl b/sample/src/VK/SPDIntegrationLinearSampler.hlsl new file mode 100644 index 0000000..f2ce10d --- /dev/null +++ b/sample/src/VK/SPDIntegrationLinearSampler.hlsl @@ -0,0 +1,196 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//-------------------------------------------------------------------------------------- +// Push Constants +//-------------------------------------------------------------------------------------- +struct SpdConstants +{ + uint mips; + uint numWorkGroups; + uint2 workGroupOffset; + float2 invInputSize; +}; + +[[vk::push_constant]] +ConstantBuffer spdConstants; +//-------------------------------------------------------------------------------------- +// Texture definitions +//-------------------------------------------------------------------------------------- +[[vk::binding(3)]] Texture2DArray imgSrc :register(t0); +[[vk::binding(0)]] RWTexture2DArray imgDst[12] :register(u1); // don't access mip [5] +[[vk::binding(1)]] globallycoherent RWTexture2DArray imgDst5 :register(u1); +[[vk::binding(4)]] SamplerState srcSampler :register(s0); + +//-------------------------------------------------------------------------------------- +// Buffer definitions - global atomic counter +//-------------------------------------------------------------------------------------- +struct SpdGlobalAtomicBuffer +{ + uint counter[6]; +}; +[[vk::binding(2)]] globallycoherent RWStructuredBuffer spdGlobalAtomic; + +#define A_GPU +#define A_HLSL + +#include "ffx_a.h" + +groupshared AU1 spdCounter; + +// define fetch and store functions +#ifndef SPD_PACKED_ONLY +groupshared AF1 spdIntermediateR[16][16]; +groupshared AF1 spdIntermediateG[16][16]; +groupshared AF1 spdIntermediateB[16][16]; +groupshared AF1 
spdIntermediateA[16][16]; + +AF4 SpdLoadSourceImage(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; + return imgSrc.SampleLevel(srcSampler, float3(textureCoord, slice), 0); +} +AF4 SpdLoad(ASU2 tex, AU1 slice) +{ + return imgDst5[int3(tex, slice)]; +} +void SpdStore(ASU2 pix, AF4 outValue, AU1 index, AU1 slice) +{ + if (index == 5) + { + imgDst5[int3(pix, slice)] = outValue; + return; + } + imgDst[index][int3(pix, slice)] = outValue; +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AF4 SpdLoadIntermediate(AU1 x, AU1 y) +{ + return AF4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + return (v0+v1+v2+v3)*0.25; +} +#endif + +// define fetch and store functions Packed +#ifdef A_HALF +groupshared AH2 spdIntermediateRG[16][16]; +groupshared AH2 spdIntermediateBA[16][16]; + +AH4 SpdLoadSourceImageH(ASU2 p, AU1 slice) +{ + AF2 textureCoord = p * spdConstants.invInputSize + spdConstants.invInputSize; + return AH4(imgSrc.SampleLevel(srcSampler, float3(textureCoord, slice), 0)); +} +AH4 SpdLoadH(ASU2 p, AU1 slice) +{ + return AH4(imgDst5[int3(p, slice)]); +} +void SpdStoreH(ASU2 p, AH4 value, AU1 mip, AU1 slice) +{ + if (mip == 5) + { + imgDst5[int3(p, slice)] = AF4(value); + return; + } + imgDst[mip][int3(p, slice)] = AF4(value); +} +void SpdIncreaseAtomicCounter(AU1 slice) +{ + InterlockedAdd(spdGlobalAtomic[0].counter[slice], 1, spdCounter); +} +AU1 SpdGetAtomicCounter() +{ + return spdCounter; +} +void 
SpdResetAtomicCounter(AU1 slice) +{ + spdGlobalAtomic[0].counter[slice] = 0; +} +AH4 SpdLoadIntermediateH(AU1 x, AU1 y) +{ + return AH4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} +void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} +AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3) +{ + return (v0+v1+v2+v3)*AH1(0.25); +} +#endif + +#define SPD_LINEAR_SAMPLER + +#include "ffx_spd.h" + +// Main function +//-------------------------------------------------------------------------------------- +//-------------------------------------------------------------------------------------- +[numthreads(256,1,1)] +void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) +{ +#ifndef A_HALF + SpdDownsample( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(WorkGroupId.z), + AU2(spdConstants.workGroupOffset)); +#else + SpdDownsampleH( + AU2(WorkGroupId.xy), + AU1(LocalThreadIndex), + AU1(spdConstants.mips), + AU1(spdConstants.numWorkGroups), + AU1(WorkGroupId.z), + AU2(spdConstants.workGroupOffset)); +#endif +} \ No newline at end of file diff --git a/sample/src/VK/SPD_Renderer.cpp b/sample/src/VK/SPDRenderer.cpp similarity index 66% rename from sample/src/VK/SPD_Renderer.cpp rename to sample/src/VK/SPDRenderer.cpp index b5c3759..39cd352 100644 --- a/sample/src/VK/SPD_Renderer.cpp +++ b/sample/src/VK/SPDRenderer.cpp @@ -19,19 +19,23 @@ #include "stdafx.h" -#include "SPD_Renderer.h" +#include "SPDRenderer.h" //-------------------------------------------------------------------------------------- // // OnCreate // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) +void SPDRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain, bool 
usingDescriptorIndexing) { - m_Format = VK_FORMAT_R16G16B16A16_SFLOAT; - m_pDevice = pDevice; + // we set this as requirement for SPD + // technically it's not, but it requires some integration side changes + // in case the resolution of the source texture is of variable size + // and the # of output MIPS is set to 12 by default regardless of source texture resolution + m_usingDescriptorIndexing = usingDescriptorIndexing; + // Initialize helpers // Create all the heaps for the resources views @@ -43,17 +47,17 @@ void SPD_Renderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) // Create a commandlist ring for the Direct queue uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); + m_commandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); // Create a 'dynamic' constant buffer const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); + m_constantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); // Create a 'static' pool for vertices and indices - const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; + const uint32_t staticGeometryMemSize = ( 2 * 128) * 1024 * 1024; const uint32_t systemGeometryMemSize = 32 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); - m_SysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); + m_vidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + m_sysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); // initialize the GPU time stamps module m_GPUTimer.OnCreate(pDevice, backBufferCount); @@ -61,20 +65,19 @@ void SPD_Renderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) // Quick helper to upload resources, it has it's own commandList and uses suballocation. 
// for 4K textures we'll need 100Megs const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, staticGeometryMemSize); // initialize an upload heap (uses suballocation for faster results) + m_uploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) // Create a 2Kx2K Shadowmap atlas to hold 4 cascades/spotlights m_shadowMap.InitDepthStencil(m_pDevice, 2 * 1024, 2 * 1024, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "ShadowMap"); m_shadowMap.CreateSRV(&m_shadowMapSRV); m_shadowMap.CreateDSV(&m_shadowMapDSV); - // Create render pass shadow + // Create render pass shadow, will clear contents // { - /* Need attachments for render target and depth buffer */ VkAttachmentDescription depthAttachments; AttachClearBeforeUse(m_shadowMap.GetFormat(), VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachments); - m_render_pass_shadow = CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); + m_renderPassShadow = CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); // Create frame buffer, its size is now window dependant so we can do this here. 
// @@ -82,13 +85,13 @@ void SPD_Renderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) VkFramebufferCreateInfo fb_info = {}; fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fb_info.pNext = NULL; - fb_info.renderPass = m_render_pass_shadow; + fb_info.renderPass = m_renderPassShadow; fb_info.attachmentCount = 1; fb_info.pAttachments = attachmentViews; fb_info.width = m_shadowMap.GetWidth(); fb_info.height = m_shadowMap.GetHeight(); fb_info.layers = 1; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_shadow); + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_frameBufferShadow); assert(res == VK_SUCCESS); } @@ -96,62 +99,42 @@ void SPD_Renderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) // { VkAttachmentDescription colorAttachment, depthAttachment; - AttachClearBeforeUse(m_Format, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachment); + AttachClearBeforeUse(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachment); AttachClearBeforeUse(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); - m_render_pass_HDR_MSAA = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); + m_renderPassHDRMSAA = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); } // Create HDR render pass, for the GUI // { VkAttachmentDescription colorAttachment; - AttachBlending(m_Format, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachment); - m_render_pass_PBR_HDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, NULL); + AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachment); + m_renderPassPBRHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, NULL); } - m_skyDome.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_UploadHeap, m_Format, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_4_BIT); - m_skyDomeProc.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_UploadHeap, m_Format, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); - m_wireframe.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); - m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); + m_skyDome.OnCreate(pDevice, m_renderPassHDRMSAA, &m_uploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_4_BIT); + m_skyDomeProc.OnCreate(pDevice, m_renderPassHDRMSAA, &m_uploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); + m_wireframe.OnCreate(pDevice, m_renderPassHDRMSAA, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); + m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool); // Create downsampling passes - - m_PSDownsampler.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, m_Format); - m_CSDownsampler.OnCreate(pDevice, &m_resourceViewHeaps, m_Format); - m_SPD_Versions.OnCreate(pDevice, &m_resourceViewHeaps, m_Format); + m_PSDownsampler.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, 
&m_vidMemBufferPool); + m_CSDownsampler.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps); + if (usingDescriptorIndexing) { + m_SPDVersions.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps); + } // Create tonemapping pass - m_toneMapping.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_VidMemBufferPool, &m_ConstantBufferRing); + m_toneMapping.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_vidMemBufferPool, &m_constantBufferRing); // Initialize UI rendering resources - m_ImGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_UploadHeap, &m_ConstantBufferRing); + m_imGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_uploadHeap, &m_constantBufferRing); // Make sure upload heap has finished uploading before continuing #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif - - // Create allocator - // - VkCommandPoolCreateInfo cmd_pool_info = {}; - cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - cmd_pool_info.pNext = NULL; - cmd_pool_info.queueFamilyIndex = pDevice->GetGraphicsQueueFamilyIndex(); - cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; - VkResult res = vkCreateCommandPool(pDevice->GetDevice(), &cmd_pool_info, NULL, &m_CommandPool); - assert(res == VK_SUCCESS); - - // Create command buffers - // - VkCommandBufferAllocateInfo cmd = {}; - cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - cmd.pNext = NULL; - cmd.commandPool = m_CommandPool; - cmd.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - cmd.commandBufferCount = 1; - res = vkAllocateCommandBuffers(pDevice->GetDevice(), &cmd, &m_CommandBufferInit); - assert(res == VK_SUCCESS); } //-------------------------------------------------------------------------------------- @@ -159,9 +142,9 @@ void SPD_Renderer::OnCreate(Device *pDevice, 
SwapChain *pSwapChain) // OnDestroy // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnDestroy() +void SPDRenderer::OnDestroy() { - m_ImGUI.OnDestroy(); + m_imGUI.OnDestroy(); m_toneMapping.OnDestroy(); m_wireframeBox.OnDestroy(); m_wireframe.OnDestroy(); @@ -171,27 +154,26 @@ void SPD_Renderer::OnDestroy() m_PSDownsampler.OnDestroy(); m_CSDownsampler.OnDestroy(); - m_SPD_Versions.OnDestroy(); + if (m_usingDescriptorIndexing) { + m_SPDVersions.OnDestroy(); + } vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapDSV, nullptr); vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapSRV, nullptr); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_shadow, nullptr); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_HDR_MSAA, nullptr); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_PBR_HDR, nullptr); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassShadow, nullptr); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassHDRMSAA, nullptr); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassPBRHDR, nullptr); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_shadow, nullptr); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_frameBufferShadow, nullptr); - m_UploadHeap.OnDestroy(); + m_uploadHeap.OnDestroy(); m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_SysMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); + m_vidMemBufferPool.OnDestroy(); + m_sysMemBufferPool.OnDestroy(); + m_constantBufferRing.OnDestroy(); m_resourceViewHeaps.OnDestroy(); - m_CommandListRing.OnDestroy(); - - vkFreeCommandBuffers(m_pDevice->GetDevice(), m_CommandPool, 1, &m_CommandBufferInit); - vkDestroyCommandPool(m_pDevice->GetDevice(), m_CommandPool, NULL); + m_commandListRing.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -199,7 +181,7 @@ void SPD_Renderer::OnDestroy() // 
OnCreateWindowSizeDependentResources // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) +void SPDRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) { m_Width = Width; m_Height = Height; @@ -223,87 +205,49 @@ void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, u // Create depth buffer // m_depthBuffer.InitDepthStencil(m_pDevice, Width, Height, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_4_BIT, "DepthBuffer"); - m_depthBuffer.CreateDSV(&m_depthBufferView); + m_depthBuffer.CreateDSV(&m_depthBufferDSV); // Create Texture + RTV with x4 MSAA // - m_HDRMSAA.InitRenderTarget(m_pDevice, m_Width, m_Height, m_Format, VK_SAMPLE_COUNT_4_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "HDRMSAA"); - m_HDRMSAA.CreateRTV(&m_HDRMSAASRV); + m_HDRMSAA.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "HDRMSAA"); + m_HDRMSAA.CreateRTV(&m_HDRMSAARTV); // Create Texture + RTV, to hold the resolved scene // - m_HDR.InitRenderTarget(m_pDevice, m_Width, m_Height, m_Format, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT), false, "HDR"); + m_HDR.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT), false, "HDR"); m_HDR.CreateSRV(&m_HDRSRV); m_HDR.CreateSRV(&m_HDRUAV); // Create framebuffer for 
the MSAA RT // { - VkImageView attachments[2] = { m_HDRMSAASRV, m_depthBufferView }; - VkImageView attachments_PBR_HDR[1] = { m_HDRSRV }; - VkFramebufferCreateInfo fb_info = {}; fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; fb_info.pNext = NULL; - fb_info.renderPass = m_render_pass_HDR_MSAA; - fb_info.attachmentCount = 2; - fb_info.pAttachments = attachments; fb_info.width = Width; fb_info.height = Height; fb_info.layers = 1; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_HDR_MSAA); + VkResult res; + + VkImageView attachments_PBR_HDR_MSAA[] = { m_HDRMSAARTV, m_depthBufferDSV }; + fb_info.attachmentCount = _countof(attachments_PBR_HDR_MSAA); + fb_info.pAttachments = attachments_PBR_HDR_MSAA; + fb_info.renderPass = m_renderPassHDRMSAA; + res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_frameBufferHDRMSAA); assert(res == VK_SUCCESS); - fb_info.attachmentCount = 1; + VkImageView attachments_PBR_HDR[1] = { m_HDRSRV }; + fb_info.attachmentCount = _countof(attachments_PBR_HDR); fb_info.pAttachments = attachments_PBR_HDR; - fb_info.renderPass = m_render_pass_PBR_HDR; - res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_PBR_HDR); + fb_info.renderPass = m_renderPassPBRHDR; + res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_frameBufferPBRHDR); assert(res == VK_SUCCESS); } - // update downscaling effect - // - { - int resolution = max(m_Width, m_Height); - int mipLevel = (static_cast(min(1.0f + floor(log2(resolution)), 12)) - 1); - - { - VkCommandBufferBeginInfo cmd_buf_info; - cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmd_buf_info.pNext = NULL; - cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmd_buf_info.pInheritanceInfo = NULL; - VkResult res = vkBeginCommandBuffer(m_CommandBufferInit, &cmd_buf_info); - assert(res == VK_SUCCESS); - } - - m_PSDownsampler.OnCreateWindowSizeDependentResources(m_Width, 
m_Height, &m_HDR, mipLevel); - m_CSDownsampler.OnCreateWindowSizeDependentResources(m_CommandBufferInit, m_Width, m_Height, &m_HDR, mipLevel); - m_SPD_Versions.OnCreateWindowSizeDependentResources(m_CommandBufferInit, m_Width, m_Height, &m_HDR); - - { - VkResult res = vkEndCommandBuffer(m_CommandBufferInit); - assert(res == VK_SUCCESS); - - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = NULL; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = NULL; - submit_info.pWaitDstStageMask = NULL; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &m_CommandBufferInit; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; - res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); - assert(res == VK_SUCCESS); - } - } - m_toneMapping.UpdatePipelines(pSwapChain->GetRenderPass()), - m_ImGUI.UpdatePipeline(pSwapChain->GetRenderPass()); + m_imGUI.UpdatePipeline(pSwapChain->GetRenderPass()); } //-------------------------------------------------------------------------------------- @@ -311,21 +255,17 @@ void SPD_Renderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, u // OnDestroyWindowSizeDependentResources // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnDestroyWindowSizeDependentResources() +void SPDRenderer::OnDestroyWindowSizeDependentResources() { - m_PSDownsampler.OnDestroyWindowSizeDependentResources(); - m_CSDownsampler.OnDestroyWindowSizeDependentResources(); - m_SPD_Versions.OnDestroyWindowSizeDependentResources(); - m_HDR.OnDestroy(); m_HDRMSAA.OnDestroy(); m_depthBuffer.OnDestroy(); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_HDR_MSAA, nullptr); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_PBR_HDR, nullptr); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_frameBufferHDRMSAA, nullptr); + 
vkDestroyFramebuffer(m_pDevice->GetDevice(), m_frameBufferPBRHDR, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_depthBufferView, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_HDRMSAASRV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_depthBufferDSV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_HDRMSAARTV, nullptr); vkDestroyImageView(m_pDevice->GetDevice(), m_HDRSRV, nullptr); vkDestroyImageView(m_pDevice->GetDevice(), m_HDRUAV, nullptr); } @@ -335,7 +275,7 @@ void SPD_Renderer::OnDestroyWindowSizeDependentResources() // LoadScene // //-------------------------------------------------------------------------------------- -int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) +int SPDRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) { // show loading progress // @@ -357,7 +297,7 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_pGltfLoader->Load"); m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_uploadHeap, &m_vidMemBufferPool, &m_constantBufferRing); } else if (stage == 6) { @@ -372,19 +312,19 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfDepth->OnCreate"); //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( + m_pGltfDepth = new GltfDepthPass(); + m_pGltfDepth->OnCreate( m_pDevice, - m_render_pass_shadow, - &m_UploadHeap, + m_renderPassShadow, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers ); #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + 
m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif } else if (stage == 8) @@ -392,25 +332,27 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfPBR->OnCreate"); // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( + m_pGltfPBR = new GltfPbrPass(); + m_pGltfPBR->OnCreate( m_pDevice, - m_render_pass_HDR_MSAA, - &m_UploadHeap, + m_renderPassHDRMSAA, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_skyDome, - m_shadowMapSRV, - true, - true, false, + m_shadowMapSRV, + true, // Exports ForwardPass + false, // Won't export Specular Roughness + false, // Won't export Diffuse Color + false, // Won't export normals VK_SAMPLE_COUNT_4_BIT ); #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif } else if (stage == 9) @@ -418,30 +360,30 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) Profile p("m_gltfBBox->OnCreate"); // just a bounding box pass that will draw boundingboxes instead of the geometry itself - m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( + m_pGltfBBox = new GltfBBoxPass(); + m_pGltfBBox->OnCreate( m_pDevice, - m_render_pass_HDR_MSAA, + m_renderPassHDRMSAA, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_wireframe ); #if (USE_VID_MEM==true) // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); #endif } else if (stage == 10) { Profile p("Flush"); - 
m_UploadHeap.FlushAndFinish(); + m_uploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) //once everything is uploaded we dont need he upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + m_vidMemBufferPool.FreeUploadHeap(); #endif // tell caller that we are done loading the map return -1; @@ -456,27 +398,29 @@ int SPD_Renderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) // UnloadScene // //-------------------------------------------------------------------------------------- -void SPD_Renderer::UnloadScene() +void SPDRenderer::UnloadScene() { - if (m_gltfPBR) + m_pDevice->GPUFlush(); + + if (m_pGltfPBR) { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; + m_pGltfPBR->OnDestroy(); + delete m_pGltfPBR; + m_pGltfPBR = NULL; } - if (m_gltfDepth) + if (m_pGltfDepth) { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; + m_pGltfDepth->OnDestroy(); + delete m_pGltfDepth; + m_pGltfDepth = NULL; } - if (m_gltfBBox) + if (m_pGltfBBox) { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; + m_pGltfBBox->OnDestroy(); + delete m_pGltfBBox; + m_pGltfBBox = NULL; } if (m_pGLTFTexturesAndBuffers) @@ -492,15 +436,15 @@ void SPD_Renderer::UnloadScene() // OnRender // //-------------------------------------------------------------------------------------- -void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) +void SPDRenderer::OnRender(State *pState, SwapChain *pSwapChain) { // Let our resource managers do some house keeping // - m_ConstantBufferRing.OnBeginFrame(); + m_constantBufferRing.OnBeginFrame(); // command buffer calls // - VkCommandBuffer cmd_buf = m_CommandListRing.GetNewCommandList(); + VkCommandBuffer cmdBuf1 = m_commandListRing.GetNewCommandList(); { VkCommandBufferBeginInfo cmd_buf_info; @@ -508,17 +452,20 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) cmd_buf_info.pNext = NULL; cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; 
cmd_buf_info.pInheritanceInfo = NULL; - VkResult res = vkBeginCommandBuffer(cmd_buf, &cmd_buf_info); + VkResult res = vkBeginCommandBuffer(cmdBuf1, &cmd_buf_info); assert(res == VK_SUCCESS); } - m_GPUTimer.OnBeginFrame(cmd_buf, &m_TimeStamps); + m_GPUTimer.OnBeginFrame(cmdBuf1, &m_timeStamps); + + m_GPUTimer.GetTimeStampUser({ "time (s)", pState->time }); - // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- + // Sets the perFrame data // per_frame *pPerFrame = NULL; if (m_pGLTFTexturesAndBuffers) { + // fill as much as possible using the GLTF (camera, lights, ...) pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); // Set some lighting factors @@ -552,9 +499,9 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // Render to shadow map atlas for spot lights ------------------------------------------ // - if (m_gltfDepth && pPerFrame != NULL) + if (m_pGltfDepth && pPerFrame != NULL) { - SetPerfMarkerBegin(cmd_buf, "ShadowPass"); + SetPerfMarkerBegin(cmdBuf1, "ShadowPass"); VkClearValue depth_clear_values[1]; depth_clear_values[0].depthStencil.depth = 1.0f; @@ -564,8 +511,8 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) VkRenderPassBeginInfo rp_begin; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_shadow; - rp_begin.framebuffer = m_pFrameBuffer_shadow; + rp_begin.renderPass = m_renderPassShadow; + rp_begin.framebuffer = m_frameBufferShadow; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; rp_begin.renderArea.extent.width = m_shadowMap.GetWidth(); @@ -573,8 +520,8 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) rp_begin.clearValueCount = 1; rp_begin.pClearValues = depth_clear_values; - vkCmdBeginRenderPass(cmd_buf, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - m_GPUTimer.GetTimeStamp(cmd_buf, "Clear Shadow Map"); + 
vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Clear Shadow Map"); } uint32_t shadowMapIndex = 0; @@ -588,27 +535,27 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; uint32_t viewportWidth = m_shadowMap.GetWidth() / 2; uint32_t viewportHeight = m_shadowMap.GetHeight() / 2; - SetViewportAndScissor(cmd_buf, viewportOffsetsX[shadowMapIndex] * viewportWidth, viewportOffsetsY[shadowMapIndex] * viewportHeight, viewportWidth, viewportHeight); + SetViewportAndScissor(cmdBuf1, viewportOffsetsX[shadowMapIndex] * viewportWidth, viewportOffsetsY[shadowMapIndex] * viewportHeight, viewportWidth, viewportHeight); //set per frame constant buffer values - GltfDepthPass::per_frame *cbPerFrame = m_gltfDepth->SetPerFrameConstants(); + GltfDepthPass::per_frame *cbPerFrame = m_pGltfDepth->SetPerFrameConstants(); cbPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - m_gltfDepth->Draw(cmd_buf); + m_pGltfDepth->Draw(cmdBuf1); - m_GPUTimer.GetTimeStamp(cmd_buf, "Shadow maps"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Shadow maps"); shadowMapIndex++; } - vkCmdEndRenderPass(cmd_buf); + vkCmdEndRenderPass(cmdBuf1); - SetPerfMarkerEnd(cmd_buf); + SetPerfMarkerEnd(cmdBuf1); } // Render Scene to the MSAA HDR RT ------------------------------------------------ // { - SetPerfMarkerBegin(cmd_buf, "Color pass"); - m_GPUTimer.GetTimeStamp(cmd_buf, "before color RP"); + SetPerfMarkerBegin(cmdBuf1, "Color pass"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "before color RP"); VkClearValue clear_values[2]; clear_values[0].color.float32[0] = 0.0f; clear_values[0].color.float32[1] = 0.0f; @@ -620,8 +567,8 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) VkRenderPassBeginInfo rp_begin; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_HDR_MSAA; - rp_begin.framebuffer = m_pFrameBuffer_HDR_MSAA; + 
rp_begin.renderPass = m_renderPassHDRMSAA; + rp_begin.framebuffer = m_frameBufferHDRMSAA; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; rp_begin.renderArea.extent.width = m_Width; @@ -629,11 +576,11 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) rp_begin.clearValueCount = 2; rp_begin.pClearValues = clear_values; - vkCmdBeginRenderPass(cmd_buf, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - vkCmdSetScissor(cmd_buf, 0, 1, &m_scissor); - vkCmdSetViewport(cmd_buf, 0, 1, &m_viewport); - m_GPUTimer.GetTimeStamp(cmd_buf, "after color RP"); + vkCmdSetScissor(cmdBuf1, 0, 1, &m_scissor); + vkCmdSetViewport(cmdBuf1, 0, 1, &m_viewport); + m_GPUTimer.GetTimeStamp(cmdBuf1, "after color RP"); } if (pPerFrame != NULL) @@ -643,9 +590,9 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) if (pState->skyDomeType == 1) { XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - m_skyDome.Draw(cmd_buf, clipToView); + m_skyDome.Draw(cmdBuf1, clipToView); - m_GPUTimer.GetTimeStamp(cmd_buf, "Skydome cube"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Skydome cube"); } else if (pState->skyDomeType == 0) { @@ -658,28 +605,28 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) skyDomeConstants.mieDirectionalG = 0.8f; skyDomeConstants.luminance = 1.0f; skyDomeConstants.sun = false; - m_skyDomeProc.Draw(cmd_buf, skyDomeConstants); + m_skyDomeProc.Draw(cmdBuf1, skyDomeConstants); - m_GPUTimer.GetTimeStamp(cmd_buf, "Skydome Proc"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Skydome Proc"); } // Render scene to color buffer // - if (m_gltfBBox && pPerFrame != NULL) + if (m_pGltfBBox && pPerFrame != NULL) { - m_gltfPBR->Draw(cmd_buf); - m_GPUTimer.GetTimeStamp(cmd_buf, "Rendering Scene"); + m_pGltfPBR->Draw(cmdBuf1); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Rendering Scene"); } // draw object's bounding boxes // - if (m_gltfBBox && pPerFrame != NULL) + if 
(m_pGltfBBox && pPerFrame != NULL) { if (pState->bDrawBoundingBoxes) { - m_gltfBBox->Draw(cmd_buf, pPerFrame->mCameraViewProj); + m_pGltfBBox->Draw(cmdBuf1, pPerFrame->mCameraViewProj); - m_GPUTimer.GetTimeStamp(cmd_buf, "Bounding Box"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Bounding Box"); } } @@ -687,7 +634,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // if (pState->bDrawLightFrustum && pPerFrame != NULL) { - SetPerfMarkerBegin(cmd_buf, "light frustrum"); + SetPerfMarkerBegin(cmdBuf1, "light frustrum"); XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); @@ -696,24 +643,24 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) { XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; - m_wireframeBox.Draw(cmd_buf, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); + m_wireframeBox.Draw(cmdBuf1, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); } - m_GPUTimer.GetTimeStamp(cmd_buf, "Light's frustum"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Light's frustum"); - SetPerfMarkerEnd(cmd_buf); + SetPerfMarkerEnd(cmdBuf1); } } { - vkCmdEndRenderPass(cmd_buf); - SetPerfMarkerEnd(cmd_buf); + vkCmdEndRenderPass(cmdBuf1); + SetPerfMarkerEnd(cmdBuf1); } // Resolve MSAA ------------------------------------------------------------------------ // { - SetPerfMarkerBegin(cmd_buf, "resolve MSAA"); + SetPerfMarkerBegin(cmdBuf1, "resolve MSAA"); { VkImageMemoryBarrier barrier[2] = {}; barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; @@ -746,7 +693,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) barrier[1].subresourceRange.layerCount = 1; barrier[1].image = m_HDRMSAA.Resource(); - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 2, barrier); + vkCmdPipelineBarrier(cmdBuf1, 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 2, barrier); } { @@ -762,7 +709,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) re.dstOffset.y = 0; re.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; re.dstSubresource.layerCount = 1; - vkCmdResolveImage(cmd_buf, m_HDRMSAA.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_HDR.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &re); + vkCmdResolveImage(cmdBuf1, m_HDRMSAA.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_HDR.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &re); } { @@ -773,6 +720,11 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; // we need to read from it for the post-processing + // when we use load to fetch the data from the source texture, the source texture needs to be in general layout instead of shader read only + if (pState->downsampler == Downsampler::SPDCS && pState->spdLoad == SPDLoad::SPDLoad) + { + barrier[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; // we load from a storage image in this case + } barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; @@ -797,45 +749,63 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) barrier[1].subresourceRange.layerCount = 1; barrier[1].image = m_HDRMSAA.Resource(); - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 2, barrier); + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 2, 
barrier); } - m_GPUTimer.GetTimeStamp(cmd_buf, "Resolve"); - SetPerfMarkerEnd(cmd_buf); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Resolve"); + SetPerfMarkerEnd(cmdBuf1); } // Post proc--------------------------------------------------------------------------- // { - SetPerfMarkerBegin(cmd_buf, "post proc"); + SetPerfMarkerBegin(cmdBuf1, "post proc"); + VkImageMemoryBarrier barrier[1] = {}; switch (pState->downsampler) { case Downsampler::PS: - m_PSDownsampler.Draw(cmd_buf); - m_PSDownsampler.Gui(); - break; - case Downsampler::Multipass_CS: - m_CSDownsampler.Draw(cmd_buf); - m_CSDownsampler.Gui(); + m_PSDownsampler.Draw(cmdBuf1); + m_PSDownsampler.GUI(&pState->downsamplerImGUISlice); break; - case Downsampler::SPD_CS: - m_SPD_Versions.Dispatch(cmd_buf, pState->spdVersion, pState->spdPacked); - m_SPD_Versions.Gui(pState->spdVersion, pState->spdPacked); + case Downsampler::MultipassCS: + m_CSDownsampler.Draw(cmdBuf1); + m_CSDownsampler.GUI(&pState->downsamplerImGUISlice); break; - case Downsampler::SPD_CS_Linear_Sampler: - m_SPD_Versions.DispatchLinearSamplerVersion(cmd_buf, pState->spdVersion, pState->spdPacked); - m_SPD_Versions.GuiLinearSamplerVersion(pState->spdVersion, pState->spdPacked); + case Downsampler::SPDCS: + if (m_usingDescriptorIndexing) { + m_SPDVersions.Dispatch(cmdBuf1, pState->spdLoad, pState->spdWaveOps, pState->spdPacked); + m_SPDVersions.GUI(pState->spdLoad, pState->spdWaveOps, pState->spdPacked, &pState->downsamplerImGUISlice); + } + + if (pState->spdLoad == SPDLoad::SPDLoad) + { + barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier[0].pNext = NULL; + barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier[0].oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + 
barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier[0].subresourceRange.baseMipLevel = 0; + barrier[0].subresourceRange.levelCount = 1; + barrier[0].subresourceRange.baseArrayLayer = 0; + barrier[0].subresourceRange.layerCount = 1; + barrier[0].image = m_HDR.Resource(); + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 1, barrier); + } break; } - m_GPUTimer.GetTimeStamp(cmd_buf, "Downsampler"); + m_GPUTimer.GetTimeStamp(cmdBuf1, "Downsampler"); - SetPerfMarkerEnd(cmd_buf); + SetPerfMarkerEnd(cmdBuf1); } { - VkResult res = vkEndCommandBuffer(cmd_buf); + VkResult res = vkEndCommandBuffer(cmdBuf1); assert(res == VK_SUCCESS); VkSubmitInfo submit_info; @@ -845,7 +815,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) submit_info.pWaitSemaphores = NULL; submit_info.pWaitDstStageMask = NULL; submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &cmd_buf; + submit_info.pCommandBuffers = &cmdBuf1; submit_info.signalSemaphoreCount = 0; submit_info.pSignalSemaphores = NULL; res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); @@ -858,9 +828,9 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) int imageIndex = pSwapChain->WaitForSwapChain(); - m_CommandListRing.OnBeginFrame(); + m_commandListRing.OnBeginFrame(); - VkCommandBuffer cmdBuf2 = m_CommandListRing.GetNewCommandList(); + VkCommandBuffer cmdBuf2 = m_commandListRing.GetNewCommandList(); { VkCommandBufferBeginInfo cmd_buf_info; @@ -902,7 +872,7 @@ void SPD_Renderer::OnRender(State *pState, SwapChain *pSwapChain) // Render HUD ------------------------------------------------------------------------ // { - m_ImGUI.Draw(cmdBuf2); + m_imGUI.Draw(cmdBuf2); m_GPUTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); } diff --git a/sample/src/VK/SPD_Renderer.h b/sample/src/VK/SPDRenderer.h similarity index 
58% rename from sample/src/VK/SPD_Renderer.h rename to sample/src/VK/SPDRenderer.h index 4750d3d..2094934 100644 --- a/sample/src/VK/SPD_Renderer.h +++ b/sample/src/VK/SPDRenderer.h @@ -20,7 +20,7 @@ #include "CSDownsampler.h" #include "PSDownsampler.h" -#include "SPD_Versions.h" +#include "SPDVersions.h" static const int backBufferCount = 3; @@ -35,12 +35,11 @@ using namespace CAULDRON_VK; enum class Downsampler { PS, - Multipass_CS, - SPD_CS, - SPD_CS_Linear_Sampler, + MultipassCS, + SPDCS }; -class SPD_Renderer +class SPDRenderer { public: struct Spotlight @@ -52,27 +51,34 @@ class SPD_Renderer struct State { - float time; - Camera camera; + float time; + Camera camera; - float exposure; - float iblFactor; - float emmisiveFactor; + float exposure; + float iblFactor; + float emmisiveFactor; - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; + int toneMapper; + int skyDomeType; + bool bDrawBoundingBoxes; - uint32_t spotlightCount; - Spotlight spotlight[4]; - bool bDrawLightFrustum; + uint32_t spotlightCount; + Spotlight spotlight[4]; - Downsampler downsampler; - SPD_Version spdVersion; - SPD_Packed spdPacked; + bool isBenchmarking; + bool isValidationLayerEnabled; + + bool bDrawLightFrustum; + + Downsampler downsampler; + SPDLoad spdLoad; + SPDWaveOps spdWaveOps; + SPDPacked spdPacked; + + int downsamplerImGUISlice; }; - void OnCreate(Device *pDevice, SwapChain *pSwapChain); + void OnCreate(Device *pDevice, SwapChain *pSwapChain, bool usingDescriptorIndexing = false); void OnDestroy(); void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); @@ -81,33 +87,33 @@ class SPD_Renderer int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); void UnloadScene(); - const std::vector &GetTimingValues() { return m_TimeStamps; } + const std::vector &GetTimingValues() { return m_timeStamps; } void OnRender(State *pState, SwapChain *pSwapChain); private: - Device *m_pDevice; + Device *m_pDevice = nullptr; - uint32_t m_Width; 
- uint32_t m_Height; + uint32_t m_Width; + uint32_t m_Height; VkRect2D m_scissor; VkViewport m_viewport; // Initialize helper classes ResourceViewHeaps m_resourceViewHeaps; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - StaticBufferPool m_SysMemBufferPool; - CommandListRing m_CommandListRing; + UploadHeap m_uploadHeap; + DynamicBufferRing m_constantBufferRing; + StaticBufferPool m_vidMemBufferPool; + StaticBufferPool m_sysMemBufferPool; + CommandListRing m_commandListRing; GPUTimestamps m_GPUTimer; //gltf passes - GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; - GltfPbrPass *m_gltfPBR; - GltfDepthPass *m_gltfDepth; - GltfBBoxPass *m_gltfBBox; + GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers = nullptr; + GltfPbrPass *m_pGltfPBR = nullptr; + GltfDepthPass *m_pGltfDepth = nullptr; + GltfBBoxPass *m_pGltfBBox = nullptr; // effects SkyDome m_skyDome; @@ -117,19 +123,19 @@ class SPD_Renderer // downsampling - m_HDR PSDownsampler m_PSDownsampler; CSDownsampler m_CSDownsampler; - SPD_Versions m_SPD_Versions; + SPDVersions m_SPDVersions; VkCommandPool m_CommandPool; VkCommandBuffer m_CommandBufferInit; // GUI - ImGUI m_ImGUI; + ImGUI m_imGUI; // Temporary render targets // depth buffer Texture m_depthBuffer; - VkImageView m_depthBufferView; + VkImageView m_depthBufferDSV; // shadowmaps Texture m_shadowMap; @@ -138,7 +144,7 @@ class SPD_Renderer // MSAA RT Texture m_HDRMSAA; - VkImageView m_HDRMSAASRV; + VkImageView m_HDRMSAARTV; // Resolved RT Texture m_HDR; @@ -149,15 +155,15 @@ class SPD_Renderer Wireframe m_wireframe; WireframeBox m_wireframeBox; - VkRenderPass m_render_pass_shadow; - VkRenderPass m_render_pass_HDR_MSAA; - VkRenderPass m_render_pass_PBR_HDR; + VkRenderPass m_renderPassShadow; + VkRenderPass m_renderPassHDRMSAA; + VkRenderPass m_renderPassPBRHDR; - VkFramebuffer m_pFrameBuffer_shadow; - VkFramebuffer m_pFrameBuffer_HDR_MSAA; - VkFramebuffer m_pFrameBuffer_PBR_HDR; + VkFramebuffer 
m_frameBufferShadow; + VkFramebuffer m_frameBufferHDRMSAA; + VkFramebuffer m_frameBufferPBRHDR; - std::vector m_TimeStamps; + std::vector m_timeStamps; - VkFormat m_Format; + bool m_usingDescriptorIndexing; }; diff --git a/sample/src/VK/SPDSample.cpp b/sample/src/VK/SPDSample.cpp new file mode 100644 index 0000000..fd9f7b2 --- /dev/null +++ b/sample/src/VK/SPDSample.cpp @@ -0,0 +1,561 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "stdafx.h" + +#include "SPDSample.h" +#include "base/ShaderCompilerCache.h" +#include "base/Instance.h" + +SPDSample::SPDSample(LPCSTR name) : FrameworkWindows(name) +{ + m_lastFrameTime = MillisecondsNow(); + m_time = 0; + m_bPlay = true; + + m_pGltfLoader = NULL; +} + +//-------------------------------------------------------------------------------------- +// +// OnParseCommandLine +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen) +{ + // set some default values + *pWidth = 1920; + *pHeight = 1080; + *pbFullScreen = false; + m_state.isBenchmarking = true; + m_isCpuValidationLayerEnabled = false; + m_isGpuValidationLayerEnabled = false; + + //read globals + auto process = [&](json jData) + { + *pWidth = jData.value("width", *pWidth); + *pHeight = jData.value("height", *pHeight); + *pbFullScreen = jData.value("fullScreen", *pbFullScreen); + m_isCpuValidationLayerEnabled = jData.value("CpuValidationLayerEnabled", m_isCpuValidationLayerEnabled); + m_isGpuValidationLayerEnabled = jData.value("GpuValidationLayerEnabled", m_isGpuValidationLayerEnabled); + m_state.isBenchmarking = jData.value("benchmark", m_state.isBenchmarking); + m_state.downsampler = jData.value("downsampler", m_state.downsampler); + m_state.spdLoad = jData.value("spdLoad", m_state.spdLoad); + m_state.spdWaveOps = jData.value("spdWaveOps", m_state.spdWaveOps); + m_state.spdPacked = jData.value("spdPacked", m_state.spdPacked); + }; + + //read json globals from commandline + // + try + { + if (strlen(lpCmdLine) > 0) + { + auto j3 = json::parse(lpCmdLine); + process(j3); + } + } + catch (json::parse_error) + { + Trace("Error parsing commandline\n"); + exit(0); + } + + // read config file (and override values from commandline if so) + // + { + std::ifstream f("SpdSample.json"); + if (!f) + { + MessageBox(NULL, "Config file not found!\n", 
"Cauldron Panic!", MB_ICONERROR); + exit(0); + } + + try + { + f >> m_jsonConfigFile; + } + catch (json::parse_error) + { + MessageBox(NULL, "Error parsing GLTFSample.json!\n", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } + } + + json globals = m_jsonConfigFile["globals"]; + process(globals); +} + + + +//-------------------------------------------------------------------------------------- +// +// OnCreate +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnCreate(HWND hWnd) +{ + // Create Device + // + InstanceProperties ip; + ip.Init(); + m_device.SetEssentialInstanceExtensions(m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, &ip); + + // Create Instance + VkInstance vulkanInstance; + VkPhysicalDevice physicalDevice; + CreateInstance("SpdSample", "Cauldron", &vulkanInstance, &physicalDevice, &ip); + + DeviceProperties dp; + dp.Init(physicalDevice); + m_device.SetEssentialDeviceExtensions(&dp); + + m_usingDescriptorIndexing = dp.AddDeviceExtensionName(VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME); + + VkPhysicalDeviceDescriptorIndexingFeatures descriptorIndexingFeatures = {}; + descriptorIndexingFeatures.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES; + descriptorIndexingFeatures.pNext = dp.GetNext(); + descriptorIndexingFeatures.descriptorBindingPartiallyBound = VK_TRUE; + + if (m_usingDescriptorIndexing) + { + dp.SetNewNext(&descriptorIndexingFeatures); + } + + // Create device + m_device.OnCreateEx(vulkanInstance, physicalDevice, hWnd, &dp); + + m_device.CreatePipelineCache(); + + //init the shader compiler + InitDirectXCompiler(); + InitShaderCompilerCache("ShaderLibVK", "ShaderLibVK\\ShaderCacheVK"); + CreateShaderCache(); + + // Create Swapchain + // + uint32_t dwNumberOfBackBuffers = 2; + m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do that for each GPU + // + m_pNode = new 
SPDRenderer(); + m_pNode->OnCreate(&m_device, &m_swapChain, m_usingDescriptorIndexing); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void *)hWnd); + + // Init Camera, looking at the origin + // + m_roll = 0.0f; + m_pitch = 0.0f; + m_distance = 3.5f; + + // init GUI state + m_state.toneMapper = 0; + m_state.skyDomeType = 0; + m_state.exposure = 1.0f; + m_state.iblFactor = 2.0f; + m_state.emmisiveFactor = 1.0f; + m_state.bDrawLightFrustum = false; + m_state.bDrawBoundingBoxes = false; + m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); + + m_state.spotlightCount = 1; + + m_state.spotlight[0].intensity = 10.0f; + m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); + m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); + + m_state.downsamplerImGUISlice = 0; +} + +//-------------------------------------------------------------------------------------- +// +// OnDestroy +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnDestroy() +{ + ImGUI_Shutdown(); + + m_device.GPUFlush(); + + // Fullscreen state should always be false before exiting the app. 
+ m_swapChain.SetFullScreen(false); + + m_pNode->UnloadScene(); + m_pNode->OnDestroyWindowSizeDependentResources(); + m_pNode->OnDestroy(); + + delete m_pNode; + + m_swapChain.OnDestroyWindowSizeDependentResources(); + m_swapChain.OnDestroy(); + + //shut down the shader compiler + DestroyShaderCache(&m_device); + + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } + + m_device.DestroyPipelineCache(); + m_device.OnDestroy(); +} + +//-------------------------------------------------------------------------------------- +// +// OnEvent +// +//-------------------------------------------------------------------------------------- +bool SPDSample::OnEvent(MSG msg) +{ + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; + return true; +} + +//-------------------------------------------------------------------------------------- +// +// SetFullScreen +// +//-------------------------------------------------------------------------------------- +void SPDSample::SetFullScreen(bool fullscreen) +{ + m_device.GPUFlush(); + + m_swapChain.SetFullScreen(fullscreen); +} + +//-------------------------------------------------------------------------------------- +// +// OnResize +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnResize(uint32_t width, uint32_t height) +{ + if (m_Width != width || m_Height != height) + { + // Flush GPU + // + m_device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && m_Height > 0) + { + if (m_pNode != NULL) + { + m_pNode->OnDestroyWindowSizeDependentResources(); + } + m_swapChain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + + // if resizing but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false); + if (m_pNode != NULL) + { + 
m_pNode->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + } + } + } + m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); +} + +void SPDSample::BuildUI() +{ + ImGuiStyle& style = ImGui::GetStyle(); + style.FrameBorderSize = 1.0f; + + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowSize(ImVec2(250, 700), ImGuiCond_FirstUseEver); + + bool opened = true; + ImGui::Begin("Stats", &opened); + + if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Resolution : %ix%i", m_Width, m_Height); + } + + if (ImGui::CollapsingHeader("Downsampler", ImGuiTreeNodeFlags_DefaultOpen)) + { + if (m_usingDescriptorIndexing) { + // Downsample settings + const char* downsampleItemNames[] = + { + "PS", + "Multipass CS", + "SPD CS", + }; + ImGui::Combo("Downsampler Options", (int*)&m_state.downsampler, downsampleItemNames, _countof(downsampleItemNames)); + + // SPD Version + // Use load or linear sample to fetch data from source texture + const char* spdLoadItemNames[] = + { + "Load", + "Linear Sampler", + }; + ImGui::Combo("SPD Load / Linear Sampler", (int*)&m_state.spdLoad, spdLoadItemNames, _countof(spdLoadItemNames)); + + // if possible give choice of using wave operations + if (m_device.GetPhysicalDeviceSubgroupProperties().supportedOperations + & VK_SUBGROUP_FEATURE_QUAD_BIT) + { + const char* spdWaveOpsItemNames[] = + { + "No-WaveOps", + "WaveOps", + }; + ImGui::Combo("SPD No-WaveOps / WaveOps", (int*)&m_state.spdWaveOps, spdWaveOpsItemNames, _countof(spdWaveOpsItemNames)); + } + else { + const char* spdWaveOpsItemNames[] = + { + "No-WaveOps", + }; + ImGui::Combo("SPD No-WaveOps", (int*)&m_state.spdWaveOps, spdWaveOpsItemNames, _countof(spdWaveOpsItemNames)); + } + + // Non-Packed or Packed Version + const char* spdPackedItemNames[] = + { + "Non-Packed", + "Packed", + }; + ImGui::Combo("SPD Non-Packed / Packed Version", (int*)&m_state.spdPacked, spdPackedItemNames, 
_countof(spdPackedItemNames)); + } + else { + // Downsample settings + const char* downsampleItemNames[] = + { + "PS", + "Multipass CS" + }; + ImGui::Combo("Downsampler Options", (int*)&m_state.downsampler, downsampleItemNames, _countof(downsampleItemNames)); + } + } + + if (ImGui::CollapsingHeader("Lighting", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::SliderFloat("exposure", &m_state.exposure, 0.0f, 2.0f); + ImGui::SliderFloat("emmisive", &m_state.emmisiveFactor, 1.0f, 1000.0f, NULL, 1.0f); + ImGui::SliderFloat("iblFactor", &m_state.iblFactor, 0.0f, 2.0f); + } + + const char* tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; + ImGui::Combo("tone mapper", &m_state.toneMapper, tonemappers, _countof(tonemappers)); + + const char* skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; + ImGui::Combo("SkyDome", &m_state.skyDomeType, skyDomeType, _countof(skyDomeType)); + + const char* cameraControl[] = { "WASD", "Orbit" }; + static int cameraControlSelected = 1; + ImGui::Combo("Camera", &cameraControlSelected, cameraControl, _countof(cameraControl)); + + if (ImGui::CollapsingHeader("Profiler", ImGuiTreeNodeFlags_DefaultOpen)) + { + std::vector timeStamps = m_pNode->GetTimingValues(); + if (timeStamps.size() > 0) + { + for (uint32_t i = 1; i < timeStamps.size(); i++) + { + ImGui::Text("%-22s: %7.1f", timeStamps[i].m_label.c_str(), timeStamps[i].m_microseconds); + } + + //scrolling data and average computing + static float values[128]; + values[127] = timeStamps.back().m_microseconds; + for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; } + ImGui::PlotLines("", values, 128, 0, "GPU frame time (us)", 0.0f, 30000.0f, ImVec2(0, 80)); + } + } + +#ifdef USE_VMA + if (ImGui::Button("Save VMA json")) + { + char* pJson; + vmaBuildStatsString(m_device.GetAllocator(), &pJson, VK_TRUE); + + static char filename[256]; + time_t now = time(NULL); + tm buf; + localtime_s(&buf, &now); + strftime(filename, 
sizeof(filename), "VMA_%Y%m%d_%H%M%S.json", &buf); + std::ofstream ofs(filename, std::ofstream::out); + ofs << pJson; + ofs.close(); + vmaFreeStatsString(m_device.GetAllocator(), pJson); + } +#endif + + ImGui::End(); + + // If the mouse was not used by the GUI then it's for the camera + // + ImGuiIO& io = ImGui::GetIO(); + if (io.WantCaptureMouse == false) + { + if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) + { + m_roll -= io.MouseDelta.x / 100.f; + m_pitch += io.MouseDelta.y / 100.f; + } + + // Choose camera movement depending on setting + // + + if (cameraControlSelected == 0) + { + // WASD + // + m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); + } + else if (cameraControlSelected == 1) + { + // Orbiting + // + m_distance -= (float)io.MouseWheel / 3.0f; + m_distance = std::max(m_distance, 0.1f); + + bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); + + m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? io.MouseDelta.y / 100.0f : 0.0f, m_distance); + } + } +} + +//-------------------------------------------------------------------------------------- +// +// OnRender, updates the state from the UI, animates, transforms and renders the scene +// +//-------------------------------------------------------------------------------------- +void SPDSample::OnRender() +{ + // Get timings + // + double timeNow = MillisecondsNow(); + float deltaTime = (float)(timeNow - m_lastFrameTime); + m_lastFrameTime = timeNow; + + // Build UI and set the scene state. Note that the rendering of the UI happens later. 
+ // + ImGUI_UpdateIO(); + ImGui::NewFrame(); + + static int loadingStage = 0; + if (loadingStage >= 0) + { + // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns -1 + // This is done so we can display a progress bar when the scene is loading + if (m_pGltfLoader == NULL) + { + m_pGltfLoader = new GLTFCommon(); + m_pGltfLoader->Load("..\\media\\DamagedHelmet\\glTF\\", "DamagedHelmet.gltf"); + loadingStage = 0; + + // set benchmarking state if enabled + // + json scene = m_jsonConfigFile["scenes"][0]; + + // set default camera + // + json camera = scene["camera"]; + XMVECTOR from = GetVector(GetElementJsonArray(camera, "defaultFrom", { 0.0, 0.0, 10.0 })); + XMVECTOR to = GetVector(GetElementJsonArray(camera, "defaultTo", { 0.0, 0.0, 0.0 })); + m_state.camera.LookAt(from, to); + m_roll = m_state.camera.GetYaw(); + m_pitch = m_state.camera.GetPitch(); + m_distance = m_state.camera.GetDistance(); + + // set benchmarking state if enabled + + if (m_state.isBenchmarking) + { + BenchmarkConfig(scene["BenchmarkSettings"], -1, m_pGltfLoader); + } + } + loadingStage = m_pNode->LoadScene(m_pGltfLoader, loadingStage); + if (loadingStage == 0) + { + m_time = 0; + m_loadingScene = false; + } + } + else if (m_pGltfLoader && m_state.isBenchmarking) + { + // benchmarking takes control of the time, and exits the app when the animation is done + std::vector timeStamps = m_pNode->GetTimingValues(); + + const std::string* pFilename; + m_time = BenchmarkLoop(timeStamps, &m_state.camera, &pFilename); + + BuildUI(); + } + else + { + BuildUI(); + } + + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } + + m_state.time = m_time; + + // Do Render frame using AFR + // + m_pNode->OnRender(&m_state, &m_swapChain); + + m_swapChain.Present(); +} + + 
+//-------------------------------------------------------------------------------------- +// +// WinMain +// +//-------------------------------------------------------------------------------------- +int WINAPI WinMain(HINSTANCE hInstance, + HINSTANCE hPrevInstance, + LPSTR lpCmdLine, + int nCmdShow) +{ + LPCSTR Name = "FFX SPD SampleVK v2.0"; + + // create new Vulkan sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new SPDSample(Name)); +} diff --git a/sample/src/DX12/SPD_Sample.h b/sample/src/VK/SPDSample.h similarity index 78% rename from sample/src/DX12/SPD_Sample.h rename to sample/src/VK/SPDSample.h index a72ede9..2ba385d 100644 --- a/sample/src/DX12/SPD_Sample.h +++ b/sample/src/VK/SPDSample.h @@ -18,7 +18,7 @@ // THE SOFTWARE. #pragma once -#include "SPD_Renderer.h" +#include "SPDRenderer.h" // // This is the main class, it manages the state of the sample and does all the high level work without touching the GPU directly. @@ -35,12 +35,14 @@ // - uses the SampleRenderer to update all the state to the GPU and do the rendering // -class SPD_Sample : public FrameworkWindows +class SPDSample : public FrameworkWindows { public: - SPD_Sample(LPCSTR name); + SPDSample(LPCSTR name); + void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool* pbFullScreen); void OnCreate(HWND hWnd); void OnDestroy(); + void BuildUI(); void OnRender(); bool OnEvent(MSG msg); void OnResize(uint32_t Width, uint32_t Height); @@ -50,18 +52,25 @@ class SPD_Sample : public FrameworkWindows Device m_device; SwapChain m_swapChain; - GLTFCommon *m_pGltfLoader; + GLTFCommon *m_pGltfLoader = nullptr; + bool m_loadingScene = false; - SPD_Renderer *m_Node; - SPD_Renderer::State m_state; + SPDRenderer *m_pNode = nullptr; + SPDRenderer::State m_state; float m_distance; float m_roll; float m_pitch; float m_time; // WallClock in seconds. - double m_deltaTime; // The elapsed time in milliseconds since the previous frame. 
double m_lastFrameTime; + // json config file + json m_jsonConfigFile; + bool m_isCpuValidationLayerEnabled; + bool m_isGpuValidationLayerEnabled; + bool m_bPlay; -}; + + bool m_usingDescriptorIndexing; +}; \ No newline at end of file diff --git a/sample/src/VK/SPDVersions.cpp b/sample/src/VK/SPDVersions.cpp new file mode 100644 index 0000000..7d9b57e --- /dev/null +++ b/sample/src/VK/SPDVersions.cpp @@ -0,0 +1,204 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "stdafx.h" +#include "Base\DynamicBufferRing.h" +#include "Base\StaticBufferPool.h" +#include "Base\UploadHeap.h" +#include "Base\Texture.h" +#include "Base\Helper.h" +#include "SPDVersions.h" + + +namespace CAULDRON_VK +{ + void SPDVersions::OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps) + { + m_pDevice = pDevice; + + // check if subgroup operations are supported, otherwise we need to fallback to the LDS only version + if (pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations + & VK_SUBGROUP_FEATURE_QUAD_BIT) + { + m_spd_WaveOps_NonPacked.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLoad, SPDWaveOps::SPDWaveOps, SPDPacked::SPDNonPacked); + m_spd_WaveOps_Packed.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLoad, SPDWaveOps::SPDWaveOps, SPDPacked::SPDPacked); + + m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDWaveOps, SPDPacked::SPDNonPacked); + m_spd_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDWaveOps, SPDPacked::SPDPacked); + } + + // fallback path + m_spd_No_WaveOps_NonPacked.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLoad, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDNonPacked); + m_spd_No_WaveOps_Packed.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLoad, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDPacked); + + m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDNonPacked); + m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pUploadHeap, pResourceViewHeaps, SPDLoad::SPDLinearSampler, SPDWaveOps::SPDNoWaveOps, SPDPacked::SPDPacked); + } + + void SPDVersions::OnDestroy() + { + m_spd_No_WaveOps_NonPacked.OnDestroy(); + m_spd_No_WaveOps_Packed.OnDestroy(); + + 
m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); + m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroy(); + + if (m_pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations + & VK_SUBGROUP_FEATURE_QUAD_BIT) + { + m_spd_WaveOps_NonPacked.OnDestroy(); + m_spd_WaveOps_Packed.OnDestroy(); + + m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); + m_spd_WaveOps_Packed_Linear_Sampler.OnDestroy(); + } + } + + uint32_t SPDVersions::GetMaxMIPLevelCount(uint32_t Width, uint32_t Height) + { + int resolution = max(Width, Height); + return (static_cast(min(floor(log2(resolution)), 12))); + } + + void SPDVersions::Dispatch(VkCommandBuffer cmd_buf, SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked) + { + switch (spdLoad) + { + case SPDLoad::SPDLoad: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked.Draw(cmd_buf); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed.Draw(cmd_buf); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked.Draw(cmd_buf); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed.Draw(cmd_buf); + break; + } + } + break; + } + case SPDLoad::SPDLinearSampler: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked_Linear_Sampler.Draw(cmd_buf); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed_Linear_Sampler.Draw(cmd_buf); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked_Linear_Sampler.Draw(cmd_buf); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed_Linear_Sampler.Draw(cmd_buf); + break; + } + } + break; + } + } + } + + void SPDVersions::GUI(SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked, int* pSlice) + { + switch (spdLoad) + { + 
case SPDLoad::SPDLoad: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed.GUI(pSlice); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed.GUI(pSlice); + break; + } + } + break; + } + case SPDLoad::SPDLinearSampler: + { + switch (spdWaveOps) + { + case SPDWaveOps::SPDWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_WaveOps_NonPacked_Linear_Sampler.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_WaveOps_Packed_Linear_Sampler.GUI(pSlice); + break; + } + break; + case SPDWaveOps::SPDNoWaveOps: + switch (spdPacked) + { + case SPDPacked::SPDNonPacked: + m_spd_No_WaveOps_NonPacked_Linear_Sampler.GUI(pSlice); + break; + case SPDPacked::SPDPacked: + m_spd_No_WaveOps_Packed_Linear_Sampler.GUI(pSlice); + break; + } + } + break; + } + } + } +} \ No newline at end of file diff --git a/sample/src/VK/SPDVersions.h b/sample/src/VK/SPDVersions.h new file mode 100644 index 0000000..b4c7e92 --- /dev/null +++ b/sample/src/VK/SPDVersions.h @@ -0,0 +1,55 @@ +// SPDSample +// +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+#pragma once + +#include "PostProc/PostProcCS.h" +#include "PostProc/PostProcPS.h" +#include "Base/ResourceViewHeaps.h" + +#include "SPDCS.h" + +namespace CAULDRON_VK +{ + class SPDVersions + { + public: + void OnCreate(Device *pDevice, UploadHeap *pUploadHeap, ResourceViewHeaps *pResourceViewHeaps); + void OnDestroy(); + + void Dispatch(VkCommandBuffer cmd_buf, SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked); + void GUI(SPDLoad spdLoad, SPDWaveOps spdWaveOps, SPDPacked spdPacked, int *pSlice); + + private: + Device *m_pDevice = NULL; + + SPDCS m_spd_WaveOps_NonPacked; + SPDCS m_spd_No_WaveOps_NonPacked; + + SPDCS m_spd_WaveOps_Packed; + SPDCS m_spd_No_WaveOps_Packed; + + SPDCS m_spd_WaveOps_NonPacked_Linear_Sampler; + SPDCS m_spd_No_WaveOps_NonPacked_Linear_Sampler; + + SPDCS m_spd_WaveOps_Packed_Linear_Sampler; + SPDCS m_spd_No_WaveOps_Packed_Linear_Sampler; + + uint32_t GetMaxMIPLevelCount(uint32_t Width, uint32_t Height); + }; +} diff --git a/sample/src/VK/SPD_CS.cpp b/sample/src/VK/SPD_CS.cpp deleted file mode 100644 index 72351e0..0000000 --- a/sample/src/VK/SPD_CS.cpp +++ /dev/null @@ -1,353 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#include "stdafx.h" -#include "Base\Device.h" -#include "Base\ShaderCompilerHelper.h" -#include "Base\ExtDebugMarkers.h" -#include "Base\Imgui.h" - -#include "SPD_CS.h" - -namespace CAULDRON_VK -{ - void SPD_CS::OnCreate( - Device* pDevice, - ResourceViewHeaps *pResourceViewHeaps, - VkFormat outFormat, - bool fallback, - bool packed - ) - { - m_pDevice = pDevice; - m_pResourceViewHeaps = pResourceViewHeaps; - m_outFormat = outFormat; - - // create the descriptor set layout - // the shader needs - // source image: storage image (read-only) - // destination image: storage image - // global atomic counter: storage buffer - { - std::vector layoutBindings(3); - layoutBindings[0].binding = 0; - layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - layoutBindings[0].descriptorCount = 1; - layoutBindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[0].pImmutableSamplers = NULL; - - layoutBindings[1].binding = 1; - layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - layoutBindings[1].descriptorCount = SPD_MAX_MIP_LEVELS; - layoutBindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[1].pImmutableSamplers = NULL; - - layoutBindings[2].binding = 2; - layoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - layoutBindings[2].descriptorCount = 1; - layoutBindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[2].pImmutableSamplers = NULL; - - VkDescriptorSetLayoutCreateInfo descriptor_layout = {}; 
- descriptor_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout.pNext = NULL; - descriptor_layout.bindingCount = (uint32_t)layoutBindings.size(); - descriptor_layout.pBindings = layoutBindings.data(); - - VkResult res = vkCreateDescriptorSetLayout(pDevice->GetDevice(), &descriptor_layout, NULL, &m_descriptorSetLayout); - assert(res == VK_SUCCESS); - } - - // Create global atomic counter - { - VkBufferCreateInfo bufferInfo = {}; - bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - bufferInfo.flags = 0; - bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - bufferInfo.queueFamilyIndexCount = 0; - bufferInfo.pQueueFamilyIndices = NULL; - bufferInfo.size = sizeof(int) * 1; - bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - VmaAllocationCreateInfo bufferAllocCreateInfo = {}; - bufferAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - bufferAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; - bufferAllocCreateInfo.pUserData = "SpdGlobalAtomicCounter"; - VmaAllocationInfo bufferAllocInfo = {}; - vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferInfo, &bufferAllocCreateInfo, &m_globalCounter, - &m_globalCounterAllocation, &bufferAllocInfo); - } - - VkPipelineShaderStageCreateInfo computeShader; - DefineList defines; - - if (fallback) { - defines["SPD_NO_WAVE_OPERATIONS"] = std::to_string(1); - } - if (packed) { - defines["A_HALF"] = std::to_string(1); - defines["SPD_PACKED_ONLY"] = std::to_string(1); - } - - VkResult res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, - "SPD_Integration.glsl", "main", &defines, &computeShader); - assert(res == VK_SUCCESS); - - // Create pipeline layout - // - VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {}; - pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pPipelineLayoutCreateInfo.pNext = NULL; - - // push constants - VkPushConstantRange pushConstantRange = {}; - pushConstantRange.offset = 
0; - pushConstantRange.size = sizeof(PushConstants); - pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - pPipelineLayoutCreateInfo.pushConstantRangeCount = 1; - pPipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; - - pPipelineLayoutCreateInfo.setLayoutCount = 1; - pPipelineLayoutCreateInfo.pSetLayouts = &m_descriptorSetLayout; - - res = vkCreatePipelineLayout(pDevice->GetDevice(), &pPipelineLayoutCreateInfo, NULL, &m_pipelineLayout); - assert(res == VK_SUCCESS); - - // Create pipeline - // - VkComputePipelineCreateInfo pipeline = {}; - pipeline.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline.pNext = NULL; - pipeline.flags = 0; - pipeline.layout = m_pipelineLayout; - pipeline.stage = computeShader; - pipeline.basePipelineHandle = VK_NULL_HANDLE; - pipeline.basePipelineIndex = 0; - - res = vkCreateComputePipelines(pDevice->GetDevice(), pDevice->GetPipelineCache(), 1, &pipeline, NULL, &m_pipeline); - assert(res == VK_SUCCESS); - - m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_descriptorSet); - } - - void SPD_CS::OnCreateWindowSizeDependentResources( - VkCommandBuffer cmd_buf, - uint32_t Width, - uint32_t Height, - Texture *pInput, - int mips - ) - { - m_Width = Width; - m_Height = Height; - m_mipCount = mips; - - VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = NULL; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = m_outFormat; - image_info.extent.width = m_Width >> 1; - image_info.extent.height = m_Height >> 1; - image_info.extent.depth = 1; - image_info.mipLevels = m_mipCount; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = NULL; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.usage = (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | 
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT); - image_info.flags = 0; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - m_result.Init(m_pDevice, &image_info, "SpdDestinationMips"); - - // transition layout undefined to general layout? - VkImageMemoryBarrier imageMemoryBarrier = {}; - imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - imageMemoryBarrier.pNext = NULL; - imageMemoryBarrier.srcAccessMask = 0; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - imageMemoryBarrier.subresourceRange.baseMipLevel = 0; - imageMemoryBarrier.subresourceRange.levelCount = m_mipCount; - imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - imageMemoryBarrier.image = m_result.Resource(); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); - - // Create views for the mip chain - // - // source ----------- - // - pInput->CreateSRV(&m_SRV, 0); - - // Create and initialize the Descriptor Sets (all of them use the same Descriptor Layout) - // Create and initialize descriptor set for sampled image - VkDescriptorImageInfo desc_source_image = {}; - desc_source_image.sampler = VK_NULL_HANDLE; - desc_source_image.imageView = m_SRV; - desc_source_image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - std::vector writes(3); - writes[0] = {}; - writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[0].pNext = NULL; - 
writes[0].dstSet = m_descriptorSet; - writes[0].descriptorCount = 1; - writes[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - writes[0].pImageInfo = &desc_source_image; - writes[0].dstBinding = 0; - writes[0].dstArrayElement = 0; - - // Create and initialize descriptor set for storage image - std::vector desc_storage_images(m_mipCount); - - for (int i = 0; i < m_mipCount; i++) - { - // destination ----------- - m_result.CreateRTV(&m_RTV[i], i); - - desc_storage_images[i] = {}; - desc_storage_images[i].sampler = VK_NULL_HANDLE; - desc_storage_images[i].imageView = m_RTV[i]; - desc_storage_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - writes[1] = {}; - writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[1].pNext = NULL; - writes[1].dstSet = m_descriptorSet; - writes[1].descriptorCount = m_mipCount; - writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - writes[1].pImageInfo = desc_storage_images.data(); - writes[1].dstBinding = 1; - writes[1].dstArrayElement = 0; - - VkDescriptorBufferInfo desc_buffer = {}; - desc_buffer.buffer = m_globalCounter; - desc_buffer.offset = 0; - desc_buffer.range = sizeof(int) * 1; - - writes[2] = {}; - writes[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[2].pNext = NULL; - writes[2].dstSet = m_descriptorSet; - writes[2].descriptorCount = 1; - writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[2].pBufferInfo = &desc_buffer; - writes[2].dstBinding = 2; - writes[2].dstArrayElement = 0; - - vkUpdateDescriptorSets(m_pDevice->GetDevice(), (uint32_t)writes.size(), writes.data(), 0, NULL); - } - - void SPD_CS::OnDestroyWindowSizeDependentResources() - { - vkDestroyImageView(m_pDevice->GetDevice(), m_SRV, NULL); - for (int i = 0; i < m_mipCount; i++) - { - vkDestroyImageView(m_pDevice->GetDevice(), m_RTV[i], NULL); - } - - m_result.OnDestroy(); - } - - void SPD_CS::OnDestroy() - { - - m_pResourceViewHeaps->FreeDescriptor(m_descriptorSet); - - 
vmaDestroyBuffer(m_pDevice->GetAllocator(), m_globalCounter, m_globalCounterAllocation); - - vkDestroyPipeline(m_pDevice->GetDevice(), m_pipeline, nullptr); - vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_pipelineLayout, nullptr); - vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_descriptorSetLayout, NULL); - } - - void SPD_CS::Draw(VkCommandBuffer cmd_buf) - { - // downsample - // - - // initialize global atomic counter to 0 - vmaMapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation, (void**)&m_pCounter); - *m_pCounter = 0; - vmaUnmapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 0, nullptr); - - SetPerfMarkerBegin(cmd_buf, "SPD_CS"); - - // Bind Pipeline - // - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline); - - // should be / 64 - uint32_t dispatchX = (((m_Width + 63) >> (6))); - uint32_t dispatchY = (((m_Height + 63) >> (6))); - uint32_t dispatchZ = 1; - - // single pass for storage buffer? 
- //uint32_t uniformOffsets[1] = { (uint32_t)constantBuffer.offset }; - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descriptorSet, 0, nullptr); - - // Bind push constants - // - PushConstants data; - data.mips = m_mipCount; - data.numWorkGroups = dispatchX * dispatchY * dispatchZ; - vkCmdPushConstants(cmd_buf, m_pipelineLayout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), (void*)&data); - - // Draw - // - vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 0, nullptr); - - SetPerfMarkerEnd(cmd_buf); - } - - void SPD_CS::Gui() - { - bool opened = true; - ImGui::Begin("Downsample", &opened); - - ImGui::Image((ImTextureID)m_SRV, ImVec2(320, 180)); - - for (int i = 0; i < m_mipCount; i++) - { - ImGui::Image((ImTextureID)m_RTV[i], ImVec2(320, 180)); - } - - ImGui::End(); - } -} \ No newline at end of file diff --git a/sample/src/VK/SPD_CS.h b/sample/src/VK/SPD_CS.h deleted file mode 100644 index 6275f07..0000000 --- a/sample/src/VK/SPD_CS.h +++ /dev/null @@ -1,77 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -#pragma once - -#include "Base/StaticBufferPool.h" -#include "Base/Texture.h" -#include "Base/DynamicBufferRing.h" - -namespace CAULDRON_VK -{ -#define SPD_MAX_MIP_LEVELS 12 - - class SPD_CS - { - public: - void OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, VkFormat outFormat, bool fallback, bool packed); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(VkCommandBuffer cmd_buf, uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); - - void Draw(VkCommandBuffer cmd_buf); - Texture *GetTexture() { return &m_result; } - VkImageView GetTextureView(int i) { return m_RTV[i]; } - void Gui(); - - struct PushConstants - { - int mips; - int numWorkGroups; - int padding[2]; - }; - - private: - Device *m_pDevice; - VkFormat m_outFormat; - - Texture m_result; - - VkImageView m_RTV[SPD_MAX_MIP_LEVELS]; // destinations (mips) - VkImageView m_SRV; // source - VkDescriptorSet m_descriptorSet; - - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; - - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; - - VkDescriptorSetLayout m_descriptorSetLayout; - - VkPipelineLayout m_pipelineLayout; - VkPipeline m_pipeline; - - VkBuffer m_globalCounter; - VmaAllocation m_globalCounterAllocation; - - uint32_t* m_pCounter; - }; -} \ No newline at end of file diff --git a/sample/src/VK/SPD_CS_Linear_Sampler.cpp b/sample/src/VK/SPD_CS_Linear_Sampler.cpp deleted file mode 100644 index 
0a51534..0000000 --- a/sample/src/VK/SPD_CS_Linear_Sampler.cpp +++ /dev/null @@ -1,397 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" -#include "Base\Device.h" -#include "Base\ShaderCompilerHelper.h" -#include "Base\ExtDebugMarkers.h" -#include "Base\Imgui.h" - -#include "SPD_CS_Linear_Sampler.h" - -namespace CAULDRON_VK -{ - void SPD_CS_Linear_Sampler::OnCreate( - Device* pDevice, - ResourceViewHeaps *pResourceViewHeaps, - VkFormat outFormat, - bool fallback, - bool packed - ) - { - m_pDevice = pDevice; - m_pResourceViewHeaps = pResourceViewHeaps; - m_outFormat = outFormat; - - // create the descriptor set layout - // the shader needs - // source image: sampled image - // destination image: storage image - // global atomic counter: storage buffer - // sampler - { - std::vector layoutBindings(4); - layoutBindings[0].binding = 0; - layoutBindings[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - layoutBindings[0].descriptorCount = 1; - layoutBindings[0].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[0].pImmutableSamplers = NULL; - - layoutBindings[1].binding = 1; - layoutBindings[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - layoutBindings[1].descriptorCount = SPD_MAX_MIP_LEVELS; - layoutBindings[1].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[1].pImmutableSamplers = NULL; - - layoutBindings[2].binding = 2; - layoutBindings[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - layoutBindings[2].descriptorCount = 1; - layoutBindings[2].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[2].pImmutableSamplers = NULL; - - layoutBindings[3].binding = 3; - layoutBindings[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - layoutBindings[3].descriptorCount = 1; - layoutBindings[3].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - layoutBindings[3].pImmutableSamplers = NULL; - - VkDescriptorSetLayoutCreateInfo descriptor_layout = {}; - descriptor_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - descriptor_layout.pNext = NULL; - descriptor_layout.bindingCount = (uint32_t)layoutBindings.size(); - 
descriptor_layout.pBindings = layoutBindings.data(); - - VkResult res = vkCreateDescriptorSetLayout(pDevice->GetDevice(), &descriptor_layout, NULL, &m_descriptorSetLayout); - assert(res == VK_SUCCESS); - } - - // The sampler we want to use, needs to match the SPD Reduction function in the shader - // linear sampler: - // -> AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} - // point sampler: - // -> AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return v3;} - { - VkSamplerCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - info.magFilter = VK_FILTER_LINEAR; - info.minFilter = VK_FILTER_LINEAR; - info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - info.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - info.minLod = -1000; - info.maxLod = 1000; - info.maxAnisotropy = 1.0f; - VkResult res = vkCreateSampler(pDevice->GetDevice(), &info, NULL, &m_sampler); - assert(res == VK_SUCCESS); - } - - // Create global atomic counter - { - VkBufferCreateInfo bufferInfo = {}; - bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - bufferInfo.flags = 0; - bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - bufferInfo.queueFamilyIndexCount = 0; - bufferInfo.pQueueFamilyIndices = NULL; - bufferInfo.size = sizeof(int) * 1; - bufferInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; - - VmaAllocationCreateInfo bufferAllocCreateInfo = {}; - bufferAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_TO_GPU; - bufferAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; - bufferAllocCreateInfo.pUserData = "SpdGlobalAtomicCounter"; - VmaAllocationInfo bufferAllocInfo = {}; - vmaCreateBuffer(m_pDevice->GetAllocator(), &bufferInfo, &bufferAllocCreateInfo, &m_globalCounter, - &m_globalCounterAllocation, &bufferAllocInfo); - } - - VkPipelineShaderStageCreateInfo computeShader; - DefineList defines; - - 
if (fallback) { - defines["SPD_NO_WAVE_OPERATIONS"] = std::to_string(1); - } - if (packed) { - defines["A_HALF"] = std::to_string(1); - defines["SPD_PACKED_ONLY"] = std::to_string(1); - } - - VkResult res = VKCompileFromFile(m_pDevice->GetDevice(), VK_SHADER_STAGE_COMPUTE_BIT, - "SPD_Integration_Linear_Sampler.glsl", "main", &defines, &computeShader); - assert(res == VK_SUCCESS); - - // Create pipeline layout - // - VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {}; - pPipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pPipelineLayoutCreateInfo.pNext = NULL; - - // push constants - VkPushConstantRange pushConstantRange = {}; - pushConstantRange.offset = 0; - pushConstantRange.size = sizeof(PushConstants); - pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - pPipelineLayoutCreateInfo.pushConstantRangeCount = 1; - pPipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; - - pPipelineLayoutCreateInfo.setLayoutCount = 1; - pPipelineLayoutCreateInfo.pSetLayouts = &m_descriptorSetLayout; - - res = vkCreatePipelineLayout(pDevice->GetDevice(), &pPipelineLayoutCreateInfo, NULL, &m_pipelineLayout); - assert(res == VK_SUCCESS); - - // Create pipeline - // - VkComputePipelineCreateInfo pipeline = {}; - pipeline.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline.pNext = NULL; - pipeline.flags = 0; - pipeline.layout = m_pipelineLayout; - pipeline.stage = computeShader; - pipeline.basePipelineHandle = VK_NULL_HANDLE; - pipeline.basePipelineIndex = 0; - - res = vkCreateComputePipelines(pDevice->GetDevice(), pDevice->GetPipelineCache(), 1, &pipeline, NULL, &m_pipeline); - assert(res == VK_SUCCESS); - - m_pResourceViewHeaps->AllocDescriptor(m_descriptorSetLayout, &m_descriptorSet); - } - - void SPD_CS_Linear_Sampler::OnCreateWindowSizeDependentResources( - VkCommandBuffer cmd_buf, - uint32_t Width, - uint32_t Height, - Texture *pInput, - int mips - ) - { - m_Width = Width; - m_Height = Height; - m_mipCount 
= mips; - - VkImageCreateInfo image_info = {}; - image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - image_info.pNext = NULL; - image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = m_outFormat; - image_info.extent.width = m_Width >> 1; - image_info.extent.height = m_Height >> 1; - image_info.extent.depth = 1; - image_info.mipLevels = m_mipCount; - image_info.arrayLayers = 1; - image_info.samples = VK_SAMPLE_COUNT_1_BIT; - image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - image_info.queueFamilyIndexCount = 0; - image_info.pQueueFamilyIndices = NULL; - image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - image_info.usage = (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT); - image_info.flags = 0; - image_info.tiling = VK_IMAGE_TILING_OPTIMAL; - m_result.Init(m_pDevice, &image_info, "SpdDestinationMips"); - - // transition layout undefined to general layout? - VkImageMemoryBarrier imageMemoryBarrier = {}; - imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - imageMemoryBarrier.pNext = NULL; - imageMemoryBarrier.srcAccessMask = 0; - imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT; - imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - imageMemoryBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - imageMemoryBarrier.subresourceRange.baseMipLevel = 0; - imageMemoryBarrier.subresourceRange.levelCount = m_mipCount; - imageMemoryBarrier.subresourceRange.baseArrayLayer = 0; - imageMemoryBarrier.subresourceRange.layerCount = 1; - imageMemoryBarrier.image = m_result.Resource(); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 1, &imageMemoryBarrier); - - // Create views for the mip chain - // - // source ----------- - // - pInput->CreateSRV(&m_SRV, 0); - - // Create and initialize the Descriptor Sets (all of them use the same Descriptor Layout) - // Create and initialize descriptor set for sampled image - VkDescriptorImageInfo desc_sampled_image = {}; - desc_sampled_image.sampler = VK_NULL_HANDLE; - desc_sampled_image.imageView = m_SRV; - desc_sampled_image.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - std::vector writes(4); - writes[0] = {}; - writes[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[0].pNext = NULL; - writes[0].dstSet = m_descriptorSet; - writes[0].descriptorCount = 1; - writes[0].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - writes[0].pImageInfo = &desc_sampled_image; - writes[0].dstBinding = 0; - writes[0].dstArrayElement = 0; - - // Create and initialize descriptor set for storage image - std::vector desc_storage_images(m_mipCount); - - for (int i = 0; i < m_mipCount; i++) - { - // destination ----------- - m_result.CreateRTV(&m_RTV[i], i); - - desc_storage_images[i] = {}; - desc_storage_images[i].sampler = VK_NULL_HANDLE; - desc_storage_images[i].imageView = m_RTV[i]; - desc_storage_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - } - - writes[1] = {}; - writes[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[1].pNext = NULL; - writes[1].dstSet = m_descriptorSet; - writes[1].descriptorCount = m_mipCount; - writes[1].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - writes[1].pImageInfo = desc_storage_images.data(); - writes[1].dstBinding = 1; - writes[1].dstArrayElement = 0; - - VkDescriptorBufferInfo desc_buffer = {}; - desc_buffer.buffer = m_globalCounter; - desc_buffer.offset = 0; - desc_buffer.range = sizeof(int) * 1; - - writes[2] = {}; - writes[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - 
writes[2].pNext = NULL; - writes[2].dstSet = m_descriptorSet; - writes[2].descriptorCount = 1; - writes[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; - writes[2].pBufferInfo = &desc_buffer; - writes[2].dstBinding = 2; - writes[2].dstArrayElement = 0; - - // Create and initialize descriptor set for sampler - VkDescriptorImageInfo desc_sampler = {}; - desc_sampler.sampler = m_sampler; - - writes[3] = {}; - writes[3].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - writes[3].pNext = NULL; - writes[3].dstSet = m_descriptorSet; - writes[3].descriptorCount = 1; - writes[3].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - writes[3].pImageInfo = &desc_sampler; - writes[3].dstBinding = 3; - writes[3].dstArrayElement = 0; - - vkUpdateDescriptorSets(m_pDevice->GetDevice(), (uint32_t)writes.size(), writes.data(), 0, NULL); - } - - void SPD_CS_Linear_Sampler::OnDestroyWindowSizeDependentResources() - { - vkDestroyImageView(m_pDevice->GetDevice(), m_SRV, NULL); - for (int i = 0; i < m_mipCount; i++) - { - vkDestroyImageView(m_pDevice->GetDevice(), m_RTV[i], NULL); - } - - m_result.OnDestroy(); - } - - void SPD_CS_Linear_Sampler::OnDestroy() - { - - m_pResourceViewHeaps->FreeDescriptor(m_descriptorSet); - - vmaDestroyBuffer(m_pDevice->GetAllocator(), m_globalCounter, m_globalCounterAllocation); - - vkDestroyPipeline(m_pDevice->GetDevice(), m_pipeline, nullptr); - vkDestroyPipelineLayout(m_pDevice->GetDevice(), m_pipelineLayout, nullptr); - vkDestroyDescriptorSetLayout(m_pDevice->GetDevice(), m_descriptorSetLayout, NULL); - } - - void SPD_CS_Linear_Sampler::Draw(VkCommandBuffer cmd_buf) - { - // downsample - // - - // initialize global atomic counter to 0 - vmaMapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation, (void**)&m_pCounter); - *m_pCounter = 0; - vmaUnmapMemory(m_pDevice->GetAllocator(), m_globalCounterAllocation); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 0, nullptr); - - SetPerfMarkerBegin(cmd_buf, "SPD_CS_Linear_Sampler"); - - // Bind Pipeline - // - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipeline); - - // should be / 64 - uint32_t dispatchX = (((m_Width + 63) >> (6))); - uint32_t dispatchY = (((m_Height + 63) >> (6))); - uint32_t dispatchZ = 1; - - // single pass for storage buffer? - //uint32_t uniformOffsets[1] = { (uint32_t)constantBuffer.offset }; - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, m_pipelineLayout, 0, 1, &m_descriptorSet, 0, nullptr); - - // Bind push constants - // - PushConstants data; - data.mips = m_mipCount; - data.numWorkGroups = dispatchX * dispatchY * dispatchZ; - data.invInputSize[0] = 1.0f / m_Width; - data.invInputSize[1] = 1.0f / m_Height; - vkCmdPushConstants(cmd_buf, m_pipelineLayout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(PushConstants), (void*)&data); - - // Draw - // - vkCmdDispatch(cmd_buf, dispatchX, dispatchY, dispatchZ); - - // transition general layout if detination image to shader read only for source image - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, 0, nullptr, 0, nullptr, 0, nullptr); - - SetPerfMarkerEnd(cmd_buf); - } - - void SPD_CS_Linear_Sampler::Gui() - { - bool opened = true; - ImGui::Begin("Downsample", &opened); - - ImGui::Image((ImTextureID)m_SRV, ImVec2(320, 180)); - - for (int i = 0; i < m_mipCount; i++) - { - ImGui::Image((ImTextureID)m_RTV[i], ImVec2(320, 180)); - } - - ImGui::End(); - } -} \ No newline at end of file diff --git a/sample/src/VK/SPD_CS_Linear_Sampler.h b/sample/src/VK/SPD_CS_Linear_Sampler.h deleted file mode 100644 index fe6639d..0000000 --- a/sample/src/VK/SPD_CS_Linear_Sampler.h +++ /dev/null @@ -1,79 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
-#pragma once - -#include "Base/StaticBufferPool.h" -#include "Base/Texture.h" -#include "Base/DynamicBufferRing.h" - -namespace CAULDRON_VK -{ -#define SPD_MAX_MIP_LEVELS 12 - - class SPD_CS_Linear_Sampler - { - public: - void OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, VkFormat outFormat, bool fallback, bool packed); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(VkCommandBuffer cmd_buf, uint32_t Width, uint32_t Height, Texture *pInput, int mips); - void OnDestroyWindowSizeDependentResources(); - - void Draw(VkCommandBuffer cmd_buf); - Texture *GetTexture() { return &m_result; } - VkImageView GetTextureView(int i) { return m_RTV[i]; } - void Gui(); - - struct PushConstants - { - int mips; - int numWorkGroups; - float invInputSize[2]; - }; - - private: - Device *m_pDevice; - VkFormat m_outFormat; - - Texture m_result; - - VkImageView m_RTV[SPD_MAX_MIP_LEVELS]; // destinations (mips) - VkImageView m_SRV; // source - VkDescriptorSet m_descriptorSet; - - ResourceViewHeaps *m_pResourceViewHeaps; - DynamicBufferRing *m_pConstantBufferRing; - - uint32_t m_Width; - uint32_t m_Height; - int m_mipCount; - - VkDescriptorSetLayout m_descriptorSetLayout; - - VkPipelineLayout m_pipelineLayout; - VkPipeline m_pipeline; - - VkBuffer m_globalCounter; - VmaAllocation m_globalCounterAllocation; - - uint32_t* m_pCounter; - - VkSampler m_sampler; - }; -} \ No newline at end of file diff --git a/sample/src/VK/SPD_Integration.glsl b/sample/src/VK/SPD_Integration.glsl deleted file mode 100644 index 4090f1b..0000000 --- a/sample/src/VK/SPD_Integration.glsl +++ /dev/null @@ -1,124 +0,0 @@ -#version 450 -#extension GL_GOOGLE_include_directive : enable -#extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shading_language_420pack : enable -#extension GL_ARB_compute_shader : enable -#extension GL_ARB_shader_group_vote : enable - -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; - -//-------------------------------------------------------------------------------------- -// Push Constants -//-------------------------------------------------------------------------------------- -layout(push_constant) uniform pushConstants { - uint mips; - uint numWorkGroups; -} spdConstants; - -//-------------------------------------------------------------------------------------- -// Texture definitions -//-------------------------------------------------------------------------------------- -layout(set=0, binding=0, rgba16f) uniform image2D imgSrc; -layout(set=0, binding=1, rgba16f) coherent uniform image2D imgDst[12]; - -//-------------------------------------------------------------------------------------- -// Buffer definitions - global atomic counter -//-------------------------------------------------------------------------------------- -layout(std430, binding=2) coherent buffer globalAtomicBuffer -{ - uint counter; -} globalAtomic; - -#define A_GPU -#define A_GLSL - -#include "ffx_a.h" - -shared AU1 spd_counter; - -// define fetch and store functions Non-Packed -#ifndef SPD_PACKED_ONLY -shared AF1 spd_intermediateR[16][16]; -shared AF1 spd_intermediateG[16][16]; -shared AF1 spd_intermediateB[16][16]; -shared AF1 spd_intermediateA[16][16]; -AF4 SpdLoadSourceImage(ASU2 p){return imageLoad(imgSrc, p);} -AF4 SpdLoad(ASU2 p){return imageLoad(imgDst[5],p);} -void SpdStore(ASU2 p, AF4 value, AU1 mip){imageStore(imgDst[mip], p, value);} -void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ - return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - spd_intermediateB[x][y], - spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = 
value.z; - spd_intermediateA[x][y] = value.w;} -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} -#endif - -// define fetch and store functions Packed -#ifdef A_HALF -shared AH2 spd_intermediateRG[16][16]; -shared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(ASU2 p){return AH4(imageLoad(imgSrc, p));} -AH4 SpdLoadH(ASU2 p){return AH4(imageLoad(imgDst[5],p));} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imageStore(imgDst[mip], p, AF4(value));} -void SpdIncreaseAtomicCounter(){spd_counter = atomicAdd(globalAtomic.counter, 1);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ - return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} -void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25f);} -#endif - -#include "ffx_spd.h" - -// Main function -//-------------------------------------------------------------------------------------- -//-------------------------------------------------------------------------------------- -void main() -{ -#ifndef A_HALF - SpdDownsample( - AU2(gl_WorkGroupID.xy), - AU1(gl_LocalInvocationIndex), - AU1(spdConstants.mips), - AU1(spdConstants.numWorkGroups)); -#else - SpdDownsampleH( - AU2(gl_WorkGroupID.xy), - AU1(gl_LocalInvocationIndex), - AU1(spdConstants.mips), - AU1(spdConstants.numWorkGroups)); -#endif -} \ No newline at end of file diff --git a/sample/src/VK/SPD_Integration.hlsl b/sample/src/VK/SPD_Integration.hlsl deleted file mode 100644 index 935dc5d..0000000 --- a/sample/src/VK/SPD_Integration.hlsl +++ /dev/null @@ -1,117 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -//-------------------------------------------------------------------------------------- -// Push Constants -//-------------------------------------------------------------------------------------- -[[vk::push_constant]] -cbuffer spdConstants { - uint mips; - uint numWorkGroups; -}; -//-------------------------------------------------------------------------------------- -// Texture definitions -//-------------------------------------------------------------------------------------- -[[vk::binding(0)]] Texture2D imgSrc :register(u0); -[[vk::binding(1)]] globallycoherent RWTexture2D imgDst[12] :register(u1); - -//-------------------------------------------------------------------------------------- -// Buffer definitions - global atomic counter -//-------------------------------------------------------------------------------------- -struct globalAtomicBuffer -{ - uint counter; -}; -[[vk::binding(2)]] globallycoherent RWStructuredBuffer globalAtomic; - -#define A_GPU -#define A_HLSL - -#include "ffx_a.h" - -groupshared AU1 spd_counter; - -// define fetch and store functions -#ifndef SPD_PACKED_ONLY -groupshared AF1 spd_intermediateR[16][16]; -groupshared AF1 spd_intermediateG[16][16]; -groupshared AF1 spd_intermediateB[16][16]; -groupshared AF1 spd_intermediateA[16][16]; -AF4 SpdLoadSourceImage(ASU2 tex){return imgSrc[tex];} -AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} -void SpdStore(ASU2 pix, AF4 outValue, AU1 index){imgDst[index][pix] = outValue;} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ - return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - spd_intermediateB[x][y], - spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = value.z; - spd_intermediateA[x][y] = value.w;} -AF4 
SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} -#endif - -// define fetch and store functions Packed -#ifdef A_HALF -groupshared AH2 spd_intermediateRG[16][16]; -groupshared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(ASU2 tex){return AH4(imgSrc[tex]);} -AH4 SpdLoadH(ASU2 p){return AH4(imgDst[5][p]);} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imgDst[mip][p] = AF4(value);} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ - return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} -void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);} -#endif - -#include "ffx_spd.h" - -// Main function -//-------------------------------------------------------------------------------------- -//-------------------------------------------------------------------------------------- -[numthreads(256,1,1)] -void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) -{ -#ifndef A_HALF - SpdDownsample( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#else - SpdDownsampleH( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#endif -} \ No newline at end of file diff --git a/sample/src/VK/SPD_Integration_Linear_Sampler.hlsl b/sample/src/VK/SPD_Integration_Linear_Sampler.hlsl deleted file mode 100644 index db9502b..0000000 --- a/sample/src/VK/SPD_Integration_Linear_Sampler.hlsl +++ /dev/null @@ -1,131 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. 
-// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -//-------------------------------------------------------------------------------------- -// Push Constants -//-------------------------------------------------------------------------------------- -[[vk::push_constant]] -cbuffer spdConstants { - uint mips; - uint numWorkGroups; - // [SAMPLER] - float2 invInputSize; -}; -//-------------------------------------------------------------------------------------- -// Texture definitions -//-------------------------------------------------------------------------------------- -[[vk::binding(0)]] Texture2D imgSrc :register(u0); -[[vk::binding(1)]] globallycoherent RWTexture2D imgDst[12] :register(u1); -// [SAMPLER] -[[vk::binding(3)]] SamplerState srcSampler :register(s0); - -//-------------------------------------------------------------------------------------- -// Buffer definitions - global atomic counter -//-------------------------------------------------------------------------------------- -struct globalAtomicBuffer -{ - uint counter; -}; -[[vk::binding(2)]] globallycoherent RWStructuredBuffer globalAtomic; - -#define A_GPU -#define A_HLSL - -#include "ffx_a.h" - -groupshared AU1 spd_counter; - -// define fetch and store functions -#ifndef SPD_PACKED_ONLY -groupshared AF1 spd_intermediateR[16][16]; -groupshared AF1 spd_intermediateG[16][16]; -groupshared AF1 spd_intermediateB[16][16]; -groupshared AF1 spd_intermediateA[16][16]; -//AF4 DSLoadSourceImage(ASU2 tex){return imgSrc[tex];} -//[SAMPLER] -AF4 SpdLoadSourceImage(ASU2 p){ - AF2 textureCoord = p * invInputSize + invInputSize; - return imgSrc.SampleLevel(srcSampler, textureCoord, 0); -} -AF4 SpdLoad(ASU2 tex){return imgDst[5][tex];} -void SpdStore(ASU2 pix, AF4 outValue, AU1 index){imgDst[index][pix] = outValue;} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AF4 SpdLoadIntermediate(AU1 x, AU1 y){ - return AF4( - spd_intermediateR[x][y], - spd_intermediateG[x][y], - 
spd_intermediateB[x][y], - spd_intermediateA[x][y]);} -void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value){ - spd_intermediateR[x][y] = value.x; - spd_intermediateG[x][y] = value.y; - spd_intermediateB[x][y] = value.z; - spd_intermediateA[x][y] = value.w;} -AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3){return (v0+v1+v2+v3)*0.25;} -#endif - -// define fetch and store functions Packed -#ifdef A_HALF -groupshared AH2 spd_intermediateRG[16][16]; -groupshared AH2 spd_intermediateBA[16][16]; -AH4 SpdLoadSourceImageH(ASU2 p){ - AF2 textureCoord = p * invInputSize + invInputSize; - return AH4(imgSrc.SampleLevel(srcSampler, textureCoord, 0)); -} -AH4 SpdLoadH(ASU2 p){return AH4(imgDst[5][p]);} -void SpdStoreH(ASU2 p, AH4 value, AU1 mip){imgDst[mip][p] = AF4(value);} -void SpdIncreaseAtomicCounter(){InterlockedAdd(globalAtomic[0].counter, 1, spd_counter);} -AU1 SpdGetAtomicCounter(){return spd_counter;} -AH4 SpdLoadIntermediateH(AU1 x, AU1 y){ - return AH4( - spd_intermediateRG[x][y].x, - spd_intermediateRG[x][y].y, - spd_intermediateBA[x][y].x, - spd_intermediateBA[x][y].y);} -void SpdStoreIntermediateH(AU1 x, AU1 y, AH4 value){ - spd_intermediateRG[x][y] = value.xy; - spd_intermediateBA[x][y] = value.zw;} -AH4 SpdReduce4H(AH4 v0, AH4 v1, AH4 v2, AH4 v3){return (v0+v1+v2+v3)*AH1(0.25);} -#endif - -#define SPD_LINEAR_SAMPLER - -#include "ffx_spd.h" - -// Main function -//-------------------------------------------------------------------------------------- -//-------------------------------------------------------------------------------------- -[numthreads(256,1,1)] -void main(uint3 WorkGroupId : SV_GroupID, uint LocalThreadIndex : SV_GroupIndex) -{ -#ifndef A_HALF - SpdDownsample( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#else - SpdDownsampleH( - AU2(WorkGroupId.xy), - AU1(LocalThreadIndex), - AU1(mips), - AU1(numWorkGroups)); -#endif -} \ No newline at end of file diff --git a/sample/src/VK/SPD_Sample.cpp 
b/sample/src/VK/SPD_Sample.cpp deleted file mode 100644 index 070043c..0000000 --- a/sample/src/VK/SPD_Sample.cpp +++ /dev/null @@ -1,412 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" - -#include "SPD_Sample.h" - -const bool VALIDATION_ENABLED = false; - -SPD_Sample::SPD_Sample(LPCSTR name) : FrameworkWindows(name) -{ - m_lastFrameTime = MillisecondsNow(); - m_time = 0; - m_bPlay = true; - - m_pGltfLoader = NULL; -} - -//-------------------------------------------------------------------------------------- -// -// OnCreate -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnCreate(HWND hWnd) -{ - // Create Device - // - m_device.OnCreate("FFX_SPD_Sample", "Cauldron", VALIDATION_ENABLED, hWnd); - m_device.CreatePipelineCache(); - - //init the shader compiler - CreateShaderCache(); - - // Create Swapchain - // - uint32_t dwNumberOfBackBuffers = 2; - m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SPD_Renderer(); - m_Node->OnCreate(&m_device, &m_swapChain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_roll = 0.0f; - m_pitch = 0.0f; - m_distance = 3.5f; - - // init GUI state - m_state.toneMapper = 0; - m_state.skyDomeType = 0; - m_state.exposure = 1.0f; - m_state.iblFactor = 2.0f; - m_state.emmisiveFactor = 1.0f; - m_state.bDrawLightFrustum = false; - m_state.bDrawBoundingBoxes = false; - m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); - - m_state.spotlightCount = 1; - - m_state.spotlight[0].intensity = 10.0f; - m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); - m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); - - m_state.downsampler = Downsampler::SPD_CS; - m_state.spdVersion = SPD_Version::SPD_WaveOps; - m_state.spdPacked = SPD_Packed::SPD_Non_Packed; -} - 
-//-------------------------------------------------------------------------------------- -// -// OnDestroy -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnDestroy() -{ - ImGUI_Shutdown(); - - m_device.GPUFlush(); - - // Fullscreen state should always be false before exiting the app. - m_swapChain.SetFullScreen(false); - - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); - - delete m_Node; - - m_swapChain.OnDestroyWindowSizeDependentResources(); - m_swapChain.OnDestroy(); - - //shut down the shader compiler - DestroyShaderCache(&m_device); - - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } - - m_device.DestroyPipelineCache(); - m_device.OnDestroy(); -} - -//-------------------------------------------------------------------------------------- -// -// OnEvent -// -//-------------------------------------------------------------------------------------- -bool SPD_Sample::OnEvent(MSG msg) -{ - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return true; - return true; -} - -//-------------------------------------------------------------------------------------- -// -// SetFullScreen -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::SetFullScreen(bool fullscreen) -{ - m_device.GPUFlush(); - - m_swapChain.SetFullScreen(fullscreen); -} - -//-------------------------------------------------------------------------------------- -// -// OnResize -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnResize(uint32_t width, uint32_t height) -{ - if (m_Width != width || m_Height != height) - { - // Flush GPU - // - m_device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - if (m_Node != NULL) - { - m_Node->OnDestroyWindowSizeDependentResources(); - 
} - m_swapChain.OnDestroyWindowSizeDependentResources(); - } - - m_Width = width; - m_Height = height; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, false); - if (m_Node != NULL) - { - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); - } - } - } - m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); -} - -//-------------------------------------------------------------------------------------- -// -// OnRender, updates the state from the UI, animates, transforms and renders the scene -// -//-------------------------------------------------------------------------------------- -void SPD_Sample::OnRender() -{ - // Get timings - // - double timeNow = MillisecondsNow(); - float deltaTime = (m_timeStep == 0.0f) ? (float)(timeNow - m_lastFrameTime) : m_timeStep; - m_lastFrameTime = timeNow; - - // Build UI and set the scene state. Note that the rendering of the UI happens later. 
- // - ImGUI_UpdateIO(); - ImGui::NewFrame(); - - static int loadingStage = 0; - if (loadingStage >= 0) - { - // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns -1 - // This is done so we can display a progress bar when the scene is loading - if (m_pGltfLoader == NULL) - { - m_pGltfLoader = new GLTFCommon(); - m_pGltfLoader->Load("..\\media\\DamagedHelmet\\glTF\\", "DamagedHelmet.gltf"); - loadingStage = 0; - } - loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); - } - else - { - ImGuiStyle& style = ImGui::GetStyle(); - style.FrameBorderSize = 1.0f; - - bool opened = true; - ImGui::Begin("Stats", &opened); - - if (ImGui::CollapsingHeader("Info", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Text("Resolution : %ix%i", m_Width, m_Height); - } - - if (ImGui::CollapsingHeader("Downsampler", ImGuiTreeNodeFlags_DefaultOpen)) - { - // Downsample settings - const char* downsampleItemNames[] = - { - "PS", - "Multipass CS", - "SPD CS", - "SPD CS Linear Sampler" - }; - ImGui::Combo("Downsampler Options", (int*)&m_state.downsampler, downsampleItemNames, _countof(downsampleItemNames)); - - // SPD Version - if (m_device.GetPhysicalDeviceSubgroupProperties().supportedOperations - & VK_SUBGROUP_FEATURE_QUAD_BIT) - { - const char* dsVersionItemNames[] = - { - "No-WaveOps", - "WaveOps", - }; - ImGui::Combo("SPD Version", (int*)&m_state.spdVersion, dsVersionItemNames, _countof(dsVersionItemNames)); - } - else { - const char* dsVersionItemNames[] = - { - "No-WaveOps", - }; - ImGui::Combo("SPD Version", (int*)&m_state.spdVersion, dsVersionItemNames, _countof(dsVersionItemNames)); - } - - // Non-Packed or Packed Version - const char* dsPackedNames[] = - { - "Non-Packed", - "Packed", - }; - ImGui::Combo("SPD Non-Packed / Packed Version", (int*)&m_state.spdPacked, dsPackedNames, _countof(dsPackedNames)); - } - - if (ImGui::CollapsingHeader("Lighting", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::SliderFloat("exposure", 
&m_state.exposure, 0.0f, 2.0f); - ImGui::SliderFloat("emmisive", &m_state.emmisiveFactor, 1.0f, 1000.0f, NULL,1.0f); - ImGui::SliderFloat("iblFactor", &m_state.iblFactor, 0.0f, 2.0f); - } - - const char * tonemappers[] = { "Timothy", "DX11DSK", "Reinhard", "Uncharted2Tonemap", "ACES", "No tonemapper" }; - ImGui::Combo("tone mapper", &m_state.toneMapper, tonemappers, _countof(tonemappers)); - - const char * skyDomeType[] = { "Procedural Sky", "cubemap", "Simple clear" }; - ImGui::Combo("SkyDome", &m_state.skyDomeType, skyDomeType, _countof(skyDomeType)); - - const char * cameraControl[] = { "WASD", "Orbit" }; - static int cameraControlSelected = 1; - ImGui::Combo("Camera", &cameraControlSelected, cameraControl, _countof(cameraControl)); - - if (ImGui::CollapsingHeader("Profiler", ImGuiTreeNodeFlags_DefaultOpen)) - { - std::vector timeStamps = m_Node->GetTimingValues(); - if (timeStamps.size() > 0) - { - for (uint32_t i = 1; i < timeStamps.size(); i++) - { - float DeltaTime = ((float)(timeStamps[i].m_microseconds - timeStamps[i - 1].m_microseconds)); - ImGui::Text("%-17s: %7.1f us", timeStamps[i].m_label.c_str(), DeltaTime); - } - - //scrolling data and average computing - static float values[128]; - values[127] = (float)(timeStamps.back().m_microseconds - timeStamps.front().m_microseconds); - float average = values[0]; - for (uint32_t i = 0; i < 128 - 1; i++) { values[i] = values[i + 1]; average += values[i]; } - average /= 128; - - ImGui::Text("%-17s: %7.1f us", "TotalGPUTime", average); - ImGui::PlotLines("", values, 128, 0, "", 0.0f, 30000.0f, ImVec2(0, 80)); - } - } - -#ifdef USE_VMA - if (ImGui::Button("Save VMA json")) - { - char *pJson; - vmaBuildStatsString(m_device.GetAllocator(), &pJson, VK_TRUE); - - static char filename[256]; - time_t now = time(NULL); - tm buf; - localtime_s(&buf, &now); - strftime(filename, sizeof(filename), "VMA_%Y%m%d_%H%M%S.json", &buf); - std::ofstream ofs(filename, std::ofstream::out); - ofs << pJson; - ofs.close(); - 
vmaFreeStatsString(m_device.GetAllocator(), pJson); - } -#endif - - ImGui::End(); - - // If the mouse was not used by the GUI then it's for the camera - // - ImGuiIO& io = ImGui::GetIO(); - if (io.WantCaptureMouse == false) - { - if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) - { - m_roll -= io.MouseDelta.x / 100.f; - m_pitch += io.MouseDelta.y / 100.f; - } - - // Choose camera movement depending on setting - // - - if (cameraControlSelected == 0) - { - // WASD - // - m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); - } - else if (cameraControlSelected == 1) - { - // Orbiting - // - m_distance -= (float)io.MouseWheel / 3.0f; - m_distance = std::max(m_distance, 0.1f); - - bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); - - m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? io.MouseDelta.y / 100.0f : 0.0f, m_distance); - } - } - } - - // Set animation time - // - if (m_bPlay) - { - m_time += (float)deltaTime / 1000.0f; - } - - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } - - m_state.time = m_time; - - // Do Render frame using AFR - // - m_Node->OnRender(&m_state, &m_swapChain); - - m_swapChain.Present(); -} - - -//-------------------------------------------------------------------------------------- -// -// WinMain -// -//-------------------------------------------------------------------------------------- -int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) -{ - LPCSTR Name = "FFX SPD SampleVK v1.0"; - uint32_t Width = 1920; - uint32_t Height = 1080; - - // create new Vulkan sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, Width, Height, new SPD_Sample(Name)); -} diff --git a/sample/src/VK/SPD_Versions.cpp b/sample/src/VK/SPD_Versions.cpp deleted file mode 100644 index 
b86eea9..0000000 --- a/sample/src/VK/SPD_Versions.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include "stdafx.h" -#include "Base\DynamicBufferRing.h" -#include "Base\StaticBufferPool.h" -#include "Base\UploadHeap.h" -#include "Base\Texture.h" -#include "Base\Helper.h" -#include "SPD_Versions.h" - - -namespace CAULDRON_VK -{ - void SPD_Versions::OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, VkFormat outFormat) - { - m_pDevice = pDevice; - - // check if subgroup operations are supported, otherwise we need to fallback to the LDS only version - if (pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations - & VK_SUBGROUP_FEATURE_QUAD_BIT) - { - m_spd_WaveOps_NonPacked.OnCreate(pDevice, pResourceViewHeaps, outFormat, false, false); - m_spd_WaveOps_Packed.OnCreate(pDevice, pResourceViewHeaps, outFormat, false, true); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, outFormat, false, false); - m_spd_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, outFormat, false, true); - } - - // fallback path - m_spd_No_WaveOps_NonPacked.OnCreate(pDevice, pResourceViewHeaps, outFormat, true, false); - m_spd_No_WaveOps_Packed.OnCreate(pDevice, pResourceViewHeaps, outFormat, true, true); - - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, outFormat, true, false); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreate(pDevice, pResourceViewHeaps, outFormat, true, true); - } - - void SPD_Versions::OnDestroy() - { - m_spd_No_WaveOps_NonPacked.OnDestroy(); - m_spd_No_WaveOps_Packed.OnDestroy(); - - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroy(); - - if (m_pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations - & VK_SUBGROUP_FEATURE_QUAD_BIT) - { - m_spd_WaveOps_NonPacked.OnDestroy(); - m_spd_WaveOps_Packed.OnDestroy(); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroy(); - m_spd_WaveOps_Packed_Linear_Sampler.OnDestroy(); - } - } - - uint32_t SPD_Versions::GetMaxMipLevelCount(uint32_t Width, uint32_t 
Height) - { - int resolution = max(Width, Height); - return (static_cast(min(1.0f + floor(log2(resolution)), 12)) - 1); - } - - void SPD_Versions::OnCreateWindowSizeDependentResources(VkCommandBuffer cmd_buf, uint32_t Width, uint32_t Height, Texture *pInput) - { - if (m_pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations - & VK_SUBGROUP_FEATURE_QUAD_BIT) - { - m_spd_WaveOps_NonPacked.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_WaveOps_Packed.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_WaveOps_Packed_Linear_Sampler.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - } - m_spd_No_WaveOps_NonPacked.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_Packed.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnCreateWindowSizeDependentResources(cmd_buf, Width, Height, pInput, GetMaxMipLevelCount(Width, Height)); - } - - void SPD_Versions::OnDestroyWindowSizeDependentResources() - { - if (m_pDevice->GetPhysicalDeviceSubgroupProperties().supportedOperations - & VK_SUBGROUP_FEATURE_QUAD_BIT) - { - m_spd_WaveOps_NonPacked.OnDestroyWindowSizeDependentResources(); - m_spd_WaveOps_Packed.OnDestroyWindowSizeDependentResources(); - - m_spd_WaveOps_NonPacked_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - m_spd_WaveOps_Packed_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - } - 
m_spd_No_WaveOps_NonPacked.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_Packed.OnDestroyWindowSizeDependentResources(); - - m_spd_No_WaveOps_NonPacked_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - m_spd_No_WaveOps_Packed_Linear_Sampler.OnDestroyWindowSizeDependentResources(); - } - - void SPD_Versions::Dispatch(VkCommandBuffer cmd_buf, SPD_Version dsVersion, SPD_Packed dsPacked) - { - switch (dsVersion) - { - case SPD_Version::SPD_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked.Draw(cmd_buf); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed.Draw(cmd_buf); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked.Draw(cmd_buf); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed.Draw(cmd_buf); - break; - } - } - } - - void SPD_Versions::DispatchLinearSamplerVersion(VkCommandBuffer cmd_buf, SPD_Version dsVersion, SPD_Packed dsPacked) - { - switch (dsVersion) - { - case SPD_Version::SPD_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked_Linear_Sampler.Draw(cmd_buf); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed_Linear_Sampler.Draw(cmd_buf); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked_Linear_Sampler.Draw(cmd_buf); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed_Linear_Sampler.Draw(cmd_buf); - break; - } - } - } - - void SPD_Versions::Gui(SPD_Version dsVersion, SPD_Packed dsPacked) - { - switch (dsVersion) - { - case SPD_Version::SPD_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed.Gui(); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (dsPacked) - { - case 
SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed.Gui(); - break; - } - } - } - - void SPD_Versions::GuiLinearSamplerVersion(SPD_Version dsVersion, SPD_Packed dsPacked) - { - switch (dsVersion) - { - case SPD_Version::SPD_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_WaveOps_NonPacked_Linear_Sampler.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_WaveOps_Packed_Linear_Sampler.Gui(); - break; - } - break; - case SPD_Version::SPD_No_WaveOps: - switch (dsPacked) - { - case SPD_Packed::SPD_Non_Packed: - m_spd_No_WaveOps_NonPacked_Linear_Sampler.Gui(); - break; - case SPD_Packed::SPD_Packed: - m_spd_No_WaveOps_Packed_Linear_Sampler.Gui(); - break; - } - } - } -} \ No newline at end of file diff --git a/sample/src/VK/SPD_Versions.h b/sample/src/VK/SPD_Versions.h deleted file mode 100644 index 08ac4ab..0000000 --- a/sample/src/VK/SPD_Versions.h +++ /dev/null @@ -1,74 +0,0 @@ -// SPDSample -// -// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -#pragma once - -#include "PostProc/PostProcCS.h" -#include "PostProc/PostProcPS.h" -#include "Base/ResourceViewHeaps.h" - -#include "SPD_CS.h" -#include "SPD_CS_Linear_Sampler.h" - -namespace CAULDRON_VK -{ - enum class SPD_Version - { - SPD_No_WaveOps, - SPD_WaveOps, - }; - - enum class SPD_Packed - { - SPD_Non_Packed, - SPD_Packed, - }; - - class SPD_Versions - { - public: - void OnCreate(Device* pDevice, ResourceViewHeaps *pResourceViewHeaps, VkFormat outFormat); - void OnDestroy(); - - void OnCreateWindowSizeDependentResources(VkCommandBuffer cmd_buf, uint32_t Width, uint32_t Height, Texture *pInput); - void OnDestroyWindowSizeDependentResources(); - - void Dispatch(VkCommandBuffer cmd_buf, SPD_Version spdVersion, SPD_Packed spdPacked); - void Gui(SPD_Version spdVersion, SPD_Packed spdPacked); - - void DispatchLinearSamplerVersion(VkCommandBuffer cmd_buf, SPD_Version spdVersion, SPD_Packed spdPacked); - void GuiLinearSamplerVersion(SPD_Version spdVersion, SPD_Packed spdPacked); - - private: - Device* m_pDevice; - - SPD_CS m_spd_WaveOps_NonPacked; - SPD_CS m_spd_No_WaveOps_NonPacked; - - SPD_CS m_spd_WaveOps_Packed; - SPD_CS m_spd_No_WaveOps_Packed; - - SPD_CS_Linear_Sampler m_spd_WaveOps_NonPacked_Linear_Sampler; - SPD_CS_Linear_Sampler m_spd_No_WaveOps_NonPacked_Linear_Sampler; - - SPD_CS_Linear_Sampler m_spd_WaveOps_Packed_Linear_Sampler; - SPD_CS_Linear_Sampler m_spd_No_WaveOps_Packed_Linear_Sampler; - - uint32_t GetMaxMipLevelCount(uint32_t Width, uint32_t Height); - }; -}