From b4ab266c9010aaff5404f6a508a2e592eb367d36 Mon Sep 17 00:00:00 2001 From: Nathan Hoobler Date: Tue, 22 Mar 2016 11:40:34 -0400 Subject: initial commit --- src/shaders/Apply_PS.hlsl | 176 ++++++++++++++ src/shaders/ComputeLightLUT_CS.hlsl | 192 ++++++++++++++++ src/shaders/ComputePhaseLookup_PS.hlsl | 150 ++++++++++++ src/shaders/Debug_PS.hlsl | 42 ++++ src/shaders/DownsampleDepth_PS.hlsl | 82 +++++++ src/shaders/Quad_VS.hlsl | 46 ++++ src/shaders/RenderVolume_DS.hlsl | 181 +++++++++++++++ src/shaders/RenderVolume_HS.hlsl | 182 +++++++++++++++ src/shaders/RenderVolume_PS.hlsl | 403 +++++++++++++++++++++++++++++++++ src/shaders/RenderVolume_VS.hlsl | 204 +++++++++++++++++ src/shaders/Resolve_PS.hlsl | 179 +++++++++++++++ src/shaders/ShaderCommon.h | 265 ++++++++++++++++++++++ src/shaders/TemporalFilter_PS.hlsl | 207 +++++++++++++++++ 13 files changed, 2309 insertions(+) create mode 100644 src/shaders/Apply_PS.hlsl create mode 100644 src/shaders/ComputeLightLUT_CS.hlsl create mode 100644 src/shaders/ComputePhaseLookup_PS.hlsl create mode 100644 src/shaders/Debug_PS.hlsl create mode 100644 src/shaders/DownsampleDepth_PS.hlsl create mode 100644 src/shaders/Quad_VS.hlsl create mode 100644 src/shaders/RenderVolume_DS.hlsl create mode 100644 src/shaders/RenderVolume_HS.hlsl create mode 100644 src/shaders/RenderVolume_PS.hlsl create mode 100644 src/shaders/RenderVolume_VS.hlsl create mode 100644 src/shaders/Resolve_PS.hlsl create mode 100644 src/shaders/ShaderCommon.h create mode 100644 src/shaders/TemporalFilter_PS.hlsl (limited to 'src/shaders') diff --git a/src/shaders/Apply_PS.hlsl b/src/shaders/Apply_PS.hlsl new file mode 100644 index 0000000..0e19e46 --- /dev/null +++ b/src/shaders/Apply_PS.hlsl @@ -0,0 +1,176 @@ +// This code contains NVIDIA Confidential Information and is disclosed +// under the Mutual Non-Disclosure Agreement. 
+// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+- UPSAMPLEMODE:
+    - UPSAMPLEMODE_POINT
+    - UPSAMPLEMODE_BILINEAR
+    - UPSAMPLEMODE_BILATERAL
+
+- FOGMODE:
+    - FOGMODE_NONE
+    - FOGMODE_NOSKY
+    - FOGMODE_FULL
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// NOTE(review): template arguments on the resource declarations below (e.g. on
+// Texture2DMS) may have been lost when this listing was captured -- confirm
+// against the original file.
+
+// In-scattering buffer that this pass reads and upsamples.
+Texture2D tGodraysBuffer : register(t0);
+// Scene depth; read per-sample in the MSAA permutation, point-sampled otherwise.
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    Texture2DMS tSceneDepth : register(t1);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    Texture2D tSceneDepth : register(t1);
+#endif
+// Depth that accompanies tGodraysBuffer: .r is compared against the linearized
+// scene depth and (.r, .g) are fed to CalcVariance as (x, x_sqr).
+Texture2D tGodraysDepth : register(t2);
+// Declared for this pass but not referenced by the code in this file.
+Texture2D tPhaseLUT : register(t4);
+
+// Render-target outputs of the apply pass.
+struct PS_APPLY_OUTPUT
+{
+    float4 inscatter : SV_TARGET0;
+    float4 transmission : SV_TARGET1;
+};
+
+// Compresses a colour with s / (1 + s).
+float3 Tonemap(float3 s)
+{
+    return s / (float3(1,1,1) + s);
+}
+
+// Inverse of Tonemap: s / (1 - s).
+float3 Tonemap_Inv(float3 s)
+{
+    return s / (float3(1,1,1) - s);
+}
+
+
+// Returns |x_sqr - x*x|, i.e. the magnitude of the variance given a value and
+// its second moment.
+float CalcVariance(float x, float x_sqr)
+{
+    return abs(x_sqr - x*x);
+}
+
+// Apply pass: fetches the in-scattering for this pixel using the upsampling
+// mode selected by UPSAMPLEMODE, optionally adds a fog term (FOGMODE), and
+// writes in-scattering and transmission to two render targets.
+//
+// input    - full-screen quad interpolants (vTex is used as the 0..1 coordinate).
+// sampleID - MSAA permutation only: index of the sample being shaded.
+PS_APPLY_OUTPUT main(VS_QUAD_OUTPUT input
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    , uint sampleID : SV_SAMPLEINDEX
+#endif
+    )
+{
+    PS_APPLY_OUTPUT output;
+    output.transmission = float4(1,1,1,1);
+    output.inscatter = float4(0,0,0,1);
+
+    // Normalized coordinate into the godrays buffer (viewport may be smaller than the buffer).
+    float2 texcoord = input.vTex * g_vViewportSize * g_vBufferSize_Inv;
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    float scene_depth = tSceneDepth.Load(int2(input.vTex*g_vOutputViewportSize), sampleID).x;
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    // Same coordinate expression as texcoord; reuse it instead of recomputing.
+    float scene_depth = tSceneDepth.SampleLevel(sPoint, texcoord, 0).x;
+#endif
+    scene_depth = LinearizeDepth(scene_depth, g_fZNear, g_fZFar);
+
+    // Quality of the upsampling interpolator
+    // 0: Point (no up-sample)
+    // 1: Bilinear
+    // 2: Bilateral
+    float3 inscatter_sample = float3(0,0,0);
+    if (UPSAMPLEMODE == UPSAMPLEMODE_POINT)
+    {
+        inscatter_sample = tGodraysBuffer.SampleLevel( sPoint, texcoord, 0).rgb;
+    }
+    else if (UPSAMPLEMODE == UPSAMPLEMODE_BILINEAR)
+    {
+        inscatter_sample = tGodraysBuffer.SampleLevel( sBilinear, texcoord, 0).rgb;
+    }
+    else if (UPSAMPLEMODE == UPSAMPLEMODE_BILATERAL)
+    {
+        const float2 NEIGHBOR_OFFSETS[] = {
+            float2(-1, -1), float2( 0, -1), float2( 1, -1),
+            float2(-1,  0), float2( 0,  0), float2( 1,  0),
+            float2(-1,  1), float2( 0,  1), float2( 1,  1)
+        };
+        const float GAUSSIAN_WIDTH = 1.0f;
+        const float DEPTH_RANGE = 0.10f;
+
+        float2 max_dimensions = floor(g_vViewportSize);
+        float2 base_tc = input.vTex * max_dimensions;
+        // Texel indices run from 0 to max_dimensions-1. Clamping neighbours to
+        // max_dimensions itself would let floor() produce an index one past the
+        // last texel of the viewport, so clamp to the last valid texel instead
+        // (mirrors how the lower bound pins neighbours to texel 0).
+        float2 max_texel = max(max_dimensions - float2(1,1), float2(0,0));
+
+        float total_weight = 0;
+        [unroll]
+        for (int n=0; n<9; ++n)
+        {
+            float2 sample_tc = clamp(base_tc + NEIGHBOR_OFFSETS[n], float2(0,0), max_texel);
+
+            // Spatial weight: falls off with distance from this pixel to the texel centre.
+            float2 sample_location = floor(sample_tc) + float2(0.5f, 0.5f);
+            float weight = GaussianApprox(sample_location - base_tc, GAUSSIAN_WIDTH);
+
+            // Depth weight: reject neighbours whose depth differs from the scene
+            // depth, and down-weight texels whose depth has a high standard deviation.
+            float2 neighbor_depth = tGodraysDepth.Load(int3(sample_location.xy, 0)).rg;
+            float depth_diff = abs(scene_depth - neighbor_depth.r);
+            float neighbor_variance = CalcVariance(neighbor_depth.r, neighbor_depth.g);
+            float neighbor_stddev = sqrt(neighbor_variance);
+            float depth_weight = saturate(1 - depth_diff / DEPTH_RANGE);
+            depth_weight = depth_weight*depth_weight*(1-neighbor_stddev);
+            weight *= depth_weight;
+
+            // Accumulate in tonemapped space; the average is expanded again below.
+            inscatter_sample += weight * Tonemap(tGodraysBuffer.Load(int3(sample_location.xy, 0)).rgb);
+            total_weight += weight;
+        }
+
+        if (total_weight > 0.0f)
+        {
+            inscatter_sample = Tonemap_Inv(inscatter_sample / total_weight);
+        }
+        else
+        {
+            // No neighbour was accepted: fall back to a plain bilinear fetch.
+            inscatter_sample = tGodraysBuffer.SampleLevel(sBilinear, texcoord, 0).rgb;
+        }
+    }
+
+    output.inscatter.rgb = inscatter_sample.rgb;
+    if (FOGMODE != FOGMODE_NONE)
+    {
+        // FOGMODE_NOSKY skips pixels whose linear depth is not below 1.
+        if ((FOGMODE != FOGMODE_NOSKY) || (scene_depth < 1.f))
+        {
+            float scene_distance = g_fZFar * scene_depth;
+            float3 sigma_ext = g_vSigmaExtinction;
+            // NOTE(review): divides by sigma_ext; a zero extinction channel yields 0/0 -- confirm callers never pass zero.
+            output.inscatter.rgb += g_fMultiScattering * g_vFogLight * g_vScatterPower * (1-exp(-sigma_ext*scene_distance)) / sigma_ext;
+            output.transmission.rgb = exp(-sigma_ext*scene_distance);
+        }
+    }
+
+    return output;
+}
\ No
newline at end of file diff --git a/src/shaders/ComputeLightLUT_CS.hlsl b/src/shaders/ComputeLightLUT_CS.hlsl new file mode 100644 index 0000000..fca70dd --- /dev/null +++ b/src/shaders/ComputeLightLUT_CS.hlsl @@ -0,0 +1,192 @@ +// This code contains NVIDIA Confidential Information and is disclosed +// under the Mutual Non-Disclosure Agreement. +// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- LIGHTMODE:
+    - LIGHTMODE_OMNI
+    - LIGHTMODE_SPOTLIGHT
+
+- ATTENUATIONMODE:
+    - ATTENUATIONMODE_NONE
+    - ATTENUATIONMODE_POLYNOMIAL
+    - ATTENUATIONMODE_INV_POLYNOMIAL
+
+- COMPUTEPASS:
+    - COMPUTEPASS_CALCULATE
+    - COMPUTEPASS_SUM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Stores v divided by the scale s in rgb and the scale itself in alpha.
+float4 PackLut(float3 v, float s)
+{
+    return float4(v/s, s);
+}
+
+// Inverse of PackLut: rgb multiplied by the scale held in alpha.
+float3 UnpackLut(float4 v)
+{
+    return v.rgb*v.a;
+}
+
+// NOTE(review): template arguments on the resource declarations in this file
+// (e.g. on RWTexture2D) may have been lost when this listing was captured --
+// confirm against the original file.
+Texture2D tPhaseLUT : register(t4);
+RWTexture2D rwLightLUT_P : register(u0);
+RWTexture2D rwLightLUT_S1 : register(u1);
+RWTexture2D rwLightLUT_S2 : register(u2);
+
+// These need to match the values in context_common.h
+static const uint LIGHT_LUT_DEPTH_RESOLUTION = 128;
+static const uint LIGHT_LUT_WDOTV_RESOLUTION = 512;
+
+#if (COMPUTEPASS == COMPUTEPASS_CALCULATE)
+
+static const uint2 BLOCK_SIZE = uint2(32, 8);
+groupshared float3 sAccum_P[BLOCK_SIZE.x*BLOCK_SIZE.y];
+
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+groupshared float3 sAccum_S1[BLOCK_SIZE.x*BLOCK_SIZE.y];
+groupshared float3 sAccum_S2[BLOCK_SIZE.x*BLOCK_SIZE.y];
+#endif
+
+// CALCULATE pass: evaluates the in-scattering integrand for one LUT texel
+// (x = distance step along the view ray, y = angle between the view ray and
+// the eye-to-light vector) and writes, for each row of BLOCK_SIZE.x threads,
+// the inclusive running sum along x. The SUM pass later chains those per-block
+// sums together.
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint2 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    uint idx = gthreadID.y*BLOCK_SIZE.x + gthreadID.x;
+    float2 coord = float2(dispatchID) / float2(LIGHT_LUT_DEPTH_RESOLUTION-1, LIGHT_LUT_WDOTV_RESOLUTION-1);
+
+    float angle = coord.y * PI;
+    float cos_WV = -cos(angle);
+
+    // vW: light -> eye. t sweeps [t0, t0 + t_range] along the view ray.
+    float3 vW = g_vEyePosition - g_vLightPos;
+    float Wsqr = dot(vW, vW);
+    float W_length = sqrt(Wsqr);
+    float t0 = max(0.0f, W_length-g_fLightZFar);
+    float t_range = g_fLightZFar + W_length - t0;
+    float t = t0 + coord.x*t_range;
+
+    // D: distance from the light to the sample point (law of cosines).
+    float WdotV = cos_WV*W_length;
+    float Dsqr = max(Wsqr+2*WdotV*t+t*t, 0.0f);
+    float D = sqrt(Dsqr);
+    float cos_phi = (t>0 && D>0) ? (t*t + Dsqr - Wsqr) / (2 * t*D) : cos_WV;
+    float3 extinction = exp(-g_vSigmaExtinction*(D+t));
+    float3 phase_factor = GetPhaseFactor(tPhaseLUT, -cos_phi);
+    float attenuation = AttenuationFunc(D);
+    float3 inscatter = phase_factor*attenuation*extinction;
+
+    // Scale by dT because we are doing quadrature
+    inscatter *= t_range / float(LIGHT_LUT_DEPTH_RESOLUTION);
+
+    inscatter = inscatter / g_vScatterPower;
+    sAccum_P[idx] = inscatter;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    sAccum_S1[idx] = (D==0) ? 0.0f : inscatter/D;
+    sAccum_S2[idx] = t*sAccum_S1[idx];
+#endif
+
+    // Inclusive prefix sum along each row: in round d every thread adds the
+    // value d slots to its left, with d doubling each round. Reads and writes
+    // of a round are separated by group barriers so the result does not depend
+    // on the threads of a row happening to execute in lock-step. The barriers
+    // sit outside any divergent branch, as the intrinsic requires.
+    [unroll]
+    for (uint d=1; d<BLOCK_SIZE.x; d = d<<1)
+    {
+        const bool has_source = (gthreadID.x >= d);
+        // Threads without a left neighbour read their own slot; the value is discarded.
+        const uint src_idx = has_source ? (idx - d) : idx;
+
+        // Previous round's writes (or the initial stores) must be visible.
+        GroupMemoryBarrierWithGroupSync();
+        const float3 add_P = sAccum_P[src_idx];
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        const float3 add_S1 = sAccum_S1[src_idx];
+        const float3 add_S2 = sAccum_S2[src_idx];
+#endif
+        // Everyone has read this round's inputs before any slot is overwritten.
+        GroupMemoryBarrierWithGroupSync();
+        if (has_source)
+        {
+            sAccum_P[idx] += add_P;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+            sAccum_S1[idx] += add_S1;
+            sAccum_S2[idx] += add_S2;
+#endif
+        }
+    }
+
+    // Each thread only reads back the slot it wrote itself, so no further barrier is needed.
+    static const float LUT_SCALE = 32.0f / 32768.0f;
+    rwLightLUT_P[dispatchID] = PackLut(sAccum_P[idx], LUT_SCALE);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    float max_t = 2*(t0 + t_range);
+    rwLightLUT_S1[dispatchID] = PackLut(sAccum_S1[idx], LUT_SCALE);
+    rwLightLUT_S2[dispatchID] = PackLut(sAccum_S2[idx], LUT_SCALE*max_t);
+#endif
+}
+
+#elif (COMPUTEPASS == COMPUTEPASS_SUM)
+
+static const uint2 BLOCK_SIZE = uint2(32, 4);
+
+Texture2D tLightLUT_P : register(t5);
+Texture2D tLightLUT_S1 : register(t6);
+Texture2D tLightLUT_S2 : register(t7);
+
+// Running total carried from one BLOCK_SIZE.x-wide block to the next, one per row.
+groupshared float3 sOffset[BLOCK_SIZE.y];
+
+// SUM pass: walks the depth axis in blocks of BLOCK_SIZE.x texels and adds the
+// total of all preceding blocks to each texel, so the per-block sums written
+// by the CALCULATE pass become sums over the whole row. dispatchID.z selects
+// which LUT (P, S1 or S2) is processed in the spotlight permutation.
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint3 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    if (gthreadID.x == 0)
+    {
+        sOffset[gthreadID.y] = float3(0, 0, 0);
+    }
+    // The zeroed offset must be visible to the whole row before it is read.
+    GroupMemoryBarrierWithGroupSync();
+
+    [unroll]
+    for (uint t = 0; t < LIGHT_LUT_DEPTH_RESOLUTION; t += BLOCK_SIZE.x)
+    {
+        uint2 tc = dispatchID.xy + uint2(t, 0);
+        float4 s = float4(0,0,0,0);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            s = tLightLUT_S2[tc];
+        else if (dispatchID.z == 1)
+            s = tLightLUT_S1[tc];
+        else
+            s = tLightLUT_P[tc];
+#else
+        s = tLightLUT_P[tc];
+#endif
+        float3 v = UnpackLut(s) + sOffset[gthreadID.y];
+
+        // All threads of the row have consumed the old offset before it is replaced...
+        GroupMemoryBarrierWithGroupSync();
+        // ...by the last thread of the row, whose value is the total up to the end of this block.
+        if (gthreadID.x == (BLOCK_SIZE.x-1))
+        {
+            sOffset[gthreadID.y] = v;
+        }
+        // ...and the new offset is visible before the next block reads it.
+        GroupMemoryBarrierWithGroupSync();
+
+        // The stored scale grows by the number of blocks that were summed together.
+        s.a *= LIGHT_LUT_DEPTH_RESOLUTION/BLOCK_SIZE.x;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            rwLightLUT_S2[tc] = PackLut(v, s.a);
+        else if (dispatchID.z == 1)
+            rwLightLUT_S1[tc] = PackLut(v, s.a);
+        else
+            rwLightLUT_P[tc] = PackLut(v, s.a);
+#else
+        rwLightLUT_P[tc] = PackLut(v, s.a);
+#endif
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/src/shaders/ComputePhaseLookup_PS.hlsl b/src/shaders/ComputePhaseLookup_PS.hlsl
new file mode 100644
index 0000000..7487c40
--- /dev/null
+++ b/src/shaders/ComputePhaseLookup_PS.hlsl
@@ -0,0 +1,150 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation.
All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Using the phase functions directly isn't correct, because they are supposed to be
+// integrated over the subtended solid angle. This falls apart as sin(theta)
+// approaches 0 (i.e. cos(theta) approaches +1 or -1).
+// We apply a sliding scale to the functions to compensate for this somewhat.
+
+#define NORMALIZE_PHASE_FUNCTIONS 1
+
+// Isotropic phase function: the constant 1 / (4*PI), independent of angle.
+float ScatterPhase_Isotropic()
+{
+    return 1.f / (4.f * PI);
+}
+
+// Rayleigh phase function (3 / (16*PI)) * (1 + cosa^2).
+// With NORMALIZE_PHASE_FUNCTIONS the result is additionally scaled by (1 - cosa^4 / 8).
+// cosa - cosine of the scattering angle.
+float ScatterPhase_Rayleigh(float cosa)
+{
+    float cos_term = cosa*cosa; // ^2
+    float phase_term = (3.f/(16.f*PI)) * (1.f + cos_term);
+#if NORMALIZE_PHASE_FUNCTIONS
+    cos_term *= cos_term; // ^4
+    return phase_term*(1-cos_term/8.f);
+#else
+    return phase_term;
+#endif
+}
+
+// Henyey-Greenstein phase function.
+// cosa - cosine of the scattering angle.
+// g    - eccentricity parameter of the lobe.
+// The NORMALIZE_PHASE_FUNCTIONS variant evaluates
+//   (g^2 + (1 - g^2) / (4*PI)) * ((1 - |g|) / sqrt(1 - 2*g*cosa + g^2))^3
+// instead of the classic closed form kept in the #else branch.
+float ScatterPhase_HenyeyGreenstein(float cosa, float g)
+{
+#if NORMALIZE_PHASE_FUNCTIONS
+    // "normalized" Henyey-Greenstein
+    float g_sqr = g*g;
+    float num = (1 - abs(g));
+    // max() keeps the sqrt argument non-negative against rounding.
+    float denom = sqrt( max(1-2*g*cosa+g_sqr, 0) );
+    // NOTE(review): denom is 0 when g == cosa == +/-1, giving 0/0 here -- confirm callers keep |g| < 1.
+    float frac = num/denom;
+    float scale = g_sqr + (1 - g_sqr) / (4*PI);
+    return scale * (frac*frac*frac);
+#else
+    // Classic Henyey-Greenstein
+    float k1 = (1.f-g*g);
+    float k2 = (1.f + g*g - 2.f*g*cosa);
+    return (1.f / (4.f*PI)) * k1 / pow(abs(k2), 1.5f);
+#endif
+}
+
+// "Hazy" Mie approximation: (1 / (4*PI)) * (0.5 + 4.5 * ((1 + cosa) / 2)^8).
+// With NORMALIZE_PHASE_FUNCTIONS the result is scaled by (1 - ((1 + cosa) / 2)^8 / 2).
+float ScatterPhase_MieHazy(float cosa)
+{
+    float cos_term = 0.5f*(1+cosa);
+    float cos_term_2 = cos_term*cos_term; // ^2
+    float cos_term_4 = cos_term_2*cos_term_2; // ^4
+    float cos_term_8 = cos_term_4*cos_term_4; // ^8
+    float phase_term = (1.f/(4.f*PI))*(0.5f+(9.f/2.f)*cos_term_8);
+#if NORMALIZE_PHASE_FUNCTIONS
+    return phase_term * (1-cos_term_8/2.0f);
+#else
+    return phase_term;
+#endif
+}
+
+// "Murky" Mie approximation: (1 / (4*PI)) * (0.5 + 16.5 * ((1 + cosa) / 2)^32).
+// With NORMALIZE_PHASE_FUNCTIONS the result is scaled by (1 - ((1 + cosa) / 2)^32 / 2).
+float ScatterPhase_MieMurky(float cosa)
+{
+    float cos_term = 0.5f*(1+cosa);
+    float cos_term_2 = cos_term*cos_term; // ^2
+    float cos_term_4 = cos_term_2*cos_term_2; // ^4
+    float cos_term_8 = cos_term_4*cos_term_4; // ^8
+    float cos_term_16 = cos_term_8*cos_term_8; // ^16
+    float cos_term_32 = cos_term_16*cos_term_16; // ^32
+    float phase_term = (1.f/(4.f*PI))*(0.5f+(33.f/2.f)*cos_term_32);
+#if NORMALIZE_PHASE_FUNCTIONS
+    return phase_term * (1-cos_term_32/2.0f);
+#else
+    return phase_term;
+#endif
+}
+
+// Pixel shader entry point of the phase lookup pass. The vertical texture
+// coordinate is mapped to cos_theta = -cos(PI * v).
+float4 main(VS_QUAD_OUTPUT input) : SV_TARGET
+{
+    float cos_theta = -cos(PI*input.vTex.y);
+    float3 phase_factor = float3(0,0,0);
+    float3 total_scatter = float3(0,0,0);
+
+    // These must match the PhaseFunctionType enum in NvVolumetricLighting.h
+    static const uint PHASEFUNC_ISOTROPIC = 0;
+    static const uint PHASEFUNC_RAYLEIGH = 1;
+    static const uint PHASEFUNC_HG = 2;
+    static const uint PHASEFUNC_MIEHAZY = 3;
+    static const uint PHASEFUNC_MIEMURKY = 4;
+
+    // NOTE(review): the text between the loop condition below and the depth-map
+    // declaration appears to be missing from this listing (the remainder of this
+    // function and the start of the following file(s)) -- recover it from the
+    // original commit before relying on anything in this region.
+    for (uint i=0; i tDepthMap : register(t0);
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+Texture2DMS tDepthMap : register(t0);
+#endif
+
+// Identity helper; exists only so that a parameter counts as "used".
+uint Unused(uint input)
+{
+    return input;
+}
+
+// Depth downsample: writes to SV_DEPTH one value read from tDepthMap at a
+// jittered coordinate. The jitter components are swapped on alternating pixels
+// in a checkerboard pattern.
+// sampleID - index of the output sample being written.
+float main(
+    VS_QUAD_OUTPUT input
+    , uint sampleID : SV_SAMPLEINDEX
+    ) : SV_DEPTH
+{
+    float2 jitter = float2(0.0f, 0.0f);
+    uint2 pixelIdx = uint2(input.vPos.xy);
+    // Checkerboard: odd (x + y) uses the offset as-is, even swaps its components.
+    if ( (pixelIdx.x+pixelIdx.y)%2 )
+    {
+        jitter.xy = g_vJitterOffset.xy;
+    }
+    else
+    {
+        jitter.xy = g_vJitterOffset.yx;
+    }
+
+#if defined(__PSSL__)
+    Unused(sampleID);//Fix a compiler warning with pssl.
+    float2 tc = (floor(input.vTex.xy*g_vOutputViewportSize) + GetViVjLinearSample() + jitter)*g_vOutputSize_Inv;
+#else
+    // Evaluate the interpolant at this sample's position so each output sample
+    // reads a different source location.
+    float2 tc = (EvaluateAttributeAtSample(input.vTex.xy, sampleID)*g_vOutputViewportSize + jitter)*g_vOutputSize_Inv;
+#endif
+
+#if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    return tDepthMap.SampleLevel(sPoint, tc, 0).x;
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+    int2 load_tc = int2(tc*g_vOutputSize);
+    // Always reads source sample 0, regardless of sampleID.
+    return tDepthMap.Load(load_tc, 0).x;
+#endif
+}
diff --git a/src/shaders/Quad_VS.hlsl b/src/shaders/Quad_VS.hlsl
new file mode 100644
index 0000000..bbb0ae2
--- /dev/null
+++ b/src/shaders/Quad_VS.hlsl
@@ -0,0 +1,46 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Builds a screen-covering primitive purely from the vertex index, with no
+// vertex buffer. For ids 0, 1, 2 the texture coordinate is (0,0), (2,0), (0,2)
+// and the clip-space position is (-1,1), (3,1), (-1,-3), i.e. one oversized
+// triangle. The world-space position is obtained by pushing the clip-space
+// position through the inverse view-projection matrix and dividing by w.
+VS_QUAD_OUTPUT main(uint id : SV_VERTEXID)
+{
+    // Bit 0 of the index drives u, bit 1 drives v; both land on 0 or 2.
+    const uint u_bits = (id << 1) & 2;
+    const uint v_bits = id & 2;
+
+    VS_QUAD_OUTPUT result;
+    result.vTex = float2(u_bits, v_bits);
+
+    // [0,2] texture space -> clip space with y flipped; z and w are fixed at 1.
+    const float2 clip_xy = result.vTex * float2(2,-2) + float2(-1,1);
+    result.vPos = float4(clip_xy, 1, 1);
+
+    // Unproject and apply the homogeneous divide.
+    result.vWorldPos = mul( g_mViewProjInv, result.vPos );
+    const float inv_w = 1.0f / result.vWorldPos.w;
+    result.vWorldPos *= inv_w;
+    return result;
+}
diff --git a/src/shaders/RenderVolume_DS.hlsl b/src/shaders/RenderVolume_DS.hlsl
new file mode 100644
index 0000000..880e9ed
--- /dev/null
+++ b/src/shaders/RenderVolume_DS.hlsl
@@ -0,0 +1,181 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+    - SHADOWMAPTYPE_ATLAS
+    - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+    - CASCADECOUNT_1: 1
+    - CASCADECOUNT_2: 2
+    - CASCADECOUNT_3: 3
+    - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+    - VOLUMETYPE_FRUSTUM
+    - VOLUMETYPE_PARABOLOID
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Index of the last (coarsest) cascade; the cascade search in main() starts here.
+#define COARSE_CASCADE (CASCADECOUNT-1)
+
+// Shadow map, either one atlas texture or a texture array with one slice per element.
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+Texture2D tShadowMap : register(t1);
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+Texture2DArray tShadowMap : register(t1);
+#endif
+
+// Bilinearly samples the shadow map for one element.
+// tex_coord - 0..1 coordinate within the element.
+// cascade   - element index; selects the offset (.xy) and scale (.zw) from
+//             g_vElementOffsetAndScale and, for arrays, the slice from g_uElementIndex.
+// Returns the .x channel of the sample; 1.0 if neither shadow-map type is selected.
+float SampleShadowMap(float2 tex_coord, int cascade)
+{
+    float depth_value = 1.0f;
+    float2 lookup_coord = g_vElementOffsetAndScale[cascade].zw * tex_coord + g_vElementOffsetAndScale[cascade].xy;
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+    depth_value = tShadowMap.SampleLevel( sBilinear, lookup_coord, 0).x;
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+    depth_value = tShadowMap.SampleLevel( sBilinear, float3( lookup_coord, (float)g_uElementIndex[cascade] ), 0).x;
+#endif
+    return depth_value;
+}
+
+// Paraboloid projection of P: x and y of the normalized direction are divided
+// by (z + 1), and z becomes the length of P remapped so that zNear maps to 0
+// and zFar to 1.
+float3 ParaboloidProject(float3 P, float zNear, float zFar)
+{
+    float3 outP;
+    float lenP = length(P.xyz);
+    outP.xyz = P.xyz/lenP;
+    outP.x = outP.x / (outP.z + 1);
+    outP.y = outP.y / (outP.z + 1);
+    outP.z = (lenP - zNear) / (zFar - zNear);
+    return outP;
+}
+
+// Inverse of ParaboloidProject. Not called from the code visible in this file.
+float3 ParaboloidUnproject(float3 P, float zNear, float zFar)
+{
+    // Use a quadratic to find the Z component
+    // then reverse the projection to find the unit vector, and scale
+    float L = P.z*(zFar-zNear) + zNear;
+
+    float qa = P.x*P.x + P.y*P.y + 1;
+    float qb = 2*(P.x*P.x + P.y*P.y);
+    float qc = P.x*P.x + P.y*P.y - 1;
+    float z = (-qb + sqrt(qb*qb - 4*qa*qc)) / (2*qa);
+
+    float3 outP;
+    outP.x = P.x * (z + 1);
+    outP.y = P.y * (z + 1);
+    outP.z = z;
+    return outP*L;
+}
+
+// Identity helper; exists only so that a parameter counts as "used".
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT Unused(HS_POLYGONAL_CONSTANT_DATA_OUTPUT input)
+{
+    return input;
+}
+
+// Domain shader for the light volume. Interpolates the patch corners at the
+// domain location, replaces the interpolated world position with one derived
+// from the shadow-map depth (frustum or paraboloid variant), and projects the
+// result with g_mViewProj.
+// NOTE(review): the template arguments of OutputPatch appear to have been lost
+// when this listing was captured -- confirm against the original file.
+[domain("quad")]
+PS_POLYGONAL_INPUT main( HS_POLYGONAL_CONSTANT_DATA_OUTPUT input, float2 uv : SV_DOMAINLOCATION, const OutputPatch Patch )
+{
+    Unused(input);//Fix a compiler warning with pssl.
+
+    PS_POLYGONAL_INPUT output = (PS_POLYGONAL_INPUT)0;
+
+    // Bilinear interpolation across the quad; corners 0-1 form one edge, 3-2 the opposite one.
+    float3 vClipIn1 = lerp(Patch[0].vClipPos.xyz, Patch[1].vClipPos.xyz, uv.x);
+    float3 vClipIn2 = lerp(Patch[3].vClipPos.xyz, Patch[2].vClipPos.xyz, uv.x);
+    float3 vClipIn = lerp(vClipIn1, vClipIn2, uv.y);
+
+    float4 vPos1 = lerp(Patch[0].vWorldPos, Patch[1].vWorldPos, uv.x);
+    float4 vPos2 = lerp(Patch[3].vWorldPos, Patch[2].vWorldPos, uv.x);
+    float4 vWorldPos = lerp(vPos1, vPos2, uv.y);
+
+    if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+    {
+        if (all(abs(vClipIn.xy) < EDGE_FACTOR))
+        {
+            int iCascade = -1;
+            float4 vClipPos = float4(0,0,0,1);
+
+            // Walk from the coarsest cascade to the finest. Every cascade that
+            // contains the point and has a depth sample below 1 overwrites the
+            // previous hit, so the finest such cascade wins.
+            [unroll]
+            for (int i = COARSE_CASCADE;i >= 0; --i)
+            {
+                // Try to refetch from finer cascade
+                float4 vClipPosCascade = mul( g_mLightProj[i], vWorldPos );
+                vClipPosCascade *= 1.f / vClipPosCascade.w;
+                if (all(abs(vClipPosCascade.xy) < 1.0f))
+                {
+
+                    // Clip space [-1,1] -> texture space [0,1], y flipped.
+                    float2 vTex = float2(0.5*vClipPosCascade.x + 0.5, -0.5*vClipPosCascade.y + 0.5);
+                    float depthSample = SampleShadowMap(vTex, i);
+                    if (depthSample < 1.0f)
+                    {
+
+                        vClipPos.xy = vClipPosCascade.xy;
+                        vClipPos.z = depthSample;
+                        iCascade = i;
+                    }
+                }
+            }
+
+            // If no cascade produced a hit, the interpolated world position is kept.
+            if (iCascade >= 0)
+            {
+                vWorldPos = mul( g_mLightProjInv[iCascade], float4(vClipPos.xyz, 1) );
+                vWorldPos *= 1.0f / vWorldPos.w;
+                // Pull the vertex towards the eye by the fraction g_fGodrayBias.
+                vWorldPos.xyz = g_vEyePosition + (1.0f-g_fGodrayBias)*(vWorldPos.xyz-g_vEyePosition);
+            }
+        }
+        else
+        {
+            // At or beyond EDGE_FACTOR: no shadow-map lookup, use depth 1 in light clip space.
+            vWorldPos = mul(g_mLightToWorld, float4(vClipIn.xy, 1, 1));
+            vWorldPos *= 1.0f / vWorldPos.w;
+        }
+    }
+    else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+    {
+        vClipIn.xyz = normalize(vClipIn.xyz);
+        float4 shadowPos = mul(g_mLightProj[0], vWorldPos);
+        shadowPos.xyz = shadowPos.xyz/shadowPos.w;
+        // The sign of z picks which of the two paraboloid maps (elements 0 and 1) is sampled.
+        uint hemisphereID = (shadowPos.z > 0) ? 0 : 1;
+        shadowPos.z = abs(shadowPos.z);
+        shadowPos.xyz = ParaboloidProject(shadowPos.xyz, g_fLightZNear, g_fLightZFar);
+        float2 shadowTC = float2(0.5f, -0.5f)*shadowPos.xy + 0.5f;
+        float depthSample = SampleShadowMap(shadowTC, hemisphereID);
+        // Undo the 0..1 remap applied to the stored depth.
+        float sceneDepth = depthSample*(g_fLightZFar-g_fLightZNear)+g_fLightZNear;
+        vWorldPos = mul( g_mLightProjInv[0], float4(vClipIn.xyz * sceneDepth, 1));
+        vWorldPos *= 1.0f / vWorldPos.w;
+    }
+
+    // Transform world position with viewprojection matrix
+    output.vWorldPos = vWorldPos;
+    output.vPos = mul( g_mViewProj, output.vWorldPos );
+    return output;
+}
diff --git a/src/shaders/RenderVolume_HS.hlsl b/src/shaders/RenderVolume_HS.hlsl
new file mode 100644
index 0000000..1689e15
--- /dev/null
+++ b/src/shaders/RenderVolume_HS.hlsl
@@ -0,0 +1,182 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+    - SHADOWMAPTYPE_ATLAS
+    - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+    - CASCADECOUNT_1: 1
+    - CASCADECOUNT_2: 2
+    - CASCADECOUNT_3: 3
+    - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+    - VOLUMETYPE_FRUSTUM
+    - VOLUMETYPE_PARABOLOID
+
+- MAXTESSFACTOR:
+    - MAXTESSFACTOR_LOW: 16.0f
+    - MAXTESSFACTOR_MEDIUM: 32.0f
+    - MAXTESSFACTOR_HIGH: 64.0f
+%% MUX_END %%
+*/
+
+// Index of the last (coarsest) cascade. Not referenced by the code visible in this file.
+#define COARSE_CASCADE (CASCADECOUNT-1)
+
+#include "ShaderCommon.h"
+
+// Returns the point on the segment [vStartPos, vEndPos] that is closest to
+// g_vEyePosition: the eye is projected onto the segment direction and the
+// result is clamped to the segment.
+// NOTE(review): divides by the segment length; a zero-length segment would
+// divide by zero -- confirm callers never pass coincident points.
+float3 NearestPos(float3 vStartPos, float3 vEndPos)
+{
+    float3 vPos = (g_vEyePosition - vStartPos);
+    float3 vLine = (vEndPos - vStartPos);
+    float lineLength = length(vLine);
+    // Distance along the segment, clamped to [0, lineLength].
+    float t = max(0, min(lineLength, dot(vPos, vLine)/lineLength));
+    return vStartPos + (t/lineLength)*vLine;
+}
+
+// Tessellation factor for the edge between two world-space points: the edge
+// length is scaled by the projection at the distance of the edge midpoint from
+// the eye, converted to viewport pixels, divided by g_fTargetRaySize, and
+// clamped to [1, MAXTESSFACTOR].
+float CalcTessFactor(float3 vStartPos, float3 vEndPos)
+{
+    float section_size = length(vEndPos - vStartPos);
+    float3 vWorldPos = 0.5f*(vStartPos+vEndPos);
+    float3 vEyeVec = (vWorldPos.xyz - g_vEyePosition);
+    // Only clip_pos.w is used: the projection's w for a point at that distance on the view axis.
+    float4 clip_pos = mul( g_mProj, float4(0, 0, length(vEyeVec), 1) );
+    float projected_size = abs(section_size * g_mProj._m11 / clip_pos.w);
+    float desired_splits = (projected_size*g_vOutputViewportSize.y)/(g_fTargetRaySize);
+    return min(MAXTESSFACTOR, max(1, desired_splits));
+}
+
+// Visibility test for the segment between two points given after the
+// perspective divide. Returns false only when the two points are on the same
+// outer side in x, AND on the same outer side in y, AND both have z < 0.
+// NOTE(review): a conventional conservative rejection treats the segment as
+// outside when ANY one of these conditions holds (i.e. would combine the three
+// terms with &&). Points behind the camera have flipped signs after the divide,
+// so the looser || form may be deliberate -- confirm before changing.
+bool IntersectsFrustum(float4 vPos1, float4 vPos2)
+{
+    return !(vPos1.x > 1.0 && vPos2.x > 1.0 || vPos1.x < -1.0 && vPos2.x < -1.0)
+        || !(vPos1.y > 1.0 && vPos2.y > 1.0 || vPos1.y < -1.0 && vPos2.y < -1.0)
+        || !(vPos1.z < 0.0 && vPos2.z < 0.0);
+}
+
+// Patch-constant function: decides whether the patch is visible and, if so,
+// computes the four edge and two inside tessellation factors. All factors are
+// left at 0 for a patch that fails the visibility test.
+// NOTE(review): the template arguments of OutputPatch appear to have been lost
+// when this listing was captured -- confirm against the original file.
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT HS_POLYGONAL_CONSTANT_FUNC( /*uint PatchID : SV_PRIMITIVEID,*/ const OutputPatch opPatch)
+{
+    HS_POLYGONAL_CONSTANT_DATA_OUTPUT output = (HS_POLYGONAL_CONSTANT_DATA_OUTPUT)0;
+
+    bool bIsVisible = false;
+#if 1
+    //Frustum cull
+    // The patch is visible if, for any control point, the segment from its
+    // origin (light-space depth 0 for frustum volumes, the light position for
+    // paraboloid volumes) to its world position passes IntersectsFrustum.
+    [unroll]
+    for (int j=0; j<4; ++j)
+    {
+        float4 vScreenClip = mul(g_mViewProj, opPatch[j].vWorldPos);
+        vScreenClip *= 1.0f / vScreenClip.w;
+        float4 vOriginPos = float4(0,0,0,1);
+        if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+        {
+            vOriginPos = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1));
+        }
+        else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+        {
+            vOriginPos = float4(g_vLightPos, 1);
+        }
+        float4 vScreenClipOrigin = mul(g_mViewProj, vOriginPos);
+        vScreenClipOrigin *= 1.0f / vScreenClipOrigin.w;
+        bIsVisible = bIsVisible || IntersectsFrustum(vScreenClip, vScreenClipOrigin);
+    }
+#else
+    bIsVisible = true;
+#endif
+
+    if (bIsVisible)
+    {
+        // For every control point, the point on its origin-to-vertex segment nearest the eye.
+        float3 nearest_pos[4];
+        for (int j=0; j < 4; ++j)
+        {
+            float3 start_pos;
+            if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+            {
+                float4 p = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1));
+                start_pos = p.xyz / p.w;
+            }
+            else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+                start_pos = g_vLightPos;
+            else
+                start_pos = float3(0, 0, 0);
+            nearest_pos[j] = NearestPos(start_pos, opPatch[j].vWorldPos.xyz);
+        }
+
+        // Edge k joins control points (k+3)%4 and k. Its factor is the larger of
+        // the factor at the nearest-to-eye points and the factor at the vertices.
+        float tess_factor[4];
+        [unroll]
+        for (int k=0; k<4; ++k)
+        {
+            float tess_near = CalcTessFactor(nearest_pos[(k+3)%4], nearest_pos[k]);
+            float tess_far = CalcTessFactor(opPatch[(k+3)%4].vWorldPos.xyz, opPatch[k].vWorldPos.xyz);
+            tess_factor[k] = max(tess_near, tess_far);
+            if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+            {
+                // A boundary edge: in some component both endpoints have |clip| >= EDGE_FACTOR.
+                // Such edges are not subdivided.
+                bool bIsEdge = !(all((abs(opPatch[(k + 3) % 4].vClipPos.xy) < EDGE_FACTOR) || (abs(opPatch[k].vClipPos.xy) < EDGE_FACTOR)));
+                output.fEdges[k] = (bIsEdge) ? 1.0f : tess_factor[k];
+            }
+            else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+            {
+                output.fEdges[k] = tess_factor[k];
+            }
+            else
+            {
+                output.fEdges[k] = 1;
+            }
+
+        }
+        // Inside factors use the unclamped-by-edge tess_factor values of the opposing edge pairs.
+        output.fInside[0] = max(tess_factor[1], tess_factor[3]);
+        output.fInside[1] = max(tess_factor[0], tess_factor[2]);
+    }
+    else
+    {
+        // Zero factors; presumably this makes the tessellator discard the patch -- see D3D11 tessellation docs.
+        output.fEdges[0] = 0;
+        output.fEdges[1] = 0;
+        output.fEdges[2] = 0;
+        output.fEdges[3] = 0;
+        output.fInside[0] = 0;
+        output.fInside[1] = 0;
+    }
+
+    return output;
+}
+
+// Hull shader control-point function: passes the world and clip positions of
+// control point uCPID through unchanged.
+// NOTE(review): the template arguments of InputPatch appear to have been lost
+// when this listing was captured -- confirm against the original file.
+[domain("quad")]
+[partitioning("integer")]
+[outputtopology("triangle_ccw")]
+[outputcontrolpoints(4)]
+[patchconstantfunc("HS_POLYGONAL_CONSTANT_FUNC")]
+[maxtessfactor(MAXTESSFACTOR)]
+HS_POLYGONAL_CONTROL_POINT_OUTPUT main( InputPatch ipPatch, uint uCPID : SV_OUTPUTCONTROLPOINTID )
+{
+    HS_POLYGONAL_CONTROL_POINT_OUTPUT output = (HS_POLYGONAL_CONTROL_POINT_OUTPUT)0;
+    output.vWorldPos = ipPatch[uCPID].vWorldPos;
+    output.vClipPos = ipPatch[uCPID].vClipPos;
+    return output;
+}
diff --git a/src/shaders/RenderVolume_PS.hlsl b/src/shaders/RenderVolume_PS.hlsl
new file mode 100644
index 0000000..f2724c2
--- /dev/null
+++ b/src/shaders/RenderVolume_PS.hlsl
@@ -0,0 +1,403 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation.
No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+%% MUX_BEGIN %%
+# Define the shader permutations for code generation
+
+# Are we operating on single sample or MSAA buffer
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+# What type of light are we rendering
+- LIGHTMODE:
+    - LIGHTMODE_DIRECTIONAL
+    - LIGHTMODE_SPOTLIGHT
+    - LIGHTMODE_OMNI
+
+# What sort of pass are we rendering
+- PASSMODE:
+    - PASSMODE_GEOMETRY
+    - PASSMODE_SKY
+    - PASSMODE_FINAL
+
+# What is our distance attenuation function
+- ATTENUATIONMODE:
+    - ATTENUATIONMODE_NONE
+    - ATTENUATIONMODE_POLYNOMIAL
+    - ATTENUATIONMODE_INV_POLYNOMIAL
+
+# What is our spotlight angular falloff mode
+- FALLOFFMODE:
+    - FALLOFFMODE_NONE
+    - FALLOFFMODE_FIXED
+    - FALLOFFMODE_CUSTOM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// LoadSceneDepth(pos, s): scene depth at pixel 'pos' (sample 's' for MSAA).
+// Only the final pass reads a depth buffer; the other passes return 1.0.
+// NOTE(review): element-type arguments on the texture declarations in this
+// file may have been stripped from this copy (e.g. Texture2DMS<float>);
+// confirm against the upstream file.
+#if (PASSMODE == PASSMODE_FINAL)
+#   if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+
+    Texture2D tSceneDepth : register(t2);
+    float LoadSceneDepth(uint2 pos, uint s)
+    {
+        return tSceneDepth.Load(int3(pos.xy, 0)).x;
+    }
+
+#   elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+
+    Texture2DMS tSceneDepth : register(t2);
+    float LoadSceneDepth(uint2 pos, uint s)
+    {
+        return tSceneDepth.Load(int2(pos.xy), s).x;
+    }
+    
+#   endif
+#else
+
+    float LoadSceneDepth(uint2 pos, uint s)
+    {
+        return 1.0f;
+    }
+
+#endif
+
+Texture2D tPhaseLUT : register(t4);
+Texture2D tLightLUT_P : register(t5);
+Texture2D tLightLUT_S1 : register(t6);
+Texture2D tLightLUT_S2 : register(t7);
+
+// Maps a ray distance t to the light LUT's X coordinate.
+// The mapped range starts at max(0, light_dist - g_fLightZFar) and ends at
+// light_dist + g_fLightZFar.
+float GetLutCoord_X(float t, float light_dist)
+{
+    float t0 = max(0.0f, light_dist-g_fLightZFar);
+    float t_range = g_fLightZFar + light_dist - t0;
+    return (t-t0) / t_range;
+}
+
+// Maps a cosine to the light LUT's Y coordinate: acos(-cos_theta)/PI in [0, 1].
+// The argument is clamped to acos's domain, as GetPhaseFactor does, so that
+// rounding in a dot product of unit vectors cannot produce NaN.
+float GetLutCoord_Y(float cos_theta)
+{
+    return acos(clamp(-cos_theta, -1.0f, 1.0f)) / PI;
+}
+
+// Bilinear LUT fetch at mip 0; the result is rgb scaled by alpha.
+float3 SampleLut(Texture2D tex, float2 tc)
+{
+    float4 s = tex.SampleLevel(sBilinear, tc, 0);
+    return s.rgb*s.a;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Integration code
+
+// NOTE(review): this copy is damaged here. Everything between "istep" in the
+// INTEGRATE loop header and " 0.0f) ? WdotL / vW_len" is missing: the rest of
+// INTEGRATE, Integrate_SimpleDirectional() (called by main below) and the head
+// of IntersectCone(). The surviving text is kept verbatim; restore the span
+// from the upstream file.
+#define INTEGRATE(result, fn, data, step_count, t0, t1) \
+{ \
+    float t_step = (t1-t0)/float(step_count); \
+    float3 sum = float3(0,0,0); \
+    sum += fn(data, t0); \
+    float t = t0+t_step; \
+    [unroll] \
+    for (uint istep=1; istep 0.0f) ? WdotL / vW_len : 1.0f;
+        if (WdotL_norm >= cos_theta)
+        {
+            if (VdotL >= cos_theta)
+                t1 = t_max;
+            t0 = 0;
+        }
+        else if (WdotL_norm <= -cos_theta)
+        {
+            if (t0 < 0 && t1>0)
+                hit = false;
+            t0 = t0;
+            t1 = t_max;
+        }
+        else
+        {
+            if (t0 < 0 && t1 < 0)
+                hit = false;
+            else if (dot(vL, vW + t0*vV) < 0)
+                hit = false;
+            else if (t1<0)
+                t1 = t_max;
+        }
+
+        if (t0 > t_max)
+        {
+            t0 = 0;
+            t1 = 0;
+            hit = false;
+        }
+
+        return hit;
+    }
+}
+
+// Per-ray constants consumed by LightEvaluator_Spotlight.
+struct LightEvaluatorData_Spotlight
+{
+    float3 sigma;               // extinction (from g_vSigmaExtinction)
+    float light_theta;          // from g_fLightFalloffAngle; compared against cosines
+    float light_falloff_power;  // exponent of the angular falloff
+    float Wsqr;                 // dot(vW, vW)
+    float WdotV;                // dot(vW, vV)
+    float WdotL;                // dot(vW, vL)
+    float VdotL;                // dot(vV, vL)
+};
+
+// Integrand for the custom-falloff spotlight at distance t along the view ray:
+// phase * distance attenuation * angular falloff * exp(-sigma*(t+D)),
+// where D is the distance from the light to the sample point.
+float3 LightEvaluator_Spotlight(LightEvaluatorData_Spotlight data, float t)
+{
+    float Dsqr = max(data.Wsqr+2*data.WdotV*t+t*t, 0.0f);
+    float D = sqrt(Dsqr);
+    // Law of cosines on the triangle with sides t, D and |W|.
+    float cos_phi = (t>0 && D>0) ? (t*t + Dsqr - data.Wsqr) / (2 * t*D) : 0;
+    float3 phase_factor = GetPhaseFactor(tPhaseLUT, -cos_phi);
+    float distance_attenuation = AttenuationFunc(D);
+    float Dproj = data.WdotL + t*data.VdotL;
+    float cos_alpha = (D>0.0f) ? Dproj/D : 1.0f;
+    float angle_factor = saturate(cos_alpha-data.light_theta)/(1-data.light_theta);
+    const float ANGLE_EPSILON = 0.000001f;
+    float spot_attenuation = (angle_factor > ANGLE_EPSILON) ? pow(abs(angle_factor), data.light_falloff_power) : 0.0f;
+    float3 media_attenuation = exp(-data.sigma*(t+D));
+    return phase_factor*distance_attenuation*spot_attenuation*media_attenuation;
+}
+
+// In-scattering along the view ray for a spotlight.
+//   eye_dist : length of the view ray
+//   vW       : eye position minus light position
+//   vV       : unit view direction
+//   vL       : light direction
+// NONE and FIXED falloff use the precomputed light LUTs; CUSTOM integrates
+// LightEvaluator_Spotlight numerically. Returns 0 when IntersectCone misses.
+float3 Integrate_Spotlight(float eye_dist, float3 vW, float3 vV, float3 vL)
+{
+    float3 integral = float3(0, 0, 0);
+    float WdotL = dot(vW, vL);
+    float VdotL = dot(vV, vL);
+    float t0=0, t1=1;
+    if (IntersectCone(t0, t1, eye_dist, g_fLightFalloffAngle, vW, vV, vL, WdotL, VdotL))
+    {
+        t1 = min(t1, eye_dist);
+
+        if (FALLOFFMODE == FALLOFFMODE_NONE)
+        {
+            float light_dist = length(vW);
+            float3 vW_norm = vW / light_dist;
+            float2 tc;
+            tc.x = GetLutCoord_X(t1, light_dist);
+            tc.y = GetLutCoord_Y(dot(vW_norm, vV));
+            // LUT value at t1 minus LUT value at t0.
+            integral = SampleLut(tLightLUT_P, tc);
+            if (t0 > 0)
+            {
+                tc.x = GetLutCoord_X(t0, light_dist);
+                integral -= SampleLut(tLightLUT_P, tc);
+            }
+            integral *= g_vScatterPower;
+        }
+        else if (FALLOFFMODE == FALLOFFMODE_FIXED)
+        {
+            float light_dist = length(vW);
+            float3 vW_norm = vW / light_dist;
+            float2 tc;
+            tc.x = GetLutCoord_X(t1, light_dist);
+            tc.y = GetLutCoord_Y(dot(vW_norm, vV));
+            integral = WdotL*SampleLut(tLightLUT_S1, tc) + VdotL*SampleLut(tLightLUT_S2, tc) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, tc);
+            if (t0 > 0)
+            {
+                tc.x = GetLutCoord_X(t0, light_dist);
+                integral -= WdotL*SampleLut(tLightLUT_S1, tc) + VdotL*SampleLut(tLightLUT_S2, tc) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, tc);
+            }
+            integral *= g_vScatterPower / (1-g_fLightFalloffAngle);
+        }
+        if (FALLOFFMODE == FALLOFFMODE_CUSTOM)
+        {
+            LightEvaluatorData_Spotlight evaluator;
+            evaluator.sigma = g_vSigmaExtinction;
+            evaluator.light_theta = g_fLightFalloffAngle;
+            evaluator.light_falloff_power = g_fLightFalloffPower;
+            evaluator.Wsqr = dot(vW, vW);
+            evaluator.WdotV = dot(vW, vV);
+            evaluator.WdotL = WdotL;
+            evaluator.VdotL = VdotL;
+            const uint STEP_COUNT = 8;
+            INTEGRATE(integral, LightEvaluator_Spotlight, evaluator, STEP_COUNT, t0, t1);
+            // NOTE(review): the reason for the factor 6 is not visible in this copy.
+            integral *= 6;
+        }
+    }
+    return integral;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Omni
+
+// In-scattering along the view ray for an omni light: one LUT fetch at
+// (eye_dist, angle between the light-to-eye direction and the view ray).
+float3 Integrate_Omni(float eye_dist, float3 vW, float3 vV)
+{
+    float light_dist = length(vW);
+    vW = vW / light_dist;
+    float2 tc;
+    tc.x = GetLutCoord_X(eye_dist, light_dist);
+    tc.y = GetLutCoord_Y(dot(vW, vV));
+    return g_vScatterPower*SampleLut(tLightLUT_P, tc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Shader Entrypoint
+
+// Pixel shader: returns signed in-scattered light along the eye ray to this
+// fragment, so that front and back faces combine when accumulated.
+//   GEOMETRY : ray ends at the volume surface; front faces get sign -1, back faces +1
+//   SKY      : ray ends at g_fZFar along the fragment direction, sign +1
+//   FINAL    : ray ends at the scene depth unprojected with g_mViewProjInv, sign +1
+float4 main(
+#if (PASSMODE == PASSMODE_FINAL)
+    VS_QUAD_OUTPUT pi
+    , uint sampleID : SV_SAMPLEINDEX
+#else
+    PS_POLYGONAL_INPUT pi
+#endif
+    , bool bIsFrontFace : SV_ISFRONTFACE
+    ) : SV_TARGET
+{
+#if (PASSMODE != PASSMODE_FINAL)
+    uint sampleID = 0;
+#endif
+    float fSign = 0;
+    float4 vWorldPos = float4(0, 0, 0, 1);
+    float eye_dist = 0;
+    float3 vV = float3(0, 0, 0);
+    if (PASSMODE == PASSMODE_GEOMETRY)
+    {
+        fSign = bIsFrontFace ? -1.0f : 1.0f;
+        vWorldPos = pi.vWorldPos;
+        eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+        vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+    }
+    else if (PASSMODE == PASSMODE_SKY)
+    {
+        fSign = 1.0f;
+        eye_dist = g_fZFar;
+        vV = normalize(pi.vWorldPos.xyz - g_vEyePosition.xyz);
+        vWorldPos.xyz = g_vEyePosition.xyz + vV * eye_dist;
+        vWorldPos.w = 1;
+    }
+    else if (PASSMODE == PASSMODE_FINAL)
+    {
+        fSign = 1.0f;
+        float fSceneDepth = LoadSceneDepth(pi.vPos.xy, sampleID);
+        float4 vClipPos;
+        // Pixel position -> clip-space xy in [-1, 1], with Y flipped.
+        vClipPos.xy = float2(2, -2)*g_vViewportSize_Inv*pi.vPos.xy + float2(-1.0f, 1.0f);
+        vClipPos.z = fSceneDepth;
+        vClipPos.w = 1;
+        vWorldPos = mul(g_mViewProjInv, vClipPos);
+        vWorldPos *= 1.0f / vWorldPos.w;
+        eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+        vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+    }
+
+    float3 vL = g_vLightDir.xyz;
+
+    float3 integral = float3(0,0,0);
+    if (LIGHTMODE == LIGHTMODE_DIRECTIONAL)
+    {
+        integral = Integrate_SimpleDirectional(eye_dist, vV, vL);
+    }
+    else if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    {
+        float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz;
+        integral = Integrate_Spotlight(eye_dist, vW, vV, vL);
+    }
+    else if (LIGHTMODE == LIGHTMODE_OMNI)
+    {
+        float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz;
+        integral = Integrate_Omni(eye_dist, vW, vV);
+    }
+    return float4(fSign*integral*g_vLightIntensity.rgb, 0);
+}
diff --git a/src/shaders/RenderVolume_VS.hlsl b/src/shaders/RenderVolume_VS.hlsl
new file mode 100644
index 0000000..dc5cdb8
--- /dev/null
+++ b/src/shaders/RenderVolume_VS.hlsl
@@ -0,0 +1,204 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+- MESHMODE:
+    - MESHMODE_FRUSTUM_GRID
+    - MESHMODE_FRUSTUM_BASE
+    - MESHMODE_FRUSTUM_CAP
+    - MESHMODE_OMNI_VOLUME
+    - MESHMODE_GEOMETRY
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Bypass vertex shader
+// Produces one vertex of the light volume mesh. Except in MESHMODE_GEOMETRY,
+// which forwards the input position, the vertex is generated from the vertex
+// ID alone. Outputs the light-space position (vClipPos), the world position
+// (vWorldPos, via g_mLightToWorld) and the projected position (vPos).
+HS_POLYGONAL_INPUT main(
+#if (MESHMODE == MESHMODE_GEOMETRY)
+    float4 input_position : POSITION,
+#endif
+    uint id : SV_VERTEXID )
+{
+#if (MESHMODE != MESHMODE_GEOMETRY)
+    float4 input_position = float4(0,0,0,1);
+#endif
+    HS_POLYGONAL_INPUT output;
+    //
+    // Generate the mesh dynamically from the vertex ID
+    //
+    if (MESHMODE == MESHMODE_FRUSTUM_GRID)
+    {
+        // g_uMeshResolution x g_uMeshResolution grid of four-vertex patches
+        // covering [-1, 1]^2 at z = 1.
+        const float patch_size = 2.0f / float(g_uMeshResolution);
+        uint patch_idx = id / 4;
+        uint patch_row = patch_idx / g_uMeshResolution;
+        uint patch_col = patch_idx % g_uMeshResolution;
+        output.vClipPos.x = patch_size*patch_col - 1.0f;
+        output.vClipPos.y = patch_size*patch_row - 1.0f;
+
+        // Corner of the patch selected by the low two bits of the vertex ID.
+        uint vtx_idx = id % 4;
+        float2 vtx_offset;
+        if (vtx_idx == 0)
+        {
+            vtx_offset = float2(0, 0);
+        }
+        else if (vtx_idx == 1)
+        {
+            vtx_offset = float2(1, 0);
+        }
+        else if (vtx_idx == 2)
+        {
+            vtx_offset = float2(1, 1);
+        }
+        else // if (vtx_idx == 3)
+        {
+            vtx_offset = float2(0, 1);
+        }
+        output.vClipPos.xy += patch_size * vtx_offset;
+
+        output.vClipPos.z = 1.0f;
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_FRUSTUM_BASE)
+    {
+        // Two triangles at z = 1; the second (id/3 != 0) is the first with
+        // x and y negated.
+        uint vtx_idx = id % 3;
+        output.vClipPos.x = (vtx_idx == 0) ? 1 : -1;
+        output.vClipPos.y = (vtx_idx == 2) ? -1 : 1;
+        output.vClipPos.xy *= (id/3 == 0) ? 1 : -1;
+        output.vClipPos.z = 1.0f;
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_FRUSTUM_CAP)
+    {
+        // Faces 0..3 are the sides, each made of g_uMeshResolution+1
+        // triangles; vertices past them form the z = 0 face.
+        uint tris_per_face = g_uMeshResolution+1;
+        uint verts_per_face = 3*tris_per_face;
+        uint face_idx = id / verts_per_face;
+        uint vtx_idx = id % 3;
+        if (face_idx < 4)
+        {
+            // Cap Side
+            const float patch_size = 2.0f / float(g_uMeshResolution);
+            const uint split_point = (g_uMeshResolution+1)/2;
+            float3 v;
+            uint tri_idx = (id%verts_per_face)/3;
+            if (tri_idx < g_uMeshResolution)
+            {
+                // One triangle per grid segment along the y = 1 edge; its
+                // apex is a y = 0 corner chosen by split_point.
+                if (vtx_idx == 0)
+                    v.x = (tri_idx >= split_point) ? 1 : -1;
+                else if (vtx_idx == 1)
+                    v.x = patch_size * tri_idx - 1;
+                else // if (vtx_idx == 2)
+                    v.x = patch_size * (tri_idx+1) - 1;
+                v.y = (vtx_idx == 0) ? 0 : 1;
+            }
+            else
+            {
+                // Last triangle: both y = 0 corners and the split point at y = 1.
+                if (vtx_idx == 1)
+                    v.x = patch_size*split_point-1;
+                else
+                    v.x = (vtx_idx == 0) ? -1 : 1;
+                v.y = (vtx_idx == 1) ? 1 : 0;
+            }
+            v.z = 1;
+            // Mirror and swizzle the template face onto one of the four sides.
+            v.xz *= (face_idx/2 == 0) ? 1 : -1;
+            output.vClipPos.xyz = (face_idx%2 == 0) ? v.zxy : v.xzy*float3(-1,1,1);
+        }
+        else
+        {
+            // Z=0
+            uint tri_idx = (id-4*verts_per_face)/3;
+            output.vClipPos.x = (vtx_idx == 1) ? 1 : -1;
+            output.vClipPos.y = (vtx_idx == 2) ? 1 : -1;
+            output.vClipPos.xy *= (tri_idx == 0) ? 1 : -1;
+            output.vClipPos.z = 0.0f;
+        }
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_OMNI_VOLUME)
+    {
+        // Cube of six patch grids, normalized onto the unit sphere.
+        uint verts_per_face = 4*g_uMeshResolution*g_uMeshResolution;
+        uint face_idx = id / verts_per_face;
+        uint face_vert_idx = id % verts_per_face;
+
+        const float patch_size = 2.0f / float(g_uMeshResolution);
+        uint patch_idx = face_vert_idx / 4;
+        uint patch_row = patch_idx / g_uMeshResolution;
+        uint patch_col = patch_idx % g_uMeshResolution;
+
+        float3 P;
+        P.x = patch_size*patch_col - 1.0f;
+        P.y = patch_size*patch_row - 1.0f;
+
+        uint vtx_idx = id % 4;
+        float2 vtx_offset;
+        if (vtx_idx == 0)
+        {
+            vtx_offset = float2(0, 0);
+        }
+        else if (vtx_idx == 1)
+        {
+            vtx_offset = float2(1, 0);
+        }
+        else if (vtx_idx == 2)
+        {
+            vtx_offset = float2(1, 1);
+        }
+        else // if (vtx_idx == 3)
+        {
+            vtx_offset = float2(0, 1);
+        }
+        P.xy += patch_size * vtx_offset;
+        // face_idx/3 selects the +1 or -1 side; face_idx%3 selects the axis
+        // through the swizzle on the left-hand side.
+        // NOTE(review): the middle branch tests (face_idx / 3) == 1 where the
+        // other two test == 0; presumably this keeps the winding consistent
+        // for that swizzle - confirm before changing.
+        P.z = ((face_idx / 3) == 0) ? 1 : -1;
+        if ((face_idx % 3) == 0)
+            P.yzx = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+        else if ((face_idx % 3) == 1)
+            P.xzy = P.xyz * (((face_idx / 3) == 1) ? float3(1,1,1) : float3(-1,1,1));
+        else //if ((face_idx % 3) == 2)
+            P.xyz = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+        output.vClipPos = float4(normalize(P.xyz), 1);
+    }
+    else
+    {
+        output.vClipPos = input_position;
+
+    }
+
+    if (MESHMODE == MESHMODE_OMNI_VOLUME)
+    {
+        // The unit-sphere direction is scaled to the light's far distance first.
+        output.vWorldPos = mul(g_mLightToWorld, float4(g_fLightZFar*output.vClipPos.xyz, 1));
+    }
+    else
+    {
+        output.vWorldPos = mul(g_mLightToWorld, output.vClipPos);
+    }
+    output.vWorldPos = output.vWorldPos / output.vWorldPos.w;
+    output.vPos = mul(g_mViewProj, output.vWorldPos);
+    return output;
+}
diff --git a/src/shaders/Resolve_PS.hlsl b/src/shaders/Resolve_PS.hlsl
new file mode 100644
index 0000000..72c07f9
--- /dev/null
+++ b/src/shaders/Resolve_PS.hlsl
@@ -0,0 +1,179 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Resolve pass outputs: filtered color, and a float2 depth target that
+// receives (result_depth, result_depth_sqr) divided by the total weight.
+struct RESOLVE_OUTPUT
+{
+    float3 color : SV_TARGET0;
+    float2 depth : SV_TARGET1;
+};
+
+// NOTE(review): element-type arguments on these texture declarations may have
+// been stripped from this copy; confirm against the upstream file.
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+Texture2DMS tGodraysBuffer : register(t0);
+Texture2DMS tGodraysDepth : register(t1);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+Texture2D tGodraysBuffer : register(t0);
+Texture2D tGodraysDepth : register(t1);
+#endif
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA))
+Texture2D tFMask_color : register(t2);
+#endif
+
+#if defined(__PSSL__)
+static const int FMASK_UNKNOWN = 1 << 3; // color "unknown" is always represented as high bit in the 4bit fragment index
+
+// Loads the packed fragment mask for the pixel at 'coord'.
+// Each coverage sample takes 4 bits, so .x holds samples 0-7 and .y samples 8-15.
+int2 getFmask(Texture2D tex, int sample_count, int2 coord)
+{
+    // if 8 or less coverage samples, only load one VGPR (32bits / 4bits per sample)
+    // if more than 8 coverage samples, we need to load 2 VGPRs
+    int2 fmask;
+    if (sample_count <= 8)
+    {
+        fmask.x = tex.Load(int3(coord, 0)).x;
+        fmask.y = 0x88888888; // all invalid -- though in theory we shouldn't need to refer to them at all.
+    }
+    else
+    {
+        fmask.xy = tex.Load(int3(coord, 0)).xy;
+    }
+    return fmask;
+}
+
+// Extracts the 4-bit fragment index of coverage sample 'index' from a mask
+// returned by getFmask.
+int getFptr(int index, int2 fmask)
+{
+    const int bitShift = 4; // fmask load always returns a 4bit fragment index (fptr) per coverage sample, regardless of actual number of fragments.
+    const int mask = (1 << bitShift) - 1;
+    if (index < 8)
+        return (fmask.x >> (index*bitShift)) & mask;
+    else
+        return (fmask.y >> ((index-8)*bitShift)) & mask;
+}
+#endif
+
+// Resolve entry point: accumulates weighted color, depth and depth^2 over a
+// (2*KERNEL_WIDTH+1)^2 pixel neighbourhood (and over every sample in MSAA
+// mode), skipping taps outside the viewport, then divides by the total weight.
+// NOTE(review): this copy is damaged inside the per-sample loop. Everything
+// between "for (uint s=0; s" and " 0.0f) ? result_color/total_weight" is
+// missing, including the loop body and the declaration of 'output'. The
+// surviving text is kept verbatim; restore the span from the upstream file.
+RESOLVE_OUTPUT main(VS_QUAD_OUTPUT input)
+{
+    float3 result_color = 0.0f;
+    float result_depth = 0.0f;
+    float result_depth_sqr = 0.0f;
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    uint2 buffer_size;
+    uint buffer_samples;
+    tGodraysBuffer.GetDimensions(buffer_size.x, buffer_size.y, buffer_samples);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    uint buffer_samples = 1;
+#endif
+
+    int2 base_tc = int2(input.vTex * g_vViewportSize);
+    const float FILTER_SCALE = 1.0f;
+    const int KERNEL_WIDTH = 1;
+    float total_weight = 0.0f;
+    [unroll]
+    for (int ox=-KERNEL_WIDTH; ox<=KERNEL_WIDTH; ++ox)
+    {
+        if ((base_tc.x + ox) < 0 || (base_tc.x + ox) >= g_vViewportSize.x) continue;
+
+        [unroll]
+        for (int oy=-KERNEL_WIDTH; oy<=KERNEL_WIDTH; ++oy)
+        {
+            if ((base_tc.y + oy) < 0 || (base_tc.y + oy) >= g_vViewportSize.y) continue;
+
+            int2 offset = int2(ox, oy);
+            int2 tc = base_tc + offset;
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA))
+            int2 fmask = getFmask(tFMask_color, buffer_samples, tc);
+#endif
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+            for (uint s=0; s 0.0f) ? result_color/total_weight : float3(0.f, 0.f, 0.f);
+    output.depth = (total_weight > 0.0f) ? float2(result_depth, result_depth_sqr)/total_weight : 1.0f;
+    return output;
+}
diff --git a/src/shaders/ShaderCommon.h b/src/shaders/ShaderCommon.h
new file mode 100644
index 0000000..f4b8f80
--- /dev/null
+++ b/src/shaders/ShaderCommon.h
@@ -0,0 +1,265 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (C) 2013, NVIDIA Corporation. All rights reserved. 
+
+/*===========================================================================
+Constants
+===========================================================================*/
+
+static const float PI = 3.1415926535898f;
+// 1 - 1/2048. The hull shader compares abs(vClipPos.xy) against this value to
+// detect edges on the border of the light clip square.
+static const float EDGE_FACTOR = 1.0f - (2.0f/64.0f) * (1.0f/64.0f);
+static const uint MAX_PHASE_TERMS = 4;
+
+#ifdef __PSSL__
+// Sample offsets in 1/16-pixel units, concatenated per sample count:
+// index 0 = 1x, 1..2 = 2x, 3..6 = 4x, 7..14 = 8x.
+static const float2 SAMPLE_POSITIONS[] = {
+    // 1x
+    float2( 0, 0)/16.f,
+    // 2x
+    float2(-4, 4)/16.f,
+    float2( 4,-4)/16.f,
+    // 4x
+    float2(-6, 6)/16.f,
+    float2( 6,-6)/16.f,
+    float2(-2,-2)/16.f,
+    float2( 2, 2)/16.f,
+    // 8x
+    float2(-7,-3)/16.f,
+    float2( 7, 3)/16.f,
+    float2( 1,-5)/16.f,
+    float2(-5, 5)/16.f,
+    float2(-3,-7)/16.f,
+    float2( 3, 7)/16.f,
+    float2( 5,-1)/16.f,
+    float2(-1, 1)/16.f
+};
+
+// The macros below map the HLSL spellings used by the shaders onto their PSSL
+// equivalents so one source compiles for both targets.
+
+// constant buffers
+#define cbuffer ConstantBuffer
+
+// textures and samplers
+#define Texture2DMS MS_Texture2D
+#define Texture2DArray Texture2D_Array
+#define SampleLevel SampleLOD
+#define GetSamplePosition(s) GetSamplePoint(s)
+
+// semantics
+#define SV_DEPTH S_DEPTH_OUTPUT
+#define SV_DOMAINLOCATION S_DOMAIN_LOCATION
+#define SV_INSIDETESSFACTOR S_INSIDE_TESS_FACTOR
+#define SV_INSTANCEID S_INSTANCE_ID
+#define SV_ISFRONTFACE S_FRONT_FACE
+#define SV_OUTPUTCONTROLPOINTID S_OUTPUT_CONTROL_POINT_ID
+#define SV_POSITION S_POSITION
+#define SV_PRIMITIVEID S_PRIMITIVE_ID
+#define SV_SAMPLEINDEX S_SAMPLE_INDEX
+#define SV_TARGET S_TARGET_OUTPUT
+#define SV_TARGET0 S_TARGET_OUTPUT0
+#define SV_TARGET1 S_TARGET_OUTPUT1
+#define SV_TESSFACTOR S_EDGE_TESS_FACTOR
+#define SV_VERTEXID S_VERTEX_ID
+
+// hull and domain shader properties
+#define domain DOMAIN_PATCH_TYPE
+#define partitioning PARTITIONING_TYPE
+#define outputtopology OUTPUT_TOPOLOGY_TYPE
+#define outputcontrolpoints OUTPUT_CONTROL_POINTS
+#define patchconstantfunc PATCH_CONSTANT_FUNC
+#define maxtessfactor MAX_TESS_FACTOR
+
+// need to figure out how to deal with those exactly:
+// ('shared' is defined to nothing, so the keyword disappears on this target)
+#define shared
+#endif
+
+/*===========================================================================
+Sampler states
+===========================================================================*/
+SamplerState sPoint : register(s0);
+SamplerState sBilinear : register(s1);
+
+/*===========================================================================
+Constant buffers
+===========================================================================*/
+// Every member carries an explicit packoffset, so the layout is fixed by these
+// declarations; presumably the host code mirrors it - confirm before reordering.
+
+// Output/buffer dimensions with their reciprocals, and the sample count.
+shared cbuffer cbContext : register(b0)
+{
+    float2 g_vOutputSize : packoffset(c0);
+    float2 g_vOutputSize_Inv : packoffset(c0.z);
+    float2 g_vBufferSize : packoffset(c1);
+    float2 g_vBufferSize_Inv : packoffset(c1.z);
+    float g_fResMultiplier : packoffset(c2);
+    unsigned int g_uBufferSamples : packoffset(c2.y);
+}
+
+// Camera, viewport and participating-medium parameters.
+shared cbuffer cbFrame : register(b1)
+{
+    column_major float4x4 g_mProj : packoffset(c0);
+    column_major float4x4 g_mViewProj : packoffset(c4);
+    column_major float4x4 g_mViewProjInv: packoffset(c8);
+    float2 g_vOutputViewportSize : packoffset(c12);
+    float2 g_vOutputViewportSize_Inv : packoffset(c12.z);
+    float2 g_vViewportSize : packoffset(c13);
+    float2 g_vViewportSize_Inv : packoffset(c13.z);
+    float3 g_vEyePosition : packoffset(c14);
+    float2 g_vJitterOffset : packoffset(c15);
+    float g_fZNear : packoffset(c15.z);
+    float g_fZFar : packoffset(c15.w);
+    float3 g_vScatterPower : packoffset(c16);
+    unsigned int g_uNumPhaseTerms : packoffset(c16.w);
+    float3 g_vSigmaExtinction : packoffset(c17);
+    unsigned int g_uPhaseFunc[4] : packoffset(c18);
+    float4 g_vPhaseParams[4] : packoffset(c22);
+};
+
+// Light, shadow-map and volume-mesh parameters.
+shared cbuffer cbVolume : register(b2)
+{
+    column_major float4x4 g_mLightToWorld : packoffset(c0);
+    float g_fLightFalloffAngle : packoffset(c4.x);
+    float g_fLightFalloffPower : packoffset(c4.y);
+    float g_fGridSectionSize : packoffset(c4.z);
+    float g_fLightToEyeDepth : packoffset(c4.w);
+    float g_fLightZNear : packoffset(c5);
+    float g_fLightZFar : packoffset(c5.y);
+    float4 g_vLightAttenuationFactors : packoffset(c6);
+    column_major float4x4 g_mLightProj[4] : packoffset(c7);
+    column_major float4x4 g_mLightProjInv[4]: packoffset(c23);
+    float3 g_vLightDir : packoffset(c39);
+    float g_fGodrayBias : packoffset(c39.w);
+    float3 g_vLightPos : packoffset(c40);
+    unsigned int g_uMeshResolution : packoffset(c40.w);
+    float3 g_vLightIntensity : packoffset(c41);
+    float g_fTargetRaySize : packoffset(c41.w);
+    float4 g_vElementOffsetAndScale[4] : packoffset(c42);
+    float4 g_vShadowMapDim : packoffset(c46);
+    unsigned int g_uElementIndex[4] : packoffset(c47);
+};
+
+// Temporal filter and apply pass parameters.
+shared cbuffer cbApply : register(b3)
+{
+    column_major float4x4 g_mHistoryXform : packoffset(c0);
+    float g_fFilterThreshold : packoffset(c4);
+    float g_fHistoryFactor : packoffset(c4.y);
+    float3 g_vFogLight : packoffset(c5);
+    float g_fMultiScattering : packoffset(c5.w);
+};
+
+/*===========================================================================
+Shader inputs
+===========================================================================*/
+struct VS_POLYGONAL_INPUT
+{
+    float4 vPos : POSITION;
+};
+
+// Volume vertex shader output, consumed by the hull shader.
+struct HS_POLYGONAL_INPUT
+{
+    float4 vPos : SV_POSITION;
+    float4 vWorldPos : TEXCOORD0;
+    float4 vClipPos : TEXCOORD1;
+};
+
+// Per-control-point hull shader output.
+struct HS_POLYGONAL_CONTROL_POINT_OUTPUT
+{
+    float4 vWorldPos : TEXCOORD0;
+    float4 vClipPos : TEXCOORD1;
+};
+
+// Per-patch hull shader output: quad edge and inside tessellation factors.
+struct HS_POLYGONAL_CONSTANT_DATA_OUTPUT
+{
+    float fEdges[4] : SV_TESSFACTOR;
+    float fInside[2] : SV_INSIDETESSFACTOR;
+    float debug[4] : TEXCOORD2;
+};
+
+struct PS_POLYGONAL_INPUT
+{
+    float4 vPos : SV_POSITION;
+    float4 vWorldPos : TEXCOORD0;
+#ifdef __PSSL__
+    float dummy : CLIPPPOSDUMMY; //Workaround for compiler exception in polygon hull shaders.
+#endif
+};
+
+// Full-screen quad interpolants; vWorldPos and vTex use 'sample' interpolation.
+struct VS_QUAD_OUTPUT
+{
+    float4 vPos : SV_POSITION;
+    sample float4 vWorldPos : TEXCOORD0;
+    sample float2 vTex : TEXCOORD1;
+};
+
+/*===========================================================================
+Common functions
+===========================================================================*/
+
+// Returns d*zn / (zf - (zf - zn)*d); maps d = 0 to 0 and d = 1 to 1.
+float LinearizeDepth(float d, float zn, float zf)
+{
+    return d * zn / (zf - ((zf - zn) * d));
+}
+
+// Returns z*(1 + zf/zn) / (1 + z*zf/zn); maps z = 0 to 0 and z = 1 to 1.
+float WarpDepth(float z, float zn, float zf)
+{
+    return z * (1+zf/zn) / (1+z*zf/zn);
+}
+
+// Linear remap of d from [zn, zf] to [0, 1] (not clamped).
+float MapDepth(float d, float zn, float zf)
+{
+    return (d - zn) / (zf - zn);
+}
+
+// Approximates a non-normalized gaussian with Sigma == 1
+// 'width' scales the distance, so it takes the role of sigma in the formula
+// below. The weight reaches exactly 0 at |sample_pos| >= 8*width.
+float GaussianApprox(float2 sample_pos, float width)
+{
+    float x_sqr = sample_pos.x*sample_pos.x + sample_pos.y*sample_pos.y;
+    // exp(-0.5*(x/w)^2) ~ (1-(x/(8*w))^2)^32
+    float w = saturate(1.0f - x_sqr/(64.0f * width*width));
+    w = w*w; // ^2
+    w = w*w; // ^4
+    w = w*w; // ^8
+    w = w*w; // ^16
+    w = w*w; // ^32
+    return w;
+}
+
+#if defined(ATTENUATIONMODE)
+// Distance attenuation at distance d, selected at compile time by
+// ATTENUATIONMODE; coefficients come from g_vLightAttenuationFactors (A, B, C, D).
+float AttenuationFunc(float d)
+{
+    if (ATTENUATIONMODE == ATTENUATIONMODE_POLYNOMIAL)
+    {
+        // 1-(A+Bx+Cx^2)
+        return saturate(1.0f - (g_vLightAttenuationFactors.x + g_vLightAttenuationFactors.y*d + g_vLightAttenuationFactors.z*d*d));
+    }
+    else if (ATTENUATIONMODE == ATTENUATIONMODE_INV_POLYNOMIAL)
+    {
+        // 1 / (A+Bx+Cx^2) + D
+        return saturate(1.0f / (g_vLightAttenuationFactors.x + g_vLightAttenuationFactors.y*d + g_vLightAttenuationFactors.z*d*d) + g_vLightAttenuationFactors.w);
+    }
+    else //if (ATTENUATIONMODE == ATTENUATIONMODE_NONE)
+    {
+        return 1.0f;
+    }
+}
+#endif
+
+// Looks up the phase LUT 'tex' at y = acos(-cos_theta)/PI (argument clamped to
+// acos's domain) and scales the fetched rgb by g_vScatterPower.
+float3 GetPhaseFactor(Texture2D tex, float cos_theta)
+{
+    float2 tc;
+    tc.x = 0;
+    tc.y = acos(clamp(-cos_theta, -1.0f, 1.0f)) / PI;
+    return g_vScatterPower*tex.SampleLevel(sBilinear, tc, 0).rgb;
+}
diff --git a/src/shaders/TemporalFilter_PS.hlsl b/src/shaders/TemporalFilter_PS.hlsl
new file mode 100644
index 0000000..082e577
--- /dev/null
+++ b/src/shaders/TemporalFilter_PS.hlsl
@@ 
-0,0 +1,207 @@ +// This code contains NVIDIA Confidential Information and is disclosed +// under the Mutual Non-Disclosure Agreement. +// +// Notice +// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES +// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. +// +// NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless +// expressly authorized by NVIDIA. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved. +// +// NVIDIA Corporation and its licensors retain all intellectual property and proprietary +// rights in and to this software and related documentation and any modifications thereto. +// Any use, reproduction, disclosure or distribution of this software and related +// documentation without an express license agreement from NVIDIA Corporation is +// strictly prohibited. 
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Current-frame and previous-frame (history) inputs. Only .rgb is read from
+// the color buffers and only .rg from the depth buffers.
+Texture2D tCurrBuffer : register(t0);
+Texture2D tLastBuffer : register(t1);
+Texture2D tCurrDepth : register(t2);
+Texture2D tLastDepth : register(t3);
+
+// Texel offsets of the 3x3 neighborhood, row by row; entry 4 is the center.
+static const int2 NEIGHBOR_OFFSETS[] = {
+    int2(-1, -1), int2( 0, -1), int2( 1, -1),
+    int2(-1,  0), int2( 0,  0), int2( 1,  0),
+    int2(-1,  1), int2( 0,  1), int2( 1,  1)
+};
+
+// Filter weights matching NEIGHBOR_OFFSETS: center 1, edges 1/8, corners 1/64.
+// The disabled alternative reduces the filter to the center tap only.
+#if 1
+static const float NEIGHBOR_WEIGHTS[] = {
+    0.015625f, 0.125000f, 0.015625f,
+    0.125000f, 1.000000f, 0.125000f,
+    0.015625f, 0.125000f, 0.015625f,
+};
+#else
+static const float NEIGHBOR_WEIGHTS[] = {
+    0, 0, 0,
+    0, 1, 0,
+    0, 0, 0,
+};
+#endif
+
+// Luma (Y) channel of the YCoCg transform below.
+float RGB_to_Y (float3 input)
+{
+    return 0.50f*input.g + 0.25f*(input.r + input.b);
+}
+
+// RGB -> YCoCg: x = Y = g/2 + (r+b)/4, y = Co = (r-b)/2, z = Cg = g/2 - (r+b)/4.
+float3 RGB_to_YCoCg (float3 input)
+{
+    float3 ret;
+    float tmp = 0.25f*(input.r + input.b);
+    ret.x = 0.50f*input.g + tmp;
+    ret.y = 0.50f*(input.r - input.b);
+    ret.z = 0.50f*input.g - tmp;
+    return ret;
+}
+
+// Exact inverse of RGB_to_YCoCg: r = Y - Cg + Co, g = Y + Cg, b = Y - Cg - Co.
+float3 YCoCg_to_RGB(float3 input)
+{
+    float3 ret;
+    float Y_val = input.x; float Co = input.y; float Cg = input.z;
+    float tmp = Y_val - Cg;
+    ret.r = tmp + Co;
+    ret.g = Y_val + Cg;
+    ret.b = tmp - Co;
+    return ret;
+}
+
+// Compresses non-negative RGB with x/(1+x) and converts the result to YCoCg.
+float3 Tonemap( float3 sample_rgb )
+{
+    sample_rgb = sample_rgb / (1 + sample_rgb);
+    return RGB_to_YCoCg(sample_rgb);
+}
+
+// Inverse of Tonemap: converts YCoCg back to RGB and expands with x/(1-x).
+float3 Tonemap_Inv( float3 sample_YCoCg )
+{
+    float3 sample_rgb = YCoCg_to_RGB(sample_YCoCg);
+    return sample_rgb / (1 - sample_rgb);
+}
+
+// Filtered color and the matching filtered depth pair, written to two targets.
+struct FILTER_OUTPUT
+{
+    float3 color : SV_TARGET0;
+    float2 depth : SV_TARGET1;
+};
+
+// Temporal filter: blends a 3x3-filtered current sample with the reprojected
+// history sample. The history weight is reduced when the reprojected position
+// is off screen, when the pixel moved, or when the depths disagree, and the
+// history color is clipped against the luma range of the current neighborhood.
+FILTER_OUTPUT main(VS_QUAD_OUTPUT input)
+{
+    FILTER_OUTPUT output;
+
+    // load neighbors
+    float3 curr_sample = float3(0,0,0);
+    float2 curr_depth = float2(0,0);
+    float neighborhood_bounds_max = 0;
+    float neighborhood_bounds_min = 0;
+    // NOTE(review): assumes g_vViewportSize matches the dimensions of the
+    // buffers bound above - confirm against the host code.
+    int2 max_dimensions = int2(g_vViewportSize);
+    // Valid texel indices are [0, size-1]. Clamping to `size` itself lets the
+    // +1 taps on the right/bottom edge fall outside the resource, where Load()
+    // yields zeros that would be averaged in as if they were real samples.
+    int2 max_texel = max(int2(0,0), max_dimensions - int2(1,1));
+    int2 base_tc = int2(floor(input.vTex.xy*max_dimensions));
+    // Negative until the first valid neighbor has been accumulated.
+    float total_weight = -1.0f;
+
+    [unroll]
+    for (int n=0; n<9; ++n)
+    {
+        int2 sample_tc = max( int2(0,0), min(max_texel, base_tc + NEIGHBOR_OFFSETS[n]));
+        float3 neighbor_sample = max(float3(0,0,0), tCurrBuffer.Load(int3(sample_tc, 0)).rgb);
+        float2 neighbor_depth = tCurrDepth.Load(int3(sample_tc, 0)).rg;
+        // Skip taps containing inf/NaN so they cannot poison the average.
+        bool is_valid = all(isfinite(neighbor_sample.xyz));
+        if (is_valid)
+        {
+            neighbor_sample = Tonemap(neighbor_sample);
+            float weight = NEIGHBOR_WEIGHTS[n];
+            curr_sample += weight*neighbor_sample;
+            curr_depth += weight*neighbor_depth;
+            if (total_weight <= 0.0f)
+            {
+                // First valid tap initializes the luma bounds.
+                neighborhood_bounds_max = neighbor_sample.x;
+                neighborhood_bounds_min = neighbor_sample.x;
+                total_weight = weight;
+            }
+            else
+            {
+                neighborhood_bounds_max = max(neighborhood_bounds_max, neighbor_sample.x);
+                neighborhood_bounds_min = min(neighborhood_bounds_min, neighbor_sample.x);
+                total_weight += weight;
+            }
+        }
+    }
+    // Normalize; fall back to black / depth (1,1) when no tap was valid.
+    curr_sample = (total_weight > 0) ? curr_sample/total_weight : float3(0,0,0);
+    curr_depth = (total_weight > 0) ? curr_depth/total_weight : float2(1, 1);
+
+    // Transform and apply history
+    const float MAX_HISTORY_FACTOR = 0.98f;
+    float history_factor = g_fHistoryFactor;
+
+    // Rebuild this pixel's clip-space position and reproject it into the
+    // previous frame with g_mHistoryXform.
+    float4 curr_clip;
+    curr_clip.xy = float2(2, -2) * input.vTex.xy + float2(-1, 1);
+    curr_clip.z = WarpDepth(curr_depth.x, g_fZNear, g_fZFar);
+    curr_clip.w = 1;
+    float4 last_clip = mul( g_mHistoryXform, curr_clip );
+    last_clip = last_clip/last_clip.w;
+
+    // A normalized coordinate of exactly 1 maps to `size`, one past the last
+    // texel, so clamp the integer coordinate to the last valid index as well.
+    float2 last_uv = saturate((float2(0.5f, -0.5f)*last_clip.xy+float2(0.5f, 0.5f)));
+    int2 last_tc = min(int2(last_uv * max_dimensions), max_texel);
+    float3 last_sample = tLastBuffer.Load(int3(last_tc, 0)).rgb;
+    float2 last_depth = tLastDepth.Load(int3(last_tc, 0)).rg;
+    // A non-finite history sample is replaced by the current sample.
+    last_sample = all(isfinite(last_sample)) ? Tonemap(last_sample) : curr_sample;
+
+    // No history is available when the reprojected position is off screen.
+    history_factor = all(abs(last_clip.xy) <= 1.0f) ? history_factor : 0.0f;
+
+    // Fade history out (cubically) as the reprojection distance approaches
+    // g_fFilterThreshold.
+    float2 clip_diff = (last_clip.xy - curr_clip.xy) * g_vViewportSize * g_vViewportSize_Inv.xx;
+    float clip_dist = length(clip_diff);
+    float movement_factor = saturate(1.0f - clip_dist/g_fFilterThreshold);
+    history_factor *= movement_factor*movement_factor*movement_factor;
+
+    // Fade history out when current and previous depth differ by more than the
+    // local depth variance.
+    // NOTE(review): |g - r*r| presumably means .r is mean depth and .g is mean
+    // squared depth - confirm against the pass that writes these buffers.
+    float depth_diff = abs(curr_depth.r-last_depth.r);
+    float local_variance = abs(curr_depth.g - curr_depth.r*curr_depth.r) + abs(last_depth.g - last_depth.r*last_depth.r);
+    local_variance = max(local_variance, 0.0001f);
+#if 0
+    float local_stddev = sqrt(local_variance);
+    float depth_factor = saturate(depth_diff-local_stddev);
+    depth_factor = local_stddev / (local_stddev + depth_factor);
+#else
+    float depth_factor = saturate(depth_diff-local_variance);
+    depth_factor = local_variance / (local_variance + depth_factor);
+#endif
+    history_factor *= depth_factor;
+
+    // threshold based on neighbors
+    // Convert to Y Co Cg, then clip to bounds of neighborhood
+    float3 blended_sample = curr_sample;
+    float2 blended_depth = curr_depth;
+    if (history_factor > 0.0f)
+    {
+        const float CLIP_EPSILON = 0.0001f;
+        float3 clip_vec = last_sample - curr_sample;
+        float clamped_Y = max(neighborhood_bounds_min, min(neighborhood_bounds_max, last_sample.x));
+        // Fraction of the way from the current sample toward the history sample
+        // at which luma reaches the neighborhood bounds; 1 keeps history as is.
+        float clip_factor_Y = (abs(clip_vec.x) > CLIP_EPSILON) ? abs((clamped_Y-curr_sample.x) / clip_vec.x) : 1.0f;
+        float clip_factor = clip_factor_Y;
+        float3 clipped_history = curr_sample + clip_factor*clip_vec;
+
+        // Always keep a small contribution from the current frame.
+        history_factor = min(history_factor, MAX_HISTORY_FACTOR);
+        blended_sample = lerp(curr_sample, clipped_history, history_factor);
+        blended_depth = lerp(curr_depth, last_depth, history_factor);
+    }
+
+    output.color = Tonemap_Inv(blended_sample);
+    output.depth = blended_depth;
+    return output;
+}
-- cgit v1.2.3