aboutsummaryrefslogtreecommitdiff
path: root/src/shaders
diff options
context:
space:
mode:
Diffstat (limited to 'src/shaders')
-rw-r--r--src/shaders/Apply_PS.hlsl176
-rw-r--r--src/shaders/ComputeLightLUT_CS.hlsl192
-rw-r--r--src/shaders/ComputePhaseLookup_PS.hlsl150
-rw-r--r--src/shaders/Debug_PS.hlsl42
-rw-r--r--src/shaders/DownsampleDepth_PS.hlsl82
-rw-r--r--src/shaders/Quad_VS.hlsl46
-rw-r--r--src/shaders/RenderVolume_DS.hlsl181
-rw-r--r--src/shaders/RenderVolume_HS.hlsl182
-rw-r--r--src/shaders/RenderVolume_PS.hlsl403
-rw-r--r--src/shaders/RenderVolume_VS.hlsl204
-rw-r--r--src/shaders/Resolve_PS.hlsl179
-rw-r--r--src/shaders/ShaderCommon.h265
-rw-r--r--src/shaders/TemporalFilter_PS.hlsl207
13 files changed, 2309 insertions, 0 deletions
diff --git a/src/shaders/Apply_PS.hlsl b/src/shaders/Apply_PS.hlsl
new file mode 100644
index 0000000..0e19e46
--- /dev/null
+++ b/src/shaders/Apply_PS.hlsl
@@ -0,0 +1,176 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+ - SAMPLEMODE_SINGLE
+ - SAMPLEMODE_MSAA
+
+- UPSAMPLEMODE:
+ - UPSAMPLEMODE_POINT
+ - UPSAMPLEMODE_BILINEAR
+ - UPSAMPLEMODE_BILATERAL
+
+- FOGMODE:
+ - FOGMODE_NONE
+ - FOGMODE_NOSKY
+ - FOGMODE_FULL
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+Texture2D<float4> tGodraysBuffer : register(t0); // source of the in-scatter colour that main() upsamples (rgb is read)
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+ Texture2DMS<float> tSceneDepth : register(t1); // MSAA permutation: main() reads it with Load() at the current sample index
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+ Texture2D<float> tSceneDepth : register(t1); // single-sample permutation: main() reads it with SampleLevel() and sPoint
+#endif
+Texture2D<float2> tGodraysDepth : register(t2); // bilateral path: .r is compared with the scene depth, (.r, .g) are passed to CalcVariance as (x, x_sqr)
+Texture2D<float4> tPhaseLUT : register(t4); // NOTE(review): declared here but not referenced anywhere in this file
+
+struct PS_APPLY_OUTPUT // the two render-target values returned by main()
+{
+ float4 inscatter : SV_TARGET0; // main() initialises this to (0,0,0,1); rgb receives the upsampled in-scatter plus the fog term when fog is applied
+ float4 transmission : SV_TARGET1; // main() initialises this to (1,1,1,1); rgb receives exp(-sigma_ext*distance) when fog is applied
+};
+
+// Compresses every channel of s with the curve s / (1 + s).
+float3 Tonemap(float3 s)
+{
+    float3 denom = float3(1,1,1) + s;
+    return s / denom;
+}
+
+// Expands every channel of s with the curve s / (1 - s), the algebraic
+// inverse of s / (1 + s).
+float3 Tonemap_Inv(float3 s)
+{
+    float3 denom = float3(1,1,1) - s;
+    return s / denom;
+}
+
+
+// Returns |x_sqr - x*x|. The abs() keeps the result non-negative, so callers
+// can take its square root.
+float CalcVariance(float x, float x_sqr)
+{
+    float squared_x = x*x;
+    return abs(x_sqr - squared_x);
+}
+
+// Pixel shader that upsamples the god-rays buffer and, depending on FOGMODE,
+// adds an analytic fog term.
+//   input    - quad interpolants; input.vTex is scaled by the viewport sizes
+//              to address the god-rays and depth buffers
+//   sampleID - (MSAA permutation only) index of the scene-depth sample to load
+// Returns the in-scatter colour (SV_TARGET0) and the transmission (SV_TARGET1).
+PS_APPLY_OUTPUT main(VS_QUAD_OUTPUT input
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    , uint sampleID : SV_SAMPLEINDEX
+#endif
+    )
+{
+    PS_APPLY_OUTPUT output;
+    output.transmission = float4(1,1,1,1);
+    output.inscatter = float4(0,0,0,1);
+
+    float2 texcoord = input.vTex * g_vViewportSize * g_vBufferSize_Inv;
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    float scene_depth = tSceneDepth.Load(int2(input.vTex*g_vOutputViewportSize), sampleID).x;
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    float scene_depth = tSceneDepth.SampleLevel(sPoint, texcoord, 0).x;
+#endif
+    scene_depth = LinearizeDepth(scene_depth, g_fZNear, g_fZFar);
+
+    // Quality of the upsampling interpolator
+    // 0: Point (no up-sample)
+    // 1: Bilinear
+    // 2: Bilateral
+    float3 inscatter_sample = float3(0,0,0);
+    if (UPSAMPLEMODE == UPSAMPLEMODE_POINT)
+    {
+        inscatter_sample = tGodraysBuffer.SampleLevel( sPoint, texcoord, 0).rgb;
+    }
+    else if (UPSAMPLEMODE == UPSAMPLEMODE_BILINEAR)
+    {
+        inscatter_sample = tGodraysBuffer.SampleLevel( sBilinear, texcoord, 0).rgb;
+    }
+    else if (UPSAMPLEMODE == UPSAMPLEMODE_BILATERAL)
+    {
+        // 3x3 neighbourhood around the texel under this pixel.
+        const float2 NEIGHBOR_OFFSETS[] = {
+            float2(-1, -1), float2( 0, -1), float2( 1, -1),
+            float2(-1,  0), float2( 0,  0), float2( 1,  0),
+            float2(-1,  1), float2( 0,  1), float2( 1,  1)
+        };
+        const float GAUSSIAN_WIDTH = 1.0f;
+        const float DEPTH_RANGE = 0.10f;
+
+        float2 max_dimensions = floor(g_vViewportSize);
+        // Index of the last texel inside the viewport. Clamping to
+        // max_dimensions itself would let floor() below return an index one
+        // past the last row/column, so Load() would read outside the viewport.
+        float2 max_texel = max_dimensions - float2(1,1);
+        float2 base_tc = input.vTex * max_dimensions;
+
+        float total_weight = 0;
+        [unroll]
+        for (int n=0; n<9; ++n)
+        {
+            float2 sample_tc = max( float2(0,0), min(max_texel, base_tc + NEIGHBOR_OFFSETS[n]));
+
+            // Spatial weight, measured from the texel centre.
+            float2 sample_location = floor(sample_tc) + float2(0.5f, 0.5f);
+            float weight = GaussianApprox(sample_location - base_tc, GAUSSIAN_WIDTH);
+
+            // Depth weight: falls to zero once the neighbour is DEPTH_RANGE
+            // away from the scene depth, and is reduced further by the
+            // neighbour's depth standard deviation.
+            float2 neighbor_depth = tGodraysDepth.Load(int3(sample_location.xy, 0)).rg;
+            float depth_diff = abs(scene_depth - neighbor_depth.r);
+            float neighbor_variance = CalcVariance(neighbor_depth.r, neighbor_depth.g);
+            float neighbor_stddev = sqrt(neighbor_variance);
+            float depth_weight = saturate(1 - depth_diff / DEPTH_RANGE);
+            depth_weight = depth_weight*depth_weight*(1-neighbor_stddev);
+            weight *= depth_weight;
+
+            // Accumulate in tonemapped space; the result is expanded again below.
+            inscatter_sample += weight * Tonemap(tGodraysBuffer.Load(int3(sample_location.xy, 0)).rgb);
+            total_weight += weight;
+        }
+
+        if (total_weight > 0.0f)
+        {
+            inscatter_sample = Tonemap_Inv(inscatter_sample / total_weight);
+        }
+        else
+        {
+            // No neighbour was accepted: fall back to a plain bilinear fetch.
+            inscatter_sample = tGodraysBuffer.SampleLevel(sBilinear, texcoord, 0).rgb;
+        }
+    }
+
+    output.inscatter.rgb = inscatter_sample.rgb;
+    if (FOGMODE != FOGMODE_NONE)
+    {
+        // FOGMODE_NOSKY skips pixels whose linearised depth is >= 1.
+        if ((FOGMODE != FOGMODE_NOSKY) || (scene_depth < 1.f))
+        {
+            float scene_distance = g_fZFar * scene_depth;
+            float3 sigma_ext = g_vSigmaExtinction;
+            // NOTE(review): divides by sigma_ext; a zero channel would give 0/0 here - confirm the host never passes one.
+            output.inscatter.rgb += g_fMultiScattering * g_vFogLight * g_vScatterPower * (1-exp(-sigma_ext*scene_distance)) / sigma_ext;
+            output.transmission.rgb = exp(-sigma_ext*scene_distance);
+        }
+    }
+
+    return output;
+} \ No newline at end of file
diff --git a/src/shaders/ComputeLightLUT_CS.hlsl b/src/shaders/ComputeLightLUT_CS.hlsl
new file mode 100644
index 0000000..fca70dd
--- /dev/null
+++ b/src/shaders/ComputeLightLUT_CS.hlsl
@@ -0,0 +1,192 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- LIGHTMODE:
+ - LIGHTMODE_OMNI
+ - LIGHTMODE_SPOTLIGHT
+
+- ATTENUATIONMODE:
+ - ATTENUATIONMODE_NONE
+ - ATTENUATIONMODE_POLYNOMIAL
+ - ATTENUATIONMODE_INV_POLYNOMIAL
+
+- COMPUTEPASS:
+ - COMPUTEPASS_CALCULATE
+ - COMPUTEPASS_SUM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Packs v for LUT storage: rgb holds v divided by the scale s, alpha holds s.
+float4 PackLut(float3 v, float s)
+{
+    float4 packed;
+    packed.rgb = v/s;
+    packed.a = s;
+    return packed;
+}
+
+// Rebuilds a LUT value by multiplying the rgb payload with the scale kept in alpha.
+float3 UnpackLut(float4 v)
+{
+    float scale = v.a;
+    return v.rgb*scale;
+}
+
+Texture2D<float4> tPhaseLUT : register(t4); // passed to GetPhaseFactor() by the calculate pass
+RWTexture2D<float4> rwLightLUT_P : register(u0); // output written by both compute passes
+RWTexture2D<float4> rwLightLUT_S1 : register(u1); // output written only in the LIGHTMODE_SPOTLIGHT permutation
+RWTexture2D<float4> rwLightLUT_S2 : register(u2); // output written only in the LIGHTMODE_SPOTLIGHT permutation
+
+// These need to match the values in context_common.h
+static const uint LIGHT_LUT_DEPTH_RESOLUTION = 128; // x extent: normalises dispatchID.x (as N-1), scales the quadrature step and bounds the sum-pass loop
+static const uint LIGHT_LUT_WDOTV_RESOLUTION = 512; // y extent: normalises dispatchID.y (as N-1)
+
+#if (COMPUTEPASS == COMPUTEPASS_CALCULATE)
+
+static const uint2 BLOCK_SIZE = uint2(32, 8);
+groupshared float3 sAccum_P[BLOCK_SIZE.x*BLOCK_SIZE.y];
+
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+groupshared float3 sAccum_S1[BLOCK_SIZE.x*BLOCK_SIZE.y];
+groupshared float3 sAccum_S2[BLOCK_SIZE.x*BLOCK_SIZE.y];
+#endif
+
+// Calculate pass of the light LUT.
+// Each thread evaluates the in-scatter integrand for one LUT texel
+// (x selects the distance step t, y selects the angle). Each row of
+// BLOCK_SIZE.x threads then builds an inclusive prefix sum along x in
+// group-shared memory and writes the packed partial sums to the LUT.
+//   gthreadID  - thread position inside the group (selects the shared slot)
+//   dispatchID - LUT texel handled by this thread
+//   groupID    - unused
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint2 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    uint idx = gthreadID.y*BLOCK_SIZE.x + gthreadID.x;
+    float2 coord = float2(dispatchID) / float2(LIGHT_LUT_DEPTH_RESOLUTION-1, LIGHT_LUT_WDOTV_RESOLUTION-1);
+
+    float angle = coord.y * PI;
+    float cos_WV = -cos(angle);
+
+    float3 vW = g_vEyePosition - g_vLightPos;
+    float Wsqr = dot(vW, vW);
+    float W_length = sqrt(Wsqr);
+    float t0 = max(0.0f, W_length-g_fLightZFar);
+    float t_range = g_fLightZFar + W_length - t0;
+    float t = t0 + coord.x*t_range;
+
+    float WdotV = cos_WV*W_length;
+    float Dsqr = max(Wsqr+2*WdotV*t+t*t, 0.0f);
+    float D = sqrt(Dsqr);
+    // Falls back to cos_WV when t or D is zero and the ratio is undefined.
+    float cos_phi = (t>0 && D>0) ? (t*t + Dsqr - Wsqr) / (2 * t*D) : cos_WV;
+    float3 extinction = exp(-g_vSigmaExtinction*(D+t));
+    float3 phase_factor = GetPhaseFactor(tPhaseLUT, -cos_phi);
+    float attenuation = AttenuationFunc(D);
+    float3 inscatter = phase_factor*attenuation*extinction;
+
+    // Scale by dT because we are doing quadrature
+    inscatter *= t_range / float(LIGHT_LUT_DEPTH_RESOLUTION);
+
+    inscatter = inscatter / g_vScatterPower;
+    sAccum_P[idx] = inscatter;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    sAccum_S1[idx] = (D==0) ? 0.0f : inscatter/D;
+    sAccum_S2[idx] = t*sAccum_S1[idx];
+#endif
+
+    // Every thread's initial value must be visible before neighbours read it.
+    GroupMemoryBarrierWithGroupSync();
+
+    // Inclusive prefix sum along x. Each step is split into read / barrier /
+    // write / barrier so that no thread reads a slot another thread is
+    // updating; the barriers sit outside the divergent branches.
+    [unroll]
+    for (uint d=1; d<BLOCK_SIZE.x; d = d<<1)
+    {
+        bool has_partner = (gthreadID.x >= d);
+
+        float3 partner_P = float3(0,0,0);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        float3 partner_S1 = float3(0,0,0);
+        float3 partner_S2 = float3(0,0,0);
+#endif
+        if (has_partner)
+        {
+            partner_P = sAccum_P[idx - d];
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+            partner_S1 = sAccum_S1[idx - d];
+            partner_S2 = sAccum_S2[idx - d];
+#endif
+        }
+
+        // All reads of this step are done before any slot is overwritten.
+        GroupMemoryBarrierWithGroupSync();
+
+        if (has_partner)
+        {
+            sAccum_P[idx] += partner_P;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+            sAccum_S1[idx] += partner_S1;
+            sAccum_S2[idx] += partner_S2;
+#endif
+        }
+
+        // All writes of this step are visible before the next step reads.
+        GroupMemoryBarrierWithGroupSync();
+    }
+
+    static const float LUT_SCALE = 32.0f / 32768.0f;
+    rwLightLUT_P[dispatchID] = PackLut(sAccum_P[idx], LUT_SCALE);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    float max_t = 2*(t0 + t_range);
+    rwLightLUT_S1[dispatchID] = PackLut(sAccum_S1[idx], LUT_SCALE);
+    rwLightLUT_S2[dispatchID] = PackLut(sAccum_S2[idx], LUT_SCALE*max_t);
+#endif
+}
+
+#elif (COMPUTEPASS == COMPUTEPASS_SUM)
+
+static const uint2 BLOCK_SIZE = uint2(32, 4);
+
+Texture2D<float4> tLightLUT_P : register(t5);
+Texture2D<float4> tLightLUT_S1 : register(t6);
+Texture2D<float4> tLightLUT_S2 : register(t7);
+
+// Running total carried from one BLOCK_SIZE.x-wide segment to the next, one entry per row.
+groupshared float3 sOffset[BLOCK_SIZE.y];
+
+// Sum pass of the light LUT.
+// Walks each LUT row in segments of BLOCK_SIZE.x texels and adds the total of
+// all previous segments (kept in sOffset) to every value, turning the
+// per-segment sums into sums over the whole row.
+//   gthreadID  - thread position inside the group
+//   dispatchID - xy: LUT texel inside the first segment; z: which LUT to
+//                process in the spotlight permutation (0 = P, 1 = S1, 2 = S2)
+//   groupID    - unused
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint3 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    if (gthreadID.x == 0)
+    {
+        sOffset[gthreadID.y] = float3(0, 0, 0);
+    }
+    // The cleared offset must be visible to the whole row before it is read.
+    GroupMemoryBarrierWithGroupSync();
+
+    [unroll]
+    for (uint t = 0; t < LIGHT_LUT_DEPTH_RESOLUTION; t += BLOCK_SIZE.x)
+    {
+        uint2 tc = dispatchID.xy + uint2(t, 0);
+        float4 s = float4(0,0,0,0);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            s = tLightLUT_S2[tc];
+        else if (dispatchID.z == 1)
+            s = tLightLUT_S1[tc];
+        else
+            s = tLightLUT_P[tc];
+#else
+        s = tLightLUT_P[tc];
+#endif
+        float3 v = UnpackLut(s) + sOffset[gthreadID.y];
+
+        // Every thread of the row has read the old offset before the last
+        // thread replaces it.
+        GroupMemoryBarrierWithGroupSync();
+        if (gthreadID.x == (BLOCK_SIZE.x-1))
+        {
+            // The last thread of the segment holds the running total so far.
+            sOffset[gthreadID.y] = v;
+        }
+        // The new offset is visible before the next segment reads it.
+        GroupMemoryBarrierWithGroupSync();
+
+        // Enlarge the stored scale by the number of segments that were summed.
+        s.a *= LIGHT_LUT_DEPTH_RESOLUTION/BLOCK_SIZE.x;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            rwLightLUT_S2[tc] = PackLut(v, s.a);
+        else if (dispatchID.z == 1)
+            rwLightLUT_S1[tc] = PackLut(v, s.a);
+        else
+            rwLightLUT_P[tc] = PackLut(v, s.a);
+#else
+        rwLightLUT_P[tc] = PackLut(v, s.a);
+#endif
+    }
+}
+
+#endif \ No newline at end of file
diff --git a/src/shaders/ComputePhaseLookup_PS.hlsl b/src/shaders/ComputePhaseLookup_PS.hlsl
new file mode 100644
index 0000000..7487c40
--- /dev/null
+++ b/src/shaders/ComputePhaseLookup_PS.hlsl
@@ -0,0 +1,150 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// using the phase functions directly isn't correct, because they are supposed to be
+// integrated over the subtended solid angle. This falls apart as sin(theta)
+// approaches 0 (i.e. cos(theta) approaches +1 or -1).
+// We apply a sliding scale to the functions to compensate for this somewhat.
+
+#define NORMALIZE_PHASE_FUNCTIONS 1 // non-zero: the phase functions below return their rescaled variants; 0: the plain formulas
+
+// Constant phase value 1 / (4*PI); it does not depend on the scattering angle.
+float ScatterPhase_Isotropic()
+{
+    const float four_pi = 4.f * PI;
+    return 1.f / four_pi;
+}
+
+// Rayleigh phase term (3/(16*PI)) * (1 + cosa^2).
+// With NORMALIZE_PHASE_FUNCTIONS set, the result is additionally multiplied
+// by (1 - cosa^4 / 8).
+float ScatterPhase_Rayleigh(float cosa)
+{
+    float cos_sqr = cosa*cosa; // ^2
+    float rayleigh = (3.f/(16.f*PI)) * (1.f + cos_sqr);
+#if NORMALIZE_PHASE_FUNCTIONS
+    float cos_pow4 = cos_sqr*cos_sqr; // ^4
+    rayleigh = rayleigh*(1-cos_pow4/8.f);
+#endif
+    return rayleigh;
+}
+
+// Henyey-Greenstein phase term for scattering-angle cosine cosa and
+// parameter g. NORMALIZE_PHASE_FUNCTIONS selects the "normalized" variant;
+// otherwise the classic formula is used.
+float ScatterPhase_HenyeyGreenstein(float cosa, float g)
+{
+#if NORMALIZE_PHASE_FUNCTIONS
+    // "normalized" Henyey-Greenstein
+    const float gg = g*g;
+    const float upper = (1 - abs(g));
+    const float lower = sqrt( max(1-2*g*cosa+gg, 0) );
+    const float ratio = upper/lower;
+    const float ratio_cubed = ratio*ratio*ratio;
+    const float weight = gg + (1 - gg) / (4*PI);
+    return weight * ratio_cubed;
+#else
+    // Classic Henyey-Greenstein
+    const float upper = (1.f-g*g);
+    const float base = (1.f + g*g - 2.f*g*cosa);
+    return (1.f / (4.f*PI)) * upper / pow(abs(base), 1.5f);
+#endif
+}
+
+// "Hazy" Mie phase term (1/(4*PI)) * (0.5 + 4.5 * h^8) with h = (1 + cosa) / 2.
+// With NORMALIZE_PHASE_FUNCTIONS set, the result is additionally multiplied
+// by (1 - h^8 / 2).
+float ScatterPhase_MieHazy(float cosa)
+{
+    float h1 = 0.5f*(1+cosa);
+    float h2 = h1*h1; // ^2
+    float h4 = h2*h2; // ^4
+    float h8 = h4*h4; // ^8
+    float hazy = (1.f/(4.f*PI))*(0.5f+(9.f/2.f)*h8);
+#if NORMALIZE_PHASE_FUNCTIONS
+    hazy = hazy * (1-h8/2.0f);
+#endif
+    return hazy;
+}
+
+// "Murky" Mie phase term (1/(4*PI)) * (0.5 + 16.5 * h^32) with h = (1 + cosa) / 2.
+// With NORMALIZE_PHASE_FUNCTIONS set, the result is additionally multiplied
+// by (1 - h^32 / 2).
+float ScatterPhase_MieMurky(float cosa)
+{
+    float h1 = 0.5f*(1+cosa);
+    float h2 = h1*h1; // ^2
+    float h4 = h2*h2; // ^4
+    float h8 = h4*h4; // ^8
+    float h16 = h8*h8; // ^16
+    float h32 = h16*h16; // ^32
+    float murky = (1.f/(4.f*PI))*(0.5f+(33.f/2.f)*h32);
+#if NORMALIZE_PHASE_FUNCTIONS
+    murky = murky * (1-h32/2.0f);
+#endif
+    return murky;
+}
+
+// Fills one texel of the phase lookup texture.
+// The angle is taken from input.vTex.y (cos_theta = -cos(PI * vTex.y)). Every
+// configured phase term is evaluated at that angle and weighted by its rgb
+// scatter value; the sum is divided by the total scatter per channel.
+// Returns the weighted phase factor in rgb and 1 in alpha.
+float4 main(VS_QUAD_OUTPUT input) : SV_TARGET
+{
+    float cos_theta = -cos(PI*input.vTex.y);
+    float3 phase_factor = float3(0,0,0);
+    float3 total_scatter = float3(0,0,0);
+
+    // These must match the PhaseFunctionType enum in NvVolumetricLighting.h
+    static const uint PHASEFUNC_ISOTROPIC = 0;
+    static const uint PHASEFUNC_RAYLEIGH = 1;
+    static const uint PHASEFUNC_HG = 2;
+    static const uint PHASEFUNC_MIEHAZY = 3;
+    static const uint PHASEFUNC_MIEMURKY = 4;
+
+    for (uint i=0; i<g_uNumPhaseTerms; ++i)
+    {
+        uint phase_func = g_uPhaseFunc[i];
+        float3 term_scatter = g_vPhaseParams[i].rgb;
+        total_scatter += term_scatter;
+        if (phase_func == PHASEFUNC_ISOTROPIC)
+        {
+            phase_factor += term_scatter*ScatterPhase_Isotropic();
+        }
+        else if (phase_func == PHASEFUNC_RAYLEIGH)
+        {
+            phase_factor += term_scatter*ScatterPhase_Rayleigh(cos_theta);
+        }
+        else if (phase_func == PHASEFUNC_HG)
+        {
+            // The alpha channel of the term carries the g parameter.
+            phase_factor += term_scatter*ScatterPhase_HenyeyGreenstein(cos_theta, g_vPhaseParams[i].a);
+        }
+        else if (phase_func == PHASEFUNC_MIEHAZY)
+        {
+            phase_factor += term_scatter*ScatterPhase_MieHazy(cos_theta);
+        }
+        else if (phase_func == PHASEFUNC_MIEMURKY)
+        {
+            phase_factor += term_scatter*ScatterPhase_MieMurky(cos_theta);
+        }
+        // Unknown function ids add to total_scatter but not to phase_factor.
+    }
+
+    // A channel whose total scatter is exactly zero (no terms, or all terms
+    // zero in that channel) would produce 0/0 = NaN. Divide such a channel by
+    // 1 instead; every other value is divided exactly as before.
+    float3 safe_total = float3(
+        (total_scatter.x != 0.0f) ? total_scatter.x : 1.0f,
+        (total_scatter.y != 0.0f) ? total_scatter.y : 1.0f,
+        (total_scatter.z != 0.0f) ? total_scatter.z : 1.0f);
+    phase_factor = phase_factor / safe_total;
+    return float4(phase_factor, 1);
+}
diff --git a/src/shaders/Debug_PS.hlsl b/src/shaders/Debug_PS.hlsl
new file mode 100644
index 0000000..a2cd9cf
--- /dev/null
+++ b/src/shaders/Debug_PS.hlsl
@@ -0,0 +1,42 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Debug output: front-facing fragments are red, back-facing ones green.
+float4 main(PS_POLYGONAL_INPUT input, bool bIsFrontFace : SV_ISFRONTFACE) : SV_TARGET
+{
+    if (bIsFrontFace)
+    {
+        return float4(1,0,0,1);
+    }
+    return float4(0,1,0,1);
+}
+ \ No newline at end of file
diff --git a/src/shaders/DownsampleDepth_PS.hlsl b/src/shaders/DownsampleDepth_PS.hlsl
new file mode 100644
index 0000000..4f4efdd
--- /dev/null
+++ b/src/shaders/DownsampleDepth_PS.hlsl
@@ -0,0 +1,82 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+ - SAMPLEMODE_SINGLE
+ - SAMPLEMODE_MSAA
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+Texture2D<float> tDepthMap : register(t0); // single-sample permutation: main() reads it with SampleLevel() and sPoint
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+Texture2DMS<float> tDepthMap : register(t0); // MSAA permutation: main() reads sample 0 with Load()
+#endif
+
+// Identity helper that hands its argument straight back. main() calls it in
+// the __PSSL__ build so that sampleID is referenced.
+uint Unused(uint input)
+{
+    uint passthrough = input;
+    return passthrough;
+}
+
+// Writes one depth value (SV_DEPTH) per output sample, fetched from tDepthMap
+// at a jittered location. The two components of g_vJitterOffset are swapped
+// on alternating pixels in a checkerboard pattern.
+//   input    - quad interpolants (vPos selects the checkerboard cell, vTex the location)
+//   sampleID - index of the output sample being shaded
+float main(
+    VS_QUAD_OUTPUT input
+    , uint sampleID : SV_SAMPLEINDEX
+    ) : SV_DEPTH
+{
+    uint2 pixel = uint2(input.vPos.xy);
+    bool odd_cell = ((pixel.x+pixel.y)%2) != 0;
+    float2 jitter = odd_cell ? g_vJitterOffset.xy : g_vJitterOffset.yx;
+
+#if defined(__PSSL__)
+    Unused(sampleID);//Fix a compiler warning with pssl.
+    float2 tc = (floor(input.vTex.xy*g_vOutputViewportSize) + GetViVjLinearSample() + jitter)*g_vOutputSize_Inv;
+#else
+    float2 tex_at_sample = EvaluateAttributeAtSample(input.vTex.xy, sampleID);
+    float2 tc = (tex_at_sample*g_vOutputViewportSize + jitter)*g_vOutputSize_Inv;
+#endif
+
+#if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    return tDepthMap.SampleLevel(sPoint, tc, 0).x;
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+    // Always reads sample 0 of the multisampled source.
+    int2 texel = int2(tc*g_vOutputSize);
+    return tDepthMap.Load(texel, 0).x;
+#endif
+}
diff --git a/src/shaders/Quad_VS.hlsl b/src/shaders/Quad_VS.hlsl
new file mode 100644
index 0000000..bbb0ae2
--- /dev/null
+++ b/src/shaders/Quad_VS.hlsl
@@ -0,0 +1,46 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Builds a vertex purely from the vertex id: ids 0, 1, 2 give texture
+// coordinates (0,0), (2,0), (0,2). The clip-space position is derived from
+// the texture coordinate (z = w = 1) and un-projected with g_mViewProjInv to
+// obtain a world position, which is then divided by its own w.
+VS_QUAD_OUTPUT main(uint id : SV_VERTEXID)
+{
+    float2 corner = float2((id << 1) & 2, id & 2);
+    float4 clip_pos = float4(corner * float2(2,-2) + float2(-1,1), 1, 1);
+
+    float4 world_pos = mul( g_mViewProjInv, clip_pos );
+    float inv_w = 1.0f / world_pos.w;
+    world_pos *= inv_w;
+
+    VS_QUAD_OUTPUT output;
+    output.vTex = corner;
+    output.vPos = clip_pos;
+    output.vWorldPos = world_pos;
+    return output;
+}
diff --git a/src/shaders/RenderVolume_DS.hlsl b/src/shaders/RenderVolume_DS.hlsl
new file mode 100644
index 0000000..880e9ed
--- /dev/null
+++ b/src/shaders/RenderVolume_DS.hlsl
@@ -0,0 +1,181 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+ - SHADOWMAPTYPE_ATLAS
+ - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+ - CASCADECOUNT_1: 1
+ - CASCADECOUNT_2: 2
+ - CASCADECOUNT_3: 3
+ - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+ - VOLUMETYPE_FRUSTUM
+ - VOLUMETYPE_PARABOLOID
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#define COARSE_CASCADE (CASCADECOUNT-1) // highest cascade index; the cascade loop in main() starts here and counts down to 0
+
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+Texture2D<float> tShadowMap : register(t1); // atlas permutation: one 2D texture addressed through g_vElementOffsetAndScale
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+Texture2DArray<float> tShadowMap : register(t1); // array permutation: the slice is chosen with g_uElementIndex
+#endif
+
+// Fetches a bilinearly filtered depth value from the shadow map.
+//   tex_coord - 0..1 coordinate inside the element
+//   cascade   - index into g_vElementOffsetAndScale (and g_uElementIndex for arrays)
+// tex_coord is scaled by .zw and offset by .xy of the element entry before
+// the lookup. Returns 1.0 if neither shadow-map permutation is active.
+float SampleShadowMap(float2 tex_coord, int cascade)
+{
+    float2 element_scale = g_vElementOffsetAndScale[cascade].zw;
+    float2 element_offset = g_vElementOffsetAndScale[cascade].xy;
+    float2 element_uv = element_scale * tex_coord + element_offset;
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+    return tShadowMap.SampleLevel( sBilinear, element_uv, 0).x;
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+    return tShadowMap.SampleLevel( sBilinear, float3( element_uv, (float)g_uElementIndex[cascade] ), 0).x;
+#else
+    return 1.0f;
+#endif
+}
+
+// Paraboloid projection of P.
+// xy: the normalised direction's xy divided by (direction.z + 1).
+// z : the length of P remapped so that zNear -> 0 and zFar -> 1.
+float3 ParaboloidProject(float3 P, float zNear, float zFar)
+{
+    float dist = length(P.xyz);
+    float3 dir = P.xyz/dist;
+
+    float3 projected;
+    projected.x = dir.x / (dir.z + 1);
+    projected.y = dir.y / (dir.z + 1);
+    projected.z = (dist - zNear) / (zFar - zNear);
+    return projected;
+}
+
+// Reverses the paraboloid projection: P.xy is the projected direction and
+// P.z the normalised distance (0 at zNear, 1 at zFar).
+float3 ParaboloidUnproject(float3 P, float zNear, float zFar)
+{
+    // Distance along the reconstructed direction.
+    float dist = P.z*(zFar-zNear) + zNear;
+
+    // Solve the quadratic for the direction's z component.
+    float r_sqr = P.x*P.x + P.y*P.y;
+    float qa = r_sqr + 1;
+    float qb = 2*(r_sqr);
+    float qc = r_sqr - 1;
+    float discriminant = qb*qb - 4*qa*qc;
+    float z = (-qb + sqrt(discriminant)) / (2*qa);
+
+    // Undo the division by (z + 1), then scale the direction by the distance.
+    float3 dir = float3(P.x * (z + 1), P.y * (z + 1), z);
+    return dir*dist;
+}
+
+// Identity helper that hands its argument straight back. main() calls it so
+// that the patch-constant input is referenced (silences a pssl warning).
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT Unused(HS_POLYGONAL_CONSTANT_DATA_OUTPUT input)
+{
+    HS_POLYGONAL_CONSTANT_DATA_OUTPUT passthrough = input;
+    return passthrough;
+}
+
+[domain("quad")]
+PS_POLYGONAL_INPUT main( HS_POLYGONAL_CONSTANT_DATA_OUTPUT input, float2 uv : SV_DOMAINLOCATION, const OutputPatch<HS_POLYGONAL_CONTROL_POINT_OUTPUT, 4> Patch ) // domain shader: turns a quad-patch location into a displaced world/clip position
+{
+ Unused(input);//Fix a compiler warning with pssl.
+
+ PS_POLYGONAL_INPUT output = (PS_POLYGONAL_INPUT)0;
+
+ float3 vClipIn1 = lerp(Patch[0].vClipPos.xyz, Patch[1].vClipPos.xyz, uv.x); // edge 0 -> 1 along uv.x
+ float3 vClipIn2 = lerp(Patch[3].vClipPos.xyz, Patch[2].vClipPos.xyz, uv.x); // opposite edge 3 -> 2 along uv.x
+ float3 vClipIn = lerp(vClipIn1, vClipIn2, uv.y); // blend the two edges along uv.y
+
+ float4 vPos1 = lerp(Patch[0].vWorldPos, Patch[1].vWorldPos, uv.x); // same interpolation for the world position
+ float4 vPos2 = lerp(Patch[3].vWorldPos, Patch[2].vWorldPos, uv.x);
+ float4 vWorldPos = lerp(vPos1, vPos2, uv.y);
+
+ if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+ {
+ if (all(abs(vClipIn.xy) < EDGE_FACTOR)) // vertex lies inside the EDGE_FACTOR bound: place it from the shadow map
+ {
+ int iCascade = -1; // stays -1 when no cascade supplies a usable sample
+ float4 vClipPos = float4(0,0,0,1);
+
+ [unroll]
+ for (int i = COARSE_CASCADE;i >= 0; --i) // highest index first; a later (lower-index) hit overwrites an earlier one
+ {
+ // Try to refetch from finer cascade
+ float4 vClipPosCascade = mul( g_mLightProj[i], vWorldPos );
+ vClipPosCascade *= 1.f / vClipPosCascade.w;
+ if (all(abs(vClipPosCascade.xy) < 1.0f)) // the point projects inside this cascade
+ {
+
+ float2 vTex = float2(0.5*vClipPosCascade.x + 0.5, -0.5*vClipPosCascade.y + 0.5); // clip xy -> texture coordinate, y flipped
+ float depthSample = SampleShadowMap(vTex, i);
+ if (depthSample < 1.0f) // samples equal to 1.0 are ignored - presumably the cleared/far value, confirm
+ {
+
+ vClipPos.xy = vClipPosCascade.xy;
+ vClipPos.z = depthSample;
+ iCascade = i;
+ }
+ }
+ }
+
+ if (iCascade >= 0)
+ {
+ vWorldPos = mul( g_mLightProjInv[iCascade], float4(vClipPos.xyz, 1) ); // back to world space at the sampled shadow depth
+ vWorldPos *= 1.0f / vWorldPos.w;
+ vWorldPos.xyz = g_vEyePosition + (1.0f-g_fGodrayBias)*(vWorldPos.xyz-g_vEyePosition); // pull the point toward the eye by the fraction g_fGodrayBias
+ }
+ }
+ else
+ {
+ vWorldPos = mul(g_mLightToWorld, float4(vClipIn.xy, 1, 1)); // outside the bound: use the interpolated clip xy at z = 1
+ vWorldPos *= 1.0f / vWorldPos.w;
+ }
+ }
+ else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+ {
+ vClipIn.xyz = normalize(vClipIn.xyz);
+ float4 shadowPos = mul(g_mLightProj[0], vWorldPos);
+ shadowPos.xyz = shadowPos.xyz/shadowPos.w;
+ uint hemisphereID = (shadowPos.z > 0) ? 0 : 1; // element 0 for z > 0, element 1 otherwise
+ shadowPos.z = abs(shadowPos.z); // fold onto the positive-z side before projecting
+ shadowPos.xyz = ParaboloidProject(shadowPos.xyz, g_fLightZNear, g_fLightZFar);
+ float2 shadowTC = float2(0.5f, -0.5f)*shadowPos.xy + 0.5f; // projected xy -> texture coordinate, y flipped
+ float depthSample = SampleShadowMap(shadowTC, hemisphereID); // hemisphereID is passed as the cascade/element index
+ float sceneDepth = depthSample*(g_fLightZFar-g_fLightZNear)+g_fLightZNear; // normalised depth -> distance between the light's near and far values
+ vWorldPos = mul( g_mLightProjInv[0], float4(vClipIn.xyz * sceneDepth, 1)); // scale the normalised direction by that distance and transform with g_mLightProjInv[0]
+ vWorldPos *= 1.0f / vWorldPos.w;
+ }
+
+ // Transform world position with viewprojection matrix
+ output.vWorldPos = vWorldPos;
+ output.vPos = mul( g_mViewProj, output.vWorldPos );
+ return output;
+}
diff --git a/src/shaders/RenderVolume_HS.hlsl b/src/shaders/RenderVolume_HS.hlsl
new file mode 100644
index 0000000..1689e15
--- /dev/null
+++ b/src/shaders/RenderVolume_HS.hlsl
@@ -0,0 +1,182 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+ - SHADOWMAPTYPE_ATLAS
+ - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+ - CASCADECOUNT_1: 1
+ - CASCADECOUNT_2: 2
+ - CASCADECOUNT_3: 3
+ - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+ - VOLUMETYPE_FRUSTUM
+ - VOLUMETYPE_PARABOLOID
+
+- MAXTESSFACTOR:
+ - MAXTESSFACTOR_LOW: 16.0f
+ - MAXTESSFACTOR_MEDIUM: 32.0f
+ - MAXTESSFACTOR_HIGH: 64.0f
+%% MUX_END %%
+*/
+
+#define COARSE_CASCADE (CASCADECOUNT-1)
+
+#include "ShaderCommon.h"
+
+// Returns the point on the segment [vStartPos, vEndPos] that is closest to the
+// eye position (g_vEyePosition).
+//   vStartPos, vEndPos - segment endpoints, expressed in the same space as g_vEyePosition
+// A degenerate segment (both endpoints identical) yields vStartPos; the previous
+// form divided by the segment length twice and produced NaNs in that case.
+float3 NearestPos(float3 vStartPos, float3 vEndPos)
+{
+    float3 vToEye = g_vEyePosition - vStartPos;
+    float3 vLine = vEndPos - vStartPos;
+    float lineLengthSqr = dot(vLine, vLine);
+    // Projection parameter normalized to [0,1] along the segment. This equals
+    // clamp(dot/len, 0, len)/len from the original formulation, without the sqrt.
+    float t = (lineLengthSqr > 0.0f) ? saturate(dot(vToEye, vLine) / lineLengthSqr) : 0.0f;
+    return vStartPos + t*vLine;
+}
+
+// Tessellation factor for the edge vStartPos -> vEndPos: the edge length is
+// scaled by g_mProj._m11 over the clip-space w of a point placed on the view
+// axis at the edge midpoint's distance from the eye, multiplied by the output
+// viewport height, and divided by g_fTargetRaySize. The result is limited to
+// the range [1, MAXTESSFACTOR].
+float CalcTessFactor(float3 vStartPos, float3 vEndPos)
+{
+    float3 vMidpoint = 0.5f*(vStartPos+vEndPos);
+    float fEdgeLength = length(vEndPos - vStartPos);
+    float fEyeDistance = length(vMidpoint.xyz - g_vEyePosition);
+
+    // Only the w component of this projected point is used below
+    float4 vClipAtDistance = mul( g_mProj, float4(0, 0, fEyeDistance, 1) );
+    float fProjectedSize = abs(fEdgeLength * g_mProj._m11 / vClipAtDistance.w);
+
+    float fDesiredSplits = (fProjectedSize*g_vOutputViewportSize.y)/(g_fTargetRaySize);
+    float fAtLeastOne = max(1, fDesiredSplits);
+    return min(MAXTESSFACTOR, fAtLeastOne);
+}
+
+// Conservative visibility test for the segment vPos1 -> vPos2, given as
+// positions that have already been divided by w.
+// Returns false only when ALL of the following hold at once:
+//   - both endpoints lie beyond the same X bound (> 1 or < -1),
+//   - both endpoints lie beyond the same Y bound (> 1 or < -1),
+//   - both endpoints have z < 0.
+// NOTE(review): a per-plane rejection would normally reject when ANY one of
+// these holds; confirm the weaker combined test is intended before tightening it.
+bool IntersectsFrustum(float4 vPos1, float4 vPos2)
+{
+    bool bBothOutsideX = (vPos1.x > 1.0 && vPos2.x > 1.0) || (vPos1.x < -1.0 && vPos2.x < -1.0);
+    bool bBothOutsideY = (vPos1.y > 1.0 && vPos2.y > 1.0) || (vPos1.y < -1.0 && vPos2.y < -1.0);
+    bool bBothNegativeZ = (vPos1.z < 0.0 && vPos2.z < 0.0);
+    // De Morgan: (!x || !y || !z) == !(x && y && z)
+    return !(bBothOutsideX && bBothOutsideY && bBothNegativeZ);
+}
+/* Patch-constant function for the 4-control-point volume patches: fills
+ * output.fEdges[0..3] and output.fInside[0..1].
+ *   - The patch counts as visible when IntersectsFrustum() accepts, for at least
+ *     one control point, the pair (control point, its origin point) after both
+ *     have been transformed by g_mViewProj and divided by w.
+ *   - Visible patches get edge factors derived from CalcTessFactor(); patches
+ *     that are not visible get all-zero factors.
+ */
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT HS_POLYGONAL_CONSTANT_FUNC( /*uint PatchID : SV_PRIMITIVEID,*/ const OutputPatch<HS_POLYGONAL_CONTROL_POINT_OUTPUT, 4> opPatch)
+{
+ HS_POLYGONAL_CONSTANT_DATA_OUTPUT output = (HS_POLYGONAL_CONSTANT_DATA_OUTPUT)0;
+
+ bool bIsVisible = false;
+#if 1
+ // Frustum cull: test every control point together with the point it originates from
+ [unroll]
+ for (int j=0; j<4; ++j)
+ {
+ float4 vScreenClip = mul(g_mViewProj, opPatch[j].vWorldPos);
+ vScreenClip *= 1.0f / vScreenClip.w; // perspective divide
+ float4 vOriginPos = float4(0,0,0,1);
+ if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+ {
+ vOriginPos = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1)); // same light-space xy as the control point, at z = 0
+ }
+ else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+ {
+ vOriginPos = float4(g_vLightPos, 1); // every control point shares the light position as origin
+ }
+ float4 vScreenClipOrigin = mul(g_mViewProj, vOriginPos);
+ vScreenClipOrigin *= 1.0f / vScreenClipOrigin.w; // perspective divide
+ bIsVisible = bIsVisible || IntersectsFrustum(vScreenClip, vScreenClipOrigin); // one accepted pair is enough
+ }
+#else // never compiled while the condition above is the literal 1
+ bIsVisible = true;
+#endif
+
+ if (bIsVisible)
+ {
+ float3 nearest_pos[4]; // per control point: point on (origin -> control point) closest to the eye
+ for (int j=0; j < 4; ++j)
+ {
+ float3 start_pos;
+ if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+ {
+ float4 p = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1));
+ start_pos = p.xyz / p.w;
+ }
+ else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+ start_pos = g_vLightPos;
+ else
+ start_pos = float3(0, 0, 0);
+ nearest_pos[j] = NearestPos(start_pos, opPatch[j].vWorldPos.xyz);
+ }
+
+ float tess_factor[4];
+ [unroll]
+ for (int k=0; k<4; ++k)
+ {
+ float tess_near = CalcTessFactor(nearest_pos[(k+3)%4], nearest_pos[k]); // edge k joins control points (k+3)%4 and k
+ float tess_far = CalcTessFactor(opPatch[(k+3)%4].vWorldPos.xyz, opPatch[k].vWorldPos.xyz);
+ tess_factor[k] = max(tess_near, tess_far);
+ if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+ {
+ bool bIsEdge = !(all((abs(opPatch[(k + 3) % 4].vClipPos.xy) < EDGE_FACTOR) || (abs(opPatch[k].vClipPos.xy) < EDGE_FACTOR))); // true when, in x or in y, both endpoints have |coord| >= EDGE_FACTOR
+ output.fEdges[k] = (bIsEdge) ? 1.0f : tess_factor[k]; // such edges are left unsubdivided
+ }
+ else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+ {
+ output.fEdges[k] = tess_factor[k];
+ }
+ else
+ {
+ output.fEdges[k] = 1;
+ }
+
+ }
+ output.fInside[0] = max(tess_factor[1], tess_factor[3]); // inside factors use the unclamped tess_factor of opposite edges
+ output.fInside[1] = max(tess_factor[0], tess_factor[2]);
+ }
+ else
+ {
+ output.fEdges[0] = 0; // all factors zero for a patch that failed the visibility test
+ output.fEdges[1] = 0;
+ output.fEdges[2] = 0;
+ output.fEdges[3] = 0;
+ output.fInside[0] = 0;
+ output.fInside[1] = 0;
+ }
+
+ return output;
+}
+
+// Hull shader control-point stage: forwards the world-space and light clip-space
+// position of control point uCPID unchanged; every other output member is zero.
+[domain("quad")]
+[partitioning("integer")]
+[outputtopology("triangle_ccw")]
+[outputcontrolpoints(4)]
+[patchconstantfunc("HS_POLYGONAL_CONSTANT_FUNC")]
+[maxtessfactor(MAXTESSFACTOR)]
+HS_POLYGONAL_CONTROL_POINT_OUTPUT main( InputPatch<HS_POLYGONAL_INPUT, 4> ipPatch, uint uCPID : SV_OUTPUTCONTROLPOINTID )
+{
+    HS_POLYGONAL_CONTROL_POINT_OUTPUT controlPoint = (HS_POLYGONAL_CONTROL_POINT_OUTPUT)0;
+    controlPoint.vClipPos = ipPatch[uCPID].vClipPos;
+    controlPoint.vWorldPos = ipPatch[uCPID].vWorldPos;
+    return controlPoint;
+}
diff --git a/src/shaders/RenderVolume_PS.hlsl b/src/shaders/RenderVolume_PS.hlsl
new file mode 100644
index 0000000..f2724c2
--- /dev/null
+++ b/src/shaders/RenderVolume_PS.hlsl
@@ -0,0 +1,403 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+%% MUX_BEGIN %%
+# Define the shader permutations for code generation
+
+# Are we operating on single sample or MSAA buffer
+- SAMPLEMODE:
+ - SAMPLEMODE_SINGLE
+ - SAMPLEMODE_MSAA
+
+# What type of light are we rendering
+- LIGHTMODE:
+ - LIGHTMODE_DIRECTIONAL
+ - LIGHTMODE_SPOTLIGHT
+ - LIGHTMODE_OMNI
+
+# What sort of pass are we rendering
+- PASSMODE:
+ - PASSMODE_GEOMETRY
+ - PASSMODE_SKY
+ - PASSMODE_FINAL
+
+# What is our distance attenuation function
+- ATTENUATIONMODE:
+ - ATTENUATIONMODE_NONE
+ - ATTENUATIONMODE_POLYNOMIAL
+ - ATTENUATIONMODE_INV_POLYNOMIAL
+
+# What is our spotlight angular falloff mode
+- FALLOFFMODE:
+ - FALLOFFMODE_NONE
+ - FALLOFFMODE_FIXED
+ - FALLOFFMODE_CUSTOM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#if (PASSMODE == PASSMODE_FINAL) // only the final pass binds and reads the scene depth texture
+# if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+// Single-sample depth: loads mip 0 at integer pixel 'pos'; the sample index 's' is ignored.
+ Texture2D<float> tSceneDepth : register(t2);
+ float LoadSceneDepth(uint2 pos, uint s)
+ {
+ return tSceneDepth.Load(int3(pos.xy, 0)).x;
+ }
+
+# elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+// Multisampled depth: loads sub-sample 's' at integer pixel 'pos'.
+ Texture2DMS<float> tSceneDepth : register(t2);
+ float LoadSceneDepth(uint2 pos, uint s)
+ {
+ return tSceneDepth.Load(int2(pos.xy), s).x;
+ }
+
+# endif
+#else
+// Other pass modes declare no depth texture; this stub returns the constant 1.0 so the PASSMODE_FINAL branch of main still compiles.
+ float LoadSceneDepth(uint2 pos, uint s)
+ {
+ return 1.0f;
+ }
+
+#endif
+
+Texture2D<float4> tPhaseLUT : register(t4); // handed to GetPhaseFactor() by the integrators below
+Texture2D<float4> tLightLUT_P : register(t5); // sampled through SampleLut() by Integrate_Spotlight and Integrate_Omni
+Texture2D<float4> tLightLUT_S1 : register(t6); // FALLOFFMODE_FIXED only: term weighted by WdotL in Integrate_Spotlight
+Texture2D<float4> tLightLUT_S2 : register(t7); // FALLOFFMODE_FIXED only: term weighted by VdotL in Integrate_Spotlight
+
+// Horizontal LUT coordinate for a distance t along the view ray: t is measured
+// from t_begin = max(0, light_dist - g_fLightZFar) and normalized by the span
+// (g_fLightZFar + light_dist - t_begin).
+float GetLutCoord_X(float t, float light_dist)
+{
+    float t_begin = max(0.0f, light_dist-g_fLightZFar);
+    float t_span = g_fLightZFar + light_dist - t_begin;
+    float t_offset = t-t_begin;
+    return t_offset / t_span;
+}
+
+// Vertical LUT coordinate: the angle whose cosine is -cos_theta, mapped from
+// [0, PI] to [0, 1].
+//   cos_theta - cosine of an angle; callers in this file pass a dot product of
+//               two normalized vectors.
+// The argument is clamped to [-1, 1] first: rounding can push such a dot
+// product marginally outside that range, where acos has no valid result.
+float GetLutCoord_Y(float cos_theta)
+{
+    float clamped_cos = clamp(-cos_theta, -1.0f, 1.0f);
+    return acos(clamped_cos) / PI;
+}
+
+// Fetches mip 0 of 'tex' at 'tc' with the sBilinear sampler and returns the
+// rgb channels multiplied by the alpha channel.
+float3 SampleLut(Texture2D tex, float2 tc)
+{
+    float4 texel = tex.SampleLevel(sBilinear, tc, 0);
+    float3 rgb = texel.rgb;
+    float scale = texel.a;
+    return rgb*scale;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Integration code
+
+// Numerically integrates fn(data, t) over [t0, t1] with a 1-4-2-...-4-1
+// weighted sum scaled by step/3 (composite Simpson weighting).
+//   result     - float3 l-value that receives the integral
+//   fn         - evaluator invoked as fn(data, t); its result is accumulated into a float3
+//   data       - forwarded unchanged as the first argument of fn
+//   step_count - number of intervals; the weighting lines up for even counts
+//                (this file uses 6 and 8)
+//   t0, t1     - integration bounds
+// Value arguments are parenthesized and the temporaries carry an integ_ prefix
+// so that compound argument expressions and caller locals named t, sum, etc.
+// cannot interfere with the expansion.
+#define INTEGRATE(result, fn, data, step_count, t0, t1) \
+{ \
+    float integ_step = ((t1)-(t0))/float(step_count); \
+    float3 integ_sum = float3(0,0,0); \
+    integ_sum += fn(data, (t0)); /* first endpoint, weight 1 */ \
+    float integ_t = (t0)+integ_step; \
+    [unroll] \
+    for (uint integ_i=1; integ_i<(step_count)-1; integ_i += 2) \
+    { \
+        integ_sum += 4*fn(data, integ_t); /* odd interior node */ \
+        integ_t += integ_step; \
+        integ_sum += 2*fn(data, integ_t); /* even interior node */ \
+        integ_t += integ_step; \
+    } \
+    integ_sum += 4*fn(data, integ_t); /* last odd interior node */ \
+    integ_sum += fn(data, (t1)); /* second endpoint, weight 1 */ \
+    (result) = (integ_step/3.0f) * integ_sum; \
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Directional Light
+
+struct LightEvaluatorData_Directional { // per-ray inputs consumed by LightEvaluator_Directional
+ float VdotL; // dot(view direction, light direction), as assigned in Integrate_Directional
+ float3 sigma; // per-channel coefficient applied inside exp(-sigma * ...) by the evaluator
+};
+
+// Integrand for the directional light at distance t along the view ray:
+// exp(-sigma * (t + g_fLightToEyeDepth + t*VdotL)).
+float3 LightEvaluator_Directional(LightEvaluatorData_Directional data, float t)
+{
+    float3 depth_along_light = g_fLightToEyeDepth + t*data.VdotL;
+    float3 total_path = t+depth_along_light;
+    return exp(-data.sigma*total_path);
+}
+
+// Integrates LightEvaluator_Directional over [0, eye_dist] with the INTEGRATE
+// macro (6 intervals) and applies the phase factor for -VdotL plus an
+// exp(g_fLightToEyeDepth * mean(sigma)) scale.
+//   eye_dist - upper integration bound (distance along the view ray)
+//   vV       - view direction
+//   vL       - light direction
+// Fix: evaluator.sigma is now assigned from g_vSigmaExtinction. Previously the
+// value was stored in an unused local and the struct member was read while
+// still uninitialized, both by the evaluator and by the return expression.
+float3 Integrate_Directional(float eye_dist, float3 vV, float3 vL)
+{
+    float VdotL = dot(vV, vL);
+    // Manually integrate over interval
+    LightEvaluatorData_Directional evaluator;
+    evaluator.VdotL = VdotL;
+    evaluator.sigma = g_vSigmaExtinction;
+    const uint STEP_COUNT = 6;
+    float3 integral = float3(0,0,0);
+    INTEGRATE(integral, LightEvaluator_Directional, evaluator, STEP_COUNT, 0, eye_dist);
+    return GetPhaseFactor(tPhaseLUT, -VdotL)*integral*exp(g_fLightToEyeDepth*(evaluator.sigma.r+evaluator.sigma.g+evaluator.sigma.b)/3.f);
+}
+
+// Closed-form directional light integral: phase(-VdotL) multiplied by the
+// integral of exp(-sigma*t) over t in [0, eye_dist], which is
+// (1 - exp(-sigma*eye_dist)) / sigma per channel.
+//   eye_dist - distance integrated along the view ray
+//   vV       - view direction
+//   vL       - light direction
+// Fix: a channel with zero extinction made the closed form evaluate 0/0. For
+// small optical depth x = sigma*eye_dist the series eye_dist*(1 - x/2) is used
+// instead; it tends to eye_dist as sigma tends to 0 and avoids the cancellation
+// in 1 - exp(-x).
+float3 Integrate_SimpleDirectional(float eye_dist, float3 vV, float3 vL)
+{
+    const float SMALL_OPTICAL_DEPTH = 1e-4f;
+    // Do basic directional light
+    float VdotL = dot(vV, vL);
+    float3 sigma = g_vSigmaExtinction;
+    float3 optical_depth = sigma*eye_dist;
+    float3 closed_form = (1 - exp(-optical_depth)) / (sigma);
+    float3 series_form = eye_dist*(1 - 0.5f*optical_depth);
+    // Component-wise select; a non-finite closed_form channel is discarded here
+    float3 path_integral = (optical_depth > SMALL_OPTICAL_DEPTH) ? closed_form : series_form;
+    return GetPhaseFactor(tPhaseLUT, -VdotL) * path_integral;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Spotlight
+/* Intersects the ray vW + t*vV with a cone around axis vL and reports the
+ * parameter interval in t0/t1.
+ *   t0, t1     - (out) interval bounds; both are 0 when no real roots exist or
+ *                when t0 ends up beyond t_max
+ *   t_max      - largest ray parameter of interest
+ *   cos_theta  - used as the cosine of the cone angle (squared into cos_sqr / sin_sqr)
+ *   vW, vV, vL - ray origin offset, ray direction and cone axis; assumes vV and vL
+ *                are unit length - TODO confirm against callers
+ *   WdotL, VdotL - caller-supplied dot(vW, vL) and dot(vV, vL)
+ * Returns true when the ray is considered to hit the cone.
+ */
+bool IntersectCone(out float t0, out float t1, float t_max, float cos_theta, float3 vW, float3 vV, float3 vL, float WdotL, float VdotL)
+{
+ float cos_sqr = cos_theta * cos_theta;
+ float sin_sqr = 1 - cos_sqr;
+ float3 v_proj = vV - VdotL*vL; // vV with its component along vL removed
+ float3 w_proj = vW - WdotL*vL; // vW with its component along vL removed
+// Coefficients of the quadratic A*t^2 + B*t + C = 0 solved below
+ float A = cos_sqr*dot(v_proj, v_proj) - sin_sqr*VdotL*VdotL;
+ float B = 2 * cos_sqr*dot(v_proj, w_proj) - 2 * sin_sqr*VdotL*WdotL;
+ float C = cos_sqr*dot(w_proj, w_proj) - sin_sqr*WdotL*WdotL;
+// Discriminant and denominator of the quadratic formula
+ float det = B*B - 4 * A*C;
+ float denom = 2 * A;
+ if (det < 0.0f || denom == 0.0f) // no real roots, or the equation is not quadratic
+ {
+ t0 = 0;
+ t1 = 0;
+ return false;
+ }
+ else
+ {
+ bool hit = true;
+ float root = sqrt(det);
+ t0 = (-B - root) / denom;
+ t1 = (-B + root) / denom;
+// Classify the ray origin by the cosine between vW and the axis
+ float vW_len = length(vW);
+ float WdotL_norm = (vW_len > 0.0f) ? WdotL / vW_len : 1.0f; // a zero-length vW is treated as lying on the axis
+ if (WdotL_norm >= cos_theta) // origin direction within the cone angle of +vL
+ {
+ if (VdotL >= cos_theta)
+ t1 = t_max;
+ t0 = 0; // interval starts at the ray origin
+ }
+ else if (WdotL_norm <= -cos_theta) // origin direction within the cone angle of -vL
+ {
+ if (t0 < 0 && t1>0)
+ hit = false;
+ t0 = t0; // NOTE(review): self-assignment, has no effect
+ t1 = t_max;
+ }
+ else
+ {
+ if (t0 < 0 && t1 < 0) // both roots behind the ray origin
+ hit = false;
+ else if (dot(vL, vW + t0*vV) < 0) // point at t0 lies on the negative side of the axis
+ hit = false;
+ else if (t1<0)
+ t1 = t_max;
+ }
+// Reject intervals that start beyond the range of interest
+ if (t0 > t_max)
+ {
+ t0 = 0;
+ t1 = 0;
+ hit = false;
+ }
+
+ return hit;
+ }
+}
+
+struct LightEvaluatorData_Spotlight // per-ray inputs consumed by LightEvaluator_Spotlight; filled in Integrate_Spotlight
+{
+ float3 sigma; // set from g_vSigmaExtinction
+ float light_theta; // set from g_fLightFalloffAngle; subtracted from a cosine in the evaluator
+ float light_falloff_power; // set from g_fLightFalloffPower; exponent of the angular falloff
+ float Wsqr; // dot(vW, vW)
+ float WdotV; // dot(vW, vV)
+ float WdotL; // dot(vW, vL)
+ float VdotL; // dot(vV, vL)
+};
+
+// Integrand for the custom-falloff spotlight at distance t along the view ray.
+// Multiplies together the phase factor, AttenuationFunc(distance), the angular
+// falloff raised to light_falloff_power, and exp(-sigma * (t + distance)),
+// where distance is the length of vW + t*vV reconstructed from the
+// precomputed dot products in 'data'.
+float3 LightEvaluator_Spotlight(LightEvaluatorData_Spotlight data, float t)
+{
+    const float ANGLE_EPSILON = 0.000001f;
+
+    // |vW + t*vV|^2 expanded from the dot products, clamped at zero before the sqrt
+    float dist_sqr = max(data.Wsqr+2*data.WdotV*t+t*t, 0.0f);
+    float dist = sqrt(dist_sqr);
+
+    // Cosine fed (negated) to the phase lookup; stays 0 when t or dist is not positive
+    float cos_phi = 0;
+    if (t>0 && dist>0)
+    {
+        cos_phi = (t*t + dist_sqr - data.Wsqr) / (2 * t*dist);
+    }
+    float3 phase_factor = GetPhaseFactor(tPhaseLUT, -cos_phi);
+
+    float distance_attenuation = AttenuationFunc(dist);
+
+    // Cosine between the light axis and the sample; defaults to 1 at zero distance
+    float dist_along_axis = data.WdotL + t*data.VdotL;
+    float cos_alpha = 1.0f;
+    if (dist>0.0f)
+    {
+        cos_alpha = dist_along_axis/dist;
+    }
+    float angle_factor = saturate(cos_alpha-data.light_theta)/(1-data.light_theta);
+    float spot_attenuation = 0.0f;
+    if (angle_factor > ANGLE_EPSILON)
+    {
+        spot_attenuation = pow(abs(angle_factor), data.light_falloff_power);
+    }
+
+    float3 media_attenuation = exp(-data.sigma*(t+dist));
+    return phase_factor*distance_attenuation*spot_attenuation*media_attenuation;
+}
+/* Spotlight integral along the view ray.
+ *   eye_dist - upper bound for the integration interval
+ *   vW       - ray origin offset passed to IntersectCone (main passes eye position minus light position)
+ *   vV, vL   - view direction and light direction
+ * Returns float3(0,0,0) when IntersectCone reports no hit. Otherwise the interval
+ * [t0, min(t1, eye_dist)] is evaluated from the LUTs (FALLOFFMODE_NONE / FALLOFFMODE_FIXED)
+ * or by numerical integration (FALLOFFMODE_CUSTOM).
+ */
+float3 Integrate_Spotlight(float eye_dist, float3 vW, float3 vV, float3 vL)
+{
+ float3 integral = float3(0, 0, 0);
+ float WdotL = dot(vW, vL);
+ float VdotL = dot(vV, vL);
+ float t0=0, t1=1;
+ if (IntersectCone(t0, t1, eye_dist, g_fLightFalloffAngle, vW, vV, vL, WdotL, VdotL))
+ {
+ t1 = min(t1, eye_dist);
+// LUT path: value at t1, minus value at t0 when the interval does not start at the eye
+ if (FALLOFFMODE == FALLOFFMODE_NONE)
+ {
+ float light_dist = length(vW);
+ float3 vW_norm = vW / light_dist; // NOTE(review): no guard for light_dist == 0
+ float2 tc;
+ tc.x = GetLutCoord_X(t1, light_dist);
+ tc.y = GetLutCoord_Y(dot(vW_norm, vV));
+ integral = SampleLut(tLightLUT_P, tc);
+ if (t0 > 0)
+ {
+ tc.x = GetLutCoord_X(t0, light_dist);
+ integral -= SampleLut(tLightLUT_P, tc);
+ }
+ integral *= g_vScatterPower;
+ }
+ else if (FALLOFFMODE == FALLOFFMODE_FIXED)
+ {
+ float light_dist = length(vW);
+ float3 vW_norm = vW / light_dist; // NOTE(review): no guard for light_dist == 0
+ float2 tc;
+ tc.x = GetLutCoord_X(t1, light_dist);
+ tc.y = GetLutCoord_Y(dot(vW_norm, vV));
+ integral = WdotL*SampleLut(tLightLUT_S1, tc) + VdotL*SampleLut(tLightLUT_S2, tc) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, tc);
+ if (t0 > 0)
+ {
+ tc.x = GetLutCoord_X(t0, light_dist);
+ integral -= WdotL*SampleLut(tLightLUT_S1, tc) + VdotL*SampleLut(tLightLUT_S2, tc) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, tc);
+ }
+ integral *= g_vScatterPower / (1-g_fLightFalloffAngle);
+ }
+ if (FALLOFFMODE == FALLOFFMODE_CUSTOM) // plain 'if', not 'else if'; presumably exclusive with the branches above since FALLOFFMODE holds a single value
+ {
+ LightEvaluatorData_Spotlight evaluator;
+ evaluator.sigma = g_vSigmaExtinction;
+ evaluator.light_theta = g_fLightFalloffAngle;
+ evaluator.light_falloff_power = g_fLightFalloffPower;
+ evaluator.Wsqr = dot(vW, vW);
+ evaluator.WdotV = dot(vW, vV);
+ evaluator.WdotL = WdotL;
+ evaluator.VdotL = VdotL;
+ const uint STEP_COUNT = 8;
+ INTEGRATE(integral, LightEvaluator_Spotlight, evaluator, STEP_COUNT, t0, t1);
+ integral *= 6; // NOTE(review): origin of this constant factor is not visible here - confirm
+ }
+ }
+ return integral;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Omni
+
+// Omni light integral: one lookup into tLightLUT_P, addressed by eye_dist and
+// by the cosine between the normalized vW and vV, scaled by g_vScatterPower.
+float3 Integrate_Omni(float eye_dist, float3 vW, float3 vV)
+{
+    float light_dist = length(vW);
+    float3 vW_norm = vW / light_dist;
+    float lut_u = GetLutCoord_X(eye_dist, light_dist);
+    float lut_v = GetLutCoord_Y(dot(vW_norm, vV));
+    float2 tc = float2(lut_u, lut_v);
+    return g_vScatterPower*SampleLut(tLightLUT_P, tc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Shader Entrypoint
+/* Pixel shader entry point. Determines the view ray (direction vV, length eye_dist)
+ * according to PASSMODE, evaluates the integral selected by LIGHTMODE along it, and
+ * returns float4(fSign * integral * g_vLightIntensity.rgb, 0).
+ */
+float4 main(
+#if (PASSMODE == PASSMODE_FINAL)
+ VS_QUAD_OUTPUT pi
+ , uint sampleID : SV_SAMPLEINDEX
+#else
+ PS_POLYGONAL_INPUT pi
+#endif
+ , bool bIsFrontFace : SV_ISFRONTFACE
+ ) : SV_TARGET
+{
+#if (PASSMODE != PASSMODE_FINAL)
+ uint sampleID = 0; // no per-sample input in these passes; a fixed index keeps the code below uniform
+#endif
+ float fSign = 0;
+ float4 vWorldPos = float4(0, 0, 0, 1);
+ float eye_dist = 0;
+ float3 vV = float3(0, 0, 0);
+ if (PASSMODE == PASSMODE_GEOMETRY)
+ {
+ fSign = bIsFrontFace ? -1.0f : 1.0f; // front faces contribute negatively, back faces positively
+ vWorldPos = pi.vWorldPos;
+ eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+ vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+ }
+ else if (PASSMODE == PASSMODE_SKY)
+ {
+ fSign = 1.0f;
+ eye_dist = g_fZFar; // ray length fixed to g_fZFar
+ vV = normalize(pi.vWorldPos.xyz - g_vEyePosition.xyz);
+ vWorldPos.xyz = g_vEyePosition.xyz + vV * eye_dist;
+ vWorldPos.w = 1;
+ }
+ else if (PASSMODE == PASSMODE_FINAL)
+ {
+ fSign = 1.0f;
+ float fSceneDepth = LoadSceneDepth(pi.vPos.xy, sampleID);
+ float4 vClipPos;
+ vClipPos.xy = float2(2, -2)*g_vViewportSize_Inv*pi.vPos.xy + float2(-1.0f, 1.0f); // pixel position -> [-1,1] range with y flipped
+ vClipPos.z = fSceneDepth;
+ vClipPos.w = 1;
+ vWorldPos = mul(g_mViewProjInv, vClipPos); // back to world space from the stored depth
+ vWorldPos *= 1.0f / vWorldPos.w;
+ eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+ vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+ }
+
+ float3 vL = g_vLightDir.xyz;
+
+ float3 integral = float3(0,0,0);
+ if (LIGHTMODE == LIGHTMODE_DIRECTIONAL)
+ {
+ integral = Integrate_SimpleDirectional(eye_dist, vV, vL); // closed-form variant; Integrate_Directional is not called here
+ }
+ else if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+ {
+ float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz; // vector from the light position to the eye
+ integral = Integrate_Spotlight(eye_dist, vW, vV, vL);
+ }
+ else if (LIGHTMODE == LIGHTMODE_OMNI)
+ {
+ float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz; // vector from the light position to the eye
+ integral = Integrate_Omni(eye_dist, vW, vV);
+ }
+ return float4(fSign*integral*g_vLightIntensity.rgb, 0);
+}
diff --git a/src/shaders/RenderVolume_VS.hlsl b/src/shaders/RenderVolume_VS.hlsl
new file mode 100644
index 0000000..dc5cdb8
--- /dev/null
+++ b/src/shaders/RenderVolume_VS.hlsl
@@ -0,0 +1,204 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+- MESHMODE:
+ - MESHMODE_FRUSTUM_GRID
+ - MESHMODE_FRUSTUM_BASE
+ - MESHMODE_FRUSTUM_CAP
+ - MESHMODE_OMNI_VOLUME
+ - MESHMODE_GEOMETRY
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+/* Bypass vertex shader.
+ * Except for MESHMODE_GEOMETRY (which forwards the POSITION input), the light-space
+ * position vClipPos is generated procedurally from SV_VERTEXID according to MESHMODE.
+ * Outputs: vClipPos, vWorldPos = g_mLightToWorld * vClipPos divided by w (scaled by
+ * g_fLightZFar first for MESHMODE_OMNI_VOLUME), and vPos = g_mViewProj * vWorldPos.
+ */
+HS_POLYGONAL_INPUT main(
+#if (MESHMODE == MESHMODE_GEOMETRY)
+ float4 input_position : POSITION,
+#endif
+ uint id : SV_VERTEXID )
+{
+#if (MESHMODE != MESHMODE_GEOMETRY)
+ float4 input_position = float4(0,0,0,1); // placeholder so the final else-branch below still compiles
+#endif
+ HS_POLYGONAL_INPUT output;
+ //
+ // Generate the mesh dynamically from the vertex ID
+ //
+ if (MESHMODE == MESHMODE_FRUSTUM_GRID)
+ {
+ const float patch_size = 2.0f / float(g_uMeshResolution); // grid of g_uMeshResolution cells spans [-1,1]
+ uint patch_idx = id / 4; // four vertices per patch
+ uint patch_row = patch_idx / g_uMeshResolution;
+ uint patch_col = patch_idx % g_uMeshResolution;
+ output.vClipPos.x = patch_size*patch_col - 1.0f;
+ output.vClipPos.y = patch_size*patch_row - 1.0f;
+// Corner of the patch selected by the low two bits of the vertex id
+ uint vtx_idx = id % 4;
+ float2 vtx_offset;
+ if (vtx_idx == 0)
+ {
+ vtx_offset = float2(0, 0);
+ }
+ else if (vtx_idx == 1)
+ {
+ vtx_offset = float2(1, 0);
+ }
+ else if (vtx_idx == 2)
+ {
+ vtx_offset = float2(1, 1);
+ }
+ else // if (vtx_idx == 3)
+ {
+ vtx_offset = float2(0, 1);
+ }
+ output.vClipPos.xy += patch_size * vtx_offset;
+
+ output.vClipPos.z = 1.0f;
+ output.vClipPos.w = 1.0f;
+ }
+ else if (MESHMODE == MESHMODE_FRUSTUM_BASE)
+ {
+ uint vtx_idx = id % 3; // three vertices per triangle
+ output.vClipPos.x = (vtx_idx == 0) ? 1 : -1;
+ output.vClipPos.y = (vtx_idx == 2) ? -1 : 1;
+ output.vClipPos.xy *= (id/3 == 0) ? 1 : -1; // triangles after the first are the first one mirrored through the origin
+ output.vClipPos.z = 1.0f;
+ output.vClipPos.w = 1.0f;
+ }
+ else if (MESHMODE == MESHMODE_FRUSTUM_CAP)
+ {
+ uint tris_per_face = g_uMeshResolution+1;
+ uint verts_per_face = 3*tris_per_face;
+ uint face_idx = id / verts_per_face;
+ uint vtx_idx = id % 3;
+ if (face_idx < 4) // the first four faces are the sides
+ {
+ // Cap Side
+ const float patch_size = 2.0f / float(g_uMeshResolution);
+ const uint split_point = (g_uMeshResolution+1)/2;
+ float3 v;
+ uint tri_idx = (id%verts_per_face)/3;
+ if (tri_idx < g_uMeshResolution) // one triangle per grid cell along the edge at v.y = 1
+ {
+ if (vtx_idx == 0)
+ v.x = (tri_idx >= split_point) ? 1 : -1; // apex at a corner of the v.y = 0 edge, chosen by which side of split_point the cell is on
+ else if (vtx_idx == 1)
+ v.x = patch_size * tri_idx - 1;
+ else // if (vtx_idx == 2)
+ v.x = patch_size * (tri_idx+1) - 1;
+ v.y = (vtx_idx == 0) ? 0 : 1;
+ }
+ else // the one extra triangle of the face
+ {
+ if (vtx_idx == 1)
+ v.x = patch_size*split_point-1;
+ else
+ v.x = (vtx_idx == 0) ? -1 : 1;
+ v.y = (vtx_idx == 1) ? 1 : 0;
+ }
+ v.z = 1;
+ v.xz *= (face_idx/2 == 0) ? 1 : -1; // faces 2 and 3 are faces 0 and 1 with x and z negated
+ output.vClipPos.xyz = (face_idx%2 == 0) ? v.zxy : v.xzy*float3(-1,1,1); // swizzle selects which axis the face is perpendicular to
+ }
+ else
+ {
+ // Z=0
+ uint tri_idx = (id-4*verts_per_face)/3;
+ output.vClipPos.x = (vtx_idx == 1) ? 1 : -1;
+ output.vClipPos.y = (vtx_idx == 2) ? 1 : -1;
+ output.vClipPos.xy *= (tri_idx == 0) ? 1 : -1; // triangles after the first are the first one mirrored through the origin
+ output.vClipPos.z = 0.0f;
+ }
+ output.vClipPos.w = 1.0f;
+ }
+ else if (MESHMODE == MESHMODE_OMNI_VOLUME)
+ {
+ uint verts_per_face = 4*g_uMeshResolution*g_uMeshResolution; // g_uMeshResolution^2 patches of four vertices
+ uint face_idx = id / verts_per_face;
+ uint face_vert_idx = id % verts_per_face;
+// Same patch grid layout as MESHMODE_FRUSTUM_GRID, evaluated per face
+ const float patch_size = 2.0f / float(g_uMeshResolution);
+ uint patch_idx = face_vert_idx / 4;
+ uint patch_row = patch_idx / g_uMeshResolution;
+ uint patch_col = patch_idx % g_uMeshResolution;
+
+ float3 P;
+ P.x = patch_size*patch_col - 1.0f;
+ P.y = patch_size*patch_row - 1.0f;
+
+ uint vtx_idx = id % 4;
+ float2 vtx_offset;
+ if (vtx_idx == 0)
+ {
+ vtx_offset = float2(0, 0);
+ }
+ else if (vtx_idx == 1)
+ {
+ vtx_offset = float2(1, 0);
+ }
+ else if (vtx_idx == 2)
+ {
+ vtx_offset = float2(1, 1);
+ }
+ else // if (vtx_idx == 3)
+ {
+ vtx_offset = float2(0, 1);
+ }
+ P.xy += patch_size * vtx_offset;
+ P.z = ((face_idx / 3) == 0) ? 1 : -1; // faces 0-2 at +1, later faces at -1
+ if ((face_idx % 3) == 0)
+ P.yzx = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+ else if ((face_idx % 3) == 1)
+ P.xzy = P.xyz * (((face_idx / 3) == 1) ? float3(1,1,1) : float3(-1,1,1)); // NOTE(review): compares against 1 where the other two cases compare against 0 - confirm intended
+ else //if ((face_idx % 3) == 2)
+ P.xyz = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+ output.vClipPos = float4(normalize(P.xyz), 1); // cube-face point pushed onto the unit sphere
+ }
+ else
+ {
+ output.vClipPos = input_position;
+
+ }
+
+ if (MESHMODE == MESHMODE_OMNI_VOLUME)
+ {
+ output.vWorldPos = mul(g_mLightToWorld, float4(g_fLightZFar*output.vClipPos.xyz, 1)); // unit direction scaled out to g_fLightZFar
+ }
+ else
+ {
+ output.vWorldPos = mul(g_mLightToWorld, output.vClipPos);
+ }
+ output.vWorldPos = output.vWorldPos / output.vWorldPos.w;
+ output.vPos = mul(g_mViewProj, output.vWorldPos);
+ return output;
+}
diff --git a/src/shaders/Resolve_PS.hlsl b/src/shaders/Resolve_PS.hlsl
new file mode 100644
index 0000000..72c07f9
--- /dev/null
+++ b/src/shaders/Resolve_PS.hlsl
@@ -0,0 +1,179 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+ - SAMPLEMODE_SINGLE
+ - SAMPLEMODE_MSAA
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+struct RESOLVE_OUTPUT // render-target outputs written by the resolve pixel shader
+{
+ float3 color : SV_TARGET0; // weighted average of the valid colour samples (zero when none were valid)
+ float2 depth : SV_TARGET1; // x: weighted mean of linearized depth, y: weighted mean of its square
+};
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA) // multisampled inputs, read per sample in main
+Texture2DMS<float4> tGodraysBuffer : register(t0);
+Texture2DMS<float> tGodraysDepth : register(t1);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE) // single-sample inputs, read at mip 0 in main
+Texture2D<float4> tGodraysBuffer : register(t0);
+Texture2D<float> tGodraysDepth : register(t1);
+#endif
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA)) // PSSL MSAA builds additionally read an fmask through getFmask()
+Texture2D<int2> tFMask_color : register(t2);
+#endif
+
+#if defined(__PSSL__)
+static const int FMASK_UNKNOWN = 1 << 3; // color "unknown" is always represented as high bit in the 4bit fragment index
+
+// Loads the packed fragment indices for pixel 'coord' from 'tex'.
+// With more than 8 coverage samples both 32-bit words are loaded; otherwise only
+// the first word is loaded and the second is filled with 0x88888888, i.e. every
+// 4-bit entry marked invalid.
+int2 getFmask(Texture2D <int2> tex, int sample_count, int2 coord)
+{
+    int2 packed_fptrs;
+    if (sample_count > 8)
+    {
+        // more than 8 coverage samples: 2 VGPRs are needed
+        packed_fptrs.xy = tex.Load(int3(coord, 0)).xy;
+    }
+    else
+    {
+        // 8 or fewer coverage samples: one VGPR is enough (32bits / 4bits per sample)
+        packed_fptrs.x = tex.Load(int3(coord, 0)).x;
+        packed_fptrs.y = 0x88888888; // all invalid -- though in theory we shouldn't need to refer to them at all.
+    }
+    return packed_fptrs;
+}
+
+// Extracts the 4-bit fragment index for coverage sample 'index' from the packed
+// fmask: samples 0-7 live in fmask.x, samples 8 and above in fmask.y.
+int getFptr(int index, int2 fmask)
+{
+    const int BITS_PER_FPTR = 4; // fmask load always returns a 4bit fragment index (fptr) per coverage sample, regardless of actual number of fragments.
+    const int FPTR_MASK = (1 << BITS_PER_FPTR) - 1;
+    int packed_word = (index < 8) ? fmask.x : fmask.y;
+    int slot = (index < 8) ? index : (index-8);
+    return (packed_word >> (slot*BITS_PER_FPTR)) & FPTR_MASK;
+}
+#endif
+/* Resolve pixel shader: filters the godray colour/depth buffers over a
+ * (2*KERNEL_WIDTH+1)^2 pixel neighbourhood (and over all sub-samples in MSAA mode),
+ * weighting each valid sample with GaussianApprox().
+ * Outputs the weighted mean colour plus the weighted means of linearized depth and
+ * of its square; with no valid sample, colour is zero and depth is 1.
+ */
+RESOLVE_OUTPUT main(VS_QUAD_OUTPUT input)
+{
+ float3 result_color = 0.0f;
+ float result_depth = 0.0f;
+ float result_depth_sqr = 0.0f;
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+ uint2 buffer_size;
+ uint buffer_samples;
+ tGodraysBuffer.GetDimensions(buffer_size.x, buffer_size.y, buffer_samples); // only buffer_samples is used below
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+ uint buffer_samples = 1;
+#endif
+
+ int2 base_tc = int2(input.vTex * g_vViewportSize); // texture coordinate -> integer pixel
+ const float FILTER_SCALE = 1.0f;
+ const int KERNEL_WIDTH = 1;
+ float total_weight = 0.0f;
+ [unroll]
+ for (int ox=-KERNEL_WIDTH; ox<=KERNEL_WIDTH; ++ox)
+ {
+ if ((base_tc.x + ox) < 0 || (base_tc.x + ox) >= g_vViewportSize.x) continue; // skip columns outside the viewport
+
+ [unroll]
+ for (int oy=-KERNEL_WIDTH; oy<=KERNEL_WIDTH; ++oy)
+ {
+ if ((base_tc.y + oy) < 0 || (base_tc.y + oy) >= g_vViewportSize.y) continue; // skip rows outside the viewport
+
+ int2 offset = int2(ox, oy);
+ int2 tc = base_tc + offset;
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA))
+ int2 fmask = getFmask(tFMask_color, buffer_samples, tc);
+#endif
+// The scope opened in the branch below is the per-sample body; it is a loop only in MSAA mode
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+ for (uint s=0; s<buffer_samples; ++s)
+ {
+ float2 so = offset + tGodraysBuffer.GetSamplePosition(s); // pixel offset plus sub-sample position
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+ {
+ float2 so = offset;
+#endif
+ bool is_valid_sample = false;
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+# if defined(__PSSL__)
+ float3 sample_value = float3(0,0,0);
+ float sample_depth = 0.0f;
+ int fptr = getFptr(s, fmask);
+ if (fptr != FMASK_UNKNOWN) // NOTE(review): equality test against the "unknown" bit value - confirm other values with that bit set cannot occur
+ {
+ sample_value = tGodraysBuffer.Load(tc, fptr).rgb;
+ sample_depth = tGodraysDepth.Load( tc, fptr ).r;
+ is_valid_sample = true;
+ }
+# else // !defined(__PSSL__)
+ is_valid_sample = true;
+ float3 sample_value = tGodraysBuffer.Load( tc, s ).rgb;
+ float sample_depth = tGodraysDepth.Load( tc, s ).r;
+# endif
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+ is_valid_sample = true;
+ float3 sample_value = tGodraysBuffer.Load( int3(tc, 0) ).rgb;
+ float sample_depth = tGodraysDepth.Load( int3(tc, 0) ).r;
+#endif
+ sample_depth = LinearizeDepth(sample_depth, g_fZNear, g_fZFar);
+ if (!all(isfinite(sample_value))) // drop samples containing NaN or infinity
+ {
+ is_valid_sample = false;
+ }
+
+ if (is_valid_sample)
+ {
+ so *= g_fResMultiplier;
+ float weight = GaussianApprox(so, FILTER_SCALE);
+ result_color += weight * sample_value;
+ result_depth += weight * sample_depth;
+ result_depth_sqr += weight * sample_depth*sample_depth;
+ total_weight += weight;
+ }
+ }
+ }
+ }
+
+ RESOLVE_OUTPUT output;
+ output.color = (total_weight > 0.0f) ? result_color/total_weight : float3(0.f, 0.f, 0.f);
+ output.depth = (total_weight > 0.0f) ? float2(result_depth, result_depth_sqr)/total_weight : 1.0f; // fallback 1.0f is written to both components
+ return output;
+}
diff --git a/src/shaders/ShaderCommon.h b/src/shaders/ShaderCommon.h
new file mode 100644
index 0000000..f4b8f80
--- /dev/null
+++ b/src/shaders/ShaderCommon.h
@@ -0,0 +1,265 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (C) 2013, NVIDIA Corporation. All rights reserved.
+
+/*===========================================================================
+Constants
+===========================================================================*/
+
+static const float PI = 3.1415926535898f; // pi; GetPhaseFactor divides acos() by it to obtain a [0,1] lookup coordinate
+static const float EDGE_FACTOR = 1.0f - (2.0f/64.0f) * (1.0f/64.0f); // evaluates to 1 - 1/2048; NOTE(review): not referenced in this header - purpose must be confirmed at the use site
+static const uint MAX_PHASE_TERMS = 4; // same value as the [4] extents of g_uPhaseFunc / g_vPhaseParams in cbFrame
+
+// PSSL compatibility section: only compiled when __PSSL__ is defined. It maps
+// the HLSL spellings used by the shaders onto the PSSL names given on the
+// right-hand side of each #define.
+#ifdef __PSSL__
+// Sub-pixel sample offsets in units of 1/16, stored back to back for the
+// 1x, 2x, 4x and 8x groups (1 + 2 + 4 + 8 = 15 entries).
+static const float2 SAMPLE_POSITIONS[] = {
+    // 1x
+    float2( 0, 0)/16.f,
+    // 2x
+    float2(-4, 4)/16.f,
+    float2( 4,-4)/16.f,
+    // 4x
+    float2(-6, 6)/16.f,
+    float2( 6,-6)/16.f,
+    float2(-2,-2)/16.f,
+    float2( 2, 2)/16.f,
+    // 8x
+    float2(-7,-3)/16.f,
+    float2( 7, 3)/16.f,
+    float2( 1,-5)/16.f,
+    float2(-5, 5)/16.f,
+    float2(-3,-7)/16.f,
+    float2( 3, 7)/16.f,
+    float2( 5,-1)/16.f,
+    float2(-1, 1)/16.f
+};
+
+// constant buffers
+#define cbuffer ConstantBuffer
+
+// textures and samplers
+#define Texture2DMS MS_Texture2D
+#define Texture2DArray Texture2D_Array
+#define SampleLevel SampleLOD
+#define GetSamplePosition(s) GetSamplePoint(s)
+
+// semantics (one definition per semantic name)
+#define SV_DEPTH S_DEPTH_OUTPUT
+#define SV_DOMAINLOCATION S_DOMAIN_LOCATION
+#define SV_INSIDETESSFACTOR S_INSIDE_TESS_FACTOR
+#define SV_INSTANCEID S_INSTANCE_ID
+#define SV_ISFRONTFACE S_FRONT_FACE
+#define SV_OUTPUTCONTROLPOINTID S_OUTPUT_CONTROL_POINT_ID
+#define SV_POSITION S_POSITION
+#define SV_PRIMITIVEID S_PRIMITIVE_ID
+#define SV_SAMPLEINDEX S_SAMPLE_INDEX
+#define SV_TARGET S_TARGET_OUTPUT
+#define SV_TARGET0 S_TARGET_OUTPUT0
+#define SV_TARGET1 S_TARGET_OUTPUT1
+#define SV_TESSFACTOR S_EDGE_TESS_FACTOR
+#define SV_VERTEXID S_VERTEX_ID
+
+// hull and domain shader properties
+#define domain DOMAIN_PATCH_TYPE
+#define partitioning PARTITIONING_TYPE
+#define outputtopology OUTPUT_TOPOLOGY_TYPE
+#define outputcontrolpoints OUTPUT_CONTROL_POINTS
+#define patchconstantfunc PATCH_CONSTANT_FUNC
+#define maxtessfactor MAX_TESS_FACTOR
+
+// `shared` is defined to nothing, so the `shared cbuffer` declarations in this
+// header lose the keyword under PSSL.
+// Open question kept from the original: need to figure out how to deal with those exactly.
+#define shared
+#endif
+
+/*===========================================================================
+Sampler states
+===========================================================================*/
+SamplerState sPoint : register(s0); // sampler slot s0; NOTE(review): point filtering is implied by the name only - the state is bound by the host, confirm
+SamplerState sBilinear : register(s1); // sampler slot s1; GetPhaseFactor samples its lookup texture through this sampler
+
+/*===========================================================================
+Constant buffers
+===========================================================================*/
+shared cbuffer cbContext : register(b0) // constant buffer slot b0; occupies registers c0-c2
+{
+ float2 g_vOutputSize : packoffset(c0); // c0.xy; NOTE(review): presumably output target size in pixels - confirm with host code
+ float2 g_vOutputSize_Inv : packoffset(c0.z); // c0.zw; presumably 1/g_vOutputSize - confirm
+ float2 g_vBufferSize : packoffset(c1); // c1.xy; presumably internal buffer size in pixels - confirm
+ float2 g_vBufferSize_Inv : packoffset(c1.z); // c1.zw; presumably 1/g_vBufferSize - confirm
+ float g_fResMultiplier : packoffset(c2); // c2.x; the resolve shader scales its sample offsets by this before weighting them
+ unsigned int g_uBufferSamples : packoffset(c2.y); // c2.y; presumably the sample count of the multisampled buffers - confirm
+}
+
+shared cbuffer cbFrame : register(b1) // constant buffer slot b1; occupies registers c0-c25
+{
+ column_major float4x4 g_mProj : packoffset(c0); // c0-c3
+ column_major float4x4 g_mViewProj : packoffset(c4); // c4-c7
+ column_major float4x4 g_mViewProjInv: packoffset(c8); // c8-c11; presumably the inverse of g_mViewProj - confirm
+ float2 g_vOutputViewportSize : packoffset(c12); // c12.xy
+ float2 g_vOutputViewportSize_Inv : packoffset(c12.z); // c12.zw; presumably 1/g_vOutputViewportSize - confirm
+ float2 g_vViewportSize : packoffset(c13); // c13.xy; the resolve and temporal filter shaders use it as the texel-coordinate bound
+ float2 g_vViewportSize_Inv : packoffset(c13.z); // c13.zw; presumably 1/g_vViewportSize - confirm
+ float3 g_vEyePosition : packoffset(c14); // c14.xyz
+ float2 g_vJitterOffset : packoffset(c15); // c15.xy
+ float g_fZNear : packoffset(c15.z); // passed as zn to LinearizeDepth / WarpDepth by the filter shaders
+ float g_fZFar : packoffset(c15.w); // passed as zf to LinearizeDepth / WarpDepth by the filter shaders
+ float3 g_vScatterPower : packoffset(c16); // c16.xyz; GetPhaseFactor multiplies the phase lookup by this
+ unsigned int g_uNumPhaseTerms : packoffset(c16.w); // presumably the number of entries in use below (at most 4) - confirm
+ float3 g_vSigmaExtinction : packoffset(c17); // c17.xyz
+ unsigned int g_uPhaseFunc[4] : packoffset(c18); // c18-c21: each array element takes a whole register
+ float4 g_vPhaseParams[4] : packoffset(c22); // c22-c25
+};
+
+shared cbuffer cbVolume : register(b2) // constant buffer slot b2; occupies registers c0-c50
+{
+ column_major float4x4 g_mLightToWorld : packoffset(c0); // c0-c3
+ float g_fLightFalloffAngle : packoffset(c4.x);
+ float g_fLightFalloffPower : packoffset(c4.y);
+ float g_fGridSectionSize : packoffset(c4.z);
+ float g_fLightToEyeDepth : packoffset(c4.w);
+ float g_fLightZNear : packoffset(c5); // c5.x
+ float g_fLightZFar : packoffset(c5.y);
+ float4 g_vLightAttenuationFactors : packoffset(c6); // AttenuationFunc reads x,y,z as A,B,C of A+B*d+C*d^2 and w as the additive D term
+ column_major float4x4 g_mLightProj[4] : packoffset(c7); // c7-c22: four matrices of four registers each
+ column_major float4x4 g_mLightProjInv[4]: packoffset(c23); // c23-c38; presumably the inverses of g_mLightProj - confirm
+ float3 g_vLightDir : packoffset(c39); // c39.xyz
+ float g_fGodrayBias : packoffset(c39.w);
+ float3 g_vLightPos : packoffset(c40); // c40.xyz
+ unsigned int g_uMeshResolution : packoffset(c40.w);
+ float3 g_vLightIntensity : packoffset(c41); // c41.xyz
+ float g_fTargetRaySize : packoffset(c41.w);
+ float4 g_vElementOffsetAndScale[4] : packoffset(c42); // c42-c45
+ float4 g_vShadowMapDim : packoffset(c46);
+ unsigned int g_uElementIndex[4] : packoffset(c47); // c47-c50: each array element takes a whole register
+};
+
+shared cbuffer cbApply : register(b3) // constant buffer slot b3; occupies registers c0-c5
+{
+ column_major float4x4 g_mHistoryXform : packoffset(c0); // c0-c3; the temporal filter multiplies the current clip position by this to get the history clip position
+ float g_fFilterThreshold : packoffset(c4); // c4.x; the temporal filter divides the reprojection distance by this
+ float g_fHistoryFactor : packoffset(c4.y); // starting history blend weight in the temporal filter, before it is attenuated
+ float3 g_vFogLight : packoffset(c5); // c5.xyz; NOTE(review): not used by the code in this header - confirm meaning at the use site
+ float g_fMultiScattering : packoffset(c5.w); // NOTE(review): not used by the code in this header - confirm meaning at the use site
+};
+
+/*===========================================================================
+Shader inputs
+===========================================================================*/
+struct VS_POLYGONAL_INPUT // single-attribute input; presumably consumed by the RenderVolume vertex shader - confirm
+{
+ float4 vPos : POSITION; // bound to the POSITION input semantic
+};
+
+struct HS_POLYGONAL_INPUT // presumably the per-vertex data handed to the hull shader - confirm
+{
+ float4 vPos : SV_POSITION; // system-value position
+ float4 vWorldPos : TEXCOORD0; // same semantic as vWorldPos in the other polygonal structs
+ float4 vClipPos : TEXCOORD1; // same semantic as vClipPos in HS_POLYGONAL_CONTROL_POINT_OUTPUT
+};
+
+struct HS_POLYGONAL_CONTROL_POINT_OUTPUT // carries the two TEXCOORD fields of HS_POLYGONAL_INPUT without SV_POSITION
+{
+ float4 vWorldPos : TEXCOORD0;
+ float4 vClipPos : TEXCOORD1;
+};
+
+struct HS_POLYGONAL_CONSTANT_DATA_OUTPUT // per-patch tessellation constants
+{
+ float fEdges[4] : SV_TESSFACTOR; // four edge tessellation factors
+ float fInside[2] : SV_INSIDETESSFACTOR; // two inside tessellation factors
+ float debug[4] : TEXCOORD2; // NOTE(review): presumably debug-only payload - confirm whether any stage reads it
+};
+
+struct PS_POLYGONAL_INPUT // presumably the pixel shader input for the polygonal volume pass - confirm
+{
+ float4 vPos : SV_POSITION; // system-value position
+ float4 vWorldPos : TEXCOORD0;
+#ifdef __PSSL__
+ float dummy : CLIPPPOSDUMMY; //Workaround for compiler exception in polygon hull shaders.
+#endif
+};
+
+struct VS_QUAD_OUTPUT // input type of the TemporalFilter_PS entry point
+{
+ float4 vPos : SV_POSITION; // system-value position
+ sample float4 vWorldPos : TEXCOORD0; // `sample` requests per-sample interpolation
+ sample float2 vTex : TEXCOORD1; // `sample` requests per-sample interpolation; the temporal filter treats it as a 0..1 coordinate when deriving texel indices
+};
+
+/*===========================================================================
+Common functions
+===========================================================================*/
+
+// Remaps a depth value d using the near/far planes zn and zf:
+//   d*zn / (zf - (zf - zn)*d)
+// The end points are preserved: d == 0 yields 0 and d == 1 yields 1.
+float LinearizeDepth(float d, float zn, float zf)
+{
+    float numerator = d * zn;
+    float denominator = zf - ((zf - zn) * d);
+    return numerator / denominator;
+}
+
+// Applies the curve z*(1 + zf/zn) / (1 + z*zf/zn) to z.
+// The end points are preserved: z == 0 yields 0 and z == 1 yields 1.
+float WarpDepth(float z, float zn, float zf)
+{
+    float scale = 1+zf/zn;
+    float denominator = 1+z*zf/zn;
+    return z * scale / denominator;
+}
+
+// Linear remap of d from the interval [zn, zf] onto [0, 1]; the result is not
+// clamped, so values outside the interval map outside [0, 1].
+float MapDepth(float d, float zn, float zf)
+{
+    float offset = d - zn;
+    float range = zf - zn;
+    return offset / range;
+}
+
+// Cheap approximation of the non-normalized Gaussian exp(-0.5*(r/width)^2),
+// where r is the length of sample_pos. It evaluates
+//   (1 - r^2/(64*width^2))^32
+// by squaring five times. The base is saturated first, so the weight is
+// exactly 0 once r >= 8*width.
+float GaussianApprox(float2 sample_pos, float width)
+{
+    float r_sqr = sample_pos.x*sample_pos.x + sample_pos.y*sample_pos.y;
+    float base = saturate(1.0f - r_sqr/(64.0f * width*width));
+    float pow2 = base*base;
+    float pow4 = pow2*pow2;
+    float pow8 = pow4*pow4;
+    float pow16 = pow8*pow8;
+    return pow16*pow16; // ^32
+}
+
+#if defined(ATTENUATIONMODE)
+// Attenuation for the input d, selected by the ATTENUATIONMODE macro.
+// With P(d) = A + B*d + C*d^2 and (A, B, C, D) = g_vLightAttenuationFactors.xyzw:
+//   ATTENUATIONMODE_POLYNOMIAL     -> saturate(1 - P(d))
+//   ATTENUATIONMODE_INV_POLYNOMIAL -> saturate(1/P(d) + D)
+//   any other mode                 -> 1
+float AttenuationFunc(float d)
+{
+    float attenuation = 1.0f; // value for every mode not handled below
+    if (ATTENUATIONMODE == ATTENUATIONMODE_POLYNOMIAL)
+    {
+        float poly = g_vLightAttenuationFactors.x + g_vLightAttenuationFactors.y*d + g_vLightAttenuationFactors.z*d*d;
+        attenuation = saturate(1.0f - poly);
+    }
+    else if (ATTENUATIONMODE == ATTENUATIONMODE_INV_POLYNOMIAL)
+    {
+        float poly = g_vLightAttenuationFactors.x + g_vLightAttenuationFactors.y*d + g_vLightAttenuationFactors.z*d*d;
+        attenuation = saturate(1.0f / poly + g_vLightAttenuationFactors.w);
+    }
+    return attenuation;
+}
+#endif
+
+// Looks up a phase value in `tex` and scales it by g_vScatterPower.
+// The lookup coordinate is (0, acos(-cos_theta)/PI); -cos_theta is clamped to
+// [-1, 1] before acos. Mip level 0 is sampled through sBilinear.
+float3 GetPhaseFactor(Texture2D tex, float cos_theta)
+{
+    float angle = acos(clamp(-cos_theta, -1.0f, 1.0f));
+    float2 lookup_tc = float2(0, angle / PI);
+    float3 phase = tex.SampleLevel(sBilinear, lookup_tc, 0).rgb;
+    return g_vScatterPower*phase;
+}
diff --git a/src/shaders/TemporalFilter_PS.hlsl b/src/shaders/TemporalFilter_PS.hlsl
new file mode 100644
index 0000000..082e577
--- /dev/null
+++ b/src/shaders/TemporalFilter_PS.hlsl
@@ -0,0 +1,207 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+Texture2D<float4> tCurrBuffer : register(t0); // current color; main() reads .rgb over a 3x3 neighborhood
+Texture2D<float4> tLastBuffer : register(t1); // history color; main() reads .rgb at the reprojected texel
+Texture2D<float2> tCurrDepth : register(t2); // current depth pair; main() uses .g - .r*.r as a variance term, so presumably (depth, depth squared) - confirm
+Texture2D<float2> tLastDepth : register(t3); // history depth pair, read with the same .r/.g usage as tCurrDepth
+
+static const int2 NEIGHBOR_OFFSETS[] = { // 3x3 texel offsets, row by row; index 4 is the center (0, 0)
+ int2(-1, -1), int2( 0, -1), int2( 1, -1),
+ int2(-1, 0), int2( 0, 0), int2( 1, 0),
+ int2(-1, 1), int2( 0, 1), int2( 1, 1)
+};
+
+#if 1 // active table: center 1, edge neighbors 1/8, corner neighbors 1/64
+static const float NEIGHBOR_WEIGHTS[] = { // indexed in the same order as NEIGHBOR_OFFSETS
+ 0.015625f, 0.125000f, 0.015625f,
+ 0.125000f, 1.000000f, 0.125000f,
+ 0.015625f, 0.125000f, 0.015625f,
+};
+#else // disabled alternative: only the center texel has a non-zero weight
+static const float NEIGHBOR_WEIGHTS[] = {
+ 0, 0, 0,
+ 0, 1, 0,
+ 0, 0, 0,
+};
+#endif
+
+// Returns the Y term only: 0.5*g + 0.25*(r + b). This is the same value that
+// RGB_to_YCoCg stores in its x component.
+float RGB_to_Y (float3 input)
+{
+    float red_plus_blue = input.r + input.b;
+    return 0.50f*input.g + 0.25f*red_plus_blue;
+}
+
+// Converts RGB to (Y, Co, Cg) packed as (x, y, z):
+//   Y  = 0.5*g + 0.25*(r + b)
+//   Co = 0.5*(r - b)
+//   Cg = 0.5*g - 0.25*(r + b)
+float3 RGB_to_YCoCg (float3 input)
+{
+    float quarter_rb = 0.25f*(input.r + input.b);
+    float luma = 0.50f*input.g + quarter_rb;
+    float chroma_orange = 0.50f*(input.r - input.b);
+    float chroma_green = 0.50f*input.g - quarter_rb;
+    return float3(luma, chroma_orange, chroma_green);
+}
+
+// Converts (Y, Co, Cg) packed as (x, y, z) back to RGB:
+//   r = (Y - Cg) + Co,  g = Y + Cg,  b = (Y - Cg) - Co
+// This undoes RGB_to_YCoCg.
+float3 YCoCg_to_RGB(float3 input)
+{
+    float luma_minus_green = input.x - input.z;
+    float red = luma_minus_green + input.y;
+    float green = input.x + input.z;
+    float blue = luma_minus_green - input.y;
+    return float3(red, green, blue);
+}
+
+// Compresses each RGB channel with c / (1 + c), then converts the result to
+// YCoCg. Tonemap_Inv applies the reverse steps.
+float3 Tonemap( float3 sample_rgb )
+{
+    float3 compressed_rgb = sample_rgb / (1 + sample_rgb);
+    return RGB_to_YCoCg(compressed_rgb);
+}
+
+// Reverses Tonemap: converts YCoCg back to RGB, then expands each channel
+// with c / (1 - c). A channel equal to 1 makes the divisor zero.
+float3 Tonemap_Inv( float3 sample_YCoCg )
+{
+    float3 compressed_rgb = YCoCg_to_RGB(sample_YCoCg);
+    float3 headroom = 1 - compressed_rgb;
+    return compressed_rgb / headroom;
+}
+
+struct FILTER_OUTPUT // the two render-target outputs written by main()
+{
+ float3 color : SV_TARGET0; // filtered color after Tonemap_Inv
+ float2 depth : SV_TARGET1; // blended depth pair, same .r/.g layout as tCurrDepth
+};
+
+// Temporal filter entry point.
+//
+// 1. Builds a weighted 3x3 average of the current color (tonemapped to YCoCg)
+//    and depth, and records the min/max Y seen in the neighborhood.
+// 2. Reprojects the pixel through g_mHistoryXform and fetches the history
+//    color and depth at that position.
+// 3. Attenuates g_fHistoryFactor when the reprojected position is off screen,
+//    has moved (relative to g_fFilterThreshold), or differs in depth.
+// 4. Pulls the history color towards the current one until its Y lies inside
+//    the neighborhood range, then blends and inverts the tonemap.
+//
+// input:   quad vertex output; vTex is used as a 0..1 coordinate.
+// returns: filtered color (SV_TARGET0) and depth pair (SV_TARGET1).
+FILTER_OUTPUT main(VS_QUAD_OUTPUT input)
+{
+    FILTER_OUTPUT output;
+
+    // load neighbors
+    float3 curr_sample = float3(0,0,0);
+    float2 curr_depth = float2(0,0);
+    float neighborhood_bounds_max = 0;
+    float neighborhood_bounds_min = 0;
+    int2 max_dimensions = int2(g_vViewportSize);
+    // Last texel index inside the viewport. Valid indices run from 0 to
+    // size-1, so clamping to max_dimensions itself would allow a fetch one
+    // texel past the right/bottom edge.
+    int2 max_tc = max(int2(0,0), max_dimensions - int2(1,1));
+    int2 base_tc = int2(floor(input.vTex.xy*max_dimensions));
+    // Negative until the first valid neighbor has been accumulated.
+    float total_weight = -1.0f;
+
+    [unroll]
+    for (int n=0; n<9; ++n)
+    {
+        int2 sample_tc = max( int2(0,0), min(max_tc, base_tc + NEIGHBOR_OFFSETS[n]));
+        float3 raw_sample = tCurrBuffer.Load(int3(sample_tc, 0)).rgb;
+        float2 neighbor_depth = tCurrDepth.Load(int3(sample_tc, 0)).rg;
+        // Validate the raw texel before clamping: max() against zero can
+        // replace a NaN with 0, which would then pass as a valid black sample.
+        bool is_valid = all(isfinite(raw_sample));
+        if (is_valid)
+        {
+            float3 neighbor_sample = Tonemap(max(float3(0,0,0), raw_sample));
+            float weight = NEIGHBOR_WEIGHTS[n];
+            curr_sample += weight*neighbor_sample;
+            curr_depth += weight*neighbor_depth;
+            if (total_weight <= 0.0f)
+            {
+                // First valid neighbor seeds the Y bounds.
+                neighborhood_bounds_max = neighbor_sample.x;
+                neighborhood_bounds_min = neighbor_sample.x;
+                total_weight = weight;
+            }
+            else
+            {
+                neighborhood_bounds_max = max(neighborhood_bounds_max, neighbor_sample.x);
+                neighborhood_bounds_min = min(neighborhood_bounds_min, neighbor_sample.x);
+                total_weight += weight;
+            }
+        }
+    }
+    // No valid neighbor at all: fall back to black and depth (1, 1).
+    curr_sample = (total_weight > 0) ? curr_sample/total_weight : float3(0,0,0);
+    curr_depth = (total_weight > 0) ? curr_depth/total_weight : float2(1, 1);
+
+    // Transform and apply history
+    const float MAX_HISTORY_FACTOR = 0.98f;
+    float history_factor = g_fHistoryFactor;
+
+    float4 curr_clip;
+    curr_clip.xy = float2(2, -2) * input.vTex.xy + float2(-1, 1);
+    curr_clip.z = WarpDepth(curr_depth.x, g_fZNear, g_fZFar);
+    curr_clip.w = 1;
+    float4 last_clip = mul( g_mHistoryXform, curr_clip );
+    last_clip = last_clip/last_clip.w;
+
+    // saturate() can return exactly 1, which scales to max_dimensions, so the
+    // texel index is clamped to the last valid texel as well.
+    float2 last_uv = saturate((float2(0.5f, -0.5f)*last_clip.xy+float2(0.5f, 0.5f)));
+    int2 last_tc = min(int2(last_uv * max_dimensions), max_tc);
+    float3 last_sample = tLastBuffer.Load(int3(last_tc, 0)).rgb;
+    float2 last_depth = tLastDepth.Load(int3(last_tc, 0)).rg;
+    last_sample = all(isfinite(last_sample)) ? Tonemap(last_sample) : curr_sample;
+
+    // Discard history that reprojects outside the [-1, 1] clip square.
+    history_factor = all(abs(last_clip.xy) <= 1.0f) ? history_factor : 0.0f;
+
+    // Reprojection distance; y is scaled by viewport height / width.
+    float2 clip_diff = (last_clip.xy - curr_clip.xy) * g_vViewportSize * g_vViewportSize_Inv.xx;
+    float clip_dist = length(clip_diff);
+    float movement_factor = saturate(1.0f - clip_dist/g_fFilterThreshold);
+    history_factor *= movement_factor*movement_factor*movement_factor;
+
+    // Reduce history when the depth difference exceeds the local variance
+    // term (.g - .r*.r of both depth pairs, floored at 0.0001).
+    float depth_diff = abs(curr_depth.r-last_depth.r);
+    float local_variance = abs(curr_depth.g - curr_depth.r*curr_depth.r) + abs(last_depth.g - last_depth.r*last_depth.r);
+    local_variance = max(local_variance, 0.0001f);
+#if 0
+    float local_stddev = sqrt(local_variance);
+    float depth_factor = saturate(depth_diff-local_stddev);
+    depth_factor = local_stddev / (local_stddev + depth_factor);
+#else
+    float depth_factor = saturate(depth_diff-local_variance);
+    depth_factor = local_variance / (local_variance + depth_factor);
+#endif
+    history_factor *= depth_factor;
+
+    // threshold based on neighbors
+    // Convert to Y Co Cg, then clip to bounds of neighborhood
+    float3 blended_sample = curr_sample;
+    float2 blended_depth = curr_depth;
+    if (history_factor > 0.0f)
+    {
+        const float CLIP_EPSILON = 0.0001f;
+        float3 clip_vec = last_sample - curr_sample;
+        float clamped_Y = max(neighborhood_bounds_min, min(neighborhood_bounds_max, last_sample.x));
+        // Fraction of the current->history vector that keeps Y inside the
+        // neighborhood bounds; 1 when the two Y values are nearly equal.
+        float clip_factor_Y = (abs(clip_vec.x) > CLIP_EPSILON) ? abs((clamped_Y-curr_sample.x) / clip_vec.x) : 1.0f;
+        float clip_factor = clip_factor_Y;
+        float3 clipped_history = curr_sample + clip_factor*clip_vec;
+
+        history_factor = min(history_factor, MAX_HISTORY_FACTOR);
+        blended_sample = lerp(curr_sample, clipped_history, history_factor);
+        blended_depth = lerp(curr_depth, last_depth, history_factor);
+    }
+
+    output.color = Tonemap_Inv(blended_sample);
+    output.depth = blended_depth;
+    return output;
+}