13 files changed, 2309 insertions, 0 deletions
diff --git a/src/shaders/Apply_PS.hlsl b/src/shaders/Apply_PS.hlsl
new file mode 100644
index 0000000..0e19e46
--- /dev/null
+++ b/src/shaders/Apply_PS.hlsl
@@ -0,0 +1,176 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+- UPSAMPLEMODE:
+	- UPSAMPLEMODE_POINT
+	- UPSAMPLEMODE_BILINEAR
+	- UPSAMPLEMODE_BILATERAL
+
+- FOGMODE:
+	- FOGMODE_NONE
+	- FOGMODE_NOSKY
+	- FOGMODE_FULL
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+Texture2D<float4> tGodraysBuffer : register(t0);	// in-scatter buffer that main() up-samples (.rgb is read)
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+    Texture2DMS<float> tSceneDepth : register(t1);	// scene depth, read per sample with Load()
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+    Texture2D<float> tSceneDepth : register(t1);	// scene depth, point-sampled
+#endif
+Texture2D<float2> tGodraysDepth : register(t2);	// .r/.g are passed to CalcVariance() as (x, x_sqr) by the bilateral filter
+Texture2D<float4> tPhaseLUT : register(t4);	// declared here but not read by main() below
+
+struct PS_APPLY_OUTPUT	// the two render-target values written by main()
+{
+	float4 inscatter : SV_TARGET0;	// rgb: up-sampled in-scatter (plus fog when enabled); a stays 1
+	float4 transmission : SV_TARGET1;	// rgb: exp(-extinction*distance) when fog applies, otherwise 1; a stays 1
+};
+
+// Range-compresses a colour per channel: x -> x / (1 + x).
+float3 Tonemap(float3 s) {
+	return s / (s + float3(1,1,1));
+}
+
+// Undoes Tonemap() per channel: y -> y / (1 - y). The divisor is zero for y == 1.
+float3 Tonemap_Inv(float3 s) {
+	return s / (float3(1,1,1) - s);
+}
+
+
+// Returns |x_sqr - x*x|; the abs() keeps the result non-negative.
+float CalcVariance(float x, float x_sqr) {
+	return abs(x_sqr - x*x);
+}
+
+// Composites the godray buffer: up-samples the in-scatter term with the filter chosen by UPSAMPLEMODE,
+// optionally adds analytic fog (FOGMODE), and writes in-scatter to SV_TARGET0 and transmission to SV_TARGET1.
+PS_APPLY_OUTPUT main(VS_QUAD_OUTPUT input
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+					, uint sampleID : SV_SAMPLEINDEX
+#endif
+					)
+{
+	PS_APPLY_OUTPUT output;	
+	output.transmission = float4(1,1,1,1);
+	output.inscatter = float4(0,0,0,1);
+
+	float2 texcoord = input.vTex * g_vViewportSize * g_vBufferSize_Inv;	// vTex rescaled by viewport size over buffer size
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+	float scene_depth = tSceneDepth.Load(int2(input.vTex*g_vOutputViewportSize), sampleID).x;
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+	float scene_depth = tSceneDepth.SampleLevel(sPoint, texcoord, 0).x;
+#endif
+	scene_depth = LinearizeDepth(scene_depth, g_fZNear, g_fZFar);
+
+	// Up-sample with the interpolator selected by UPSAMPLEMODE: point (no up-sample), bilinear or bilateral.
+	float3 inscatter_sample = float3(0,0,0);
+	if (UPSAMPLEMODE == UPSAMPLEMODE_POINT)
+	{
+		inscatter_sample = tGodraysBuffer.SampleLevel( sPoint, texcoord, 0).rgb;
+	}
+	else if (UPSAMPLEMODE == UPSAMPLEMODE_BILINEAR)
+	{
+		inscatter_sample = tGodraysBuffer.SampleLevel( sBilinear, texcoord, 0).rgb;
+	}
+	else if (UPSAMPLEMODE == UPSAMPLEMODE_BILATERAL)
+	{
+		const float2 NEIGHBOR_OFFSETS[] = {
+			float2(-1, -1),	float2( 0, -1),	float2( 1, -1),
+			float2(-1,  0),	float2( 0,  0),	float2( 1,  0),
+			float2(-1,  1),	float2( 0,  1),	float2( 1,  1)
+		};
+		const float GAUSSIAN_WIDTH = 1.0f;
+		const float DEPTH_RANGE = 0.10f;
+
+		float2 max_dimensions = floor(g_vViewportSize);
+		float2 max_texel = max(max_dimensions - float2(1,1), float2(0,0));	// last texel index inside the viewport
+		float2 base_tc = input.vTex * max_dimensions;
+
+		float total_weight = 0;
+		[unroll]
+		for (int n=0; n<9; ++n)
+		{
+			// Clamp the neighbor to valid texel indices. Clamping to max_dimensions itself would let
+			// floor() below address one texel past the viewport edge.
+			float2 sample_tc = max( float2(0,0), min(max_texel, base_tc + NEIGHBOR_OFFSETS[n]));
+			float2 sample_location = floor(sample_tc) + float2(0.5f, 0.5f);
+			float weight = GaussianApprox(sample_location - base_tc, GAUSSIAN_WIDTH);
+
+			// Down-weight neighbors whose depth differs from this pixel's or varies strongly.
+			float2 neighbor_depth = tGodraysDepth.Load(int3(sample_location.xy, 0)).rg;
+			float depth_diff = abs(scene_depth - neighbor_depth.r);
+			float neighbor_variance = CalcVariance(neighbor_depth.r, neighbor_depth.g);
+			float neighbor_stddev = sqrt(neighbor_variance);
+			float depth_weight = saturate(1 - depth_diff / DEPTH_RANGE);
+			depth_weight = depth_weight*depth_weight*(1-neighbor_stddev);
+			weight *= depth_weight;
+
+			// Accumulate in tonemapped space; Tonemap_Inv() after the loop undoes it.
+			inscatter_sample += weight * Tonemap(tGodraysBuffer.Load(int3(sample_location.xy, 0)).rgb);
+			total_weight += weight;
+		}
+
+		if (total_weight > 0.0f)
+		{
+			inscatter_sample = Tonemap_Inv(inscatter_sample / total_weight);
+		}
+		else
+		{
+			inscatter_sample = tGodraysBuffer.SampleLevel(sBilinear, texcoord, 0).rgb;	// no neighbor had weight: plain bilinear fetch
+		}
+	}
+
+	output.inscatter.rgb = inscatter_sample.rgb;
+	if (FOGMODE != FOGMODE_NONE)
+	{
+        if ((FOGMODE != FOGMODE_NOSKY) || (scene_depth < 1.f))	// NOSKY leaves pixels at linear depth >= 1 untouched
+		{
+			float scene_distance = g_fZFar * scene_depth;
+            float3 sigma_ext = g_vSigmaExtinction;
+			float3 transmittance = exp(-sigma_ext*scene_distance);	// evaluated once, used by both outputs
+			output.inscatter.rgb += g_fMultiScattering * g_vFogLight * g_vScatterPower * (1-transmittance) / sigma_ext;
+			output.transmission.rgb = transmittance;
+		}
+	}
+
+	return output;
+}
+\ No newline at end of file
diff --git a/src/shaders/ComputeLightLUT_CS.hlsl b/src/shaders/ComputeLightLUT_CS.hlsl
new file mode 100644
index 0000000..fca70dd
--- /dev/null
+++ b/src/shaders/ComputeLightLUT_CS.hlsl
@@ -0,0 +1,192 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- LIGHTMODE:
+    - LIGHTMODE_OMNI
+    - LIGHTMODE_SPOTLIGHT
+
+- ATTENUATIONMODE:
+    - ATTENUATIONMODE_NONE
+    - ATTENUATIONMODE_POLYNOMIAL
+    - ATTENUATIONMODE_INV_POLYNOMIAL
+
+- COMPUTEPASS:
+    - COMPUTEPASS_CALCULATE
+    - COMPUTEPASS_SUM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Packs v for LUT storage: rgb holds v divided by the scale s, alpha holds s itself.
+float4 PackLut(float3 v, float s) {
+    return float4(v/s, s);
+}
+
+// Recovers a value stored by PackLut(): rgb multiplied back by the scale kept in alpha.
+float3 UnpackLut(float4 v) {
+    return v.a*v.rgb;
+}
+
+Texture2D<float4> tPhaseLUT : register(t4);	// handed to GetPhaseFactor() in the CALCULATE pass
+RWTexture2D<float4> rwLightLUT_P : register(u0);	// written by both passes
+RWTexture2D<float4> rwLightLUT_S1 : register(u1);	// written only when LIGHTMODE == LIGHTMODE_SPOTLIGHT
+RWTexture2D<float4> rwLightLUT_S2 : register(u2);	// written only when LIGHTMODE == LIGHTMODE_SPOTLIGHT
+
+// These need to match the values in context_common.h
+static const uint LIGHT_LUT_DEPTH_RESOLUTION = 128;	// normalizes dispatchID.x, which selects the distance t
+static const uint LIGHT_LUT_WDOTV_RESOLUTION = 512;	// normalizes dispatchID.y, which selects the angle
+
+#if (COMPUTEPASS == COMPUTEPASS_CALCULATE)
+
+static const uint2 BLOCK_SIZE = uint2(32, 8);	// thread-group size; main() prefix-sums along x
+groupshared float3 sAccum_P[BLOCK_SIZE.x*BLOCK_SIZE.y];	// one running sum per thread in the group
+
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+groupshared float3 sAccum_S1[BLOCK_SIZE.x*BLOCK_SIZE.y];	// same layout, holds the inscatter/D term
+groupshared float3 sAccum_S2[BLOCK_SIZE.x*BLOCK_SIZE.y];	// same layout, holds the t*inscatter/D term
+#endif
+
+// CALCULATE pass: evaluates one LUT texel per thread, then prefix-sums each 32-wide group row along x.
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint2 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    uint idx = gthreadID.y*BLOCK_SIZE.x + gthreadID.x;
+    float2 coord = float2(dispatchID) / float2(LIGHT_LUT_DEPTH_RESOLUTION-1, LIGHT_LUT_WDOTV_RESOLUTION-1);
+    float cos_WV = -cos(coord.y * PI);
+    float3 vW = g_vEyePosition - g_vLightPos;
+    float Wsqr = dot(vW, vW);
+    float W_length = sqrt(Wsqr);
+    float t0 = max(0.0f, W_length-g_fLightZFar);
+    float t_range = g_fLightZFar + W_length - t0;
+    float t = t0 + coord.x*t_range;	// coord.x picks a point within [t0, t0 + t_range]
+    float WdotV = cos_WV*W_length;
+    float Dsqr = max(Wsqr+2*WdotV*t+t*t, 0.0f);
+    float D = sqrt(Dsqr);
+    float cos_phi = (t>0 && D>0) ? (t*t + Dsqr - Wsqr) / (2 * t*D) : cos_WV;
+    float3 extinction = exp(-g_vSigmaExtinction*(D+t));
+    float3 phase_factor = GetPhaseFactor(tPhaseLUT, -cos_phi);
+    float attenuation = AttenuationFunc(D);
+    float3 inscatter = phase_factor*attenuation*extinction;
+    // Scale by dT because we are doing quadrature, then divide out g_vScatterPower.
+    inscatter = (inscatter * (t_range / float(LIGHT_LUT_DEPTH_RESOLUTION))) / g_vScatterPower;
+
+    sAccum_P[idx] = inscatter;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    sAccum_S1[idx] = (D==0) ? 0.0f : inscatter/D;
+    sAccum_S2[idx] = t*sAccum_S1[idx];
+#endif
+    // Inclusive prefix sum along x; barriers keep reads of other threads' slots apart from the writes.
+    [unroll]
+    for (uint d=1; d<BLOCK_SIZE.x; d = d<<1)
+    {
+        float3 add_P = float3(0,0,0), add_S1 = float3(0,0,0), add_S2 = float3(0,0,0);
+        GroupMemoryBarrierWithGroupSync();	// slots written so far are visible to the whole group
+        if (gthreadID.x >= d)
+        {
+            add_P = sAccum_P[idx - d];
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+            add_S1 = sAccum_S1[idx - d];
+            add_S2 = sAccum_S2[idx - d];
+#endif
+        }
+        GroupMemoryBarrierWithGroupSync();	// every read has completed before any slot changes
+        sAccum_P[idx] += add_P;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        sAccum_S1[idx] += add_S1;
+        sAccum_S2[idx] += add_S2;
+#endif
+    }
+    static const float LUT_SCALE = 32.0f / 32768.0f;
+    rwLightLUT_P[dispatchID] = PackLut(sAccum_P[idx], LUT_SCALE);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    rwLightLUT_S1[dispatchID] = PackLut(sAccum_S1[idx], LUT_SCALE);
+    rwLightLUT_S2[dispatchID] = PackLut(sAccum_S2[idx], LUT_SCALE*(2*(t0 + t_range)));
+#endif
+}
+
+#elif (COMPUTEPASS == COMPUTEPASS_SUM)
+
+static const uint2 BLOCK_SIZE = uint2(32, 4);	// thread-group size for the SUM pass
+
+Texture2D<float4> tLightLUT_P : register(t5);	// inputs read by main(); dispatchID.z picks P, S1 or S2 for spotlights
+Texture2D<float4> tLightLUT_S1 : register(t6);
+Texture2D<float4> tLightLUT_S2 : register(t7);
+
+groupshared float3 sOffset[BLOCK_SIZE.y];	// per-row running total carried from one 32-texel block to the next
+
+// SUM pass: adds to each 32-texel block of a LUT row the last value of the block before it (kept in sOffset).
+[numthreads( BLOCK_SIZE.x, BLOCK_SIZE.y, 1 )]
+void main(uint3 gthreadID : SV_GroupThreadID, uint3 dispatchID : SV_DispatchThreadID, uint2 groupID : SV_GroupID)
+{
+    if (gthreadID.x == 0)
+    {
+        sOffset[gthreadID.y] = float3(0, 0, 0);
+    }
+    [unroll]
+    for (uint t = 0; t < LIGHT_LUT_DEPTH_RESOLUTION; t += BLOCK_SIZE.x)
+    {
+        uint2 tc = dispatchID.xy + uint2(t, 0);
+        float4 s = float4(0,0,0,0);
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            s = tLightLUT_S2[tc];
+        else if (dispatchID.z == 1)
+            s = tLightLUT_S1[tc];
+        else
+            s = tLightLUT_P[tc];
+#else
+        s = tLightLUT_P[tc];
+#endif
+        GroupMemoryBarrierWithGroupSync();	// offset from the init above / the previous block is visible
+        float3 v = UnpackLut(s) + sOffset[gthreadID.y];
+        GroupMemoryBarrierWithGroupSync();	// the whole row has read the offset before it is replaced
+        if (gthreadID.x == (BLOCK_SIZE.x-1))
+        {
+            sOffset[gthreadID.y] = v;
+        }
+        s.a *= LIGHT_LUT_DEPTH_RESOLUTION/32;
+#if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+        if (dispatchID.z == 2)
+            rwLightLUT_S2[tc] = PackLut(v, s.a);
+        else if (dispatchID.z == 1)
+            rwLightLUT_S1[tc] = PackLut(v, s.a);
+        else
+            rwLightLUT_P[tc] = PackLut(v, s.a);
+#else
+        rwLightLUT_P[tc] = PackLut(v, s.a);
+#endif
+    }
+}
+
+#endif
+\ No newline at end of file
diff --git a/src/shaders/ComputePhaseLookup_PS.hlsl b/src/shaders/ComputePhaseLookup_PS.hlsl
new file mode 100644
index 0000000..7487c40
--- /dev/null
+++ b/src/shaders/ComputePhaseLookup_PS.hlsl
@@ -0,0 +1,150 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Using the phase functions directly isn't correct, because they are supposed to be 
+// integrated over the subtended solid angle. This falls apart as sin(theta)
+// approaches 0 (i.e. cos(theta) approaches +1 or -1).
+// We apply a sliding scale to the functions to compensate for this somewhat.
+
+#define NORMALIZE_PHASE_FUNCTIONS 1
+
+// Isotropic phase function: the constant 1/(4*PI), independent of direction.
+float ScatterPhase_Isotropic() {
+    return 1.f / (PI * 4.f);
+}
+
+// Rayleigh phase (3/(16*PI))*(1 + cos^2); with NORMALIZE_PHASE_FUNCTIONS it is further scaled by (1 - cos^4/8).
+float ScatterPhase_Rayleigh(float cosa) {
+    const float cos_sq = cosa*cosa;
+    const float rayleigh = (3.f/(16.f*PI)) * (1.f + cos_sq);
+#if NORMALIZE_PHASE_FUNCTIONS
+    const float cos_4th = cos_sq*cos_sq;
+    return rayleigh*(1-cos_4th/8.f);
+#else
+    return rayleigh;
+#endif
+}
+
+// Henyey-Greenstein phase function for parameter g, evaluated at cosa.
+// NORMALIZE_PHASE_FUNCTIONS selects the "normalized" variant; otherwise the classic form is used.
+float ScatterPhase_HenyeyGreenstein(float cosa, float g) {
+#if NORMALIZE_PHASE_FUNCTIONS
+    // "normalized" Henyey-Greenstein
+    const float gg = g*g;
+    const float ratio = (1 - abs(g)) / sqrt( max(1-2*g*cosa+gg, 0) );
+    const float ratio_cubed = ratio*ratio*ratio;
+    const float amplitude = gg + (1 - gg) / (4*PI);
+    return amplitude * ratio_cubed;
+#else
+    // Classic Henyey-Greenstein
+    const float numer = (1.f-g*g);
+    const float denom_base = (1.f + g*g - 2.f*g*cosa);
+    return (1.f / (4.f*PI)) * numer / pow(abs(denom_base), 1.5f);
+#endif
+}
+
+// "Hazy" Mie phase: (1/(4*PI))*(0.5 + 4.5*c^8) with c = (1 + cosa)/2; the normalized form also scales by (1 - c^8/2).
+float ScatterPhase_MieHazy(float cosa) {
+    const float c1 = 0.5f*(1+cosa);
+    const float c2 = c1*c1;
+    const float c4 = c2*c2;
+    const float c8 = c4*c4;
+    const float hazy = (1.f/(4.f*PI))*(0.5f+(9.f/2.f)*c8);
+#if NORMALIZE_PHASE_FUNCTIONS
+    return hazy * (1-c8/2.0f);
+#else
+    return hazy;
+#endif
+}
+
+// "Murky" Mie phase: (1/(4*PI))*(0.5 + 16.5*c^32) with c = (1 + cosa)/2; the normalized form also scales by (1 - c^32/2).
+float ScatterPhase_MieMurky(float cosa) {
+    const float c1 = 0.5f*(1+cosa);
+    const float c2 = c1*c1;
+    const float c4 = c2*c2;
+    const float c8 = c4*c4;
+    const float c16 = c8*c8;
+    const float c32 = c16*c16;
+    const float murky = (1.f/(4.f*PI))*(0.5f+(33.f/2.f)*c32);
+#if NORMALIZE_PHASE_FUNCTIONS
+    return murky * (1-c32/2.0f);
+#else
+    return murky;
+#endif
+}
+
+// Phase LUT: for the angle PI*vTex.y, outputs the scatter-weighted average of all configured phase terms.
+float4 main(VS_QUAD_OUTPUT input) : SV_TARGET
+{
+	float cos_theta = -cos(PI*input.vTex.y);
+	float3 phase_factor = float3(0,0,0);
+    float3 total_scatter = float3(0,0,0);
+	// These must match the PhaseFunctionType enum in NvVolumetricLighting.h
+	static const uint PHASEFUNC_ISOTROPIC = 0;
+	static const uint PHASEFUNC_RAYLEIGH = 1;
+	static const uint PHASEFUNC_HG = 2;
+    static const uint PHASEFUNC_MIEHAZY = 3;
+    static const uint PHASEFUNC_MIEMURKY = 4;
+    for (uint i=0; i<g_uNumPhaseTerms; ++i)
+	{
+        float3 term_scatter = g_vPhaseParams[i].rgb;
+        total_scatter += term_scatter;
+		if (g_uPhaseFunc[i] == PHASEFUNC_ISOTROPIC)
+		{
+			phase_factor += term_scatter*ScatterPhase_Isotropic();
+		}
+		else if (g_uPhaseFunc[i] == PHASEFUNC_RAYLEIGH)
+		{
+			phase_factor += term_scatter*ScatterPhase_Rayleigh(cos_theta);
+		}
+		else if (g_uPhaseFunc[i] == PHASEFUNC_HG)
+		{
+			phase_factor += term_scatter*ScatterPhase_HenyeyGreenstein(cos_theta, g_vPhaseParams[i].a);
+		}
+		else if (g_uPhaseFunc[i] == PHASEFUNC_MIEHAZY)
+		{
+			phase_factor += term_scatter*ScatterPhase_MieHazy(cos_theta);
+		}
+		else if (g_uPhaseFunc[i] == PHASEFUNC_MIEMURKY)
+		{
+			phase_factor += term_scatter*ScatterPhase_MieMurky(cos_theta);
+		}
+    }
+	// With no terms, or a channel whose scatter sums to zero, a plain divide is 0/0 (NaN); floor the divisor so it yields 0.
+    phase_factor = phase_factor / max(total_scatter, 1.0e-20f);
+	return float4(phase_factor, 1);
+}
diff --git a/src/shaders/Debug_PS.hlsl b/src/shaders/Debug_PS.hlsl
new file mode 100644
index 0000000..a2cd9cf
--- /dev/null
+++ b/src/shaders/Debug_PS.hlsl
@@ -0,0 +1,42 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Debug output keyed on facing: front faces are opaque red, back faces opaque green.
+float4 main(PS_POLYGONAL_INPUT input, bool bIsFrontFace : SV_ISFRONTFACE) : SV_TARGET {
+	return float4(bIsFrontFace ? 1 : 0, bIsFrontFace ? 0 : 1, 0, 1);
+}
+	
+\ No newline at end of file
diff --git a/src/shaders/DownsampleDepth_PS.hlsl b/src/shaders/DownsampleDepth_PS.hlsl
new file mode 100644
index 0000000..4f4efdd
--- /dev/null
+++ b/src/shaders/DownsampleDepth_PS.hlsl
@@ -0,0 +1,82 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+Texture2D<float> tDepthMap : register(t0);	// source depth, point-sampled in main()
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+Texture2DMS<float> tDepthMap : register(t0);	// multisampled source depth; main() loads sample 0
+#endif
+
+// Identity helper; main() passes sampleID through it in the PSSL path to avoid a compiler warning.
+uint Unused(uint input) {
+	return input;
+}
+
+// Writes a down-sampled depth to SV_DEPTH. Destination pixels alternate in a checkerboard
+// between g_vJitterOffset.xy and its swizzle .yx as the offset added to the source position
+// before the depth map is read.
+float main(
+	VS_QUAD_OUTPUT input
+	, uint sampleID : SV_SAMPLEINDEX 
+	) : SV_DEPTH
+{
+	// Parity of (x + y) of the destination pixel selects the jitter swizzle.
+	const uint2 dstPixel = uint2(input.vPos.xy);
+	const bool oddCell = ((dstPixel.x + dstPixel.y) % 2) != 0;
+	const float2 jitter = oddCell ? g_vJitterOffset.xy : g_vJitterOffset.yx;
+
+	// Source position in pixels, then normalized by g_vOutputSize_Inv.
+#if defined(__PSSL__)
+	Unused(sampleID);//Fix a compiler warning with pssl.
+	const float2 srcPixel = floor(input.vTex.xy*g_vOutputViewportSize) + GetViVjLinearSample() + jitter;
+#else
+	const float2 srcPixel = EvaluateAttributeAtSample(input.vTex.xy, sampleID)*g_vOutputViewportSize + jitter;
+#endif
+	const float2 tc = srcPixel*g_vOutputSize_Inv;
+
+#if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+	return tDepthMap.SampleLevel(sPoint, tc, 0).x;
+#elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+	// Back to integer texel coordinates; sample index 0 of that texel is returned.
+	const int2 texel = int2(tc*g_vOutputSize);
+	return tDepthMap.Load(texel, 0).x;
+#endif
+}
diff --git a/src/shaders/Quad_VS.hlsl b/src/shaders/Quad_VS.hlsl
new file mode 100644
index 0000000..bbb0ae2
--- /dev/null
+++ b/src/shaders/Quad_VS.hlsl
@@ -0,0 +1,46 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Derives the vertex from its id alone: uv from the id bits, clip position from uv, world position via g_mViewProjInv.
+VS_QUAD_OUTPUT main(uint id : SV_VERTEXID) {
+	VS_QUAD_OUTPUT output;
+	output.vTex = float2((id << 1) & 2, id & 2);	// ids 0,1,2 -> (0,0), (2,0), (0,2)
+	output.vPos = float4(output.vTex * float2(2,-2) + float2(-1,1), 1, 1);
+	output.vWorldPos = mul( g_mViewProjInv, output.vPos );
+	output.vWorldPos = output.vWorldPos * (1.0f / output.vWorldPos.w);	// homogeneous divide
+	return output;
+}
diff --git a/src/shaders/RenderVolume_DS.hlsl b/src/shaders/RenderVolume_DS.hlsl
new file mode 100644
index 0000000..880e9ed
--- /dev/null
+++ b/src/shaders/RenderVolume_DS.hlsl
@@ -0,0 +1,181 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+    - SHADOWMAPTYPE_ATLAS
+    - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+    - CASCADECOUNT_1: 1
+    - CASCADECOUNT_2: 2
+    - CASCADECOUNT_3: 3
+    - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+	- VOLUMETYPE_FRUSTUM
+	- VOLUMETYPE_PARABOLOID
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#define COARSE_CASCADE (CASCADECOUNT-1)
+
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+Texture2D<float> tShadowMap : register(t1);	// addressed through the per-cascade offset/scale in SampleShadowMap()
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+Texture2DArray<float> tShadowMap : register(t1);	// slice chosen via g_uElementIndex in SampleShadowMap()
+#endif
+
+// Bilinear shadow-map fetch for one cascade after applying its scale (.zw) and offset (.xy); 1.0 if no SHADOWMAPTYPE matches.
+float SampleShadowMap(float2 tex_coord, int cascade) {
+	const float2 uv = g_vElementOffsetAndScale[cascade].zw * tex_coord + g_vElementOffsetAndScale[cascade].xy;
+#if (SHADOWMAPTYPE == SHADOWMAPTYPE_ATLAS)
+	return tShadowMap.SampleLevel( sBilinear, uv, 0).x;
+#elif (SHADOWMAPTYPE == SHADOWMAPTYPE_ARRAY)
+	return tShadowMap.SampleLevel( sBilinear, float3( uv, (float)g_uElementIndex[cascade] ), 0).x;
+#else
+	return 1.0f;
+#endif
+}
+
+// Paraboloid projection: xy is the unit direction's xy divided by (z + 1); z is the distance
+// |P| remapped linearly so that zNear -> 0 and zFar -> 1.
+float3 ParaboloidProject(float3 P, float zNear, float zFar)
+{
+	const float dist = length(P.xyz);
+	const float3 dir = P.xyz/dist;
+	const float2 planar = dir.xy / (dir.z + 1);
+	const float depth = (dist - zNear) / (zFar - zNear);
+	return float3(planar, depth);
+}
+
+// Inverse of ParaboloidProject(): rebuilds the unit direction from the projected xy, then
+// scales it by the distance encoded in P.z.
+float3 ParaboloidUnproject(float3 P, float zNear, float zFar)
+{
+	const float dist = P.z*(zFar-zNear) + zNear;
+
+	// The direction's z is the larger root of a*z^2 + b*z + c = 0, where r2 = |P.xy|^2.
+	const float r2 = P.x*P.x + P.y*P.y;
+	const float a = r2 + 1;
+	const float b = 2*r2;
+	const float c = r2 - 1;
+	const float z = (-b + sqrt(b*b - 4*a*c)) / (2*a);
+
+	// Reverse the divide by (z + 1) that the projection applied to xy.
+	const float3 dir = float3(P.x * (z + 1), P.y * (z + 1), z);
+	return dir*dist;
+}
+
+// Identity helper; main() passes its patch-constant input through it to avoid a PSSL compiler warning.
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT Unused(HS_POLYGONAL_CONSTANT_DATA_OUTPUT input) {
+	return input;
+}
+
+[domain("quad")]	// Domain shader: interpolates the patch at uv, then moves the vertex to the depth read from the shadow map.
+PS_POLYGONAL_INPUT main( HS_POLYGONAL_CONSTANT_DATA_OUTPUT input, float2 uv : SV_DOMAINLOCATION, const OutputPatch<HS_POLYGONAL_CONTROL_POINT_OUTPUT, 4> Patch )
+{
+	Unused(input);//Fix a compiler warning with pssl.
+
+	PS_POLYGONAL_INPUT output = (PS_POLYGONAL_INPUT)0;
+	// Bilinear interpolation of the 4 control points: along edges 0-1 and 3-2 by uv.x, then between them by uv.y.
+	float3 vClipIn1 = lerp(Patch[0].vClipPos.xyz, Patch[1].vClipPos.xyz, uv.x);
+	float3 vClipIn2 = lerp(Patch[3].vClipPos.xyz, Patch[2].vClipPos.xyz, uv.x);
+	float3 vClipIn = lerp(vClipIn1, vClipIn2, uv.y);
+
+	float4 vPos1 = lerp(Patch[0].vWorldPos, Patch[1].vWorldPos, uv.x);
+	float4 vPos2 = lerp(Patch[3].vWorldPos, Patch[2].vWorldPos, uv.x);
+	float4 vWorldPos = lerp(vPos1, vPos2, uv.y);
+
+	if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+	{
+		if (all(abs(vClipIn.xy) < EDGE_FACTOR))	// interior vertex; the else branch handles everything else
+		{
+			int iCascade = -1;	// stays -1 when no cascade supplies a depth
+			float4 vClipPos = float4(0,0,0,1);
+
+			[unroll]
+			for (int i = COARSE_CASCADE;i >= 0; --i)
+			{
+				// Try to refetch from finer cascade
+				float4 vClipPosCascade = mul( g_mLightProj[i], vWorldPos );
+				vClipPosCascade *= 1.f / vClipPosCascade.w;
+				if (all(abs(vClipPosCascade.xy) < 1.0f))
+				{
+					// Clip-space xy -> shadow-map uv (y flipped).
+					float2 vTex = float2(0.5*vClipPosCascade.x + 0.5, -0.5*vClipPosCascade.y + 0.5);
+					float depthSample = SampleShadowMap(vTex, i);
+					if (depthSample < 1.0f)
+					{
+						// A later (lower-index) cascade that also passes overwrites this result.
+						vClipPos.xy = vClipPosCascade.xy;
+						vClipPos.z = depthSample;
+						iCascade = i;
+					}
+				}
+			}
+
+			if (iCascade >= 0)
+			{
+				vWorldPos = mul( g_mLightProjInv[iCascade], float4(vClipPos.xyz, 1) );	// back to world space through the chosen cascade
+				vWorldPos *= 1.0f / vWorldPos.w;
+				vWorldPos.xyz = g_vEyePosition + (1.0f-g_fGodrayBias)*(vWorldPos.xyz-g_vEyePosition);	// pulled toward the eye by the fraction g_fGodrayBias
+			}
+		}
+		else
+		{
+			vWorldPos = mul(g_mLightToWorld, float4(vClipIn.xy, 1, 1));	// z fixed at 1 before the g_mLightToWorld transform
+			vWorldPos *= 1.0f / vWorldPos.w;
+		}
+	}
+	else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+	{
+        vClipIn.xyz = normalize(vClipIn.xyz);
+		float4 shadowPos = mul(g_mLightProj[0], vWorldPos);
+		shadowPos.xyz = shadowPos.xyz/shadowPos.w;
+		uint hemisphereID = (shadowPos.z > 0) ? 0 : 1;	// sign of z picks which of the two shadow-map elements is sampled
+		shadowPos.z = abs(shadowPos.z);
+		shadowPos.xyz = ParaboloidProject(shadowPos.xyz, g_fLightZNear, g_fLightZFar);
+		float2 shadowTC = float2(0.5f, -0.5f)*shadowPos.xy + 0.5f;
+        float depthSample = SampleShadowMap(shadowTC, hemisphereID);
+		float sceneDepth = depthSample*(g_fLightZFar-g_fLightZNear)+g_fLightZNear;	// sample rescaled to a distance between g_fLightZNear and g_fLightZFar
+		vWorldPos = mul( g_mLightProjInv[0], float4(vClipIn.xyz * sceneDepth, 1));
+        vWorldPos *= 1.0f / vWorldPos.w;
+	}
+
+	// Transform world position with viewprojection matrix
+	output.vWorldPos = vWorldPos;
+    output.vPos = mul( g_mViewProj, output.vPos = output.vPos, output.vWorldPos );
+    return output;
+}
diff --git a/src/shaders/RenderVolume_HS.hlsl b/src/shaders/RenderVolume_HS.hlsl
new file mode 100644
index 0000000..1689e15
--- /dev/null
+++ b/src/shaders/RenderVolume_HS.hlsl
@@ -0,0 +1,182 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SHADOWMAPTYPE:
+    - SHADOWMAPTYPE_ATLAS
+    - SHADOWMAPTYPE_ARRAY
+
+- CASCADECOUNT:
+    - CASCADECOUNT_1: 1
+    - CASCADECOUNT_2: 2
+    - CASCADECOUNT_3: 3
+    - CASCADECOUNT_4: 4
+
+- VOLUMETYPE:
+	- VOLUMETYPE_FRUSTUM
+	- VOLUMETYPE_PARABOLOID
+
+- MAXTESSFACTOR:
+    - MAXTESSFACTOR_LOW: 16.0f
+    - MAXTESSFACTOR_MEDIUM: 32.0f
+    - MAXTESSFACTOR_HIGH: 64.0f
+%% MUX_END %%
+*/
+
+#define COARSE_CASCADE (CASCADECOUNT-1)
+
+#include "ShaderCommon.h"
+
+// Returns the point of segment [vStartPos, vEndPos] that lies closest to g_vEyePosition.
+float3 NearestPos(float3 vStartPos, float3 vEndPos)
+{
+    float3 vSegment = (vEndPos - vStartPos);
+    float fSegmentLength = length(vSegment);
+    float fAlong = dot(g_vEyePosition - vStartPos, vSegment)/fSegmentLength; // distance of the eye's projection along the segment
+    return vStartPos + (clamp(fAlong, 0, fSegmentLength)/fSegmentLength)*vSegment;
+}
+
+// Tessellation factor for the world-space segment [vStartPos, vEndPos], clamped to [1, MAXTESSFACTOR].
+float CalcTessFactor(float3 vStartPos, float3 vEndPos)
+{
+    float fSegmentLength = length(vEndPos - vStartPos);
+    float fMidpointDist = length(0.5f*(vStartPos+vEndPos) - g_vEyePosition);
+    // Project a point at the midpoint's eye distance to obtain the clip-space w used for scaling
+    float fClipW = mul( g_mProj, float4(0, 0, fMidpointDist, 1) ).w;
+    float fProjectedSize = abs(fSegmentLength * g_mProj._m11 / fClipW);
+    return clamp((fProjectedSize*g_vOutputViewportSize.y)/(g_fTargetRaySize), 1, MAXTESSFACTOR);
+}
+
+bool IntersectsFrustum(float4 vPos1, float4 vPos2) // false only when both points fail the X, the Y and the Z test together
+{
+	bool bBothOutsideX = (vPos1.x > 1.0 && vPos2.x > 1.0) || (vPos1.x < -1.0 && vPos2.x < -1.0);
+	bool bBothOutsideY = (vPos1.y > 1.0 && vPos2.y > 1.0) || (vPos1.y < -1.0 && vPos2.y < -1.0);
+	return !(bBothOutsideX && bBothOutsideY && (vPos1.z < 0.0 && vPos2.z < 0.0)); // NOTE(review): a reject on ANY single axis would cull more; confirm all three are meant to be required
+}
+// Patch-constant function: frustum-tests the 4-point patch and, if visible, computes four edge and two inside tessellation factors.
+HS_POLYGONAL_CONSTANT_DATA_OUTPUT HS_POLYGONAL_CONSTANT_FUNC( /*uint PatchID : SV_PRIMITIVEID,*/ const OutputPatch<HS_POLYGONAL_CONTROL_POINT_OUTPUT, 4> opPatch)
+{
+	HS_POLYGONAL_CONSTANT_DATA_OUTPUT output  = (HS_POLYGONAL_CONSTANT_DATA_OUTPUT)0;
+
+	bool bIsVisible = false;
+#if 1
+	// Frustum cull: the patch is visible if any control point's origin-to-surface segment passes IntersectsFrustum
+	[unroll]
+	for (int j=0; j<4; ++j)
+	{
+		float4 vScreenClip = mul(g_mViewProj, opPatch[j].vWorldPos);
+		vScreenClip *= 1.0f / vScreenClip.w;
+		float4 vOriginPos = float4(0,0,0,1);
+		if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+		{
+			vOriginPos = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1)); // same light clip-space xy, at z = 0
+		}
+		else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+		{
+			vOriginPos = float4(g_vLightPos, 1); // every segment starts at g_vLightPos
+		}
+		float4 vScreenClipOrigin = mul(g_mViewProj, vOriginPos);
+		vScreenClipOrigin *= 1.0f / vScreenClipOrigin.w; 
+		bIsVisible = bIsVisible || IntersectsFrustum(vScreenClip, vScreenClipOrigin);
+	}
+#else
+	bIsVisible = true;
+#endif
+
+	if (bIsVisible)
+	{
+        float3 nearest_pos[4]; // per control point: point of its origin-to-surface segment nearest to the eye
+        for (int j=0; j < 4; ++j)
+        {
+            float3 start_pos;
+            if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+            {
+                float4 p = mul(g_mLightToWorld, float4(opPatch[j].vClipPos.xy, 0, 1));
+                start_pos = p.xyz / p.w;
+            }
+            else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+                start_pos = g_vLightPos;
+            else
+                start_pos = float3(0, 0, 0);
+            nearest_pos[j] = NearestPos(start_pos, opPatch[j].vWorldPos.xyz);
+        }
+		// Edge k joins control points (k+3)%4 and k; its factor is the larger of the "nearest" and the surface estimate
+		float tess_factor[4];
+		[unroll]
+		for (int k=0; k<4; ++k)
+		{
+            float tess_near = CalcTessFactor(nearest_pos[(k+3)%4], nearest_pos[k]);
+            float tess_far = CalcTessFactor(opPatch[(k+3)%4].vWorldPos.xyz, opPatch[k].vWorldPos.xyz);
+            tess_factor[k] = max(tess_near, tess_far);
+            if (VOLUMETYPE == VOLUMETYPE_FRUSTUM)
+            {
+                bool bIsEdge = !(all((abs(opPatch[(k + 3) % 4].vClipPos.xy) < EDGE_FACTOR) || (abs(opPatch[k].vClipPos.xy) < EDGE_FACTOR))); // true when, in x or in y, both end points have |coord| >= EDGE_FACTOR
+                output.fEdges[k] = (bIsEdge) ? 1.0f : tess_factor[k]; // such border edges are left unsplit
+            }
+            else if (VOLUMETYPE == VOLUMETYPE_PARABOLOID)
+            {
+                output.fEdges[k] = tess_factor[k];
+            }
+            else
+            {
+                output.fEdges[k] = 1;
+            }
+            
+		}
+		output.fInside[0] = max(tess_factor[1], tess_factor[3]);
+        output.fInside[1] = max(tess_factor[0], tess_factor[2]);
+	}
+	else
+	{
+		// All factors zero; NOTE(review): relies on the tessellator discarding patches with zero edge factors
+		output.fEdges[0] = 0;
+		output.fEdges[1] = 0;
+		output.fEdges[2] = 0;
+		output.fEdges[3] = 0;
+		output.fInside[0] = 0;
+		output.fInside[1] = 0;
+	}
+
+	return output;
+}
+                          
+[domain("quad")]
+[partitioning("integer")]
+[outputtopology("triangle_ccw")]
+[outputcontrolpoints(4)]
+[patchconstantfunc("HS_POLYGONAL_CONSTANT_FUNC")]
+[maxtessfactor(MAXTESSFACTOR)]
+HS_POLYGONAL_CONTROL_POINT_OUTPUT main( InputPatch<HS_POLYGONAL_INPUT, 4> ipPatch, uint uCPID : SV_OUTPUTCONTROLPOINTID )
+{
+	HS_POLYGONAL_CONTROL_POINT_OUTPUT controlPoint = (HS_POLYGONAL_CONTROL_POINT_OUTPUT)0; // pass-through: fields not copied below stay zero
+	controlPoint.vClipPos = ipPatch[uCPID].vClipPos;
+	controlPoint.vWorldPos = ipPatch[uCPID].vWorldPos;
+	return controlPoint;
+}
diff --git a/src/shaders/RenderVolume_PS.hlsl b/src/shaders/RenderVolume_PS.hlsl
new file mode 100644
index 0000000..f2724c2
--- /dev/null
+++ b/src/shaders/RenderVolume_PS.hlsl
@@ -0,0 +1,403 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+%% MUX_BEGIN %%
+# Define the shader permutations for code generation
+
+# Are we operating on single sample or MSAA buffer
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+# What type of light are we rendering
+- LIGHTMODE:
+    - LIGHTMODE_DIRECTIONAL
+    - LIGHTMODE_SPOTLIGHT
+    - LIGHTMODE_OMNI
+
+# What sort of pass are we rendering
+- PASSMODE:
+    - PASSMODE_GEOMETRY
+    - PASSMODE_SKY
+    - PASSMODE_FINAL
+
+# What is our distance attenuation function
+- ATTENUATIONMODE:
+    - ATTENUATIONMODE_NONE
+    - ATTENUATIONMODE_POLYNOMIAL
+    - ATTENUATIONMODE_INV_POLYNOMIAL
+
+# What is our spotlight angular falloff mode
+- FALLOFFMODE:
+    - FALLOFFMODE_NONE
+    - FALLOFFMODE_FIXED
+    - FALLOFFMODE_CUSTOM
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+#if (PASSMODE == PASSMODE_FINAL)
+#   if (SAMPLEMODE == SAMPLEMODE_SINGLE)
+        // Single-sample depth: reads mip 0 of the texel at pos; the sample index s is ignored.
+        Texture2D<float> tSceneDepth : register(t2);
+        float LoadSceneDepth(uint2 pos, uint s)
+        {
+            return tSceneDepth.Load(int3(pos.xy, 0)).x;
+        }
+
+#   elif (SAMPLEMODE == SAMPLEMODE_MSAA)
+        // Multisampled depth: reads sample s of the texel at pos.
+        Texture2DMS<float> tSceneDepth : register(t2);
+        float LoadSceneDepth(uint2 pos, uint s)
+        {
+            return tSceneDepth.Load(int2(pos.xy), s).x;
+        }
+
+#   endif
+#else
+    // No depth texture is declared for the other pass modes; this stub always reports 1.0f.
+    float LoadSceneDepth(uint2 pos, uint s)
+    {
+        return 1.0f;
+    }
+
+#endif
+
+Texture2D<float4> tPhaseLUT : register(t4); // handed to GetPhaseFactor together with a cosine
+Texture2D<float4> tLightLUT_P : register(t5); // sampled through SampleLut by the omni and spotlight integrators
+Texture2D<float4> tLightLUT_S1 : register(t6); // FALLOFFMODE_FIXED only: term weighted by WdotL
+Texture2D<float4> tLightLUT_S2 : register(t7); // FALLOFFMODE_FIXED only: term weighted by VdotL
+
+// LUT X coordinate for ray distance t: (t - start) / (g_fLightZFar + light_dist - start), start = max(0, light_dist - g_fLightZFar).
+float GetLutCoord_X(float t, float light_dist)
+{
+    float fStart = max(0.0f, light_dist-g_fLightZFar);
+    return (t-fStart) / (g_fLightZFar + light_dist - fStart);
+}
+
+float GetLutCoord_Y(float cos_theta) // LUT Y coordinate: acos(-cos_theta)/PI, i.e. an angle mapped to [0,1]
+{
+    return acos(clamp(-cos_theta, -1.0f, 1.0f)) / PI; // clamp: acos is only defined on [-1,1] and a dot product of normalized vectors can overshoot by rounding
+}
+
+float3 SampleLut(Texture2D tex, float2 tc) // fetches mip 0 at tc with sBilinear and returns rgb scaled by alpha
+{
+    float4 vTexel = tex.SampleLevel(sBilinear, tc, 0);
+    return vTexel.a*vTexel.rgb;
+}
+////////////////////////////////////////////////////////////////////////////////
+// Integration code: INTEGRATE stores in `result` the integral of fn(data, t) over [t0, t1], using step_count steps with
+// sample weights 1,4,2,...,4,1 times t_step/3 (composite Simpson's rule); the weights only line up with t1 for an even step_count.
+#define INTEGRATE(result, fn, data, step_count, t0, t1) \
+{                                                       \
+    float t_step = (t1-t0)/float(step_count);           \
+    float3 sum = float3(0,0,0);                         \
+    sum += fn(data, t0);                                \
+    float t = t0+t_step;                                \
+    [unroll]                                            \
+    for (uint istep=1; istep<step_count-1; istep += 2)  \
+    {                                                   \
+        sum += 4*fn(data, t);                           \
+        t += t_step;                                    \
+        sum += 2*fn(data, t);                           \
+        t += t_step;                                    \
+    }                                                   \
+    sum += 4*fn(data, t);                               \
+    sum += fn(data, t1);                                \
+    result = (t_step/3.0f) * sum;                       \
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Directional Light 
+
+struct LightEvaluatorData_Directional { // inputs of LightEvaluator_Directional
+    float VdotL; // dot(vV, vL) as computed by Integrate_Directional
+    float3 sigma; // per-channel coefficient used in exp(-sigma * distance)
+};
+
+float3 LightEvaluator_Directional(LightEvaluatorData_Directional data, float t) // integrand: exp(-sigma*(t + g_fLightToEyeDepth + t*VdotL))
+{
+    float3 vLightPath = g_fLightToEyeDepth + t*data.VdotL;
+    return exp(-data.sigma*(vLightPath+t));
+}
+
+// Numerically integrates LightEvaluator_Directional over [0, eye_dist] and applies the phase factor for -VdotL.
+float3 Integrate_Directional(float eye_dist, float3 vV, float3 vL)
+{
+    float VdotL = dot(vV, vL);
+    LightEvaluatorData_Directional evaluator;
+    evaluator.sigma = g_vSigmaExtinction; // must be set: both the integrand and the final exp() read evaluator.sigma
+    evaluator.VdotL = VdotL;
+    const uint STEP_COUNT = 6; // keep even: INTEGRATE's 4/2 weighting assumes an even step count
+    float3 integral = float3(0,0,0);
+    INTEGRATE(integral, LightEvaluator_Directional, evaluator, STEP_COUNT, 0, eye_dist);
+    return GetPhaseFactor(tPhaseLUT, -VdotL)*integral*exp(g_fLightToEyeDepth*(evaluator.sigma.r+evaluator.sigma.g+evaluator.sigma.b)/3.f);
+}
+
+// Closed form: phase(-VdotL) * (1 - exp(-sigma*eye_dist)) / sigma, with sigma = g_vSigmaExtinction.
+float3 Integrate_SimpleDirectional(float eye_dist, float3 vV, float3 vL)
+{
+    float3 vExtinction = g_vSigmaExtinction;
+    float3 vPhase = GetPhaseFactor(tPhaseLUT, -dot(vV, vL));
+    return vPhase * (1 - exp(-vExtinction*eye_dist)) / (vExtinction);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Spotlight
+// Intersects the ray vW + t*vV with the cone of axis vL and half-angle cosine cos_theta; writes the interval to t0/t1 and returns whether it should be used.
+bool IntersectCone(out float t0, out float t1, float t_max, float cos_theta, float3 vW, float3 vV, float3 vL, float WdotL, float VdotL)
+{
+    float cos_sqr = cos_theta * cos_theta;
+    float sin_sqr = 1 - cos_sqr;
+    float3 v_proj = vV - VdotL*vL; // vV with its component along vL removed (assumes unit vL)
+    float3 w_proj = vW - WdotL*vL; // vW with its component along vL removed (assumes unit vL)
+    // Coefficients of A*t^2 + B*t + C = 0, from cos^2*|P_perp|^2 = sin^2*(P.vL)^2 with P = vW + t*vV
+    float A = cos_sqr*dot(v_proj, v_proj) - sin_sqr*VdotL*VdotL;
+    float B = 2 * cos_sqr*dot(v_proj, w_proj) - 2 * sin_sqr*VdotL*WdotL;
+    float C = cos_sqr*dot(w_proj, w_proj) - sin_sqr*WdotL*WdotL;
+
+    float det = B*B - 4 * A*C;
+    float denom = 2 * A;
+    if (det < 0.0f || denom == 0.0f) // no real roots, or the equation is not quadratic
+    {
+        t0 = 0;
+        t1 = 0;
+        return false;
+    }
+    else
+    {
+        bool hit = true;
+        float root = sqrt(det);
+        t0 = (-B - root) / denom;
+        t1 = (-B + root) / denom;
+        // Classify the ray origin by the cosine of the angle between vW and vL (1 is used when vW has zero length)
+        float vW_len = length(vW);
+        float WdotL_norm = (vW_len > 0.0f) ? WdotL / vW_len : 1.0f;
+        if (WdotL_norm >= cos_theta) // origin within the cone angle: the interval starts at 0
+        {
+            if (VdotL >= cos_theta) // direction within the cone angle as well: run to t_max
+                t1 = t_max;
+            t0 = 0;
+        }
+        else if (WdotL_norm <= -cos_theta) // origin within the mirrored cone on the opposite side
+        {
+            if (t0 < 0 && t1>0)
+                hit = false;
+            t0 = t0; // no-op, kept as in the original
+            t1 = t_max;
+        }
+        else // origin outside both halves of the double cone
+        {
+            if (t0 < 0 && t1 < 0)
+                hit = false;
+            else if (dot(vL, vW + t0*vV) < 0) // the t0 hit point lies on the side facing away from vL
+                hit = false;
+            else if (t1<0)
+                t1 = t_max;
+        }
+        // Reject intervals that begin beyond t_max
+        if (t0 > t_max)
+        {
+            t0 = 0;
+            t1 = 0;
+            hit = false;
+        }
+
+        return hit;
+    }
+}
+
+struct LightEvaluatorData_Spotlight // inputs of LightEvaluator_Spotlight, filled in by Integrate_Spotlight
+{
+    float3 sigma; // g_vSigmaExtinction
+    float light_theta; // g_fLightFalloffAngle; compared against a cosine in the evaluator
+    float light_falloff_power; // g_fLightFalloffPower, exponent of the angular falloff
+    float Wsqr; // dot(vW, vW)
+    float WdotV; // dot(vW, vV)
+    float WdotL; // dot(vW, vL)
+    float VdotL; // dot(vV, vL)
+};
+
+float3 LightEvaluator_Spotlight(LightEvaluatorData_Spotlight data, float t) // integrand evaluated at distance t along the view ray
+{
+    const float MIN_ANGLE_FACTOR = 0.000001f;
+    // |W + t*V|^2 expanded assuming unit-length V, clamped at zero before the square root
+    float fLightDistSqr = max(data.Wsqr+2*data.WdotV*t+t*t, 0.0f);
+    float fLightDist = sqrt(fLightDistSqr);
+    float fCosPhi = (t>0 && fLightDist>0) ? (t*t + fLightDistSqr - data.Wsqr) / (2 * t*fLightDist) : 0;
+    // Angular falloff: cosine against the light axis, remapped by light_theta and raised to the falloff power
+    float fCosAlpha = (fLightDist>0.0f) ? (data.WdotL + t*data.VdotL)/fLightDist : 1.0f;
+    float fAngleFactor = saturate(fCosAlpha-data.light_theta)/(1-data.light_theta);
+    float fSpot = (fAngleFactor > MIN_ANGLE_FACTOR) ? pow(abs(fAngleFactor), data.light_falloff_power) : 0.0f;
+    float3 vPhase = GetPhaseFactor(tPhaseLUT, -fCosPhi);
+    float fDistanceAtten = AttenuationFunc(fLightDist);
+    return vPhase*fDistanceAtten*fSpot*exp(-data.sigma*(t+fLightDist));
+}
+
+// Accumulates spotlight scattering over the ray interval [t0, t1] that IntersectCone reports, capped at eye_dist.
+float3 Integrate_Spotlight(float eye_dist, float3 vW, float3 vV, float3 vL)
+{
+    float3 integral = float3(0, 0, 0);
+    float WdotL = dot(vW, vL);
+    float VdotL = dot(vV, vL);
+    float t0=0, t1=1;
+    if (!IntersectCone(t0, t1, eye_dist, g_fLightFalloffAngle, vW, vV, vL, WdotL, VdotL))
+        return integral; // no interval reported: nothing is accumulated
+    t1 = min(t1, eye_dist);
+
+    if (FALLOFFMODE == FALLOFFMODE_NONE) // LUT value at t1 minus, when t0 > 0, the LUT value at t0
+    {
+        float fLightDist = length(vW);
+        float2 vLutFar;
+        vLutFar.x = GetLutCoord_X(t1, fLightDist);
+        vLutFar.y = GetLutCoord_Y(dot(vW / fLightDist, vV));
+        integral = SampleLut(tLightLUT_P, vLutFar);
+        if (t0 > 0)
+        {
+            float2 vLutNear = float2(GetLutCoord_X(t0, fLightDist), vLutFar.y);
+            integral -= SampleLut(tLightLUT_P, vLutNear);
+        }
+        integral *= g_vScatterPower;
+    }
+    else if (FALLOFFMODE == FALLOFFMODE_FIXED)
+    {
+        // Same far-minus-near scheme, combining three LUTs weighted by WdotL, VdotL and g_fLightFalloffAngle
+        float fLightDist = length(vW);
+        float2 vLutFar;
+        vLutFar.x = GetLutCoord_X(t1, fLightDist);
+        vLutFar.y = GetLutCoord_Y(dot(vW / fLightDist, vV));
+        integral = WdotL*SampleLut(tLightLUT_S1, vLutFar) + VdotL*SampleLut(tLightLUT_S2, vLutFar) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, vLutFar);
+        if (t0 > 0)
+        {
+            float2 vLutNear = float2(GetLutCoord_X(t0, fLightDist), vLutFar.y);
+            integral -= WdotL*SampleLut(tLightLUT_S1, vLutNear) + VdotL*SampleLut(tLightLUT_S2, vLutNear) - g_fLightFalloffAngle*SampleLut(tLightLUT_P, vLutNear);
+        }
+        integral *= g_vScatterPower / (1-g_fLightFalloffAngle);
+    }
+    else if (FALLOFFMODE == FALLOFFMODE_CUSTOM)
+    {
+        // No LUT for an arbitrary falloff power: integrate LightEvaluator_Spotlight numerically over [t0, t1]
+        LightEvaluatorData_Spotlight evaluator;
+        evaluator.VdotL = VdotL;
+        evaluator.WdotL = WdotL;
+        evaluator.WdotV = dot(vW, vV);
+        evaluator.Wsqr = dot(vW, vW);
+        evaluator.light_falloff_power = g_fLightFalloffPower;
+        evaluator.light_theta = g_fLightFalloffAngle;
+        evaluator.sigma = g_vSigmaExtinction;
+        const uint STEP_COUNT = 8;
+        INTEGRATE(integral, LightEvaluator_Spotlight, evaluator, STEP_COUNT, t0, t1);
+        integral *= 6; // NOTE(review): unexplained scale factor; confirm where the 6 comes from
+    }
+    return integral;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Omni
+
+// Omni light: one tLightLUT_P lookup addressed by eye_dist and the angle between vW and vV, scaled by g_vScatterPower.
+float3 Integrate_Omni(float eye_dist, float3 vW, float3 vV)
+{
+    float fLightDist = length(vW);
+    float2 vLutCoord;
+    vLutCoord.x = GetLutCoord_X(eye_dist, fLightDist);
+    vLutCoord.y = GetLutCoord_Y(dot(vW / fLightDist, vV));
+    return g_vScatterPower*SampleLut(tLightLUT_P, vLutCoord);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Shader Entrypoint
+// Pixel shader: derives the view ray and its length for the current PASSMODE, integrates the light selected by LIGHTMODE along it and returns the signed result.
+float4 main(
+#if (PASSMODE == PASSMODE_FINAL)
+    VS_QUAD_OUTPUT pi
+    , uint sampleID : SV_SAMPLEINDEX
+#else
+    PS_POLYGONAL_INPUT pi
+#endif  
+    , bool bIsFrontFace : SV_ISFRONTFACE
+        ) : SV_TARGET
+{
+#if (PASSMODE != PASSMODE_FINAL)
+    uint sampleID = 0;
+#endif
+    float fSign = 0; // multiplies the returned integral
+    float4 vWorldPos = float4(0, 0, 0, 1);
+    float eye_dist = 0; // length of the view ray to integrate over
+    float3 vV = float3(0, 0, 0); // unit view direction from the eye
+    if (PASSMODE == PASSMODE_GEOMETRY)
+    {
+        fSign = bIsFrontFace ? -1.0f : 1.0f; // front faces subtract, back faces add
+        vWorldPos = pi.vWorldPos;
+        eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+        vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+    }
+    else if (PASSMODE == PASSMODE_SKY)
+    {
+        fSign = 1.0f;
+        eye_dist = g_fZFar; // the ray is pushed out to distance g_fZFar
+        vV = normalize(pi.vWorldPos.xyz - g_vEyePosition.xyz);
+        vWorldPos.xyz = g_vEyePosition.xyz + vV * eye_dist;
+        vWorldPos.w = 1;
+    }
+    else if (PASSMODE == PASSMODE_FINAL)
+    {
+        fSign = 1.0f;
+        float fSceneDepth = LoadSceneDepth(pi.vPos.xy, sampleID);
+        float4 vClipPos; // pixel position mapped to [-1,1] with y flipped, plus the loaded depth
+        vClipPos.xy = float2(2, -2)*g_vViewportSize_Inv*pi.vPos.xy + float2(-1.0f, 1.0f);
+        vClipPos.z = fSceneDepth;
+        vClipPos.w = 1;
+        vWorldPos = mul(g_mViewProjInv, vClipPos);
+        vWorldPos *= 1.0f / vWorldPos.w;
+        eye_dist = length(vWorldPos.xyz - g_vEyePosition.xyz);
+        vV = (vWorldPos.xyz - g_vEyePosition.xyz) / eye_dist;
+    }
+
+    float3 vL = g_vLightDir.xyz;
+    // vW passed to the spot and omni integrators is the vector from the light position to the eye
+    float3 integral = float3(0,0,0);
+    if (LIGHTMODE == LIGHTMODE_DIRECTIONAL)
+    {
+        integral = Integrate_SimpleDirectional(eye_dist, vV, vL);
+    }
+    else if (LIGHTMODE == LIGHTMODE_SPOTLIGHT)
+    {
+        float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz;
+        integral = Integrate_Spotlight(eye_dist, vW, vV, vL);
+    }
+    else if (LIGHTMODE == LIGHTMODE_OMNI)
+    {
+        float3 vW = g_vEyePosition.xyz - g_vLightPos.xyz;
+        integral = Integrate_Omni(eye_dist, vW, vV);
+    }
+    return float4(fSign*integral*g_vLightIntensity.rgb, 0);
+}
diff --git a/src/shaders/RenderVolume_VS.hlsl b/src/shaders/RenderVolume_VS.hlsl
new file mode 100644
index 0000000..dc5cdb8
--- /dev/null
+++ b/src/shaders/RenderVolume_VS.hlsl
@@ -0,0 +1,204 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+- MESHMODE:
+    - MESHMODE_FRUSTUM_GRID
+    - MESHMODE_FRUSTUM_BASE
+    - MESHMODE_FRUSTUM_CAP
+    - MESHMODE_OMNI_VOLUME
+    - MESHMODE_GEOMETRY
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+// Bypass vertex shader: builds the volume mesh from SV_VERTEXID (or forwards input_position for MESHMODE_GEOMETRY) and outputs light clip-space, world and view-projected positions.
+HS_POLYGONAL_INPUT main( 
+#if (MESHMODE == MESHMODE_GEOMETRY)
+    float4 input_position : POSITION,
+#endif
+    uint id : SV_VERTEXID )
+{
+#if (MESHMODE != MESHMODE_GEOMETRY)
+    float4 input_position = float4(0,0,0,1);
+#endif
+	HS_POLYGONAL_INPUT output;
+    //
+    // Generate the mesh dynamically from the vertex ID
+    //
+    if (MESHMODE == MESHMODE_FRUSTUM_GRID) // 4-vertex patches, g_uMeshResolution per row, tiling [-1,1]^2 at z = 1
+    {
+        const float patch_size = 2.0f / float(g_uMeshResolution);
+        uint patch_idx = id / 4;
+        uint patch_row = patch_idx / g_uMeshResolution;
+        uint patch_col = patch_idx % g_uMeshResolution;
+        output.vClipPos.x = patch_size*patch_col - 1.0f;
+        output.vClipPos.y = patch_size*patch_row - 1.0f;
+        // Corner within the patch, in the order (0,0) (1,0) (1,1) (0,1)
+        uint vtx_idx = id % 4;
+        float2 vtx_offset;
+        if (vtx_idx == 0)
+        {
+            vtx_offset = float2(0, 0);
+        }
+        else if (vtx_idx == 1)
+        {
+            vtx_offset = float2(1, 0);
+        }
+        else if (vtx_idx == 2)
+        {
+            vtx_offset = float2(1, 1);
+        }
+        else // if (vtx_idx == 3)
+        {
+            vtx_offset = float2(0, 1);
+        }
+        output.vClipPos.xy += patch_size * vtx_offset;
+
+        output.vClipPos.z = 1.0f;
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_FRUSTUM_BASE) // triangles over [-1,1]^2 at z = 1
+    {
+        uint vtx_idx = id % 3;
+        output.vClipPos.x = (vtx_idx == 0) ? 1 : -1;
+        output.vClipPos.y = (vtx_idx == 2) ? -1 : 1;
+        output.vClipPos.xy *= (id/3 == 0) ? 1 : -1; // every triangle after the first is mirrored through the origin
+        output.vClipPos.z = 1.0f;
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_FRUSTUM_CAP) // four side faces of g_uMeshResolution+1 triangles each, then the z = 0 face
+    {
+        uint tris_per_face = g_uMeshResolution+1;
+        uint verts_per_face = 3*tris_per_face;
+        uint face_idx = id / verts_per_face;
+        uint vtx_idx = id % 3;
+        if (face_idx < 4)
+        {
+            // Cap Side
+            const float patch_size = 2.0f / float(g_uMeshResolution);
+            const uint split_point = (g_uMeshResolution+1)/2;
+            float3 v;
+            uint tri_idx = (id%verts_per_face)/3;
+            if (tri_idx < g_uMeshResolution) // fan triangle: one corner at y = 0, two neighbouring grid points at y = 1
+            {
+                if (vtx_idx == 0)
+                    v.x = (tri_idx >= split_point) ? 1 : -1;
+                else if (vtx_idx == 1)
+                    v.x = patch_size * tri_idx - 1;
+                else // if (vtx_idx == 2)
+                    v.x = patch_size * (tri_idx+1) - 1;
+                v.y = (vtx_idx == 0) ? 0 : 1;
+            }
+            else // last triangle of the face: both y = 0 corners plus the split point at y = 1
+            {
+                if (vtx_idx == 1)
+                    v.x = patch_size*split_point-1;
+                else
+                    v.x = (vtx_idx == 0) ? -1 : 1;
+                v.y = (vtx_idx == 1) ? 1 : 0;
+            }
+            v.z = 1;
+            v.xz *= (face_idx/2 == 0) ? 1 : -1;
+            output.vClipPos.xyz = (face_idx%2 == 0) ? v.zxy : v.xzy*float3(-1,1,1); // place the face template on one of the four sides
+        }
+        else
+        {
+            // Z=0
+            uint tri_idx = (id-4*verts_per_face)/3;
+            output.vClipPos.x = (vtx_idx == 1) ? 1 : -1;
+            output.vClipPos.y = (vtx_idx == 2) ? 1 : -1;
+            output.vClipPos.xy *= (tri_idx == 0) ? 1 : -1;
+            output.vClipPos.z = 0.0f;
+        }
+        output.vClipPos.w = 1.0f;
+    }
+    else if (MESHMODE == MESHMODE_OMNI_VOLUME) // quad patches on cube faces, normalized into unit directions
+    {
+        uint verts_per_face = 4*g_uMeshResolution*g_uMeshResolution;
+        uint face_idx = id / verts_per_face;
+        uint face_vert_idx = id % verts_per_face;
+
+        const float patch_size = 2.0f / float(g_uMeshResolution);
+        uint patch_idx = face_vert_idx / 4;
+        uint patch_row = patch_idx / g_uMeshResolution;
+        uint patch_col = patch_idx % g_uMeshResolution;
+
+        float3 P;
+        P.x = patch_size*patch_col - 1.0f;
+        P.y = patch_size*patch_row - 1.0f;
+        // Corner within the patch, in the order (0,0) (1,0) (1,1) (0,1)
+        uint vtx_idx = id % 4;
+        float2 vtx_offset;
+        if (vtx_idx == 0)
+        {
+            vtx_offset = float2(0, 0);
+        }
+        else if (vtx_idx == 1)
+        {
+            vtx_offset = float2(1, 0);
+        }
+        else if (vtx_idx == 2)
+        {
+            vtx_offset = float2(1, 1);
+        }
+        else // if (vtx_idx == 3)
+        {
+            vtx_offset = float2(0, 1);
+        }
+        P.xy += patch_size * vtx_offset;
+        P.z = ((face_idx / 3) == 0) ? 1 : -1; // face_idx / 3 picks the side, face_idx % 3 the axis arrangement below
+        if ((face_idx % 3) == 0)
+            P.yzx = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+        else if ((face_idx % 3) == 1)
+            P.xzy = P.xyz * (((face_idx / 3) == 1) ? float3(1,1,1) : float3(-1,1,1)); // NOTE(review): tests == 1 where the other two test == 0; presumably compensates the axis swap, confirm
+         else //if ((face_idx % 3) == 2)
+            P.xyz = P.xyz * (((face_idx / 3) == 0) ? float3(1,1,1) : float3(-1,1,1));
+        output.vClipPos = float4(normalize(P.xyz), 1);
+    }
+    else
+    {
+        output.vClipPos = input_position;
+
+    }
+    // Omni directions are scaled by g_fLightZFar before the light-to-world transform; other modes transform vClipPos directly
+    if (MESHMODE == MESHMODE_OMNI_VOLUME)
+    {
+        output.vWorldPos = mul(g_mLightToWorld, float4(g_fLightZFar*output.vClipPos.xyz, 1));
+    }
+    else
+    {
+	    output.vWorldPos = mul(g_mLightToWorld, output.vClipPos);
+    }
+    output.vWorldPos = output.vWorldPos / output.vWorldPos.w;
+	output.vPos = mul(g_mViewProj, output.vWorldPos);
+	return output;
+}
diff --git a/src/shaders/Resolve_PS.hlsl b/src/shaders/Resolve_PS.hlsl
new file mode 100644
index 0000000..72c07f9
--- /dev/null
+++ b/src/shaders/Resolve_PS.hlsl
@@ -0,0 +1,179 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+- SAMPLEMODE:
+    - SAMPLEMODE_SINGLE
+    - SAMPLEMODE_MSAA
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+struct RESOLVE_OUTPUT // the two render targets written by the resolve pass
+{
+	float3 color : SV_TARGET0; // weighted average of the valid color samples
+	float2 depth : SV_TARGET1; // x: weighted mean of linearized depth, y: weighted mean of its square
+};
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+Texture2DMS<float4> tGodraysBuffer : register(t0); // color input; main reads .rgb of each sample
+Texture2DMS<float> tGodraysDepth : register(t1); // depth input; main linearizes it with LinearizeDepth
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+Texture2D<float4> tGodraysBuffer : register(t0); // single-sample variants of the same two inputs
+Texture2D<float> tGodraysDepth : register(t1);
+#endif
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA))
+Texture2D<int2> tFMask_color : register(t2); // packed fragment indices, decoded by getFmask/getFptr
+#endif
+
+#if defined(__PSSL__)
+static const int FMASK_UNKNOWN = 1 << 3; // color "unknown" is always represented as high bit in the 4bit fragment index
+
+int2 getFmask(Texture2D <int2> tex, int sample_count, int2 coord)
+{
+	// Each 32-bit word packs eight 4-bit entries, so up to 8 coverage samples fit in the first word alone.
+	const int3 texel = int3(coord, 0);
+	if (sample_count > 8)
+	{
+		// More than 8 coverage samples: both words carry entries.
+		return tex.Load(texel).xy;
+	}
+	// 8 or fewer: fetch only the first word and fill the second with 0x8 ("unknown") nibbles,
+	// which in theory are never consulted.
+	int2 fmask;
+	fmask.x = tex.Load(texel).x;
+	fmask.y = 0x88888888;
+	return fmask;
+}
+
+int getFptr(int index, int2 fmask)
+{
+	// Every coverage sample owns a 4-bit fragment index (fptr); eight of them are packed per 32-bit word.
+	const int     bitsPerEntry = 4;
+	const int     entryMask = (1 << bitsPerEntry) - 1;
+	const int     word = (index < 8) ? fmask.x : fmask.y;
+	const int     slot = (index < 8) ? index : (index-8);
+	return (word >> (slot*bitsPerEntry)) & entryMask;
+}
+#endif
+// Resolve pass: filters color and linearized depth over a (2*KERNEL_WIDTH+1)^2 texel neighbourhood (all samples of each texel in MSAA mode) using GaussianApprox weights.
+RESOLVE_OUTPUT main(VS_QUAD_OUTPUT input)
+{	
+	float3 result_color = 0.0f;
+	float result_depth = 0.0f;
+	float result_depth_sqr = 0.0f;
+
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+	uint2 buffer_size;
+	uint buffer_samples;
+	tGodraysBuffer.GetDimensions(buffer_size.x, buffer_size.y, buffer_samples);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+	uint buffer_samples = 1;
+#endif
+
+	int2 base_tc = int2(input.vTex * g_vViewportSize); // integer texel addressed by this pixel
+	const float FILTER_SCALE = 1.0f;
+	const int KERNEL_WIDTH = 1;
+	float total_weight = 0.0f;
+	[unroll]
+	for (int ox=-KERNEL_WIDTH; ox<=KERNEL_WIDTH; ++ox)
+	{
+		if ((base_tc.x + ox) < 0 || (base_tc.x + ox) >= g_vViewportSize.x) continue; // column outside the viewport
+
+		[unroll]
+		for (int oy=-KERNEL_WIDTH; oy<=KERNEL_WIDTH; ++oy)
+		{
+			if ((base_tc.y + oy) < 0 || (base_tc.y + oy) >= g_vViewportSize.y) continue; // row outside the viewport
+
+			int2 offset = int2(ox, oy);
+			int2 tc = base_tc + offset;	
+
+#if (defined(__PSSL__) && (SAMPLEMODE == SAMPLEMODE_MSAA))
+			int2 fmask = getFmask(tFMask_color, buffer_samples, tc);
+#endif
+			// The block opened below runs once per sample in MSAA mode and exactly once otherwise
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+			for (uint s=0; s<buffer_samples; ++s)
+			{
+				float2 so = offset + tGodraysBuffer.GetSamplePosition(s);
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+			{
+				float2 so = offset;
+#endif
+				bool is_valid_sample = false;
+#if (SAMPLEMODE == SAMPLEMODE_MSAA)
+#	if defined(__PSSL__)
+				float3 sample_value = float3(0,0,0);
+				float sample_depth = 0.0f;
+				int fptr = getFptr(s, fmask);
+				if (fptr != FMASK_UNKNOWN) // samples whose fragment index is "unknown" stay invalid
+				{
+					sample_value = tGodraysBuffer.Load(tc, fptr).rgb;
+					sample_depth = tGodraysDepth.Load( tc, fptr ).r;
+					is_valid_sample = true;
+				}
+#	else // !defined(__PSSL__)
+				is_valid_sample = true;
+				float3 sample_value = tGodraysBuffer.Load( tc, s ).rgb;
+				float sample_depth = tGodraysDepth.Load( tc, s ).r;
+#	endif
+#elif (SAMPLEMODE == SAMPLEMODE_SINGLE)
+				is_valid_sample = true;
+				float3 sample_value = tGodraysBuffer.Load( int3(tc, 0) ).rgb;
+				float sample_depth = tGodraysDepth.Load( int3(tc, 0) ).r;
+#endif
+				sample_depth = LinearizeDepth(sample_depth, g_fZNear, g_fZFar);
+				if (!all(isfinite(sample_value))) // drop samples containing NaN or infinity
+				{
+					is_valid_sample = false;
+				}
+
+				if (is_valid_sample)
+				{
+					so *= g_fResMultiplier;
+					float weight = GaussianApprox(so, FILTER_SCALE);
+					result_color += weight * sample_value;
+					result_depth += weight * sample_depth;
+					result_depth_sqr += weight * sample_depth*sample_depth;
+					total_weight += weight;
+				}
+			}
+		}
+	}
+	// Normalize by the accumulated weight; with no valid sample, color is 0 and both depth channels are 1
+	RESOLVE_OUTPUT output;
+	output.color = (total_weight > 0.0f) ? result_color/total_weight : float3(0.f, 0.f, 0.f);
+	output.depth = (total_weight > 0.0f) ? float2(result_depth, result_depth_sqr)/total_weight : 1.0f;
+	return output;
+}
diff --git a/src/shaders/ShaderCommon.h b/src/shaders/ShaderCommon.h
new file mode 100644
index 0000000..f4b8f80
--- /dev/null
+++ b/src/shaders/ShaderCommon.h
@@ -0,0 +1,265 @@
+// This code contains NVIDIA Confidential Information and is disclosed
+// under the Mutual Non-Disclosure Agreement.
+//
+// Notice
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
+//
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless
+// expressly authorized by NVIDIA.  Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (C) 2013, NVIDIA Corporation. All rights reserved.
+
+/*===========================================================================
+Constants
+===========================================================================*/
+
+static const float PI = 3.1415926535898f;	// pi; GetPhaseFactor divides acos() by it to get a [0,1] lookup coordinate
+static const float EDGE_FACTOR = 1.0f - (2.0f/64.0f) * (1.0f/64.0f);	// = 1 - 2/4096; NOTE(review): no consumer in this header, confirm intent at the use sites
+static const uint MAX_PHASE_TERMS = 4;	// same value as the [4] extents of g_uPhaseFunc / g_vPhaseParams in cbFrame
+
+#ifdef __PSSL__
+static const float2 SAMPLE_POSITIONS[] = {	// offsets in 1/16 units, grouped by the 1x / 2x / 4x / 8x labels below
+	// 1x
+	float2( 0, 0)/16.f,
+	// 2x
+	float2(-4, 4)/16.f,
+	float2( 4,-4)/16.f,
+	// 4x
+	float2(-6, 6)/16.f,
+	float2( 6,-6)/16.f,
+	float2(-2,-2)/16.f,
+	float2( 2, 2)/16.f,
+	// 8x
+	float2(-7,-3)/16.f,
+	float2( 7, 3)/16.f,
+	float2( 1,-5)/16.f,
+	float2(-5, 5)/16.f,
+	float2(-3,-7)/16.f,
+	float2( 3, 7)/16.f,
+	float2( 5,-1)/16.f,
+	float2(-1, 1)/16.f
+};
+
+// constant buffers: the HLSL cbuffer keyword is spelled ConstantBuffer when __PSSL__ is defined
+#define cbuffer ConstantBuffer
+
+// textures and samplers: rename HLSL resource types and methods to the spellings used under __PSSL__
+#define Texture2DMS MS_Texture2D
+#define Texture2DArray Texture2D_Array
+#define SampleLevel SampleLOD
+#define GetSamplePosition(s) GetSamplePoint(s)
+
+// semantics: map SV_* system-value names to S_* names (NOTE(review): SV_POSITION is defined twice below; the redefinition is identical)
+#define SV_DEPTH S_DEPTH_OUTPUT
+#define SV_DOMAINLOCATION S_DOMAIN_LOCATION
+#define SV_INSIDETESSFACTOR S_INSIDE_TESS_FACTOR
+#define SV_INSTANCEID S_INSTANCE_ID
+#define SV_ISFRONTFACE S_FRONT_FACE
+#define SV_OUTPUTCONTROLPOINTID S_OUTPUT_CONTROL_POINT_ID
+#define SV_POSITION S_POSITION
+#define SV_POSITION S_POSITION
+#define SV_PRIMITIVEID S_PRIMITIVE_ID
+#define SV_SAMPLEINDEX S_SAMPLE_INDEX
+#define SV_TARGET S_TARGET_OUTPUT
+#define SV_TARGET0 S_TARGET_OUTPUT0
+#define SV_TARGET1 S_TARGET_OUTPUT1
+#define SV_TESSFACTOR S_EDGE_TESS_FACTOR
+#define SV_VERTEXID S_VERTEX_ID
+
+// hull and domain shader properties: rename the HLSL attribute names to the upper-case names used under __PSSL__
+#define domain DOMAIN_PATCH_TYPE
+#define partitioning PARTITIONING_TYPE
+#define outputtopology OUTPUT_TOPOLOGY_TYPE
+#define outputcontrolpoints OUTPUT_CONTROL_POINTS
+#define patchconstantfunc PATCH_CONSTANT_FUNC
+#define maxtessfactor MAX_TESS_FACTOR
+
+// shared is defined to nothing so the shared cbuffer declarations below still parse; TODO: figure out how to deal with those exactly
+#define shared
+#endif
+
+/*===========================================================================
+Sampler states
+===========================================================================*/
+SamplerState 	 sPoint : register(s0);	// sampler slot s0; presumably point-filtered, the actual state is bound by the host - confirm there
+SamplerState 	 sBilinear : register(s1);	// sampler slot s1; GetPhaseFactor samples its lookup texture through it
+
+/*===========================================================================
+Constant buffers
+===========================================================================*/
+shared cbuffer cbContext : register(b0)	// constant-buffer slot b0; every member is placed explicitly with packoffset
+{
+	float2 g_vOutputSize				: packoffset(c0);
+	float2 g_vOutputSize_Inv			: packoffset(c0.z);	// presumably 1/g_vOutputSize - filled by the host, confirm there
+	float2 g_vBufferSize				: packoffset(c1);
+	float2 g_vBufferSize_Inv			: packoffset(c1.z);	// presumably 1/g_vBufferSize - filled by the host, confirm there
+	float g_fResMultiplier				: packoffset(c2);	// the resolve shader scales each sample offset by this before calling GaussianApprox
+	unsigned int g_uBufferSamples		: packoffset(c2.y);
+}
+
+shared cbuffer cbFrame : register(b1)	// constant-buffer slot b1; every member is placed explicitly with packoffset
+{
+	column_major float4x4 g_mProj		: packoffset(c0);
+	column_major float4x4 g_mViewProj	: packoffset(c4);
+    column_major float4x4 g_mViewProjInv: packoffset(c8);
+    float2 g_vOutputViewportSize        : packoffset(c12);
+    float2 g_vOutputViewportSize_Inv    : packoffset(c12.z);
+    float2 g_vViewportSize              : packoffset(c13);	// TemporalFilter_PS uses this as the texel extent of the buffers it loads from
+    float2 g_vViewportSize_Inv          : packoffset(c13.z);
+    float3 g_vEyePosition				: packoffset(c14);
+	float2 g_vJitterOffset				: packoffset(c15);
+	float g_fZNear						: packoffset(c15.z);	// passed as zn to LinearizeDepth / WarpDepth by the pixel shaders
+	float g_fZFar						: packoffset(c15.w);	// passed as zf to LinearizeDepth / WarpDepth by the pixel shaders
+    float3 g_vScatterPower              : packoffset(c16);	// multiplies the phase lookup in GetPhaseFactor
+    unsigned int g_uNumPhaseTerms       : packoffset(c16.w);
+    float3 g_vSigmaExtinction           : packoffset(c17);
+    unsigned int g_uPhaseFunc[4]		: packoffset(c18);	// each array element takes its own register: c18..c21
+    float4 g_vPhaseParams[4]			: packoffset(c22);	// c22..c25
+};
+
+shared cbuffer cbVolume : register(b2)	// constant-buffer slot b2; every member is placed explicitly with packoffset
+{
+	column_major float4x4 g_mLightToWorld	: packoffset(c0);
+	float g_fLightFalloffAngle				: packoffset(c4.x);
+	float g_fLightFalloffPower				: packoffset(c4.y);
+	float g_fGridSectionSize				: packoffset(c4.z);
+	float g_fLightToEyeDepth				: packoffset(c4.w);
+    float g_fLightZNear                     : packoffset(c5);
+    float g_fLightZFar                      : packoffset(c5.y);
+	float4 g_vLightAttenuationFactors		: packoffset(c6);	// (A, B, C, D) coefficients read as .x/.y/.z/.w by AttenuationFunc
+	column_major float4x4 g_mLightProj[4]	: packoffset(c7);	// four 4-register matrices: c7..c22
+	column_major float4x4 g_mLightProjInv[4]: packoffset(c23);	// four 4-register matrices: c23..c38
+	float3 g_vLightDir						: packoffset(c39);
+	float g_fGodrayBias						: packoffset(c39.w);
+	float3 g_vLightPos						: packoffset(c40);
+    unsigned int g_uMeshResolution          : packoffset(c40.w);
+	float3 g_vLightIntensity				: packoffset(c41);
+	float g_fTargetRaySize					: packoffset(c41.w);
+	float4 g_vElementOffsetAndScale[4]		: packoffset(c42); 	// c42..c45
+	float4 g_vShadowMapDim					: packoffset(c46);
+	unsigned int g_uElementIndex[4]	        : packoffset(c47);	// each array element takes its own register: c47..c50
+};
+
+shared cbuffer cbApply : register(b3)	// constant-buffer slot b3; every member is placed explicitly with packoffset
+{
+	column_major float4x4 g_mHistoryXform	: packoffset(c0);	// TemporalFilter_PS multiplies the current clip position by this to get the previous-frame clip position
+	float g_fFilterThreshold				: packoffset(c4);	// TemporalFilter_PS divides the reprojection distance by this; history weight reaches 0 at that distance
+	float g_fHistoryFactor					: packoffset(c4.y);	// starting history blend weight in TemporalFilter_PS, before the movement and depth attenuation
+	float3 g_vFogLight						: packoffset(c5);
+	float g_fMultiScattering				: packoffset(c5.w);
+};
+
+/*===========================================================================
+Shader inputs
+===========================================================================*/
+struct VS_POLYGONAL_INPUT	// input record carrying a single POSITION attribute
+{
+	float4 vPos : POSITION;
+};
+
+struct HS_POLYGONAL_INPUT	// SV_POSITION plus two float4 payloads in TEXCOORD0 / TEXCOORD1
+{
+	float4 vPos : SV_POSITION;
+	float4 vWorldPos : TEXCOORD0;	// same semantic slot as vWorldPos in the control-point and pixel-shader structs below
+	float4 vClipPos : TEXCOORD1;
+};
+
+struct HS_POLYGONAL_CONTROL_POINT_OUTPUT	// HS_POLYGONAL_INPUT without the SV_POSITION member
+{
+	float4 vWorldPos : TEXCOORD0;
+	float4 vClipPos : TEXCOORD1;
+};
+
+struct HS_POLYGONAL_CONSTANT_DATA_OUTPUT	// per-patch tessellation factors plus a debug payload
+{
+    float fEdges[4] : SV_TESSFACTOR;	// four edge tessellation factors
+    float fInside[2] : SV_INSIDETESSFACTOR;	// two inside tessellation factors
+    float debug[4] : TEXCOORD2;	// NOTE(review): looks like a debug-only payload - confirm whether any stage still reads it
+};
+
+struct PS_POLYGONAL_INPUT	// SV_POSITION plus a float4 in TEXCOORD0; gains one extra member when __PSSL__ is defined
+{
+    float4 vPos : SV_POSITION;
+    float4 vWorldPos : TEXCOORD0;
+#ifdef __PSSL__
+	float dummy : CLIPPPOSDUMMY;  // Workaround for a compiler exception in polygon hull shaders; the member carries no data of its own.
+#endif
+};
+
+struct VS_QUAD_OUTPUT	// interpolants consumed by the pixel shaders; TemporalFilter_PS main() takes this as its input
+{
+	float4 vPos : SV_POSITION;
+	sample float4 vWorldPos : TEXCOORD0;	// 'sample' modifier: interpolated per sample
+	sample float2 vTex : TEXCOORD1;	// 'sample' modifier: interpolated per sample; TemporalFilter_PS reads it as a [0,1] coordinate
+};
+
+/*===========================================================================
+Common functions
+===========================================================================*/
+
+float LinearizeDepth(float d, float zn, float zf)	// rational remap of d using zn/zf: d=0 gives 0, d=1 gives 1
+{
+	return (zn * d) / (zf - (d * (zf - zn)));
+}
+
+float WarpDepth(float z, float zn, float zf)	// z*(1+r) / (1+z*r) with r = zf/zn: z=0 gives 0, z=1 gives 1
+{
+	return (z * (zf/zn + 1)) / ((z*zf)/zn + 1);
+}
+
+float MapDepth(float d, float zn, float zf)	// linear remap sending d=zn to 0 and d=zf to 1; no clamping, and zf == zn divides by zero
+{
+	return (d - zn) / (zf - zn);
+}
+
+// Polynomial stand-in for a non-normalized gaussian of |sample_pos|, with width playing the role of sigma
+float GaussianApprox(float2 sample_pos, float width)
+{
+	const float dist_sqr = sample_pos.x*sample_pos.x + sample_pos.y*sample_pos.y;
+	// exp(-0.5*(x/w)^2) ~ (1-(x/(8*w))^2)^32; saturate() makes the result exactly 0 once |x| >= 8*w
+	float falloff = saturate(1.0f - dist_sqr/(64.0f * width*width));
+	// five successive squarings raise the base to ^2, ^4, ^8, ^16, ^32
+	for (int squaring = 0; squaring < 5; ++squaring)
+	{
+		falloff = falloff*falloff;
+	}
+	return falloff;
+}
+
+#if defined(ATTENUATIONMODE)
+float AttenuationFunc(float d)	// attenuation at d, selected by the ATTENUATIONMODE permutation macro
+{
+    const float4 coeffs = g_vLightAttenuationFactors;	// (A, B, C, D)
+    const float poly = coeffs.x + coeffs.y*d + coeffs.z*d*d;	// A+Bx+Cx^2, shared by both polynomial modes
+    float attenuation = 1.0f;	// value for every other mode (ATTENUATIONMODE_NONE)
+    if (ATTENUATIONMODE == ATTENUATIONMODE_POLYNOMIAL)
+    {
+        // 1-(A+Bx+Cx^2)
+        attenuation = saturate(1.0f - poly);
+    }
+    else if (ATTENUATIONMODE == ATTENUATIONMODE_INV_POLYNOMIAL)
+    {
+        // 1 / (A+Bx+Cx^2) + D
+        attenuation = saturate(1.0f / poly + coeffs.w);
+    }
+    return attenuation;
+}
+#endif
+
+float3 GetPhaseFactor(Texture2D tex, float cos_theta)	// looks up tex at v = acos(-cos_theta)/PI and scales by g_vScatterPower
+{
+    const float angle = acos(clamp(-cos_theta, -1.0f, 1.0f));	// clamp keeps acos() inside its domain
+    const float2 lookup_tc = float2(0, angle / PI);	// u is always 0; only v varies
+    const float3 phase = tex.SampleLevel(sBilinear, lookup_tc, 0).rgb;	// mip 0, through the sBilinear sampler
+    return g_vScatterPower*phase;
+}
diff --git a/src/shaders/TemporalFilter_PS.hlsl b/src/shaders/TemporalFilter_PS.hlsl
new file mode 100644
index 0000000..082e577
--- /dev/null
+++ b/src/shaders/TemporalFilter_PS.hlsl
@@ -0,0 +1,207 @@
+// This code contains NVIDIA Confidential Information and is disclosed 
+// under the Mutual Non-Disclosure Agreement. 
+// 
+// Notice 
+// ALL NVIDIA DESIGN SPECIFICATIONS AND CODE ("MATERIALS") ARE PROVIDED "AS IS" NVIDIA MAKES 
+// NO REPRESENTATIONS, WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO 
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ANY IMPLIED WARRANTIES OF NONINFRINGEMENT, 
+// MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. 
+// 
+// NVIDIA Corporation assumes no responsibility for the consequences of use of such 
+// information or for any infringement of patents or other rights of third parties that may 
+// result from its use. No license is granted by implication or otherwise under any patent 
+// or patent rights of NVIDIA Corporation. No third party distribution is allowed unless 
+// expressly authorized by NVIDIA.  Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical 
+// components in life support devices or systems without express written approval of 
+// NVIDIA Corporation. 
+// 
+// Copyright (c) 2003 - 2016 NVIDIA Corporation. All rights reserved.
+//
+// NVIDIA Corporation and its licensors retain all intellectual property and proprietary
+// rights in and to this software and related documentation and any modifications thereto.
+// Any use, reproduction, disclosure or distribution of this software and related
+// documentation without an express license agreement from NVIDIA Corporation is
+// strictly prohibited.
+//
+
+/*
+Define the shader permutations for code generation
+%% MUX_BEGIN %%
+
+%% MUX_END %%
+*/
+
+#include "ShaderCommon.h"
+
+Texture2D<float4> tCurrBuffer : register(t0);	// current-frame color; main() loads .rgb for each of the 9 neighbors
+Texture2D<float4> tLastBuffer : register(t1);	// history color; main() loads .rgb at the reprojected texel
+Texture2D<float2> tCurrDepth : register(t2);	// current-frame depth pair; main() treats .g - .r*.r as a variance, so presumably (mean, mean of squares)
+Texture2D<float2> tLastDepth : register(t3);	// history counterpart of tCurrDepth, loaded at the reprojected texel
+
+static const int2 NEIGHBOR_OFFSETS[] = {	// 3x3 texel offsets in row-major order; index 4 is the center (0,0)
+	int2(-1, -1),	int2( 0, -1),	int2( 1, -1),
+	int2(-1,  0),	int2( 0,  0),	int2( 1,  0),
+	int2(-1,  1),	int2( 0,  1),	int2( 1,  1)
+};
+
+#if 1
+static const float NEIGHBOR_WEIGHTS[] = {	// active variant, indexed like NEIGHBOR_OFFSETS: center 1, edge neighbors 1/8, corners 1/64
+	0.015625f,	0.125000f,	0.015625f,
+	0.125000f,	1.000000f,	0.125000f,
+	0.015625f,	0.125000f,	0.015625f,
+};
+#else
+static const float NEIGHBOR_WEIGHTS[] = {	// disabled variant: only the center texel carries weight
+	0, 0, 0,
+	0, 1, 0,
+	0, 0, 0,
+};
+#endif
+
+float RGB_to_Y (float3 input)	// Y = g/2 + (r+b)/4, the same value RGB_to_YCoCg writes to .x
+{
+	return 0.50f*input.g + 0.25f*(input.r + input.b);
+}
+
+float3 RGB_to_YCoCg (float3 input)	// returns (Y, Co, Cg) for an RGB triple
+{
+	const float half_green = 0.50f*input.g;
+	const float quarter_red_blue = 0.25f*(input.r + input.b);
+	const float luma = half_green + quarter_red_blue;			// Y  = g/2 + (r+b)/4
+	const float chroma_orange = 0.50f*(input.r - input.b);		// Co = (r-b)/2
+	const float chroma_green = half_green - quarter_red_blue;	// Cg = g/2 - (r+b)/4
+	return float3(luma, chroma_orange, chroma_green);
+}
+
+float3 YCoCg_to_RGB(float3 input)	// inverse of RGB_to_YCoCg: takes (Y, Co, Cg), returns RGB
+{
+	const float luma = input.x;
+	const float chroma_orange = input.y;
+	const float chroma_green = input.z;
+	const float luma_minus_green = luma - chroma_green;	// (r+b)/2, shared by red and blue
+	const float red = luma_minus_green + chroma_orange;
+	const float blue = luma_minus_green - chroma_orange;
+	return float3(red, luma + chroma_green, blue);
+}
+
+float3 Tonemap( float3 sample_rgb )	// compresses each channel with x/(1+x), then converts to YCoCg
+{
+	const float3 compressed_rgb = sample_rgb / (sample_rgb + 1);
+	return RGB_to_YCoCg(compressed_rgb);
+}
+
+float3 Tonemap_Inv( float3 sample_YCoCg )	// undoes Tonemap: YCoCg back to RGB, then y/(1-y) per channel
+{
+	const float3 compressed_rgb = YCoCg_to_RGB(sample_YCoCg);
+	return compressed_rgb / (1 - compressed_rgb);
+}
+
+struct FILTER_OUTPUT	// the two render-target values written by main()
+{
+	float3 color : SV_TARGET0;	// filtered color, after Tonemap_Inv has taken it back out of the tonemapped YCoCg working space
+	float2 depth : SV_TARGET1;	// filtered depth pair, same two-channel layout as tCurrDepth / tLastDepth
+};
+
+FILTER_OUTPUT main(VS_QUAD_OUTPUT input)	// Temporal filter: 3x3-filtered current frame blended with reprojected, neighborhood-clipped history
+{
+	FILTER_OUTPUT output;
+
+	// load neighbors: weighted 3x3 average in tonemapped YCoCg space, tracking the min/max luma (Y) of the valid neighbors
+	float3 curr_sample = float3(0,0,0);
+	float2 curr_depth = float2(0,0);
+	float neighborhood_bounds_max = 0;
+	float neighborhood_bounds_min = 0;
+	int2 max_dimensions = int2(g_vViewportSize);
+	int2 base_tc = int2(floor(input.vTex.xy*max_dimensions));
+	float total_weight = -1.0f;	// non-positive means no valid neighbor has seeded the bounds yet
+
+	[unroll]
+	for (int n=0; n<9; ++n)
+	{
+		int2 sample_tc = max( int2(0,0), min(max_dimensions - int2(1,1), base_tc + NEIGHBOR_OFFSETS[n]));	// clamp to the last texel (size-1), not one past it
+		float3 neighbor_sample = tCurrBuffer.Load(int3(sample_tc, 0)).rgb;
+		float2 neighbor_depth = tCurrDepth.Load(int3(sample_tc, 0)).rg;
+		bool is_valid = all(isfinite(neighbor_sample.xyz));	// validate the raw texel: max() below would turn a NaN into 0 and hide it
+		if (is_valid)
+		{
+			neighbor_sample = Tonemap(max(float3(0,0,0), neighbor_sample));	// negatives are clamped to 0 before tonemapping
+			float weight = NEIGHBOR_WEIGHTS[n];
+			curr_sample += weight*neighbor_sample;
+			curr_depth += weight*neighbor_depth;
+			if (total_weight <= 0.0f)
+			{
+				neighborhood_bounds_max = neighbor_sample.x;
+				neighborhood_bounds_min = neighbor_sample.x;
+				total_weight = weight;
+			}
+			else
+			{
+				neighborhood_bounds_max = max(neighborhood_bounds_max, neighbor_sample.x);
+				neighborhood_bounds_min = min(neighborhood_bounds_min, neighbor_sample.x);
+				total_weight += weight;
+			}
+		}
+	}
+	curr_sample = (total_weight > 0) ? curr_sample/total_weight : float3(0,0,0);	// no valid neighbor: fall back to black
+	curr_depth =  (total_weight > 0) ? curr_depth/total_weight : float2(1, 1);	// no valid neighbor: fall back to depth 1
+
+	// Transform and apply history
+	const float MAX_HISTORY_FACTOR = 0.98f;	// upper limit on the history weight, so the current frame always contributes
+	float history_factor = g_fHistoryFactor;
+
+	float4 curr_clip;
+	curr_clip.xy = float2(2, -2) * input.vTex.xy + float2(-1, 1);	// [0,1] texture coordinate to [-1,1] clip xy, with y flipped
+	curr_clip.z = WarpDepth(curr_depth.x, g_fZNear, g_fZFar);
+	curr_clip.w = 1;
+	float4 last_clip = mul( g_mHistoryXform, curr_clip );	// position of this pixel in the previous frame
+	last_clip = last_clip/last_clip.w;
+
+	int2 last_tc = min(int2(saturate((float2(0.5f, -0.5f)*last_clip.xy+float2(0.5f, 0.5f))) * max_dimensions), max_dimensions - int2(1,1));	// clamp to the last texel
+	float3 last_sample = tLastBuffer.Load(int3(last_tc, 0)).rgb;
+	float2 last_depth = tLastDepth.Load(int3(last_tc, 0)).rg;
+	last_sample = all(isfinite(last_sample)) ? Tonemap(last_sample) : curr_sample;	// non-finite history is replaced by the current value
+
+	history_factor = all(abs(last_clip.xy) <= 1.0f) ? history_factor : 0.0f;	// drop history that reprojects outside the previous frame
+
+	float2 clip_diff = (last_clip.xy - curr_clip.xy) * g_vViewportSize * g_vViewportSize_Inv.xx;	// both axes end up in units of the viewport width
+	float clip_dist = length(clip_diff);
+	float movement_factor = saturate(1.0f - clip_dist/g_fFilterThreshold);	// 1 when static, 0 once the motion reaches g_fFilterThreshold
+	history_factor *= movement_factor*movement_factor*movement_factor;
+
+	float depth_diff = abs(curr_depth.r-last_depth.r);
+	float local_variance = abs(curr_depth.g - curr_depth.r*curr_depth.r) + abs(last_depth.g - last_depth.r*last_depth.r);	// .g - .r^2 for both frames, summed
+	local_variance = max(local_variance, 0.0001f);	// keeps the division below away from zero
+#if 0
+	float local_stddev = sqrt(local_variance);
+	float depth_factor = saturate(depth_diff-local_stddev);
+	depth_factor = local_stddev / (local_stddev + depth_factor);
+#else
+	float depth_factor = saturate(depth_diff-local_variance);	// depth change in excess of the local variance
+	depth_factor = local_variance / (local_variance + depth_factor);	// 1 when there is no excess, falling towards 0 as it grows
+#endif
+	history_factor *= depth_factor;
+
+	// threshold based on neighbors
+	// Samples are already in Y Co Cg; clip the history towards the current value so its Y stays within the neighborhood bounds
+	float3 blended_sample = curr_sample;
+	float2 blended_depth = curr_depth;
+	if (history_factor > 0.0f)
+	{
+		const float CLIP_EPSILON = 0.0001f;	// below this luma difference the history is used unclipped
+		float3 clip_vec = last_sample - curr_sample;
+		float clamped_Y = max(neighborhood_bounds_min, min(neighborhood_bounds_max, last_sample.x));
+		float clip_factor_Y = (abs(clip_vec.x) > CLIP_EPSILON) ? abs((clamped_Y-curr_sample.x) / clip_vec.x) : 1.0f;	// fraction of clip_vec that keeps Y in bounds
+		float clip_factor = clip_factor_Y;
+		float3 clipped_history = curr_sample + clip_factor*clip_vec;	// all three channels are scaled by the luma-derived factor
+
+		history_factor = min(history_factor, MAX_HISTORY_FACTOR);
+		blended_sample = lerp(curr_sample, clipped_history, history_factor);
+		blended_depth = lerp(curr_depth, last_depth, history_factor);
+	}
+
+	output.color = Tonemap_Inv(blended_sample);	// back from tonemapped YCoCg to linear RGB
+	output.depth = blended_depth;
+	return output;
+}