1 files changed, 1393 insertions, 1393 deletions
diff --git a/sp/src/materialsystem/stdshaders/macros.vsh b/sp/src/materialsystem/stdshaders/macros.vsh
index 9b19f777..d1116e52 100644
--- a/sp/src/materialsystem/stdshaders/macros.vsh
+++ b/sp/src/materialsystem/stdshaders/macros.vsh
@@ -1,1393 +1,1393 @@
-;------------------------------------
-; RULES FOR AUTHORING VERTEX SHADERS:
-;------------------------------------
-; - Never use "def"; set constants in code instead. Our constant shadowing will break otherwise.
-;	(The same goes for pixel shaders.)
-; - Use cN notation instead of c[N] notation; it makes grepping for registers easier.
-;   The only exception is c[a0.x+blah], where you have no choice.
-# Number of temporary registers (r0 .. r11) managed by AllocateRegister /
-# FreeRegister below.
-$g_NumRegisters = 12;
-
-; NOTE: These must match the same values in vsh_prep.pl!
-# Symbolic names for the vertex input registers. v4 and v13 are not
-# assigned a name here.
-$vPos				= "v0";
-$vBoneWeights		= "v1";
-$vBoneIndices		= "v2";
-$vNormal			= "v3";
-$vColor				= "v5";
-$vSpecular			= "v6";
-$vTexCoord0			= "v7";
-$vTexCoord1			= "v8";
-$vTexCoord2			= "v9";
-$vTexCoord3			= "v10";
-$vTangentS			= "v11";
-$vTangentT			= "v12";
-$vUserData			= "v14";
-
-# When $g_dx9 is set, emit one dcl_* input declaration for every vertex
-# component whose $g_uses* flag is set.
-# NOTE(review): $vUserData is declared with dcl_tangent, the same usage
-# keyword as $vTangentS - confirm the two are never both used by one shader.
-if( $g_dx9 )
-{
-	if( $g_usesPos )
-	{
-		dcl_position $vPos;
-	}
-	
-	if( $g_usesBoneWeights )
-	{
-		dcl_blendweight $vBoneWeights;
-	}
-	if( $g_usesBoneIndices )
-	{
-		dcl_blendindices $vBoneIndices;
-	}
-	if( $g_usesNormal )
-	{
-		dcl_normal $vNormal;
-	}
-	if( $g_usesColor )
-	{
-		dcl_color0 $vColor;
-	}
-	if( $g_usesSpecular )
-	{
-		dcl_color1 $vSpecular;
-	}
-	if( $g_usesTexCoord0 )
-	{
-		dcl_texcoord0 $vTexCoord0;
-	}
-	if( $g_usesTexCoord1 )
-	{
-		dcl_texcoord1 $vTexCoord1;
-	}
-	if( $g_usesTexCoord2 )
-	{
-		dcl_texcoord2 $vTexCoord2;
-	}
-	if( $g_usesTexCoord3 )
-	{
-		dcl_texcoord3 $vTexCoord3;
-	}
-	if( $g_usesTangentS )
-	{
-		dcl_tangent $vTangentS;
-	}
-	if( $g_usesTangentT )
-	{
-		dcl_binormal0 $vTangentT;
-	}
-	if( $g_usesUserData )
-	{
-		dcl_tangent $vUserData;
-	}
-}
-
-# NOTE: These should match g_LightCombinations in vertexshaderdx8.cpp!
-# NOTE: Keep each array on a single line; splitting one across lines may break processing of this file.
-# Each array has 22 entries and is indexed by $LIGHT_COMBO (see DoLighting).
-@g_staticLightTypeArray = ( "none", "static", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "static", "static", "static", "static", "static", "static", "static", "static", "static", "static" );
-@g_ambientLightTypeArray = ( "none", "none", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", 	"ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient" );
-@g_localLightType1Array = ( "none", "none", "none", "spot", "point", "directional", "spot", "spot", "spot", "point", "point", "directional", "none", "spot", "point", "directional", "spot", "spot", "spot", "point", "point", "directional" );
-@g_localLightType2Array = ( "none", "none", "none", "none", "none", "none", "spot", "point", "directional", "point", "directional", "directional", "none", "none", "none", "none", "spot", "point", "directional", "point", "directional", "directional" );
-
-# Symbolic names for the vertex shader constant registers.
-# c0: commonly used scalar constants, one per component.
-$cConstants0		= "c0";
-$cZero				= "c0.x";
-$cOne				= "c0.y";
-$cTwo				= "c0.z";
-$cHalf				= "c0.w";
-
-$cConstants1		    = "c1";
-$cOOGamma			    = "c1.x"; # 1/2.2
-$cOtherOverbrightFactor = "c1.y"; # overbright
-$cOneThird			    = "c1.z"; # 1/3
-$cOverbrightFactor      = "c1.w"; # 1/overbright
-
-# c2 is addressed under three names: $cEyePos and $cEyePosWaterZ both name
-# the whole register, $cWaterZ names its w component.
-$cEyePos			= "c2";
-$cWaterZ			= "c2.w";
-$cEyePosWaterZ		= "c2";
-
-$cLightIndex		= "c3";
-$cLight0Offset		= "c3.x"; # 27
-$cLight1Offset		= "c3.y"; # 32
-$cColorToIntScale	= "c3.z"; # matrix array offset = 3.0f * 255.0f + 0.01 (epsilon ensures floor yields desired result)
-$cModel0Index		= "c3.w"; # base for start of skinning matrices
-
-; NOTE: These must match the same values in vsh_prep.pl!
-$cModelViewProj0	= "c4";
-$cModelViewProj1	= "c5";
-$cModelViewProj2	= "c6";
-$cModelViewProj3	= "c7";
-
-$cViewProj0			= "c8";
-$cViewProj1			= "c9";
-$cViewProj2			= "c10";
-$cViewProj3			= "c11";
-
-; currently unused
-; c12, c13
-
-$SHADER_SPECIFIC_CONST_10 = "c14";
-$SHADER_SPECIFIC_CONST_11 = "c15";
-
-$cFogParams			= "c16";
-$cFogEndOverFogRange = "c16.x";
-$cFogOne			= "c16.y";
-$cFogMaxDensity		= "c16.z";
-$cOOFogRange		= "c16.w"; # (1/(fogEnd-fogStart))
-
-$cViewModel0		= "c17";
-$cViewModel1		= "c18";
-$cViewModel2		= "c19";
-$cViewModel3		= "c20";
-
-$cAmbientColorPosX	= "c21";
-$cAmbientColorNegX	= "c22";
-$cAmbientColorPosY	= "c23";
-$cAmbientColorNegY	= "c24";
-$cAmbientColorPosZ	= "c25";
-$cAmbientColorNegZ	= "c26";
-
-# Bare register numbers (no "c" prefix) for relative addressing, as in
-# c[a0.x + $cAmbientColorPosXOffset] inside AmbientLight.
-$cAmbientColorPosXOffset = "21";
-$cAmbientColorPosYOffset = "23";
-$cAmbientColorPosZOffset = "25";
-
-# Per-light constant blocks: five consecutive registers each, in the order
-# colour, direction, position, spot parameters, attenuation.
-$cLight0DiffColor	= "c27";
-$cLight0Dir			= "c28";
-$cLight0Pos			= "c29";
-$cLight0SpotParams  = "c30"; # [ exponent, stopdot, stopdot2, 1 / (stopdot - stopdot2) ]
-$cLight0Atten		= "c31"; # [ constant, linear, quadratic, 0.0f ]
-
-$cLight1DiffColor	= "c32";
-$cLight1Dir			= "c33";
-$cLight1Pos			= "c34";
-$cLight1SpotParams  = "c35"; # [ exponent, stopdot, stopdot2, 1 / (stopdot - stopdot2) ]
-$cLight1Atten		= "c36"; # [ constant, linear, quadratic, 0.0f ]
-
-$cModulationColor	= "c37";
-
-$SHADER_SPECIFIC_CONST_0  = "c38";
-$SHADER_SPECIFIC_CONST_1  = "c39";
-$SHADER_SPECIFIC_CONST_2  = "c40";
-$SHADER_SPECIFIC_CONST_3  = "c41";
-$SHADER_SPECIFIC_CONST_4  = "c42";
-$SHADER_SPECIFIC_CONST_5  = "c43";
-$SHADER_SPECIFIC_CONST_6  = "c44";
-$SHADER_SPECIFIC_CONST_7  = "c45";
-$SHADER_SPECIFIC_CONST_8  = "c46";
-$SHADER_SPECIFIC_CONST_9  = "c47";
-; $SHADER_SPECIFIC_CONST_10 is c14
-; $SHADER_SPECIFIC_CONST_11 is c15
-
-; There are 16 model matrices for skinning
-; NOTE: These must match the same values in vsh_prep.pl!
-# Only the three rows of the first model matrix are named; the skinning
-# routines reach matrices through c[a0.x] relative addressing.
-$cModel0			= "c48";
-$cModel1			= "c49";
-$cModel2			= "c50";
-# Lists every currently allocated temporary register as
-# "<allocation name> = rN", bracketed by ';' comment lines.
-# NOTE(review): the ';' lines are presumably passed through to the generated
-# shader as assembly comments by the preprocessor - confirm in vsh_prep.pl.
-sub OutputUsedRegisters
-{
-	local( $i );
-	; USED REGISTERS
-	for( $i = 0; $i < $g_NumRegisters; $i++ )
-	{
-		if( $g_allocated[$i] )
-		{
-			; $g_allocatedname[$i] = r$i
-		}
-	}
-	;
-}
-
-# Claims the lowest-numbered free temporary register and stores its name
-# ("rN") in the caller's variable.
-#   arg 0: reference to the scalar that receives the register name (\$var)
-#   arg 1: name recorded in @g_allocatedname for the register listing; the
-#          callers in this file omit it
-# When all $g_NumRegisters registers are in use the variable is set to
-# "rERROR_OUT_OF_REGISTERS" instead. The register listing is printed either way.
-sub AllocateRegister
-{
-	local( *reg ) = shift; # alias $reg to the caller's scalar
-	local( $regname ) = shift;
-	local( $i );
-	for( $i = 0; $i < $g_NumRegisters; $i++ )
-	{
-		if( !$g_allocated[$i] )
-		{
-			$g_allocated[$i] = 1;
-			$g_allocatedname[$i] = $regname;
-			; AllocateRegister $regname = r$i
-			$reg = "r$i";
-			&OutputUsedRegisters();
-			return;
-		}
-	}
-	; Out of registers allocating $regname!
-	$reg = "rERROR_OUT_OF_REGISTERS";
-	&OutputUsedRegisters();
-}
-
-; pass in a reference to a var that contains a register, i.e. \$var where var will contain "r1", etc.
-# Releases a temporary register obtained from AllocateRegister.
-#   arg 0: reference to the scalar holding the register name (\$var)
-#   arg 1: optional name, used only in the diagnostic comment lines
-# The caller's variable is overwritten with "rERROR_DEALLOCATED" so that any
-# later use of the freed register stands out in the generated shader.
-# Freeing a variable that already holds that marker is reported and ignored.
-sub FreeRegister
-{
-	local( *reg ) = shift; # alias $reg to the caller's scalar
-	local( $regname ) = shift;
-	; FreeRegister $regname = $reg
-	if( $reg =~ m/rERROR_DEALLOCATED/ )
-	{
-		; $regname already deallocated
-		; $reg = "rALREADY_DEALLOCATED";
-		&OutputUsedRegisters();
-		return;
-	}
-;	if( $regname ne g_allocatedname[$reg] )
-;	{
-;		; Error freeing $reg
-;		mov compileerror, freed unallocated register $regname
-;	}
-
-	# Only a genuine "rN" name may clear an allocation slot. The previous
-	# pattern m/r(.*)/ also matched "rERROR_OUT_OF_REGISTERS", whose captured
-	# text is 0 when used as an array index, so r0 was released while still in use.
-	if( $reg =~ m/^r(\d+)$/ )
-	{
-		$g_allocated[$1] = 0;
-	}
-	$reg = "rERROR_DEALLOCATED";
-	&OutputUsedRegisters();
-}
-
-# Reports every temporary register that is still marked allocated, printing
-# one "ERROR: ..." line per register, and clears the allocation flag so the
-# allocator starts clean afterwards.
-sub CheckUnfreedRegisters()
-{
-	local( $i );
-	for( $i = 0; $i < $g_NumRegisters; $i++ )
-	{
-		if( $g_allocated[$i] )
-		{
-			print "ERROR: r$i allocated to $g_allocatedname[$i] at end of program\n";
-			$g_allocated[$i] = 0;
-		}
-	}
-}
-
-# Normalizes the xyz part of register $r in place (3 instructions).
-# $r.w is used as scratch and is overwritten: it receives dot(r,r), then its
-# reciprocal square root, and is finally scaled by the last mul as well.
-sub Normalize
-{
-	local( $r ) = shift;
-	dp3 $r.w, $r, $r
-	rsq $r.w, $r.w
-	mul $r, $r, $r.w
-}
-
-# Writes the cross product $a x $b to $result.xyz (2 instructions).
-# $result is written by the first instruction and read back by the second,
-# while $a and $b are read by both, so $result must not be the same register
-# as $a or $b.
-sub Cross
-{
-	local( $result ) = shift;
-	local( $a ) = shift;
-	local( $b ) = shift;
-
-	mul $result.xyz, $a.yzx, $b.zxy
-	mad $result.xyz, -$b.yzx, $a.zxy, $result
-}
-
-# Writes a range-based fog factor to oFog, using the length of
-# $projPos.xyw as the distance.
-#   arg 0: register holding the projected position
-# The two $g_dx9 branches differ only in the destination: oFog when $g_dx9
-# is set, oFog.x otherwise. One temporary register is allocated and freed.
-sub RangeFog
-{
-	local( $projPos ) = shift;
-	
-	;------------------------------
-	; Regular range fog
-	;------------------------------
-
-	; oFog.x = 1.0f = no fog
-	; oFog.x = 0.0f = full fog
-	; compute fog factor f = (fog_end - dist)*(1/(fog_end-fog_start))
-	; this is == to: (fog_end/(fog_end-fog_start) - dist/(fog_end-fog_start)
-	; which can be expressed with a single mad instruction!
-
-	; Compute |projPos|
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-	dp3 $tmp.x, $projPos.xyw, $projPos.xyw
-	rsq $tmp.x, $tmp.x
-	rcp $tmp.x, $tmp.x
-
-	if( $g_dx9 )
-	{
-		mad $tmp, -$tmp.x, $cOOFogRange, $cFogEndOverFogRange
-		min $tmp, $tmp, $cOne
-		max oFog, $tmp.x, $cFogMaxDensity
-	}
-	else
-	{
-		mad $tmp, -$tmp.x, $cOOFogRange, $cFogEndOverFogRange
-		min $tmp, $tmp, $cOne
-		max oFog.x, $tmp.x, $cFogMaxDensity
-	}
-	&FreeRegister( \$tmp );
-}
-
-# Writes a depth-based fog factor, using $projPos.w directly as the distance.
-#   arg 0: register holding the projected position
-#   arg 1: destination register; defaults to "oFog" when omitted or empty
-# The two $g_dx9 branches differ only in the destination: $dest when $g_dx9
-# is set, $dest.x otherwise. One temporary register is allocated and freed.
-sub DepthFog
-{
-	local( $projPos ) = shift;
-	local( $dest ) = shift;
-
-	if ( $dest eq "" )
-	{
-		$dest = "oFog";
-	}
-
-	;------------------------------
-	; Regular depth fog
-	;------------------------------
-
-	; fog factor 1.0f = no fog
-	; fog factor 0.0f = full fog
-	; compute fog factor f = (fog_end - dist)*(1/(fog_end-fog_start))
-	; this is == to: (fog_end/(fog_end-fog_start) - dist/(fog_end-fog_start)
-	; which can be expressed with a single mad instruction!
-
-	; dist is projPos.w; unlike range fog, no vector length is computed
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-
-	if( $g_dx9 )
-	{
-		mad $tmp, -$projPos.w, $cOOFogRange, $cFogEndOverFogRange
-		min $tmp, $tmp, $cOne
-		max $dest, $tmp.x, $cFogMaxDensity
-	}
-	else
-	{
-		mad $tmp, -$projPos.w, $cOOFogRange, $cFogEndOverFogRange
-		min $tmp, $tmp, $cOne
-		max $dest.x, $tmp.x, $cFogMaxDensity
-	}
-
-	&FreeRegister( \$tmp );
-}
-
-# Writes a water fog factor to oFog, based on the length of the part of the
-# eye-to-vertex ray that lies below the water surface, with |projPos.xyw|
-# as the total distance.
-#   arg 0: world-space position register (only .z is read)
-#   arg 1: projected position register
-# The result is clamped to at most $cOne and at least $cFogMaxDensity.
-# One temporary register is allocated and freed.
-sub WaterRangeFog
-{
-	; oFog.x = 1.0f = no fog
-	; oFog.x = 0.0f = full fog
-
-	; only $worldPos.z is used out of worldPos
-	local( $worldPos ) = shift;
-	local( $projPos ) = shift;
-	
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-
-	; This is simple similar triangles. Imagine a line passing from the point directly vertically
-	; and another line passing from the point to the eye position.
-	; Let d = total distance from point to the eye
-	; Let h = vertical distance from the point to the eye
-	; Let hw = vertical distance from the point to the water surface
-	; Let dw = distance from the point to a point on the water surface that lies along the ray from point to eye
-	; Therefore d/h = dw/hw by similar triangles, or dw = d * hw / h.
-	; d = |projPos|, h = eyepos.z - worldPos.z, hw = waterheight.z - worldPos.z, dw = what we solve for
-
-	; Now, tmp.x = hw, and tmp.y = h
-	add $tmp.xy, $cEyePosWaterZ.wz, -$worldPos.z
-
-	; if $tmp.x < 0, then set it to 0
-	; This is the equivalent of moving the vert to the water surface if it's above the water surface
-	max $tmp.x, $tmp.x, $cZero
-
-	; Compute 1 / |projPos| = 1/d
-	dp3 $tmp.z, $projPos.xyw, $projPos.xyw
-	rsq $tmp.z, $tmp.z
-
-	; Now we have h/d
-	mul $tmp.z, $tmp.z, $tmp.y
-
-	; Now we have d/h
-	rcp $tmp.w, $tmp.z
-	
-	; We finally have d * hw / h
-	; $tmp.w is now the distance that we see through water.
-	mul $tmp.w, $tmp.x, $tmp.w
-
-	if( $g_dx9 )
-	{
-		mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
-		min $tmp, $tmp, $cOne
-		max oFog, $tmp.x, $cFogMaxDensity
-	}
-	else
-	{
-		mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
-		min $tmp, $tmp, $cOne
-		max oFog.x, $tmp.x, $cFogMaxDensity
-	}
-
-	&FreeRegister( \$tmp );
-}
-
-# Writes a water fog factor based on the length of the part of the
-# eye-to-vertex ray that lies below the water surface, with $projPos.w as
-# the total distance.
-#   arg 0: world-space position register (only .z is read)
-#   arg 1: projected position register (only .w is read)
-#   arg 2: destination register; defaults to "oFog" when omitted or empty
-# NOTE(review): the final clamp uses $cZero as the lower bound, whereas
-# WaterRangeFog uses $cFogMaxDensity - confirm the difference is intended.
-# One temporary register is allocated and freed.
-sub WaterDepthFog
-{
-	; oFog.x = 1.0f = no fog
-	; oFog.x = 0.0f = full fog
-
-	; only $worldPos.z is used out of worldPos
-	local( $worldPos ) = shift;
-	local( $projPos ) = shift;
-	local( $dest ) = shift;
-	
-	if ( $dest eq "" )
-	{
-		$dest = "oFog";
-	}
-	
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-
-	; This is simple similar triangles. Imagine a line passing from the point directly vertically
-	; and another line passing from the point to the eye position.
-	; Let d = total distance from point to the eye
-	; Let h = vertical distance from the point to the eye
-	; Let hw = vertical distance from the point to the water surface
-	; Let dw = distance from the point to a point on the water surface that lies along the ray from point to eye
-	; Therefore d/h = dw/hw by similar triangles, or dw = d * hw / h.
-	; d = projPos.w, h = eyepos.z - worldPos.z, hw = waterheight.z - worldPos.z, dw = what we solve for
-
-	; Now, tmp.x = hw, and tmp.y = h
-	add $tmp.xy, $cEyePosWaterZ.wz, -$worldPos.z
-
-	; if $tmp.x < 0, then set it to 0
-	; This is the equivalent of moving the vert to the water surface if it's above the water surface
-	max $tmp.x, $tmp.x, $cZero
-
-	; Now we have 1/h
-	rcp $tmp.z, $tmp.y
-
-	; Now we have d/h
-	mul $tmp.w, $projPos.w, $tmp.z
-
-	; We finally have d * hw / h
-	; $tmp.w is now the distance that we see through water.
-	mul $tmp.w, $tmp.x, $tmp.w
-
-	if( $g_dx9 )
-	{
-		mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
-		min $tmp, $tmp, $cOne
-		max $dest, $tmp.x, $cZero
-	}
-	else
-	{
-		mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
-		min $tmp, $tmp, $cOne
-		max $dest.x, $tmp.x, $cZero
-	}
-
-	&FreeRegister( \$tmp );
-}
-
-
-;------------------------------------------------------------------------------
-; Main fogging routine
-;------------------------------------------------------------------------------
-# Selects the depth-based fog routine according to $DOWATERFOG:
-#   0         -> DepthFog( $projPos, $dest )
-#   otherwise -> WaterDepthFog( $worldPos, $projPos, $dest )
-#   arg 0: world-space position register
-#   arg 1: projected position register
-#   arg 2: destination register; defaults to "oFog" when omitted or empty
-# Dies if $DOWATERFOG is undefined. The final "else { die; }" cannot be
-# reached, because $fogType is always one of the two strings tested.
-sub CalcFog
-{
-	if( !defined $DOWATERFOG )
-	{
-		die "CalcFog called without using \$DOWATERFOG\n";
-	}
-	my $fogType;
-	if( $DOWATERFOG == 0 )
-	{
-		$fogType = "rangefog";		
-	}
-	else
-	{
-		$fogType = "heightfog";
-	}
-
-#	print "\$fogType = $fogType\n";
-
-	; CalcFog
-	local( $worldPos ) = shift;
-	local( $projPos ) = shift;
-	local( $dest ) = shift;
-
-	if ( $dest eq "" )
-	{
-		$dest = "oFog";
-	}
-
-	if( $fogType eq "rangefog" )
-	{
-		&DepthFog( $projPos, $dest );
-	}
-	elsif( $fogType eq "heightfog" )
-	{
-		&WaterDepthFog( $worldPos, $projPos, $dest );
-	}
-	else
-	{
-		die;
-	}	
-}
-
-# Selects the range-based fog routine according to $DOWATERFOG:
-#   0 -> RangeFog( $projPos )
-#   1 -> WaterRangeFog( $worldPos, $projPos )
-# Any other value aborts with a diagnostic. Unlike CalcFog, an undefined
-# $DOWATERFOG is not rejected: it compares equal to 0 and selects RangeFog.
-#   arg 0: world-space position register
-#   arg 1: projected position register
-sub CalcRangeFog
-{
-	; CalcRangeFog
-	local( $worldPos ) = shift;
-	local( $projPos ) = shift;
-
-	if( $DOWATERFOG == 0 )
-	{
-		&RangeFog( $projPos );
-	}
-	elsif( $DOWATERFOG == 1 )
-	{
-		&WaterRangeFog( $worldPos, $projPos );
-	}
-	else
-	{
-		die "CalcRangeFog: unsupported \$DOWATERFOG value \"$DOWATERFOG\"\n";
-	}
-}
-
-# Converts the xyz colour in register $gamma to linear space, writing the
-# result to $linear.xyz, by raising each channel to the power 1/$cOOGamma
-# with the lit instruction.
-#   arg 0: source register; its .w component is overwritten
-#   arg 1: destination register
-# One temporary register is allocated and freed. The r0/r2 names in the
-# trailing comments do not correspond to the allocated registers.
-sub GammaToLinear
-{
-	local( $gamma ) = shift;
-	local( $linear ) = shift;
-
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-
-	; Is rcp more expensive than just storing 2.2 somewhere and doing a mov?
-	rcp $gamma.w, $cOOGamma							; $gamma.w = 2.2
-	lit $linear.z, $gamma.zzzw						; r0.z = linear blue
-	lit $tmp.z, $gamma.yyyw							; r2.z = linear green
-	mov $linear.y, $tmp.z							; r0.y = linear green
-	lit $tmp.z, $gamma.xxxw							; r2.z = linear red
-	mov $linear.x, $tmp.z							; r0.x = linear red
-
-	&FreeRegister( \$tmp );
-}
-
-# Converts the xyz colour in register $linear to gamma space, writing the
-# result to $gamma.xyz, by raising each channel to the power $cOOGamma with
-# the lit instruction.
-#   arg 0: source register; its .w component is overwritten
-#   arg 1: destination register
-# One temporary register is allocated and freed. The r0/r2 names in the
-# trailing comments do not correspond to the allocated registers.
-sub LinearToGamma
-{
-	local( $linear ) = shift;
-	local( $gamma ) = shift;
-
-	local( $tmp );
-	&AllocateRegister( \$tmp );
-
-	mov $linear.w, $cOOGamma						; $linear.w = 1.0/2.2
-	lit $gamma.z, $linear.zzzw						; r0.z = gamma blue
-	lit $tmp.z, $linear.yyyw						; r2.z = gamma green
-	mov $gamma.y, $tmp.z							; r0.y = gamma green
-	lit $tmp.z, $linear.xxxw						; r2.z = gamma red
-	mov $gamma.x, $tmp.z							; r0.x = gamma red
-
-	&FreeRegister( \$tmp );
-}
-
-# Writes the reflection of the vertex-to-eye vector about the normal,
-# r = 2 * (n dot v) * n - v, to $reflectionVector.xyz.
-#   arg 0: world-space position register
-#   arg 1: world-space normal register
-#   arg 2: destination register
-# Neither the normal nor the result is normalized here.
-# Two temporary registers are allocated and freed.
-sub ComputeReflectionVector
-{
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $reflectionVector ) = shift;
-
-	local( $vertToEye ); &AllocateRegister( \$vertToEye );
-	local( $tmp ); &AllocateRegister( \$tmp );
-
-	; compute reflection vector r = 2 * (n dot v) n - v
-	sub $vertToEye.xyz, $cEyePos.xyz, $worldPos  ; $tmp1 = v = c - p
-	dp3 $tmp, $worldNormal, $vertToEye			; $tmp = n dot v
-	mul $tmp.xyz, $tmp.xyz, $worldNormal	; $tmp = (n dot v ) n
-	mad $reflectionVector.xyz, $tmp, $cTwo, -$vertToEye
-
-	&FreeRegister( \$vertToEye );
-	&FreeRegister( \$tmp );
-}
-
-# Derives sphere-map texture coordinates from a reflection vector and
-# writes them to $sphereMapTexCoords.xy.
-#   arg 0: reflection vector register (transformed by $cViewModel0..2 here)
-#   arg 1: destination register
-# One temporary register is allocated and freed.
-sub ComputeSphereMapTexCoords
-{
-	local( $reflectionVector ) = shift;
-	local( $sphereMapTexCoords ) = shift;
-
-	local( $tmp ); &AllocateRegister( \$tmp );
-
-	; transform reflection vector into view space
-	dp3 $tmp.x, $reflectionVector, $cViewModel0
-	dp3 $tmp.y, $reflectionVector, $cViewModel1
-	dp3 $tmp.z, $reflectionVector, $cViewModel2
-
-	; generate <rx ry rz+1>
-	add $tmp.z, $tmp.z, $cOne
-
-	; find 1 / the length of r2
-	dp3 $tmp.w, $tmp, $tmp
-	rsq $tmp.w, $tmp.w
-
-	; r1 = r2/|r2| + 1
-	mad $tmp.xy, $tmp.w, $tmp, $cOne
-	mul $sphereMapTexCoords.xy, $tmp.xy, $cHalf
-	
-	&FreeRegister( \$tmp );
-}
-
-# Transforms the vertex position $vPos into world space and writes it to
-# $worldPos (w is set to $cOne).
-#   arg 0: destination register
-# $SKINNING == 0 uses the fixed model matrix $cModel0..2; any other value
-# blends three bone matrices fetched through c[a0.x], which overwrites a0.x
-# and temporarily uses four registers. Dies if $SKINNING is undefined.
-sub SkinPosition
-{
-#	print "\$SKINNING = $SKINNING\n";
-	local( $worldPos ) = shift;
-
-	if( !defined $SKINNING )
-	{
-		die "using \$SKINNING without defining.\n";
-	}
-
-	if( $SKINNING == 0 )
-	{
-		;
-		; 0 bone skinning (4 instructions)
-		;
-		; Transform position into world space
-		; position
-		dp4 $worldPos.x, $vPos, $cModel0
-		dp4 $worldPos.y, $vPos, $cModel1
-		dp4 $worldPos.z, $vPos, $cModel2
-		mov $worldPos.w, $cOne
-	}
-	else
-	{
-		;
-		; 3 bone skinning  (19 instructions)
-		;
-		local( $boneIndices );
-		local( $blendedMatrix0 );
-		local( $blendedMatrix1 );
-		local( $blendedMatrix2 );
-		&AllocateRegister( \$boneIndices );
-		&AllocateRegister( \$blendedMatrix0 );
-		&AllocateRegister( \$blendedMatrix1 );
-		&AllocateRegister( \$blendedMatrix2 );
-
-		; Transform position into world space using all bones
-		; denormalize d3dcolor to matrix index
-		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
-		if ( $g_x360 )
-		{
-			mov $boneIndices, $boneIndices.zyxw
-		}
-
-		; $boneIndices holds the matrix constant indices at this point
-		; first matrix
-		mov a0.x, $boneIndices.z
-		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
-		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
-		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
-		; second matrix
-		mov a0.x, $boneIndices.y
-		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
-
-		; Calculate third weight
-		; compute 1-(weight1+weight2) to calculate weight3
-		; Use $boneIndices.w as a temp since we aren't using it for anything.
-		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
-		sub $boneIndices.w, $cOne, $boneIndices.w
-
-		; third matrix
-		mov a0.x, $boneIndices.x
-		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
-
-		dp4 $worldPos.x, $vPos, $blendedMatrix0
-		dp4 $worldPos.y, $vPos, $blendedMatrix1
-		dp4 $worldPos.z, $vPos, $blendedMatrix2
-		mov $worldPos.w, $cOne
-
-		&FreeRegister( \$boneIndices );
-		&FreeRegister( \$blendedMatrix0 );
-		&FreeRegister( \$blendedMatrix1 );
-		&FreeRegister( \$blendedMatrix2 );
-	}
-}
-
-
-# Transforms the vertex position $vPos and normal $vNormal into world space.
-#   arg 0: destination register for the position (w is set to $cOne)
-#   arg 1: destination register for the normal (xyz only, not renormalized)
-# $SKINNING == 0 uses the fixed model matrix $cModel0..2; any other value
-# blends three bone matrices fetched through c[a0.x], which overwrites a0.x
-# and temporarily uses four registers. Dies if $SKINNING is undefined.
-sub SkinPositionAndNormal
-{
-#	print "\$SKINNING = $SKINNING\n";
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-
-	if( !defined $SKINNING )
-	{
-		die "using \$SKINNING without defining.\n";
-	}
-
-	if( $SKINNING == 0 )
-	{
-		;
-		; 0 bone skinning (7 instructions)
-		;
-		; Transform position + normal into world space
-		; position
-		dp4 $worldPos.x, $vPos, $cModel0
-		dp4 $worldPos.y, $vPos, $cModel1
-		dp4 $worldPos.z, $vPos, $cModel2
-		mov $worldPos.w, $cOne
-		; normal
-		dp3 $worldNormal.x, $vNormal, $cModel0
-		dp3 $worldNormal.y, $vNormal, $cModel1
-		dp3 $worldNormal.z, $vNormal, $cModel2
-	}
-	else
-	{
-		local( $boneIndices );
-		local( $blendedMatrix0 );
-		local( $blendedMatrix1 );
-		local( $blendedMatrix2 );
-		&AllocateRegister( \$boneIndices );
-		&AllocateRegister( \$blendedMatrix0 );
-		&AllocateRegister( \$blendedMatrix1 );
-		&AllocateRegister( \$blendedMatrix2 );
-
-		; Transform position into world space using all bones
-		; denormalize d3dcolor to matrix index
-		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
-		if ( $g_x360 )
-		{
-			mov $boneIndices, $boneIndices.zyxw
-		}
-
-		; $boneIndices holds the matrix constant indices at this point
-		; first matrix
-		mov a0.x, $boneIndices.z
-		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
-		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
-		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
-		; second matrix
-		mov a0.x, $boneIndices.y
-		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
-
-		; Calculate third weight
-		; compute 1-(weight1+weight2) to calculate weight3
-		; Use $boneIndices.w as a temp since we aren't using it for anything.
-		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
-		sub $boneIndices.w, $cOne, $boneIndices.w
-
-		; third matrix
-		mov a0.x, $boneIndices.x
-		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
-
-		dp4 $worldPos.x, $vPos, $blendedMatrix0
-		dp4 $worldPos.y, $vPos, $blendedMatrix1
-		dp4 $worldPos.z, $vPos, $blendedMatrix2
-		mov $worldPos.w, $cOne
-
-		; normal
-		dp3 $worldNormal.x, $vNormal, $blendedMatrix0
-		dp3 $worldNormal.y, $vNormal, $blendedMatrix1
-		dp3 $worldNormal.z, $vNormal, $blendedMatrix2
-
-		&FreeRegister( \$boneIndices );
-		&FreeRegister( \$blendedMatrix0 );
-		&FreeRegister( \$blendedMatrix1 );
-		&FreeRegister( \$blendedMatrix2 );
-	}
-}
-
-# Transforms the vertex position, normal and tangent basis into world space.
-#   arg 0: destination register for the position (w is set to $cOne)
-#   arg 1: destination register for the normal (xyz)
-#   arg 2: destination register for tangent S (xyz), taken from $vUserData.xyz
-#   arg 3: destination register for tangent T (xyz), computed as
-#          cross( N, S ) * $vUserData.w
-# $SKINNING == 0 uses the fixed model matrix $cModel0..2; any other value
-# blends three bone matrices fetched through c[a0.x], which overwrites a0.x
-# and temporarily uses four registers. Dies if $SKINNING is undefined.
-sub SkinPositionNormalAndTangentSpace
-{
-#	print "\$SKINNING = $SKINNING\n";
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $worldTangentS ) = shift;
-	local( $worldTangentT ) = shift;
-	local( $userData ); # only referenced by the disabled X360 code below
-
-	if( !defined $SKINNING )
-	{
-		die "using \$SKINNING without defining.\n";
-	}
-
-# X360TBD: needed for compressed vertex format
-#	if ( $g_x360 )
-#	{
-#		&AllocateRegister( \$userData );
-#		; remap compressed range [0..1] to [-1..1]
-#		mad $userData, $vUserData, $cTwo, -$cOne
-#	}
-
-	if( $SKINNING == 0 )
-	{
-		;
-		; 0 bone skinning (13 instructions)
-		;
-		; Transform position + normal + tangentS + tangentT into world space
-		dp4 $worldPos.x, $vPos, $cModel0
-		dp4 $worldPos.y, $vPos, $cModel1
-		dp4 $worldPos.z, $vPos, $cModel2
-		mov $worldPos.w, $cOne
-
-		; normal
-		dp3 $worldNormal.x, $vNormal, $cModel0
-		dp3 $worldNormal.y, $vNormal, $cModel1
-		dp3 $worldNormal.z, $vNormal, $cModel2
-
-# X360TBD: needed for compressed vertex format
-#		if ( $g_x360 )
-#		{
-#			; tangents
-#			dp3 $worldTangentS.x, $userData, $cModel0
-#			dp3 $worldTangentS.y, $userData, $cModel1
-#			dp3 $worldTangentS.z, $userData, $cModel2
-#
-#			; calculate tangent t via cross( N, S ) * S[3]
-#			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
-#			mul $worldTangentT.xyz, $userData.w, $worldTangentT.xyz
-#		}
-#		else
-		{
-			; tangents
-			dp3 $worldTangentS.x, $vUserData, $cModel0
-			dp3 $worldTangentS.y, $vUserData, $cModel1
-			dp3 $worldTangentS.z, $vUserData, $cModel2
-
-			; calculate tangent t via cross( N, S ) * S[3]
-			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
-			mul $worldTangentT.xyz, $vUserData.w, $worldTangentT.xyz
-		}
-	}
-	else
-	{
-		local( $boneIndices );
-		local( $blendedMatrix0 );
-		local( $blendedMatrix1 );
-		local( $blendedMatrix2 );
-		&AllocateRegister( \$boneIndices );
-		&AllocateRegister( \$blendedMatrix0 );
-		&AllocateRegister( \$blendedMatrix1 );
-		&AllocateRegister( \$blendedMatrix2 );
-
-		; Transform position into world space using all bones
-		; denormalize d3dcolor to matrix index
-		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
-		if ( $g_x360 )
-		{
-			mov $boneIndices, $boneIndices.zyxw
-		}
-
-		; $boneIndices holds the matrix constant indices at this point
-		; first matrix
-		mov a0.x, $boneIndices.z
-		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
-		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
-		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
-		; second matrix
-		mov a0.x, $boneIndices.y
-		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
-
-		; Calculate third weight
-		; compute 1-(weight1+weight2) to calculate weight3
-		; Use $boneIndices.w as a temp since we aren't using it for anything.
-		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
-		sub $boneIndices.w, $cOne, $boneIndices.w
-
-		; third matrix
-		mov a0.x, $boneIndices.x
-		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
-		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
-		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
-
-		; position
-		dp4 $worldPos.x, $vPos, $blendedMatrix0
-		dp4 $worldPos.y, $vPos, $blendedMatrix1
-		dp4 $worldPos.z, $vPos, $blendedMatrix2
-		mov $worldPos.w, $cOne
-
-		; normal
-		dp3 $worldNormal.x, $vNormal, $blendedMatrix0
-		dp3 $worldNormal.y, $vNormal, $blendedMatrix1
-		dp3 $worldNormal.z, $vNormal, $blendedMatrix2
-
-# X360TBD: needed for compressed vertex format
-#		if ( $g_x360 )
-#		{
-#			; tangents
-#			dp3 $worldTangentS.x, $userData, $blendedMatrix0
-#			dp3 $worldTangentS.y, $userData, $blendedMatrix1
-#			dp3 $worldTangentS.z, $userData, $blendedMatrix2
-#
-#			; calculate tangent t via cross( N, S ) * S[3]
-#			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
-#			mul $worldTangentT.xyz, $userData.w, $worldTangentT.xyz
-#		}
-#		else
-		{
-			; tangents
-			dp3 $worldTangentS.x, $vUserData, $blendedMatrix0
-			dp3 $worldTangentS.y, $vUserData, $blendedMatrix1
-			dp3 $worldTangentS.z, $vUserData, $blendedMatrix2
-
-			; calculate tangent t via cross( N, S ) * S[3]
-			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
-			mul $worldTangentT.xyz, $vUserData.w, $worldTangentT.xyz
-		}
-
-		&FreeRegister( \$boneIndices );
-		&FreeRegister( \$blendedMatrix0 );
-		&FreeRegister( \$blendedMatrix1 );
-		&FreeRegister( \$blendedMatrix2 );
-	}
-
-# X360TBD: needed for compressed vertex format
-#	if ( $g_x360 )
-#	{
-#		&FreeRegister( \$userData );
-#	}
-}
-
-# Scales an RGB colour down so that its largest channel is at most one,
-# preserving the ratio between channels, and writes it to $dst.xyz.
-# Colours whose channels are all <= 1 are passed through unchanged.
-#   arg 0: source colour register; its .w component is overwritten
-#   arg 1: destination register
-sub ColorClamp
-{
-	; ColorClamp; stomps $color.w
-	local( $color ) = shift;
-	local( $dst ) = shift;
-
-	; Get the max of RGB and stick it in W
-	max $color.w, $color.x, $color.y
-	max $color.w, $color.w, $color.z
-
-	; get the greater of one and the max color.
-	max $color.w, $color.w, $cOne
-
-	rcp $color.w, $color.w
-	mul $dst.xyz, $color.w, $color.xyz
-}
-
-# Evaluates the six-sided ambient colour box for a normal: each axis
-# contributes normal[i]^2 times the colour of the box side facing that way.
-#   arg 0: world-space normal register
-#   arg 1: colour register that receives the result in .xyz
-#   arg 2: true to add to the existing $linearColor, false to overwrite it
-# Overwrites a0.x. Two temporary registers are allocated and freed.
-sub AmbientLight
-{
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-	local( $add ) = shift;
-
-	# Declared local like every other temporary in this file, so the register
-	# names do not leak into (or clobber) globals of the same name.
-	local( $nSquared );
-	local( $isNegative );
-
-	; Ambient lighting
-	&AllocateRegister( \$nSquared );
-	&AllocateRegister( \$isNegative );
-
-	mul $nSquared.xyz, $worldNormal.xyz, $worldNormal.xyz				; compute n times n
-	slt $isNegative.xyz, $worldNormal.xyz, $cZero				; 1 for each component that is < 0, else 0
-	mov a0.x, $isNegative.x
-	if( $add )
-	{
-		mad $linearColor.xyz, $nSquared.x, c[a0.x + $cAmbientColorPosXOffset], $linearColor			; $linearColor = normal[0]*normal[0] * box color of appropriate x side
-	}
-	else
-	{
-		mul $linearColor.xyz, $nSquared.x, c[a0.x + $cAmbientColorPosXOffset]			; $linearColor = normal[0]*normal[0] * box color of appropriate x side
-	}
-	mov a0.x, $isNegative.y
-	mad $linearColor.xyz, $nSquared.y, c[a0.x + $cAmbientColorPosYOffset], $linearColor
-	mov a0.x, $isNegative.z
-	mad $linearColor.xyz, $nSquared.z, c[a0.x + $cAmbientColorPosZOffset], $linearColor
-
-	&FreeRegister( \$isNegative );
-	&FreeRegister( \$nSquared );
-}
-
-# Applies one directional light to $linearColor.xyz.
-#   arg 0: world-space normal register
-#   arg 1: colour register that receives the result in .xyz
-#   arg 2: true to add to the existing $linearColor, false to overwrite it
-# The light is read relative to a0.x, which the caller must have loaded:
-# c[a0.x] is the colour and c[a0.x + 1] the direction.
-# $HALF_LAMBERT selects plain (0) or half-lambert (1) shading; any other
-# value aborts. One temporary register is allocated and freed.
-sub DirectionalLight
-{
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-	local( $add ) = shift;
-
-	# Declared local like every other temporary in this file, so the register
-	# name does not leak into (or clobber) a global of the same name.
-	local( $nDotL );
-	&AllocateRegister( \$nDotL ); # FIXME: This only needs to be a scalar
-
-	; NOTE: Gotta use -l here, since light direction = -l
-	; DIRECTIONAL LIGHT
-	; compute n dot l
-	dp3 $nDotL.x, -c[a0.x + 1], $worldNormal
-
-	if ( $HALF_LAMBERT == 0 )
-	{
-		; lambert
-		max $nDotL.x, $nDotL.x, c0.x				; Clamp to zero
-	}
-	elsif ( $HALF_LAMBERT == 1 )
-	{
-		; half-lambert
-		mad $nDotL.x, $nDotL.x, $cHalf, $cHalf		; dot = (dot * 0.5 + 0.5)^2
-		mul $nDotL.x, $nDotL.x, $nDotL.x
-	}
-	else
-	{
-		die "\$HALF_LAMBERT is hosed\n";
-	}
-
-	if( $add )
-	{
-		mad $linearColor.xyz, c[a0.x], $nDotL.x, $linearColor
-	}
-	else
-	{
-		mul $linearColor.xyz, c[a0.x], $nDotL.x
-	}
-
-	&FreeRegister( \$nDotL );
-}
-
-# Applies one point light, with distance attenuation, to $linearColor.xyz.
-#   arg 0: world-space position register
-#   arg 1: world-space normal register
-#   arg 2: colour register that receives the result in .xyz
-#   arg 3: true to add to the existing $linearColor, false to overwrite it
-# The light is read relative to a0.x, which the caller must have loaded:
-# c[a0.x] is the colour, c[a0.x+2] the position and c[a0.x+4] the
-# attenuation terms.
-# $HALF_LAMBERT selects plain (0) or half-lambert (1) shading; any other
-# value aborts. Temporaries are freed as soon as they are dead so later
-# allocations can reuse them; the rN names in the trailing comments do not
-# correspond to the allocated registers.
-sub PointLight
-{
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-	local( $add ) = shift;
-
-	local( $lightDir );
-	&AllocateRegister( \$lightDir );
-	
-	; POINT LIGHT
-	; compute light direction
-	sub $lightDir, c[a0.x+2], $worldPos
-	
-	local( $lightDistSquared );
-	local( $ooLightDist );
-	&AllocateRegister( \$lightDistSquared );
-	&AllocateRegister( \$ooLightDist );
-
-	; normalize light direction, maintain temporaries for attenuation
-	dp3 $lightDistSquared, $lightDir, $lightDir
-	rsq $ooLightDist, $lightDistSquared.x
-	mul $lightDir, $lightDir, $ooLightDist.x
-	
-	local( $attenuationFactors );
-	&AllocateRegister( \$attenuationFactors );
-
-	; compute attenuation amount (r2 = 'd*d d*d d*d d*d', r3 = '1/d 1/d 1/d 1/d')
-	dst $attenuationFactors, $lightDistSquared, $ooLightDist						; r4 = ( 1, d, d*d, 1/d )
-	&FreeRegister( \$lightDistSquared );
-	&FreeRegister( \$ooLightDist );
-	local( $attenuation );
-	&AllocateRegister( \$attenuation );
-	dp3 $attenuation, $attenuationFactors, c[a0.x+4]				; r3 = atten0 + d * atten1 + d*d * atten2
-
-	rcp $lightDir.w, $attenuation						; $lightDir.w = 1 / (atten0 + d * atten1 + d*d * atten2)
-
-	&FreeRegister( \$attenuationFactors );
-	&FreeRegister( \$attenuation );
-	
-	local( $tmp );
-	&AllocateRegister( \$tmp ); # FIXME : really only needs to be a scalar
-
-	; compute n dot l, fold in distance attenutation
-	dp3 $tmp.x, $lightDir, $worldNormal
-
-	if ( $HALF_LAMBERT == 0 )
-	{
-		; lambert
-		max $tmp.x, $tmp.x, c0.x				; Clamp to zero
-	}
-	elsif ( $HALF_LAMBERT == 1 )
-	{
-		; half-lambert
-		mad $tmp.x, $tmp.x, $cHalf, $cHalf		; dot = (dot * 0.5 + 0.5)^2
-		mul $tmp.x, $tmp.x, $tmp.x
-	}
-	else
-	{
-		die "\$HALF_LAMBERT is hosed\n";
-	}
-	
-	mul $tmp.x, $tmp.x, $lightDir.w
-	if( $add )
-	{
-		mad $linearColor.xyz, c[a0.x], $tmp.x, $linearColor
-	}
-	else
-	{
-		mul $linearColor.xyz, c[a0.x], $tmp.x
-	}
-
-	&FreeRegister( \$lightDir );
-	&FreeRegister( \$tmp ); # FIXME : really only needs to be a scalar
-}
-
-# Applies one spot light, with distance and angular attenuation, to
-# $linearColor.xyz.
-#   arg 0: world-space position register
-#   arg 1: world-space normal register
-#   arg 2: colour register that receives the result in .xyz
-#   arg 3: true to add to the existing $linearColor, false to overwrite it
-# The light is read relative to a0.x, which the caller must have loaded:
-# c[a0.x] is the colour, c[a0.x+1] the direction, c[a0.x+2] the position,
-# c[a0.x+3] the spot parameters and c[a0.x+4] the attenuation terms.
-# $HALF_LAMBERT selects plain (0) or half-lambert (1) shading; any other
-# value aborts. Temporaries are freed as soon as they are dead so later
-# allocations can reuse them; the rN names in the trailing comments do not
-# correspond to the allocated registers.
-sub SpotLight
-{
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-	local( $add ) = shift;
-	
-	local( $lightDir );
-	&AllocateRegister( \$lightDir );
-
-	; SPOTLIGHT
-	; compute light direction
-	sub $lightDir, c[a0.x+2], $worldPos
-	
-	local( $lightDistSquared );
-	local( $ooLightDist );
-	&AllocateRegister( \$lightDistSquared );
-	&AllocateRegister( \$ooLightDist );
-
-	; normalize light direction, maintain temporaries for attenuation
-	dp3 $lightDistSquared, $lightDir, $lightDir
-	rsq $ooLightDist, $lightDistSquared.x
-	mul $lightDir, $lightDir, $ooLightDist.x
-	
-	local( $attenuationFactors );
-	&AllocateRegister( \$attenuationFactors );
-
-	; compute attenuation amount (r2 = 'd*d d*d d*d d*d', r3 = '1/d 1/d 1/d 1/d')
-	dst $attenuationFactors, $lightDistSquared, $ooLightDist						; r4 = ( 1, d, d*d, 1/d )
-
-	&FreeRegister( \$lightDistSquared );
-	&FreeRegister( \$ooLightDist );
-	local( $attenuation );	&AllocateRegister( \$attenuation );
-
-	dp3 $attenuation, $attenuationFactors, c[a0.x+4]				; r3 = atten0 + d * atten1 + d*d * atten2
-	rcp $lightDir.w, $attenuation						; r1.w = 1 / (atten0 + d * atten1 + d*d * atten2)
-
-	&FreeRegister( \$attenuationFactors );
-	&FreeRegister( \$attenuation );
-	
-	local( $litSrc ); &AllocateRegister( \$litSrc );
-	local( $tmp ); &AllocateRegister( \$tmp ); # FIXME - only needs to be scalar
-
-	; compute n dot l
-	dp3 $litSrc.x, $worldNormal, $lightDir
-	
-	if ( $HALF_LAMBERT == 0 )
-	{
-		; lambert
-		max $litSrc.x, $litSrc.x, c0.x				; Clamp to zero
-	}
-	elsif ( $HALF_LAMBERT == 1 )
-	{
-		; half-lambert
-		mad $litSrc.x, $litSrc.x, $cHalf, $cHalf	; dot = (dot * 0.5 + 0.5) ^ 2
-		mul $litSrc.x, $litSrc.x, $litSrc.x
-	}
-	else
-	{
-		die "\$HALF_LAMBERT is hosed\n";
-	}
-
-	; compute angular attenuation
-	dp3 $tmp.x, c[a0.x+1], -$lightDir				; dot = -delta * spot direction
-	sub $litSrc.y, $tmp.x, c[a0.x+3].z				; r2.y = dot - stopdot2
-	&FreeRegister( \$tmp );
-	mul $litSrc.y, $litSrc.y, c[a0.x+3].w			; r2.y = (dot - stopdot2) / (stopdot - stopdot2)
-	mov $litSrc.w, c[a0.x+3].x						; r2.w = exponent
-	local( $litDst ); &AllocateRegister( \$litDst );
-	lit $litDst, $litSrc							; r3.y = N dot L or 0, whichever is bigger
-	&FreeRegister( \$litSrc );
-													; r3.z = pow((dot - stopdot2) / (stopdot - stopdot2), exponent)
-	min $litDst.z, $litDst.z, $cOne		 			; clamp pow() to 1
-	
-	local( $tmp1 ); &AllocateRegister( \$tmp1 );
-	local( $tmp2 ); &AllocateRegister( \$tmp2 );  # FIXME - could be scalar
-
-	; fold in distance attenutation with other factors
-	mul $tmp1, c[a0.x], $lightDir.w
-	mul $tmp2.x, $litDst.y, $litDst.z
-	if( $add )
-	{
-		mad $linearColor.xyz, $tmp1, $tmp2.x, $linearColor
-	}
-	else
-	{
-		mul $linearColor.xyz, $tmp1, $tmp2.x
-	}
-
-	&FreeRegister( \$lightDir );
-	&FreeRegister( \$litDst );
-	&FreeRegister( \$tmp1 );
-	&FreeRegister( \$tmp2 );
-}
-
-# DoLight( $lightType, $worldPos, $worldNormal, $linearColor, $add )
-# Dispatches one local light to the routine matching $lightType, which must be
-# the string "spot", "point" or "directional"; any other value dies.
-# The remaining arguments are forwarded unchanged, except that directional
-# lights are not given $worldPos.
-sub DoLight
-{
-	local( $lightType ) = shift;
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-	local( $add ) = shift;
-
-	if( $lightType eq "spot" )
-	{
-		&SpotLight( $worldPos, $worldNormal, $linearColor, $add );
-	}
-	elsif( $lightType eq "point" )
-	{
-		&PointLight( $worldPos, $worldNormal, $linearColor, $add );
-	}
-	elsif( $lightType eq "directional" )
-	{
-		&DirectionalLight( $worldNormal, $linearColor, $add );
-	}
-	else
-	{
-		die "don't know about light type \"$lightType\"\n";
-	}
-}
-
-# DoLighting( $worldPos, $worldNormal )
-# Emits the vertex lighting code for the light combination selected by
-# $LIGHT_COMBO and writes the result to oD0.
-# Dies if $LIGHT_COMBO or $HALF_LAMBERT is undefined.
-# The light types are looked up in the four g_*LightType*Array tables, and the
-# two arguments are shifted only after those lookups.
-sub DoLighting
-{
-	if( !defined $LIGHT_COMBO )
-	{
-		die "DoLighting called without using \$LIGHT_COMBO\n";
-	}
-	if ( !defined $HALF_LAMBERT )
-	{
-		die "DoLighting called without using \$HALF_LAMBERT\n";
-	}
-
-	my $staticLightType = $g_staticLightTypeArray[$LIGHT_COMBO];
-	my $ambientLightType = $g_ambientLightTypeArray[$LIGHT_COMBO];
-	my $localLightType1 = $g_localLightType1Array[$LIGHT_COMBO];
-	my $localLightType2 = $g_localLightType2Array[$LIGHT_COMBO];
-
-#	print "\$staticLightType = $staticLightType\n";
-#	print "\$ambientLightType = $ambientLightType\n";
-#	print "\$localLightType1 = $localLightType1\n";
-#	print "\$localLightType2 = $localLightType2\n";
-
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-
-	; special case for no lighting
-	if( $staticLightType eq "none" && $ambientLightType eq "none" &&
-		$localLightType1 eq "none" && $localLightType2 eq "none" )
-	{
-		; Have to write something here since debug d3d runtime will barf otherwise.
-		mov oD0, $cOne
-		return;
-	}
-
-	; special case for static lighting only
-	; Don't need to bother converting to linear space in this case.
-	if( $staticLightType eq "static" && $ambientLightType eq "none" &&
-		$localLightType1 eq "none" && $localLightType2 eq "none" )
-	{
-		mov oD0, $vSpecular
-		return;
-	}
-
-	# alloc / free are not Perl statements; presumably shorthand that the
-	# preprocessing script expands to AllocateRegister / FreeRegister - confirm.
-	alloc $linearColor
-	alloc $gammaColor
-
-	# $add stays 0 until something has been written to $linearColor; every
-	# later contribution is then accumulated instead of overwriting it.
-	local( $add ) = 0;
-	if( $staticLightType eq "static" )
-	{
-		; The static lighting comes in in gamma space and has also been premultiplied by $cOverbrightFactor
-		; need to get it into
-		; linear space so that we can do adds.
-		rcp $gammaColor.w, $cOverbrightFactor
-		mul $gammaColor.xyz, $vSpecular, $gammaColor.w
-		&GammaToLinear( $gammaColor, $linearColor );
-		$add = 1;
-	}
-
-	if( $ambientLightType eq "ambient" )
-	{
-		&AmbientLight( $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-
-	# a0.x selects which light's block of constants the light routines index.
-	if( $localLightType1 ne "none" )
-	{
-		mov a0.x, $cLight0Offset
-		&DoLight( $localLightType1, $worldPos, $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-
-	if( $localLightType2 ne "none" )
-	{
-		mov a0.x, $cLight1Offset
-		&DoLight( $localLightType2, $worldPos, $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-
-	;------------------------------------------------------------------------------
-	; Output color (gamma correction)
-	;------------------------------------------------------------------------------
-
-	&LinearToGamma( $linearColor, $gammaColor );
-	# The if( 0 ) branch is disabled: it would write the scaled color straight
-	# to oD0 without going through ColorClamp.
-	if( 0 )
-	{
-		mul oD0.xyz, $gammaColor.xyz, $cOverbrightFactor
-	}
-	else
-	{
-		mul $gammaColor.xyz, $gammaColor.xyz, $cOverbrightFactor
-		&ColorClamp( $gammaColor, "oD0" );
-	}
-
-;	mov oD0.xyz, $linearColor
-	mov oD0.w, $cOne				; make sure all components are defined
-
-	free $linearColor
-	free $gammaColor
-}
-
-# DoDynamicLightingToLinear( $worldPos, $worldNormal, $linearColor )
-# Accumulates the ambient and local lights of the current $LIGHT_COMBO into
-# $linearColor. Static lighting is looked up but never applied, and no gamma
-# conversion or output write happens here.
-# Dies if $LIGHT_COMBO or $HALF_LAMBERT is undefined.
-sub DoDynamicLightingToLinear
-{
-	local( $worldPos ) = shift;
-	local( $worldNormal ) = shift;
-	local( $linearColor ) = shift;
-
-	# NOTE(review): both messages below name DoLighting although they are
-	# raised from DoDynamicLightingToLinear.
-	if( !defined $LIGHT_COMBO )
-	{
-		die "DoLighting called without using \$LIGHT_COMBO\n";
-	}
-	if ( !defined $HALF_LAMBERT )
-	{
-		die "DoLighting called without using \$HALF_LAMBERT\n";
-	}
-
-	# $staticLightType is fetched for symmetry with DoLighting but is not read below.
-	my $staticLightType = $g_staticLightTypeArray[$LIGHT_COMBO];
-	my $ambientLightType = $g_ambientLightTypeArray[$LIGHT_COMBO];
-	my $localLightType1 = $g_localLightType1Array[$LIGHT_COMBO];
-	my $localLightType2 = $g_localLightType2Array[$LIGHT_COMBO];
-
-	# No lights at all. . note that we don't even consider static lighting here.
-	if( $ambientLightType eq "none" &&
-		$localLightType1 eq "none" && $localLightType2 eq "none" )
-	{
-		mov $linearColor, $cZero
-		return;
-	}
-
-	# The first contribution overwrites $linearColor; later ones accumulate.
-	local( $add ) = 0;
-	if( $ambientLightType eq "ambient" )
-	{
-		&AmbientLight( $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-
-	if( $localLightType1 ne "none" )
-	{
-		mov a0.x, $cLight0Offset
-		&DoLight( $localLightType1, $worldPos, $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-
-	if( $localLightType2 ne "none" )
-	{
-		mov a0.x, $cLight1Offset
-		&DoLight( $localLightType2, $worldPos, $worldNormal, $linearColor, $add );
-		$add = 1;
-	}
-}
-
-# NotImplementedYet()
-# Emits a minimal shader body that transforms $worldPos by the four
-# $cViewProj rows into oPos, then calls exit, ending the Perl run.
-# It takes no arguments: $worldPos and $projPos are neither shifted nor
-# declared here, so they are whatever the caller (or global scope) holds.
-sub NotImplementedYet
-{
-	&AllocateRegister( \$projPos );
-	dp4 $projPos.x, $worldPos, $cViewProj0
-	dp4 $projPos.y, $worldPos, $cViewProj1
-	dp4 $projPos.z, $worldPos, $cViewProj2
-	dp4 $projPos.w, $worldPos, $cViewProj3
-	mov oPos, $projPos
-	&FreeRegister( \$projPos );
-	exit;
-}
+;------------------------------------
+; RULES FOR AUTHORING VERTEX SHADERS:
+;------------------------------------
+; - never use "def" . . .set constants in code instead. . our constant shadowing will break otherwise.
+;	(same goes for pixel shaders)
+; - use cN notation instead of c[N] notation. .makes grepping for registers easier.
+;   The only exception is c[a0.x+blah] where you have no choice.
+# Size of the temporary register pool (r0 .. r11) that AllocateRegister and
+# FreeRegister hand out.
+$g_NumRegisters = 12;
+
+; NOTE: These must match the same values in vsh_prep.pl!
+# Symbolic names for the vertex input registers.
+$vPos				= "v0";
+$vBoneWeights		= "v1";
+$vBoneIndices		= "v2";
+$vNormal			= "v3";
+$vColor				= "v5";
+$vSpecular			= "v6";
+$vTexCoord0			= "v7";
+$vTexCoord1			= "v8";
+$vTexCoord2			= "v9";
+$vTexCoord3			= "v10";
+$vTangentS			= "v11";
+$vTangentT			= "v12";
+$vUserData			= "v14";
+
+# When $g_dx9 is set, emit one dcl_* input declaration for every vertex
+# stream whose g_uses* flag is set. The flags are not assigned in this file;
+# presumably the preprocessing script sets them - confirm.
+if( $g_dx9 )
+{
+	if( $g_usesPos )
+	{
+		dcl_position $vPos;
+	}
+	
+	if( $g_usesBoneWeights )
+	{
+		dcl_blendweight $vBoneWeights;
+	}
+	if( $g_usesBoneIndices )
+	{
+		dcl_blendindices $vBoneIndices;
+	}
+	if( $g_usesNormal )
+	{
+		dcl_normal $vNormal;
+	}
+	if( $g_usesColor )
+	{
+		dcl_color0 $vColor;
+	}
+	if( $g_usesSpecular )
+	{
+		dcl_color1 $vSpecular;
+	}
+	if( $g_usesTexCoord0 )
+	{
+		dcl_texcoord0 $vTexCoord0;
+	}
+	if( $g_usesTexCoord1 )
+	{
+		dcl_texcoord1 $vTexCoord1;
+	}
+	if( $g_usesTexCoord2 )
+	{
+		dcl_texcoord2 $vTexCoord2;
+	}
+	if( $g_usesTexCoord3 )
+	{
+		dcl_texcoord3 $vTexCoord3;
+	}
+	if( $g_usesTangentS )
+	{
+		dcl_tangent $vTangentS;
+	}
+	if( $g_usesTangentT )
+	{
+		dcl_binormal0 $vTangentT;
+	}
+	# NOTE(review): user data is declared with dcl_tangent, the same usage as
+	# tangent S above - presumably the two are never used together; confirm.
+	if( $g_usesUserData )
+	{
+		dcl_tangent $vUserData;
+	}
+}
+
+# NOTE: These should match g_LightCombinations in vertexshaderdx8.cpp!
+# NOTE: Leave this on single lines or shit might blow up.
+# Four parallel tables with 22 entries each, indexed by $LIGHT_COMBO: entry N
+# gives the static, ambient, first local and second local light type of
+# combination N. "none" means that slot is unused.
+@g_staticLightTypeArray = ( "none", "static", "none", "none", "none", "none", "none", "none", "none", "none", "none", "none", "static", "static", "static", "static", "static", "static", "static", "static", "static", "static" );
+@g_ambientLightTypeArray = ( "none", "none", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", 	"ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient", "ambient" );
+@g_localLightType1Array = ( "none", "none", "none", "spot", "point", "directional", "spot", "spot", "spot", "point", "point", "directional", "none", "spot", "point", "directional", "spot", "spot", "spot", "point", "point", "directional" );
+@g_localLightType2Array = ( "none", "none", "none", "none", "none", "none", "spot", "point", "directional", "point", "directional", "directional", "none", "none", "none", "none", "spot", "point", "directional", "point", "directional", "directional" );
+
+# Symbolic names for the constant registers. Names with a component suffix
+# (for example c0.x) refer to a single scalar packed into that register.
+$cConstants0		= "c0";
+$cZero				= "c0.x";
+$cOne				= "c0.y";
+$cTwo				= "c0.z";
+$cHalf				= "c0.w";
+
+$cConstants1		    = "c1";
+$cOOGamma			    = "c1.x"; # 1/2.2
+$cOtherOverbrightFactor = "c1.y"; # overbright
+$cOneThird			    = "c1.z"; # 1/3
+$cOverbrightFactor      = "c1.w"; # 1/overbright
+
+# The eye position and the water height share c2; the water z lives in c2.w.
+$cEyePos			= "c2";
+$cWaterZ			= "c2.w";
+$cEyePosWaterZ		= "c2";
+
+$cLightIndex		= "c3";
+$cLight0Offset		= "c3.x"; # 27
+$cLight1Offset		= "c3.y"; # 32
+$cColorToIntScale	= "c3.z"; # matrix array offset = 3.0f * 255.0f + 0.01 (epsilon ensures floor yields desired result)
+$cModel0Index		= "c3.w"; # base for start of skinning matrices
+
+; NOTE: These must match the same values in vsh_prep.pl!
+$cModelViewProj0	= "c4";
+$cModelViewProj1	= "c5";
+$cModelViewProj2	= "c6";
+$cModelViewProj3	= "c7";
+
+$cViewProj0			= "c8";
+$cViewProj1			= "c9";
+$cViewProj2			= "c10";
+$cViewProj3			= "c11";
+
+; currently unused
+; c12, c13
+
+$SHADER_SPECIFIC_CONST_10 = "c14";
+$SHADER_SPECIFIC_CONST_11 = "c15";
+
+$cFogParams			= "c16";
+$cFogEndOverFogRange = "c16.x";
+$cFogOne			= "c16.y";
+$cFogMaxDensity		= "c16.z";
+$cOOFogRange		= "c16.w"; # (1/(fogEnd-fogStart))
+
+$cViewModel0		= "c17";
+$cViewModel1		= "c18";
+$cViewModel2		= "c19";
+$cViewModel3		= "c20";
+
+# Ambient cube colors. Each positive face is immediately followed by its
+# negative face, so AmbientLight can pick one with an a0.x offset of 0 or 1.
+$cAmbientColorPosX	= "c21";
+$cAmbientColorNegX	= "c22";
+$cAmbientColorPosY	= "c23";
+$cAmbientColorNegY	= "c24";
+$cAmbientColorPosZ	= "c25";
+$cAmbientColorNegZ	= "c26";
+
+# Bare register numbers (no c prefix) for use inside c[a0.x + N] addressing.
+$cAmbientColorPosXOffset = "21";
+$cAmbientColorPosYOffset = "23";
+$cAmbientColorPosZOffset = "25";
+
+# Each local light occupies five consecutive registers: color, direction,
+# position, spot parameters, attenuation. The light routines read them as
+# c[a0.x] .. c[a0.x+4] after a0.x is loaded from $cLight0Offset or $cLight1Offset.
+$cLight0DiffColor	= "c27";
+$cLight0Dir			= "c28";
+$cLight0Pos			= "c29";
+$cLight0SpotParams  = "c30"; # [ exponent, stopdot, stopdot2, 1 / (stopdot - stopdot2) ]
+$cLight0Atten		= "c31"; # [ constant, linear, quadratic, 0.0f ]
+
+$cLight1DiffColor	= "c32";
+$cLight1Dir			= "c33";
+$cLight1Pos			= "c34";
+$cLight1SpotParams  = "c35"; # [ exponent, stopdot, stopdot2, 1 / (stopdot - stopdot2) ]
+$cLight1Atten		= "c36"; # [ constant, linear, quadratic, 0.0f ]
+
+$cModulationColor	= "c37";
+
+$SHADER_SPECIFIC_CONST_0  = "c38";
+$SHADER_SPECIFIC_CONST_1  = "c39";
+$SHADER_SPECIFIC_CONST_2  = "c40";
+$SHADER_SPECIFIC_CONST_3  = "c41";
+$SHADER_SPECIFIC_CONST_4  = "c42";
+$SHADER_SPECIFIC_CONST_5  = "c43";
+$SHADER_SPECIFIC_CONST_6  = "c44";
+$SHADER_SPECIFIC_CONST_7  = "c45";
+$SHADER_SPECIFIC_CONST_8  = "c46";
+$SHADER_SPECIFIC_CONST_9  = "c47";
+; $SHADER_SPECIFIC_CONST_10 is c14
+; $SHADER_SPECIFIC_CONST_11 is c15
+
+; There are 16 model matrices for skinning
+; NOTE: These must match the same values in vsh_prep.pl!
+# Only the three rows of the first model matrix are named here.
+$cModel0			= "c48";
+$cModel1			= "c49";
+$cModel2			= "c50";
+
+# OutputUsedRegisters()
+# Walks the register pool and, for every register currently marked allocated,
+# emits a line naming its owner. The lines starting with ; are shader comment
+# lines rather than Perl statements - presumably passed through to the
+# generated shader text by the preprocessing script; confirm.
+sub OutputUsedRegisters
+{
+	local( $i );
+	; USED REGISTERS
+	for( $i = 0; $i < $g_NumRegisters; $i++ )
+	{
+		if( $g_allocated[$i] )
+		{
+			; $g_allocatedname[$i] = r$i
+		}
+	}
+	;
+}
+
+# AllocateRegister( \$var [, $regname] )
+# Claims the lowest-numbered free temporary register and stores its name
+# ("r0" .. "r11") in the caller's variable.
+#   \$var     reference to the scalar that receives the register name
+#   $regname  optional label recorded in @g_allocatedname; the calls visible
+#             in this part of the file pass only the reference
+# If every register is taken, the variable is set to the marker string
+# "rERROR_OUT_OF_REGISTERS" instead; nothing is raised.
+sub AllocateRegister
+{
+	# Aliasing the glob to the passed reference makes $reg the caller's scalar.
+	local( *reg ) = shift;
+	local( $regname ) = shift;
+	local( $i );
+	for( $i = 0; $i < $g_NumRegisters; $i++ )
+	{
+		if( !$g_allocated[$i] )
+		{
+			$g_allocated[$i] = 1;
+			$g_allocatedname[$i] = $regname;
+			; AllocateRegister $regname = r$i
+			$reg = "r$i";
+			&OutputUsedRegisters();
+			return;
+		}
+	}
+	; Out of registers allocating $regname!
+	$reg = "rERROR_OUT_OF_REGISTERS";
+	&OutputUsedRegisters();
+}
+
+; pass in a reference to a var that contains a register. . ie \$var where var will contain "r1", etc
+# FreeRegister( \$var [, $regname] )
+# Returns the register named in the caller's variable to the pool and
+# overwrites the variable with the marker "rERROR_DEALLOCATED", so any later
+# use of it shows up in the generated shader.
+#   \$var     reference to a scalar holding a register name such as "r3"
+#   $regname  optional label used only in the emitted comments
+# Freeing a variable that already holds the deallocated marker is a no-op.
+sub FreeRegister
+{
+	# Aliasing the glob to the passed reference makes $reg the caller's scalar.
+	local( *reg ) = shift;
+	local( $regname ) = shift;
+	; FreeRegister $regname = $reg
+	if( $reg =~ m/rERROR_DEALLOCATED/ )
+	{
+		; $regname already deallocated
+		; $reg = "rALREADY_DEALLOCATED";
+		&OutputUsedRegisters();
+		return;
+	}
+;	if( $regname ne g_allocatedname[$reg] )
+;	{
+;		; Error freeing $reg
+;		mov compileerror, freed unallocated register $regname
+;	}
+
+	# Only release a slot when the name carries a numeric index. A marker such
+	# as "rERROR_OUT_OF_REGISTERS" would otherwise be used as an array index,
+	# evaluate to 0 and wrongly release r0.
+	if( $reg =~ m/^r(\d+)/ )
+	{
+		$g_allocated[$1] = 0;
+	}
+	$reg = "rERROR_DEALLOCATED";
+	&OutputUsedRegisters();
+}
+
+# CheckUnfreedRegisters()
+# Leak check for the register pool: prints an ERROR line for every register
+# still marked allocated, then clears the flag so the pool starts empty again.
+sub CheckUnfreedRegisters()
+{
+	local( $i );
+	for( $i = 0; $i < $g_NumRegisters; $i++ )
+	{
+		if( $g_allocated[$i] )
+		{
+			print "ERROR: r$i allocated to $g_allocatedname[$i] at end of program\n";
+			$g_allocated[$i] = 0;
+		}
+	}
+}
+
+# Normalize( $r )
+# Scales register $r in place by 1 / sqrt( dot3( $r, $r ) ).
+# $r.w is used as scratch for the squared length and its reciprocal root, and
+# the final mul writes all four components, so the w component is not preserved.
+sub Normalize
+{
+	local( $r ) = shift;
+	dp3 $r.w, $r, $r
+	rsq $r.w, $r.w
+	mul $r, $r, $r.w
+}
+
+# Cross( $result, $a, $b )
+# Writes the cross product a x b to $result.xyz in two instructions:
+# a.yzx * b.zxy - b.yzx * a.zxy.
+# $result must be a different register from $a and $b, because the first
+# instruction overwrites $result before the second one reads the operands.
+sub Cross
+{
+	local( $result ) = shift;
+	local( $a ) = shift;
+	local( $b ) = shift;
+
+	mul $result.xyz, $a.yzx, $b.zxy
+	mad $result.xyz, -$b.yzx, $a.zxy, $result
+}
+
+# RangeFog( $projPos )
+# Writes a fog factor to oFog based on the length of $projPos.xyw.
+# The factor is clamped to at most $cOne and at least $cFogMaxDensity.
+# Uses one temporary register for the duration of the call.
+sub RangeFog
+{
+	local( $projPos ) = shift;
+
+	;------------------------------
+	; Regular range fog
+	;------------------------------
+
+	; oFog.x = 1.0f = no fog
+	; oFog.x = 0.0f = full fog
+	; compute fog factor f = (fog_end - dist)*(1/(fog_end-fog_start))
+	; this is == to: (fog_end/(fog_end-fog_start) - dist/(fog_end-fog_start)
+	; which can be expressed with a single mad instruction!
+
+	; Compute |projPos|
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+	dp3 $tmp.x, $projPos.xyw, $projPos.xyw
+	rsq $tmp.x, $tmp.x
+	rcp $tmp.x, $tmp.x
+
+	# The factor is computed the same way for both targets; only the
+	# destination of the final write differs.
+	mad $tmp, -$tmp.x, $cOOFogRange, $cFogEndOverFogRange
+	min $tmp, $tmp, $cOne
+	if( $g_dx9 )
+	{
+		max oFog, $tmp.x, $cFogMaxDensity
+	}
+	else
+	{
+		max oFog.x, $tmp.x, $cFogMaxDensity
+	}
+	&FreeRegister( \$tmp );
+}
+
+# DepthFog( $projPos [, $dest] )
+# Writes a fog factor based on $projPos.w to $dest, which defaults to "oFog"
+# when it is empty or omitted. The factor is clamped to at most $cOne and at
+# least $cFogMaxDensity. When $g_dx9 is not set the write goes to $dest.x.
+sub DepthFog
+{
+	local( $projPos ) = shift;
+	local( $dest ) = shift;
+
+	if ( $dest eq "" )
+	{
+		$dest = "oFog";
+	}
+
+	;------------------------------
+	; Regular range fog
+	;------------------------------
+
+	; oFog.x = 1.0f = no fog
+	; oFog.x = 0.0f = full fog
+	; compute fog factor f = (fog_end - dist)*(1/(fog_end-fog_start))
+	; this is == to: (fog_end/(fog_end-fog_start) - dist/(fog_end-fog_start)
+	; which can be expressed with a single mad instruction!
+
+	; Compute |projPos|
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+
+	# The factor is computed the same way for both targets; only the
+	# destination of the final write differs.
+	mad $tmp, -$projPos.w, $cOOFogRange, $cFogEndOverFogRange
+	min $tmp, $tmp, $cOne
+	if( $g_dx9 )
+	{
+		max $dest, $tmp.x, $cFogMaxDensity
+	}
+	else
+	{
+		max $dest.x, $tmp.x, $cFogMaxDensity
+	}
+
+	&FreeRegister( \$tmp );
+}
+
+# WaterRangeFog( $worldPos, $projPos )
+# Writes to oFog a fog factor for the part of the eye-to-vertex ray that lies
+# under the water surface, using the length of $projPos.xyw as the total
+# distance. Only the z component of $worldPos is read.
+# The factor is clamped to at most $cOne and at least $cFogMaxDensity.
+sub WaterRangeFog
+{
+	; oFog.x = 1.0f = no fog
+	; oFog.x = 0.0f = full fog
+
+	; only $worldPos.z is used out of worldPos
+	local( $worldPos ) = shift;
+	local( $projPos ) = shift;
+
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+
+	; This is simple similar triangles. Imagine a line passing from the point directly vertically
+	; and another line passing from the point to the eye position.
+	; Let d = total distance from point to the eye
+	; Let h = vertical distance from the point to the eye
+	; Let hw = vertical distance from the point to the water surface
+	; Let dw = distance from the point to a point on the water surface that lies along the ray from point to eye
+	; Therefore d/h = dw/hw by similar triangles, or dw = d * hw / h.
+	; d = |projPos|, h = eyepos.z - worldPos.z, hw = waterheight.z - worldPos.z, dw = what we solve for
+
+	; Now, tmp.x = hw, and tmp.y = h
+	add $tmp.xy, $cEyePosWaterZ.wz, -$worldPos.z
+
+	; if $tmp.x < 0, then set it to 0
+	; This is the equivalent of moving the vert to the water surface if it's above the water surface
+	max $tmp.x, $tmp.x, $cZero
+
+	; Compute 1 / |projPos| = 1/d
+	dp3 $tmp.z, $projPos.xyw, $projPos.xyw
+	rsq $tmp.z, $tmp.z
+
+	; Now we have h/d
+	mul $tmp.z, $tmp.z, $tmp.y
+
+	; Now we have d/h
+	rcp $tmp.w, $tmp.z
+
+	; We finally have d * hw / h
+	; $tmp.w is now the distance that we see through water.
+	mul $tmp.w, $tmp.x, $tmp.w
+
+	# The factor is computed the same way for both targets; only the
+	# destination of the final write differs.
+	mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
+	min $tmp, $tmp, $cOne
+	if( $g_dx9 )
+	{
+		max oFog, $tmp.x, $cFogMaxDensity
+	}
+	else
+	{
+		max oFog.x, $tmp.x, $cFogMaxDensity
+	}
+
+	&FreeRegister( \$tmp );
+}
+
+# WaterDepthFog( $worldPos, $projPos [, $dest] )
+# Writes to $dest (default "oFog") a fog factor for the part of the
+# eye-to-vertex ray that lies under the water surface, using $projPos.w as
+# the total distance. Only the z component of $worldPos is read.
+# NOTE(review): the lower clamp here is $cZero, whereas the other fog
+# routines in this file clamp against $cFogMaxDensity - confirm this is intended.
+sub WaterDepthFog
+{
+	; oFog.x = 1.0f = no fog
+	; oFog.x = 0.0f = full fog
+
+	; only $worldPos.z is used out of worldPos
+	local( $worldPos ) = shift;
+	local( $projPos ) = shift;
+	local( $dest ) = shift;
+
+	if ( $dest eq "" )
+	{
+		$dest = "oFog";
+	}
+
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+
+	; This is simple similar triangles. Imagine a line passing from the point directly vertically
+	; and another line passing from the point to the eye position.
+	; Let d = total distance from point to the eye
+	; Let h = vertical distance from the point to the eye
+	; Let hw = vertical distance from the point to the water surface
+	; Let dw = distance from the point to a point on the water surface that lies along the ray from point to eye
+	; Therefore d/h = dw/hw by similar triangles, or dw = d * hw / h.
+	; d = projPos.w, h = eyepos.z - worldPos.z, hw = waterheight.z - worldPos.z, dw = what we solve for
+
+	; Now, tmp.x = hw, and tmp.y = h
+	add $tmp.xy, $cEyePosWaterZ.wz, -$worldPos.z
+
+	; if $tmp.x < 0, then set it to 0
+	; This is the equivalent of moving the vert to the water surface if it's above the water surface
+	max $tmp.x, $tmp.x, $cZero
+
+	; Now we have 1/h
+	rcp $tmp.z, $tmp.y
+
+	; Now we have d/h
+	mul $tmp.w, $projPos.w, $tmp.z
+
+	; We finally have d * hw / h
+	; $tmp.w is now the distance that we see through water.
+	mul $tmp.w, $tmp.x, $tmp.w
+
+	# The factor is computed the same way for both targets; only the
+	# destination of the final write differs.
+	mad $tmp, -$tmp.w, $cOOFogRange, $cFogOne
+	min $tmp, $tmp, $cOne
+	if( $g_dx9 )
+	{
+		max $dest, $tmp.x, $cZero
+	}
+	else
+	{
+		max $dest.x, $tmp.x, $cZero
+	}
+
+	&FreeRegister( \$tmp );
+}
+
+
+;------------------------------------------------------------------------------
+; Main fogging routine
+;------------------------------------------------------------------------------
+# CalcFog( $worldPos, $projPos [, $dest] )
+# Entry point for depth-based fog. Dies if $DOWATERFOG is undefined.
+# $DOWATERFOG == 0 selects DepthFog; any other value selects WaterDepthFog.
+# $dest defaults to "oFog" when empty or omitted.
+sub CalcFog
+{
+	if( !defined $DOWATERFOG )
+	{
+		die "CalcFog called without using \$DOWATERFOG\n";
+	}
+
+	; CalcFog
+	local( $worldPos ) = shift;
+	local( $projPos ) = shift;
+	local( $dest ) = shift;
+
+	if ( $dest eq "" )
+	{
+		$dest = "oFog";
+	}
+
+	# Dispatch straight on the combo value instead of going through an
+	# intermediate fog type string.
+	if( $DOWATERFOG == 0 )
+	{
+		&DepthFog( $projPos, $dest );
+	}
+	else
+	{
+		&WaterDepthFog( $worldPos, $projPos, $dest );
+	}
+}
+
+# CalcRangeFog( $worldPos, $projPos )
+# Entry point for range-based fog, always written to oFog.
+# $DOWATERFOG == 0 selects RangeFog, 1 selects WaterRangeFog, and any other
+# value dies. Unlike CalcFog, no check is made that $DOWATERFOG is defined.
+sub CalcRangeFog
+{
+	; CalcFog
+	local( $worldPos ) = shift;
+	local( $projPos ) = shift;
+
+	if( $DOWATERFOG == 1 )
+	{
+		&WaterRangeFog( $worldPos, $projPos );
+	}
+	elsif( $DOWATERFOG == 0 )
+	{
+		&RangeFog( $projPos );
+	}
+	else
+	{
+		die;
+	}
+}
+
+# GammaToLinear( $gamma, $linear )
+# Converts the xyz color in register $gamma to linear space and writes it to
+# $linear.xyz. Stomps $gamma.w, which is loaded with 1 / $cOOGamma and used
+# as the exponent.
+# The power is taken with lit, whose result lands in the z component only -
+# presumably z = src.y ^ src.w for positive src.x; see the lit instruction
+# reference. That is why blue is written directly while green and red go
+# through a temporary and a mov.
+sub GammaToLinear
+{
+	local( $gamma ) = shift;
+	local( $linear ) = shift;
+
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+
+	; Is rcp more expensive than just storing 2.2 somewhere and doing a mov?
+	rcp $gamma.w, $cOOGamma							; $gamma.w = 2.2
+	lit $linear.z, $gamma.zzzw						; r0.z = linear blue
+	lit $tmp.z, $gamma.yyyw							; r2.z = linear green
+	mov $linear.y, $tmp.z							; r0.y = linear green
+	lit $tmp.z, $gamma.xxxw							; r2.z = linear red
+	mov $linear.x, $tmp.z							; r0.x = linear red
+
+	&FreeRegister( \$tmp );
+}
+
+# LinearToGamma( $linear, $gamma )
+# Converts the xyz color in register $linear to gamma space and writes it to
+# $gamma.xyz. Stomps $linear.w, which is loaded with $cOOGamma and used as
+# the exponent.
+# Mirrors GammaToLinear: lit delivers the power in z only, so blue is written
+# directly while green and red go through a temporary and a mov.
+sub LinearToGamma
+{
+	local( $linear ) = shift;
+	local( $gamma ) = shift;
+
+	local( $tmp );
+	&AllocateRegister( \$tmp );
+
+	mov $linear.w, $cOOGamma						; $linear.w = 1.0/2.2
+	lit $gamma.z, $linear.zzzw						; r0.z = gamma blue
+	lit $tmp.z, $linear.yyyw						; r2.z = gamma green
+	mov $gamma.y, $tmp.z							; r0.y = gamma green
+	lit $tmp.z, $linear.xxxw						; r2.z = gamma red
+	mov $gamma.x, $tmp.z							; r0.x = gamma red
+
+	&FreeRegister( \$tmp );
+}
+
+# ComputeReflectionVector( $worldPos, $worldNormal, $reflectionVector )
+# Writes r = 2 * (n dot v) * n - v to $reflectionVector.xyz, where
+# v = $cEyePos - $worldPos is the unnormalized vertex-to-eye vector and n is
+# $worldNormal. Neither input is normalized here.
+# Uses two temporary registers for the duration of the call.
+sub ComputeReflectionVector
+{
+	local( $worldPos ) = shift;
+	local( $worldNormal ) = shift;
+	local( $reflectionVector ) = shift;
+
+	local( $vertToEye ); &AllocateRegister( \$vertToEye );
+	local( $tmp ); &AllocateRegister( \$tmp );
+
+	; compute reflection vector r = 2 * (n dot v) n - v
+	sub $vertToEye.xyz, $cEyePos.xyz, $worldPos  ; $tmp1 = v = c - p
+	dp3 $tmp, $worldNormal, $vertToEye			; $tmp = n dot v
+	mul $tmp.xyz, $tmp.xyz, $worldNormal	; $tmp = (n dot v ) n
+	mad $reflectionVector.xyz, $tmp, $cTwo, -$vertToEye
+
+	&FreeRegister( \$vertToEye );
+	&FreeRegister( \$tmp );
+}
+
+# ComputeSphereMapTexCoords( $reflectionVector, $sphereMapTexCoords )
+# Rotates $reflectionVector by the first three $cViewModel rows, adds one to
+# its z, normalizes the result and remaps x and y with ( value + 1 ) * 0.5
+# into $sphereMapTexCoords.xy.
+# Uses one temporary register for the duration of the call.
+sub ComputeSphereMapTexCoords
+{
+	local( $reflectionVector ) = shift;
+	local( $sphereMapTexCoords ) = shift;
+
+	local( $tmp ); &AllocateRegister( \$tmp );
+
+	; transform reflection vector into view space
+	dp3 $tmp.x, $reflectionVector, $cViewModel0
+	dp3 $tmp.y, $reflectionVector, $cViewModel1
+	dp3 $tmp.z, $reflectionVector, $cViewModel2
+
+	; generate <rx ry rz+1>
+	add $tmp.z, $tmp.z, $cOne
+
+	; find 1 / the length of r2
+	dp3 $tmp.w, $tmp, $tmp
+	rsq $tmp.w, $tmp.w
+
+	; r1 = r2/|r2| + 1
+	mad $tmp.xy, $tmp.w, $tmp, $cOne
+	mul $sphereMapTexCoords.xy, $tmp.xy, $cHalf
+	
+	&FreeRegister( \$tmp );
+}
+
+# SkinPosition( $worldPos )
+# Writes the world-space position to $worldPos, with w set to one.
+# Dies if $SKINNING is undefined.
+#   $SKINNING == 0  transform the input position by $cModel0 .. $cModel2
+#   otherwise       blend three bone matrices fetched through a0.x, weighted
+#                   by bone weights x, y and 1 - ( x + y )
+# The skinned path holds four temporary registers while it runs.
+sub SkinPosition
+{
+#	print "\$SKINNING = $SKINNING\n";
+	local( $worldPos ) = shift;
+	
+	if( !defined $SKINNING )
+	{
+		die "using \$SKINNING without defining.\n";
+	}
+		
+	if( $SKINNING == 0 )
+	{
+		;
+		; 0 bone skinning (4 instructions)
+		;
+		; Transform position into world space
+		; position
+		dp4 $worldPos.x, $vPos, $cModel0
+		dp4 $worldPos.y, $vPos, $cModel1
+		dp4 $worldPos.z, $vPos, $cModel2
+		mov $worldPos.w, $cOne
+	} 
+	else
+	{
+		;
+		; 3 bone skinning  (19 instructions)
+		;
+		local( $boneIndices );
+		local( $blendedMatrix0 );
+		local( $blendedMatrix1 );
+		local( $blendedMatrix2 );
+		# $localPos is declared but never used in this routine.
+		local( $localPos );
+		&AllocateRegister( \$boneIndices );
+		&AllocateRegister( \$blendedMatrix0 );
+		&AllocateRegister( \$blendedMatrix1 );
+		&AllocateRegister( \$blendedMatrix2 );
+
+		; Transform position into world space using all bones
+		; denormalize d3dcolor to matrix index
+		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
+		if ( $g_x360 )
+		{
+			mov $boneIndices, $boneIndices.zyxw
+		}
+		
+		; r11 = boneindices at this point
+		; first matrix
+		mov a0.x, $boneIndices.z
+		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
+		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
+		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
+		; second matrix
+		mov a0.x, $boneIndices.y
+		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
+
+		# NOTE(review): the comment below says weight2, but the value computed
+		# is the weight applied to the third matrix.
+		; Calculate third weight
+		; compute 1-(weight1+weight2) to calculate weight2
+		; Use $boneIndices.w as a temp since we aren't using it for anything.
+		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
+		sub $boneIndices.w, $cOne, $boneIndices.w
+
+		; third matrix
+		mov a0.x, $boneIndices.x
+		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
+		
+		dp4 $worldPos.x, $vPos, $blendedMatrix0
+		dp4 $worldPos.y, $vPos, $blendedMatrix1
+		dp4 $worldPos.z, $vPos, $blendedMatrix2
+		mov $worldPos.w, $cOne
+
+		&FreeRegister( \$boneIndices );
+		&FreeRegister( \$blendedMatrix0 );
+		&FreeRegister( \$blendedMatrix1 );
+		&FreeRegister( \$blendedMatrix2 );
+	}
+}
+
+
+# SkinPositionAndNormal( $worldPos, $worldNormal )
+# Writes the world-space position (w set to one) to $worldPos and the
+# world-space normal to $worldNormal.xyz. The normal is rotated with dp3 and
+# is not renormalized here. Dies if $SKINNING is undefined.
+#   $SKINNING == 0  use $cModel0 .. $cModel2 directly
+#   otherwise       blend three bone matrices fetched through a0.x, weighted
+#                   by bone weights x, y and 1 - ( x + y )
+sub SkinPositionAndNormal
+{
+#	print "\$SKINNING = $SKINNING\n";
+	local( $worldPos ) = shift;
+	local( $worldNormal ) = shift;
+
+	if( !defined $SKINNING )
+	{
+		die "using \$SKINNING without defining.\n";
+	}
+
+	if( $SKINNING == 0 )
+	{
+		;
+		; 0 bone skinning (13 instructions)
+		;
+		; Transform position + normal + tangentS + tangentT into world space
+		; position
+		dp4 $worldPos.x, $vPos, $cModel0
+		dp4 $worldPos.y, $vPos, $cModel1
+		dp4 $worldPos.z, $vPos, $cModel2
+		mov $worldPos.w, $cOne
+		; normal
+		dp3 $worldNormal.x, $vNormal, $cModel0
+		dp3 $worldNormal.y, $vNormal, $cModel1
+		dp3 $worldNormal.z, $vNormal, $cModel2
+	}
+	else
+	{
+		local( $boneIndices );
+		local( $blendedMatrix0 );
+		local( $blendedMatrix1 );
+		local( $blendedMatrix2 );
+		# The next four locals are declared but never used in this routine.
+		local( $localPos );
+		local( $localNormal );
+		local( $normalLength );
+		local( $ooNormalLength );
+		&AllocateRegister( \$boneIndices );
+		&AllocateRegister( \$blendedMatrix0 );
+		&AllocateRegister( \$blendedMatrix1 );
+		&AllocateRegister( \$blendedMatrix2 );
+
+		; Transform position into world space using all bones
+		; denormalize d3dcolor to matrix index
+		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
+		if ( $g_x360 )
+		{
+			mov $boneIndices, $boneIndices.zyxw
+		}
+
+		; r11 = boneindices at this point
+		; first matrix
+		mov a0.x, $boneIndices.z
+		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
+		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
+		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
+		; second matrix
+		mov a0.x, $boneIndices.y
+		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
+
+		# NOTE(review): the comment below says weight2, but the value computed
+		# is the weight applied to the third matrix.
+		; Calculate third weight
+		; compute 1-(weight1+weight2) to calculate weight2
+		; Use $boneIndices.w as a temp since we aren't using it for anything.
+		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
+		sub $boneIndices.w, $cOne, $boneIndices.w
+
+		; third matrix
+		mov a0.x, $boneIndices.x
+		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
+		
+		dp4 $worldPos.x, $vPos, $blendedMatrix0
+		dp4 $worldPos.y, $vPos, $blendedMatrix1
+		dp4 $worldPos.z, $vPos, $blendedMatrix2
+		mov $worldPos.w, $cOne
+
+		; normal
+		dp3 $worldNormal.x, $vNormal, $blendedMatrix0
+		dp3 $worldNormal.y, $vNormal, $blendedMatrix1
+		dp3 $worldNormal.z, $vNormal, $blendedMatrix2
+
+		&FreeRegister( \$boneIndices );
+		&FreeRegister( \$blendedMatrix0 );
+		&FreeRegister( \$blendedMatrix1 );
+		&FreeRegister( \$blendedMatrix2 );
+	}	
+}
+
+# SkinPositionNormalAndTangentSpace( $worldPos, $worldNormal, $worldTangentS, $worldTangentT )
+# Writes the world-space position (w set to one), normal, tangent S and
+# tangent T. Tangent S is the user data vector rotated into world space;
+# tangent T is cross( normal, tangent S ) scaled by the user data w component.
+# Dies if $SKINNING is undefined.
+#   $SKINNING == 0  use $cModel0 .. $cModel2 directly
+#   otherwise       blend three bone matrices fetched through a0.x, weighted
+#                   by bone weights x, y and 1 - ( x + y )
+# The X360TBD sections are commented out, which leaves the bare { } blocks
+# that follow them running unconditionally.
+sub SkinPositionNormalAndTangentSpace
+{
+#	print "\$SKINNING = $SKINNING\n";
+	local( $worldPos ) = shift;
+	local( $worldNormal ) = shift;
+	local( $worldTangentS ) = shift;
+	local( $worldTangentT ) = shift;
+	# The next five locals are only referenced from commented-out code or not at all.
+	local( $userData );
+	local( $localPos );
+	local( $localNormal );
+	local( $normalLength );
+	local( $ooNormalLength );
+	
+	if( !defined $SKINNING )
+	{
+		die "using \$SKINNING without defining.\n";
+	}
+
+# X360TBD: needed for compressed vertex format
+#	if ( $g_x360 )
+#	{
+#		&AllocateRegister( \$userData );
+#		; remap compressed range [0..1] to [-1..1]
+#		mad $userData, $vUserData, $cTwo, -$cOne
+#	}
+
+	if( $SKINNING == 0 )
+	{
+		;
+		; 0 bone skinning (13 instructions)
+		;
+		; Transform position + normal + tangentS + tangentT into world space
+		dp4 $worldPos.x, $vPos, $cModel0
+		dp4 $worldPos.y, $vPos, $cModel1
+		dp4 $worldPos.z, $vPos, $cModel2
+		mov $worldPos.w, $cOne
+
+		; normal
+		dp3 $worldNormal.x, $vNormal, $cModel0
+		dp3 $worldNormal.y, $vNormal, $cModel1
+		dp3 $worldNormal.z, $vNormal, $cModel2
+
+# X360TBD: needed for compressed vertex format
+#		if ( $g_x360 )
+#		{
+#			; tangents
+#			dp3 $worldTangentS.x, $userData, $cModel0
+#			dp3 $worldTangentS.y, $userData, $cModel1
+#			dp3 $worldTangentS.z, $userData, $cModel2
+#
+#			; calculate tangent t via cross( N, S ) * S[3]
+#			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
+#			mul $worldTangentT.xyz, $userData.w, $worldTangentT.xyz
+#		}
+#		else
+		{
+			; tangents
+			dp3 $worldTangentS.x, $vUserData, $cModel0
+			dp3 $worldTangentS.y, $vUserData, $cModel1
+			dp3 $worldTangentS.z, $vUserData, $cModel2
+
+			; calculate tangent t via cross( N, S ) * S[3]
+			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
+			mul $worldTangentT.xyz, $vUserData.w, $worldTangentT.xyz
+		}
+	}
+	else
+	{
+		local( $boneIndices );
+		local( $blendedMatrix0 );
+		local( $blendedMatrix1 );
+		local( $blendedMatrix2 );
+		&AllocateRegister( \$boneIndices );
+		&AllocateRegister( \$blendedMatrix0 );
+		&AllocateRegister( \$blendedMatrix1 );
+		&AllocateRegister( \$blendedMatrix2 );
+
+		; Transform position into world space using all bones
+		; denormalize d3dcolor to matrix index
+		mad $boneIndices, $vBoneIndices, $cColorToIntScale, $cModel0Index
+		if ( $g_x360 )
+		{
+			mov $boneIndices, $boneIndices.zyxw
+		}
+
+		; r11 = boneindices at this point
+		; first matrix
+		mov a0.x, $boneIndices.z
+		mul $blendedMatrix0, $vBoneWeights.x, c[a0.x]
+		mul $blendedMatrix1, $vBoneWeights.x, c[a0.x+1]
+		mul $blendedMatrix2, $vBoneWeights.x, c[a0.x+2]
+		; second matrix
+		mov a0.x, $boneIndices.y
+		mad $blendedMatrix0, $vBoneWeights.y, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $vBoneWeights.y, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $vBoneWeights.y, c[a0.x+2], $blendedMatrix2
+
+		# NOTE(review): the comment below says weight2, but the value computed
+		# is the weight applied to the third matrix.
+		; Calculate third weight
+		; compute 1-(weight1+weight2) to calculate weight2
+		; Use $boneIndices.w as a temp since we aren't using it for anything.
+		add $boneIndices.w, $vBoneWeights.x, $vBoneWeights.y
+		sub $boneIndices.w, $cOne, $boneIndices.w
+
+		; third matrix
+		mov a0.x, $boneIndices.x
+		mad $blendedMatrix0, $boneIndices.w, c[a0.x], $blendedMatrix0
+		mad $blendedMatrix1, $boneIndices.w, c[a0.x+1], $blendedMatrix1
+		mad $blendedMatrix2, $boneIndices.w, c[a0.x+2], $blendedMatrix2
+		
+		; position
+		dp4 $worldPos.x, $vPos, $blendedMatrix0
+		dp4 $worldPos.y, $vPos, $blendedMatrix1
+		dp4 $worldPos.z, $vPos, $blendedMatrix2
+		mov $worldPos.w, $cOne
+
+		; normal
+		dp3 $worldNormal.x, $vNormal, $blendedMatrix0
+		dp3 $worldNormal.y, $vNormal, $blendedMatrix1
+		dp3 $worldNormal.z, $vNormal, $blendedMatrix2
+
+# X360TBD: needed for compressed vertex format
+#		if ( $g_x360 )
+#		{
+#			; tangents
+#			dp3 $worldTangentS.x, $userData, $blendedMatrix0
+#			dp3 $worldTangentS.y, $userData, $blendedMatrix1
+#			dp3 $worldTangentS.z, $userData, $blendedMatrix2
+#
+#			; calculate tangent t via cross( N, S ) * S[3]
+#			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
+#			mul $worldTangentT.xyz, $userData.w, $worldTangentT.xyz
+#		}
+#		else
+		{
+			; tangents
+			dp3 $worldTangentS.x, $vUserData, $blendedMatrix0
+			dp3 $worldTangentS.y, $vUserData, $blendedMatrix1
+			dp3 $worldTangentS.z, $vUserData, $blendedMatrix2
+
+			; calculate tangent t via cross( N, S ) * S[3]
+			&Cross( $worldTangentT, $worldNormal, $worldTangentS );
+			mul $worldTangentT.xyz, $vUserData.w, $worldTangentT.xyz
+		}
+
+		&FreeRegister( \$boneIndices );
+		&FreeRegister( \$blendedMatrix0 );
+		&FreeRegister( \$blendedMatrix1 );
+		&FreeRegister( \$blendedMatrix2 );
+	}
+
+# X360TBD: needed for compressed vertex format
+#	if ( $g_x360 )
+#	{
+#		&FreeRegister( \$userData );
+#	}
+}
+
+# ColorClamp( $color, $dst )
+# Writes $color.xyz divided by max( 1, largest of its x, y and z components )
+# to $dst.xyz, so a color with any component above one is scaled down
+# uniformly and other colors pass through unchanged.
+# $color.w is used as scratch and is overwritten.
+sub ColorClamp
+{
+	; ColorClamp; stomps $color.w
+	local( $color ) = shift;
+	local( $dst ) = shift;
+
+	; Get the max of RGB and stick it in W
+	max $color.w, $color.x, $color.y
+	max $color.w, $color.w, $color.z
+
+	; get the greater of one and the max color.
+	max $color.w, $color.w, $cOne
+
+	rcp $color.w, $color.w
+	mul $dst.xyz, $color.w, $color.xyz
+}
+
+# AmbientLight( $worldNormal, $linearColor, $add )
+# Adds ambient cube lighting: for each axis, the squared normal component
+# times the ambient color of the face the normal points toward.
+# When $add is false the x term overwrites $linearColor.xyz; otherwise it is
+# accumulated. The y and z terms are always accumulated.
+# $nSquared and $isNegative are not declared local here, so the register names
+# are stored in variables visible outside this routine.
+sub AmbientLight
+{
+	local( $worldNormal ) = shift;
+	local( $linearColor ) = shift;
+	local( $add ) = shift;
+
+	; Ambient lighting
+	&AllocateRegister( \$nSquared );
+	&AllocateRegister( \$isNegative );
+
+	# The slt result is loaded into a0.x as an offset onto the positive face
+	# register, which selects the negative face register that follows it.
+	# NOTE(review): the slt comment below says >0, but slt against $cZero
+	# tests for less than zero.
+	mul $nSquared.xyz, $worldNormal.xyz, $worldNormal.xyz				; compute n times n
+	slt $isNegative.xyz, $worldNormal.xyz, $cZero				; Figure out whether each component is >0
+	mov a0.x, $isNegative.x
+	if( $add )
+	{
+		mad $linearColor.xyz, $nSquared.x, c[a0.x + $cAmbientColorPosXOffset], $linearColor			; $linearColor = normal[0]*normal[0] * box color of appropriate x side
+	}
+	else
+	{
+		mul $linearColor.xyz, $nSquared.x, c[a0.x + $cAmbientColorPosXOffset]			; $linearColor = normal[0]*normal[0] * box color of appropriate x side
+	}
+	mov a0.x, $isNegative.y
+	mad $linearColor.xyz, $nSquared.y, c[a0.x + $cAmbientColorPosYOffset], $linearColor
+	mov a0.x, $isNegative.z
+	mad $linearColor.xyz, $nSquared.z, c[a0.x + $cAmbientColorPosZOffset], $linearColor
+
+	&FreeRegister( \$isNegative );
+	&FreeRegister( \$nSquared );
+}
+
+# DirectionalLight: emits one directional light term.
+#   $worldNormal - register holding the normal
+#   $linearColor - register that receives the result in .xyz
+#   $add         - true: accumulate into $linearColor; false: overwrite it
+# Light constants are read relative to a0.x, which the caller must have set:
+# c[a0.x] scales the result and c[a0.x + 1] is the light direction.
+# Dies unless $HALF_LAMBERT is 0 or 1.
+sub DirectionalLight
+{
+	local( $worldNormal ) = shift;
+	local( $linearColor ) = shift;
+	local( $add ) = shift;
+
+	# Scope the temporary to this macro (as PointLight and SpotLight do) so it
+	# cannot overwrite a same-named variable that belongs to a caller.
+	local( $nDotL );
+	&AllocateRegister( \$nDotL ); # FIXME: This only needs to be a scalar
+
+	; NOTE: Gotta use -l here, since light direction = -l
+	; DIRECTIONAL LIGHT
+	; compute n dot l
+	dp3 $nDotL.x, -c[a0.x + 1], $worldNormal
+	
+	if ( $HALF_LAMBERT == 0 )
+	{
+		; lambert
+		max $nDotL.x, $nDotL.x, c0.x				; Clamp to zero
+	}
+	elsif ( $HALF_LAMBERT == 1 )
+	{
+		; half-lambert
+		mad $nDotL.x, $nDotL.x, $cHalf, $cHalf		; dot = (dot * 0.5 + 0.5)^2
+		mul $nDotL.x, $nDotL.x, $nDotL.x
+	}
+	else
+	{
+		die "\$HALF_LAMBERT is hosed\n";
+	}
+  
+	if( $add )
+	{
+		mad $linearColor.xyz, c[a0.x], $nDotL.x, $linearColor
+	}
+	else
+	{
+		mul $linearColor.xyz, c[a0.x], $nDotL.x
+	}
+
+	&FreeRegister( \$nDotL );
+}
+
+sub PointLight # Emits one point light term with distance attenuation into $linearColor.xyz
+{
+	local( $worldPos ) = shift; # register holding the position the light vector is measured from
+	local( $worldNormal ) = shift; # register holding the normal dotted against the light direction
+	local( $linearColor ) = shift; # register that receives the lit color in .xyz
+	local( $add ) = shift; # true accumulates into $linearColor and false overwrites it
+
+	local( $lightDir ); # light constants are addressed relative to a0.x which must be set before this macro runs
+	&AllocateRegister( \$lightDir ); # .xyz becomes the normalized light vector and .w later carries 1/attenuation
+	
+	; POINT LIGHT
+	; compute light direction
+	sub $lightDir, c[a0.x+2], $worldPos
+	
+	local( $lightDistSquared );
+	local( $ooLightDist );
+	&AllocateRegister( \$lightDistSquared );
+	&AllocateRegister( \$ooLightDist ); # holds 1/distance once the rsq below runs
+
+	; normalize light direction, maintain temporaries for attenuation
+	dp3 $lightDistSquared, $lightDir, $lightDir
+	rsq $ooLightDist, $lightDistSquared.x
+	mul $lightDir, $lightDir, $ooLightDist.x
+	
+	local( $attenuationFactors );
+	&AllocateRegister( \$attenuationFactors );
+
+	; compute attenuation amount (r2 = 'd*d d*d d*d d*d', r3 = '1/d 1/d 1/d 1/d')
+	dst $attenuationFactors, $lightDistSquared, $ooLightDist						; r4 = ( 1, d, d*d, 1/d )
+	&FreeRegister( \$lightDistSquared ); # not read again after the dst above
+	&FreeRegister( \$ooLightDist ); # not read again after the dst above
+	local( $attenuation );
+	&AllocateRegister( \$attenuation ); # receives atten0 + d*atten1 + d*d*atten2 from the dp3 below
+	dp3 $attenuation, $attenuationFactors, c[a0.x+4]				; r3 = atten0 + d * atten1 + d*d * atten2
+
+	rcp $lightDir.w, $attenuation						; $lightDir.w = 1 / (atten0 + d * atten1 + d*d * atten2)
+
+	&FreeRegister( \$attenuationFactors );
+	&FreeRegister( \$attenuation ); # its reciprocal now lives in $lightDir.w
+	
+	local( $tmp );
+	&AllocateRegister( \$tmp ); # FIXME : really only needs to be a scalar
+
+	; compute n dot l, fold in distance attenutation
+	dp3 $tmp.x, $lightDir, $worldNormal
+
+	if ( $HALF_LAMBERT == 0 )
+	{
+		; lambert
+		max $tmp.x, $tmp.x, c0.x				; Clamp to zero
+	}
+	elsif ( $HALF_LAMBERT == 1 )
+	{
+		; half-lambert
+		mad $tmp.x, $tmp.x, $cHalf, $cHalf		; dot = (dot * 0.5 + 0.5)^2
+		mul $tmp.x, $tmp.x, $tmp.x
+	}
+	else
+	{
+		die "\$HALF_LAMBERT is hosed\n";
+	}
+	
+	mul $tmp.x, $tmp.x, $lightDir.w
+	if( $add )
+	{
+		mad $linearColor.xyz, c[a0.x], $tmp.x, $linearColor
+	}
+	else
+	{
+		mul $linearColor.xyz, c[a0.x], $tmp.x
+	}
+
+	&FreeRegister( \$lightDir );
+	&FreeRegister( \$tmp ); # FIXME : really only needs to be a scalar
+}
+
+sub SpotLight # Emits one spotlight term with distance and cone attenuation into $linearColor.xyz
+{
+	local( $worldPos ) = shift; # register holding the position the light vector is measured from
+	local( $worldNormal ) = shift; # register holding the normal dotted against the light direction
+	local( $linearColor ) = shift; # register that receives the lit color in .xyz
+	local( $add ) = shift; # true accumulates into $linearColor and false overwrites it
+	
+	local( $lightDir ); # light constants are addressed relative to a0.x which must be set before this macro runs
+	&AllocateRegister( \$lightDir ); # .xyz becomes the normalized light vector and .w later carries 1/attenuation
+
+	; SPOTLIGHT
+	; compute light direction
+	sub $lightDir, c[a0.x+2], $worldPos
+	
+	local( $lightDistSquared );
+	local( $ooLightDist );
+	&AllocateRegister( \$lightDistSquared );
+	&AllocateRegister( \$ooLightDist ); # holds 1/distance once the rsq below runs
+
+	; normalize light direction, maintain temporaries for attenuation
+	dp3 $lightDistSquared, $lightDir, $lightDir
+	rsq $ooLightDist, $lightDistSquared.x
+	mul $lightDir, $lightDir, $ooLightDist.x
+	
+	local( $attenuationFactors );
+	&AllocateRegister( \$attenuationFactors );
+
+	; compute attenuation amount (r2 = 'd*d d*d d*d d*d', r3 = '1/d 1/d 1/d 1/d')
+	dst $attenuationFactors, $lightDistSquared, $ooLightDist						; r4 = ( 1, d, d*d, 1/d )
+
+	&FreeRegister( \$lightDistSquared ); # not read again after the dst above
+	&FreeRegister( \$ooLightDist ); # not read again after the dst above
+	local( $attenuation );	&AllocateRegister( \$attenuation ); # receives atten0 + d*atten1 + d*d*atten2 from the dp3 below
+
+	dp3 $attenuation, $attenuationFactors, c[a0.x+4]				; r3 = atten0 + d * atten1 + d*d * atten2
+	rcp $lightDir.w, $attenuation						; r1.w = 1 / (atten0 + d * atten1 + d*d * atten2)
+
+	&FreeRegister( \$attenuationFactors );
+	&FreeRegister( \$attenuation ); # its reciprocal now lives in $lightDir.w
+	
+	local( $litSrc ); &AllocateRegister( \$litSrc ); # packed input for the lit instruction below (.x is N dot L and .y the cone term and .w the exponent)
+	local( $tmp ); &AllocateRegister( \$tmp ); # FIXME - only needs to be scalar
+
+	; compute n dot l
+	dp3 $litSrc.x, $worldNormal, $lightDir
+	
+	if ( $HALF_LAMBERT == 0 )
+	{
+		; lambert
+		max $litSrc.x, $litSrc.x, c0.x				; Clamp to zero
+	}
+	elsif ( $HALF_LAMBERT == 1 )
+	{
+		; half-lambert
+		mad $litSrc.x, $litSrc.x, $cHalf, $cHalf	; dot = (dot * 0.5 + 0.5) ^ 2
+		mul $litSrc.x, $litSrc.x, $litSrc.x
+	}
+	else
+	{
+		die "\$HALF_LAMBERT is hosed\n";
+	}
+
+	; compute angular attenuation
+	dp3 $tmp.x, c[a0.x+1], -$lightDir				; dot = -delta * spot direction
+	sub $litSrc.y, $tmp.x, c[a0.x+3].z				; r2.y = dot - stopdot2
+	&FreeRegister( \$tmp ); # the raw cone dot product is no longer needed once $litSrc.y holds it
+	mul $litSrc.y, $litSrc.y, c[a0.x+3].w			; r2.y = (dot - stopdot2) / (stopdot - stopdot2)
+	mov $litSrc.w, c[a0.x+3].x						; r2.w = exponent
+	local( $litDst ); &AllocateRegister( \$litDst ); # lit output whose .y and .z are multiplied together below
+	lit $litDst, $litSrc							; r3.y = N dot L or 0, whichever is bigger
+	&FreeRegister( \$litSrc );
+													; r3.z = pow((dot - stopdot2) / (stopdot - stopdot2), exponent)
+	min $litDst.z, $litDst.z, $cOne		 			; clamp pow() to 1
+	
+	local( $tmp1 ); &AllocateRegister( \$tmp1 ); # holds c[a0.x] scaled by the distance attenuation in $lightDir.w
+	local( $tmp2 ); &AllocateRegister( \$tmp2 );  # FIXME - could be scalar
+
+	; fold in distance attenutation with other factors
+	mul $tmp1, c[a0.x], $lightDir.w
+	mul $tmp2.x, $litDst.y, $litDst.z
+	if( $add )
+	{
+		mad $linearColor.xyz, $tmp1, $tmp2.x, $linearColor
+	}
+	else
+	{
+		mul $linearColor.xyz, $tmp1, $tmp2.x
+	}
+
+	&FreeRegister( \$lightDir );
+	&FreeRegister( \$litDst );
+	&FreeRegister( \$tmp1 );
+	&FreeRegister( \$tmp2 );
+}
+
+# DoLight: forwards one local light to the macro that matches its type.
+#   $lightType   - "spot", "point" or "directional"; any other value is fatal
+#   $worldPos    - position register (not passed on for directional lights)
+#   $worldNormal - normal register
+#   $linearColor - register that receives the light's contribution
+#   $add         - true to accumulate into $linearColor, false to overwrite it
+sub DoLight
+{
+	local( $lightType, $worldPos, $worldNormal, $linearColor, $add ) = @_;
+
+	if( $lightType eq "directional" )
+	{
+		&DirectionalLight( $worldNormal, $linearColor, $add );
+		return;
+	}
+
+	if( $lightType eq "point" )
+	{
+		&PointLight( $worldPos, $worldNormal, $linearColor, $add );
+		return;
+	}
+
+	if( $lightType eq "spot" )
+	{
+		&SpotLight( $worldPos, $worldNormal, $linearColor, $add );
+		return;
+	}
+
+	# Nothing matched: refuse to emit a shader for an unknown light type.
+	die "don't know about light type \"$lightType\"\n";
+}
+
+sub DoLighting # Emits vertex lighting for the current $LIGHT_COMBO and writes the gamma-space result to oD0
+{
+	if( !defined $LIGHT_COMBO )
+	{
+		die "DoLighting called without using \$LIGHT_COMBO\n";
+	}
+	if ( !defined $HALF_LAMBERT )
+	{
+		die "DoLighting called without using \$HALF_LAMBERT\n";
+	}
+
+	my $staticLightType = $g_staticLightTypeArray[$LIGHT_COMBO]; # the four light types of this combo come from the global lookup tables
+	my $ambientLightType = $g_ambientLightTypeArray[$LIGHT_COMBO];
+	my $localLightType1 = $g_localLightType1Array[$LIGHT_COMBO];
+	my $localLightType2 = $g_localLightType2Array[$LIGHT_COMBO];
+
+#	print "\$staticLightType = $staticLightType\n";
+#	print "\$ambientLightType = $ambientLightType\n";
+#	print "\$localLightType1 = $localLightType1\n";
+#	print "\$localLightType2 = $localLightType2\n";
+
+	local( $worldPos ) = shift; # first argument - position register handed on to the local lights
+	local( $worldNormal ) = shift; # second argument - normal register handed on to every light macro
+
+	; special case for no lighting
+	if( $staticLightType eq "none" && $ambientLightType eq "none" &&
+		$localLightType1 eq "none" && $localLightType2 eq "none" )
+	{
+		; Have to write something here since debug d3d runtime will barf otherwise.
+		mov oD0, $cOne
+		return; # nothing else to emit for an unlit combo
+	}
+
+	; special case for static lighting only
+	; Don't need to bother converting to linear space in this case.
+	if( $staticLightType eq "static" && $ambientLightType eq "none" &&
+		$localLightType1 eq "none" && $localLightType2 eq "none" )
+	{
+		mov oD0, $vSpecular
+		return; # the static color was passed straight through
+	}
+
+	alloc $linearColor
+	alloc $gammaColor
+
+	local( $add ) = 0; # flips to 1 once $linearColor holds a term so that later terms accumulate instead of overwrite
+	if( $staticLightType eq "static" )
+	{
+		; The static lighting comes in in gamma space and has also been premultiplied by $cOverbrightFactor
+		; need to get it into
+		; linear space so that we can do adds.
+		rcp $gammaColor.w, $cOverbrightFactor
+		mul $gammaColor.xyz, $vSpecular, $gammaColor.w
+		&GammaToLinear( $gammaColor, $linearColor );
+		$add = 1;
+	}
+
+	if( $ambientLightType eq "ambient" )
+	{
+		&AmbientLight( $worldNormal, $linearColor, $add );
+		$add = 1;
+	}
+
+	if( $localLightType1 ne "none" )
+	{
+		mov a0.x, $cLight0Offset
+		&DoLight( $localLightType1, $worldPos, $worldNormal, $linearColor, $add ); # a0.x set above selects the first light's constants
+		$add = 1;
+	}
+
+	if( $localLightType2 ne "none" )
+	{
+		mov a0.x, $cLight1Offset
+		&DoLight( $localLightType2, $worldPos, $worldNormal, $linearColor, $add ); # a0.x set above selects the second light's constants
+		$add = 1;
+	}
+
+	;------------------------------------------------------------------------------
+	; Output color (gamma correction)
+	;------------------------------------------------------------------------------
+
+	&LinearToGamma( $linearColor, $gammaColor );
+	if( 0 )
+	{
+		mul oD0.xyz, $gammaColor.xyz, $cOverbrightFactor
+	}
+	else
+	{
+		mul $gammaColor.xyz, $gammaColor.xyz, $cOverbrightFactor
+		&ColorClamp( $gammaColor, "oD0" ); # always taken because the if( 0 ) branch above is disabled
+	}
+
+;	mov oD0.xyz, $linearColor
+	mov oD0.w, $cOne				; make sure all components are defined
+
+	free $linearColor
+	free $gammaColor
+}
+
+# DoDynamicLightingToLinear: emits the ambient and local-light terms for the
+# current $LIGHT_COMBO into $linearColor.  Unlike DoLighting it performs no
+# gamma conversion, never writes oD0 and ignores static lighting entirely.
+#   $worldPos    - position register handed on to the local-light macros
+#   $worldNormal - normal register handed on to every light macro
+#   $linearColor - register that receives the summed lighting
+# Dies if $LIGHT_COMBO or $HALF_LAMBERT is undefined.  May overwrite a0.x.
+sub DoDynamicLightingToLinear
+{
+	local( $worldPos ) = shift;
+	local( $worldNormal ) = shift;
+	local( $linearColor ) = shift;
+
+	if( !defined $LIGHT_COMBO )
+	{
+		die "DoDynamicLightingToLinear called without using \$LIGHT_COMBO\n";
+	}
+	if ( !defined $HALF_LAMBERT )
+	{
+		die "DoDynamicLightingToLinear called without using \$HALF_LAMBERT\n";
+	}
+
+	# Static lighting is deliberately not looked up: this macro only handles
+	# the dynamic (ambient + local) part of the combo.
+	my $ambientLightType = $g_ambientLightTypeArray[$LIGHT_COMBO];
+	my $localLightType1 = $g_localLightType1Array[$LIGHT_COMBO];
+	my $localLightType2 = $g_localLightType2Array[$LIGHT_COMBO];
+
+	# No lights at all. . note that we don't even consider static lighting here.
+	if( $ambientLightType eq "none" &&
+		$localLightType1 eq "none" && $localLightType2 eq "none" )
+	{
+		mov $linearColor, $cZero
+		return;
+	}
+
+	# $add flips to 1 once $linearColor holds a term, so that every later
+	# term accumulates instead of overwriting.
+	local( $add ) = 0;
+	if( $ambientLightType eq "ambient" )
+	{
+		&AmbientLight( $worldNormal, $linearColor, $add );
+		$add = 1;
+	}
+
+	if( $localLightType1 ne "none" )
+	{
+		mov a0.x, $cLight0Offset
+		&DoLight( $localLightType1, $worldPos, $worldNormal, $linearColor, $add );
+		$add = 1;
+	}
+
+	if( $localLightType2 ne "none" )
+	{
+		mov a0.x, $cLight1Offset
+		&DoLight( $localLightType2, $worldPos, $worldNormal, $linearColor, $add );
+		$add = 1;
+	}
+}
+
+sub NotImplementedYet # Placeholder shader body that projects $worldPos to oPos and then stops the script
+{
+	&AllocateRegister( \$projPos ); # $projPos is not declared local here and $worldPos below is not a parameter of this sub
+	dp4 $projPos.x, $worldPos, $cViewProj0
+	dp4 $projPos.y, $worldPos, $cViewProj1
+	dp4 $projPos.z, $worldPos, $cViewProj2
+	dp4 $projPos.w, $worldPos, $cViewProj3
+	mov oPos, $projPos
+	&FreeRegister( \$projPos );
+	exit; # ends the generating Perl script here so nothing after this call is emitted
+}