mp/src/materialsystem/stdshaders/common_vs_fxc.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955

//========= Copyright Valve Corporation, All rights reserved. ============//
//
// Purpose: This is where all common code for vertex shaders go.
//
// $NoKeywords: $
//
//===========================================================================//


#ifndef COMMON_VS_FXC_H_
#define COMMON_VS_FXC_H_

#include "common_fxc.h"

// Put global skip commands here. . make sure and check that the appropriate vars are defined
// so these aren't used on the wrong shaders!
// --------------------------------------------------------------------------------
// Ditch all fastpath attempts if we are doing LIGHTING_PREVIEW.
//	SKIP: defined $LIGHTING_PREVIEW && defined $FASTPATH && $LIGHTING_PREVIEW && $FASTPATH
// --------------------------------------------------------------------------------


#ifndef COMPRESSED_VERTS
// Default to no vertex compression
#define COMPRESSED_VERTS 0
#endif

// Reject vertex compression at compile time unless a VS 2.0 or VS 3.0 target is defined
#if ( !defined( SHADER_MODEL_VS_2_0 ) && !defined( SHADER_MODEL_VS_3_0 ) )
#if COMPRESSED_VERTS == 1
#error "Vertex compression is only for DX9 and up!"
#endif
#endif

// We're testing 2 normal compression methods
// One compresses normals+tangents into a SHORT2 each (8 bytes total)
// The other compresses them together, into a single UBYTE4 (4 bytes total)
// FIXME: pick one or the other, compare lighting quality in important cases
// COMPRESSED_NORMALS_TYPE selects which decoder the DecompressVertex_* functions call
#define COMPRESSED_NORMALS_SEPARATETANGENTS_SHORT2	0
#define COMPRESSED_NORMALS_COMBINEDTANGENTS_UBYTE4	1
//#define COMPRESSED_NORMALS_TYPE						COMPRESSED_NORMALS_SEPARATETANGENTS_SHORT2
#define COMPRESSED_NORMALS_TYPE					COMPRESSED_NORMALS_COMBINEDTANGENTS_UBYTE4


// Fog selectors for the int overload of CalcFog(); that function treats any value
// other than FOGTYPE_RANGE as the height (water) case
#define FOGTYPE_RANGE				0
#define FOGTYPE_HEIGHT				1

// Expands to a deliberately malformed expression (divide by zero plus a stray ';').
// Presumably meant to force a compile failure wherever it is expanded; it is not
// referenced in the visible part of this file.
#define COMPILE_ERROR ( 1/0; )

// -------------------------
// CONSTANTS
// -------------------------

// c0 is given the literal values { 0, 1, 2, 0.5 } directly in the shader
#pragma def ( vs, c0, 0.0f, 1.0f, 2.0f, 0.5f )

// c1: .x is read as cOOGamma and .z as cOneThird; cOverbright and cOOOverbright
// are compile-time literals and do not come from the register
const float4 cConstants1				: register(c1);
#define cOOGamma			cConstants1.x
#define cOverbright			2.0f
#define cOneThird			cConstants1.z
#define cOOOverbright		( 1.0f / 2.0f )


// The g_bLightEnabled registers and g_nLightCountRegister hold the same information regarding
// enabling lights, but callers internal to this file tend to use the loops, while external
// callers will end up using the booleans
const bool g_bLightEnabled[4]			: register(b0);
										// through b3

const int g_nLightCountRegister			: register(i0);


#define g_nLightCount					g_nLightCountRegister.x

// c2: .xyz is the eye position (cEyePos); WaterFog() reads .w as the water surface Z
const float4 cEyePosWaterZ				: register(c2);
#define cEyePos			cEyePosWaterZ.xyz

// cFlexScale.x is a binary value used to switch on/off the addition of the flex delta stream
// cFlexScale.y is also read in this file: the wrinkle overload of ApplyMorph() uses it to
// scale the wrinkle value taken from vPosFlex.w
const float4 cFlexScale					: register( c3 );

const float4x4 cModelViewProj			: register(c4);
const float4x4 cViewProj				: register(c8);

// Used to compute projPosZ in shaders without skinning
// Using cModelViewProj with FastClip generates incorrect results
// This is just row two of the non-FastClip cModelViewProj matrix
const float4 cModelViewProjZ			: register(c12);

// More constants working back from the top...
const float4 cViewProjZ					: register(c13);

// c16: fog terms consumed by RangeFog() and WaterFog() through the macros below
const float4 cFogParams					: register(c16);
#define cFogEndOverFogRange cFogParams.x
#define cFogOne cFogParams.y
#define cFogMaxDensity cFogParams.z
#define cOOFogRange cFogParams.w

// Used by ComputeSphereMapTexCoords() to take a reflection vector into view space
const float4x4 cViewModel				: register(c17);

// Ambient cube: AmbientLight() indexes each pair with [0] when the matching normal
// component is non-negative and [1] when it is negative
const float3 cAmbientCubeX [ 2 ] : register ( c21 ) ;
const float3 cAmbientCubeY [ 2 ] : register ( c23 ) ;
const float3 cAmbientCubeZ [ 2 ] : register ( c25 ) ;

// Only declared for VS 3.0; not referenced in the visible part of this file
#if defined ( SHADER_MODEL_VS_3_0 )
const float4 cFlexWeights [ 512 ] : register ( c1024 ) ;
#endif

// Per-light constants, five float4 members per light
struct LightInfo
{
	float4 color;						// {xyz} is color	w is light type code (see comment below)
	float4 dir;							// {xyz} is dir		w is light type code
	float4 pos;							// {xyz} is the light position used for point/spot direction and distance
	float4 spotParams;					// VertexAttenInternal reads x as the spot exponent, z as the cosine offset, w as the scale
	float4 atten;						// {xyz} is dotted with ( 1, dist, dist^2 ) to form the distance falloff denominator
};

// w components of color and dir indicate light type:
// 1x - directional
// 01 - spot
// 00 - point
// (color.w selects directional handling, dir.w selects spot handling in the lighting code below)

// Four lights x 5 constants each = 20 constants
LightInfo cLightInfo[4]					: register(c27);
// Presumably cLightInfo[0].pos: color = c27, dir = c28, pos = c29 if members pack one register each in order
#define LIGHT_0_POSITION_REG					   c29

// Register layout for the modulation color, the shader-specific constants and the
// bone matrix palette. The layout differs between vs_1_1 and the DX9 models.
#ifdef SHADER_MODEL_VS_1_1

const float4 cModulationColor			: register(c37);

// Slots 10 and 11 reuse c14/c15, the gap between cViewProjZ (c13) and cFogParams (c16)
#define SHADER_SPECIFIC_CONST_0 c38
#define SHADER_SPECIFIC_CONST_1 c39
#define SHADER_SPECIFIC_CONST_2 c40
#define SHADER_SPECIFIC_CONST_3 c41
#define SHADER_SPECIFIC_CONST_4 c42
#define SHADER_SPECIFIC_CONST_5 c43
#define SHADER_SPECIFIC_CONST_6 c44
#define SHADER_SPECIFIC_CONST_7 c45
#define SHADER_SPECIFIC_CONST_8 c46
#define SHADER_SPECIFIC_CONST_9 c47
#define SHADER_SPECIFIC_CONST_10 c14
#define SHADER_SPECIFIC_CONST_11 c15

static const int cModel0Index = 48;
const float4x3 cModel[16]					: register(c48);
// NOTE(review): the original comment here read "last cmodel is c105 for dx80, c214 for dx90".
// With 16 matrices starting at c48 and 3 registers per float4x3 (the packing implied by the
// c216 figure in the DX9 branch below), the last register on this path would be c95 -- confirm.

#else // DX9 shaders (vs20 and beyond)

const float4 cModulationColor			: register( c47 );

// Slots 10 and 11 reuse c14/c15, the gap between cViewProjZ (c13) and cFogParams (c16)
#define SHADER_SPECIFIC_CONST_0 c48
#define SHADER_SPECIFIC_CONST_1 c49
#define SHADER_SPECIFIC_CONST_2 c50
#define SHADER_SPECIFIC_CONST_3 c51
#define SHADER_SPECIFIC_CONST_4 c52
#define SHADER_SPECIFIC_CONST_5 c53
#define SHADER_SPECIFIC_CONST_6 c54
#define SHADER_SPECIFIC_CONST_7 c55
#define SHADER_SPECIFIC_CONST_8 c56
#define SHADER_SPECIFIC_CONST_9 c57
#define SHADER_SPECIFIC_CONST_10 c14
#define SHADER_SPECIFIC_CONST_11 c15

static const int cModel0Index = 58;
const float4x3 cModel[53]				: register( c58 );
// last cmodel is c105 for dx80, c216 for dx90
// (53 matrices x 3 registers starting at c58 ends at c216)


// Boolean registers b0..b3 are taken by g_bLightEnabled; shader-specific bools start at b4
#define SHADER_SPECIFIC_BOOL_CONST_0 b4
#define SHADER_SPECIFIC_BOOL_CONST_1 b5
#define SHADER_SPECIFIC_BOOL_CONST_2 b6
#define SHADER_SPECIFIC_BOOL_CONST_3 b7
#define SHADER_SPECIFIC_BOOL_CONST_4 b8
#define SHADER_SPECIFIC_BOOL_CONST_5 b9
#define SHADER_SPECIFIC_BOOL_CONST_6 b10
#define SHADER_SPECIFIC_BOOL_CONST_7 b11
#endif // vertex shader model constant packing changes


//=======================================================================================
// Methods to decompress vertex normals
//=======================================================================================

//-----------------------------------------------------------------------------------
// Decompress a normal from two-component compressed format
// We expect this data to come from a signed SHORT2 stream in the range of -32768..32767
//
// -32768 and 0 are invalid encodings
// w contains the sign to use in the cross product when generating a binormal
void _DecompressShort2Tangent( float2 inputTangent, out float4 outputTangent )
{
	// Each short holds a magnitude biased by 16384; its sign is borrowed to carry
	// the sign of another component (z for .x, binormal sign for .y)
	float2 vMagnitude	= abs(  inputTangent );
	float2 vSigns		= sign( inputTangent );

	// Remove the bias and rescale to recover x and y
	float2 vXY			= ( vMagnitude - 16384.0f ) / 16384.0f;

	// |z| follows from unit length; saturate keeps the sqrt argument inside [0,1]
	float flAbsZ		= sqrt( saturate( 1.0f - dot( vXY, vXY ) ) );

	outputTangent.xy	= vXY;
	outputTangent.z		= vSigns.x * flAbsZ;
	outputTangent.w		= vSigns.y;
}

//-----------------------------------------------------------------------------------
// Same code as _DecompressShort2Tangent, just one returns a float4, one a float3
void _DecompressShort2Normal( float2 inputNormal, out float3 outputNormal )
{
	// Reuse the tangent decoder and discard its w (sign) component
	float4 vDecoded;
	_DecompressShort2Tangent( inputNormal, vDecoded );
	outputNormal = vDecoded.xyz;
}

//-----------------------------------------------------------------------------------
// Decompress normal+tangent together
void _DecompressShort2NormalTangent( float2 inputNormal, float2 inputTangent, out float3 outputNormal, out float4 outputTangent )
{
	// The two streams are decoded independently of each other.
	// FIXME: if we end up sticking with the SHORT2 format, pack the normal and tangent into a single SHORT4 element
	//        (that would make unpacking normal+tangent here together much cheaper than the sum of their parts)
	_DecompressShort2Tangent( inputTangent, outputTangent );
	_DecompressShort2Normal(  inputNormal,  outputNormal  );
}

//=======================================================================================
// Decompress a normal and tangent from four-component compressed format
// We expect this data to come from an unsigned UBYTE4 stream in the range of 0..255
// The final vTangent.w contains the sign to use in the cross product when generating a binormal
// Byte layout, as decoded below: every input byte holds a 6-bit magnitude plus two sign bits.
//   inputNormal.x -> |nX|, sign of nX, sign of nZ
//   inputNormal.y -> |nY|, sign of nY (the second sign bit is not consumed)
//   inputNormal.z -> |tX|, sign of tX, sign of tZ
//   inputNormal.w -> |tY|, sign of tY, sign of the binormal
void _DecompressUByte4NormalTangent( float4 inputNormal,
									out float3 outputNormal,   // {nX, nY, nZ}
									out float4 outputTangent )   // {tX, tY, tZ, sign of binormal}
{
	float4 ztztSignBits	= ( inputNormal - 128.0f ) < 0;						// sign bits for zs and binormal (1 or 0)  set-less-than (slt) asm instruction
	float4 xyxyAbs		= abs( inputNormal - 128.0f ) - ztztSignBits;		// 0..127
	float4 xyxySignBits	= ( xyxyAbs - 64.0f ) < 0;							// sign bits for xs and ys (1 or 0)
	float4 normTan		= (abs( xyxyAbs - 64.0f ) - xyxySignBits) / 63.0f;	// abs({nX, nY, tX, tY})
	outputNormal.xy		= normTan.xy;										// abs({nX, nY, __, __})
	outputTangent.xy	= normTan.zw;										// abs({tX, tY, __, __})

	float4 xyxySigns	= 1 - 2*xyxySignBits;								// Convert sign bits to signs
	float4 ztztSigns	= 1 - 2*ztztSignBits;								// ( [1,0] -> [-1,+1] )

	outputNormal.z		= 1.0f - outputNormal.x - outputNormal.y;			// Project onto x+y+z=1
	outputNormal.xyz	= normalize( outputNormal.xyz );					// Normalize onto unit sphere
	outputNormal.xy	   *= xyxySigns.xy;										// Restore x and y signs
	outputNormal.z	   *= ztztSigns.x;										// Restore z sign

	outputTangent.z		= 1.0f - outputTangent.x - outputTangent.y;			// Project onto x+y+z=1
	outputTangent.xyz	= normalize( outputTangent.xyz );					// Normalize onto unit sphere
	outputTangent.xy   *= xyxySigns.zw;										// Restore x and y signs
	outputTangent.z	   *= ztztSigns.z;										// Restore z sign
	outputTangent.w		= ztztSigns.w;										// Binormal sign
}


//-----------------------------------------------------------------------------------
// Decompress just a normal from four-component compressed format (same as above)
// We expect this data to come from an unsigned UBYTE4 stream in the range of 0..255
// [ When compiled, this works out to approximately 17 asm instructions ]
void _DecompressUByte4Normal( float4 inputNormal,
							out float3 outputNormal)					// {nX, nY, nZ}
{
	const float2 vPlusOne	= float2( 1.0f, 1.0f );

	// Only the first two bytes are needed for the normal
	float2 vCentered		= inputNormal.xy - 128.0f;

	// Top sign bit of each byte (1 when negative, else 0); .x carries the sign of z
	float2 zSignBits		= vCentered < 0;							// set-less-than (slt) asm instruction
	float2 vLow7			= abs( vCentered ) - zSignBits;				// 0..127

	// Second sign bit of each byte: signs of x and y (1 when negative, else 0)
	float2 xySignBits		= ( vLow7 - 64.0f ) < 0;
	outputNormal.xy			= ( abs( vLow7 - 64.0f ) - xySignBits ) / 63.0f;	// abs({nX, nY})

	// Rebuild |z| on the plane x+y+z=1, then push the vector out to the unit sphere
	outputNormal.z			= 1.0f - outputNormal.x - outputNormal.y;
	outputNormal.xyz		= normalize( outputNormal.xyz );

	// Turn the sign bits back into +1/-1 factors and apply them
	outputNormal.xy		   *= lerp( vPlusOne,   -vPlusOne,   xySignBits  );
	outputNormal.z		   *= lerp( vPlusOne.x, -vPlusOne.x, zSignBits.x );
}


// Produces a float3 normal from the vertex stream, decoding it first when
// COMPRESSED_VERTS is 1. Both conditions are on preprocessor constants.
void DecompressVertex_Normal( float4 inputNormal, out float3 outputNormal )
{
	if ( COMPRESSED_VERTS != 1 )
	{
		// Uncompressed stream: the normal is already usable
		outputNormal = inputNormal.xyz;
	}
	else if ( COMPRESSED_NORMALS_TYPE == COMPRESSED_NORMALS_SEPARATETANGENTS_SHORT2 )
	{
		_DecompressShort2Normal( inputNormal.xy, outputNormal );
	}
	else
	{
		// COMPRESSED_NORMALS_COMBINEDTANGENTS_UBYTE4
		_DecompressUByte4Normal( inputNormal, outputNormal );
	}
}

// Produces a float3 normal and float4 tangent from the vertex streams, decoding
// them first when COMPRESSED_VERTS is 1. In the UBYTE4 format both vectors come
// from inputNormal and inputTangent is not read.
void DecompressVertex_NormalTangent( float4 inputNormal,  float4 inputTangent, out float3 outputNormal, out float4 outputTangent )
{
	if ( COMPRESSED_VERTS != 1 )
	{
		// Uncompressed streams: pass both through
		outputTangent = inputTangent;
		outputNormal  = inputNormal.xyz;
	}
	else if ( COMPRESSED_NORMALS_TYPE == COMPRESSED_NORMALS_SEPARATETANGENTS_SHORT2 )
	{
		_DecompressShort2NormalTangent( inputNormal.xy, inputTangent.xy, outputNormal, outputTangent );
	}
	else
	{
		// COMPRESSED_NORMALS_COMBINEDTANGENTS_UBYTE4
		_DecompressUByte4NormalTangent( inputNormal, outputNormal, outputTangent );
	}
}


#ifdef SHADER_MODEL_VS_3_0

//-----------------------------------------------------------------------------
// Methods to sample morph data from a vertex texture
// NOTE: vMorphTargetTextureDim.x = width, vMorphTargetTextureDim.y = height, vMorphTargetTextureDim.z = # of float4 fields per vertex
// For position + normal morph for example, there will be 2 fields.
//-----------------------------------------------------------------------------
float4 SampleMorphDelta( sampler2D vt, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect, const float flVertexID, const float flField )
{
	// Vertices are laid out in columns of vMorphSubrect.w rows inside the subrect
	float flCol = floor( flVertexID / vMorphSubrect.w );

	// Texel position of the requested field (+0.5 addresses the texel center)
	float2 vTexel;
	vTexel.x = vMorphSubrect.x + vMorphTargetTextureDim.z * flCol + flField + 0.5f;
	vTexel.y = vMorphSubrect.y + flVertexID - flCol * vMorphSubrect.w + 0.5f;

	// Normalize by the texture size; z and w of the lookup coordinate are zero
	float4 vCoord = float4( vTexel / vMorphTargetTextureDim.xy, 0.f, 0.f );

	return tex2Dlod( vt, vCoord );
}

// Optimized version which reads 2 deltas
void SampleMorphDelta2( sampler2D vt, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect, const float flVertexID, out float4 delta1, out float4 delta2 )
{
	// Vertices are laid out in columns of vMorphSubrect.w rows inside the subrect
	float flCol = floor( flVertexID / vMorphSubrect.w );

	// Texel position of field 0 (+0.5 addresses the texel center)
	float2 vTexel;
	vTexel.x = vMorphSubrect.x + vMorphTargetTextureDim.z * flCol + 0.5f;
	vTexel.y = vMorphSubrect.y + flVertexID - flCol * vMorphSubrect.w + 0.5f;

	float4 vCoord = float4( vTexel / vMorphTargetTextureDim.xy, 0.f, 0.f );
	delta1 = tex2Dlod( vt, vCoord );

	// Field 1 sits one texel to the right of field 0
	vCoord.x += 1.0f / vMorphTargetTextureDim.x;
	delta2 = tex2Dlod( vt, vCoord );
}

#endif // SHADER_MODEL_VS_3_0


#if ( defined( SHADER_MODEL_VS_2_0 ) || defined( SHADER_MODEL_VS_3_0 ) )

//-----------------------------------------------------------------------------
// Method to apply morphs
//-----------------------------------------------------------------------------
// Flex-stream morph, position only. Always returns true.
bool ApplyMorph( float3 vPosFlex, inout float3 vPosition )
{
	// The delta arrives in its own vertex stream; cFlexScale.x switches it on or off
	vPosition.xyz += vPosFlex.xyz * cFlexScale.x;
	return true;
}

// Flex-stream morph, position and normal. Always returns true.
bool ApplyMorph( float3 vPosFlex, float3 vNormalFlex, inout float3 vPosition, inout float3 vNormal )
{
	// The deltas arrive in their own vertex stream; cFlexScale.x switches them on or off
	float3 vDeltaN = vNormalFlex.xyz * cFlexScale.x;
	float3 vDeltaP = vPosFlex.xyz * cFlexScale.x;

	vNormal       += vDeltaN;
	vPosition.xyz += vDeltaP;
	return true;
}

// Flex-stream morph, position, normal and tangent. Always returns true.
bool ApplyMorph( float3 vPosFlex, float3 vNormalFlex, 
	inout float3 vPosition, inout float3 vNormal, inout float3 vTangent )
{
	// The deltas arrive in their own vertex stream; cFlexScale.x switches them on or off
	float3 vDeltaN = vNormalFlex.xyz * cFlexScale.x;
	float3 vDeltaP = vPosFlex.xyz * cFlexScale.x;

	vPosition.xyz += vDeltaP;

	// The tangent is offset by the normal delta; there is no separate tangent delta
	vNormal       += vDeltaN;
	vTangent.xyz  += vDeltaN;
	return true;
}

// Flex-stream morph with wrinkle output. Always returns true.
bool ApplyMorph( float4 vPosFlex, float3 vNormalFlex, 
	inout float3 vPosition, inout float3 vNormal, inout float3 vTangent, out float flWrinkle )
{
	// The deltas arrive in their own vertex stream; cFlexScale.x switches them on or off
	float3 vDeltaN = vNormalFlex.xyz * cFlexScale.x;
	float3 vDeltaP = vPosFlex.xyz * cFlexScale.x;

	// The wrinkle amount rides in the w of the position delta and has its own scale
	flWrinkle = vPosFlex.w * cFlexScale.y;

	vPosition.xyz += vDeltaP;

	// The tangent is offset by the normal delta; there is no separate tangent delta
	vNormal       += vDeltaN;
	vTangent.xyz  += vDeltaN;
	return true;
}

#endif // defined( SHADER_MODEL_VS_2_0 ) || defined( SHADER_MODEL_VS_3_0 )


#ifdef SHADER_MODEL_VS_3_0

// Vertex-texture morph, position only.
// Returns true when MORPHING is compiled in, false (with vPosition untouched) otherwise.
bool ApplyMorph( sampler2D morphSampler, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect, 
				const float flVertexID, const float3 vMorphTexCoord,
				inout float3 vPosition )
{
#if MORPHING

#if !DECAL
	// Look the delta up by vertex ID (field 0 of the morph texture)
	float4 vDelta = SampleMorphDelta( morphSampler, vMorphTargetTextureDim, vMorphSubrect, flVertexID, 0 );
	vPosition	+= vDelta.xyz;
#else
	// Decals carry an explicit texture coordinate in .xy and a scale in .z
	float4 vCoord = float4( vMorphTexCoord.x, vMorphTexCoord.y, 0.0f, 0.0f );
	float3 vDelta = tex2Dlod( morphSampler, vCoord ).xyz;
	vPosition	+= vDelta.xyz * vMorphTexCoord.z;
#endif // DECAL

	return true;

#else // !MORPHING
	return false;
#endif
}
 
// Vertex-texture morph, position and normal.
// Returns true when MORPHING is compiled in, false (with the inputs untouched) otherwise.
bool ApplyMorph( sampler2D morphSampler, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect, 
				const float flVertexID, const float3 vMorphTexCoord, 
				inout float3 vPosition, inout float3 vNormal )
{
#if MORPHING

#if !DECAL
	// Fetch both fields for this vertex ID in one go
	float4 vDeltaP, vDeltaN;
	SampleMorphDelta2( morphSampler, vMorphTargetTextureDim, vMorphSubrect, flVertexID, vDeltaP, vDeltaN );
	vPosition	+= vDeltaP.xyz;
	vNormal		+= vDeltaN.xyz;
#else
	// Decals carry an explicit texture coordinate in .xy and a scale in .z
	float4 vCoord = float4( vMorphTexCoord.x, vMorphTexCoord.y, 0.0f, 0.0f );
	float3 vDeltaP = tex2Dlod( morphSampler, vCoord ).xyz;

	// The normal delta is the next texel to the right
	vCoord.x += 1.0f / vMorphTargetTextureDim.x;
	float3 vDeltaN = tex2Dlod( morphSampler, vCoord ).xyz;

	vPosition	+= vDeltaP.xyz * vMorphTexCoord.z;
	vNormal		+= vDeltaN.xyz * vMorphTexCoord.z;
#endif // DECAL

	return true;

#else // !MORPHING
	return false;
#endif
}

// Vertex-texture morph, position, normal and tangent.
// Returns true when MORPHING is compiled in, false (with the inputs untouched) otherwise.
bool ApplyMorph( sampler2D morphSampler, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect, 
				const float flVertexID, const float3 vMorphTexCoord, 
				inout float3 vPosition, inout float3 vNormal, inout float3 vTangent )
{
#if MORPHING

#if !DECAL
	// Fetch both fields for this vertex ID in one go
	float4 vDeltaP, vDeltaN;
	SampleMorphDelta2( morphSampler, vMorphTargetTextureDim, vMorphSubrect, flVertexID, vDeltaP, vDeltaN );
	vPosition	+= vDeltaP.xyz;
	vNormal		+= vDeltaN.xyz;
	// The tangent is offset by the normal delta; there is no separate tangent delta
	vTangent	+= vDeltaN.xyz;
#else
	// Decals carry an explicit texture coordinate in .xy and a scale in .z
	float4 vCoord = float4( vMorphTexCoord.x, vMorphTexCoord.y, 0.0f, 0.0f );
	float3 vDeltaP = tex2Dlod( morphSampler, vCoord ).xyz;

	// The normal delta is the next texel to the right
	vCoord.x += 1.0f / vMorphTargetTextureDim.x;
	float3 vDeltaN = tex2Dlod( morphSampler, vCoord ).xyz;

	vPosition	+= vDeltaP.xyz * vMorphTexCoord.z;
	vNormal		+= vDeltaN.xyz * vMorphTexCoord.z;
	vTangent	+= vDeltaN.xyz * vMorphTexCoord.z;
#endif // DECAL

	return true;

#else // !MORPHING

	return false;
#endif
}

// Vertex-texture morph with wrinkle output.
// Returns true when MORPHING is compiled in; otherwise returns false, leaves the
// inout vectors untouched and sets flWrinkle to 0.
bool ApplyMorph( sampler2D morphSampler, const float3 vMorphTargetTextureDim, const float4 vMorphSubrect,
	const float flVertexID, const float3 vMorphTexCoord,
	inout float3 vPosition, inout float3 vNormal, inout float3 vTangent, out float flWrinkle )
{
#if MORPHING

#if !DECAL
	// Fetch both fields for this vertex ID in one go
	float4 vDeltaP, vDeltaN;
	SampleMorphDelta2( morphSampler, vMorphTargetTextureDim, vMorphSubrect, flVertexID, vDeltaP, vDeltaN );
	vPosition	+= vDeltaP.xyz;
	vNormal		+= vDeltaN.xyz;
	// The tangent is offset by the normal delta; there is no separate tangent delta
	vTangent	+= vDeltaN.xyz;
	// The wrinkle amount rides in the w of the position delta
	flWrinkle = vDeltaP.w;
#else
	// Decals carry an explicit texture coordinate in .xy and a scale in .z
	float4 vCoord = float4( vMorphTexCoord.x, vMorphTexCoord.y, 0.0f, 0.0f );
	float4 vDeltaP = tex2Dlod( morphSampler, vCoord );

	// The normal delta is the next texel to the right
	vCoord.x += 1.0f / vMorphTargetTextureDim.x;
	float3 vDeltaN = tex2Dlod( morphSampler, vCoord ).xyz;

	vPosition	+= vDeltaP.xyz * vMorphTexCoord.z;
	vNormal		+= vDeltaN.xyz * vMorphTexCoord.z;
	vTangent	+= vDeltaN.xyz * vMorphTexCoord.z;
	flWrinkle	= vDeltaP.w * vMorphTexCoord.z;
#endif // DECAL

	return true;

#else // !MORPHING

	// The out parameter must still be written when morphing is compiled out
	flWrinkle = 0.0f;
	return false;

#endif
}

#endif   // SHADER_MODEL_VS_3_0


// Linear range fog factor from projected depth, floored at cFogMaxDensity
float RangeFog( const float3 projPos )
{
	float flFactor = -projPos.z * cOOFogRange + cFogEndOverFogRange;
	return max( cFogMaxDensity, flFactor );
}

// Fog factor based on how much of the eye-to-vertex path lies under the water surface
float WaterFog( const float3 worldPos, const float3 projPos )
{
	// Vertical distance from the water surface (cEyePosWaterZ.w) down to the vert.
	// Clamping at zero is equivalent to moving a vert that is above the water onto the surface.
	float flSurfaceToVert = max( 0.0f, cEyePosWaterZ.w - worldPos.z );

	// Vertical distance from the eye position to the vert
	float flEyeToVert = cEyePosWaterZ.z - worldPos.z;

	// Scale the projected depth by the submerged fraction to get the distance seen through water
	float flDistInWater = flSurfaceToVert / flEyeToVert;
	flDistInWater *= projPos.z;

	return max( cFogMaxDensity, ( -flDistInWater * cOOFogRange + cFogOne ) );
}

// Vertex fog factor selected by fog type.
//   fogType == FOGTYPE_RANGE -> RangeFog( projPos )
//   any other value          -> WaterFog( worldPos, projPos ), or 1.0 when the
//                               SHADERMODEL_VS_2_0 test below is true
// On _X360 the function returns 1.0 immediately.
float CalcFog( const float3 worldPos, const float3 projPos, const int fogType )
{
#if defined( _X360 )
	// 360 only does pixel fog
	return 1.0f;
#endif

	if( fogType == FOGTYPE_RANGE )
	{
		return RangeFog( projPos );
	}
	else
	{
		// NOTE(review): this tests SHADERMODEL_VS_2_0, while the rest of this file tests
		// SHADER_MODEL_VS_2_0 (with an underscore). If the former is never defined by the
		// build, the WaterFog branch is always the one compiled -- confirm which is intended.
#if SHADERMODEL_VS_2_0 == 1
		// We do this work in the pixel shader in dx9, so don't do any fog here.
		return 1.0f;
#else
		return WaterFog( worldPos, projPos );
#endif
	}
}

// Vertex fog factor selected by a boolean.
//   bWaterFog == false -> RangeFog( projPos )
//   bWaterFog == true  -> WaterFog( worldPos, projPos ), or 1.0 when the
//                         SHADERMODEL_VS_2_0 test below is true
// On _X360 the function returns 1.0 immediately.
float CalcFog( const float3 worldPos, const float3 projPos, const bool bWaterFog )
{
#if defined( _X360 )
	// 360 only does pixel fog
	return 1.0f;
#endif

	float flFog;
	if( !bWaterFog )
	{
		flFog = RangeFog( projPos );
	}
	else
	{
		// NOTE(review): this tests SHADERMODEL_VS_2_0, while the rest of this file tests
		// SHADER_MODEL_VS_2_0 (with an underscore). If the former is never defined by the
		// build, the WaterFog branch is always the one compiled -- confirm which is intended.
#if SHADERMODEL_VS_2_0 == 1
		// We do this work in the pixel shader in dx9, so don't do any fog here.
		flFog = 1.0f;
#else
		flFog = WaterFog( worldPos, projPos );
#endif
	}

	return flFog;
}

// Returns the bone weights as floats, decoding them when COMPRESSED_VERTS is non-zero
float4 DecompressBoneWeights( const float4 weights )
{
	float4 vDecoded;

	if ( COMPRESSED_VERTS )
	{
		// Decompress from SHORT2 to float. In our case, [-1, +32767] -> [0, +1]
		// NOTE: adding 1 lets us divide by 32768, which is exact (dividing by 32767 is not).
		//       This avoids cracking between meshes with different numbers of bone weights.
		//       SHORT2 is used instead of SHORT2N for a similar reason - the GPU's conversion
		//       from [-32768,+32767] to [-1,+1] is imprecise in the same way.
		vDecoded = ( weights + 1 ) / 32768;
	}
	else
	{
		// Uncompressed weights pass straight through
		vDecoded = weights;
	}

	return vDecoded;
}

// Transforms a model-space position to world space.
//   bSkinning    - false: use cModel[0] only; true: blend three bone matrices
//   modelPos     - model-space position
//   boneWeights  - bone weights, passed through DecompressBoneWeights(); only the first
//                  two are used, the third is derived so the three sum to 1
//   fBoneIndices - bone indices as floats (converted with D3DCOLORtoUBYTE4 off _X360)
//   worldPos     - result
// mul4x3 is not defined in this file (presumably in common_fxc.h).
void SkinPosition( bool bSkinning, const float4 modelPos, 
                   const float4 boneWeights, float4 fBoneIndices,
				   out float3 worldPos )
{
#if !defined( _X360 )
	int3 boneIndices = D3DCOLORtoUBYTE4( fBoneIndices );
#else
	int3 boneIndices = fBoneIndices;
#endif

	// Needed for invariance issues caused by multipass rendering
#if defined( _X360 )
	[isolate] 
#endif
	{ 
		if ( !bSkinning )
		{
			worldPos = mul4x3( modelPos, cModel[0] );
		}
		else // skinning - always three bones
		{
			float4x3 mat1 = cModel[boneIndices[0]];
			float4x3 mat2 = cModel[boneIndices[1]];
			float4x3 mat3 = cModel[boneIndices[2]];

			float3 weights = DecompressBoneWeights( boneWeights ).xyz;
			// The third weight is whatever remains after the first two
			weights[2] = 1 - (weights[0] + weights[1]);

			// Blend the matrices first, then transform once
			float4x3 blendMatrix = mat1 * weights[0] + mat2 * weights[1] + mat3 * weights[2];
			worldPos = mul4x3( modelPos, blendMatrix );
		}
	}
}

// Transforms a model-space position and normal to world space.
//   bSkinning    - false: use cModel[0] only; true: blend three bone matrices
//   modelPos     - model-space position
//   modelNormal  - model-space normal; transformed by the upper 3x3 of the same matrix
//   boneWeights  - bone weights, passed through DecompressBoneWeights(); only the first
//                  two are used, the third is derived so the three sum to 1
//   fBoneIndices - bone indices as floats (converted with D3DCOLORtoUBYTE4 off _X360)
//   worldPos, worldNormal - results; worldNormal is not renormalized here
// mul4x3 and mul3x3 are not defined in this file (presumably in common_fxc.h).
void SkinPositionAndNormal( bool bSkinning, const float4 modelPos, const float3 modelNormal,
                            const float4 boneWeights, float4 fBoneIndices,
						    out float3 worldPos, out float3 worldNormal )
{
	// Needed for invariance issues caused by multipass rendering
#if defined( _X360 )
	[isolate] 
#endif
	{ 

#if !defined( _X360 )
		int3 boneIndices = D3DCOLORtoUBYTE4( fBoneIndices );
#else
		int3 boneIndices = fBoneIndices;
#endif

		if ( !bSkinning )
		{
			worldPos = mul4x3( modelPos, cModel[0] );
			worldNormal = mul3x3( modelNormal, ( const float3x3 )cModel[0] );
		}
		else // skinning - always three bones
		{
			float4x3 mat1 = cModel[boneIndices[0]];
			float4x3 mat2 = cModel[boneIndices[1]];
			float4x3 mat3 = cModel[boneIndices[2]];

			float3 weights = DecompressBoneWeights( boneWeights ).xyz;
			// The third weight is whatever remains after the first two
			weights[2] = 1 - (weights[0] + weights[1]);

			// Blend the matrices first, then transform position and normal with the result
			float4x3 blendMatrix = mat1 * weights[0] + mat2 * weights[1] + mat3 * weights[2];
			worldPos = mul4x3( modelPos, blendMatrix );
			worldNormal = mul3x3( modelNormal, ( float3x3 )blendMatrix );
		}

	} // end [isolate]
}

// Is it worth keeping SkinPosition and SkinPositionAndNormal around since the optimizer
// gets rid of anything that isn't used?
// Transforms a model-space position, normal and tangent to world space and builds
// the second tangent-space axis.
//   bSkinning     - false: use cModel[0] only; true: blend three bone matrices
//   modelTangentS - {xyz} tangent, w is the sign applied to the cross product below
//   boneWeights   - bone weights, passed through DecompressBoneWeights(); only the first
//                   two are used, the third is derived so the three sum to 1
//   fBoneIndices  - bone indices as floats (converted with D3DCOLORtoUBYTE4 off _X360)
//   worldTangentT - cross( worldNormal, worldTangentS ) * modelTangentS.w
// None of the outputs are renormalized here.
// mul4x3 and mul3x3 are not defined in this file (presumably in common_fxc.h).
void SkinPositionNormalAndTangentSpace( 
							bool bSkinning,
						    const float4 modelPos, const float3 modelNormal, 
							const float4 modelTangentS,
                            const float4 boneWeights, float4 fBoneIndices,
						    out float3 worldPos, out float3 worldNormal, 
							out float3 worldTangentS, out float3 worldTangentT )
{
#if !defined( _X360 )
	int3 boneIndices = D3DCOLORtoUBYTE4( fBoneIndices );
#else
	int3 boneIndices = fBoneIndices;
#endif

	// Needed for invariance issues caused by multipass rendering
#if defined( _X360 )
	[isolate] 
#endif
	{ 
		if ( !bSkinning )
		{
			worldPos = mul4x3( modelPos, cModel[0] );
			worldNormal = mul3x3( modelNormal, ( const float3x3 )cModel[0] );
			worldTangentS = mul3x3( ( float3 )modelTangentS, ( const float3x3 )cModel[0] );
		}
		else // skinning - always three bones
		{
			float4x3 mat1 = cModel[boneIndices[0]];
			float4x3 mat2 = cModel[boneIndices[1]];
			float4x3 mat3 = cModel[boneIndices[2]];

			float3 weights = DecompressBoneWeights( boneWeights ).xyz;
			// The third weight is whatever remains after the first two
			weights[2] = 1 - (weights[0] + weights[1]);

			// Blend the matrices first, then transform everything with the result
			float4x3 blendMatrix = mat1 * weights[0] + mat2 * weights[1] + mat3 * weights[2];
			worldPos = mul4x3( modelPos, blendMatrix );
			worldNormal = mul3x3( modelNormal, ( const float3x3 )blendMatrix );
			worldTangentS = mul3x3( ( float3 )modelTangentS, ( const float3x3 )blendMatrix );
		}
		// Second tangent axis, flipped by the sign stored in the tangent's w
		worldTangentT = cross( worldNormal, worldTangentS ) * modelTangentS.w;
	}
}


//-----------------------------------------------------------------------------
// Lighting helper functions
//-----------------------------------------------------------------------------

// Evaluates the ambient cube for a world-space normal and returns a linear color
float3 AmbientLight( const float3 worldNormal )
{
	// Per axis: 0 picks the positive-facing cube color, 1 the negative-facing one
	int3 vFaceIdx = ( worldNormal < 0.0 );

	// Squared components weight the three axis contributions
	float3 vAxisWeight = worldNormal * worldNormal;

	float3 vAmbient = vAxisWeight.x * cAmbientCubeX[vFaceIdx.x];
	vAmbient += vAxisWeight.y * cAmbientCubeY[vFaceIdx.y];
	vAmbient += vAxisWeight.z * cAmbientCubeZ[vFaceIdx.z];
	return vAmbient;
}

// The following "internal" routines are called "privately" by other routines in this file which
// handle the particular flavor of vs20 control flow appropriate to the original caller
// Attenuation of light lightNum at worldPos. Distance and spot terms are always
// computed; the light type codes in dir.w and color.w then select between them.
float VertexAttenInternal( const float3 worldPos, int lightNum )
{
	// Vector from the vertex to the light, its squared length and reciprocal length
	float3 vToLight = cLightInfo[lightNum].pos.xyz - worldPos;
	float flDistSq = dot( vToLight, vToLight );
	float flOODist = rsqrt( flDistSq );

	// Unit direction towards the light
	vToLight *= flOODist;

	// ( 1, dist, dist^2 ) for the falloff polynomial
	float3 vDistTerms;
#	if defined( _X360 )
	{
		//X360 dynamic compile hits an internal compiler error using dst(), this is the breakdown of how dst() works from the 360 docs.
		vDistTerms.x = 1;
		vDistTerms.y = flDistSq * flOODist;
		vDistTerms.z = flDistSq;
	}
#	else
	{
		vDistTerms = dst( flDistSq, flOODist );
	}
#	endif

	float flDistAtten = 1.0f / dot( cLightInfo[lightNum].atten.xyz, vDistTerms );

	// Spot cone term: offset and scale the cosine, clamp away from zero, apply the exponent
	float flCosAngle = dot( cLightInfo[lightNum].dir.xyz, -vToLight );
	float flSpot = ( flCosAngle - cLightInfo[lightNum].spotParams.z ) * cLightInfo[lightNum].spotParams.w;
	flSpot = saturate( pow( max( 0.0001f, flSpot ), cLightInfo[lightNum].spotParams.x ) );

	// dir.w picks point (0) or spot (1)
	float flPointOrSpot = lerp( flDistAtten, flDistAtten * flSpot, cLightInfo[lightNum].dir.w );

	// color.w picks the above (0) or directional, which has no attenuation (1)
	return lerp( flPointOrSpot, 1.0f, cLightInfo[lightNum].color.w );
}

// N.L term for light lightNum: clamped Lambert, or squared half-Lambert when bHalfLambert is set
float CosineTermInternal( const float3 worldPos, const float3 worldNormal, int lightNum, bool bHalfLambert )
{
	// Direction to the light as if it were a point or spot light
	float3 vPositional = normalize( cLightInfo[lightNum].pos.xyz - worldPos );

	// color.w switches to the stored (negated) direction for directional lights
	float3 vL = lerp( vPositional, -cLightInfo[lightNum].dir.xyz, cLightInfo[lightNum].color.w );

	float flNDotL = dot( worldNormal, vL );

	if ( bHalfLambert )
	{
		// Remap [-1,1] to [0,1], then square
		flNDotL = flNDotL * 0.5 + 0.5;
		flNDotL *= flNDotL;
	}
	else
	{
		flNDotL = max( 0.0f, flNDotL );
	}

	return flNDotL;
}

// This routine uses booleans to do early-outs and is meant to be called by routines OUTSIDE of this file
// Attenuation for a single light, for callers outside this file.
//   worldPos              - world-space vertex position
//   lightNum              - index into cLightInfo / g_bLightEnabled
//   bUseStaticControlFlow - true: consult g_bLightEnabled[lightNum] and return 0 for a
//                           disabled light; false: always evaluate the light
float GetVertexAttenForLight( const float3 worldPos, int lightNum, bool bUseStaticControlFlow )
{
	float result = 0.0f;

	// Direct3D uses static control flow but OpenGL currently does not
	if ( bUseStaticControlFlow )
	{
		// result stays 0 when the light is disabled
		if ( g_bLightEnabled[lightNum] )
		{
			result = VertexAttenInternal( worldPos, lightNum );
		}
	}
	else // OpenGL non-static-control-flow path
	{
		result = VertexAttenInternal( worldPos, lightNum );
	}

	return result;
}

// Contribution of light lightNum: color * cosine term * attenuation
float3 DoLightInternal( const float3 worldPos, const float3 worldNormal, int lightNum, bool bHalfLambert )
{
	float flCosine = CosineTermInternal( worldPos, worldNormal, lightNum, bHalfLambert );
	float flAtten = VertexAttenInternal( worldPos, lightNum );

	// color.w is the light type code, so only the rgb part is returned
	return cLightInfo[lightNum].color.xyz * flCosine * flAtten;
}

// Sums static, dynamic and ambient vertex lighting into a linear color.
// Dynamic lights are looped over g_nLightCount; ambient is added only with dynamic lighting.
float3 DoLighting( const float3 worldPos, const float3 worldNormal,
				   const float3 staticLightingColor, const bool bStaticLight,
				   const bool bDynamicLight, bool bHalfLambert )
{
	float3 vAccum = float3( 0.0f, 0.0f, 0.0f );

	// Baked lighting: apply overbright, then bring into linear space
	if ( bStaticLight )
	{
		float3 vBaked = staticLightingColor * cOverbright;
#if defined ( _X360 )
		// The 360 path squares the color rather than calling GammaToLinear
		vAccum += vBaked * vBaked;
#else
		vAccum += GammaToLinear( vBaked );
#endif
	}

	if ( bDynamicLight )
	{
		// Per-light contributions
		for ( int nLight = 0; nLight < g_nLightCount; nLight++ )
		{
			vAccum += DoLightInternal( worldPos, worldNormal, nLight, bHalfLambert );
		}

		// Ambient cube (ambient light is already remapped)
		vAccum += AmbientLight( worldNormal );
	}

	return vAccum;
}

// Same sum as DoLighting, but with the light loop written out against a
// caller-supplied nNumLights instead of g_nLightCount.
float3 DoLightingUnrolled( const float3 worldPos, const float3 worldNormal,
				  const float3 staticLightingColor, const bool bStaticLight,
				  const bool bDynamicLight, bool bHalfLambert, const int nNumLights )
{
	float3 vAccum = float3( 0.0f, 0.0f, 0.0f );

	// Baked lighting: apply overbright, then bring into linear space
	if ( bStaticLight )
	{
		vAccum += GammaToLinear( staticLightingColor * cOverbright );
	}

	if ( bDynamicLight )
	{
		// Per-light contributions, one test per possible light
		if ( nNumLights >= 1 )
		{
			vAccum += DoLightInternal( worldPos, worldNormal, 0, bHalfLambert );
		}
		if ( nNumLights >= 2 )
		{
			vAccum += DoLightInternal( worldPos, worldNormal, 1, bHalfLambert );
		}
		if ( nNumLights >= 3 )
		{
			vAccum += DoLightInternal( worldPos, worldNormal, 2, bHalfLambert );
		}
		if ( nNumLights >= 4 )
		{
			vAccum += DoLightInternal( worldPos, worldNormal, 3, bHalfLambert );
		}

		// Ambient cube (ambient light is already remapped)
		vAccum += AmbientLight( worldNormal );
	}

	return vAccum;
}

// Converts four floats to ints through D3DCOLORtoUBYTE4.
// The input is pre-swizzled (zyxw) and pre-divided by 255.001953125 -- presumably to
// cancel the swizzle and scale that the intrinsic applies; confirm against the HLSL docs.
int4 FloatToInt( in float4 floats )
{
	float4 vPrepared = floats.zyxw / 255.001953125;
	return D3DCOLORtoUBYTE4( vPrepared );
}

// Sphere-map texture coordinates for a reflection vector
float2 ComputeSphereMapTexCoords( in float3 reflectionVector )
{
	// Rotate the reflection vector into view space
	float3 vViewRefl = mul( reflectionVector, ( float3x3 )cViewModel );

	// <rx ry rz+1>
	float3 vBiased = vViewRefl;
	vBiased.z += 1.0f;

	// 1 / |<rx ry rz+1>|
	float flOOLen = 1.0f / sqrt( dot( vBiased, vBiased ) );

	// xy / len + 1, then halve to land in [0,1]
	float2 vUV = flOOLen * vBiased.xy + 1.0f;
	return vUV * 0.5f;
}


#define DEFORMATION_CLAMP_TO_BOX_IN_WORLDSPACE 1
							// minxyz.minsoftness / maxxyz.maxsoftness

// Applies the deformation selected by deftype to a world-space position.
// Only DEFORMATION_CLAMP_TO_BOX_IN_WORLDSPACE is handled: the position is clamped to the
// box whose minimum is defparms2.xyz and maximum is defparms3.xyz. Any other deftype
// returns worldpos unchanged. defparms0, defparms1 and the .w components are not read.
float3 ApplyDeformation( float3 worldpos, int deftype, float4 defparms0, float4 defparms1,
						 float4 defparms2, float4 defparms3 )
{
	float3 vDeformed = worldpos;

	if ( deftype == DEFORMATION_CLAMP_TO_BOX_IN_WORLDSPACE )
	{
		// Lower bound first, then upper bound
		vDeformed = min( max( worldpos, defparms2.xyz ), defparms3.xyz );
	}

	return vDeformed;
}


#endif //#ifndef COMMON_VS_FXC_H_