// external/vpc/public/mathlib/math_pfns.h

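//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
//
// Purpose: Platform-specific fast scalar math primitives (square root, reciprocal
//          square root, reciprocal, sine/cosine), selected at compile time.
//
//=====================================================================================//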

#ifndef _MATH_PFNS_H_
#define _MATH_PFNS_H_

#include <limits>

#if defined( _X360 )
#include <xboxmath.h>
#elif defined(_PS3)

#ifndef SPU
#include <ppu_asm_intrinsics.h>
#endif

// Note that similar defines exist in ssemath.h
// Maybe we should consolidate in one place for all platforms.

// Constant vectors for the PS3 build. Each macro expands to a vector literal
// whose lanes hold the values listed in its braces; the name spells out the
// lane contents (for example _VEC_ZEROxyzONEwF is 0,0,0,1).
#define _VEC_0x7ff		(vec_int4){0x7ff,0x7ff,0x7ff,0x7ff}
#define _VEC_0x3ff		(vec_int4){0x3ff,0x3ff,0x3ff,0x3ff}
#define _VEC_22L		(vector unsigned int){22,22,22,22}
#define _VEC_11L		(vector unsigned int){11,11,11,11}
#define _VEC_0L			(vector unsigned int){0,0,0,0}
#define _VEC_255F		(vector float){255.0f,255.0f,255.0f,255.0f}
#define _VEC_NEGONEF	(vector float){-1.0f,-1.0f,-1.0f,-1.0f}
#define _VEC_ONEF		(vector float){1.0f,1.0f,1.0f,1.0f}
#define _VEC_ZEROF		(vector float){0.0f,0.0f,0.0f,0.0f}
#define _VEC_ZEROxyzONEwF (vector float){0.0f,0.0f,0.0f,1.0f}
#define _VEC_HALFF		(vector float){0.5f,0.5f,0.5f,0.5f}
#define _VEC_HALFxyzZEROwF	(vector float){0.5f,0.5f,0.5f,0.0f}
// 16-entry byte-selector table.
// NOTE(review): presumably consumed by vec_perm, where 0x00-0x0f pick bytes of
// the first source and 0x10-0x1f bytes of the second -- confirm at the use sites.
#define _VEC_PERMUTE_XYZ0W1   (vector unsigned char){0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x1c,0x1d,0x1e,0x1f}

// 2^23 (8388608.0f) in every lane.
// NOTE(review): presumably the float-to-integer bias constant -- confirm at the use sites.
#define _VEC_IEEEHACK (vector float){(float)(1 << 23),(float)(1 << 23),(float)(1 << 23),(float)(1 << 23)}
// Byte-selector table whose last four entries are 0x03, 0x07, 0x0b, 0x0f
// (the fourth byte of each 32-bit word); the first twelve entries are 0.
#define _VEC_PERMUTE_FASTFTOC (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0x03,0x07,0x0b,0x0f}

// AngleQuaternion
// Byte-selector tables; each group of four consecutive bytes picks one 32-bit word.
#define _VEC_PERMUTE_AQsxsxcxcx (vector unsigned char) {0x00,0x01,0x02,0x03,0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13,0x10,0x11,0x12,0x13}	
#define _VEC_PERMUTE_AQczszszcz (vector unsigned char) {0x18,0x19,0x1a,0x1b,0x08,0x09,0x0a,0x0b,0x08,0x09,0x0a,0x0b,0x18,0x19,0x1a,0x1b}	
#define _VEC_PERMUTE_AQcxcxsxsx (vector unsigned char) {0x10,0x11,0x12,0x13,0x10,0x11,0x12,0x13,0x00,0x01,0x02,0x03,0x00,0x01,0x02,0x03}	
#define _VEC_PERMUTE_AQszczczsz (vector unsigned char) {0x08,0x09,0x0a,0x0b,0x18,0x19,0x1a,0x1b,0x18,0x19,0x1a,0x1b,0x08,0x09,0x0a,0x0b}	
#define _VEC_PERMUTE_ANGLEQUAT  (vector unsigned char) {0x10,0x11,0x12,0x13,0x04,0x05,0x06,0x07,0x18,0x19,0x1a,0x1b,0x0c,0x0d,0x0e,0x0f}	

// FLT_EPSILON in every lane.
// NOTE(review): this header does not include <float.h> itself, so FLT_EPSILON
// has to be visible through whatever the including file pulled in earlier.
#define _VEC_EPSILONF		(__vector float)			{FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON}

#endif

#if !(defined( PLATFORM_PPC ) || defined(SPU))
// If we are not PPC based or SPU based, then assume it is SSE2. We should make this code cleaner.

#include <xmmintrin.h>

// These globals are initialized by mathlib and redirected based on available fpu features

// The following are not declared as macros because they are often used in limiting situations,
// and sometimes the compiler simply refuses to inline them for some reason
// Scalar square root of x, computed with the SSE sqrtss instruction.
// The low lane of the result is written out with _mm_store_ss rather than by
// casting the address of the __m128 to float* and dereferencing it, so no
// type-punned pointer read is involved.
FORCEINLINE float FastSqrt( float x )
{
	float flRoot;
	_mm_store_ss( &flRoot, _mm_sqrt_ss( _mm_load_ss( &x ) ) );
	return flRoot;
}

// Hardware estimate of 1/sqrt(x) from the SSE rsqrtss instruction, with no
// refinement step applied. The low lane is written out with _mm_store_ss
// instead of dereferencing a float* cast of the __m128.
FORCEINLINE float FastRSqrtFast( float x )
{
	float flEstimate;
	_mm_store_ss( &flEstimate, _mm_rsqrt_ss( _mm_load_ss( &x ) ) );
	return flEstimate;
}
// Reciprocal square root with one Newton-Raphson refinement step.
// Starting from the estimate y = FastRSqrtFast( x ), returns
//     y' = (0.5 * y) * (3 - (x * y) * y)
// Intended as a drop-in for 1.f / sqrtf(x) where its error is acceptable.
FORCEINLINE float FastRSqrt( float x )
{
	const float flEstimate = FastRSqrtFast( x );
	return ( 0.5f * flEstimate ) * ( 3.f - ( x * flEstimate ) * flEstimate );
}

// Declarations only; the definitions are not part of this header.
// NOTE(review): going by the names, FastSinCos stores sin(x) in *s and cos(x)
// in *c, and FastCos returns cos(x) -- confirm against the definitions.
void FastSinCos( float x, float* s, float* c );  // any x
float FastCos( float x );


// Reciprocal of x, computed with an ordinary floating-point divide on this path.
inline float FastRecip( float x )
{
	return 1.0f / x;
}
// Square-root estimate built from the unrefined reciprocal-square-root
// estimate: sqrt(x) ~= x * FastRSqrtFast( x ). The accuracy is that of the
// estimate itself, so this is meant for tolerant uses such as computing
// lighting normals.
// NOTE(review): the previous comment quoted about 6 relative decimal places;
// confirm that figure against the rsqrtss documentation.
// NOTE(review): for x == 0 the estimate is presumably +infinity, which would
// make the product NaN -- confirm callers never pass 0.
inline float FastSqrtEst( float x )
{
	return x * FastRSqrtFast( x );
}


#else // !defined( PLATFORM_PPC ) && !defined(_SPU)

#ifndef SPU
// We may not need this for SPU, so let's not bother for now

// Returns __fsqrts( x ).
// NOTE(review): presumably the PPC single-precision square-root instruction --
// confirm against the compiler's intrinsic documentation.
FORCEINLINE float _VMX_Sqrt( float x )
{
	return __fsqrts( x );
}

// Reciprocal square root: takes the __frsqrte( x ) estimate y and applies one
// Newton-Raphson step, y' = (0.5 * y) * (3 - (x * y) * y). Works in double.
FORCEINLINE double _VMX_RSqrt( double x )
{
	const double flEstimate = __frsqrte( x );

	// The float literals are promoted to double; 0.5 and 3.0 are exact in both.
	return ( 0.5f * flEstimate ) * ( 3.0f - ( x * flEstimate ) * flEstimate );
}

// Returns the raw __frsqrte( x ) result with no refinement step; _VMX_RSqrt
// starts from this same value and adds one Newton-Raphson iteration.
FORCEINLINE double _VMX_RSqrtFast( double x )
{
	return __frsqrte( x );
}

#ifdef _X360
// Xbox 360 sine/cosine: forwards to XMScalarSinCos. Note the argument order
// differs -- the library call takes the two output pointers first, then the angle.
FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC )
{
	XMScalarSinCos( pS, pC, a );
}

// Xbox 360 cosine: returns XMScalarCos( a ).
FORCEINLINE float _VMX_Cos( float a )
{
	return XMScalarCos( a );
}
#endif

// the 360 has fixed hw and calls directly
// On this path the Fast* names are function-like macros that expand straight
// to the _VMX_* inlines (the SSE path defines real functions with these names
// instead). FastRSqrt and FastRSqrtFast therefore produce a double here,
// because _VMX_RSqrt and _VMX_RSqrtFast take and return double.
// _VMX_SinCos and _VMX_Cos are defined under _X360 above and, for _PS3, later
// in this header.
#define FastSqrt(x)			_VMX_Sqrt(x)
#define	FastRSqrt(x)		_VMX_RSqrt(x)
#define FastRSqrtFast(x)	_VMX_RSqrtFast(x)
#define FastSinCos(x,s,c)	_VMX_SinCos(x,s,c)
#define FastCos(x)			_VMX_Cos(x)

// Reciprocal of x, taken from the __fres intrinsic.
// NOTE(review): presumably the PPC reciprocal-estimate instruction, i.e. an
// approximation rather than an exact divide -- confirm.
inline double FastRecip( double x )
{
	return __fres( x );
}
// Square-root estimate: x multiplied by the unrefined __frsqrte( x ) result.
inline double FastSqrtEst( double x )
{
	return x * __frsqrte( x );
}

#endif // #ifndef SPU

// Replaces +infinity with FLT_MAX. On the non-PPC path every other input is
// returned exactly as passed in; in particular -infinity is not clamped.
// NOTE(review): FLT_MAX must already be visible to the including file; this
// header does not include <float.h> itself.
inline float FastClampInfinity( float x )
{
#ifdef PLATFORM_PPC
	// Selects between x and FLT_MAX based on (infinity - x).
	// NOTE(review): relies on fsel picking its third argument when the first
	// is NaN (infinity - infinity) -- confirm against the fsel definition.
	const float flInfinity = std::numeric_limits<float>::infinity();
	return fsel( flInfinity - x, x, FLT_MAX );
#else
	if ( x > FLT_MAX )
	{
		return FLT_MAX;
	}
	return x;
#endif
}

#if defined (_PS3) && !defined(SPU)

// extern float cosvf(float);      /* single precision cosine      */
// extern float sinvf(float);      /* single precision sine        */
// TODO: need a faster single precision equivalent
// Until then cosvf/sinvf are plain aliases for cosf/sinf, which must be
// declared by the including file before these names are used.
#define cosvf cosf
#define sinvf sinf

// Returns __rlwimi( x, x, c, 0, 31 ).
// NOTE(review): presumably a 32-bit rotate-left of x by c bits, with the 0..31
// mask keeping every bit of the rotated value -- confirm against the intrinsic
// documentation, including whether c has to be a compile-time constant.
inline int _rotl( int x, int c )
{
	return __rlwimi(x,x,c,0,31);
}

// Returns __rldicl( x, c, 0 ).
// NOTE(review): presumably a 64-bit rotate-left of x by c bits with nothing
// masked off -- confirm against the intrinsic documentation.
inline int64 _rotl64( int64 x, int c )
{
	return __rldicl( x, c, 0 );
}

//-----------------------------------------------------------------
// Vector Unions
//-----------------------------------------------------------------

//-----------------------------------------------------------------
// Floats
//-----------------------------------------------------------------
// Overlays one `vector float` with an array of four floats so individual
// lanes can be read or written through f[].
typedef union
{
	vector float vf;
	float f[4];
} vector_float_union;

//-----------------------------------------------------------------
// Ints
//-----------------------------------------------------------------
// Overlays one `vector int` with an array of four ints for per-lane access
// through i[].
typedef union
{
	vector int vi;
	int i[4];
} vector_int4_union;

// Overlays one `vector unsigned int` with an array of four unsigned ints for
// per-lane access through ui[].
typedef union
{
	vector unsigned int vui;
	unsigned int ui[4];
} vector_uint4_union;

//-----------------------------------------------------------------
// Shorts
//-----------------------------------------------------------------
// Overlays one `vector signed short` with an array of eight signed shorts for
// per-lane access through s[].
typedef union
{
	vector signed short vs;
	signed short s[8];
} vector_short8_union;

// Overlays one `vector unsigned short` with an array of eight unsigned shorts
// for per-lane access through us[].
typedef union
{
	vector unsigned short vus;
	unsigned short us[8];
} vector_ushort8_union;

//-----------------------------------------------------------------
// Chars
//-----------------------------------------------------------------
// Overlays one `vector signed char` with an array of sixteen signed chars for
// per-lane access through c[].
typedef union
{
	vector signed char vc;
	signed char c[16];
} vector_char16_union;

// Overlays one `vector unsigned char` with an array of sixteen unsigned chars
// for per-lane access through uc[].
typedef union
{
	vector unsigned char vuc;
	unsigned char uc[16];
} vector_uchar16_union;

/*
FORCEINLINE float _VMX_Sqrt( float x )
{
	vector_float_union vIn, vOut;

	vIn.f[0] = x;

	vOut.vf = sqrtf4(vIn.vf);
	
	return vOut.f[0];
}

FORCEINLINE float _VMX_RSqrt( float x )
{
	vector_float_union vIn, vOut;

	vIn.f[0] = x;

	vOut.vf = rsqrtf4(vIn.vf);

	return vOut.f[0];
}

FORCEINLINE float _VMX_RSqrtFast( float x )
{
	vector_float_union vIn, vOut;

	vIn.f[0] = x;

	vOut.vf = rsqrtf4fast(vIn.vf);

	return vOut.f[0];
}
*/

// PS3 sine/cosine: evaluates sinvf( a ) and cosvf( a ) separately, then stores
// the sine through pS and the cosine through pC, in that order.
FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC )
{
	const float flSine = sinvf( a );
	const float flCosine = cosvf( a );

	*pS = flSine;
	*pC = flCosine;
}

// PS3 cosine: returns cosvf( a ).
FORCEINLINE float _VMX_Cos( float a )
{
	return cosvf(a);
}


// the 360 has fixed hw and calls directly
/*
#define FastSqrt(x)			_VMX_Sqrt(x)
#define	FastRSqrt(x)		_VMX_RSqrt(x)
#define FastRSqrtFast(x)	_VMX_RSqrtFast(x)
#define FastSinCos(x,s,c)	_VMX_SinCos(x,s,c)
#define FastCos(x)			_VMX_Cos(x)
*/
#endif	// _PS3
#endif	// !( defined( PLATFORM_PPC ) || defined(SPU) )

#endif // _MATH_PFNS_H_