Fix line endings. WHAMMY.

author: Jørgen P. Tjernø <[email protected]> 2013-12-02 19:31:46 -0800
committer: Jørgen P. Tjernø <[email protected]> 2013-12-02 19:46:31 -0800
commit: f56bb35301836e56582a575a75864392a0177875 (patch)
tree: de61ddd39de3e7df52759711950b4c288592f0dc /mp/src/public/mathlib/ssequaternion.h
parent: Mark some more files as text. (diff)
download: source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz
source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip
1 files changed, 367 insertions, 367 deletions
diff --git a/mp/src/public/mathlib/ssequaternion.h b/mp/src/public/mathlib/ssequaternion.h
index 5d60961e..825a9e45 100644
--- a/mp/src/public/mathlib/ssequaternion.h
+++ b/mp/src/public/mathlib/ssequaternion.h
@@ -1,367 +1,367 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEQUATMATH_H
-#define SSEQUATMATH_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/ssemath.h"
-
-// Use this #define to allow SSE versions of Quaternion math
-// to exist on PC.
-// On PC, certain horizontal vector operations are not supported.
-// This causes the SSE implementation of quaternion math to mix the
-// vector and scalar floating point units, which is extremely 
-// performance negative if you don't compile to native SSE2 (which 
-// we don't as of Sept 1, 2007). So, it's best not to allow these
-// functions to exist at all. It's not good enough to simply replace
-// the contents of the functions with scalar math, because each call
-// to LoadAligned and StoreAligned will result in an unnecssary copy
-// of the quaternion, and several moves to and from the XMM registers.
-//
-// Basically, the problem you run into is that for efficient SIMD code,
-// you need to load the quaternions and vectors into SIMD registers and
-// keep them there as long as possible while doing only SIMD math,
-// whereas for efficient scalar code, each time you copy onto or ever
-// use a fltx4, it hoses your pipeline. So the difference has to be
-// in the management of temporary variables in the calling function,
-// not inside the math functions.
-//
-// If you compile assuming the presence of SSE2, the MSVC will abandon
-// the traditional x87 FPU operations altogether and make everything use
-// the SSE2 registers, which lessens this problem a little.
-
-// permitted only on 360, as we've done careful tuning on its Altivec math:
-#ifdef _X360
-#define ALLOW_SIMD_QUATERNION_MATH 1  // not on PC!
-#endif
-
-
-
-//---------------------------------------------------------------------
-// Load/store quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-#if ALLOW_SIMD_QUATERNION_MATH
-// Using STDC or SSE
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
-	fltx4 retval = LoadAlignedSIMD( pSIMD.Base() );
-	return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
-	fltx4 retval = LoadAlignedSIMD( pSIMD );
-	return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
-	StoreAlignedSIMD( pSIMD->Base(), a );
-}
-#endif
-#else
-
-// for the transitional class -- load a QuaternionAligned
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
-	fltx4 retval = XMLoadVector4A( pSIMD.Base() );
-	return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
-	fltx4 retval = XMLoadVector4A( pSIMD );
-	return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
-	XMStoreVector4A( pSIMD->Base(), a );
-}
-
-#endif
-
-
-#if ALLOW_SIMD_QUATERNION_MATH
-//---------------------------------------------------------------------
-// Make sure quaternions are within 180 degrees of one another, if not, reverse q
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
-{
-	// decide if one of the quaternions is backwards
-	fltx4 a = SubSIMD( p, q );
-	fltx4 b = AddSIMD( p, q );
-	a = Dot4SIMD( a, a );
-	b = Dot4SIMD( b, b );
-	fltx4 cmp = CmpGtSIMD( a, b );
-	fltx4 result = MaskedAssign( cmp, NegSIMD(q), q );
-	return result;
-}
-
-//---------------------------------------------------------------------
-// Normalize Quaternion
-//---------------------------------------------------------------------
-#if USE_STDC_FOR_SIMD
-
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
-	fltx4 radius, result;
-	radius = Dot4SIMD( q, q );
-
-	if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
-	{
-		float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) );
-		result = ReplicateX4( iradius );
-		result = MulSIMD( result, q );
-		return result;
-	}
-	return q;
-}
-
-#else
-
-// SSE + X360 implementation
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
-	fltx4 radius, result, mask;
-	radius = Dot4SIMD( q, q );
-	mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0
-	result = ReciprocalSqrtSIMD( radius );
-	result = MulSIMD( result, q );
-	return MaskedAssign( mask, q, result );	// if radius was 0, just return q
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// 0.0 returns p, 1.0 return q.
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
-	fltx4 sclp, sclq, result;
-	sclq = ReplicateX4( t );
-	sclp = SubSIMD( Four_Ones, sclq );
-	result = MulSIMD( sclp, p );
-	result = MaddSIMD( sclq, q, result );
-	return QuaternionNormalizeSIMD( result );
-}
-
-
-//---------------------------------------------------------------------
-// Blend Quaternions
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
-	// decide if one of the quaternions is backwards
-	fltx4 q2, result;
-	q2 = QuaternionAlignSIMD( p, q );
-	result = QuaternionBlendNoAlignSIMD( p, q2, t );
-	return result;
-}
-
-
-//---------------------------------------------------------------------
-// Multiply Quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
-	// decide if one of the quaternions is backwards
-	fltx4 q2, result;
-	q2 = QuaternionAlignSIMD( p, q );
-	SubFloat( result, 0 ) =  SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
-	SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
-	SubFloat( result, 2 ) =  SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
-	SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
-	return result;
-}
-
-#else 
-
-// X360
-extern const fltx4 g_QuatMultRowSign[4];
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
-	fltx4 q2, row, result;
-	q2 = QuaternionAlignSIMD( p, q );
-
-	row = XMVectorSwizzle( q2, 3, 2, 1, 0 );
-	row = MulSIMD( row, g_QuatMultRowSign[0] );
-	result = Dot4SIMD( row, p );
-
-	row = XMVectorSwizzle( q2, 2, 3, 0, 1 );
-	row = MulSIMD( row, g_QuatMultRowSign[1] );
-	row = Dot4SIMD( row, p );
-	result = __vrlimi( result, row, 4, 0 );
-	
-	row = XMVectorSwizzle( q2, 1, 0, 3, 2 );
-	row = MulSIMD( row, g_QuatMultRowSign[2] );
-	row = Dot4SIMD( row, p );
-	result = __vrlimi( result, row, 2, 0 );
-	
-	row = MulSIMD( q2, g_QuatMultRowSign[3] );
-	row = Dot4SIMD( row, p );
-	result = __vrlimi( result, row, 1, 0 );
-	return result;
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// Quaternion scale
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
-	float r;
-	fltx4 q;
-
-	// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to 
-	// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
-	float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
-	sinom = min( sinom, 1.f );
-
-	float sinsom = sin( asin( sinom ) * t );
-
-	t = sinsom / (sinom + FLT_EPSILON);
-	SubFloat( q, 0 ) = t * SubFloat( p, 0 );
-	SubFloat( q, 1 ) = t * SubFloat( p, 1 );
-	SubFloat( q, 2 ) = t * SubFloat( p, 2 );
-
-	// rescale rotation
-	r = 1.0f - sinsom * sinsom;
-
-	// Assert( r >= 0 );
-	if (r < 0.0f) 
-		r = 0.0f;
-	r = sqrt( r );
-
-	// keep sign of rotation
-	SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r );
-	return q;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
-	fltx4 sinom = Dot3SIMD( p, p );
-	sinom = SqrtSIMD( sinom );
-	sinom = MinSIMD( sinom, Four_Ones );
-	fltx4 sinsom = ArcSinSIMD( sinom );
-	fltx4 t4 = ReplicateX4( t );
-	sinsom = MulSIMD( sinsom, t4 );
-	sinsom = SinSIMD( sinsom );
-	sinom = AddSIMD( sinom, Four_Epsilons );
-	sinom = ReciprocalSIMD( sinom );
-	t4 = MulSIMD( sinsom, sinom );
-	fltx4 result = MulSIMD( p, t4 );
-
-	// rescale rotation
-	sinsom = MulSIMD( sinsom, sinsom );
-	fltx4 r = SubSIMD( Four_Ones, sinsom );
-	r = MaxSIMD( r, Four_Zeros );
-	r = SqrtSIMD( r );
-
-	// keep sign of rotation
-	fltx4 cmp = CmpGeSIMD( p, Four_Zeros );
-	r = MaskedAssign( cmp, r, NegSIMD( r ) );
-
-	result = __vrlimi(result, r, 1, 0);
-	return result;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion sphereical linear interpolation
-//-----------------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
-	float omega, cosom, sinom, sclp, sclq;
-
-	fltx4 result;
-
-	// 0.0 returns p, 1.0 return q.
-	cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + 
-		SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
-
-	if ( (1.0f + cosom ) > 0.000001f ) 
-	{
-		if ( (1.0f - cosom ) > 0.000001f ) 
-		{
-			omega = acos( cosom );
-			sinom = sin( omega );
-			sclp = sin( (1.0f - t)*omega) / sinom;
-			sclq = sin( t*omega ) / sinom;
-		}
-		else 
-		{
-			// TODO: add short circuit for cosom == 1.0f?
-			sclp = 1.0f - t;
-			sclq = t;
-		}
-		SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
-		SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
-		SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
-		SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
-	}
-	else 
-	{
-		SubFloat( result, 0 ) = -SubFloat( q, 1 );
-		SubFloat( result, 1 ) =  SubFloat( q, 0 );
-		SubFloat( result, 2 ) = -SubFloat( q, 3 );
-		SubFloat( result, 3 ) =  SubFloat( q, 2 );
-		sclp = sin( (1.0f - t) * (0.5f * M_PI));
-		sclq = sin( t * (0.5f * M_PI));
-		SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
-		SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
-		SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );
-	}
-
-	return result;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
-	return XMQuaternionSlerp( p, q, t );
-}
-
-#endif
-
-
-FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
-	fltx4 q2, result;
-	q2 = QuaternionAlignSIMD( p, q );
-	result = QuaternionSlerpNoAlignSIMD( p, q2, t );
-	return result;
-}
-
-
-#endif // ALLOW_SIMD_QUATERNION_MATH
-
-#endif // SSEQUATMATH_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: - defines SIMD "structure of arrays" classes and functions.
+//
+//===========================================================================//
+#ifndef SSEQUATMATH_H
+#define SSEQUATMATH_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+
+#include "mathlib/ssemath.h"
+
+// Use this #define to allow SSE versions of Quaternion math
+// to exist on PC.
+// On PC, certain horizontal vector operations are not supported.
+// This causes the SSE implementation of quaternion math to mix the
+// vector and scalar floating point units, which is extremely 
+// performance negative if you don't compile to native SSE2 (which 
+// we don't as of Sept 1, 2007). So, it's best not to allow these
+// functions to exist at all. It's not good enough to simply replace
+// the contents of the functions with scalar math, because each call
+// to LoadAligned and StoreAligned will result in an unnecessary copy
+// of the quaternion, and several moves to and from the XMM registers.
+//
+// Basically, the problem you run into is that for efficient SIMD code,
+// you need to load the quaternions and vectors into SIMD registers and
+// keep them there as long as possible while doing only SIMD math,
+// whereas for efficient scalar code, each time you copy onto or ever
+// use a fltx4, it hoses your pipeline. So the difference has to be
+// in the management of temporary variables in the calling function,
+// not inside the math functions.
+//
+// If you compile assuming the presence of SSE2, the MSVC will abandon
+// the traditional x87 FPU operations altogether and make everything use
+// the SSE2 registers, which lessens this problem a little.
+
+// permitted only on 360, as we've done careful tuning on its Altivec math:
+#ifdef _X360
+#define ALLOW_SIMD_QUATERNION_MATH 1  // not on PC!
+#endif
+
+
+
+//---------------------------------------------------------------------
+// Load/store quaternions
+//---------------------------------------------------------------------
+#ifndef _X360
+#if ALLOW_SIMD_QUATERNION_MATH
+// Using STDC or SSE
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
+{
+	// Hand the quaternion's Base() storage to the LoadAlignedSIMD overload that accepts it.
+	return LoadAlignedSIMD( pSIMD.Base() );
+}
+
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
+{
+	// Load through Base(): passing pSIMD itself would select this same overload again (exact match) and recurse without end.
+	return LoadAlignedSIMD( pSIMD->Base() );
+}
+
+FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+	StoreAlignedSIMD( pSIMD->Base(), a );	// forwards 'a' to the StoreAlignedSIMD overload that takes pSIMD->Base()
+}
+#endif
+#else
+
+// for the transitional class -- load a QuaternionAligned
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
+{
+	// X360: XMLoadVector4A is called on the quaternion's Base() storage.
+	return XMLoadVector4A( pSIMD.Base() );
+}
+
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
+{
+	// X360: the QuaternionAligned pointer itself is handed to XMLoadVector4A (no Base() call here).
+	return XMLoadVector4A( pSIMD );
+}
+
+FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+	XMStoreVector4A( pSIMD->Base(), a );	// X360: hands pSIMD->Base() and 'a' to XMStoreVector4A
+}
+
+#endif
+
+
+#if ALLOW_SIMD_QUATERNION_MATH
+//---------------------------------------------------------------------
+// Make sure quaternions are within 180 degrees of one another, if not, reverse q
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
+{
+	// Compare |p - q|^2 against |p + q|^2; when the difference is the
+	// larger of the two, q is "backwards" relative to p and gets negated.
+	const fltx4 diff = SubSIMD( p, q );
+	const fltx4 sum = AddSIMD( p, q );
+	const fltx4 diffLenSqr = Dot4SIMD( diff, diff );
+	const fltx4 sumLenSqr = Dot4SIMD( sum, sum );
+	const fltx4 flipMask = CmpGtSIMD( diffLenSqr, sumLenSqr );
+	return MaskedAssign( flipMask, NegSIMD( q ), q );
+}
+
+//---------------------------------------------------------------------
+// Normalize Quaternion
+//---------------------------------------------------------------------
+#if USE_STDC_FOR_SIMD
+
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
+{
+	// Squared length of q, read back from lane 0 of the 4-way dot product.
+	fltx4 lenSqr = Dot4SIMD( q, q );
+
+	// A squared length of exactly zero is left alone: q is returned as-is.
+	// (A stricter, tolerance-based test is sketched but disabled:
+	//  > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON)))
+	if ( !SubFloat( lenSqr, 0 ) )
+		return q;
+
+	float invLen = 1.0f / sqrt( SubFloat( lenSqr, 0 ) );
+	return MulSIMD( ReplicateX4( invLen ), q );
+}
+
+#else
+
+// SSE + X360 implementation
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
+{
+	// Branch-free: always compute the scaled quaternion, then select q instead where |q|^2 == 0.
+	const fltx4 lenSqr = Dot4SIMD( q, q );
+	const fltx4 isZero = CmpEqSIMD( lenSqr, Four_Zeros );	// all ones iff lenSqr == 0
+	const fltx4 invLen = ReciprocalSqrtSIMD( lenSqr );
+	const fltx4 scaled = MulSIMD( invLen, q );
+	return MaskedAssign( isZero, q, scaled );	// zero length: hand q back unchanged
+}
+
+#endif
+
+
+//---------------------------------------------------------------------
+// 0.0 returns p, 1.0 returns q.
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	// Normalized linear blend: weight p by (1 - t), add q weighted by t, then normalize. q is not aligned first.
+	const fltx4 weightQ = ReplicateX4( t );
+	const fltx4 weightP = SubSIMD( Four_Ones, weightQ );
+	const fltx4 scaledP = MulSIMD( weightP, p );
+	const fltx4 blended = MaddSIMD( weightQ, q, scaledP );
+	return QuaternionNormalizeSIMD( blended );
+}
+
+
+//---------------------------------------------------------------------
+// Blend Quaternions
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	// Step 1: QuaternionAlignSIMD hands back q, or -q when q is backwards
+	// relative to p.
+	const fltx4 alignedQ = QuaternionAlignSIMD( p, q );
+	// Step 2: blend p toward the aligned quaternion by t and normalize.
+	return QuaternionBlendNoAlignSIMD( p, alignedQ, t );
+}
+
+
+//---------------------------------------------------------------------
+// Multiply Quaternions
+//---------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
+{
+	// decide if one of the quaternions is backwards: q2 is q, or -q when QuaternionAlignSIMD flips it; the product below uses q2, not q
+	fltx4 q2, result;	// result is filled in lane by lane with scalar math
+	q2 = QuaternionAlignSIMD( p, q );
+	SubFloat( result, 0 ) =  SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
+	SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
+	SubFloat( result, 2 ) =  SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
+	SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
+	return result;
+}
+
+#else 
+
+// X360
+extern const fltx4 g_QuatMultRowSign[4];	// four sign vectors, one per output row; only declared here
+FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
+{
+	fltx4 q2, row, result;
+	q2 = QuaternionAlignSIMD( p, q );	// q, or -q when q is backwards relative to p
+
+	row = XMVectorSwizzle( q2, 3, 2, 1, 0 );	// row 0: permute q2, apply the row's signs, dot with p
+	row = MulSIMD( row, g_QuatMultRowSign[0] );
+	result = Dot4SIMD( row, p );
+
+	row = XMVectorSwizzle( q2, 2, 3, 0, 1 );	// row 1: same recipe with a different permutation
+	row = MulSIMD( row, g_QuatMultRowSign[1] );
+	row = Dot4SIMD( row, p );
+	result = __vrlimi( result, row, 4, 0 );	// merge row 1 into result (mask 4 -- presumably lane y; confirm against VMX128 docs)
+	
+	row = XMVectorSwizzle( q2, 1, 0, 3, 2 );	// row 2
+	row = MulSIMD( row, g_QuatMultRowSign[2] );
+	row = Dot4SIMD( row, p );
+	result = __vrlimi( result, row, 2, 0 );	// merge row 2 (mask 2 -- presumably lane z; confirm)
+	
+	row = MulSIMD( q2, g_QuatMultRowSign[3] );	// row 3: q2 is used unpermuted
+	row = Dot4SIMD( row, p );
+	result = __vrlimi( result, row, 1, 0 );	// merge row 3 (mask 1 -- presumably lane w; confirm)
+	return result;
+}
+
+#endif
+
+
+//---------------------------------------------------------------------
+// Quaternion scale
+//---------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
+{
+	float r;
+	fltx4 q;
+	// Scalar path: rescales lanes 0..2 of p by sin(asin(|xyz|) * t) / |xyz| and rebuilds lane 3 from the new sine.
+	// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to 
+	// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
+	float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
+	sinom = min( sinom, 1.f );	// clamp to 1 before it is passed to asin()
+
+	float sinsom = sin( asin( sinom ) * t );	// sine of the angle after scaling by t
+
+	t = sinsom / (sinom + FLT_EPSILON);	// t is reused as the per-lane scale; FLT_EPSILON keeps the divisor non-zero
+	SubFloat( q, 0 ) = t * SubFloat( p, 0 );
+	SubFloat( q, 1 ) = t * SubFloat( p, 1 );
+	SubFloat( q, 2 ) = t * SubFloat( p, 2 );
+
+	// rescale rotation
+	r = 1.0f - sinsom * sinsom;	// squared cosine that matches the new sine
+
+	// Assert( r >= 0 );
+	if (r < 0.0f) 
+		r = 0.0f;	// clamp so sqrt() never sees a negative value
+	r = sqrt( r );
+
+	// keep sign of rotation
+	SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r );	// presumably r when p's lane 3 is >= 0, else -r -- confirm fsel's contract
+	return q;
+}
+
+#else
+
+// X360
+FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
+{
+	fltx4 sinom = Dot3SIMD( p, p );	// 3-component dot product of p with itself
+	sinom = SqrtSIMD( sinom );
+	sinom = MinSIMD( sinom, Four_Ones );	// clamp to 1 before ArcSinSIMD
+	fltx4 sinsom = ArcSinSIMD( sinom );
+	fltx4 t4 = ReplicateX4( t );
+	sinsom = MulSIMD( sinsom, t4 );	// angle scaled by t
+	sinsom = SinSIMD( sinsom );	// sine of the scaled angle
+	sinom = AddSIMD( sinom, Four_Epsilons );	// keep the reciprocal below away from zero
+	sinom = ReciprocalSIMD( sinom );
+	t4 = MulSIMD( sinsom, sinom );	// t4 is reused as the scale factor: new sine / old sine
+	fltx4 result = MulSIMD( p, t4 );	// scales every lane of p; one lane is replaced by r below
+
+	// rescale rotation
+	sinsom = MulSIMD( sinsom, sinsom );
+	fltx4 r = SubSIMD( Four_Ones, sinsom );	// 1 - sin^2
+	r = MaxSIMD( r, Four_Zeros );	// clamp at zero before the square root
+	r = SqrtSIMD( r );
+
+	// keep sign of rotation
+	fltx4 cmp = CmpGeSIMD( p, Four_Zeros );	// per-lane test p >= 0
+	r = MaskedAssign( cmp, r, NegSIMD( r ) );	// r where the test passed, -r elsewhere
+
+	result = __vrlimi(result, r, 1, 0);	// merge r into result (mask 1 -- presumably lane w; confirm against VMX128 docs)
+	return result;
+}
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Quaternion spherical linear interpolation
+//-----------------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	float omega, cosom, sinom, sclp, sclq;	// angle, its cosine and sine, and the weights applied to p and q
+
+	fltx4 result;
+
+	// 0.0 returns p, 1.0 returns q.  cosom is the 4-lane dot product of p and q.
+	cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + 
+		SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
+
+	if ( (1.0f + cosom ) > 0.000001f ) 	// cosom is not within 1e-6 of -1
+	{
+		if ( (1.0f - cosom ) > 0.000001f ) 	// cosom is not within 1e-6 of +1: use sine-based weights
+		{
+			omega = acos( cosom );
+			sinom = sin( omega );
+			sclp = sin( (1.0f - t)*omega) / sinom;
+			sclq = sin( t*omega ) / sinom;
+		}
+		else 
+		{
+			// TODO: add short circuit for cosom == 1.0f?
+			sclp = 1.0f - t;	// cosom is about +1: fall back to linear weights
+			sclq = t;
+		}
+		SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
+		SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
+		SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
+		SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
+	}
+	else 
+	{
+		SubFloat( result, 0 ) = -SubFloat( q, 1 );	// cosom is about -1: seed result with a permuted, sign-flipped copy of q
+		SubFloat( result, 1 ) =  SubFloat( q, 0 );
+		SubFloat( result, 2 ) = -SubFloat( q, 3 );
+		SubFloat( result, 3 ) =  SubFloat( q, 2 );
+		sclp = sin( (1.0f - t) * (0.5f * M_PI));	// weights sweep a fixed quarter turn (0.5 * M_PI)
+		sclq = sin( t * (0.5f * M_PI));
+		SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
+		SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
+		SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );	// NOTE: only lanes 0..2 are blended; lane 3 keeps q's lane 2 from above
+	}
+
+	return result;
+}
+
+#else
+
+// X360
+FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	return XMQuaternionSlerp( p, q, t );	// X360: delegates entirely to XMQuaternionSlerp; q is passed through without alignment
+}
+
+#endif
+
+
+FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	// Align first (q may come back negated), then interpolate from p toward
+	// the aligned quaternion with QuaternionSlerpNoAlignSIMD.
+	const fltx4 alignedQ = QuaternionAlignSIMD( p, q );
+	return QuaternionSlerpNoAlignSIMD( p, alignedQ, t );
+}
+
+
+#endif // ALLOW_SIMD_QUATERNION_MATH
+
+#endif // SSEQUATMATH_H
+
author	Jørgen P. Tjernø <[email protected]>	2013-12-02 19:31:46 -0800
committer	Jørgen P. Tjernø <[email protected]>	2013-12-02 19:46:31 -0800
commit	f56bb35301836e56582a575a75864392a0177875 (patch)
tree	de61ddd39de3e7df52759711950b4c288592f0dc /mp/src/public/mathlib/ssequaternion.h
parent	Mark some more files as text. (diff)
download	source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip