diff options
| author | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:31:46 -0800 |
|---|---|---|
| committer | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:46:31 -0800 |
| commit | f56bb35301836e56582a575a75864392a0177875 (patch) | |
| tree | de61ddd39de3e7df52759711950b4c288592f0dc /mp/src/public/mathlib/ssequaternion.h | |
| parent | Mark some more files as text. (diff) | |
| download | source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip | |
Fix line endings. WHAMMY.
Diffstat (limited to 'mp/src/public/mathlib/ssequaternion.h')
| -rw-r--r-- | mp/src/public/mathlib/ssequaternion.h | 734 |
1 files changed, 367 insertions, 367 deletions
diff --git a/mp/src/public/mathlib/ssequaternion.h b/mp/src/public/mathlib/ssequaternion.h index 5d60961e..825a9e45 100644 --- a/mp/src/public/mathlib/ssequaternion.h +++ b/mp/src/public/mathlib/ssequaternion.h @@ -1,367 +1,367 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEQUATMATH_H
-#define SSEQUATMATH_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/ssemath.h"
-
-// Use this #define to allow SSE versions of Quaternion math
-// to exist on PC.
-// On PC, certain horizontal vector operations are not supported.
-// This causes the SSE implementation of quaternion math to mix the
-// vector and scalar floating point units, which is extremely
-// performance negative if you don't compile to native SSE2 (which
-// we don't as of Sept 1, 2007). So, it's best not to allow these
-// functions to exist at all. It's not good enough to simply replace
-// the contents of the functions with scalar math, because each call
-// to LoadAligned and StoreAligned will result in an unnecssary copy
-// of the quaternion, and several moves to and from the XMM registers.
-//
-// Basically, the problem you run into is that for efficient SIMD code,
-// you need to load the quaternions and vectors into SIMD registers and
-// keep them there as long as possible while doing only SIMD math,
-// whereas for efficient scalar code, each time you copy onto or ever
-// use a fltx4, it hoses your pipeline. So the difference has to be
-// in the management of temporary variables in the calling function,
-// not inside the math functions.
-//
-// If you compile assuming the presence of SSE2, the MSVC will abandon
-// the traditional x87 FPU operations altogether and make everything use
-// the SSE2 registers, which lessens this problem a little.
-
-// permitted only on 360, as we've done careful tuning on its Altivec math:
-#ifdef _X360
-#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
-#endif
-
-
-
-//---------------------------------------------------------------------
-// Load/store quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-#if ALLOW_SIMD_QUATERNION_MATH
-// Using STDC or SSE
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- StoreAlignedSIMD( pSIMD->Base(), a );
-}
-#endif
-#else
-
-// for the transitional class -- load a QuaternionAligned
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- XMStoreVector4A( pSIMD->Base(), a );
-}
-
-#endif
-
-
-#if ALLOW_SIMD_QUATERNION_MATH
-//---------------------------------------------------------------------
-// Make sure quaternions are within 180 degrees of one another, if not, reverse q
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 a = SubSIMD( p, q );
- fltx4 b = AddSIMD( p, q );
- a = Dot4SIMD( a, a );
- b = Dot4SIMD( b, b );
- fltx4 cmp = CmpGtSIMD( a, b );
- fltx4 result = MaskedAssign( cmp, NegSIMD(q), q );
- return result;
-}
-
-//---------------------------------------------------------------------
-// Normalize Quaternion
-//---------------------------------------------------------------------
-#if USE_STDC_FOR_SIMD
-
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result;
- radius = Dot4SIMD( q, q );
-
- if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
- {
- float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) );
- result = ReplicateX4( iradius );
- result = MulSIMD( result, q );
- return result;
- }
- return q;
-}
-
-#else
-
-// SSE + X360 implementation
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result, mask;
- radius = Dot4SIMD( q, q );
- mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0
- result = ReciprocalSqrtSIMD( radius );
- result = MulSIMD( result, q );
- return MaskedAssign( mask, q, result ); // if radius was 0, just return q
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// 0.0 returns p, 1.0 return q.
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 sclp, sclq, result;
- sclq = ReplicateX4( t );
- sclp = SubSIMD( Four_Ones, sclq );
- result = MulSIMD( sclp, p );
- result = MaddSIMD( sclq, q, result );
- return QuaternionNormalizeSIMD( result );
-}
-
-
-//---------------------------------------------------------------------
-// Blend Quaternions
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionBlendNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-//---------------------------------------------------------------------
-// Multiply Quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
- SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
- SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
- SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
- return result;
-}
-
-#else
-
-// X360
-extern const fltx4 g_QuatMultRowSign[4];
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- fltx4 q2, row, result;
- q2 = QuaternionAlignSIMD( p, q );
-
- row = XMVectorSwizzle( q2, 3, 2, 1, 0 );
- row = MulSIMD( row, g_QuatMultRowSign[0] );
- result = Dot4SIMD( row, p );
-
- row = XMVectorSwizzle( q2, 2, 3, 0, 1 );
- row = MulSIMD( row, g_QuatMultRowSign[1] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 4, 0 );
-
- row = XMVectorSwizzle( q2, 1, 0, 3, 2 );
- row = MulSIMD( row, g_QuatMultRowSign[2] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 2, 0 );
-
- row = MulSIMD( q2, g_QuatMultRowSign[3] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 1, 0 );
- return result;
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// Quaternion scale
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- float r;
- fltx4 q;
-
- // FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
- // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
- float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
- sinom = min( sinom, 1.f );
-
- float sinsom = sin( asin( sinom ) * t );
-
- t = sinsom / (sinom + FLT_EPSILON);
- SubFloat( q, 0 ) = t * SubFloat( p, 0 );
- SubFloat( q, 1 ) = t * SubFloat( p, 1 );
- SubFloat( q, 2 ) = t * SubFloat( p, 2 );
-
- // rescale rotation
- r = 1.0f - sinsom * sinsom;
-
- // Assert( r >= 0 );
- if (r < 0.0f)
- r = 0.0f;
- r = sqrt( r );
-
- // keep sign of rotation
- SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r );
- return q;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- fltx4 sinom = Dot3SIMD( p, p );
- sinom = SqrtSIMD( sinom );
- sinom = MinSIMD( sinom, Four_Ones );
- fltx4 sinsom = ArcSinSIMD( sinom );
- fltx4 t4 = ReplicateX4( t );
- sinsom = MulSIMD( sinsom, t4 );
- sinsom = SinSIMD( sinsom );
- sinom = AddSIMD( sinom, Four_Epsilons );
- sinom = ReciprocalSIMD( sinom );
- t4 = MulSIMD( sinsom, sinom );
- fltx4 result = MulSIMD( p, t4 );
-
- // rescale rotation
- sinsom = MulSIMD( sinsom, sinsom );
- fltx4 r = SubSIMD( Four_Ones, sinsom );
- r = MaxSIMD( r, Four_Zeros );
- r = SqrtSIMD( r );
-
- // keep sign of rotation
- fltx4 cmp = CmpGeSIMD( p, Four_Zeros );
- r = MaskedAssign( cmp, r, NegSIMD( r ) );
-
- result = __vrlimi(result, r, 1, 0);
- return result;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion sphereical linear interpolation
-//-----------------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- float omega, cosom, sinom, sclp, sclq;
-
- fltx4 result;
-
- // 0.0 returns p, 1.0 return q.
- cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) +
- SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
-
- if ( (1.0f + cosom ) > 0.000001f )
- {
- if ( (1.0f - cosom ) > 0.000001f )
- {
- omega = acos( cosom );
- sinom = sin( omega );
- sclp = sin( (1.0f - t)*omega) / sinom;
- sclq = sin( t*omega ) / sinom;
- }
- else
- {
- // TODO: add short circuit for cosom == 1.0f?
- sclp = 1.0f - t;
- sclq = t;
- }
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
- SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
- }
- else
- {
- SubFloat( result, 0 ) = -SubFloat( q, 1 );
- SubFloat( result, 1 ) = SubFloat( q, 0 );
- SubFloat( result, 2 ) = -SubFloat( q, 3 );
- SubFloat( result, 3 ) = SubFloat( q, 2 );
- sclp = sin( (1.0f - t) * (0.5f * M_PI));
- sclq = sin( t * (0.5f * M_PI));
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );
- }
-
- return result;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- return XMQuaternionSlerp( p, q, t );
-}
-
-#endif
-
-
-FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionSlerpNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-#endif // ALLOW_SIMD_QUATERNION_MATH
-
-#endif // SSEQUATMATH_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: - defines SIMD "structure of arrays" classes and functions. +// +//===========================================================================// +#ifndef SSEQUATMATH_H +#define SSEQUATMATH_H + +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/ssemath.h" + +// Use this #define to allow SSE versions of Quaternion math +// to exist on PC. +// On PC, certain horizontal vector operations are not supported. +// This causes the SSE implementation of quaternion math to mix the +// vector and scalar floating point units, which is extremely +// performance negative if you don't compile to native SSE2 (which +// we don't as of Sept 1, 2007). So, it's best not to allow these +// functions to exist at all. It's not good enough to simply replace +// the contents of the functions with scalar math, because each call +// to LoadAligned and StoreAligned will result in an unnecssary copy +// of the quaternion, and several moves to and from the XMM registers. +// +// Basically, the problem you run into is that for efficient SIMD code, +// you need to load the quaternions and vectors into SIMD registers and +// keep them there as long as possible while doing only SIMD math, +// whereas for efficient scalar code, each time you copy onto or ever +// use a fltx4, it hoses your pipeline. So the difference has to be +// in the management of temporary variables in the calling function, +// not inside the math functions. +// +// If you compile assuming the presence of SSE2, the MSVC will abandon +// the traditional x87 FPU operations altogether and make everything use +// the SSE2 registers, which lessens this problem a little. + +// permitted only on 360, as we've done careful tuning on its Altivec math: +#ifdef _X360 +#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC! +#endif + + + +//--------------------------------------------------------------------- +// Load/store quaternions +//--------------------------------------------------------------------- +#ifndef _X360 +#if ALLOW_SIMD_QUATERNION_MATH +// Using STDC or SSE +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD.Base() ); + return retval; +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD ); + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD( pSIMD->Base(), a ); +} +#endif +#else + +// for the transitional class -- load a QuaternionAligned +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD.Base() ); + return retval; +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD ); + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + XMStoreVector4A( pSIMD->Base(), a ); +} + +#endif + + +#if ALLOW_SIMD_QUATERNION_MATH +//--------------------------------------------------------------------- +// Make sure quaternions are within 180 degrees of one another, if not, reverse q +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q ) +{ + // decide if one of the quaternions is backwards + fltx4 a = SubSIMD( p, q ); + fltx4 b = AddSIMD( p, q ); + a = Dot4SIMD( a, a ); + b = Dot4SIMD( b, b ); + fltx4 cmp = CmpGtSIMD( a, b ); + fltx4 result = MaskedAssign( cmp, NegSIMD(q), q ); + return result; +} + +//--------------------------------------------------------------------- +// Normalize Quaternion +//--------------------------------------------------------------------- +#if USE_STDC_FOR_SIMD + +FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result; + radius = Dot4SIMD( q, q ); + + if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON)) + { + float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) ); + result = ReplicateX4( iradius ); + result = MulSIMD( result, q ); + return result; + } + return q; +} + +#else + +// SSE + X360 implementation +FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result, mask; + radius = Dot4SIMD( q, q ); + mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0 + result = ReciprocalSqrtSIMD( radius ); + result = MulSIMD( result, q ); + return MaskedAssign( mask, q, result ); // if radius was 0, just return q +} + +#endif + + +//--------------------------------------------------------------------- +// 0.0 returns p, 1.0 return q. +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 sclp, sclq, result; + sclq = ReplicateX4( t ); + sclp = SubSIMD( Four_Ones, sclq ); + result = MulSIMD( sclp, p ); + result = MaddSIMD( sclq, q, result ); + return QuaternionNormalizeSIMD( result ); +} + + +//--------------------------------------------------------------------- +// Blend Quaternions +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionBlendNoAlignSIMD( p, q2, t ); + return result; +} + + +//--------------------------------------------------------------------- +// Multiply Quaternions +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) +{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 ); + SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 ); + SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 ); + SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 ); + return result; +} + +#else + +// X360 +extern const fltx4 g_QuatMultRowSign[4]; +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) +{ + fltx4 q2, row, result; + q2 = QuaternionAlignSIMD( p, q ); + + row = XMVectorSwizzle( q2, 3, 2, 1, 0 ); + row = MulSIMD( row, g_QuatMultRowSign[0] ); + result = Dot4SIMD( row, p ); + + row = XMVectorSwizzle( q2, 2, 3, 0, 1 ); + row = MulSIMD( row, g_QuatMultRowSign[1] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 4, 0 ); + + row = XMVectorSwizzle( q2, 1, 0, 3, 2 ); + row = MulSIMD( row, g_QuatMultRowSign[2] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 2, 0 ); + + row = MulSIMD( q2, g_QuatMultRowSign[3] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 1, 0 ); + return result; +} + +#endif + + +//--------------------------------------------------------------------- +// Quaternion scale +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + float r; + fltx4 q; + + // FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to + // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale. + float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) ); + sinom = min( sinom, 1.f ); + + float sinsom = sin( asin( sinom ) * t ); + + t = sinsom / (sinom + FLT_EPSILON); + SubFloat( q, 0 ) = t * SubFloat( p, 0 ); + SubFloat( q, 1 ) = t * SubFloat( p, 1 ); + SubFloat( q, 2 ) = t * SubFloat( p, 2 ); + + // rescale rotation + r = 1.0f - sinsom * sinsom; + + // Assert( r >= 0 ); + if (r < 0.0f) + r = 0.0f; + r = sqrt( r ); + + // keep sign of rotation + SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r ); + return q; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + fltx4 sinom = Dot3SIMD( p, p ); + sinom = SqrtSIMD( sinom ); + sinom = MinSIMD( sinom, Four_Ones ); + fltx4 sinsom = ArcSinSIMD( sinom ); + fltx4 t4 = ReplicateX4( t ); + sinsom = MulSIMD( sinsom, t4 ); + sinsom = SinSIMD( sinsom ); + sinom = AddSIMD( sinom, Four_Epsilons ); + sinom = ReciprocalSIMD( sinom ); + t4 = MulSIMD( sinsom, sinom ); + fltx4 result = MulSIMD( p, t4 ); + + // rescale rotation + sinsom = MulSIMD( sinsom, sinsom ); + fltx4 r = SubSIMD( Four_Ones, sinsom ); + r = MaxSIMD( r, Four_Zeros ); + r = SqrtSIMD( r ); + + // keep sign of rotation + fltx4 cmp = CmpGeSIMD( p, Four_Zeros ); + r = MaskedAssign( cmp, r, NegSIMD( r ) ); + + result = __vrlimi(result, r, 1, 0); + return result; +} + +#endif + + +//----------------------------------------------------------------------------- +// Quaternion sphereical linear interpolation +//----------------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + float omega, cosom, sinom, sclp, sclq; + + fltx4 result; + + // 0.0 returns p, 1.0 return q. + cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + + SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 ); + + if ( (1.0f + cosom ) > 0.000001f ) + { + if ( (1.0f - cosom ) > 0.000001f ) + { + omega = acos( cosom ); + sinom = sin( omega ); + sclp = sin( (1.0f - t)*omega) / sinom; + sclq = sin( t*omega ) / sinom; + } + else + { + // TODO: add short circuit for cosom == 1.0f? + sclp = 1.0f - t; + sclq = t; + } + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 ); + SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 ); + } + else + { + SubFloat( result, 0 ) = -SubFloat( q, 1 ); + SubFloat( result, 1 ) = SubFloat( q, 0 ); + SubFloat( result, 2 ) = -SubFloat( q, 3 ); + SubFloat( result, 3 ) = SubFloat( q, 2 ); + sclp = sin( (1.0f - t) * (0.5f * M_PI)); + sclq = sin( t * (0.5f * M_PI)); + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 ); + } + + return result; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + return XMQuaternionSlerp( p, q, t ); +} + +#endif + + +FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionSlerpNoAlignSIMD( p, q2, t ); + return result; +} + + +#endif // ALLOW_SIMD_QUATERNION_MATH + +#endif // SSEQUATMATH_H + |