diff options
| author | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:31:46 -0800 |
|---|---|---|
| committer | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:46:31 -0800 |
| commit | f56bb35301836e56582a575a75864392a0177875 (patch) | |
| tree | de61ddd39de3e7df52759711950b4c288592f0dc /sp/src/mathlib/3dnow.cpp | |
| parent | Mark some more files as text. (diff) | |
| download | source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip | |
Fix line endings. WHAMMY.
Diffstat (limited to 'sp/src/mathlib/3dnow.cpp')
| -rw-r--r-- | sp/src/mathlib/3dnow.cpp | 394 |
1 files changed, 197 insertions, 197 deletions
diff --git a/sp/src/mathlib/3dnow.cpp b/sp/src/mathlib/3dnow.cpp index 71657044..db17c8c1 100644 --- a/sp/src/mathlib/3dnow.cpp +++ b/sp/src/mathlib/3dnow.cpp @@ -1,197 +1,197 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: 3DNow Math primitives.
-//
-//=====================================================================================//
-
-#include <math.h>
-#include <float.h> // Needed for FLT_EPSILON
-#include "basetypes.h"
-#include <memory.h>
-#include "tier0/dbg.h"
-#include "mathlib/mathlib.h"
-#include "mathlib/amd3dx.h"
-#include "mathlib/vector.h"
-
-// memdbgon must be the last include file in a .cpp file!!!
-#include "tier0/memdbgon.h"
-
-#if !defined(COMPILER_MSVC64) && !defined(LINUX)
-// Implement for 64-bit Windows if needed.
-// Clang hits "fatal error: error in backend:" and other errors when trying
-// to compile the inline assembly below. 3DNow support is highly unlikely to
-// be useful/used, so it's not worth spending time on fixing.
-
-#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
-#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
-
-//-----------------------------------------------------------------------------
-// 3D Now Implementations of optimized routines:
-//-----------------------------------------------------------------------------
-float _3DNow_Sqrt(float x)
-{
- Assert( s_bMathlibInitialized );
- float root = 0.f;
-#ifdef _WIN32
- _asm
- {
- femms
- movd mm0, x
- PFRSQRT (mm1,mm0)
- punpckldq mm0, mm0
- PFMUL (mm0, mm1)
- movd root, mm0
- femms
- }
-#elif LINUX
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "pfrsqrt %y0, %y1 \n\t"
- "punpckldq %y1, %y1 \n\t"
- "pfmul %y1, %y0 \n\t"
- : "=y" (root), "=y" (x)
- :"0" (x)
- );
- __asm __volatile__( "femms" );
-#else
-#error
-#endif
-
- return root;
-}
-
-// NJS FIXME: Need to test Recripricol squareroot performance and accuraccy
-// on AMD's before using the specialized instruction.
-float _3DNow_RSqrt(float x)
-{
- Assert( s_bMathlibInitialized );
-
- return 1.f / _3DNow_Sqrt(x);
-}
-
-
-float FASTCALL _3DNow_VectorNormalize (Vector& vec)
-{
- Assert( s_bMathlibInitialized );
- float *v = &vec[0];
- float radius = 0.f;
-
- if ( v[0] || v[1] || v[2] )
- {
-#ifdef _WIN32
- _asm
- {
- mov eax, v
- femms
- movq mm0, QWORD PTR [eax]
- movd mm1, DWORD PTR [eax+8]
- movq mm2, mm0
- movq mm3, mm1
- PFMUL (mm0, mm0)
- PFMUL (mm1, mm1)
- PFACC (mm0, mm0)
- PFADD (mm1, mm0)
- PFRSQRT (mm0, mm1)
- punpckldq mm1, mm1
- PFMUL (mm1, mm0)
- PFMUL (mm2, mm0)
- PFMUL (mm3, mm0)
- movq QWORD PTR [eax], mm2
- movd DWORD PTR [eax+8], mm3
- movd radius, mm1
- femms
- }
-#elif LINUX
- long long a,c;
- int b,d;
- memcpy(&a,&vec[0],sizeof(a));
- memcpy(&b,&vec[2],sizeof(b));
- memcpy(&c,&vec[0],sizeof(c));
- memcpy(&d,&vec[2],sizeof(d));
-
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "pfmul %y3, %y3\n\t"
- "pfmul %y0, %y0 \n\t"
- "pfacc %y3, %y3 \n\t"
- "pfadd %y3, %y0 \n\t"
- "pfrsqrt %y0, %y3 \n\t"
- "punpckldq %y0, %y0 \n\t"
- "pfmul %y3, %y0 \n\t"
- "pfmul %y3, %y2 \n\t"
- "pfmul %y3, %y1 \n\t"
- : "=y" (radius), "=y" (c), "=y" (d)
- : "y" (a), "0" (b), "1" (c), "2" (d)
- );
- memcpy(&vec[0],&c,sizeof(c));
- memcpy(&vec[2],&d,sizeof(d));
- __asm __volatile__( "femms" );
-
-#else
-#error
-#endif
- }
- return radius;
-}
-
-
-void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
-{
- _3DNow_VectorNormalize( vec );
-}
-
-
-// JAY: This complains with the latest processor pack
-#pragma warning(disable: 4730)
-
-float _3DNow_InvRSquared(const float* v)
-{
- Assert( s_bMathlibInitialized );
- float r2 = 1.f;
-#ifdef _WIN32
- _asm { // AMD 3DNow only routine
- mov eax, v
- femms
- movq mm0, QWORD PTR [eax]
- movd mm1, DWORD PTR [eax+8]
- movd mm2, [r2]
- PFMUL (mm0, mm0)
- PFMUL (mm1, mm1)
- PFACC (mm0, mm0)
- PFADD (mm1, mm0)
- PFMAX (mm1, mm2)
- PFRCP (mm0, mm1)
- movd [r2], mm0
- femms
- }
-#elif LINUX
- long long a,c;
- int b;
- memcpy(&a,&v[0],sizeof(a));
- memcpy(&b,&v[2],sizeof(b));
- memcpy(&c,&v[0],sizeof(c));
-
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "PFMUL %y2, %y2 \n\t"
- "PFMUL %y3, %y3 \n\t"
- "PFACC %y2, %y2 \n\t"
- "PFADD %y2, %y3 \n\t"
- "PFMAX %y3, %y4 \n\t"
- "PFRCP %y3, %y2 \n\t"
- "movq %y2, %y0 \n\t"
- : "=y" (r2)
- : "0" (r2), "y" (a), "y" (b), "y" (c)
- );
- __asm __volatile__( "femms" );
-#else
-#error
-#endif
-
- return r2;
-}
-
-#endif // COMPILER_MSVC64
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: 3DNow Math primitives. +// +//=====================================================================================// + +#include <math.h> +#include <float.h> // Needed for FLT_EPSILON +#include "basetypes.h" +#include <memory.h> +#include "tier0/dbg.h" +#include "mathlib/mathlib.h" +#include "mathlib/amd3dx.h" +#include "mathlib/vector.h" + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +#if !defined(COMPILER_MSVC64) && !defined(LINUX) +// Implement for 64-bit Windows if needed. +// Clang hits "fatal error: error in backend:" and other errors when trying +// to compile the inline assembly below. 3DNow support is highly unlikely to +// be useful/used, so it's not worth spending time on fixing. + +#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data" +#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code" + +//----------------------------------------------------------------------------- +// 3D Now Implementations of optimized routines: +//----------------------------------------------------------------------------- +float _3DNow_Sqrt(float x) +{ + Assert( s_bMathlibInitialized ); + float root = 0.f; +#ifdef _WIN32 + _asm + { + femms + movd mm0, x + PFRSQRT (mm1,mm0) + punpckldq mm0, mm0 + PFMUL (mm0, mm1) + movd root, mm0 + femms + } +#elif LINUX + __asm __volatile__( "femms" ); + __asm __volatile__ + ( + "pfrsqrt %y0, %y1 \n\t" + "punpckldq %y1, %y1 \n\t" + "pfmul %y1, %y0 \n\t" + : "=y" (root), "=y" (x) + :"0" (x) + ); + __asm __volatile__( "femms" ); +#else +#error +#endif + + return root; +} + +// NJS FIXME: Need to test Recripricol squareroot performance and accuraccy +// on AMD's before using the specialized instruction. +float _3DNow_RSqrt(float x) +{ + Assert( s_bMathlibInitialized ); + + return 1.f / _3DNow_Sqrt(x); +} + + +float FASTCALL _3DNow_VectorNormalize (Vector& vec) +{ + Assert( s_bMathlibInitialized ); + float *v = &vec[0]; + float radius = 0.f; + + if ( v[0] || v[1] || v[2] ) + { +#ifdef _WIN32 + _asm + { + mov eax, v + femms + movq mm0, QWORD PTR [eax] + movd mm1, DWORD PTR [eax+8] + movq mm2, mm0 + movq mm3, mm1 + PFMUL (mm0, mm0) + PFMUL (mm1, mm1) + PFACC (mm0, mm0) + PFADD (mm1, mm0) + PFRSQRT (mm0, mm1) + punpckldq mm1, mm1 + PFMUL (mm1, mm0) + PFMUL (mm2, mm0) + PFMUL (mm3, mm0) + movq QWORD PTR [eax], mm2 + movd DWORD PTR [eax+8], mm3 + movd radius, mm1 + femms + } +#elif LINUX + long long a,c; + int b,d; + memcpy(&a,&vec[0],sizeof(a)); + memcpy(&b,&vec[2],sizeof(b)); + memcpy(&c,&vec[0],sizeof(c)); + memcpy(&d,&vec[2],sizeof(d)); + + __asm __volatile__( "femms" ); + __asm __volatile__ + ( + "pfmul %y3, %y3\n\t" + "pfmul %y0, %y0 \n\t" + "pfacc %y3, %y3 \n\t" + "pfadd %y3, %y0 \n\t" + "pfrsqrt %y0, %y3 \n\t" + "punpckldq %y0, %y0 \n\t" + "pfmul %y3, %y0 \n\t" + "pfmul %y3, %y2 \n\t" + "pfmul %y3, %y1 \n\t" + : "=y" (radius), "=y" (c), "=y" (d) + : "y" (a), "0" (b), "1" (c), "2" (d) + ); + memcpy(&vec[0],&c,sizeof(c)); + memcpy(&vec[2],&d,sizeof(d)); + __asm __volatile__( "femms" ); + +#else +#error +#endif + } + return radius; +} + + +void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec) +{ + _3DNow_VectorNormalize( vec ); +} + + +// JAY: This complains with the latest processor pack +#pragma warning(disable: 4730) + +float _3DNow_InvRSquared(const float* v) +{ + Assert( s_bMathlibInitialized ); + float r2 = 1.f; +#ifdef _WIN32 + _asm { // AMD 3DNow only routine + mov eax, v + femms + movq mm0, QWORD PTR [eax] + movd mm1, DWORD PTR [eax+8] + movd mm2, [r2] + PFMUL (mm0, mm0) + PFMUL (mm1, mm1) + PFACC (mm0, mm0) + PFADD (mm1, mm0) + PFMAX (mm1, mm2) + PFRCP (mm0, mm1) + movd [r2], mm0 + femms + } +#elif LINUX + long long a,c; + int b; + memcpy(&a,&v[0],sizeof(a)); + memcpy(&b,&v[2],sizeof(b)); + memcpy(&c,&v[0],sizeof(c)); + + __asm __volatile__( "femms" ); + __asm __volatile__ + ( + "PFMUL %y2, %y2 \n\t" + "PFMUL %y3, %y3 \n\t" + "PFACC %y2, %y2 \n\t" + "PFADD %y2, %y3 \n\t" + "PFMAX %y3, %y4 \n\t" + "PFRCP %y3, %y2 \n\t" + "movq %y2, %y0 \n\t" + : "=y" (r2) + : "0" (r2), "y" (a), "y" (b), "y" (c) + ); + __asm __volatile__( "femms" ); +#else +#error +#endif + + return r2; +} + +#endif // COMPILER_MSVC64 |