aboutsummaryrefslogtreecommitdiff
path: root/sp/src/mathlib/3dnow.cpp
diff options
context:
space:
mode:
author Jørgen P. Tjernø <[email protected]> 2013-12-02 19:31:46 -0800
committer Jørgen P. Tjernø <[email protected]> 2013-12-02 19:46:31 -0800
commit f56bb35301836e56582a575a75864392a0177875 (patch)
tree de61ddd39de3e7df52759711950b4c288592f0dc /sp/src/mathlib/3dnow.cpp
parent Mark some more files as text. (diff)
download source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz
source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip
Fix line endings. WHAMMY.
Diffstat (limited to 'sp/src/mathlib/3dnow.cpp')
-rw-r--r-- sp/src/mathlib/3dnow.cpp 394
1 files changed, 197 insertions, 197 deletions
diff --git a/sp/src/mathlib/3dnow.cpp b/sp/src/mathlib/3dnow.cpp
index 71657044..db17c8c1 100644
--- a/sp/src/mathlib/3dnow.cpp
+++ b/sp/src/mathlib/3dnow.cpp
@@ -1,197 +1,197 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: 3DNow Math primitives.
-//
-//=====================================================================================//
-
-#include <math.h>
-#include <float.h> // Needed for FLT_EPSILON
-#include "basetypes.h"
-#include <memory.h>
-#include "tier0/dbg.h"
-#include "mathlib/mathlib.h"
-#include "mathlib/amd3dx.h"
-#include "mathlib/vector.h"
-
-// memdbgon must be the last include file in a .cpp file!!!
-#include "tier0/memdbgon.h"
-
-#if !defined(COMPILER_MSVC64) && !defined(LINUX)
-// Implement for 64-bit Windows if needed.
-// Clang hits "fatal error: error in backend:" and other errors when trying
-// to compile the inline assembly below. 3DNow support is highly unlikely to
-// be useful/used, so it's not worth spending time on fixing.
-
-#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
-#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
-
-//-----------------------------------------------------------------------------
-// 3D Now Implementations of optimized routines:
-//-----------------------------------------------------------------------------
-float _3DNow_Sqrt(float x)
-{
- Assert( s_bMathlibInitialized );
- float root = 0.f;
-#ifdef _WIN32
- _asm
- {
- femms
- movd mm0, x
- PFRSQRT (mm1,mm0)
- punpckldq mm0, mm0
- PFMUL (mm0, mm1)
- movd root, mm0
- femms
- }
-#elif LINUX
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "pfrsqrt %y0, %y1 \n\t"
- "punpckldq %y1, %y1 \n\t"
- "pfmul %y1, %y0 \n\t"
- : "=y" (root), "=y" (x)
- :"0" (x)
- );
- __asm __volatile__( "femms" );
-#else
-#error
-#endif
-
- return root;
-}
-
-// NJS FIXME: Need to test Recripricol squareroot performance and accuraccy
-// on AMD's before using the specialized instruction.
-float _3DNow_RSqrt(float x)
-{
- Assert( s_bMathlibInitialized );
-
- return 1.f / _3DNow_Sqrt(x);
-}
-
-
-float FASTCALL _3DNow_VectorNormalize (Vector& vec)
-{
- Assert( s_bMathlibInitialized );
- float *v = &vec[0];
- float radius = 0.f;
-
- if ( v[0] || v[1] || v[2] )
- {
-#ifdef _WIN32
- _asm
- {
- mov eax, v
- femms
- movq mm0, QWORD PTR [eax]
- movd mm1, DWORD PTR [eax+8]
- movq mm2, mm0
- movq mm3, mm1
- PFMUL (mm0, mm0)
- PFMUL (mm1, mm1)
- PFACC (mm0, mm0)
- PFADD (mm1, mm0)
- PFRSQRT (mm0, mm1)
- punpckldq mm1, mm1
- PFMUL (mm1, mm0)
- PFMUL (mm2, mm0)
- PFMUL (mm3, mm0)
- movq QWORD PTR [eax], mm2
- movd DWORD PTR [eax+8], mm3
- movd radius, mm1
- femms
- }
-#elif LINUX
- long long a,c;
- int b,d;
- memcpy(&a,&vec[0],sizeof(a));
- memcpy(&b,&vec[2],sizeof(b));
- memcpy(&c,&vec[0],sizeof(c));
- memcpy(&d,&vec[2],sizeof(d));
-
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "pfmul %y3, %y3\n\t"
- "pfmul %y0, %y0 \n\t"
- "pfacc %y3, %y3 \n\t"
- "pfadd %y3, %y0 \n\t"
- "pfrsqrt %y0, %y3 \n\t"
- "punpckldq %y0, %y0 \n\t"
- "pfmul %y3, %y0 \n\t"
- "pfmul %y3, %y2 \n\t"
- "pfmul %y3, %y1 \n\t"
- : "=y" (radius), "=y" (c), "=y" (d)
- : "y" (a), "0" (b), "1" (c), "2" (d)
- );
- memcpy(&vec[0],&c,sizeof(c));
- memcpy(&vec[2],&d,sizeof(d));
- __asm __volatile__( "femms" );
-
-#else
-#error
-#endif
- }
- return radius;
-}
-
-
-void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
-{
- _3DNow_VectorNormalize( vec );
-}
-
-
-// JAY: This complains with the latest processor pack
-#pragma warning(disable: 4730)
-
-float _3DNow_InvRSquared(const float* v)
-{
- Assert( s_bMathlibInitialized );
- float r2 = 1.f;
-#ifdef _WIN32
- _asm { // AMD 3DNow only routine
- mov eax, v
- femms
- movq mm0, QWORD PTR [eax]
- movd mm1, DWORD PTR [eax+8]
- movd mm2, [r2]
- PFMUL (mm0, mm0)
- PFMUL (mm1, mm1)
- PFACC (mm0, mm0)
- PFADD (mm1, mm0)
- PFMAX (mm1, mm2)
- PFRCP (mm0, mm1)
- movd [r2], mm0
- femms
- }
-#elif LINUX
- long long a,c;
- int b;
- memcpy(&a,&v[0],sizeof(a));
- memcpy(&b,&v[2],sizeof(b));
- memcpy(&c,&v[0],sizeof(c));
-
- __asm __volatile__( "femms" );
- __asm __volatile__
- (
- "PFMUL %y2, %y2 \n\t"
- "PFMUL %y3, %y3 \n\t"
- "PFACC %y2, %y2 \n\t"
- "PFADD %y2, %y3 \n\t"
- "PFMAX %y3, %y4 \n\t"
- "PFRCP %y3, %y2 \n\t"
- "movq %y2, %y0 \n\t"
- : "=y" (r2)
- : "0" (r2), "y" (a), "y" (b), "y" (c)
- );
- __asm __volatile__( "femms" );
-#else
-#error
-#endif
-
- return r2;
-}
-
-#endif // COMPILER_MSVC64
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: 3DNow Math primitives.
+//
+//=====================================================================================//
+
+#include <math.h>
+#include <float.h> // Needed for FLT_EPSILON
+#include "basetypes.h"
+#include <memory.h>
+#include "tier0/dbg.h"
+#include "mathlib/mathlib.h"
+#include "mathlib/amd3dx.h"
+#include "mathlib/vector.h"
+
+// memdbgon must be the last include file in a .cpp file!!!
+#include "tier0/memdbgon.h"
+
+#if !defined(COMPILER_MSVC64) && !defined(LINUX)
+// Implement for 64-bit Windows if needed.
+// Clang hits "fatal error: error in backend:" and other errors when trying
+// to compile the inline assembly below. 3DNow support is highly unlikely to
+// be useful/used, so it's not worth spending time on fixing.
+
+#pragma warning(disable:4244) // "conversion from 'const int' to 'float', possible loss of data"
+#pragma warning(disable:4730) // "mixing _m64 and floating point expressions may result in incorrect code"
+
+//-----------------------------------------------------------------------------
+// 3D Now Implementations of optimized routines:
+//-----------------------------------------------------------------------------
+float _3DNow_Sqrt(float x)
+{
+ Assert( s_bMathlibInitialized );
+ float root = 0.f;
+#ifdef _WIN32
+ _asm
+ {
+ femms
+ movd mm0, x
+ PFRSQRT (mm1,mm0)
+ punpckldq mm0, mm0
+ PFMUL (mm0, mm1)
+ movd root, mm0
+ femms
+ }
+#elif LINUX
+ __asm __volatile__( "femms" );
+ __asm __volatile__
+ (
+ "pfrsqrt %y0, %y1 \n\t"
+ "punpckldq %y1, %y1 \n\t"
+ "pfmul %y1, %y0 \n\t"
+ : "=y" (root), "=y" (x)
+ :"0" (x)
+ );
+ __asm __volatile__( "femms" );
+#else
+#error
+#endif
+
+ return root;
+}
+
+// NJS FIXME: Need to test reciprocal square root performance and accuracy
+// on AMD processors before using the specialized instruction.
+float _3DNow_RSqrt(float x)
+{
+ Assert( s_bMathlibInitialized );
+
+ return 1.f / _3DNow_Sqrt(x);
+}
+
+
+float FASTCALL _3DNow_VectorNormalize (Vector& vec)
+{
+ Assert( s_bMathlibInitialized );
+ float *v = &vec[0];
+ float radius = 0.f;
+
+ if ( v[0] || v[1] || v[2] )
+ {
+#ifdef _WIN32
+ _asm
+ {
+ mov eax, v
+ femms
+ movq mm0, QWORD PTR [eax]
+ movd mm1, DWORD PTR [eax+8]
+ movq mm2, mm0
+ movq mm3, mm1
+ PFMUL (mm0, mm0)
+ PFMUL (mm1, mm1)
+ PFACC (mm0, mm0)
+ PFADD (mm1, mm0)
+ PFRSQRT (mm0, mm1)
+ punpckldq mm1, mm1
+ PFMUL (mm1, mm0)
+ PFMUL (mm2, mm0)
+ PFMUL (mm3, mm0)
+ movq QWORD PTR [eax], mm2
+ movd DWORD PTR [eax+8], mm3
+ movd radius, mm1
+ femms
+ }
+#elif LINUX
+ long long a,c;
+ int b,d;
+ memcpy(&a,&vec[0],sizeof(a));
+ memcpy(&b,&vec[2],sizeof(b));
+ memcpy(&c,&vec[0],sizeof(c));
+ memcpy(&d,&vec[2],sizeof(d));
+
+ __asm __volatile__( "femms" );
+ __asm __volatile__
+ (
+ "pfmul %y3, %y3\n\t"
+ "pfmul %y0, %y0 \n\t"
+ "pfacc %y3, %y3 \n\t"
+ "pfadd %y3, %y0 \n\t"
+ "pfrsqrt %y0, %y3 \n\t"
+ "punpckldq %y0, %y0 \n\t"
+ "pfmul %y3, %y0 \n\t"
+ "pfmul %y3, %y2 \n\t"
+ "pfmul %y3, %y1 \n\t"
+ : "=y" (radius), "=y" (c), "=y" (d)
+ : "y" (a), "0" (b), "1" (c), "2" (d)
+ );
+ memcpy(&vec[0],&c,sizeof(c));
+ memcpy(&vec[2],&d,sizeof(d));
+ __asm __volatile__( "femms" );
+
+#else
+#error
+#endif
+ }
+ return radius;
+}
+
+
+void FASTCALL _3DNow_VectorNormalizeFast (Vector& vec)
+{
+ _3DNow_VectorNormalize( vec );
+}
+
+
+// JAY: This complains with the latest processor pack
+#pragma warning(disable: 4730)
+
+float _3DNow_InvRSquared(const float* v)
+{
+ Assert( s_bMathlibInitialized );
+ float r2 = 1.f;
+#ifdef _WIN32
+ _asm { // AMD 3DNow only routine
+ mov eax, v
+ femms
+ movq mm0, QWORD PTR [eax]
+ movd mm1, DWORD PTR [eax+8]
+ movd mm2, [r2]
+ PFMUL (mm0, mm0)
+ PFMUL (mm1, mm1)
+ PFACC (mm0, mm0)
+ PFADD (mm1, mm0)
+ PFMAX (mm1, mm2)
+ PFRCP (mm0, mm1)
+ movd [r2], mm0
+ femms
+ }
+#elif LINUX
+ long long a,c;
+ int b;
+ memcpy(&a,&v[0],sizeof(a));
+ memcpy(&b,&v[2],sizeof(b));
+ memcpy(&c,&v[0],sizeof(c));
+
+ __asm __volatile__( "femms" );
+ __asm __volatile__
+ (
+ "PFMUL %y2, %y2 \n\t"
+ "PFMUL %y3, %y3 \n\t"
+ "PFACC %y2, %y2 \n\t"
+ "PFADD %y2, %y3 \n\t"
+ "PFMAX %y3, %y4 \n\t"
+ "PFRCP %y3, %y2 \n\t"
+ "movq %y2, %y0 \n\t"
+ : "=y" (r2)
+ : "0" (r2), "y" (a), "y" (b), "y" (c)
+ );
+ __asm __volatile__( "femms" );
+#else
+#error
+#endif
+
+ return r2;
+}
+
+#endif // !COMPILER_MSVC64 && !LINUX