aboutsummaryrefslogtreecommitdiff
path: root/mp/src/public/mathlib
diff options
context:
space:
mode:
authorJørgen P. Tjernø <[email protected]>2013-12-02 19:31:46 -0800
committerJørgen P. Tjernø <[email protected]>2013-12-02 19:46:31 -0800
commitf56bb35301836e56582a575a75864392a0177875 (patch)
treede61ddd39de3e7df52759711950b4c288592f0dc /mp/src/public/mathlib
parentMark some more files as text. (diff)
downloadsource-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz
source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip
Fix line endings. WHAMMY.
Diffstat (limited to 'mp/src/public/mathlib')
-rw-r--r--mp/src/public/mathlib/amd3dx.h2376
-rw-r--r--mp/src/public/mathlib/anorms.h50
-rw-r--r--mp/src/public/mathlib/bumpvects.h74
-rw-r--r--mp/src/public/mathlib/compressed_3d_unitvec.h568
-rw-r--r--mp/src/public/mathlib/compressed_light_cube.h48
-rw-r--r--mp/src/public/mathlib/compressed_vector.h1216
-rw-r--r--mp/src/public/mathlib/halton.h142
-rw-r--r--mp/src/public/mathlib/lightdesc.h346
-rw-r--r--mp/src/public/mathlib/math_pfns.h160
-rw-r--r--mp/src/public/mathlib/mathlib.h4372
-rw-r--r--mp/src/public/mathlib/matrixmath.h770
-rw-r--r--mp/src/public/mathlib/noise.h70
-rw-r--r--mp/src/public/mathlib/polyhedron.h146
-rw-r--r--mp/src/public/mathlib/quantize.h282
-rw-r--r--mp/src/public/mathlib/simdvectormatrix.h284
-rw-r--r--mp/src/public/mathlib/spherical_geometry.h146
-rw-r--r--mp/src/public/mathlib/ssemath.h6196
-rw-r--r--mp/src/public/mathlib/ssequaternion.h734
-rw-r--r--mp/src/public/mathlib/vector.h4624
-rw-r--r--mp/src/public/mathlib/vector2d.h1340
-rw-r--r--mp/src/public/mathlib/vector4d.h1372
-rw-r--r--mp/src/public/mathlib/vmatrix.h1900
-rw-r--r--mp/src/public/mathlib/vplane.h364
23 files changed, 13790 insertions, 13790 deletions
diff --git a/mp/src/public/mathlib/amd3dx.h b/mp/src/public/mathlib/amd3dx.h
index 05eb663e..9dab1bfd 100644
--- a/mp/src/public/mathlib/amd3dx.h
+++ b/mp/src/public/mathlib/amd3dx.h
@@ -1,1188 +1,1188 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-/******************************************************************************
-
- Copyright (c) 1999 Advanced Micro Devices, Inc.
-
- LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
- EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
- NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
- PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
- DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
- BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
- INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
- OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
- OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
- NOT APPLY TO YOU.
-
- AMD does not assume any responsibility for any errors which may appear in the
- Materials nor any responsibility to support or update the Materials. AMD retains
- the right to make changes to its test specifications at any time, without notice.
-
- NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
- further information, software, technical information, know-how, or show-how
- available to you.
-
- So that all may benefit from your experience, please report any problems
- or suggestions about this software to [email protected]
-
- AMD Developer Technologies, M/S 585
- Advanced Micro Devices, Inc.
- 5900 E. Ben White Blvd.
- Austin, TX 78741
-
-*******************************************************************************
-
- AMD3DX.H
-
- MACRO FORMAT
- ============
- This file contains inline assembly macros that
- generate AMD-3D instructions in binary format.
- Therefore, C or C++ programmer can use AMD-3D instructions
- without any penalty in their C or C++ source code.
-
- The macro's name and format conventions are as follows:
-
-
- 1. First argument of macro is a destination and
- second argument is a source operand.
- ex) _asm PFCMPEQ (mm3, mm4)
- | |
- dst src
-
- 2. The destination operand can be m0 to m7 only.
- The source operand can be any one of the register
- m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
- that contains effective address.
- ex) _asm PFRCP (MM7, MM6)
- ex) _asm PFRCPIT2 (mm0, mm4)
- ex) _asm PFMUL (mm3, _edi)
-
- 3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
- _ebx, _esi, or _edi that contains the effective address.
- ex) _asm PREFETCH (_edi)
-
- For WATCOM C/C++ users, when using #pragma aux instead of
- _asm, all macro names should be prefixed by a p_ or P_.
- Macros should not be enclosed in quotes.
- ex) p_pfrcp (MM7,MM6)
-
- NOTE: Not all instruction macros, nor all possible
- combinations of operands have been explicitly
- tested. If any errors are found, please report
- them.
-
- EXAMPLE
- =======
- Following program doesn't do anything but it shows you
- how to use inline assembly AMD-3D instructions in C.
- Note that this will only work in flat memory model which
- segment registers cs, ds, ss and es point to the same
- linear address space total less than 4GB.
-
- Used Microsoft VC++ 5.0
-
- #include <stdio.h>
- #include "amd3dx.h"
-
- void main ()
- {
- float x = (float)1.25;
- float y = (float)1.25;
- float z, zz;
-
- _asm {
- movd mm1, x
- movd mm2, y
- pfmul (mm1, mm2)
- movd z, mm1
- femms
- }
-
- printf ("value of z = %f\n", z);
-
- //
- // Demonstration of using the memory instead of
- // multimedia register
- //
- _asm {
- movd mm3, x
- lea esi, y // load effective address of y
- pfmul (mm3, _esi)
- movd zz, mm3
- femms
- }
-
- printf ("value of zz = %f\n", zz);
- }
-
- #pragma aux EXAMPLE with WATCOM C/C++ v11.x
- ===========================================
-
- extern void Add(float *__Dest, float *__A, float *__B);
- #pragma aux Add = \
- p_femms \
- "movd mm6,[esi]" \
- p_pfadd(mm6,_edi) \
- "movd [ebx],mm6" \
- p_femms \
- parm [ebx] [esi] [edi];
-
-*******************************************************************************/
-
-#ifndef _K3DMACROSINCLUDED_
-#define _K3DMACROSINCLUDED_
-
-#if defined (__WATCOMC__)
-
-// The WATCOM C/C++ version of the 3DNow! macros.
-//
-// The older, combined register style for WATCOM C/C++ macros is not
-// supported.
-
-/* Operand defines for instructions with two operands */
-#define _k3d_mm0_mm0 0xc0
-#define _k3d_mm0_mm1 0xc1
-#define _k3d_mm0_mm2 0xc2
-#define _k3d_mm0_mm3 0xc3
-#define _k3d_mm0_mm4 0xc4
-#define _k3d_mm0_mm5 0xc5
-#define _k3d_mm0_mm6 0xc6
-#define _k3d_mm0_mm7 0xc7
-#define _k3d_mm0_eax 0x00
-#define _k3d_mm0_ecx 0x01
-#define _k3d_mm0_edx 0x02
-#define _k3d_mm0_ebx 0x03
-#define _k3d_mm0_esi 0x06
-#define _k3d_mm0_edi 0x07
-#define _k3d_mm1_mm0 0xc8
-#define _k3d_mm1_mm1 0xc9
-#define _k3d_mm1_mm2 0xca
-#define _k3d_mm1_mm3 0xcb
-#define _k3d_mm1_mm4 0xcc
-#define _k3d_mm1_mm5 0xcd
-#define _k3d_mm1_mm6 0xce
-#define _k3d_mm1_mm7 0xcf
-#define _k3d_mm1_eax 0x08
-#define _k3d_mm1_ecx 0x09
-#define _k3d_mm1_edx 0x0a
-#define _k3d_mm1_ebx 0x0b
-#define _k3d_mm1_esi 0x0e
-#define _k3d_mm1_edi 0x0f
-#define _k3d_mm2_mm0 0xd0
-#define _k3d_mm2_mm1 0xd1
-#define _k3d_mm2_mm2 0xd2
-#define _k3d_mm2_mm3 0xd3
-#define _k3d_mm2_mm4 0xd4
-#define _k3d_mm2_mm5 0xd5
-#define _k3d_mm2_mm6 0xd6
-#define _k3d_mm2_mm7 0xd7
-#define _k3d_mm2_eax 0x10
-#define _k3d_mm2_ecx 0x11
-#define _k3d_mm2_edx 0x12
-#define _k3d_mm2_ebx 0x13
-#define _k3d_mm2_esi 0x16
-#define _k3d_mm2_edi 0x17
-#define _k3d_mm3_mm0 0xd8
-#define _k3d_mm3_mm1 0xd9
-#define _k3d_mm3_mm2 0xda
-#define _k3d_mm3_mm3 0xdb
-#define _k3d_mm3_mm4 0xdc
-#define _k3d_mm3_mm5 0xdd
-#define _k3d_mm3_mm6 0xde
-#define _k3d_mm3_mm7 0xdf
-#define _k3d_mm3_eax 0x18
-#define _k3d_mm3_ecx 0x19
-#define _k3d_mm3_edx 0x1a
-#define _k3d_mm3_ebx 0x1b
-#define _k3d_mm3_esi 0x1e
-#define _k3d_mm3_edi 0x1f
-#define _k3d_mm4_mm0 0xe0
-#define _k3d_mm4_mm1 0xe1
-#define _k3d_mm4_mm2 0xe2
-#define _k3d_mm4_mm3 0xe3
-#define _k3d_mm4_mm4 0xe4
-#define _k3d_mm4_mm5 0xe5
-#define _k3d_mm4_mm6 0xe6
-#define _k3d_mm4_mm7 0xe7
-#define _k3d_mm4_eax 0x20
-#define _k3d_mm4_ecx 0x21
-#define _k3d_mm4_edx 0x22
-#define _k3d_mm4_ebx 0x23
-#define _k3d_mm4_esi 0x26
-#define _k3d_mm4_edi 0x27
-#define _k3d_mm5_mm0 0xe8
-#define _k3d_mm5_mm1 0xe9
-#define _k3d_mm5_mm2 0xea
-#define _k3d_mm5_mm3 0xeb
-#define _k3d_mm5_mm4 0xec
-#define _k3d_mm5_mm5 0xed
-#define _k3d_mm5_mm6 0xee
-#define _k3d_mm5_mm7 0xef
-#define _k3d_mm5_eax 0x28
-#define _k3d_mm5_ecx 0x29
-#define _k3d_mm5_edx 0x2a
-#define _k3d_mm5_ebx 0x2b
-#define _k3d_mm5_esi 0x2e
-#define _k3d_mm5_edi 0x2f
-#define _k3d_mm6_mm0 0xf0
-#define _k3d_mm6_mm1 0xf1
-#define _k3d_mm6_mm2 0xf2
-#define _k3d_mm6_mm3 0xf3
-#define _k3d_mm6_mm4 0xf4
-#define _k3d_mm6_mm5 0xf5
-#define _k3d_mm6_mm6 0xf6
-#define _k3d_mm6_mm7 0xf7
-#define _k3d_mm6_eax 0x30
-#define _k3d_mm6_ecx 0x31
-#define _k3d_mm6_edx 0x32
-#define _k3d_mm6_ebx 0x33
-#define _k3d_mm6_esi 0x36
-#define _k3d_mm6_edi 0x37
-#define _k3d_mm7_mm0 0xf8
-#define _k3d_mm7_mm1 0xf9
-#define _k3d_mm7_mm2 0xfa
-#define _k3d_mm7_mm3 0xfb
-#define _k3d_mm7_mm4 0xfc
-#define _k3d_mm7_mm5 0xfd
-#define _k3d_mm7_mm6 0xfe
-#define _k3d_mm7_mm7 0xff
-#define _k3d_mm7_eax 0x38
-#define _k3d_mm7_ecx 0x39
-#define _k3d_mm7_edx 0x3a
-#define _k3d_mm7_ebx 0x3b
-#define _k3d_mm7_esi 0x3e
-#define _k3d_mm7_edi 0x3f
-
-#define _k3d_name_xlat_m0 _mm0
-#define _k3d_name_xlat_m1 _mm1
-#define _k3d_name_xlat_m2 _mm2
-#define _k3d_name_xlat_m3 _mm3
-#define _k3d_name_xlat_m4 _mm4
-#define _k3d_name_xlat_m5 _mm5
-#define _k3d_name_xlat_m6 _mm6
-#define _k3d_name_xlat_m7 _mm7
-#define _k3d_name_xlat_M0 _mm0
-#define _k3d_name_xlat_M1 _mm1
-#define _k3d_name_xlat_M2 _mm2
-#define _k3d_name_xlat_M3 _mm3
-#define _k3d_name_xlat_M4 _mm4
-#define _k3d_name_xlat_M5 _mm5
-#define _k3d_name_xlat_M6 _mm6
-#define _k3d_name_xlat_M7 _mm7
-#define _k3d_name_xlat_mm0 _mm0
-#define _k3d_name_xlat_mm1 _mm1
-#define _k3d_name_xlat_mm2 _mm2
-#define _k3d_name_xlat_mm3 _mm3
-#define _k3d_name_xlat_mm4 _mm4
-#define _k3d_name_xlat_mm5 _mm5
-#define _k3d_name_xlat_mm6 _mm6
-#define _k3d_name_xlat_mm7 _mm7
-#define _k3d_name_xlat_MM0 _mm0
-#define _k3d_name_xlat_MM1 _mm1
-#define _k3d_name_xlat_MM2 _mm2
-#define _k3d_name_xlat_MM3 _mm3
-#define _k3d_name_xlat_MM4 _mm4
-#define _k3d_name_xlat_MM5 _mm5
-#define _k3d_name_xlat_MM6 _mm6
-#define _k3d_name_xlat_MM7 _mm7
-#define _k3d_name_xlat_eax _eax
-#define _k3d_name_xlat_ebx _ebx
-#define _k3d_name_xlat_ecx _ecx
-#define _k3d_name_xlat_edx _edx
-#define _k3d_name_xlat_esi _esi
-#define _k3d_name_xlat_edi _edi
-#define _k3d_name_xlat_ebp _ebp
-#define _k3d_name_xlat_EAX _eax
-#define _k3d_name_xlat_EBX _ebx
-#define _k3d_name_xlat_ECX _ecx
-#define _k3d_name_xlat_EDX _edx
-#define _k3d_name_xlat_ESI _esi
-#define _k3d_name_xlat_EDI _edi
-#define _k3d_name_xlat_EBP _ebp
-#define _k3d_name_xlat__eax _eax
-#define _k3d_name_xlat__ebx _ebx
-#define _k3d_name_xlat__ecx _ecx
-#define _k3d_name_xlat__edx _edx
-#define _k3d_name_xlat__esi _esi
-#define _k3d_name_xlat__edi _edi
-#define _k3d_name_xlat__ebp _ebp
-#define _k3d_name_xlat__EAX _eax
-#define _k3d_name_xlat__EBX _ebx
-#define _k3d_name_xlat__ECX _ecx
-#define _k3d_name_xlat__EDX _edx
-#define _k3d_name_xlat__ESI _esi
-#define _k3d_name_xlat__EDI _edi
-#define _k3d_name_xlat__EBP _ebp
-
-#define _k3d_xglue3(a,b,c) a##b##c
-#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c)
-#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src)
-
-/* Operand defines for prefetch and prefetchw */
-
-#define _k3d_pref_eax 0x00
-#define _k3d_pref_ecx 0x01
-#define _k3d_pref_edx 0x02
-#define _k3d_pref_ebx 0x03
-#define _k3d_pref_esi 0x06
-#define _k3d_pref_edi 0x07
-#define _k3d_pref_EAX 0x00
-#define _k3d_pref_ECX 0x01
-#define _k3d_pref_EDX 0x02
-#define _k3d_pref_EBX 0x03
-#define _k3d_pref_ESI 0x06
-#define _k3d_pref_EDI 0x07
-#define _k3d_prefw_eax 0x08
-#define _k3d_prefw_ecx 0x09
-#define _k3d_prefw_edx 0x0A
-#define _k3d_prefw_ebx 0x0B
-#define _k3d_prefw_esi 0x0E
-#define _k3d_prefw_edi 0x0F
-#define _k3d_prefw_EAX 0x08
-#define _k3d_prefw_ECX 0x09
-#define _k3d_prefw_EDX 0x0A
-#define _k3d_prefw_EBX 0x0B
-#define _k3d_prefw_ESI 0x0E
-#define _k3d_prefw_EDI 0x0F
-
-/* Defines for 3DNow! instructions */
-#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d
-#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae
-#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e
-#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0
-#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90
-#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0
-#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4
-#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94
-#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4
-#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96
-#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6
-#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6
-#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97
-#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7
-#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a
-#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa
-#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d
-#define FEMMS db 0x0f, 0x0e
-#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf
-#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7
-#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src
-#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src
-#define CPUID db 0x0f, 0xa2
-
-/* Defines for new, K7 opcodes */
-#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a
-#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e
-#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb
-#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src)
-#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src)
-#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src)
-#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src)
-#define PMULHUW(dst,src) db 0x0f, 0xe4, _k3d_MODRM(dst,src)
-#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src)
-#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src)
-#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src)
-#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src)
-#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src)
-#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk
-#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk
-#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk
-#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst)
-#define SFENCE db 0x0f, 0xae, 0xf8
-
-/* Memory/offset versions of the opcodes */
-#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d
-#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae
-#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e
-#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0
-#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90
-#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0
-#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4
-#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94
-#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4
-#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96
-#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6
-#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6
-#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97
-#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7
-#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a
-#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xaa
-#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d
-#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf
-#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7
-
-
-/* Memory/offset versions of the new, K7 opcodes */
-#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a
-#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e
-#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb
-#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off
-#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off
-#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off
-#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off
-#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off
-#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off
-#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off
-#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk
-#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk
-
-
-/* Defines for 3DNow! instructions for use in pragmas */
-#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d
-#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae
-#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e
-#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0
-#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90
-#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0
-#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4
-#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94
-#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4
-#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96
-#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6
-#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6
-#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97
-#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7
-#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a
-#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa
-#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d
-#define p_femms 0x0f 0x0e
-#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf
-#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7
-#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src
-#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src
-#define p_pfnacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x8a /* K7 register forms: no displacement byte */
-#define p_pfpnacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x8e
-#define p_pswapd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbb
-#define p_pminub(dst,src) 0x0f 0xda _k3d_MODRM(dst,src)
-#define p_pmaxub(dst,src) 0x0f 0xde _k3d_MODRM(dst,src)
-#define p_pminsw(dst,src) 0x0f 0xea _k3d_MODRM(dst,src)
-#define p_pmaxsw(dst,src) 0x0f 0xee _k3d_MODRM(dst,src)
-#define p_pmulhuw(dst,src) 0x0f 0xe4 _k3d_MODRM(dst,src)
-#define p_pavgb(dst,src) 0x0f 0xe0 _k3d_MODRM(dst,src)
-#define p_pavgw(dst,src) 0x0f 0xe3 _k3d_MODRM(dst,src)
-#define p_psadbw(dst,src) 0x0f 0xf6 _k3d_MODRM(dst,src)
-#define p_pmovmskb(dst,src) 0x0f 0xd7 _k3d_MODRM(dst,src)
-#define p_pmaskmovq(dst,src) 0x0f 0xf7 _k3d_MODRM(dst,src)
-#define p_pinsrw(dst,src,msk) 0x0f 0xc4 _k3d_MODRM(dst,src) msk
-#define p_pextrw(dst,src,msk) 0x0f 0xc5 _k3d_MODRM(dst,src) msk
-#define p_pshufw(dst,src,msk) 0x0f 0x70 _k3d_MODRM(dst,src) msk
-#define p_movntq(dst,src) 0x0f 0xe7 _k3d_MODRM(src,dst) /* store: operands swapped in ModR/M, as in MOVNTQ */
-
-#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
-#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
-#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
-#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
-#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
-#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
-#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
-#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
-#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
-#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
-#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
-#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
-#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
-#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
-#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
-#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
-#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
-#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
-#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
-#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
-#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
-#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
-#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
-#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
-
-
-#define P_PF2ID(dst,src) p_pf2id(dst,src)
-#define P_PFACC(dst,src) p_pfacc(dst,src)
-#define P_PFADD(dst,src) p_pfadd(dst,src)
-#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src)
-#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src)
-#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src)
-#define P_PFMAX(dst,src) p_pfmax(dst,src)
-#define P_PFMIN(dst,src) p_pfmin(dst,src)
-#define P_PFMUL(dst,src) p_pfmul(dst,src)
-#define P_PFRCP(dst,src) p_pfrcp(dst,src)
-#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src)
-#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src)
-#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src)
-#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src)
-#define P_PFSUB(dst,src) p_pfsub(dst,src)
-#define P_PFSUBR(dst,src) p_pfsubr(dst,src)
-#define P_PI2FD(dst,src) p_pi2fd(dst,src)
-#define P_FEMMS p_femms
-#define P_PAVGUSB(dst,src) p_pavgusb(dst,src)
-#define P_PMULHRW(dst,src) p_pmulhrw(dst,src)
-#define P_PREFETCH(src) p_prefetch(src)
-#define P_PREFETCHW(src) p_prefetchw(src)
-#define p_CPUID 0x0f 0xa2
-#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off)
-#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off)
-#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off)
-#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off)
-#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off)
-#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off)
-#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off)
-#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off)
-#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off)
-#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off)
-#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off)
-#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off)
-#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off)
-#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off)
-#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off)
-#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off)
-#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off)
-#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off)
-#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off)
-/* Upper-case spellings of the K7 register-form pragma macros (bodies are the p_... defines earlier in this section). */
-#define P_PFNACC(dst,src) p_pfnacc(dst,src)
-#define P_FPPNACC(dst,src) p_pfpnacc(dst,src)
-#define P_PSWAPD(dst,src) p_pswapd(dst,src)
-#define P_PMINUB(dst,src) p_pminub(dst,src)
-#define P_PMAXUB(dst,src) p_pmaxub(dst,src)
-#define P_PMINSW(dst,src) p_pminsw(dst,src)
-#define P_PMAXSW(dst,src) p_pmaxsw(dst,src)
-#define P_PMULHUW(dst,src) p_pmulhuw(dst,src)
-#define P_PAVGB(dst,src) p_pavgb(dst,src)
-#define P_PAVGW(dst,src) p_pavgw(dst,src)
-#define P_PSADBW(dst,src) p_psadbw(dst,src)
-#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src)
-#define P_PMASKMOVQ(dst,src) p_pmaskmovq(dst,src)
-#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src,msk)
-#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src,msk)
-#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src,msk)
-#define P_MOVNTQ(dst,src) p_movntq(dst,src)
-/* Lower-case spellings of the K7 memory/offset pragma macros; the bodies are the P_...M defines earlier in this section. */
-#define p_pfnaccm(dst,src,off) P_PFNACCM(dst,src,off)
-#define p_pfpnaccm(dst,src,off) P_FPPNACCM(dst,src,off)
-#define p_pswapdm(dst,src,off) P_PSWAPDM(dst,src,off)
-#define p_pminubm(dst,src,off) P_PMINUBM(dst,src,off)
-#define p_pmaxubm(dst,src,off) P_PMAXUBM(dst,src,off)
-#define p_pminswm(dst,src,off) P_PMINSWM(dst,src,off)
-#define p_pmaxswm(dst,src,off) P_PMAXSWM(dst,src,off)
-#define p_pmulhuwm(dst,src,off) P_PMULHUWM(dst,src,off)
-#define p_pavgbm(dst,src,off) P_PAVGBM(dst,src,off)
-#define p_pavgwm(dst,src,off) P_PAVGWM(dst,src,off)
-#define p_psadbwm(dst,src,off) P_PSADBWM(dst,src,off)
-#define p_pmovmskbm(dst,src,off) P_PMOVMSKBM(dst,src,off)
-#define p_pmaskmovqm(dst,src,off) P_PMASKMOVQM(dst,src,off)
-#define p_pinsrwm(dst,src,off,msk) P_PINSRWM(dst,src,off,msk)
-#define p_pshufwm(dst,src,off,msk) P_PSHUFWM(dst,src,off,msk)
-#define p_movntqm(dst,src,off) P_MOVNTQM(dst,src,off)
-
-#elif defined (_MSC_VER) && !defined (__MWERKS__)
-// The Microsoft Visual C++ version of the 3DNow! macros.
-
-// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
-#pragma warning(disable:4799)
-
-// Defines for operands.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-
-// These defines are for compatibility with the previous version of the header file.
-#define _K3D_M0 0xc0
-#define _K3D_M1 0xc1
-#define _K3D_M2 0xc2
-#define _K3D_M3 0xc3
-#define _K3D_M4 0xc4
-#define _K3D_M5 0xc5
-#define _K3D_M6 0xc6
-#define _K3D_M7 0xc7
-#define _K3D_m0 0xc0
-#define _K3D_m1 0xc1
-#define _K3D_m2 0xc2
-#define _K3D_m3 0xc3
-#define _K3D_m4 0xc4
-#define _K3D_m5 0xc5
-#define _K3D_m6 0xc6
-#define _K3D_m7 0xc7
-#define _K3D__EAX 0x00
-#define _K3D__ECX 0x01
-#define _K3D__EDX 0x02
-#define _K3D__EBX 0x03
-#define _K3D__ESI 0x06
-#define _K3D__EDI 0x07
-#define _K3D__eax 0x00
-#define _K3D__ecx 0x01
-#define _K3D__edx 0x02
-#define _K3D__ebx 0x03
-#define _K3D__esi 0x06
-#define _K3D__edi 0x07
-
-// General 3DNow! instruction format that is supported by
-// these macros. Note that only the most basic form of memory
-// operands are supported by these macros.
-
-#define InjK3DOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjK3DMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjMMXOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
-}
-
-#define InjMMXMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
-}
-
-#define _3DNowOpcodePF2ID 0x1d
-#define _3DNowOpcodePFACC 0xae
-#define _3DNowOpcodePFADD 0x9e
-#define _3DNowOpcodePFCMPEQ 0xb0
-#define _3DNowOpcodePFCMPGE 0x90
-#define _3DNowOpcodePFCMPGT 0xa0
-#define _3DNowOpcodePFMAX 0xa4
-#define _3DNowOpcodePFMIN 0x94
-#define _3DNowOpcodePFMUL 0xb4
-#define _3DNowOpcodePFRCP 0x96
-#define _3DNowOpcodePFRCPIT1 0xa6
-#define _3DNowOpcodePFRCPIT2 0xb6
-#define _3DNowOpcodePFRSQRT 0x97
-#define _3DNowOpcodePFRSQIT1 0xa7
-#define _3DNowOpcodePFSUB 0x9a
-#define _3DNowOpcodePFSUBR 0xaa
-#define _3DNowOpcodePI2FD 0x0d
-#define _3DNowOpcodePAVGUSB 0xbf
-#define _3DNowOpcodePMULHRW 0xb7
-#define _3DNowOpcodePFNACC 0x8a
-#define _3DNowOpcodeFPPNACC 0x8e
-#define _3DNowOpcodePSWAPD 0xbb
-#define _3DNowOpcodePMINUB 0xda
-#define _3DNowOpcodePMAXUB 0xde
-#define _3DNowOpcodePMINSW 0xea
-#define _3DNowOpcodePMAXSW 0xee
-#define _3DNowOpcodePMULHUW 0xe4
-#define _3DNowOpcodePAVGB 0xe0
-#define _3DNowOpcodePAVGW 0xe3
-#define _3DNowOpcodePSADBW 0xf6
-#define _3DNowOpcodePMOVMSKB 0xd7
-#define _3DNowOpcodePMASKMOVQ 0xf7
-#define _3DNowOpcodePINSRW 0xc4
-#define _3DNowOpcodePEXTRW 0xc5
-#define _3DNowOpcodePSHUFW 0x70
-#define _3DNowOpcodeMOVNTQ 0xe7
-#define _3DNowOpcodePREFETCHT 0x18
-
-
-#define PF2ID(dst,src) InjK3DOps(dst, src, PF2ID)
-#define PFACC(dst,src) InjK3DOps(dst, src, PFACC)
-#define PFADD(dst,src) InjK3DOps(dst, src, PFADD)
-#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ)
-#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE)
-#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT)
-#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX)
-#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN)
-#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL)
-#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP)
-#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1)
-#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2)
-#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT)
-#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1)
-#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB)
-#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR)
-#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD)
-#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB)
-#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW)
-
-#define FEMMS \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0e \
-}
-
-#define PREFETCH(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (_K3D_##src & 0x07) \
-}
-
-/* Prefetch with a short offset, < 127 and > -127
- Careful! Doesn't check for your offset being
- in range. */
-
-#define PREFETCHM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x40 | (_K3D_##src & 0x07)) \
- _asm _emit off \
-}
-
-/* Prefetch with a long offset */
-
-#define PREFETCHMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x80 | (_K3D_##src & 0x07)) \
- _asm _emit (off & 0x000000ff) \
- _asm _emit (off & 0x0000ff00) >> 8 \
- _asm _emit (off & 0x00ff0000) >> 16 \
- _asm _emit (off & 0xff000000) >> 24 \
-}
-
-#define PREFETCHW(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x08 | (_K3D_##src & 0x07)) \
-}
-
-#define PREFETCHWM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x48 | (_K3D_##src & 0x07) \
- _asm _emit off \
-}
-
-#define PREFETCHWMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x88 | (_K3D_##src & 0x07) \
- _asm _emit (off & 0x000000ff) \
- _asm _emit (off & 0x0000ff00) >> 8 \
- _asm _emit (off & 0x00ff0000) >> 16 \
- _asm _emit (off & 0xff000000) >> 24 \
-}
-
-#define CPUID \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xa2 \
-}
-
-
-/* Defines for new, K7 opcodes */
-#define SFENCE \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xae \
- _asm _emit 0xf8 \
-}
-
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) _asm _emit msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the opcodes */
-#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB)
-#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID)
-#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC)
-#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD)
-#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ)
-#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE)
-#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT)
-#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX)
-#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN)
-#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL)
-#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP)
-#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1)
-#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2)
-#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT)
-#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1)
-#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB)
-#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR)
-#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD)
-#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW)
-
-
-/* Memory/offset versions of the K7 opcodes */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-
-/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */
-#define PAVGUSB(dst,src) pavgusb dst,src
-#define PF2ID(dst,src) pf2id dst,src
-#define PFACC(dst,src) pfacc dst,src
-#define PFADD(dst,src) pfadd dst,src
-#define PFCMPEQ(dst,src) pfcmpeq dst,src
-#define PFCMPGE(dst,src) pfcmpge dst,src
-#define PFCMPGT(dst,src) pfcmpgt dst,src
-#define PFMAX(dst,src) pfmax dst,src
-#define PFMIN(dst,src) pfmin dst,src
-#define PFMUL(dst,src) pfmul dst,src
-#define PFRCP(dst,src) pfrcp dst,src
-#define PFRCPIT1(dst,src) pfrcpit1 dst,src
-#define PFRCPIT2(dst,src) pfrcpit2 dst,src
-#define PFRSQRT(dst,src) pfrsqrt dst,src
-#define PFRSQIT1(dst,src) pfrsqit1 dst,src
-#define PFSUB(dst,src) pfsub dst,src
-#define PFSUBR(dst,src) pfsubr dst,src
-#define PI2FD(dst,src) pi2fd dst,src
-#define PMULHRW(dst,src) pmulhrw dst,src
-#define PREFETCH(src) prefetch src
-#define PREFETCHW(src) prefetchw src
-
-#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off]
-#define PF2IDM(dst,src,off) PF2ID dst,[src+off]
-#define PFACCM(dst,src,off) PFACC dst,[src+off]
-#define PFADDM(dst,src,off) PFADD dst,[src+off]
-#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off]
-#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off]
-#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off]
-#define PFMAXM(dst,src,off) PFMAX dst,[src+off]
-#define PFMINM(dst,src,off) PFMIN dst,[src+off]
-#define PFMULM(dst,src,off) PFMUL dst,[src+off]
-#define PFRCPM(dst,src,off) PFRCP dst,[src+off]
-#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off]
-#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off]
-#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off]
-#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off]
-#define PFSUBM(dst,src,off) PFSUB dst,[src+off]
-#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off]
-#define PI2FDM(dst,src,off) PI2FD dst,[src+off]
-#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off]
-
-
-#if defined (__MWERKS__)
-// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them
-
-// Defines for operands.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-
-#define InjK3DOps(dst,src,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst
-
-#define InjK3DMOps(dst,src,off,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
-
-#define InjMMXOps(dst,src,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
-
-#define InjMMXMOps(dst,src,off,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
-
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-/* Immediate-mask forms: the mask byte is appended to the same db list, matching PINSRWM/PEXTRWM/PSHUFWM. */
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW), msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW), msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW), msk
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the K7 opcodes */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk
-#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-
-#define PFNACC(dst,src) PFNACC dst,src
-#define PFPNACC(dst,src) PFPNACC dst,src
-#define PSWAPD(dst,src) PSWAPD dst,src
-#define PMINUB(dst,src) PMINUB dst,src
-#define PMAXUB(dst,src) PMAXUB dst,src
-#define PMINSW(dst,src) PMINSW dst,src
-#define PMAXSW(dst,src) PMAXSW dst,src
-#define PMULHUW(dst,src) PMULHUW dst,src
-#define PAVGB(dst,src) PAVGB dst,src
-#define PAVGW(dst,src) PAVGW dst,src
-#define PSADBW(dst,src) PSADBW dst,src
-#define PMOVMSKB(dst,src) PMOVMSKB dst,src
-#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src
-#define PINSRW(dst,src,msk) PINSRW dst,src,msk
-#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk
-#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk
-#define MOVNTQ(dst,src) MOVNTQ dst,src
-
-#define PFNACCM(dst,src,off) PFNACC dst,[src+off]
-#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off]
-#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off]
-#define PMINUBM(dst,src,off) PMINUB dst,[src+off]
-#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off]
-#define PMINSWM(dst,src,off) PMINSW dst,[src+off]
-#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off]
-#define PMULHUWM(dst,src,off) PMULHUW dst,[src+off]
-#define PAVGBM(dst,src,off) PAVGB dst,[src+off]
-#define PAVGWM(dst,src,off) PAVGW dst,[src+off]
-#define PSADBWM(dst,src,off) PSADBW dst,[src+off]
-#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off]
-#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off]
-#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk
-#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk
-#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk
-#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off]
-
-#endif
-
-#endif
-
-/* Just to deal with lower case. */
-#define pf2id(dst,src) PF2ID(dst,src)
-#define pfacc(dst,src) PFACC(dst,src)
-#define pfadd(dst,src) PFADD(dst,src)
-#define pfcmpeq(dst,src) PFCMPEQ(dst,src)
-#define pfcmpge(dst,src) PFCMPGE(dst,src)
-#define pfcmpgt(dst,src) PFCMPGT(dst,src)
-#define pfmax(dst,src) PFMAX(dst,src)
-#define pfmin(dst,src) PFMIN(dst,src)
-#define pfmul(dst,src) PFMUL(dst,src)
-#define pfrcp(dst,src) PFRCP(dst,src)
-#define pfrcpit1(dst,src) PFRCPIT1(dst,src)
-#define pfrcpit2(dst,src) PFRCPIT2(dst,src)
-#define pfrsqrt(dst,src) PFRSQRT(dst,src)
-#define pfrsqit1(dst,src) PFRSQIT1(dst,src)
-#define pfsub(dst,src) PFSUB(dst,src)
-#define pfsubr(dst,src) PFSUBR(dst,src)
-#define pi2fd(dst,src) PI2FD(dst,src)
-#define femms FEMMS
-#define pavgusb(dst,src) PAVGUSB(dst,src)
-#define pmulhrw(dst,src) PMULHRW(dst,src)
-#define prefetch(src) PREFETCH(src)
-#define prefetchw(src) PREFETCHW(src)
-
-#define prefetchm(src,off) PREFETCHM(src,off)
-#define prefetchmlong(src,off) PREFETCHMLONG(src,off)
-#define prefetchwm(src,off) PREFETCHWM(src,off)
-#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off)
-
-#define pfnacc(dst,src) PFNACC(dst,src)
-#define pfpnacc(dst,src) PFPNACC(dst,src)
-#define pswapd(dst,src) PSWAPD(dst,src)
-#define pminub(dst,src) PMINUB(dst,src)
-#define pmaxub(dst,src) PMAXUB(dst,src)
-#define pminsw(dst,src) PMINSW(dst,src)
-#define pmaxsw(dst,src) PMAXSW(dst,src)
-#define pmulhuw(dst,src) PMULHUW(dst,src)
-#define pavgb(dst,src) PAVGB(dst,src)
-#define pavgw(dst,src) PAVGW(dst,src)
-#define psadbw(dst,src) PSADBW(dst,src)
-#define pmovmskb(dst,src) PMOVMSKB(dst,src)
-#define pmaskmovq(dst,src) PMASKMOVQ(dst,src)
-#define pinsrw(dst,src,msk) PINSRW(dst,src,msk)
-#define pextrw(dst,src,msk) PEXTRW(dst,src,msk)
-#define pshufw(dst,src,msk) PSHUFW(dst,src,msk)
-#define movntq(dst,src) MOVNTQ(dst,src)
-#define prefetchnta(mem) PREFETCHNTA(mem)
-#define prefetcht0(mem) PREFETCHT0(mem)
-#define prefetcht1(mem) PREFETCHT1(mem)
-#define prefetcht2(mem) PREFETCHT2(mem)
-
-
-#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off)
-#define pf2idm(dst,src,off) PF2IDM(dst,src,off)
-#define pfaccm(dst,src,off) PFACCM(dst,src,off)
-#define pfaddm(dst,src,off) PFADDM(dst,src,off)
-#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off)
-#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off)
-#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off)
-#define pfmaxm(dst,src,off) PFMAXM(dst,src,off)
-#define pfminm(dst,src,off) PFMINM(dst,src,off)
-#define pfmulm(dst,src,off) PFMULM(dst,src,off)
-#define pfrcpm(dst,src,off) PFRCPM(dst,src,off)
-#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off)
-#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off)
-#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off)
-#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off)
-#define pfsubm(dst,src,off) PFSUBM(dst,src,off)
-#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off)
-#define pi2fdm(dst,src,off) PI2FDM(dst,src,off)
-#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off)
-#define cpuid CPUID
-#define sfence SFENCE
-
-#define pfnaccm(dst,src,off) PFNACCM(dst,src,off)
-#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off)
-#define pswapdm(dst,src,off) PSWAPDM(dst,src,off)
-#define pminubm(dst,src,off) PMINUBM(dst,src,off)
-#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off)
-#define pminswm(dst,src,off) PMINSWM(dst,src,off)
-#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off)
-#define pmulhuwm(dst,src,off) PMULHUWM(dst,src,off)
-#define pavgbm(dst,src,off) PAVGBM(dst,src,off)
-#define pavgwm(dst,src,off) PAVGWM(dst,src,off)
-#define psadbwm(dst,src,off) PSADBWM(dst,src,off)
-#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off)
-#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
-#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk)
-#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk)
-#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk)
-#define movntqm(dst,src,off) MOVNTQM(dst,src,off)
-/* Lower-case aliases for the memory/offset prefetch hints; they take (mem,off), so they map to the two-argument ...M macros. */
-#define prefetchntam(mem,off) PREFETCHNTAM(mem,off)
-#define prefetcht0m(mem,off) PREFETCHT0M(mem,off)
-#define prefetcht1m(mem,off) PREFETCHT1M(mem,off)
-#define prefetcht2m(mem,off) PREFETCHT2M(mem,off)
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============//
+/******************************************************************************
+
+ Copyright (c) 1999 Advanced Micro Devices, Inc.
+
+ LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
+ EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
+ NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
+ PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
+ DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
+ BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
+ INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
+ OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
+ OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
+ NOT APPLY TO YOU.
+
+ AMD does not assume any responsibility for any errors which may appear in the
+ Materials nor any responsibility to support or update the Materials. AMD retains
+ the right to make changes to its test specifications at any time, without notice.
+
+ NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
+ further information, software, technical information, know-how, or show-how
+ available to you.
+
+ So that all may benefit from your experience, please report any problems
+ or suggestions about this software to [email protected]
+
+ AMD Developer Technologies, M/S 585
+ Advanced Micro Devices, Inc.
+ 5900 E. Ben White Blvd.
+ Austin, TX 78741
+
+*******************************************************************************
+
+ AMD3DX.H
+
+ MACRO FORMAT
+ ============
+ This file contains inline assembly macros that
+ generate AMD-3D instructions in binary format.
+ Therefore, C or C++ programmer can use AMD-3D instructions
+ without any penalty in their C or C++ source code.
+
+ The macro's name and format conventions are as follows:
+
+
+ 1. First argument of macro is a destination and
+ second argument is a source operand.
+ ex) _asm PFCMPEQ (mm3, mm4)
+ | |
+ dst src
+
+ 2. The destination operand can be m0 to m7 only.
+ The source operand can be any one of the register
+ m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
+ that contains effective address.
+ ex) _asm PFRCP (MM7, MM6)
+ ex) _asm PFRCPIT2 (mm0, mm4)
+ ex) _asm PFMUL (mm3, _edi)
+
+ 3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
+ _ebx, _esi, or _edi that contains the effective address.
+ ex) _asm PREFETCH (_edi)
+
+ For WATCOM C/C++ users, when using #pragma aux instead of
+ _asm, all macro names should be prefixed by a p_ or P_.
+ Macros should not be enclosed in quotes.
+ ex) p_pfrcp (MM7,MM6)
+
+ NOTE: Not all instruction macros, nor all possible
+ combinations of operands have been explicitly
+ tested. If any errors are found, please report
+ them.
+
+ EXAMPLE
+ =======
+ Following program doesn't do anything but it shows you
+ how to use inline assembly AMD-3D instructions in C.
+ Note that this will only work in a flat memory model, in which
+ the segment registers cs, ds, ss and es point to the same
+ linear address space of less than 4GB in total.
+
+ Used Microsoft VC++ 5.0
+
+ #include <stdio.h>
+ #include "amd3dx.h"
+
+ void main ()
+ {
+ float x = (float)1.25;
+ float y = (float)1.25;
+ float z, zz;
+
+ _asm {
+ movd mm1, x
+ movd mm2, y
+ pfmul (mm1, mm2)
+ movd z, mm1
+ femms
+ }
+
+ printf ("value of z = %f\n", z);
+
+ //
+ // Demonstration of using the memory instead of
+ // multimedia register
+ //
+ _asm {
+ movd mm3, x
+ lea esi, y // load effective address of y
+ pfmul (mm3, _esi)
+ movd zz, mm3
+ femms
+ }
+
+ printf ("value of zz = %f\n", zz);
+ }
+
+ #pragma aux EXAMPLE with WATCOM C/C++ v11.x
+ ===========================================
+
+ extern void Add(float *__Dest, float *__A, float *__B);
+ #pragma aux Add = \
+ p_femms \
+ "movd mm6,[esi]" \
+ p_pfadd(mm6,_edi) \
+ "movd [ebx],mm6" \
+ p_femms \
+ parm [ebx] [esi] [edi];
+
+*******************************************************************************/
+
+#ifndef _K3DMACROSINCLUDED_
+#define _K3DMACROSINCLUDED_
+
+#if defined (__WATCOMC__)
+
+// The WATCOM C/C++ version of the 3DNow! macros.
+//
+// The older, combined register style for WATCOM C/C++ macros is not
+// supported.
+
+/* Operand defines for instructions with two operands */
+#define _k3d_mm0_mm0 0xc0
+#define _k3d_mm0_mm1 0xc1
+#define _k3d_mm0_mm2 0xc2
+#define _k3d_mm0_mm3 0xc3
+#define _k3d_mm0_mm4 0xc4
+#define _k3d_mm0_mm5 0xc5
+#define _k3d_mm0_mm6 0xc6
+#define _k3d_mm0_mm7 0xc7
+#define _k3d_mm0_eax 0x00
+#define _k3d_mm0_ecx 0x01
+#define _k3d_mm0_edx 0x02
+#define _k3d_mm0_ebx 0x03
+#define _k3d_mm0_esi 0x06
+#define _k3d_mm0_edi 0x07
+#define _k3d_mm1_mm0 0xc8
+#define _k3d_mm1_mm1 0xc9
+#define _k3d_mm1_mm2 0xca
+#define _k3d_mm1_mm3 0xcb
+#define _k3d_mm1_mm4 0xcc
+#define _k3d_mm1_mm5 0xcd
+#define _k3d_mm1_mm6 0xce
+#define _k3d_mm1_mm7 0xcf
+#define _k3d_mm1_eax 0x08
+#define _k3d_mm1_ecx 0x09
+#define _k3d_mm1_edx 0x0a
+#define _k3d_mm1_ebx 0x0b
+#define _k3d_mm1_esi 0x0e
+#define _k3d_mm1_edi 0x0f
+#define _k3d_mm2_mm0 0xd0
+#define _k3d_mm2_mm1 0xd1
+#define _k3d_mm2_mm2 0xd2
+#define _k3d_mm2_mm3 0xd3
+#define _k3d_mm2_mm4 0xd4
+#define _k3d_mm2_mm5 0xd5
+#define _k3d_mm2_mm6 0xd6
+#define _k3d_mm2_mm7 0xd7
+#define _k3d_mm2_eax 0x10
+#define _k3d_mm2_ecx 0x11
+#define _k3d_mm2_edx 0x12
+#define _k3d_mm2_ebx 0x13
+#define _k3d_mm2_esi 0x16
+#define _k3d_mm2_edi 0x17
+#define _k3d_mm3_mm0 0xd8
+#define _k3d_mm3_mm1 0xd9
+#define _k3d_mm3_mm2 0xda
+#define _k3d_mm3_mm3 0xdb
+#define _k3d_mm3_mm4 0xdc
+#define _k3d_mm3_mm5 0xdd
+#define _k3d_mm3_mm6 0xde
+#define _k3d_mm3_mm7 0xdf
+#define _k3d_mm3_eax 0x18
+#define _k3d_mm3_ecx 0x19
+#define _k3d_mm3_edx 0x1a
+#define _k3d_mm3_ebx 0x1b
+#define _k3d_mm3_esi 0x1e
+#define _k3d_mm3_edi 0x1f
+#define _k3d_mm4_mm0 0xe0
+#define _k3d_mm4_mm1 0xe1
+#define _k3d_mm4_mm2 0xe2
+#define _k3d_mm4_mm3 0xe3
+#define _k3d_mm4_mm4 0xe4
+#define _k3d_mm4_mm5 0xe5
+#define _k3d_mm4_mm6 0xe6
+#define _k3d_mm4_mm7 0xe7
+#define _k3d_mm4_eax 0x20
+#define _k3d_mm4_ecx 0x21
+#define _k3d_mm4_edx 0x22
+#define _k3d_mm4_ebx 0x23
+#define _k3d_mm4_esi 0x26
+#define _k3d_mm4_edi 0x27
+#define _k3d_mm5_mm0 0xe8
+#define _k3d_mm5_mm1 0xe9
+#define _k3d_mm5_mm2 0xea
+#define _k3d_mm5_mm3 0xeb
+#define _k3d_mm5_mm4 0xec
+#define _k3d_mm5_mm5 0xed
+#define _k3d_mm5_mm6 0xee
+#define _k3d_mm5_mm7 0xef
+#define _k3d_mm5_eax 0x28
+#define _k3d_mm5_ecx 0x29
+#define _k3d_mm5_edx 0x2a
+#define _k3d_mm5_ebx 0x2b
+#define _k3d_mm5_esi 0x2e
+#define _k3d_mm5_edi 0x2f
+#define _k3d_mm6_mm0 0xf0
+#define _k3d_mm6_mm1 0xf1
+#define _k3d_mm6_mm2 0xf2
+#define _k3d_mm6_mm3 0xf3
+#define _k3d_mm6_mm4 0xf4
+#define _k3d_mm6_mm5 0xf5
+#define _k3d_mm6_mm6 0xf6
+#define _k3d_mm6_mm7 0xf7
+#define _k3d_mm6_eax 0x30
+#define _k3d_mm6_ecx 0x31
+#define _k3d_mm6_edx 0x32
+#define _k3d_mm6_ebx 0x33
+#define _k3d_mm6_esi 0x36
+#define _k3d_mm6_edi 0x37
+#define _k3d_mm7_mm0 0xf8
+#define _k3d_mm7_mm1 0xf9
+#define _k3d_mm7_mm2 0xfa
+#define _k3d_mm7_mm3 0xfb
+#define _k3d_mm7_mm4 0xfc
+#define _k3d_mm7_mm5 0xfd
+#define _k3d_mm7_mm6 0xfe
+#define _k3d_mm7_mm7 0xff
+#define _k3d_mm7_eax 0x38
+#define _k3d_mm7_ecx 0x39
+#define _k3d_mm7_edx 0x3a
+#define _k3d_mm7_ebx 0x3b
+#define _k3d_mm7_esi 0x3e
+#define _k3d_mm7_edi 0x3f
+
+#define _k3d_name_xlat_m0 _mm0
+#define _k3d_name_xlat_m1 _mm1
+#define _k3d_name_xlat_m2 _mm2
+#define _k3d_name_xlat_m3 _mm3
+#define _k3d_name_xlat_m4 _mm4
+#define _k3d_name_xlat_m5 _mm5
+#define _k3d_name_xlat_m6 _mm6
+#define _k3d_name_xlat_m7 _mm7
+#define _k3d_name_xlat_M0 _mm0
+#define _k3d_name_xlat_M1 _mm1
+#define _k3d_name_xlat_M2 _mm2
+#define _k3d_name_xlat_M3 _mm3
+#define _k3d_name_xlat_M4 _mm4
+#define _k3d_name_xlat_M5 _mm5
+#define _k3d_name_xlat_M6 _mm6
+#define _k3d_name_xlat_M7 _mm7
+#define _k3d_name_xlat_mm0 _mm0
+#define _k3d_name_xlat_mm1 _mm1
+#define _k3d_name_xlat_mm2 _mm2
+#define _k3d_name_xlat_mm3 _mm3
+#define _k3d_name_xlat_mm4 _mm4
+#define _k3d_name_xlat_mm5 _mm5
+#define _k3d_name_xlat_mm6 _mm6
+#define _k3d_name_xlat_mm7 _mm7
+#define _k3d_name_xlat_MM0 _mm0
+#define _k3d_name_xlat_MM1 _mm1
+#define _k3d_name_xlat_MM2 _mm2
+#define _k3d_name_xlat_MM3 _mm3
+#define _k3d_name_xlat_MM4 _mm4
+#define _k3d_name_xlat_MM5 _mm5
+#define _k3d_name_xlat_MM6 _mm6
+#define _k3d_name_xlat_MM7 _mm7
+#define _k3d_name_xlat_eax _eax
+#define _k3d_name_xlat_ebx _ebx
+#define _k3d_name_xlat_ecx _ecx
+#define _k3d_name_xlat_edx _edx
+#define _k3d_name_xlat_esi _esi
+#define _k3d_name_xlat_edi _edi
+#define _k3d_name_xlat_ebp _ebp
+#define _k3d_name_xlat_EAX _eax
+#define _k3d_name_xlat_EBX _ebx
+#define _k3d_name_xlat_ECX _ecx
+#define _k3d_name_xlat_EDX _edx
+#define _k3d_name_xlat_ESI _esi
+#define _k3d_name_xlat_EDI _edi
+#define _k3d_name_xlat_EBP _ebp
+#define _k3d_name_xlat__eax _eax
+#define _k3d_name_xlat__ebx _ebx
+#define _k3d_name_xlat__ecx _ecx
+#define _k3d_name_xlat__edx _edx
+#define _k3d_name_xlat__esi _esi
+#define _k3d_name_xlat__edi _edi
+#define _k3d_name_xlat__ebp _ebp
+#define _k3d_name_xlat__EAX _eax
+#define _k3d_name_xlat__EBX _ebx
+#define _k3d_name_xlat__ECX _ecx
+#define _k3d_name_xlat__EDX _edx
+#define _k3d_name_xlat__ESI _esi
+#define _k3d_name_xlat__EDI _edi
+#define _k3d_name_xlat__EBP _ebp
+
+#define _k3d_xglue3(a,b,c) a##b##c
+#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c)
+#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src)
+
+/* Operand defines for prefetch and prefetchw */
+
+#define _k3d_pref_eax 0x00
+#define _k3d_pref_ecx 0x01
+#define _k3d_pref_edx 0x02
+#define _k3d_pref_ebx 0x03
+#define _k3d_pref_esi 0x06
+#define _k3d_pref_edi 0x07
+#define _k3d_pref_EAX 0x00
+#define _k3d_pref_ECX 0x01
+#define _k3d_pref_EDX 0x02
+#define _k3d_pref_EBX 0x03
+#define _k3d_pref_ESI 0x06
+#define _k3d_pref_EDI 0x07
+#define _k3d_prefw_eax 0x08
+#define _k3d_prefw_ecx 0x09
+#define _k3d_prefw_edx 0x0A
+#define _k3d_prefw_ebx 0x0B
+#define _k3d_prefw_esi 0x0E
+#define _k3d_prefw_edi 0x0F
+#define _k3d_prefw_EAX 0x08
+#define _k3d_prefw_ECX 0x09
+#define _k3d_prefw_EDX 0x0A
+#define _k3d_prefw_EBX 0x0B
+#define _k3d_prefw_ESI 0x0E
+#define _k3d_prefw_EDI 0x0F
+
+/* Defines for 3DNow! instructions */
+#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d
+#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae
+#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e
+#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0
+#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90
+#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0
+#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4
+#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94
+#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4
+#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96
+#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6
+#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6
+#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97
+#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7
+#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a
+#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa
+#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d
+#define FEMMS db 0x0f, 0x0e
+#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf
+#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7
+#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src
+#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src
+#define CPUID db 0x0f, 0xa2
+
+/* Defines for new, K7 opcodes */
+#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a
+#define PFPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e /* PFPNACC: 3DNow! escape 0x0f 0x0f, ModRM, suffix byte 0x8e */
+#define FPPNACC(dst,src) PFPNACC(dst,src) /* original misspelled name, kept so existing callers still build */
+#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb
+#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src)
+#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src)
+#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src)
+#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src)
+#define PMULHUW(dst,src) db 0x0f, 0xe4, _k3d_MODRM(dst,src)
+#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src)
+#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src)
+#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src)
+#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src)
+#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src)
+#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk
+#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk
+#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk
+#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst)
+#define SFENCE db 0x0f, 0xae, 0xf8
+
+/* Memory/offset versions of the opcodes */
+#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d
+#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae
+#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e
+#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0
+#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90
+#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0
+#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4
+#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94
+#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4
+#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96
+#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6
+#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6
+#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97
+#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7
+#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a
+#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xaa
+#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d
+#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf
+#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7
+
+
+/* Memory/offset versions of the new, K7 opcodes */
+#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a
+#define PFPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e /* PFPNACC with [src+off]; 0x40 selects the one-byte displacement form */
+#define FPPNACCM(dst,src,off) PFPNACCM(dst,src,off) /* original misspelled name, kept so existing callers still build */
+#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb
+#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off
+#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off
+#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off
+#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off
+#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off
+#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off
+#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off
+#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off
+#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off
+#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off
+#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off
+#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk
+#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk
+
+
+/* Defines for 3DNow! instructions for use in pragmas */
+/* Each macro expands to a space-separated byte sequence. Naming scheme:
+     - register forms:  lower-case p_xxx is the primary definition and
+       upper-case P_XXX is an alias for it;
+     - memory/offset forms: upper-case P_XXXM is the primary definition and
+       lower-case p_xxxm is an alias for it.
+   Every name is defined exactly once. */
+#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d
+#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae
+#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e
+#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0
+#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90
+#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0
+#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4
+#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94
+#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4
+#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96
+#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6
+#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6
+#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97
+#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7
+#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a
+#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa
+#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d
+#define p_femms 0x0f 0x0e
+#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf
+#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7
+#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src
+#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src
+/* Register forms of the K7 opcodes. These take no offset, so no offset byte
+   is emitted and the ModRM byte is used as produced by _k3d_MODRM, exactly
+   like the 3DNow! register forms above. */
+#define p_pfnacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x8a
+#define p_pfpnacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x8e
+#define p_pswapd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbb
+#define p_pminub(dst,src) 0x0f 0xda _k3d_MODRM(dst,src)
+#define p_pmaxub(dst,src) 0x0f 0xde _k3d_MODRM(dst,src)
+#define p_pminsw(dst,src) 0x0f 0xea _k3d_MODRM(dst,src)
+#define p_pmaxsw(dst,src) 0x0f 0xee _k3d_MODRM(dst,src)
+#define p_pmulhuw(dst,src) 0x0f 0xe4 _k3d_MODRM(dst,src)
+#define p_pavgb(dst,src) 0x0f 0xe0 _k3d_MODRM(dst,src)
+#define p_pavgw(dst,src) 0x0f 0xe3 _k3d_MODRM(dst,src)
+#define p_psadbw(dst,src) 0x0f 0xf6 _k3d_MODRM(dst,src)
+#define p_pmovmskb(dst,src) 0x0f 0xd7 _k3d_MODRM(dst,src)
+#define p_pmaskmovq(dst,src) 0x0f 0xf7 _k3d_MODRM(dst,src)
+#define p_pinsrw(dst,src,msk) 0x0f 0xc4 _k3d_MODRM(dst,src) msk
+#define p_pextrw(dst,src,msk) 0x0f 0xc5 _k3d_MODRM(dst,src) msk
+#define p_pshufw(dst,src,msk) 0x0f 0x70 _k3d_MODRM(dst,src) msk
+/* MOVNTQ is a store: operands are handed to _k3d_MODRM in swapped order. */
+#define p_movntq(dst,src) 0x0f 0xe7 _k3d_MODRM(src,dst)
+
+/* Memory/offset forms: ModRM with 0x40 OR'ed in, followed by the one-byte
+   offset `off` (and then the opcode suffix / immediate where applicable). */
+#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
+#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
+#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
+#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
+#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
+#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
+#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
+#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
+#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
+#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
+#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
+#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
+#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
+#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
+#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
+#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
+#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
+#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
+#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
+#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
+#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
+#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
+#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
+#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
+#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
+/* Correctly spelled name for the PFPNACC memory form; P_FPPNACCM above is the
+   historical spelling and is kept for existing users. */
+#define P_PFPNACCM(dst,src,off) P_FPPNACCM(dst,src,off)
+
+
+/* Upper-case aliases for the 3DNow! register forms. */
+#define P_PF2ID(dst,src) p_pf2id(dst,src)
+#define P_PFACC(dst,src) p_pfacc(dst,src)
+#define P_PFADD(dst,src) p_pfadd(dst,src)
+#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src)
+#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src)
+#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src)
+#define P_PFMAX(dst,src) p_pfmax(dst,src)
+#define P_PFMIN(dst,src) p_pfmin(dst,src)
+#define P_PFMUL(dst,src) p_pfmul(dst,src)
+#define P_PFRCP(dst,src) p_pfrcp(dst,src)
+#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src)
+#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src)
+#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src)
+#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src)
+#define P_PFSUB(dst,src) p_pfsub(dst,src)
+#define P_PFSUBR(dst,src) p_pfsubr(dst,src)
+#define P_PI2FD(dst,src) p_pi2fd(dst,src)
+#define P_FEMMS p_femms
+#define P_PAVGUSB(dst,src) p_pavgusb(dst,src)
+#define P_PMULHRW(dst,src) p_pmulhrw(dst,src)
+#define P_PREFETCH(src) p_prefetch(src)
+#define P_PREFETCHW(src) p_prefetchw(src)
+#define p_CPUID 0x0f 0xa2
+/* Lower-case aliases for the 3DNow! memory/offset forms. */
+#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off)
+#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off)
+#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off)
+#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off)
+#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off)
+#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off)
+#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off)
+#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off)
+#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off)
+#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off)
+#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off)
+#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off)
+#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off)
+#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off)
+#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off)
+#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off)
+#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off)
+#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off)
+#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off)
+
+/* Upper-case aliases for the K7 register forms. P_FPPNACC is the historical
+   (transposed) spelling; P_PFPNACC is the correctly spelled equivalent. The
+   immediate `msk` is forwarded to the forms that take one. */
+#define P_PFNACC(dst,src) p_pfnacc(dst,src)
+#define P_FPPNACC(dst,src) p_pfpnacc(dst,src)
+#define P_PFPNACC(dst,src) p_pfpnacc(dst,src)
+#define P_PSWAPD(dst,src) p_pswapd(dst,src)
+#define P_PMINUB(dst,src) p_pminub(dst,src)
+#define P_PMAXUB(dst,src) p_pmaxub(dst,src)
+#define P_PMINSW(dst,src) p_pminsw(dst,src)
+#define P_PMAXSW(dst,src) p_pmaxsw(dst,src)
+#define P_PMULHUW(dst,src) p_pmulhuw(dst,src)
+#define P_PAVGB(dst,src) p_pavgb(dst,src)
+#define P_PAVGW(dst,src) p_pavgw(dst,src)
+#define P_PSADBW(dst,src) p_psadbw(dst,src)
+#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src)
+#define P_PMASKMOVQ(dst,src) p_pmaskmovq(dst,src)
+#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src,msk)
+#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src,msk)
+#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src,msk)
+#define P_MOVNTQ(dst,src) p_movntq(dst,src)
+
+/* Lower-case aliases for the K7 memory/offset forms. */
+#define p_pfnaccm(dst,src,off) P_PFNACCM(dst,src,off)
+#define p_pfpnaccm(dst,src,off) P_PFPNACCM(dst,src,off)
+#define p_pswapdm(dst,src,off) P_PSWAPDM(dst,src,off)
+#define p_pminubm(dst,src,off) P_PMINUBM(dst,src,off)
+#define p_pmaxubm(dst,src,off) P_PMAXUBM(dst,src,off)
+#define p_pminswm(dst,src,off) P_PMINSWM(dst,src,off)
+#define p_pmaxswm(dst,src,off) P_PMAXSWM(dst,src,off)
+#define p_pmulhuwm(dst,src,off) P_PMULHUWM(dst,src,off)
+#define p_pavgbm(dst,src,off) P_PAVGBM(dst,src,off)
+#define p_pavgwm(dst,src,off) P_PAVGWM(dst,src,off)
+#define p_psadbwm(dst,src,off) P_PSADBWM(dst,src,off)
+#define p_pmovmskbm(dst,src,off) P_PMOVMSKBM(dst,src,off)
+#define p_pmaskmovqm(dst,src,off) P_PMASKMOVQM(dst,src,off)
+#define p_pinsrwm(dst,src,off,msk) P_PINSRWM(dst,src,off,msk)
+#define p_pshufwm(dst,src,off,msk) P_PSHUFWM(dst,src,off,msk)
+#define p_movntqm(dst,src,off) P_MOVNTQM(dst,src,off)
+
+#elif defined (_MSC_VER) && !defined (__MWERKS__)
+// The Microsoft Visual C++ version of the 3DNow! macros.
+
+// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
+#pragma warning(disable:4799)
+
+// Defines for operands.
+#define _K3D_MM0 0xc0
+#define _K3D_MM1 0xc1
+#define _K3D_MM2 0xc2
+#define _K3D_MM3 0xc3
+#define _K3D_MM4 0xc4
+#define _K3D_MM5 0xc5
+#define _K3D_MM6 0xc6
+#define _K3D_MM7 0xc7
+#define _K3D_mm0 0xc0
+#define _K3D_mm1 0xc1
+#define _K3D_mm2 0xc2
+#define _K3D_mm3 0xc3
+#define _K3D_mm4 0xc4
+#define _K3D_mm5 0xc5
+#define _K3D_mm6 0xc6
+#define _K3D_mm7 0xc7
+#define _K3D_EAX 0x00
+#define _K3D_ECX 0x01
+#define _K3D_EDX 0x02
+#define _K3D_EBX 0x03
+#define _K3D_ESI 0x06
+#define _K3D_EDI 0x07
+#define _K3D_eax 0x00
+#define _K3D_ecx 0x01
+#define _K3D_edx 0x02
+#define _K3D_ebx 0x03
+#define _K3D_esi 0x06
+#define _K3D_edi 0x07
+
+// These defines are for compatibility with the previous version of the header file.
+#define _K3D_M0 0xc0
+#define _K3D_M1 0xc1
+#define _K3D_M2 0xc2
+#define _K3D_M3 0xc3
+#define _K3D_M4 0xc4
+#define _K3D_M5 0xc5
+#define _K3D_M6 0xc6
+#define _K3D_M7 0xc7
+#define _K3D_m0 0xc0
+#define _K3D_m1 0xc1
+#define _K3D_m2 0xc2
+#define _K3D_m3 0xc3
+#define _K3D_m4 0xc4
+#define _K3D_m5 0xc5
+#define _K3D_m6 0xc6
+#define _K3D_m7 0xc7
+#define _K3D__EAX 0x00
+#define _K3D__ECX 0x01
+#define _K3D__EDX 0x02
+#define _K3D__EBX 0x03
+#define _K3D__ESI 0x06
+#define _K3D__EDI 0x07
+#define _K3D__eax 0x00
+#define _K3D__ecx 0x01
+#define _K3D__edx 0x02
+#define _K3D__ebx 0x03
+#define _K3D__esi 0x06
+#define _K3D__edi 0x07
+
+// General 3DNow! instruction format that is supported by
+// these macros. Note that only the most basic form of memory
+// operands are supported by these macros.
+//
+// All four helpers build their ModRM byte the same way: the _K3D_<dst>
+// operand code is masked with 0x3f and shifted into bits 3..5, then OR'ed
+// with the _K3D_<src> operand code. `inst` is pasted onto _3DNowOpcode to
+// select the opcode byte, so a matching _3DNowOpcode<inst> define must exist.
+
+// InjK3DOps: register form of a 3DNow!-style instruction. Emits
+// 0x0f 0x0f, the ModRM byte, then the opcode byte last.
+// NOTE(review): the ModRM expression is not wrapped in parentheses here,
+// unlike the memory variants below -- confirm _emit accepts it as one operand.
+#define InjK3DOps(dst,src,inst) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0f \
+ _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
+ _asm _emit _3DNowOpcode##inst \
+}
+
+// InjK3DMOps: memory/offset form of InjK3DOps. 0x40 is OR'ed into the ModRM
+// byte and the single byte `off` is emitted between ModRM and the opcode.
+// `off` is emitted as given; nothing here checks that it fits in one byte.
+#define InjK3DMOps(dst,src,off,inst) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0f \
+ _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
+ _asm _emit off \
+ _asm _emit _3DNowOpcode##inst \
+}
+
+// InjMMXOps: register form of an MMX-style instruction. Emits 0x0f, the
+// opcode byte, then the ModRM byte (opcode comes before ModRM here).
+#define InjMMXOps(dst,src,inst) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit _3DNowOpcode##inst \
+ _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
+}
+
+// InjMMXMOps: memory/offset form of InjMMXOps. 0x40 is OR'ed into the ModRM
+// byte and the single byte `off` is emitted last.
+#define InjMMXMOps(dst,src,off,inst) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit _3DNowOpcode##inst \
+ _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
+ _asm _emit off \
+}
+
+// Opcode bytes selected by the `inst` argument of the Inj* helpers, which
+// paste `inst` onto the _3DNowOpcode prefix. For the InjK3D* helpers this is
+// the trailing suffix byte; for the InjMMX* helpers it is the byte after 0x0f.
+#define _3DNowOpcodePF2ID 0x1d
+#define _3DNowOpcodePFACC 0xae
+#define _3DNowOpcodePFADD 0x9e
+#define _3DNowOpcodePFCMPEQ 0xb0
+#define _3DNowOpcodePFCMPGE 0x90
+#define _3DNowOpcodePFCMPGT 0xa0
+#define _3DNowOpcodePFMAX 0xa4
+#define _3DNowOpcodePFMIN 0x94
+#define _3DNowOpcodePFMUL 0xb4
+#define _3DNowOpcodePFRCP 0x96
+#define _3DNowOpcodePFRCPIT1 0xa6
+#define _3DNowOpcodePFRCPIT2 0xb6
+#define _3DNowOpcodePFRSQRT 0x97
+#define _3DNowOpcodePFRSQIT1 0xa7
+#define _3DNowOpcodePFSUB 0x9a
+#define _3DNowOpcodePFSUBR 0xaa
+#define _3DNowOpcodePI2FD 0x0d
+#define _3DNowOpcodePAVGUSB 0xbf
+#define _3DNowOpcodePMULHRW 0xb7
+#define _3DNowOpcodePFNACC 0x8a
+// PFPNACC / PFPNACCM pass `PFPNACC` as inst, so this spelling must exist.
+// The transposed FPPNACC spelling is kept for anything already using it.
+#define _3DNowOpcodePFPNACC 0x8e
+#define _3DNowOpcodeFPPNACC 0x8e
+#define _3DNowOpcodePSWAPD 0xbb
+#define _3DNowOpcodePMINUB 0xda
+#define _3DNowOpcodePMAXUB 0xde
+#define _3DNowOpcodePMINSW 0xea
+#define _3DNowOpcodePMAXSW 0xee
+#define _3DNowOpcodePMULHUW 0xe4
+#define _3DNowOpcodePAVGB 0xe0
+#define _3DNowOpcodePAVGW 0xe3
+#define _3DNowOpcodePSADBW 0xf6
+#define _3DNowOpcodePMOVMSKB 0xd7
+#define _3DNowOpcodePMASKMOVQ 0xf7
+#define _3DNowOpcodePINSRW 0xc4
+#define _3DNowOpcodePEXTRW 0xc5
+#define _3DNowOpcodePSHUFW 0x70
+#define _3DNowOpcodeMOVNTQ 0xe7
+#define _3DNowOpcodePREFETCHT 0x18
+
+
+#define PF2ID(dst,src) InjK3DOps(dst, src, PF2ID)
+#define PFACC(dst,src) InjK3DOps(dst, src, PFACC)
+#define PFADD(dst,src) InjK3DOps(dst, src, PFADD)
+#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ)
+#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE)
+#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT)
+#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX)
+#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN)
+#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL)
+#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP)
+#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1)
+#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2)
+#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT)
+#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1)
+#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB)
+#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR)
+#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD)
+#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB)
+#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW)
+
+#define FEMMS \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0e \
+}
+
+#define PREFETCH(src) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit (_K3D_##src & 0x07) \
+}
+
+/* Prefetch with a short offset: `off` is emitted as a single byte after
+ the ModRM byte (0x40 | low three bits of the _K3D_<src> operand code).
+ Careful! The macro does not check that your offset actually fits in
+ one byte; use PREFETCHMLONG for larger offsets. */
+
+#define PREFETCHM(src,off) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit (0x40 | (_K3D_##src & 0x07)) \
+ _asm _emit off \
+}
+
+/* Prefetch with a long offset */
+
+#define PREFETCHMLONG(src,off) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit (0x80 | (_K3D_##src & 0x07)) \
+ _asm _emit (off & 0x000000ff) \
+ _asm _emit (off & 0x0000ff00) >> 8 \
+ _asm _emit (off & 0x00ff0000) >> 16 \
+ _asm _emit (off & 0xff000000) >> 24 \
+}
+
+#define PREFETCHW(src) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit (0x08 | (_K3D_##src & 0x07)) \
+}
+
+#define PREFETCHWM(src,off) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit 0x48 | (_K3D_##src & 0x07) \
+ _asm _emit off \
+}
+
+#define PREFETCHWMLONG(src,off) \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0x0d \
+ _asm _emit 0x88 | (_K3D_##src & 0x07) \
+ _asm _emit (off & 0x000000ff) \
+ _asm _emit (off & 0x0000ff00) >> 8 \
+ _asm _emit (off & 0x00ff0000) >> 16 \
+ _asm _emit (off & 0xff000000) >> 24 \
+}
+
+#define CPUID \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0xa2 \
+}
+
+
+/* Defines for new, K7 opcodes */
+#define SFENCE \
+{ \
+ _asm _emit 0x0f \
+ _asm _emit 0xae \
+ _asm _emit 0xf8 \
+}
+
+#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
+#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
+#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
+#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
+#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
+#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
+#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
+#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
+#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
+#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
+#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
+#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
+#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
+#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk
+#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) _asm _emit msk
+#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk
+#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
+#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
+#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
+#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
+#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
+
+
+/* Memory/offset versions of the opcodes */
+#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB)
+#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID)
+#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC)
+#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD)
+#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ)
+#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE)
+#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT)
+#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX)
+#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN)
+#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL)
+#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP)
+#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1)
+#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2)
+#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT)
+#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1)
+#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB)
+#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR)
+#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD)
+#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW)
+
+
+/* Memory/offset versions of the K7 opcodes */
+#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
+#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
+#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
+#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
+#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
+#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
+#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
+#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
+#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
+#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
+#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
+#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
+#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
+#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk
+#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk
+#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
+#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
+#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
+#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
+#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
+
+
+#else
+
+/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */
+#define PAVGUSB(dst,src) pavgusb dst,src
+#define PF2ID(dst,src) pf2id dst,src
+#define PFACC(dst,src) pfacc dst,src
+#define PFADD(dst,src) pfadd dst,src
+#define PFCMPEQ(dst,src) pfcmpeq dst,src
+#define PFCMPGE(dst,src) pfcmpge dst,src
+#define PFCMPGT(dst,src) pfcmpgt dst,src
+#define PFMAX(dst,src) pfmax dst,src
+#define PFMIN(dst,src) pfmin dst,src
+#define PFMUL(dst,src) pfmul dst,src
+#define PFRCP(dst,src) pfrcp dst,src
+#define PFRCPIT1(dst,src) pfrcpit1 dst,src
+#define PFRCPIT2(dst,src) pfrcpit2 dst,src
+#define PFRSQRT(dst,src) pfrsqrt dst,src
+#define PFRSQIT1(dst,src) pfrsqit1 dst,src
+#define PFSUB(dst,src) pfsub dst,src
+#define PFSUBR(dst,src) pfsubr dst,src
+#define PI2FD(dst,src) pi2fd dst,src
+#define PMULHRW(dst,src) pmulhrw dst,src
+#define PREFETCH(src) prefetch src
+#define PREFETCHW(src) prefetchw src
+
+#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off]
+#define PF2IDM(dst,src,off) PF2ID dst,[src+off]
+#define PFACCM(dst,src,off) PFACC dst,[src+off]
+#define PFADDM(dst,src,off) PFADD dst,[src+off]
+#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off]
+#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off]
+#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off]
+#define PFMAXM(dst,src,off) PFMAX dst,[src+off]
+#define PFMINM(dst,src,off) PFMIN dst,[src+off]
+#define PFMULM(dst,src,off) PFMUL dst,[src+off]
+#define PFRCPM(dst,src,off) PFRCP dst,[src+off]
+#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off]
+#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off]
+#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off]
+#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off]
+#define PFSUBM(dst,src,off) PFSUB dst,[src+off]
+#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off]
+#define PI2FDM(dst,src,off) PI2FD dst,[src+off]
+#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off]
+
+
+#if defined (__MWERKS__)
+// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them
+
+// Defines for operands.
+#define _K3D_MM0 0xc0
+#define _K3D_MM1 0xc1
+#define _K3D_MM2 0xc2
+#define _K3D_MM3 0xc3
+#define _K3D_MM4 0xc4
+#define _K3D_MM5 0xc5
+#define _K3D_MM6 0xc6
+#define _K3D_MM7 0xc7
+#define _K3D_mm0 0xc0
+#define _K3D_mm1 0xc1
+#define _K3D_mm2 0xc2
+#define _K3D_mm3 0xc3
+#define _K3D_mm4 0xc4
+#define _K3D_mm5 0xc5
+#define _K3D_mm6 0xc6
+#define _K3D_mm7 0xc7
+#define _K3D_EAX 0x00
+#define _K3D_ECX 0x01
+#define _K3D_EDX 0x02
+#define _K3D_EBX 0x03
+#define _K3D_ESI 0x06
+#define _K3D_EDI 0x07
+#define _K3D_eax 0x00
+#define _K3D_ecx 0x01
+#define _K3D_edx 0x02
+#define _K3D_ebx 0x03
+#define _K3D_esi 0x06
+#define _K3D_edi 0x07
+#define _K3D_EAX 0x00
+#define _K3D_ECX 0x01
+#define _K3D_EDX 0x02
+#define _K3D_EBX 0x03
+#define _K3D_ESI 0x06
+#define _K3D_EDI 0x07
+#define _K3D_eax 0x00
+#define _K3D_ecx 0x01
+#define _K3D_edx 0x02
+#define _K3D_ebx 0x03
+#define _K3D_esi 0x06
+#define _K3D_edi 0x07
+
+#define InjK3DOps(dst,src,inst) \
+ db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst
+
+#define InjK3DMOps(dst,src,off,inst) \
+ db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
+
+#define InjMMXOps(dst,src,inst) \
+ db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
+
+#define InjMMXMOps(dst,src,off,inst) \
+ db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
+
+/* Register forms of the K7 opcodes, hand-assembled through the db-based
+   InjK3DOps / InjMMXOps helpers defined above. */
+#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
+#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
+#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
+#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
+#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
+#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
+#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
+#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
+#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
+#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
+#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
+#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
+#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
+/* The immediate is appended to the same db list (", msk"), matching the
+   memory/offset forms PINSRWM / PEXTRWM / PSHUFWM; a second "db" directive
+   on the same expanded line would not be a separate statement. */
+#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW), msk
+#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW), msk
+#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW), msk
+/* MOVNTQ is a store: operands are passed to the helper in swapped order. */
+#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
+/* The mm0..mm3 "dst" operand only supplies the value placed in ModRM
+   bits 3..5, which distinguishes the four prefetch variants. */
+#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
+#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
+#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
+#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
+
+
+/* Memory/offset versions of the K7 opcodes */
+#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
+#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
+#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
+#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
+#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
+#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
+#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
+#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
+#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
+#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
+#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
+#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
+#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
+#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk
+#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk
+#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk
+#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
+#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
+#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
+#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
+#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
+
+
+#else
+
+#define PFNACC(dst,src) PFNACC dst,src
+#define PFPNACC(dst,src) PFPNACC dst,src
+#define PSWAPD(dst,src) PSWAPD dst,src
+#define PMINUB(dst,src) PMINUB dst,src
+#define PMAXUB(dst,src) PMAXUB dst,src
+#define PMINSW(dst,src) PMINSW dst,src
+#define PMAXSW(dst,src) PMAXSW dst,src
+#define PMULHUW(dst,src) PMULHUW dst,src
+#define PAVGB(dst,src) PAVGB dst,src
+#define PAVGW(dst,src) PAVGW dst,src
+#define PSADBW(dst,src) PSADBW dst,src
+#define PMOVMSKB(dst,src) PMOVMSKB dst,src
+#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src
+#define PINSRW(dst,src,msk) PINSRW dst,src,msk
+#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk
+#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk
+#define MOVNTQ(dst,src) MOVNTQ dst,src
+
+#define PFNACCM(dst,src,off) PFNACC dst,[src+off]
+#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off]
+#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off]
+#define PMINUBM(dst,src,off) PMINUB dst,[src+off]
+#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off]
+#define PMINSWM(dst,src,off) PMINSW dst,[src+off]
+#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off]
+#define PMULHUWM(dst,src,off) PMULHUW dst,[src+off]
+#define PAVGBM(dst,src,off) PAVGB dst,[src+off]
+#define PAVGWM(dst,src,off) PAVGW dst,[src+off]
+#define PSADBWM(dst,src,off) PSADBW dst,[src+off]
+#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off]
+#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off]
+#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk
+#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk
+#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk
+#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off]
+
+#endif
+
+#endif
+
+/* Just to deal with lower case. */
+#define pf2id(dst,src) PF2ID(dst,src)
+#define pfacc(dst,src) PFACC(dst,src)
+#define pfadd(dst,src) PFADD(dst,src)
+#define pfcmpeq(dst,src) PFCMPEQ(dst,src)
+#define pfcmpge(dst,src) PFCMPGE(dst,src)
+#define pfcmpgt(dst,src) PFCMPGT(dst,src)
+#define pfmax(dst,src) PFMAX(dst,src)
+#define pfmin(dst,src) PFMIN(dst,src)
+#define pfmul(dst,src) PFMUL(dst,src)
+#define pfrcp(dst,src) PFRCP(dst,src)
+#define pfrcpit1(dst,src) PFRCPIT1(dst,src)
+#define pfrcpit2(dst,src) PFRCPIT2(dst,src)
+#define pfrsqrt(dst,src) PFRSQRT(dst,src)
+#define pfrsqit1(dst,src) PFRSQIT1(dst,src)
+#define pfsub(dst,src) PFSUB(dst,src)
+#define pfsubr(dst,src) PFSUBR(dst,src)
+#define pi2fd(dst,src) PI2FD(dst,src)
+#define femms FEMMS
+#define pavgusb(dst,src) PAVGUSB(dst,src)
+#define pmulhrw(dst,src) PMULHRW(dst,src)
+#define prefetch(src) PREFETCH(src)
+#define prefetchw(src) PREFETCHW(src)
+
+#define prefetchm(src,off) PREFETCHM(src,off)
+#define prefetchmlong(src,off) PREFETCHMLONG(src,off)
+#define prefetchwm(src,off) PREFETCHWM(src,off)
+#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off)
+
+#define pfnacc(dst,src) PFNACC(dst,src)
+#define pfpnacc(dst,src) PFPNACC(dst,src)
+#define pswapd(dst,src) PSWAPD(dst,src)
+#define pminub(dst,src) PMINUB(dst,src)
+#define pmaxub(dst,src) PMAXUB(dst,src)
+#define pminsw(dst,src) PMINSW(dst,src)
+#define pmaxsw(dst,src) PMAXSW(dst,src)
+#define pmulhuw(dst,src) PMULHUW(dst,src)
+#define pavgb(dst,src) PAVGB(dst,src)
+#define pavgw(dst,src) PAVGW(dst,src)
+#define psadbw(dst,src) PSADBW(dst,src)
+#define pmovmskb(dst,src) PMOVMSKB(dst,src)
+#define pmaskmovq(dst,src) PMASKMOVQ(dst,src)
+#define pinsrw(dst,src,msk) PINSRW(dst,src,msk)
+#define pextrw(dst,src,msk) PEXTRW(dst,src,msk)
+#define pshufw(dst,src,msk) PSHUFW(dst,src,msk)
+#define movntq(dst,src) MOVNTQ(dst,src)
+#define prefetchnta(mem) PREFETCHNTA(mem)
+#define prefetcht0(mem) PREFETCHT0(mem)
+#define prefetcht1(mem) PREFETCHT1(mem)
+#define prefetcht2(mem) PREFETCHT2(mem)
+
+
+#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off)
+#define pf2idm(dst,src,off) PF2IDM(dst,src,off)
+#define pfaccm(dst,src,off) PFACCM(dst,src,off)
+#define pfaddm(dst,src,off) PFADDM(dst,src,off)
+#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off)
+#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off)
+#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off)
+#define pfmaxm(dst,src,off) PFMAXM(dst,src,off)
+#define pfminm(dst,src,off) PFMINM(dst,src,off)
+#define pfmulm(dst,src,off) PFMULM(dst,src,off)
+#define pfrcpm(dst,src,off) PFRCPM(dst,src,off)
+#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off)
+#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off)
+#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off)
+#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off)
+#define pfsubm(dst,src,off) PFSUBM(dst,src,off)
+#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off)
+#define pi2fdm(dst,src,off) PI2FDM(dst,src,off)
+#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off)
+#define cpuid CPUID
+#define sfence SFENCE
+
+/* Lower-case aliases for the memory/offset versions of the K7 opcodes.
+   Each simply forwards to the upper-case macro of the same name. */
+#define pfnaccm(dst,src,off) PFNACCM(dst,src,off)
+#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off)
+#define pswapdm(dst,src,off) PSWAPDM(dst,src,off)
+#define pminubm(dst,src,off) PMINUBM(dst,src,off)
+#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off)
+#define pminswm(dst,src,off) PMINSWM(dst,src,off)
+#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off)
+#define pmulhuwm(dst,src,off) PMULHUWM(dst,src,off)
+#define pavgbm(dst,src,off) PAVGBM(dst,src,off)
+#define pavgwm(dst,src,off) PAVGWM(dst,src,off)
+#define psadbwm(dst,src,off) PSADBWM(dst,src,off)
+#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off)
+#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
+#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk)
+#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk)
+#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk)
+#define movntqm(dst,src,off) MOVNTQM(dst,src,off)
+/* The two-argument prefetch aliases must forward to the ...M macros; the
+   non-M PREFETCHNTA / PREFETCHT0..2 macros take only (mem). */
+#define prefetchntam(mem,off) PREFETCHNTAM(mem,off)
+#define prefetcht0m(mem,off) PREFETCHT0M(mem,off)
+#define prefetcht1m(mem,off) PREFETCHT1M(mem,off)
+#define prefetcht2m(mem,off) PREFETCHT2M(mem,off)
+
+#endif
diff --git a/mp/src/public/mathlib/anorms.h b/mp/src/public/mathlib/anorms.h
index ae759eb1..4f653835 100644
--- a/mp/src/public/mathlib/anorms.h
+++ b/mp/src/public/mathlib/anorms.h
@@ -1,25 +1,25 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-
-#ifndef ANORMS_H
-#define ANORMS_H
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/vector.h"
-
-
-#define NUMVERTEXNORMALS 162
-
-// the angle between consecutive g_anorms[] vectors is ~14.55 degrees
-#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275)
-
-extern Vector g_anorms[NUMVERTEXNORMALS];
-
-
-#endif // ANORMS_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//=============================================================================//
+
+#ifndef ANORMS_H
+#define ANORMS_H
+#ifdef _WIN32
+#pragma once
+#endif
+
+
+#include "mathlib/vector.h"
+
+
+#define NUMVERTEXNORMALS 162
+
+// the angle between consecutive g_anorms[] vectors is ~14.55 degrees
+#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275)
+
+extern Vector g_anorms[NUMVERTEXNORMALS];
+
+
+#endif // ANORMS_H
diff --git a/mp/src/public/mathlib/bumpvects.h b/mp/src/public/mathlib/bumpvects.h
index e0ba73fb..6939ca05 100644
--- a/mp/src/public/mathlib/bumpvects.h
+++ b/mp/src/public/mathlib/bumpvects.h
@@ -1,37 +1,37 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $Workfile: $
-// $Date: $
-// $NoKeywords: $
-//=============================================================================//
-
-#ifndef BUMPVECTS_H
-#define BUMPVECTS_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/mathlib.h"
-
-#define OO_SQRT_2 0.70710676908493042f
-#define OO_SQRT_3 0.57735025882720947f
-#define OO_SQRT_6 0.40824821591377258f
-// sqrt( 2 / 3 )
-#define OO_SQRT_2_OVER_3 0.81649661064147949f
-
-#define NUM_BUMP_VECTS 3
-
-const TableVector g_localBumpBasis[NUM_BUMP_VECTS] =
-{
- { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 },
- { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 },
- { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 }
-};
-
-void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal,
- const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] );
-
-#endif // BUMPVECTS_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $Workfile: $
+// $Date: $
+// $NoKeywords: $
+//=============================================================================//
+
+#ifndef BUMPVECTS_H
+#define BUMPVECTS_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include "mathlib/mathlib.h"
+
+#define OO_SQRT_2 0.70710676908493042f
+#define OO_SQRT_3 0.57735025882720947f
+#define OO_SQRT_6 0.40824821591377258f
+// sqrt( 2 / 3 )
+#define OO_SQRT_2_OVER_3 0.81649661064147949f
+
+#define NUM_BUMP_VECTS 3
+
+const TableVector g_localBumpBasis[NUM_BUMP_VECTS] =
+{
+ { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 },
+ { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 },
+ { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 }
+};
+
+void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal,
+ const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] );
+
+#endif // BUMPVECTS_H
diff --git a/mp/src/public/mathlib/compressed_3d_unitvec.h b/mp/src/public/mathlib/compressed_3d_unitvec.h
index d9f2f597..a92dba22 100644
--- a/mp/src/public/mathlib/compressed_3d_unitvec.h
+++ b/mp/src/public/mathlib/compressed_3d_unitvec.h
@@ -1,284 +1,284 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-#ifndef _3D_UNITVEC_H
-#define _3D_UNITVEC_H
-
-
-#define UNITVEC_DECLARE_STATICS \
- float cUnitVector::mUVAdjustment[0x2000]; \
- Vector cUnitVector::mTmpVec;
-
-// upper 3 bits
-#define SIGN_MASK 0xe000
-#define XSIGN_MASK 0x8000
-#define YSIGN_MASK 0x4000
-#define ZSIGN_MASK 0x2000
-
-// middle 6 bits - xbits
-#define TOP_MASK 0x1f80
-
-// lower 7 bits - ybits
-#define BOTTOM_MASK 0x007f
-
-// unitcomp.cpp : A Unit Vector to 16-bit word conversion
-// algorithm based on work of Rafael Baptista ([email protected])
-// Accuracy improved by O.D. ([email protected])
-// Used with Permission.
-
-// a compressed unit vector. reasonable fidelty for unit
-// vectors in a 16 bit package. Good enough for surface normals
-// we hope.
-class cUnitVector // : public c3dMathObject
-{
-public:
- cUnitVector() { mVec = 0; }
- cUnitVector( const Vector& vec )
- {
- packVector( vec );
- }
- cUnitVector( unsigned short val ) { mVec = val; }
-
- cUnitVector& operator=( const Vector& vec )
- { packVector( vec ); return *this; }
-
- operator Vector()
- {
- unpackVector( mTmpVec );
- return mTmpVec;
- }
-
- void packVector( const Vector& vec )
- {
- // convert from Vector to cUnitVector
-
- Assert( vec.IsValid());
- Vector tmp = vec;
-
- // input vector does not have to be unit length
- // Assert( tmp.length() <= 1.001f );
-
- mVec = 0;
- if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; }
- if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; }
- if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; }
-
- // project the normal onto the plane that goes through
- // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1).
- // on that plane we choose an (projective!) coordinate system
- // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity
-
- // a little slower... old pack was 4 multiplies and 2 adds.
- // This is 2 multiplies, 2 adds, and a divide....
- float w = 126.0f / ( tmp.x + tmp.y + tmp.z );
- long xbits = (long)( tmp.x * w );
- long ybits = (long)( tmp.y * w );
-
- Assert( xbits < 127 );
- Assert( xbits >= 0 );
- Assert( ybits < 127 );
- Assert( ybits >= 0 );
-
- // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126
- // however for the sampling we want to transform this triangle
- // into a rectangle.
- if ( xbits >= 64 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // now we that have xp in the range (0,127) and yp in
- // the range (0,63), we can pack all the bits together
- mVec |= ( xbits << 7 );
- mVec |= ybits;
- }
-
- void unpackVector( Vector& vec )
- {
- // if we do a straightforward backward transform
- // we will get points on the plane X0,Y0,Z0
- // however we need points on a sphere that goes through
- // these points. Therefore we need to adjust x,y,z so
- // that x^2+y^2+z^2=1 by normalizing the vector. We have
- // already precalculated the amount by which we need to
- // scale, so all we do is a table lookup and a
- // multiplication
-
- // get the x and y bits
- long xbits = (( mVec & TOP_MASK ) >> 7 );
- long ybits = ( mVec & BOTTOM_MASK );
-
- // map the numbers back to the triangle (0,0)-(0,126)-(126,0)
- if (( xbits + ybits ) >= 127 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // do the inverse transform and normalization
- // costs 3 extra multiplies and 2 subtracts. No big deal.
- float uvadj = mUVAdjustment[mVec & ~SIGN_MASK];
- vec.x = uvadj * (float) xbits;
- vec.y = uvadj * (float) ybits;
- vec.z = uvadj * (float)( 126 - xbits - ybits );
-
- // set all the sign bits
- if ( mVec & XSIGN_MASK ) vec.x = -vec.x;
- if ( mVec & YSIGN_MASK ) vec.y = -vec.y;
- if ( mVec & ZSIGN_MASK ) vec.z = -vec.z;
-
- Assert( vec.IsValid());
- }
-
- static void initializeStatics()
- {
- for ( int idx = 0; idx < 0x2000; idx++ )
- {
- long xbits = idx >> 7;
- long ybits = idx & BOTTOM_MASK;
-
- // map the numbers back to the triangle (0,0)-(0,127)-(127,0)
- if (( xbits + ybits ) >= 127 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // convert to 3D vectors
- float x = (float)xbits;
- float y = (float)ybits;
- float z = (float)( 126 - xbits - ybits );
-
- // calculate the amount of normalization required
- mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x );
- Assert( _finite( mUVAdjustment[idx]));
-
- //cerr << mUVAdjustment[idx] << "\t";
- //if ( xbits == 0 ) cerr << "\n";
- }
- }
-
-#if 0
- void test()
- {
- #define TEST_RANGE 4
- #define TEST_RANDOM 100
- #define TEST_ANGERROR 1.0
-
- float maxError = 0;
- float avgError = 0;
- int numVecs = 0;
-
- {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ )
- {
- for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ )
- {
- for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ )
- {
- if (( x + y + z ) == 0 ) continue;
-
- Vector vec( (float)x, (float)y, (float)z );
- Vector vec2;
-
- vec.normalize();
- packVector( vec );
- unpackVector( vec2 );
-
- float ang = vec.dot( vec2 );
- ang = (( fabs( ang ) > 0.99999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << "quantized vec2: " << vec2.x
- << ",\t" << vec2.y << ",\t"
- << vec2.z << endl << endl;
- }
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }
- }
- }}
-
- for ( int w = 0; w < TEST_RANDOM; w++ )
- {
- Vector vec( genRandom(), genRandom(), genRandom());
- Vector vec2;
- vec.normalize();
-
- packVector( vec );
- unpackVector( vec2 );
-
- float ang =vec.dot( vec2 );
- ang = (( ang > 0.999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << "quantized vec2: " << vec2.x << ",\t"
- << vec2.y << ",\t"
- << vec2.z << endl << endl;
- }
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }
-
- { for ( int x = 0; x < 50; x++ )
- {
- Vector vec( (float)x, 25.0f, 0.0f );
- Vector vec2;
-
- vec.normalize();
- packVector( vec );
- unpackVector( vec2 );
-
- float ang = vec.dot( vec2 );
- ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << " quantized vec2: " << vec2.x << ",\t"
- << vec2.y << ",\t" << vec2.z << endl << endl;
- }
-
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }}
-
- cerr << "max angle error: " << maxError
- << ", average error: " << avgError / numVecs
- << ", num tested vecs: " << numVecs << endl;
- }
-
- friend ostream& operator<< ( ostream& os, const cUnitVector& vec )
- { os << vec.mVec; return os; }
-#endif
-
-//protected: // !!!!
-
- unsigned short mVec;
- static float mUVAdjustment[0x2000];
- static Vector mTmpVec;
-};
-
-#endif // _3D_VECTOR_H
-
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+#ifndef _3D_UNITVEC_H
+#define _3D_UNITVEC_H
+
+
+#define UNITVEC_DECLARE_STATICS \
+ float cUnitVector::mUVAdjustment[0x2000]; \
+ Vector cUnitVector::mTmpVec;
+
+// upper 3 bits
+#define SIGN_MASK 0xe000
+#define XSIGN_MASK 0x8000
+#define YSIGN_MASK 0x4000
+#define ZSIGN_MASK 0x2000
+
+// middle 6 bits - xbits
+#define TOP_MASK 0x1f80
+
+// lower 7 bits - ybits
+#define BOTTOM_MASK 0x007f
+
+// unitcomp.cpp : A Unit Vector to 16-bit word conversion
+// algorithm based on work of Rafael Baptista ([email protected])
+// Accuracy improved by O.D. ([email protected])
+// Used with Permission.
+
+// a compressed unit vector. reasonable fidelty for unit
+// vectors in a 16 bit package. Good enough for surface normals
+// we hope.
+class cUnitVector // : public c3dMathObject
+{
+public:
+ cUnitVector() { mVec = 0; }
+ cUnitVector( const Vector& vec )
+ {
+ packVector( vec );
+ }
+ cUnitVector( unsigned short val ) { mVec = val; }
+
+ cUnitVector& operator=( const Vector& vec )
+ { packVector( vec ); return *this; }
+
+ operator Vector()
+ {
+ unpackVector( mTmpVec );
+ return mTmpVec;
+ }
+
+ void packVector( const Vector& vec )
+ {
+ // convert from Vector to cUnitVector
+
+ Assert( vec.IsValid());
+ Vector tmp = vec;
+
+ // input vector does not have to be unit length
+ // Assert( tmp.length() <= 1.001f );
+
+ mVec = 0;
+ if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; }
+ if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; }
+ if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; }
+
+ // project the normal onto the plane that goes through
+ // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1).
+ // on that plane we choose an (projective!) coordinate system
+ // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity
+
+ // a little slower... old pack was 4 multiplies and 2 adds.
+ // This is 2 multiplies, 2 adds, and a divide....
+ float w = 126.0f / ( tmp.x + tmp.y + tmp.z );
+ long xbits = (long)( tmp.x * w );
+ long ybits = (long)( tmp.y * w );
+
+ Assert( xbits < 127 );
+ Assert( xbits >= 0 );
+ Assert( ybits < 127 );
+ Assert( ybits >= 0 );
+
+ // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126
+ // however for the sampling we want to transform this triangle
+ // into a rectangle.
+ if ( xbits >= 64 )
+ {
+ xbits = 127 - xbits;
+ ybits = 127 - ybits;
+ }
+
+ // now we that have xp in the range (0,127) and yp in
+ // the range (0,63), we can pack all the bits together
+ mVec |= ( xbits << 7 );
+ mVec |= ybits;
+ }
+
+ void unpackVector( Vector& vec )
+ {
+ // if we do a straightforward backward transform
+ // we will get points on the plane X0,Y0,Z0
+ // however we need points on a sphere that goes through
+ // these points. Therefore we need to adjust x,y,z so
+ // that x^2+y^2+z^2=1 by normalizing the vector. We have
+ // already precalculated the amount by which we need to
+ // scale, so all we do is a table lookup and a
+ // multiplication
+
+ // get the x and y bits
+ long xbits = (( mVec & TOP_MASK ) >> 7 );
+ long ybits = ( mVec & BOTTOM_MASK );
+
+ // map the numbers back to the triangle (0,0)-(0,126)-(126,0)
+ if (( xbits + ybits ) >= 127 )
+ {
+ xbits = 127 - xbits;
+ ybits = 127 - ybits;
+ }
+
+ // do the inverse transform and normalization
+ // costs 3 extra multiplies and 2 subtracts. No big deal.
+ float uvadj = mUVAdjustment[mVec & ~SIGN_MASK];
+ vec.x = uvadj * (float) xbits;
+ vec.y = uvadj * (float) ybits;
+ vec.z = uvadj * (float)( 126 - xbits - ybits );
+
+ // set all the sign bits
+ if ( mVec & XSIGN_MASK ) vec.x = -vec.x;
+ if ( mVec & YSIGN_MASK ) vec.y = -vec.y;
+ if ( mVec & ZSIGN_MASK ) vec.z = -vec.z;
+
+ Assert( vec.IsValid());
+ }
+
+ static void initializeStatics()
+ {
+ for ( int idx = 0; idx < 0x2000; idx++ )
+ {
+ long xbits = idx >> 7;
+ long ybits = idx & BOTTOM_MASK;
+
+ // map the numbers back to the triangle (0,0)-(0,127)-(127,0)
+ if (( xbits + ybits ) >= 127 )
+ {
+ xbits = 127 - xbits;
+ ybits = 127 - ybits;
+ }
+
+ // convert to 3D vectors
+ float x = (float)xbits;
+ float y = (float)ybits;
+ float z = (float)( 126 - xbits - ybits );
+
+ // calculate the amount of normalization required
+ mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x );
+ Assert( _finite( mUVAdjustment[idx]));
+
+ //cerr << mUVAdjustment[idx] << "\t";
+ //if ( xbits == 0 ) cerr << "\n";
+ }
+ }
+
+#if 0
+ void test()
+ {
+ #define TEST_RANGE 4
+ #define TEST_RANDOM 100
+ #define TEST_ANGERROR 1.0
+
+ float maxError = 0;
+ float avgError = 0;
+ int numVecs = 0;
+
+ {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ )
+ {
+ for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ )
+ {
+ for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ )
+ {
+ if (( x + y + z ) == 0 ) continue;
+
+ Vector vec( (float)x, (float)y, (float)z );
+ Vector vec2;
+
+ vec.normalize();
+ packVector( vec );
+ unpackVector( vec2 );
+
+ float ang = vec.dot( vec2 );
+ ang = (( fabs( ang ) > 0.99999f ) ? 0 : (float)acos(ang));
+
+ if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
+ {
+ cerr << "error: " << ang << endl;
+ cerr << "orig vec: " << vec.x << ",\t"
+ << vec.y << ",\t" << vec.z << "\tmVec: "
+ << mVec << endl;
+ cerr << "quantized vec2: " << vec2.x
+ << ",\t" << vec2.y << ",\t"
+ << vec2.z << endl << endl;
+ }
+ avgError += ang;
+ numVecs++;
+ if ( maxError < ang ) maxError = ang;
+ }
+ }
+ }}
+
+ for ( int w = 0; w < TEST_RANDOM; w++ )
+ {
+ Vector vec( genRandom(), genRandom(), genRandom());
+ Vector vec2;
+ vec.normalize();
+
+ packVector( vec );
+ unpackVector( vec2 );
+
+ float ang =vec.dot( vec2 );
+ ang = (( ang > 0.999f ) ? 0 : (float)acos(ang));
+
+ if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
+ {
+ cerr << "error: " << ang << endl;
+ cerr << "orig vec: " << vec.x << ",\t"
+ << vec.y << ",\t" << vec.z << "\tmVec: "
+ << mVec << endl;
+ cerr << "quantized vec2: " << vec2.x << ",\t"
+ << vec2.y << ",\t"
+ << vec2.z << endl << endl;
+ }
+ avgError += ang;
+ numVecs++;
+ if ( maxError < ang ) maxError = ang;
+ }
+
+ { for ( int x = 0; x < 50; x++ )
+ {
+ Vector vec( (float)x, 25.0f, 0.0f );
+ Vector vec2;
+
+ vec.normalize();
+ packVector( vec );
+ unpackVector( vec2 );
+
+ float ang = vec.dot( vec2 );
+ ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang));
+
+ if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
+ {
+ cerr << "error: " << ang << endl;
+ cerr << "orig vec: " << vec.x << ",\t"
+ << vec.y << ",\t" << vec.z << "\tmVec: "
+ << mVec << endl;
+ cerr << " quantized vec2: " << vec2.x << ",\t"
+ << vec2.y << ",\t" << vec2.z << endl << endl;
+ }
+
+ avgError += ang;
+ numVecs++;
+ if ( maxError < ang ) maxError = ang;
+ }}
+
+ cerr << "max angle error: " << maxError
+ << ", average error: " << avgError / numVecs
+ << ", num tested vecs: " << numVecs << endl;
+ }
+
+ friend ostream& operator<< ( ostream& os, const cUnitVector& vec )
+ { os << vec.mVec; return os; }
+#endif
+
+//protected: // !!!!
+
+ unsigned short mVec;
+ static float mUVAdjustment[0x2000];
+ static Vector mTmpVec;
+};
+
+#endif // _3D_VECTOR_H
+
+
diff --git a/mp/src/public/mathlib/compressed_light_cube.h b/mp/src/public/mathlib/compressed_light_cube.h
index a720808f..207f92db 100644
--- a/mp/src/public/mathlib/compressed_light_cube.h
+++ b/mp/src/public/mathlib/compressed_light_cube.h
@@ -1,24 +1,24 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-
-#ifndef COMPRESSED_LIGHT_CUBE_H
-#define COMPRESSED_LIGHT_CUBE_H
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/mathlib.h"
-
-
-struct CompressedLightCube
-{
- DECLARE_BYTESWAP_DATADESC();
- ColorRGBExp32 m_Color[6];
-};
-
-
-#endif // COMPRESSED_LIGHT_CUBE_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//=============================================================================//
+
+#ifndef COMPRESSED_LIGHT_CUBE_H
+#define COMPRESSED_LIGHT_CUBE_H
+#ifdef _WIN32
+#pragma once
+#endif
+
+
+#include "mathlib/mathlib.h"
+
+
+struct CompressedLightCube
+{
+ DECLARE_BYTESWAP_DATADESC();
+ ColorRGBExp32 m_Color[6];
+};
+
+
+#endif // COMPRESSED_LIGHT_CUBE_H
diff --git a/mp/src/public/mathlib/compressed_vector.h b/mp/src/public/mathlib/compressed_vector.h
index 6eb3ac5d..6a495229 100644
--- a/mp/src/public/mathlib/compressed_vector.h
+++ b/mp/src/public/mathlib/compressed_vector.h
@@ -1,608 +1,608 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef COMPRESSED_VECTOR_H
-#define COMPRESSED_VECTOR_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#include "tier0/dbg.h"
-#include "mathlib/vector.h"
-
-#include "mathlib/mathlib.h"
-
-#if defined( _X360 )
-#pragma bitfield_order( push, lsb_to_msb )
-#endif
-//=========================================================
-// fit a 3D vector into 32 bits
-//=========================================================
-
-class Vector32
-{
-public:
- // Construction/destruction:
- Vector32(void);
- Vector32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- Vector32& operator=(const Vector &vOther);
- operator Vector ();
-
-private:
- unsigned short x:10;
- unsigned short y:10;
- unsigned short z:10;
- unsigned short exp:2;
-};
-
-inline Vector32& Vector32::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
-
- float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
- fmax = Max( fmax, (float)fabs( vOther.z ) );
-
- for (exp = 0; exp < 3; exp++)
- {
- if (fmax < expScale[exp])
- break;
- }
- Assert( fmax < expScale[exp] );
-
- float fexp = 512.0f / expScale[exp];
-
- x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
- y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
- z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
- return *this;
-}
-
-
-inline Vector32::operator Vector ()
-{
- Vector tmp;
-
- static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
-
- float fexp = expScale[exp] / 512.0f;
-
- tmp.x = (((int)x) - 512) * fexp;
- tmp.y = (((int)y) - 512) * fexp;
- tmp.z = (((int)z) - 512) * fexp;
- return tmp;
-}
-
-
-//=========================================================
-// Fit a unit vector into 32 bits
-//=========================================================
-
-class Normal32
-{
-public:
- // Construction/destruction:
- Normal32(void);
- Normal32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- Normal32& operator=(const Vector &vOther);
- operator Vector ();
-
-private:
- unsigned short x:15;
- unsigned short y:15;
- unsigned short zneg:1;
-};
-
-
-inline Normal32& Normal32::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
- y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
- zneg = (vOther.z < 0);
- //x = vOther.x;
- //y = vOther.y;
- //z = vOther.z;
- return *this;
-}
-
-
-inline Normal32::operator Vector ()
-{
- Vector tmp;
-
- tmp.x = ((int)x - 16384) * (1 / 16384.0);
- tmp.y = ((int)y - 16384) * (1 / 16384.0);
- tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y );
- if (zneg)
- tmp.z = -tmp.z;
- return tmp;
-}
-
-
-//=========================================================
-// 64 bit Quaternion
-//=========================================================
-
-class Quaternion64
-{
-public:
- // Construction/destruction:
- Quaternion64(void);
- Quaternion64(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion64 &vOther);
- Quaternion64& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- uint64 x:21;
- uint64 y:21;
- uint64 z:21;
- uint64 wneg:1;
-};
-
-
-inline Quaternion64::operator Quaternion ()
-{
- Quaternion tmp;
-
- // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0
- tmp.x = ((int)x - 1048576) * (1 / 1048576.5f);
- tmp.y = ((int)y - 1048576) * (1 / 1048576.5f);
- tmp.z = ((int)z - 1048576) * (1 / 1048576.5f);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
- y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
- z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 48 bit Quaternion
-//=========================================================
-
-class Quaternion48
-{
-public:
- // Construction/destruction:
- Quaternion48(void);
- Quaternion48(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion48 &vOther);
- Quaternion48& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- unsigned short x:16;
- unsigned short y:16;
- unsigned short z:15;
- unsigned short wneg:1;
-};
-
-
-inline Quaternion48::operator Quaternion ()
-{
- Quaternion tmp;
-
- tmp.x = ((int)x - 32768) * (1 / 32768.0);
- tmp.y = ((int)y - 32768) * (1 / 32768.0);
- tmp.z = ((int)z - 16384) * (1 / 16384.0);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
- y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
- z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 32 bit Quaternion
-//=========================================================
-
-class Quaternion32
-{
-public:
- // Construction/destruction:
- Quaternion32(void);
- Quaternion32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion48 &vOther);
- Quaternion32& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- unsigned int x:11;
- unsigned int y:10;
- unsigned int z:10;
- unsigned int wneg:1;
-};
-
-
-inline Quaternion32::operator Quaternion ()
-{
- Quaternion tmp;
-
- tmp.x = ((int)x - 1024) * (1 / 1024.0);
- tmp.y = ((int)y - 512) * (1 / 512.0);
- tmp.z = ((int)z - 512) * (1 / 512.0);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
- y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
- z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 16 bit float
-//=========================================================
-
-
-const int float32bias = 127;
-const int float16bias = 15;
-
-const float maxfloat16bits = 65504.0f;
-
-class float16
-{
-public:
- //float16() {}
- //float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
-
- void Init() { m_storage.rawWord = 0; }
-// float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; }
-// float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
-// operator unsigned short () { return m_storage.rawWord; }
-// operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
- unsigned short GetBits() const
- {
- return m_storage.rawWord;
- }
- float GetFloat() const
- {
- return Convert16bitFloatTo32bits( m_storage.rawWord );
- }
- void SetFloat( float in )
- {
- m_storage.rawWord = ConvertFloatTo16bits( in );
- }
-
- bool IsInfinity() const
- {
- return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0;
- }
- bool IsNaN() const
- {
- return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0;
- }
-
- bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; }
- bool operator!=(const float16 other) const { return m_storage.rawWord != other.m_storage.rawWord; }
-
-// bool operator< (const float other) const { return GetFloat() < other; }
-// bool operator> (const float other) const { return GetFloat() > other; }
-
-protected:
- union float32bits
- {
- float rawFloat;
- struct
- {
- unsigned int mantissa : 23;
- unsigned int biased_exponent : 8;
- unsigned int sign : 1;
- } bits;
- };
-
- union float16bits
- {
- unsigned short rawWord;
- struct
- {
- unsigned short mantissa : 10;
- unsigned short biased_exponent : 5;
- unsigned short sign : 1;
- } bits;
- };
-
- static bool IsNaN( float16bits in )
- {
- return in.bits.biased_exponent == 31 && in.bits.mantissa != 0;
- }
- static bool IsInfinity( float16bits in )
- {
- return in.bits.biased_exponent == 31 && in.bits.mantissa == 0;
- }
-
- // 0x0001 - 0x03ff
- static unsigned short ConvertFloatTo16bits( float input )
- {
- if ( input > maxfloat16bits )
- input = maxfloat16bits;
- else if ( input < -maxfloat16bits )
- input = -maxfloat16bits;
-
- float16bits output;
- float32bits inFloat;
-
- inFloat.rawFloat = input;
-
- output.bits.sign = inFloat.bits.sign;
-
- if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa==0) )
- {
- // zero
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 0;
- }
- else if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa!=0) )
- {
- // denorm -- denorm float maps to 0 half
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 0;
- }
- else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa==0) )
- {
-#if 0
- // infinity
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 31;
-#else
- // infinity maps to maxfloat
- output.bits.mantissa = 0x3ff;
- output.bits.biased_exponent = 0x1e;
-#endif
- }
- else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa!=0) )
- {
-#if 0
- // NaN
- output.bits.mantissa = 1;
- output.bits.biased_exponent = 31;
-#else
- // NaN maps to zero
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 0;
-#endif
- }
- else
- {
- // regular number
- int new_exp = inFloat.bits.biased_exponent-127;
-
- if (new_exp<-24)
- {
- // this maps to 0
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 0;
- }
-
- if (new_exp<-14)
- {
- // this maps to a denorm
- output.bits.biased_exponent = 0;
- unsigned int exp_val = ( unsigned int )( -14 - ( inFloat.bits.biased_exponent - float32bias ) );
- if( exp_val > 0 && exp_val < 11 )
- {
- output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) );
- }
- }
- else if (new_exp>15)
- {
-#if 0
- // map this value to infinity
- output.bits.mantissa = 0;
- output.bits.biased_exponent = 31;
-#else
- // to big. . . maps to maxfloat
- output.bits.mantissa = 0x3ff;
- output.bits.biased_exponent = 0x1e;
-#endif
- }
- else
- {
- output.bits.biased_exponent = new_exp+15;
- output.bits.mantissa = (inFloat.bits.mantissa >> 13);
- }
- }
- return output.rawWord;
- }
-
- static float Convert16bitFloatTo32bits( unsigned short input )
- {
- float32bits output;
- const float16bits &inFloat = *((float16bits *)&input);
-
- if( IsInfinity( inFloat ) )
- {
- return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f );
- }
- if( IsNaN( inFloat ) )
- {
- return 0.0;
- }
- if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 )
- {
- // denorm
- const float half_denorm = (1.0f/16384.0f); // 2^-14
- float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f;
- float sgn = (inFloat.bits.sign)? -1.0f :1.0f;
- output.rawFloat = sgn*mantissa*half_denorm;
- }
- else
- {
- // regular number
- unsigned mantissa = inFloat.bits.mantissa;
- unsigned biased_exponent = inFloat.bits.biased_exponent;
- unsigned sign = ((unsigned)inFloat.bits.sign) << 31;
- biased_exponent = ( (biased_exponent - float16bias + float32bias) * (biased_exponent != 0) ) << 23;
- mantissa <<= (23-10);
-
- *((unsigned *)&output) = ( mantissa | biased_exponent | sign );
- }
-
- return output.rawFloat;
- }
-
-
- float16bits m_storage;
-};
-
-class float16_with_assign : public float16
-{
-public:
- float16_with_assign() {}
- float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
-
- float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; }
- float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
-// operator unsigned short () const { return m_storage.rawWord; }
- operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
-};
-
-//=========================================================
-// Fit a 3D vector in 48 bits
-//=========================================================
-
-class Vector48
-{
-public:
- // Construction/destruction:
- Vector48(void) {}
- Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); }
-
- // assignment
- Vector48& operator=(const Vector &vOther);
- operator Vector ();
-
- const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); }
-
- float16 x;
- float16 y;
- float16 z;
-};
-
-inline Vector48& Vector48::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- x.SetFloat( vOther.x );
- y.SetFloat( vOther.y );
- z.SetFloat( vOther.z );
- return *this;
-}
-
-
-inline Vector48::operator Vector ()
-{
- Vector tmp;
-
- tmp.x = x.GetFloat();
- tmp.y = y.GetFloat();
- tmp.z = z.GetFloat();
-
- return tmp;
-}
-
-//=========================================================
-// Fit a 2D vector in 32 bits
-//=========================================================
-
-class Vector2d32
-{
-public:
- // Construction/destruction:
- Vector2d32(void) {}
- Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); }
-
- // assignment
- Vector2d32& operator=(const Vector &vOther);
- Vector2d32& operator=(const Vector2D &vOther);
-
- operator Vector2D ();
-
- void Init( vec_t ix = 0.f, vec_t iy = 0.f);
-
- float16_with_assign x;
- float16_with_assign y;
-};
-
-inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther)
-{
- x.SetFloat( vOther.x );
- y.SetFloat( vOther.y );
- return *this;
-}
-
-inline Vector2d32::operator Vector2D ()
-{
- Vector2D tmp;
-
- tmp.x = x.GetFloat();
- tmp.y = y.GetFloat();
-
- return tmp;
-}
-
-inline void Vector2d32::Init( vec_t ix, vec_t iy )
-{
- x.SetFloat(ix);
- y.SetFloat(iy);
-}
-
-#if defined( _X360 )
-#pragma bitfield_order( pop )
-#endif
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef COMPRESSED_VECTOR_H
+#define COMPRESSED_VECTOR_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <math.h>
+#include <float.h>
+
+// For vec_t, put this somewhere else?
+#include "basetypes.h"
+
+// For rand(). We really need a library!
+#include <stdlib.h>
+
+#include "tier0/dbg.h"
+#include "mathlib/vector.h"
+
+#include "mathlib/mathlib.h"
+
+#if defined( _X360 )
+#pragma bitfield_order( push, lsb_to_msb )
+#endif
+//=========================================================
+// fit a 3D vector into 32 bits
+//=========================================================
+
+class Vector32
+{
+public:
+ // Construction/destruction:
+ Vector32(void);
+ Vector32(vec_t X, vec_t Y, vec_t Z);
+
+ // assignment
+ Vector32& operator=(const Vector &vOther);
+ operator Vector ();
+
+private:
+ unsigned short x:10;
+ unsigned short y:10;
+ unsigned short z:10;
+ unsigned short exp:2;
+};
+
+inline Vector32& Vector32::operator=(const Vector &vOther)
+{
+ CHECK_VALID(vOther);
+
+ static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
+
+ float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
+ fmax = Max( fmax, (float)fabs( vOther.z ) );
+
+ for (exp = 0; exp < 3; exp++)
+ {
+ if (fmax < expScale[exp])
+ break;
+ }
+ Assert( fmax < expScale[exp] );
+
+ float fexp = 512.0f / expScale[exp];
+
+ x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
+ y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
+ z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
+ return *this;
+}
+
+
+inline Vector32::operator Vector ()
+{
+ Vector tmp;
+
+ static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
+
+ float fexp = expScale[exp] / 512.0f;
+
+ tmp.x = (((int)x) - 512) * fexp;
+ tmp.y = (((int)y) - 512) * fexp;
+ tmp.z = (((int)z) - 512) * fexp;
+ return tmp;
+}
+
+
+//=========================================================
+// Fit a unit vector into 32 bits
+//=========================================================
+
+class Normal32
+{
+public:
+ // Construction/destruction:
+ Normal32(void);
+ Normal32(vec_t X, vec_t Y, vec_t Z);
+
+ // assignment
+ Normal32& operator=(const Vector &vOther);
+ operator Vector ();
+
+private:
+ unsigned short x:15;
+ unsigned short y:15;
+ unsigned short zneg:1;
+};
+
+
+inline Normal32& Normal32::operator=(const Vector &vOther)
+{
+ CHECK_VALID(vOther);
+
+ x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
+ y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
+ zneg = (vOther.z < 0);
+ //x = vOther.x;
+ //y = vOther.y;
+ //z = vOther.z;
+ return *this;
+}
+
+
+inline Normal32::operator Vector ()
+{
+ Vector tmp;
+
+ tmp.x = ((int)x - 16384) * (1 / 16384.0);
+ tmp.y = ((int)y - 16384) * (1 / 16384.0);
+ tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y );
+ if (zneg)
+ tmp.z = -tmp.z;
+ return tmp;
+}
+
+
+//=========================================================
+// 64 bit Quaternion
+//=========================================================
+
+class Quaternion64
+{
+public:
+ // Construction/destruction:
+ Quaternion64(void);
+ Quaternion64(vec_t X, vec_t Y, vec_t Z);
+
+ // assignment
+ // Quaternion& operator=(const Quaternion64 &vOther);
+ Quaternion64& operator=(const Quaternion &vOther);
+ operator Quaternion ();
+private:
+ uint64 x:21;
+ uint64 y:21;
+ uint64 z:21;
+ uint64 wneg:1;
+};
+
+
+inline Quaternion64::operator Quaternion ()
+{
+ Quaternion tmp;
+
+ // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0
+ tmp.x = ((int)x - 1048576) * (1 / 1048576.5f);
+ tmp.y = ((int)y - 1048576) * (1 / 1048576.5f);
+ tmp.z = ((int)z - 1048576) * (1 / 1048576.5f);
+ tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+ if (wneg)
+ tmp.w = -tmp.w;
+ return tmp;
+}
+
+inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
+{
+ CHECK_VALID(vOther);
+
+ x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
+ y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
+ z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
+ wneg = (vOther.w < 0);
+ return *this;
+}
+
+//=========================================================
+// 48 bit Quaternion
+//=========================================================
+
+class Quaternion48
+{
+public:
+ // Construction/destruction:
+ Quaternion48(void);
+ Quaternion48(vec_t X, vec_t Y, vec_t Z);
+
+ // assignment
+ // Quaternion& operator=(const Quaternion48 &vOther);
+ Quaternion48& operator=(const Quaternion &vOther);
+ operator Quaternion ();
+private:
+ unsigned short x:16;
+ unsigned short y:16;
+ unsigned short z:15;
+ unsigned short wneg:1;
+};
+
+
+inline Quaternion48::operator Quaternion ()
+{
+ Quaternion tmp;
+
+ tmp.x = ((int)x - 32768) * (1 / 32768.0);
+ tmp.y = ((int)y - 32768) * (1 / 32768.0);
+ tmp.z = ((int)z - 16384) * (1 / 16384.0);
+ tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+ if (wneg)
+ tmp.w = -tmp.w;
+ return tmp;
+}
+
+inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
+{
+ CHECK_VALID(vOther);
+
+ x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
+ y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
+ z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
+ wneg = (vOther.w < 0);
+ return *this;
+}
+
+//=========================================================
+// 32 bit Quaternion
+//=========================================================
+
+class Quaternion32
+{
+public:
+ // Construction/destruction:
+ Quaternion32(void);
+ Quaternion32(vec_t X, vec_t Y, vec_t Z);
+
+ // assignment
+ // Quaternion& operator=(const Quaternion48 &vOther);
+ Quaternion32& operator=(const Quaternion &vOther);
+ operator Quaternion ();
+private:
+ unsigned int x:11;
+ unsigned int y:10;
+ unsigned int z:10;
+ unsigned int wneg:1;
+};
+
+
+inline Quaternion32::operator Quaternion ()
+{
+ Quaternion tmp;
+
+ tmp.x = ((int)x - 1024) * (1 / 1024.0);
+ tmp.y = ((int)y - 512) * (1 / 512.0);
+ tmp.z = ((int)z - 512) * (1 / 512.0);
+ tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+ if (wneg)
+ tmp.w = -tmp.w;
+ return tmp;
+}
+
+inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
+{
+ CHECK_VALID(vOther);
+
+ x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
+ y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
+ z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
+ wneg = (vOther.w < 0);
+ return *this;
+}
+
+//=========================================================
+// 16 bit float
+//=========================================================
+
+
+const int float32bias = 127;
+const int float16bias = 15;
+
+const float maxfloat16bits = 65504.0f;
+
+class float16
+{
+public:
+ //float16() {}
+ //float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
+
+ void Init() { m_storage.rawWord = 0; }
+// float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; }
+// float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
+// operator unsigned short () { return m_storage.rawWord; }
+// operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
+ unsigned short GetBits() const
+ {
+ return m_storage.rawWord;
+ }
+ float GetFloat() const
+ {
+ return Convert16bitFloatTo32bits( m_storage.rawWord );
+ }
+ void SetFloat( float in )
+ {
+ m_storage.rawWord = ConvertFloatTo16bits( in );
+ }
+
+ bool IsInfinity() const
+ {
+ return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0;
+ }
+ bool IsNaN() const
+ {
+ return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0;
+ }
+
+ bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; }
+ bool operator!=(const float16 other) const { return m_storage.rawWord != other.m_storage.rawWord; }
+
+// bool operator< (const float other) const { return GetFloat() < other; }
+// bool operator> (const float other) const { return GetFloat() > other; }
+
+protected:
+ union float32bits
+ {
+ float rawFloat;
+ struct
+ {
+ unsigned int mantissa : 23;
+ unsigned int biased_exponent : 8;
+ unsigned int sign : 1;
+ } bits;
+ };
+
+ union float16bits
+ {
+ unsigned short rawWord;
+ struct
+ {
+ unsigned short mantissa : 10;
+ unsigned short biased_exponent : 5;
+ unsigned short sign : 1;
+ } bits;
+ };
+
+ static bool IsNaN( float16bits in )
+ {
+ return in.bits.biased_exponent == 31 && in.bits.mantissa != 0;
+ }
+ static bool IsInfinity( float16bits in )
+ {
+ return in.bits.biased_exponent == 31 && in.bits.mantissa == 0;
+ }
+
+ // 0x0001 - 0x03ff
+ static unsigned short ConvertFloatTo16bits( float input )
+ {
+ if ( input > maxfloat16bits )
+ input = maxfloat16bits;
+ else if ( input < -maxfloat16bits )
+ input = -maxfloat16bits;
+
+ float16bits output;
+ float32bits inFloat;
+
+ inFloat.rawFloat = input;
+
+ output.bits.sign = inFloat.bits.sign;
+
+ if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa==0) )
+ {
+ // zero
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 0;
+ }
+ else if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa!=0) )
+ {
+ // denorm -- denorm float maps to 0 half
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 0;
+ }
+ else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa==0) )
+ {
+#if 0
+ // infinity
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 31;
+#else
+ // infinity maps to maxfloat
+ output.bits.mantissa = 0x3ff;
+ output.bits.biased_exponent = 0x1e;
+#endif
+ }
+ else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa!=0) )
+ {
+#if 0
+ // NaN
+ output.bits.mantissa = 1;
+ output.bits.biased_exponent = 31;
+#else
+ // NaN maps to zero
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 0;
+#endif
+ }
+ else
+ {
+ // regular number
+ int new_exp = inFloat.bits.biased_exponent-127;
+
+ if (new_exp<-24)
+ {
+ // this maps to 0
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 0;
+ }
+
+ if (new_exp<-14)
+ {
+ // this maps to a denorm
+ output.bits.biased_exponent = 0;
+ unsigned int exp_val = ( unsigned int )( -14 - ( inFloat.bits.biased_exponent - float32bias ) );
+ if( exp_val > 0 && exp_val < 11 )
+ {
+ output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) );
+ }
+ }
+ else if (new_exp>15)
+ {
+#if 0
+ // map this value to infinity
+ output.bits.mantissa = 0;
+ output.bits.biased_exponent = 31;
+#else
+ // to big. . . maps to maxfloat
+ output.bits.mantissa = 0x3ff;
+ output.bits.biased_exponent = 0x1e;
+#endif
+ }
+ else
+ {
+ output.bits.biased_exponent = new_exp+15;
+ output.bits.mantissa = (inFloat.bits.mantissa >> 13);
+ }
+ }
+ return output.rawWord;
+ }
+
+ static float Convert16bitFloatTo32bits( unsigned short input )
+ {
+ float32bits output;
+ const float16bits &inFloat = *((float16bits *)&input);
+
+ if( IsInfinity( inFloat ) )
+ {
+ return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f );
+ }
+ if( IsNaN( inFloat ) )
+ {
+ return 0.0;
+ }
+ if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 )
+ {
+ // denorm
+ const float half_denorm = (1.0f/16384.0f); // 2^-14
+ float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f;
+ float sgn = (inFloat.bits.sign)? -1.0f :1.0f;
+ output.rawFloat = sgn*mantissa*half_denorm;
+ }
+ else
+ {
+ // regular number
+ unsigned mantissa = inFloat.bits.mantissa;
+ unsigned biased_exponent = inFloat.bits.biased_exponent;
+ unsigned sign = ((unsigned)inFloat.bits.sign) << 31;
+ biased_exponent = ( (biased_exponent - float16bias + float32bias) * (biased_exponent != 0) ) << 23;
+ mantissa <<= (23-10);
+
+ *((unsigned *)&output) = ( mantissa | biased_exponent | sign );
+ }
+
+ return output.rawFloat;
+ }
+
+
+ float16bits m_storage;
+};
+
+class float16_with_assign : public float16
+{
+public:
+ float16_with_assign() {}
+ float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
+
+ float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; }
+ float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
+// operator unsigned short () const { return m_storage.rawWord; }
+ operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
+};
+
+//=========================================================
+// Fit a 3D vector in 48 bits
+//=========================================================
+
+class Vector48
+{
+public:
+ // Construction/destruction:
+ Vector48(void) {}
+ Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); }
+
+ // assignment
+ Vector48& operator=(const Vector &vOther);
+ operator Vector ();
+
+ const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); }
+
+ float16 x;
+ float16 y;
+ float16 z;
+};
+
+inline Vector48& Vector48::operator=(const Vector &vOther)
+{
+ CHECK_VALID(vOther);
+
+ x.SetFloat( vOther.x );
+ y.SetFloat( vOther.y );
+ z.SetFloat( vOther.z );
+ return *this;
+}
+
+
+inline Vector48::operator Vector ()
+{
+ Vector tmp;
+
+ tmp.x = x.GetFloat();
+ tmp.y = y.GetFloat();
+ tmp.z = z.GetFloat();
+
+ return tmp;
+}
+
+//=========================================================
+// Fit a 2D vector in 32 bits
+//=========================================================
+
+class Vector2d32
+{
+public:
+ // Construction/destruction:
+ Vector2d32(void) {}
+ Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); }
+
+ // assignment
+ Vector2d32& operator=(const Vector &vOther);
+ Vector2d32& operator=(const Vector2D &vOther);
+
+ operator Vector2D ();
+
+ void Init( vec_t ix = 0.f, vec_t iy = 0.f);
+
+ float16_with_assign x;
+ float16_with_assign y;
+};
+
+inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther)
+{
+ x.SetFloat( vOther.x );
+ y.SetFloat( vOther.y );
+ return *this;
+}
+
+inline Vector2d32::operator Vector2D ()
+{
+ Vector2D tmp;
+
+ tmp.x = x.GetFloat();
+ tmp.y = y.GetFloat();
+
+ return tmp;
+}
+
+inline void Vector2d32::Init( vec_t ix, vec_t iy )
+{
+ x.SetFloat(ix);
+ y.SetFloat(iy);
+}
+
+#if defined( _X360 )
+#pragma bitfield_order( pop )
+#endif
+
+#endif
+
diff --git a/mp/src/public/mathlib/halton.h b/mp/src/public/mathlib/halton.h
index 204e5fd5..44df68ff 100644
--- a/mp/src/public/mathlib/halton.h
+++ b/mp/src/public/mathlib/halton.h
@@ -1,71 +1,71 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-// $Id$
-
-// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See
-// http://halton-sequences.wikiverse.org/.
-//
-// what this function is useful for is any sort of sampling/integration problem where
-// you want to solve it by random sampling. Each call the NextValue() generates
-// a random number between 0 and 1, in an unclumped manner, so that the space can be more
-// or less evenly sampled with a minimum number of samples.
-//
-// It is NOT useful for generating random numbers dynamically, since the outputs aren't
-// particularly random.
-//
-// To generate multidimensional sample values (points in a plane, etc), use two
-// HaltonSequenceGenerator_t's, with different (primes) bases.
-
-#ifndef HALTON_H
-#define HALTON_H
-
-#include <tier0/platform.h>
-#include <mathlib/vector.h>
-
-class HaltonSequenceGenerator_t
-{
- int seed;
- int base;
- float fbase; //< base as a float
-
-public:
- HaltonSequenceGenerator_t(int base); //< base MUST be prime, >=2
-
- float GetElement(int element);
-
- inline float NextValue(void)
- {
- return GetElement(seed++);
- }
-
-};
-
-
-class DirectionalSampler_t //< pseudo-random sphere sampling
-{
- HaltonSequenceGenerator_t zdot;
- HaltonSequenceGenerator_t vrot;
-public:
- DirectionalSampler_t(void)
- : zdot(2),vrot(3)
- {
- }
-
- Vector NextValue(void)
- {
- float zvalue=zdot.NextValue();
- zvalue=2*zvalue-1.0; // map from 0..1 to -1..1
- float phi=acos(zvalue);
- // now, generate a random rotation angle for x/y
- float theta=2.0*M_PI*vrot.NextValue();
- float sin_p=sin(phi);
- return Vector(cos(theta)*sin_p,
- sin(theta)*sin_p,
- zvalue);
-
- }
-};
-
-
-
-
-#endif // halton_h
+//========= Copyright Valve Corporation, All rights reserved. ============//
+// $Id$
+
+// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See
+// http://halton-sequences.wikiverse.org/.
+//
+// what this function is useful for is any sort of sampling/integration problem where
+// you want to solve it by random sampling. Each call the NextValue() generates
+// a random number between 0 and 1, in an unclumped manner, so that the space can be more
+// or less evenly sampled with a minimum number of samples.
+//
+// It is NOT useful for generating random numbers dynamically, since the outputs aren't
+// particularly random.
+//
+// To generate multidimensional sample values (points in a plane, etc), use two
+// HaltonSequenceGenerator_t's, with different (primes) bases.
+
+#ifndef HALTON_H
+#define HALTON_H
+
+#include <tier0/platform.h>
+#include <mathlib/vector.h>
+
+class HaltonSequenceGenerator_t
+{
+ int seed;
+ int base;
+ float fbase; //< base as a float
+
+public:
+ HaltonSequenceGenerator_t(int base); //< base MUST be prime, >=2
+
+ float GetElement(int element);
+
+ inline float NextValue(void)
+ {
+ return GetElement(seed++);
+ }
+
+};
+
+
+class DirectionalSampler_t //< pseudo-random sphere sampling
+{
+ HaltonSequenceGenerator_t zdot;
+ HaltonSequenceGenerator_t vrot;
+public:
+ DirectionalSampler_t(void)
+ : zdot(2),vrot(3)
+ {
+ }
+
+ Vector NextValue(void)
+ {
+ float zvalue=zdot.NextValue();
+ zvalue=2*zvalue-1.0; // map from 0..1 to -1..1
+ float phi=acos(zvalue);
+ // now, generate a random rotation angle for x/y
+ float theta=2.0*M_PI*vrot.NextValue();
+ float sin_p=sin(phi);
+ return Vector(cos(theta)*sin_p,
+ sin(theta)*sin_p,
+ zvalue);
+
+ }
+};
+
+
+
+
+#endif // halton_h
diff --git a/mp/src/public/mathlib/lightdesc.h b/mp/src/public/mathlib/lightdesc.h
index d03e3e19..1096d623 100644
--- a/mp/src/public/mathlib/lightdesc.h
+++ b/mp/src/public/mathlib/lightdesc.h
@@ -1,173 +1,173 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//===========================================================================//
-
-// light structure definitions.
-#ifndef LIGHTDESC_H
-#define LIGHTDESC_H
-
-#include <mathlib/ssemath.h>
-#include <mathlib/vector.h>
-
-//-----------------------------------------------------------------------------
-// Light structure
-//-----------------------------------------------------------------------------
-
-enum LightType_t
-{
- MATERIAL_LIGHT_DISABLE = 0,
- MATERIAL_LIGHT_POINT,
- MATERIAL_LIGHT_DIRECTIONAL,
- MATERIAL_LIGHT_SPOT,
-};
-
-enum LightType_OptimizationFlags_t
-{
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1,
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2,
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4,
- LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8,
-};
-
-struct LightDesc_t
-{
- LightType_t m_Type; //< MATERIAL_LIGHT_xxx
- Vector m_Color; //< color+intensity
- Vector m_Position; //< light source center position
- Vector m_Direction; //< for SPOT, direction it is pointing
- float m_Range; //< distance range for light.0=infinite
- float m_Falloff; //< angular falloff exponent for spot lights
- float m_Attenuation0; //< constant distance falloff term
- float m_Attenuation1; //< linear term of falloff
- float m_Attenuation2; //< quadatic term of falloff
- float m_Theta; //< inner cone angle. no angular falloff
- //< within this cone
- float m_Phi; //< outer cone angle
-
- // the values below are derived from the above settings for optimizations
- // These aren't used by DX8. . used for software lighting.
- float m_ThetaDot;
- float m_PhiDot;
- unsigned int m_Flags;
-protected:
- float OneOver_ThetaDot_Minus_PhiDot;
- float m_RangeSquared;
-public:
-
- void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms
-
- LightDesc_t(void)
- {
- }
-
- // constructors for various useful subtypes
-
- // a point light with infinite range
- LightDesc_t( const Vector &pos, const Vector &color )
- {
- InitPoint( pos, color );
- }
-
- /// a simple light. cone boundaries in radians. you pass a look_at point and the
- /// direciton is derived from that.
- LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary )
- {
- InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary );
- }
-
- void InitPoint( const Vector &pos, const Vector &color );
- void InitDirectional( const Vector &dir, const Vector &color );
- void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary );
-
- /// Given 4 points and 4 normals, ADD lighting from this light into "color".
- void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal,
- FourVectors &color, bool DoHalfLambert=false ) const;
- void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const;
- void ComputeLightAtPointsForDirectional( const FourVectors &pos,
- const FourVectors &normal,
- FourVectors &color, bool DoHalfLambert=false ) const;
-
- // warning - modifies color!!! set color first!!
- void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn );
-
- void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance );
-
-
-/// given a direction relative to the light source position, is this ray within the
- /// light cone (for spotlights..non spots consider all rays to be within their cone)
- bool IsDirectionWithinLightCone(const Vector &rdir) const
- {
- return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot));
- }
-
- float OneOverThetaDotMinusPhiDot() const
- {
- return OneOver_ThetaDot_Minus_PhiDot;
- }
-};
-
-
-//-----------------------------------------------------------------------------
-// a point light with infinite range
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color )
-{
- m_Type=MATERIAL_LIGHT_POINT;
- m_Color=color;
- m_Position=pos;
- m_Range=0.0; // infinite
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-//-----------------------------------------------------------------------------
-// a directional light with infinite range
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color )
-{
- m_Type=MATERIAL_LIGHT_DIRECTIONAL;
- m_Color=color;
- m_Direction=dir;
- m_Range=0.0; // infinite
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-//-----------------------------------------------------------------------------
-// a simple light. cone boundaries in radians. you pass a look_at point and the
-// direciton is derived from that.
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary)
-{
- m_Type=MATERIAL_LIGHT_SPOT;
- m_Color=color;
- m_Position=pos;
- m_Direction=point_at;
- m_Direction-=pos;
- VectorNormalizeFast(m_Direction);
- m_Falloff=5.0; // linear angle falloff
- m_Theta=inner_cone_boundary;
- m_Phi=outer_cone_boundary;
-
- m_Range=0.0; // infinite
-
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//===========================================================================//
+
+// light structure definitions.
+#ifndef LIGHTDESC_H
+#define LIGHTDESC_H
+
+#include <mathlib/ssemath.h>
+#include <mathlib/vector.h>
+
+//-----------------------------------------------------------------------------
+// Light structure
+//-----------------------------------------------------------------------------
+
+enum LightType_t
+{
+ MATERIAL_LIGHT_DISABLE = 0,
+ MATERIAL_LIGHT_POINT,
+ MATERIAL_LIGHT_DIRECTIONAL,
+ MATERIAL_LIGHT_SPOT,
+};
+
+enum LightType_OptimizationFlags_t
+{
+ LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1,
+ LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2,
+ LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4,
+ LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8,
+};
+
+struct LightDesc_t
+{
+ LightType_t m_Type; //< MATERIAL_LIGHT_xxx
+ Vector m_Color; //< color+intensity
+ Vector m_Position; //< light source center position
+ Vector m_Direction; //< for SPOT, direction it is pointing
+ float m_Range; //< distance range for light.0=infinite
+ float m_Falloff; //< angular falloff exponent for spot lights
+ float m_Attenuation0; //< constant distance falloff term
+ float m_Attenuation1; //< linear term of falloff
+ float m_Attenuation2; //< quadatic term of falloff
+ float m_Theta; //< inner cone angle. no angular falloff
+ //< within this cone
+ float m_Phi; //< outer cone angle
+
+ // the values below are derived from the above settings for optimizations
+ // These aren't used by DX8. . used for software lighting.
+ float m_ThetaDot;
+ float m_PhiDot;
+ unsigned int m_Flags;
+protected:
+ float OneOver_ThetaDot_Minus_PhiDot;
+ float m_RangeSquared;
+public:
+
+ void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms
+
+ LightDesc_t(void)
+ {
+ }
+
+ // constructors for various useful subtypes
+
+ // a point light with infinite range
+ LightDesc_t( const Vector &pos, const Vector &color )
+ {
+ InitPoint( pos, color );
+ }
+
+ /// a simple light. cone boundaries in radians. you pass a look_at point and the
+ /// direciton is derived from that.
+ LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at,
+ float inner_cone_boundary, float outer_cone_boundary )
+ {
+ InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary );
+ }
+
+ void InitPoint( const Vector &pos, const Vector &color );
+ void InitDirectional( const Vector &dir, const Vector &color );
+ void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
+ float inner_cone_boundary, float outer_cone_boundary );
+
+ /// Given 4 points and 4 normals, ADD lighting from this light into "color".
+ void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal,
+ FourVectors &color, bool DoHalfLambert=false ) const;
+ void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const;
+ void ComputeLightAtPointsForDirectional( const FourVectors &pos,
+ const FourVectors &normal,
+ FourVectors &color, bool DoHalfLambert=false ) const;
+
+ // warning - modifies color!!! set color first!!
+ void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn );
+
+ void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance );
+
+
+/// given a direction relative to the light source position, is this ray within the
+ /// light cone (for spotlights..non spots consider all rays to be within their cone)
+ bool IsDirectionWithinLightCone(const Vector &rdir) const
+ {
+ return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot));
+ }
+
+ float OneOverThetaDotMinusPhiDot() const
+ {
+ return OneOver_ThetaDot_Minus_PhiDot;
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// a point light with infinite range
+//-----------------------------------------------------------------------------
+inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color )
+{
+ m_Type=MATERIAL_LIGHT_POINT;
+ m_Color=color;
+ m_Position=pos;
+ m_Range=0.0; // infinite
+ m_Attenuation0=1.0;
+ m_Attenuation1=0;
+ m_Attenuation2=0;
+ RecalculateDerivedValues();
+}
+
+
+//-----------------------------------------------------------------------------
+// a directional light with infinite range
+//-----------------------------------------------------------------------------
+inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color )
+{
+ m_Type=MATERIAL_LIGHT_DIRECTIONAL;
+ m_Color=color;
+ m_Direction=dir;
+ m_Range=0.0; // infinite
+ m_Attenuation0=1.0;
+ m_Attenuation1=0;
+ m_Attenuation2=0;
+ RecalculateDerivedValues();
+}
+
+
+//-----------------------------------------------------------------------------
+// a simple light. cone boundaries in radians. you pass a look_at point and the
+// direciton is derived from that.
+//-----------------------------------------------------------------------------
+inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
+ float inner_cone_boundary, float outer_cone_boundary)
+{
+ m_Type=MATERIAL_LIGHT_SPOT;
+ m_Color=color;
+ m_Position=pos;
+ m_Direction=point_at;
+ m_Direction-=pos;
+ VectorNormalizeFast(m_Direction);
+ m_Falloff=5.0; // linear angle falloff
+ m_Theta=inner_cone_boundary;
+ m_Phi=outer_cone_boundary;
+
+ m_Range=0.0; // infinite
+
+ m_Attenuation0=1.0;
+ m_Attenuation1=0;
+ m_Attenuation2=0;
+ RecalculateDerivedValues();
+}
+
+
+#endif
+
diff --git a/mp/src/public/mathlib/math_pfns.h b/mp/src/public/mathlib/math_pfns.h
index 4436eab5..d43411ce 100644
--- a/mp/src/public/mathlib/math_pfns.h
+++ b/mp/src/public/mathlib/math_pfns.h
@@ -1,80 +1,80 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=====================================================================================//
-
-#ifndef _MATH_PFNS_H_
-#define _MATH_PFNS_H_
-
-#if defined( _X360 )
-#include <xboxmath.h>
-#endif
-
-#if !defined( _X360 )
-
-// These globals are initialized by mathlib and redirected based on available fpu features
-extern float (*pfSqrt)(float x);
-extern float (*pfRSqrt)(float x);
-extern float (*pfRSqrtFast)(float x);
-extern void (*pfFastSinCos)(float x, float *s, float *c);
-extern float (*pfFastCos)(float x);
-
-// The following are not declared as macros because they are often used in limiting situations,
-// and sometimes the compiler simply refuses to inline them for some reason
-#define FastSqrt(x) (*pfSqrt)(x)
-#define FastRSqrt(x) (*pfRSqrt)(x)
-#define FastRSqrtFast(x) (*pfRSqrtFast)(x)
-#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
-#define FastCos(x) (*pfFastCos)(x)
-
-#if defined(__i386__) || defined(_M_IX86)
-// On x86, the inline FPU or SSE sqrt instruction is faster than
-// the overhead of setting up a function call and saving/restoring
-// the FPU or SSE register state and can be scheduled better, too.
-#undef FastSqrt
-#define FastSqrt(x) ::sqrtf(x)
-#endif
-
-#endif // !_X360
-
-#if defined( _X360 )
-
-FORCEINLINE float _VMX_Sqrt( float x )
-{
- return __fsqrts( x );
-}
-
-FORCEINLINE float _VMX_RSqrt( float x )
-{
- float rroot = __frsqrte( x );
-
- // Single iteration NewtonRaphson on reciprocal square root estimate
- return (0.5f * rroot) * (3.0f - (x * rroot) * rroot);
-}
-
-FORCEINLINE float _VMX_RSqrtFast( float x )
-{
- return __frsqrte( x );
-}
-
-FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC )
-{
- XMScalarSinCos( pS, pC, a );
-}
-
-FORCEINLINE float _VMX_Cos( float a )
-{
- return XMScalarCos( a );
-}
-
-// the 360 has fixed hw and calls directly
-#define FastSqrt(x) _VMX_Sqrt(x)
-#define FastRSqrt(x) _VMX_RSqrt(x)
-#define FastRSqrtFast(x) _VMX_RSqrtFast(x)
-#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c)
-#define FastCos(x) _VMX_Cos(x)
-
-#endif // _X360
-
-#endif // _MATH_PFNS_H_
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//=====================================================================================//
+
+#ifndef _MATH_PFNS_H_
+#define _MATH_PFNS_H_
+
+#if defined( _X360 )
+#include <xboxmath.h>
+#endif
+
+#if !defined( _X360 )
+
+// These globals are initialized by mathlib and redirected based on available fpu features
+extern float (*pfSqrt)(float x);
+extern float (*pfRSqrt)(float x);
+extern float (*pfRSqrtFast)(float x);
+extern void (*pfFastSinCos)(float x, float *s, float *c);
+extern float (*pfFastCos)(float x);
+
+// The following are declared as macros rather than functions because they are often used in limiting situations,
+// and sometimes the compiler simply refuses to inline functions for some reason
+#define FastSqrt(x) (*pfSqrt)(x)
+#define FastRSqrt(x) (*pfRSqrt)(x)
+#define FastRSqrtFast(x) (*pfRSqrtFast)(x)
+#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
+#define FastCos(x) (*pfFastCos)(x)
+
+#if defined(__i386__) || defined(_M_IX86)
+// On x86, the inline FPU or SSE sqrt instruction is faster than
+// the overhead of setting up a function call and saving/restoring
+// the FPU or SSE register state and can be scheduled better, too.
+#undef FastSqrt
+#define FastSqrt(x) ::sqrtf(x)
+#endif
+
+#endif // !_X360
+
+#if defined( _X360 )
+
+FORCEINLINE float _VMX_Sqrt( float x )
+{
+ return __fsqrts( x );
+}
+
+FORCEINLINE float _VMX_RSqrt( float x )
+{
+ float rroot = __frsqrte( x );
+
+ // Single iteration Newton-Raphson on reciprocal square root estimate
+ return (0.5f * rroot) * (3.0f - (x * rroot) * rroot);
+}
+
+FORCEINLINE float _VMX_RSqrtFast( float x )
+{
+ return __frsqrte( x );
+}
+
+FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC )
+{
+ XMScalarSinCos( pS, pC, a );
+}
+
+FORCEINLINE float _VMX_Cos( float a )
+{
+ return XMScalarCos( a );
+}
+
+// the 360 has fixed hw and calls directly
+#define FastSqrt(x) _VMX_Sqrt(x)
+#define FastRSqrt(x) _VMX_RSqrt(x)
+#define FastRSqrtFast(x) _VMX_RSqrtFast(x)
+#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c)
+#define FastCos(x) _VMX_Cos(x)
+
+#endif // _X360
+
+#endif // _MATH_PFNS_H_
diff --git a/mp/src/public/mathlib/mathlib.h b/mp/src/public/mathlib/mathlib.h
index e1873cd0..f734ae68 100644
--- a/mp/src/public/mathlib/mathlib.h
+++ b/mp/src/public/mathlib/mathlib.h
@@ -1,2186 +1,2186 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//===========================================================================//
-
-#ifndef MATH_LIB_H
-#define MATH_LIB_H
-
-#include <math.h>
-#include "tier0/basetypes.h"
-#include "tier0/commonmacros.h"
-#include "mathlib/vector.h"
-#include "mathlib/vector2d.h"
-#include "tier0/dbg.h"
-
-#include "mathlib/math_pfns.h"
-
-#if defined(__i386__) || defined(_M_IX86)
-// For MMX intrinsics
-#include <xmmintrin.h>
-#endif
-
-// XXX remove me
-#undef clamp
-
-// Uncomment this to enable FP exceptions in parts of the code.
-// This can help track down FP bugs. However the code is not
-// FP exception clean so this not a turnkey operation.
-//#define FP_EXCEPTIONS_ENABLED
-
-
-#ifdef FP_EXCEPTIONS_ENABLED
-#include <float.h> // For _clearfp and _controlfp_s
-#endif
-
-// FPExceptionDisabler and FPExceptionEnabler taken from my blog post
-// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/
-
-// Declare an object of this type in a scope in order to suppress
-// all floating-point exceptions temporarily. The old exception
-// state will be reset at the end.
-class FPExceptionDisabler
-{
-public:
-#ifdef FP_EXCEPTIONS_ENABLED
- FPExceptionDisabler();
- ~FPExceptionDisabler();
-
-private:
- unsigned int mOldValues;
-#else
- FPExceptionDisabler() {}
- ~FPExceptionDisabler() {}
-#endif
-
-private:
- // Make the copy constructor and assignment operator private
- // and unimplemented to prohibit copying.
- FPExceptionDisabler(const FPExceptionDisabler&);
- FPExceptionDisabler& operator=(const FPExceptionDisabler&);
-};
-
-// Declare an object of this type in a scope in order to enable a
-// specified set of floating-point exceptions temporarily. The old
-// exception state will be reset at the end.
-// This class can be nested.
-class FPExceptionEnabler
-{
-public:
- // Overflow, divide-by-zero, and invalid-operation are the FP
- // exceptions most frequently associated with bugs.
-#ifdef FP_EXCEPTIONS_ENABLED
- FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID);
- ~FPExceptionEnabler();
-
-private:
- unsigned int mOldValues;
-#else
- FPExceptionEnabler(unsigned int enableBits = 0)
- {
- }
- ~FPExceptionEnabler()
- {
- }
-#endif
-
-private:
- // Make the copy constructor and assignment operator private
- // and unimplemented to prohibit copying.
- FPExceptionEnabler(const FPExceptionEnabler&);
- FPExceptionEnabler& operator=(const FPExceptionEnabler&);
-};
-
-
-
-#ifdef DEBUG // stop crashing edit-and-continue
-FORCEINLINE float clamp( float val, float minVal, float maxVal )
-{
- if ( maxVal < minVal )
- return maxVal;
- else if( val < minVal )
- return minVal;
- else if( val > maxVal )
- return maxVal;
- else
- return val;
-}
-#else // DEBUG
-FORCEINLINE float clamp( float val, float minVal, float maxVal )
-{
-#if defined(__i386__) || defined(_M_IX86)
- _mm_store_ss( &val,
- _mm_min_ss(
- _mm_max_ss(
- _mm_load_ss(&val),
- _mm_load_ss(&minVal) ),
- _mm_load_ss(&maxVal) ) );
-#else
- val = fpmax(minVal, val);
- val = fpmin(maxVal, val);
-#endif
- return val;
-}
-#endif // DEBUG
-
-//
-// Returns a clamped value in the range [min, max].
-//
-template< class T >
-inline T clamp( T const &val, T const &minVal, T const &maxVal )
-{
- if ( maxVal < minVal )
- return maxVal;
- else if( val < minVal )
- return minVal;
- else if( val > maxVal )
- return maxVal;
- else
- return val;
-}
-
-
-// plane_t structure
-// !!! if this is changed, it must be changed in asm code too !!!
-// FIXME: does the asm code even exist anymore?
-// FIXME: this should move to a different file
-struct cplane_t
-{
- Vector normal;
- float dist;
- byte type; // for fast side tests
- byte signbits; // signx + (signy<<1) + (signz<<1)
- byte pad[2];
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
- cplane_t() {}
-
-private:
- // No copy constructors allowed if we're in optimal mode
- cplane_t(const cplane_t& vOther);
-#endif
-};
-
-// structure offset for asm code
-#define CPLANE_NORMAL_X 0
-#define CPLANE_NORMAL_Y 4
-#define CPLANE_NORMAL_Z 8
-#define CPLANE_DIST 12
-#define CPLANE_TYPE 16
-#define CPLANE_SIGNBITS 17
-#define CPLANE_PAD0 18
-#define CPLANE_PAD1 19
-
-// 0-2 are axial planes
-#define PLANE_X 0
-#define PLANE_Y 1
-#define PLANE_Z 2
-
-// 3-5 are non-axial planes snapped to the nearest
-#define PLANE_ANYX 3
-#define PLANE_ANYY 4
-#define PLANE_ANYZ 5
-
-
-//-----------------------------------------------------------------------------
-// Frustum plane indices.
-// WARNING: there is code that depends on these values
-//-----------------------------------------------------------------------------
-
-enum
-{
- FRUSTUM_RIGHT = 0,
- FRUSTUM_LEFT = 1,
- FRUSTUM_TOP = 2,
- FRUSTUM_BOTTOM = 3,
- FRUSTUM_NEARZ = 4,
- FRUSTUM_FARZ = 5,
- FRUSTUM_NUMPLANES = 6
-};
-
-extern int SignbitsForPlane( cplane_t *out );
-
-class Frustum_t
-{
-public:
- void SetPlane( int i, int nType, const Vector &vecNormal, float dist )
- {
- m_Plane[i].normal = vecNormal;
- m_Plane[i].dist = dist;
- m_Plane[i].type = nType;
- m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] );
- m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) );
- }
-
- inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; }
- inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; }
-
-private:
- cplane_t m_Plane[FRUSTUM_NUMPLANES];
- Vector m_AbsNormal[FRUSTUM_NUMPLANES];
-};
-
-// Computes Y fov from an X fov and a screen aspect ratio + X from Y
-float CalcFovY( float flFovX, float flScreenAspect );
-float CalcFovX( float flFovY, float flScreenAspect );
-
-// Generate a frustum based on perspective view parameters
-// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle)
-void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum );
-void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum );
-
-// Cull the world-space bounding box to the specified frustum.
-bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
-bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
-
-struct matrix3x4_t
-{
- matrix3x4_t() {}
- matrix3x4_t(
- float m00, float m01, float m02, float m03,
- float m10, float m11, float m12, float m13,
- float m20, float m21, float m22, float m23 )
- {
- m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03;
- m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13;
- m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23;
- }
-
- //-----------------------------------------------------------------------------
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- //-----------------------------------------------------------------------------
- void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
- {
- m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
- m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
- m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] = vecOrigin.z;
- }
-
- //-----------------------------------------------------------------------------
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- //-----------------------------------------------------------------------------
- matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
- {
- Init( xAxis, yAxis, zAxis, vecOrigin );
- }
-
- inline void Invalidate( void )
- {
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 4; j++)
- {
- m_flMatVal[i][j] = VEC_T_NAN;
- }
- }
- }
-
- float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
- const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
- float *Base() { return &m_flMatVal[0][0]; }
- const float *Base() const { return &m_flMatVal[0][0]; }
-
- float m_flMatVal[3][4];
-};
-
-
-#ifndef M_PI
- #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h
-#endif
-
-#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything.
-
-// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system.
-#ifndef RAD2DEG
- #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) )
-#endif
-
-#ifndef DEG2RAD
- #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) )
-#endif
-
-// Used to represent sides of things like planes.
-#define SIDE_FRONT 0
-#define SIDE_BACK 1
-#define SIDE_ON 2
-#define SIDE_CROSS -2 // necessary for polylib.c
-
-#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later!
-#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there?
-
-extern bool s_bMathlibInitialized;
-
-extern const Vector vec3_origin;
-extern const QAngle vec3_angle;
-extern const Quaternion quat_identity;
-extern const Vector vec3_invalid;
-extern const int nanmask;
-
-#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
-
-FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2)
-{
- return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
-}
-FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c)
-{
- c[0]=a[0]-b[0];
- c[1]=a[1]-b[1];
- c[2]=a[2]-b[2];
-}
-FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c)
-{
- c[0]=a[0]+b[0];
- c[1]=a[1]+b[1];
- c[2]=a[2]+b[2];
-}
-FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b)
-{
- b[0]=a[0];
- b[1]=a[1];
- b[2]=a[2];
-}
-FORCEINLINE void VectorClear(vec_t *a)
-{
- a[0]=a[1]=a[2]=0;
-}
-
-FORCEINLINE float VectorMaximum(const vec_t *v)
-{
- return max( v[0], max( v[1], v[2] ) );
-}
-
-FORCEINLINE float VectorMaximum(const Vector& v)
-{
- return max( v.x, max( v.y, v.z ) );
-}
-
-FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out)
-{
- out[0] = in[0]*scale;
- out[1] = in[1]*scale;
- out[2] = in[2]*scale;
-}
-
-
-// Cannot be forceinline as they have overloads:
-inline void VectorFill(vec_t *a, float b)
-{
- a[0]=a[1]=a[2]=b;
-}
-
-inline void VectorNegate(vec_t *a)
-{
- a[0]=-a[0];
- a[1]=-a[1];
- a[2]=-a[2];
-}
-
-
-//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) )
-#define Vector2Clear(x) {(x)[0]=(x)[1]=0;}
-#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);}
-#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];}
-#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];}
-#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];}
-#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];}
-
-// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's.
-// remove when no longer needed.
-#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0)
-#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] )
-
-FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest )
-{
- dest[0]=start[0]+direction[0]*scale;
- dest[1]=start[1]+direction[1]*scale;
- dest[2]=start[2]+direction[2]*scale;
-}
-
-FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest )
-{
- dest.x=start.x+direction.x*scale;
- dest.y=start.y+direction.y*scale;
- dest.z=start.z+direction.z*scale;
-}
-
-FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
-{
- VectorMAInline(start, scale, direction, dest);
-}
-
-FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest )
-{
- VectorMAInline(start, scale, direction, dest);
-}
-
-
-int VectorCompare (const float *v1, const float *v2);
-
-inline float VectorLength(const float *v)
-{
- return FastSqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + FLT_EPSILON );
-}
-
-void CrossProduct (const float *v1, const float *v2, float *cross);
-
-qboolean VectorsEqual( const float *v1, const float *v2 );
-
-inline vec_t RoundInt (vec_t in)
-{
- return floor(in + 0.5f);
-}
-
-int Q_log2(int val);
-
-// Math routines done in optimized assembly math package routines
-void inline SinCos( float radians, float *sine, float *cosine )
-{
-#if defined( _X360 )
- XMScalarSinCos( sine, cosine, radians );
-#elif defined( PLATFORM_WINDOWS_PC32 )
- _asm
- {
- fld DWORD PTR [radians]
- fsincos
-
- mov edx, DWORD PTR [cosine]
- mov eax, DWORD PTR [sine]
-
- fstp DWORD PTR [edx]
- fstp DWORD PTR [eax]
- }
-#elif defined( PLATFORM_WINDOWS_PC64 )
- *sine = sin( radians );
- *cosine = cos( radians );
-#elif defined( POSIX )
- register double __cosr, __sinr;
- __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
-
- *sine = __sinr;
- *cosine = __cosr;
-#endif
-}
-
-#define SIN_TABLE_SIZE 256
-#define FTOIBIAS 12582912.f
-extern float SinCosTable[SIN_TABLE_SIZE];
-
-inline float TableCos( float theta )
-{
- union
- {
- int i;
- float f;
- } ftmp;
-
- // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this.
- ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) );
- return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
-}
-
-inline float TableSin( float theta )
-{
- union
- {
- int i;
- float f;
- } ftmp;
-
- // ideally, the following should compile down to: theta * constant + constant
- ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS;
- return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
-}
-
-template<class T>
-FORCEINLINE T Square( T const &a )
-{
- return a * a;
-}
-
-
-// return the smallest power of two >= x.
-// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed)
-// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint,
-// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints
-FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x )
-{
- x -= 1;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
- return x + 1;
-}
-
-// return the largest power of two <= x. Will return 0 if passed 0
-FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x )
-{
- if ( x >= 0x80000000 )
- return 0x80000000;
-
- return SmallestPowerOfTwoGreaterOrEqual( x + 1 ) >> 1;
-}
-
-
-// Math routines for optimizing division
-void FloorDivMod (double numer, double denom, int *quotient, int *rem);
-int GreatestCommonDivisor (int i1, int i2);
-
-// Test for FPU denormal mode
-bool IsDenormal( const float &val );
-
-// MOVEMENT INFO
-enum
-{
- PITCH = 0, // up / down
- YAW, // left / right
- ROLL // fall over
-};
-
-void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!!
-void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp );
-void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out);
-void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out);
-void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out);
-void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out );
-void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out );
-void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
-QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
-
-#endif
-
-void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis );
-void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out );
-void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
-
-// Matrix equality test
-bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
-
-void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
-void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
-
-inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
-{
- MatrixGetColumn ( in, 3, out );
-}
-
-inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
-{
- MatrixSetColumn ( in, 3, out );
-}
-
-void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
-void MatrixScaleByZero ( matrix3x4_t &out );
-
-//void DecomposeRotation( const matrix3x4_t &mat, float *out );
-void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
-void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
-
-// For identical interface w/ VMatrix
-inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out )
-{
- ConcatTransforms( in1, in2, out );
-}
-
-void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt );
-float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q );
-void QuaternionScale( const Quaternion &p, float t, Quaternion &q );
-void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-float QuaternionDotProduct( const Quaternion &p, const Quaternion &q );
-void QuaternionConjugate( const Quaternion &p, Quaternion &q );
-void QuaternionInvert( const Quaternion &p, Quaternion &q );
-float QuaternionNormalize( Quaternion &q );
-void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix );
-void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix );
-void QuaternionAngles( const Quaternion &q, QAngle &angles );
-void AngleQuaternion( const QAngle& angles, Quaternion &qt );
-void QuaternionAngles( const Quaternion &q, RadianEuler &angles );
-void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
-void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle );
-void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q );
-void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q );
-void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q );
-
-// A couple methods to find the dot product of a vector with a matrix row or column...
-inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 )
-{
- Assert( (row >= 0) && (row < 3) );
- return DotProduct( in1[row], in2.Base() );
-}
-
-inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 )
-{
- Assert( (col >= 0) && (col < 4) );
- return in1[0][col] * in2[0] + in1[1][col] * in2[1] + in1[2][col] * in2[2];
-}
-
-int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane);
-
-inline float anglemod(float a)
-{
- a = (360.f/65536) * ((int)(a*(65536.f/360.0f)) & 65535);
- return a;
-}
-
-// Remap a value in the range [A,B] to [C,D].
-inline float RemapVal( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- return C + (D - C) * (val - A) / (B - A);
-}
-
-inline float RemapValClamped( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- cVal = clamp( cVal, 0.0f, 1.0f );
-
- return C + (D - C) * cVal;
-}
-
-// Returns A + (B-A)*flPercent.
-// float Lerp( float flPercent, float A, float B );
-template <class T>
-FORCEINLINE T Lerp( float flPercent, T const &A, T const &B )
-{
- return A + (B - A) * flPercent;
-}
-
-FORCEINLINE float Sqr( float f )
-{
- return f*f;
-}
-
-// 5-argument floating point linear interpolation.
-// FLerp(f1,f2,i1,i2,x)=
-// f1 at x=i1
-// f2 at x=i2
-// smooth lerp between f1 and f2 at x>i1 and x<i2
-// extrapolation for x<i1 or x>i2
-//
-// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2,
-// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x)
-// i2=i1 will cause a divide by zero.
-static inline float FLerp(float f1, float f2, float i1, float i2, float x)
-{
- return f1+(f2-f1)*(x-i1)/(i2-i1);
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// YWB: Specialization for interpolating euler angles via quaternions...
-template<> FORCEINLINE QAngle Lerp<QAngle>( float flPercent, const QAngle& q1, const QAngle& q2 )
-{
- // Avoid precision errors
- if ( q1 == q2 )
- return q1;
-
- Quaternion src, dest;
-
- // Convert to quaternions
- AngleQuaternion( q1, src );
- AngleQuaternion( q2, dest );
-
- Quaternion result;
-
- // Slerp
- QuaternionSlerp( src, dest, flPercent, result );
-
- // Convert to euler
- QAngle output;
- QuaternionAngles( result, output );
- return output;
-}
-
-#else
-
-#pragma error
-
-// NOTE NOTE: I haven't tested this!! It may not work! Check out interpolatedvar.cpp in the client dll to try it
-template<> FORCEINLINE QAngleByValue Lerp<QAngleByValue>( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 )
-{
- // Avoid precision errors
- if ( q1 == q2 )
- return q1;
-
- Quaternion src, dest;
-
- // Convert to quaternions
- AngleQuaternion( q1, src );
- AngleQuaternion( q2, dest );
-
- Quaternion result;
-
- // Slerp
- QuaternionSlerp( src, dest, flPercent, result );
-
- // Convert to euler
- QAngleByValue output;
- QuaternionAngles( result, output );
- return output;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-/// Same as swap(), but won't cause problems with std::swap
-template <class T>
-FORCEINLINE void V_swap( T& x, T& y )
-{
- T temp = x;
- x = y;
- y = temp;
-}
-
-template <class T> FORCEINLINE T AVG(T a, T b)
-{
- return (a+b)/2;
-}
-
-// number of elements in an array of static size
-#define NELEMS(x) ARRAYSIZE(x)
-
-// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
-#define XYZ(v) (v).x,(v).y,(v).z
-
-
-inline float Sign( float x )
-{
- return (x <0.0f) ? -1.0f : 1.0f;
-}
-
-//
-// Clamps the input integer to the given array bounds.
-// Equivalent to the following, but without using any branches:
-//
-// if( n < 0 ) return 0;
-// else if ( n > maxindex ) return maxindex;
-// else return n;
-//
-// This is not always a clear performance win, but when you have situations where a clamped
-// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...)
-//
-// Note: This code has been run against all possible integers.
-//
-inline int ClampArrayBounds( int n, unsigned maxindex )
-{
- // mask is 0 if less than 4096, 0xFFFFFFFF if greater than
- unsigned int inrangemask = 0xFFFFFFFF + (((unsigned) n) > maxindex );
- unsigned int lessthan0mask = 0xFFFFFFFF + ( n >= 0 );
-
- // If the result was valid, set the result, (otherwise sets zero)
- int result = (inrangemask & n);
-
- // if the result was out of range or zero.
- result |= ((~inrangemask) & (~lessthan0mask)) & maxindex;
-
- return result;
-}
-
-
-#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \
- (((p)->type < 3)? \
- ( \
- ((p)->dist <= (emins)[(p)->type])? \
- 1 \
- : \
- ( \
- ((p)->dist >= (emaxs)[(p)->type])?\
- 2 \
- : \
- 3 \
- ) \
- ) \
- : \
- BoxOnPlaneSide( (emins), (emaxs), (p)))
-
-//-----------------------------------------------------------------------------
-// FIXME: Vector versions.... the float versions will go away hopefully soon!
-//-----------------------------------------------------------------------------
-
-void AngleVectors (const QAngle& angles, Vector *forward);
-void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
-void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
-void AngleMatrix (const QAngle &angles, matrix3x4_t &mat );
-void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat );
-void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat );
-void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat );
-void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat );
-void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat );
-void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat );
-void VectorAngles( const Vector &forward, QAngle &angles );
-void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles );
-void VectorMatrix( const Vector &forward, matrix3x4_t &mat );
-void VectorVectors( const Vector &forward, Vector &right, Vector &up );
-void SetIdentityMatrix( matrix3x4_t &mat );
-void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst );
-void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst );
-
-inline void SetScaleMatrix( float flScale, matrix3x4_t &dst )
-{
- SetScaleMatrix( flScale, flScale, flScale, dst );
-}
-
-inline void SetScaleMatrix( const Vector& scale, matrix3x4_t &dst )
-{
- SetScaleMatrix( scale.x, scale.y, scale.z, dst );
-}
-
-// Computes the inverse transpose
-void MatrixTranspose( matrix3x4_t& mat );
-void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
-void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
-
-inline void PositionMatrix( const Vector &position, matrix3x4_t &mat )
-{
- MatrixSetColumn( position, 3, mat );
-}
-
-inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position )
-{
- MatrixGetColumn( matrix, 3, position );
-}
-
-inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorRotate( &in1.x, in2, &out.x );
-}
-
-inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorIRotate( &in1.x, in2, &out.x );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles )
-{
- MatrixAngles( matrix, &angles.x );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position )
-{
- MatrixAngles( matrix, angles );
- MatrixPosition( matrix, position );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles )
-{
- MatrixAngles( matrix, &angles.x );
-
- angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) );
-}
-
-void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position );
-
-void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position );
-
-inline int VectorCompare (const Vector& v1, const Vector& v2)
-{
- return v1 == v2;
-}
-
-inline void VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorTransform( &in1.x, in2, &out.x );
-}
-
-inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorITransform( &in1.x, in2, &out.x );
-}
-
-/*
-inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out )
-{
- DecomposeRotation( mat, &out.x );
-}
-*/
-
-inline int BoxOnPlaneSide (const Vector& emins, const Vector& emaxs, const cplane_t *plane )
-{
- return BoxOnPlaneSide( &emins.x, &emaxs.x, plane );
-}
-
-inline void VectorFill(Vector& a, float b)
-{
- a[0]=a[1]=a[2]=b;
-}
-
-inline void VectorNegate(Vector& a)
-{
- a[0] = -a[0];
- a[1] = -a[1];
- a[2] = -a[2];
-}
-
-inline vec_t VectorAvg(Vector& a)
-{
- return ( a[0] + a[1] + a[2] ) / 3;
-}
-
-//-----------------------------------------------------------------------------
-// Box/plane test (slow version)
-//-----------------------------------------------------------------------------
-inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f )
-{
- Vector corners[2];
-
- if (p->normal[0] < 0)
- {
- corners[0][0] = emins[0];
- corners[1][0] = emaxs[0];
- }
- else
- {
- corners[1][0] = emins[0];
- corners[0][0] = emaxs[0];
- }
-
- if (p->normal[1] < 0)
- {
- corners[0][1] = emins[1];
- corners[1][1] = emaxs[1];
- }
- else
- {
- corners[1][1] = emins[1];
- corners[0][1] = emaxs[1];
- }
-
- if (p->normal[2] < 0)
- {
- corners[0][2] = emins[2];
- corners[1][2] = emaxs[2];
- }
- else
- {
- corners[1][2] = emins[2];
- corners[0][2] = emaxs[2];
- }
-
- int sides = 0;
-
- float dist1 = DotProduct (p->normal, corners[0]) - p->dist;
- if (dist1 >= tolerance)
- sides = 1;
-
- float dist2 = DotProduct (p->normal, corners[1]) - p->dist;
- if (dist2 < -tolerance)
- sides |= 2;
-
- return sides;
-}
-
-//-----------------------------------------------------------------------------
-// Helpers for bounding box construction
-//-----------------------------------------------------------------------------
-
-void ClearBounds (Vector& mins, Vector& maxs);
-void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs);
-
-//
-// COLORSPACE/GAMMA CONVERSION STUFF
-//
-void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright );
-
-// convert texture to linear 0..1 value
-inline float TexLightToLinear( int c, int exponent )
-{
- extern float power2_n[256];
- Assert( exponent >= -128 && exponent <= 127 );
- return ( float )c * power2_n[exponent+128];
-}
-
-
-// convert texture to linear 0..1 value
-int LinearToTexture( float f );
-// converts 0..1 linear value to screen gamma (0..255)
-int LinearToScreenGamma( float f );
-float TextureToLinear( int c );
-
-// compressed color format
-struct ColorRGBExp32
-{
- byte r, g, b;
- signed char exponent;
-};
-
-void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out );
-void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c );
-
-// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists
-bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 );
-
-// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists
-bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
-
-// solves for a,b,c specified as above, except that it always creates a monotonically increasing or
-// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the
-// monoticity condition, it is possible that the resulting quadratic will only approximate the data
-// instead of interpolating it. This code is not especially fast.
-bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2,
- float x3, float y3, float &a, float &b, float &c );
-
-
-
-
-// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists
-bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
-
-// rotate a vector around the Z axis (YAW)
-void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
-
-
-// Bias takes an X value between 0 and 1 and returns another value between 0 and 1
-// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1.
-// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1.
-//
-// For example, with biasAmt = 0.2, the curve looks like this:
-//
-// 1
-// | *
-// | *
-// | *
-// | **
-// | **
-// | ****
-// |*********
-// |___________________
-// 0 1
-//
-//
-// With biasAmt = 0.8, the curve looks like this:
-//
-// 1
-// | **************
-// | **
-// | *
-// | *
-// |*
-// |*
-// |*
-// |___________________
-// 0 1
-//
-// With a biasAmt of 0.5, Bias returns X.
-float Bias( float x, float biasAmt );
-
-
-// Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
-// Lower bias values bias towards 0.5 and higher bias values bias away from it.
-//
-// For example, with biasAmt = 0.2, the curve looks like this:
-//
-// 1
-// | *
-// | *
-// | **
-// | ***************
-// | **
-// | *
-// |*
-// |___________________
-// 0 1
-//
-//
-// With biasAmt = 0.8, the curve looks like this:
-//
-// 1
-// | *****
-// | ***
-// | *
-// | *
-// | *
-// | ***
-// |*****
-// |___________________
-// 0 1
-float Gain( float x, float biasAmt );
-
-
-// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
-// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
-// any fadein/fadeout effect where it should start and end smoothly.
-//
-// The curve looks like this:
-//
-// 1
-// | **
-// | * *
-// | * *
-// | * *
-// | * *
-// | ** **
-// |*** ***
-// |___________________
-// 0 1
-//
-float SmoothCurve( float x );
-
-
-// This works like SmoothCurve, with two changes:
-//
-// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos.
-// (So if you specify flPeakPos=0.2, then the peak will slide to the left).
-//
-// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak.
-// Low values blunt the peak and high values sharpen the peak.
-float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 );
-
-
-//float ExponentialDecay( float halflife, float dt );
-//float ExponentialDecay( float decayTo, float decayTime, float dt );
-
-// halflife is time for value to reach 50%
-inline float ExponentialDecay( float halflife, float dt )
-{
- // log(0.5) == -0.69314718055994530941723212145818
- return expf( -0.69314718f / halflife * dt);
-}
-
-// decayTo is factor the value should decay to in decayTime
-inline float ExponentialDecay( float decayTo, float decayTime, float dt )
-{
- return expf( logf( decayTo ) / decayTime * dt);
-}
-
-// Get the integrated distanced traveled
-// decayTo is factor the value should decay to in decayTime
-// dt is the time relative to the last velocity update
-inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt )
-{
- return (powf( decayTo, dt / decayTime) * decayTime - decayTime) / logf( decayTo );
-}
-
-// hermite basis function for smooth interpolation
-// Similar to Gain() above, but very cheap to call
-// value should be between 0 & 1 inclusive
-inline float SimpleSpline( float value )
-{
- float valueSquared = value * value;
-
- // Nice little ease-in, ease-out spline-like curve
- return (3 * valueSquared - 2 * valueSquared * value);
-}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline float SimpleSplineRemapVal( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- return C + (D - C) * SimpleSpline( cVal );
-}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D )
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- cVal = clamp( cVal, 0.0f, 1.0f );
- return C + (D - C) * SimpleSpline( cVal );
-}
-
-FORCEINLINE int RoundFloatToInt(float f)
-{
-#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 )
- return _mm_cvtss_si32(_mm_load_ss(&f));
-#elif defined( _X360 )
-#ifdef Assert
- Assert( IsFPUControlWordSet() );
-#endif
- union
- {
- double flResult;
- int pResult[2];
- };
- flResult = __fctiw( f );
- return pResult[1];
-#else
-#error Unknown architecture
-#endif
-}
-
-FORCEINLINE unsigned char RoundFloatToByte(float f)
-{
- int nResult = RoundFloatToInt(f);
-#ifdef Assert
- Assert( (nResult & ~0xFF) == 0 );
-#endif
- return (unsigned char) nResult;
-}
-
-FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
-{
-#if defined( _X360 )
-#ifdef Assert
- Assert( IsFPUControlWordSet() );
-#endif
- union
- {
- double flResult;
- int pIntResult[2];
- unsigned long pResult[2];
- };
- flResult = __fctiw( f );
- Assert( pIntResult[1] >= 0 );
- return pResult[1];
-#else // !X360
-
-#if defined( PLATFORM_WINDOWS_PC64 )
- uint nRet = ( uint ) f;
- if ( nRet & 1 )
- {
- if ( ( f - floor( f ) >= 0.5 ) )
- {
- nRet++;
- }
- }
- else
- {
- if ( ( f - floor( f ) > 0.5 ) )
- {
- nRet++;
- }
- }
- return nRet;
-#else // PLATFORM_WINDOWS_PC64
- unsigned char nResult[8];
-
- #if defined( _WIN32 )
- __asm
- {
- fld f
- fistp qword ptr nResult
- }
- #elif POSIX
- __asm __volatile__ (
- "fistpl %0;": "=m" (nResult): "t" (f) : "st"
- );
- #endif
-
- return *((unsigned long*)nResult);
-#endif // PLATFORM_WINDOWS_PC64
-#endif // !X360
-}
-
-FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
-{
- return fabs( RoundFloatToInt( flValue ) - flValue ) < flTolerance;
-}
-
-// Fast, accurate ftol:
-FORCEINLINE int Float2Int( float a )
-{
-#if defined( _X360 )
- union
- {
- double flResult;
- int pResult[2];
- };
- flResult = __fctiwz( a );
- return pResult[1];
-#else // !X360
- // Rely on compiler to generate CVTTSS2SI on x86
- return (int) a;
-#endif
-}
-
-// Over 15x faster than: (int)floor(value)
-inline int Floor2Int( float a )
-{
- int RetVal;
-#if defined( __i386__ )
- // Convert to int and back, compare, subtract one if too big
- __m128 a128 = _mm_set_ss(a);
- RetVal = _mm_cvtss_si32(a128);
- __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
- RetVal -= _mm_comigt_ss( rounded128, a128 );
-#else
- RetVal = static_cast<int>( floor(a) );
-#endif
- return RetVal;
-}
-
-//-----------------------------------------------------------------------------
-// Fast color conversion from float to unsigned char
-//-----------------------------------------------------------------------------
-FORCEINLINE unsigned int FastFToC( float c )
-{
-#if defined( __i386__ )
- // IEEE float bit manipulation works for values between [0, 1<<23)
- union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
- return convert.i & 255;
-#else
- // consoles CPUs suffer from load-hit-store penalty
- return Float2Int( c * 255.0f );
-#endif
-}
-
-//-----------------------------------------------------------------------------
-// Fast conversion from float to integer with magnitude less than 2**22
-//-----------------------------------------------------------------------------
-FORCEINLINE int FastFloatToSmallInt( float c )
-{
-#if defined( __i386__ )
- // IEEE float bit manipulation works for values between [-1<<22, 1<<22)
- union { float f; int i; } convert = { c + (float)(3<<22) };
- return (convert.i & ((1<<23)-1)) - (1<<22);
-#else
- // consoles CPUs suffer from load-hit-store penalty
- return Float2Int( c );
-#endif
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Bound input float to .001 (millisecond) boundary
-// Input : in -
-// Output : inline float
-//-----------------------------------------------------------------------------
-inline float ClampToMsec( float in )
-{
- int msec = Floor2Int( in * 1000.0f + 0.5f );
- return 0.001f * msec;
-}
-
-// Over 15x faster than: (int)ceil(value)
-inline int Ceil2Int( float a )
-{
- int RetVal;
-#if defined( __i386__ )
- // Convert to int and back, compare, add one if too small
- __m128 a128 = _mm_load_ss(&a);
- RetVal = _mm_cvtss_si32(a128);
- __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
- RetVal += _mm_comilt_ss( rounded128, a128 );
-#else
- RetVal = static_cast<int>( ceil(a) );
-#endif
- return RetVal;
-}
-
-
-// Regular signed area of triangle
-#define TriArea2D( A, B, C ) \
- ( 0.5f * ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) )
-
-// This version doesn't premultiply by 0.5f, so it's the area of the rectangle instead
-#define TriArea2DTimesTwo( A, B, C ) \
- ( ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) )
-
-
-// Get the barycentric coordinates of "pt" in triangle [A,B,C].
-inline void GetBarycentricCoords2D(
- Vector2D const &A,
- Vector2D const &B,
- Vector2D const &C,
- Vector2D const &pt,
- float bcCoords[3] )
-{
- // Note, because to top and bottom are both x2, the issue washes out in the composite
- float invTriArea = 1.0f / TriArea2DTimesTwo( A, B, C );
-
- // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise.
- // If not, TriArea2D() is negated so this works out right.
- bcCoords[0] = TriArea2DTimesTwo( B, C, pt ) * invTriArea;
- bcCoords[1] = TriArea2DTimesTwo( C, A, pt ) * invTriArea;
- bcCoords[2] = TriArea2DTimesTwo( A, B, pt ) * invTriArea;
-}
-
-
-// Return true of the sphere might touch the box (the sphere is actually treated
-// like a box itself, so this may return true if the sphere's bounding box touches
-// a corner of the box but the sphere itself doesn't).
-inline bool QuickBoxSphereTest(
- const Vector& vOrigin,
- float flRadius,
- const Vector& bbMin,
- const Vector& bbMax )
-{
- return vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x &&
- vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y &&
- vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z;
-}
-
-
-// Return true of the boxes intersect (but not if they just touch).
-inline bool QuickBoxIntersectTest(
- const Vector& vBox1Min,
- const Vector& vBox1Max,
- const Vector& vBox2Min,
- const Vector& vBox2Max )
-{
- return
- vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x &&
- vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y &&
- vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z;
-}
-
-
-extern float GammaToLinearFullRange( float gamma );
-extern float LinearToGammaFullRange( float linear );
-extern float GammaToLinear( float gamma );
-extern float LinearToGamma( float linear );
-
-extern float SrgbGammaToLinear( float flSrgbGammaValue );
-extern float SrgbLinearToGamma( float flLinearValue );
-extern float X360GammaToLinear( float fl360GammaValue );
-extern float X360LinearToGamma( float flLinearValue );
-extern float SrgbGammaTo360Gamma( float flSrgbGammaValue );
-
-// linear (0..4) to screen corrected vertex space (0..1?)
-FORCEINLINE float LinearToVertexLight( float f )
-{
- extern float lineartovertex[4096];
-
- // Gotta clamp before the multiply; could overflow...
- // assume 0..4 range
- int i = RoundFloatToInt( f * 1024.f );
-
- // Presumably the comman case will be not to clamp, so check that first:
- if( (unsigned)i > 4095 )
- {
- if ( i < 0 )
- i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream
- else
- i = 4095;
- }
-
- return lineartovertex[i];
-}
-
-
-FORCEINLINE unsigned char LinearToLightmap( float f )
-{
- extern unsigned char lineartolightmap[4096];
-
- // Gotta clamp before the multiply; could overflow...
- int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range
-
- // Presumably the comman case will be not to clamp, so check that first:
- if ( (unsigned)i > 4095 )
- {
- if ( i < 0 )
- i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream
- else
- i = 4095;
- }
-
- return lineartolightmap[i];
-}
-
-FORCEINLINE void ColorClamp( Vector& color )
-{
- float maxc = max( color.x, max( color.y, color.z ) );
- if ( maxc > 1.0f )
- {
- float ooMax = 1.0f / maxc;
- color.x *= ooMax;
- color.y *= ooMax;
- color.z *= ooMax;
- }
-
- if ( color[0] < 0.f ) color[0] = 0.f;
- if ( color[1] < 0.f ) color[1] = 0.f;
- if ( color[2] < 0.f ) color[2] = 0.f;
-}
-
-inline void ColorClampTruncate( Vector& color )
-{
- if (color[0] > 1.0f) color[0] = 1.0f; else if (color[0] < 0.0f) color[0] = 0.0f;
- if (color[1] > 1.0f) color[1] = 1.0f; else if (color[1] < 0.0f) color[1] = 0.0f;
- if (color[2] > 1.0f) color[2] = 1.0f; else if (color[2] < 0.0f) color[2] = 0.0f;
-}
-
-// Interpolate a Catmull-Rom spline.
-// t is a [0,1] value and interpolates a curve between p2 and p3.
-void Catmull_Rom_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// Interpolate a Catmull-Rom spline.
-// Returns the tangent of the point at t of the spline
-void Catmull_Rom_Spline_Tangent(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// area under the curve [0..t]
-void Catmull_Rom_Spline_Integral(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// area under the curve [0..1]
-void Catmull_Rom_Spline_Integral(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- Vector& output );
-
-// Interpolate a Catmull-Rom spline.
-// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
-void Catmull_Rom_Spline_Normalize(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// area under the curve [0..t]
-// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
-void Catmull_Rom_Spline_Integral_Normalize(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// Interpolate a Catmull-Rom spline.
-// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x
-void Catmull_Rom_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// area under the curve [0..t]
-void Catmull_Rom_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// Interpolate a Hermite spline.
-// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2.
-void Hermite_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &d1,
- const Vector &d2,
- float t,
- Vector& output );
-
-float Hermite_Spline(
- float p1,
- float p2,
- float d1,
- float d2,
- float t );
-
-// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2
-void Hermite_Spline(
- const Vector &p0,
- const Vector &p1,
- const Vector &p2,
- float t,
- Vector& output );
-
-float Hermite_Spline(
- float p0,
- float p1,
- float p2,
- float t );
-
-
-void Hermite_SplineBasis( float t, float basis[] );
-
-void Hermite_Spline(
- const Quaternion &q0,
- const Quaternion &q1,
- const Quaternion &q2,
- float t,
- Quaternion &output );
-
-
-// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves
-//
-// Tension: -1 = Round -> 1 = Tight
-// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right)
-// Continuity: -1 = Box corners -> 1 = Inverted corners
-//
-// If T=B=C=0 it's the same matrix as Catmull-Rom.
-// If T=1 & B=C=0 it's the same as Cubic.
-// If T=B=0 & C=-1 it's just linear interpolation
-//
-// See http://news.povray.org/povray.binaries.tutorials/attachment/%[email protected]%3E/Splines.bas.txt
-// for example code and descriptions of various spline types...
-//
-void Kochanek_Bartels_Spline(
- float tension,
- float bias,
- float continuity,
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Kochanek_Bartels_Spline_NormalizeX(
- float tension,
- float bias,
- float continuity,
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void Cubic_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Cubic_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void BSpline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void BSpline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void Parabolic_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Parabolic_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// quintic interpolating polynomial from Perlin.
-// 0->0, 1->1, smooth-in between with smooth tangents
-FORCEINLINE float QuinticInterpolatingPolynomial(float t)
-{
- // 6t^5-15t^4+10t^3
- return t * t * t *( t * ( t* 6.0 - 15.0 ) + 10.0 );
-}
-
-// given a table of sorted tabulated positions, return the two indices and blendfactor to linear
-// interpolate. Does a search. Can be used to find the blend value to interpolate between
-// keyframes.
-void GetInterpolationData( float const *pKnotPositions,
- float const *pKnotValues,
- int nNumValuesinList,
- int nInterpolationRange,
- float flPositionToInterpolateAt,
- bool bWrap,
- float *pValueA,
- float *pValueB,
- float *pInterpolationValue);
-
-float RangeCompressor( float flValue, float flMin, float flMax, float flBase );
-
-// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs]
-// using voronoi regions.
-// 0 is returned if the origin is inside the box.
-float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point );
-void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut );
-void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut );
-
-inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point )
-{
- float flDistSqr = CalcSqrDistanceToAABB( mins, maxs, point );
- return sqrt(flDistSqr);
-}
-
-// Get the closest point from P to the (infinite) line through vLineA and vLineB and
-// calculate the shortest distance from P to the line.
-// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point.
-// If the closest point lies on the segment between A and B, then 0 <= t <= 1.
-void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
-float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-
-// The same three functions as above, except now the line is closed between A and B.
-void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
-float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-
-// A function to compute the closes line segment connnection two lines (or false if the lines are parallel, etc.)
-bool CalcLineToLineIntersectionSegment(
- const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2,
- float *t1, float *t2 );
-
-// The above functions in 2D
-void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
-float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
-float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-
-// Init the mathlib
-void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2.0f, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true );
-bool MathLib_3DNowEnabled( void );
-bool MathLib_MMXEnabled( void );
-bool MathLib_SSEEnabled( void );
-bool MathLib_SSE2Enabled( void );
-
-float Approach( float target, float value, float speed );
-float ApproachAngle( float target, float value, float speed );
-float AngleDiff( float destAngle, float srcAngle );
-float AngleDistance( float next, float cur );
-float AngleNormalize( float angle );
-
-// ensure that 0 <= angle <= 360
-float AngleNormalizePositive( float angle );
-
-bool AnglesAreEqual( float a, float b, float tolerance = 0.0f );
-
-
-void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle );
-void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out );
-
-void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept );
-int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f );
-int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f );
-int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 );
-
-//-----------------------------------------------------------------------------
-// Computes a reasonable tangent space for a triangle
-//-----------------------------------------------------------------------------
-void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2,
- const Vector2D &t0, const Vector2D &t1, const Vector2D& t2,
- Vector &sVect, Vector &tVect );
-
-//-----------------------------------------------------------------------------
-// Transforms a AABB into another space; which will inherently grow the box.
-//-----------------------------------------------------------------------------
-void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Uses the inverse transform of in1
-//-----------------------------------------------------------------------------
-void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Rotates a AABB into another space; which will inherently grow the box.
-// (same as TransformAABB, but doesn't take the translation into account)
-//-----------------------------------------------------------------------------
-void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Uses the inverse transform of in1
-//-----------------------------------------------------------------------------
-void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Transform a plane
-//-----------------------------------------------------------------------------
-inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
-{
- // What we want to do is the following:
- // 1) transform the normal into the new space.
- // 2) Determine a point on the old plane given by plane dist * plane normal
- // 3) Transform that point into the new space
- // 4) Plane dist = DotProduct( new normal, new point )
-
- // An optimized version, which works if the plane is orthogonal.
- // 1) Transform the normal into the new space
- // 2) Realize that transforming the old plane point into the new space
- // is given by [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ]
- // where d = old plane dist, n' = transformed normal, Tn = translational component of transform
- // 3) Compute the new plane dist using the dot product of the normal result of #2
-
- // For a correct result, this should be an inverse-transpose matrix
- // but that only matters if there are nonuniform scale or skew factors in this matrix.
- VectorRotate( inPlane.normal, src, outPlane.normal );
- outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
- outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3];
-}
-
-inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
-{
- // The trick here is that Tn = translational component of transform,
- // but for an inverse transform, Tn = - R^-1 * T
- Vector vecTranslation;
- MatrixGetColumn( src, 3, vecTranslation );
-
- Vector vecInvTranslation;
- VectorIRotate( vecTranslation, src, vecInvTranslation );
-
- VectorIRotate( inPlane.normal, src, outPlane.normal );
- outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
- outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2];
-}
-
-int CeilPow2( int in );
-int FloorPow2( int in );
-
-FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal )
-{
- int temp[3];
- temp[0] = ((*pPackedNormal >> 0L) & 0x7ff);
- if ( temp[0] & 0x400 )
- {
- temp[0] = 2048 - temp[0];
- }
- temp[1] = ((*pPackedNormal >> 11L) & 0x7ff);
- if ( temp[1] & 0x400 )
- {
- temp[1] = 2048 - temp[1];
- }
- temp[2] = ((*pPackedNormal >> 22L) & 0x3ff);
- if ( temp[2] & 0x200 )
- {
- temp[2] = 1024 - temp[2];
- }
- pNormal[0] = (float)temp[0] * 1.0f/1023.0f;
- pNormal[1] = (float)temp[1] * 1.0f/1023.0f;
- pNormal[2] = (float)temp[2] * 1.0f/511.0f;
- return pNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal )
-{
- int temp[3];
-
- temp[0] = Float2Int( pNormal[0] * 1023.0f );
- temp[1] = Float2Int( pNormal[1] * 1023.0f );
- temp[2] = Float2Int( pNormal[2] * 511.0f );
-
- // the normal is out of bounds, determine the source and fix
- // clamping would be even more of a slowdown here
- Assert( temp[0] >= -1023 && temp[0] <= 1023 );
- Assert( temp[1] >= -1023 && temp[1] <= 1023 );
- Assert( temp[2] >= -511 && temp[2] <= 511 );
-
- *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) |
- ( ( temp[1] & 0x7ff ) << 11L ) |
- ( ( temp[0] & 0x7ff ) << 0L );
- return pPackedNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal )
-{
- int temp[3];
-
- temp[0] = Float2Int( nx * 1023.0f );
- temp[1] = Float2Int( ny * 1023.0f );
- temp[2] = Float2Int( nz * 511.0f );
-
- // the normal is out of bounds, determine the source and fix
- // clamping would be even more of a slowdown here
- Assert( temp[0] >= -1023 && temp[0] <= 1023 );
- Assert( temp[1] >= -1023 && temp[1] <= 1023 );
- Assert( temp[2] >= -511 && temp[2] <= 511 );
-
- *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) |
- ( ( temp[1] & 0x7ff ) << 11L ) |
- ( ( temp[0] & 0x7ff ) << 0L );
- return pPackedNormal;
-}
-
-FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE )
-{
- // Unpacks from Jason's 2-short format (fills in a 4th binormal-sign (+1/-1) value, if this is a tangent vector)
-
- // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits)
- short iX = (*pPackedNormal & 0x0000FFFF);
- short iY = (*pPackedNormal & 0xFFFF0000) >> 16;
-
- float zSign = +1;
- if ( iX < 0 )
- {
- zSign = -1;
- iX = -iX;
- }
- float tSign = +1;
- if ( iY < 0 )
- {
- tSign = -1;
- iY = -iY;
- }
-
- pNormal[0] = ( iX - 16384.0f ) / 16384.0f;
- pNormal[1] = ( iY - 16384.0f ) / 16384.0f;
- pNormal[2] = zSign*sqrtf( 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] ) );
- if ( bIsTangent )
- {
- pNormal[3] = tSign;
- }
-
- return pNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f )
-{
- // Pack a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format.
- // This simply reconstructs Z from X & Y. It uses the sign bits of the X & Y coords
- // to reconstruct the sign of Z and, if this is a tangent vector, the sign of the
- // binormal (this is needed because tangent/binormal vectors are supposed to follow
- // UV gradients, but shaders reconstruct the binormal from the tangent and normal
- // assuming that they form a right-handed basis).
-
- nx += 1; // [-1,+1] -> [0,2]
- ny += 1;
- nx *= 16384.0f; // [ 0, 2] -> [0,32768]
- ny *= 16384.0f;
-
- // '0' and '32768' values are invalid encodings
- nx = max( nx, 1.0f ); // Make sure there are no zero values
- ny = max( ny, 1.0f );
- nx = min( nx, 32767.0f ); // Make sure there are no 32768 values
- ny = min( ny, 32767.0f );
-
- if ( nz < 0.0f )
- nx = -nx; // Set the sign bit for z
-
- ny *= binormalSign; // Set the sign bit for the binormal (use when encoding a tangent vector)
-
- // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int()
- short sX = (short)nx; // signed short [1,32767]
- short sY = (short)ny;
-
- *pPackedNormal = ( sX & 0x0000FFFF ) | ( sY << 16 ); // NOTE: The mask is necessary (if sX is negative and cast to an int...)
-
- return pPackedNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f )
-{
- return PackNormal_SHORT2( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, binormalSign );
-}
-
-// Unpacks a UBYTE4 normal (for a tangent, the result's fourth component receives the binormal 'sign')
-FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE )
-{
- unsigned char cX, cY;
- if ( bIsTangent )
- {
- cX = *pPackedNormal >> 16; // Unpack Z
- cY = *pPackedNormal >> 24; // Unpack W
- }
- else
- {
- cX = *pPackedNormal >> 0; // Unpack X
- cY = *pPackedNormal >> 8; // Unpack Y
- }
-
- float x = cX - 128.0f;
- float y = cY - 128.0f;
- float z;
-
- float zSignBit = x < 0 ? 1.0f : 0.0f; // z and t negative bits (like slt asm instruction)
- float tSignBit = y < 0 ? 1.0f : 0.0f;
- float zSign = -( 2*zSignBit - 1 ); // z and t signs
- float tSign = -( 2*tSignBit - 1 );
-
- x = x*zSign - zSignBit; // 0..127
- y = y*tSign - tSignBit;
- x = x - 64; // -64..63
- y = y - 64;
-
- float xSignBit = x < 0 ? 1.0f : 0.0f; // x and y negative bits (like slt asm instruction)
- float ySignBit = y < 0 ? 1.0f : 0.0f;
- float xSign = -( 2*xSignBit - 1 ); // x and y signs
- float ySign = -( 2*ySignBit - 1 );
-
- x = ( x*xSign - xSignBit ) / 63.0f; // 0..1 range
- y = ( y*ySign - ySignBit ) / 63.0f;
- z = 1.0f - x - y;
-
- float oolen = 1.0f / sqrt( x*x + y*y + z*z ); // Normalize and
- x *= oolen * xSign; // Recover signs
- y *= oolen * ySign;
- z *= oolen * zSign;
-
- pNormal[0] = x;
- pNormal[1] = y;
- pNormal[2] = z;
- if ( bIsTangent )
- {
- pNormal[3] = tSign;
- }
-
- return pNormal;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2
-//
-// UBYTE4 encoding, using per-octant projection onto x+y+z=1
-// Assume input vector is already unit length
-//
-// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent
-// (lets the shader know whether norm/tan/bin form a right-handed basis)
-//
-// bIsTangent is used to specify which WORD of the output to store the data
-// The expected usage is to call once with the normal and once with
-// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs
-FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
-{
- float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign
- float ySign = ny < 0.0f ? -1.0f : 1.0f;
- float zSign = nz < 0.0f ? -1.0f : 1.0f;
- float tSign = binormalSign;
- Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) );
-
- float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0]
- float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction)
- float zSignBit = 0.5f*( 1 - zSign );
- float tSignBit = 0.5f*( 1 - binormalSign );
-
- float absX = xSign*nx; // 0..1 range (abs)
- float absY = ySign*ny;
- float absZ = zSign*nz;
-
- float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane
- float ybits = absY / ( absX + absY + absZ );
-
- xbits *= 63; // 0..63
- ybits *= 63;
-
- xbits = xbits * xSign - xSignBit; // -64..63 range
- ybits = ybits * ySign - ySignBit;
- xbits += 64.0f; // 0..127 range
- ybits += 64.0f;
-
- xbits = xbits * zSign - zSignBit; // Negate based on z and t
- ybits = ybits * tSign - tSignBit; // -128..127 range
-
- xbits += 128.0f; // 0..255 range
- ybits += 128.0f;
-
- unsigned char cX = (unsigned char) xbits;
- unsigned char cY = (unsigned char) ybits;
-
- if ( !bIsTangent )
- *pPackedNormal = (cX << 0) | (cY << 8); // xy for normal
- else
- *pPackedNormal = (cX << 16) | (cY << 24); // zw for tangent
-
- return pPackedNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
-{
- return PackNormal_UBYTE4( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, bIsTangent, binormalSign );
-}
-
-
-//-----------------------------------------------------------------------------
-// Convert RGB to HSV
-//-----------------------------------------------------------------------------
-void RGBtoHSV( const Vector &rgb, Vector &hsv );
-
-
-//-----------------------------------------------------------------------------
-// Convert HSV to RGB
-//-----------------------------------------------------------------------------
-void HSVtoRGB( const Vector &hsv, Vector &rgb );
-
-
-//-----------------------------------------------------------------------------
-// Fast version of pow and log
-//-----------------------------------------------------------------------------
-
-float FastLog2(float i); // log2( i )
-float FastPow2(float i); // 2^i
-float FastPow(float a, float b); // a^b
-float FastPow10( float i ); // 10^i
-
-//-----------------------------------------------------------------------------
-// For testing float equality
-//-----------------------------------------------------------------------------
-
-inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON )
-{
- return fabs( a - b ) <= epsilon;
-}
-
-inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON )
-{
- return fabs( a.x - b.x ) <= epsilon &&
- fabs( a.y - b.y ) <= epsilon &&
- fabs( a.z - b.z ) <= epsilon;
-}
-
-// Fast compare
-// maxUlps is the maximum error in terms of Units in the Last Place. This
-// specifies how big an error we are willing to accept in terms of the value
-// of the least significant digit of the floating point number�s
-// representation. maxUlps can also be interpreted in terms of how many
-// representable floats we are willing to accept between A and B.
-// This function will allow maxUlps-1 floats between A and B.
-bool AlmostEqual(float a, float b, int maxUlps = 10);
-
-inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
-{
- return AlmostEqual( a.x, b.x, maxUlps ) &&
- AlmostEqual( a.y, b.y, maxUlps ) &&
- AlmostEqual( a.z, b.z, maxUlps );
-}
-
-
-#endif // MATH_BASE_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//===========================================================================//
+
+#ifndef MATH_LIB_H
+#define MATH_LIB_H
+
+#include <math.h>
+#include "tier0/basetypes.h"
+#include "tier0/commonmacros.h"
+#include "mathlib/vector.h"
+#include "mathlib/vector2d.h"
+#include "tier0/dbg.h"
+
+#include "mathlib/math_pfns.h"
+
+#if defined(__i386__) || defined(_M_IX86)
+// For SSE intrinsics (_mm_load_ss, _mm_max_ss, _mm_min_ss, _mm_store_ss)
+#include <xmmintrin.h>
+#endif
+
+// XXX remove me
+#undef clamp
+
+// Uncomment this to enable FP exceptions in parts of the code.
+// This can help track down FP bugs. However, the code is not
+// FP exception clean, so this is not a turnkey operation.
+//#define FP_EXCEPTIONS_ENABLED
+
+
+#ifdef FP_EXCEPTIONS_ENABLED
+#include <float.h> // For _clearfp and _controlfp_s
+#endif
+
+// FPExceptionDisabler and FPExceptionEnabler taken from my blog post
+// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/
+
+// Declare an object of this type in a scope in order to suppress
+// all floating-point exceptions temporarily. The old exception
+// state will be reset at the end.
+class FPExceptionDisabler
+{
+public:
+#ifdef FP_EXCEPTIONS_ENABLED
+ // Real implementation: constructor and destructor are only declared here.
+ FPExceptionDisabler();
+ ~FPExceptionDisabler();
+
+private:
+ // State saved by the constructor.
+ // NOTE(review): presumably the previous FP control word, restored by the destructor -- confirm in the implementation.
+ unsigned int mOldValues;
+#else
+ // FP_EXCEPTIONS_ENABLED is off: the object is an empty no-op.
+ FPExceptionDisabler() {}
+ ~FPExceptionDisabler() {}
+#endif
+
+private:
+ // Make the copy constructor and assignment operator private
+ // and unimplemented to prohibit copying.
+ FPExceptionDisabler(const FPExceptionDisabler&);
+ FPExceptionDisabler& operator=(const FPExceptionDisabler&);
+};
+
+// Declare an object of this type in a scope in order to enable a
+// specified set of floating-point exceptions temporarily. The old
+// exception state will be reset at the end.
+// This class can be nested.
+class FPExceptionEnabler
+{
+public:
+ // Overflow, divide-by-zero, and invalid-operation are the FP
+ // exceptions most frequently associated with bugs.
+#ifdef FP_EXCEPTIONS_ENABLED
+ // enableBits selects which exceptions to enable; the default is the
+ // overflow | zero-divide | invalid set described above.
+ FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID);
+ ~FPExceptionEnabler();
+
+private:
+ // State saved by the constructor.
+ // NOTE(review): presumably the previous FP control word, restored by the destructor -- confirm in the implementation.
+ unsigned int mOldValues;
+#else
+ // FP_EXCEPTIONS_ENABLED is off: enableBits is accepted but ignored and
+ // the object does nothing.
+ FPExceptionEnabler(unsigned int enableBits = 0)
+ {
+ }
+ ~FPExceptionEnabler()
+ {
+ }
+#endif
+
+private:
+ // Make the copy constructor and assignment operator private
+ // and unimplemented to prohibit copying.
+ FPExceptionEnabler(const FPExceptionEnabler&);
+ FPExceptionEnabler& operator=(const FPExceptionEnabler&);
+};
+
+
+
+// Clamps val to [minVal, maxVal]. Two implementations are selected by DEBUG:
+// a plain branching version for debug builds and a min/max version otherwise.
+#ifdef DEBUG // stop crashing edit-and-continue
+FORCEINLINE float clamp( float val, float minVal, float maxVal )
+{
+ // An inverted range (maxVal < minVal) yields maxVal.
+ if ( maxVal < minVal )
+ return maxVal;
+ else if( val < minVal )
+ return minVal;
+ else if( val > maxVal )
+ return maxVal;
+ else
+ return val;
+}
+#else // DEBUG
+FORCEINLINE float clamp( float val, float minVal, float maxVal )
+{
+#if defined(__i386__) || defined(_M_IX86)
+ // 32-bit x86: val = min( max( val, minVal ), maxVal ) using scalar SSE.
+ // The operand order is kept exactly as written; do not reorder.
+ _mm_store_ss( &val,
+ _mm_min_ss(
+ _mm_max_ss(
+ _mm_load_ss(&val),
+ _mm_load_ss(&minVal) ),
+ _mm_load_ss(&maxVal) ) );
+#else
+ // Other targets: same max-then-min sequence through fpmax/fpmin.
+ val = fpmax(minVal, val);
+ val = fpmin(maxVal, val);
+#endif
+ return val;
+}
+#endif // DEBUG
+
+//
+// Generic clamp: returns val limited to the range [minVal, maxVal].
+// T only needs operator< and operator>. If the range is inverted
+// (maxVal < minVal) the result is maxVal.
+//
+template< class T >
+inline T clamp( T const &val, T const &minVal, T const &maxVal )
+{
+    // Inverted range: the upper bound wins.
+    if ( maxVal < minVal )
+        return maxVal;
+
+    // Below the range.
+    if ( val < minVal )
+        return minVal;
+
+    // Above the range, or already inside it.
+    return ( val > maxVal ) ? maxVal : val;
+}
+
+
+// plane_t structure
+// !!! if this is changed, it must be changed in asm code too !!!
+// FIXME: does the asm code even exist anymore?
+// FIXME: this should move to a different file
+// Plane record: a normal, a distance and two precomputed classification bytes.
+// The field order and sizes are mirrored by the CPLANE_* byte offsets defined
+// right after this struct, so the layout must not change.
+struct cplane_t
+{
+ Vector normal;
+ float dist;
+ byte type; // for fast side tests
+ // NOTE(review): the formula below repeats <<1 for both y and z; it presumably
+ // should read (signz<<2) -- confirm against SignbitsForPlane().
+ byte signbits; // signx + (signy<<1) + (signz<<1)
+ byte pad[2];
+
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+ cplane_t() {}
+
+private:
+ // No copy constructors allowed if we're in optimal mode
+ cplane_t(const cplane_t& vOther);
+#endif
+};
+
+// structure offset for asm code
+#define CPLANE_NORMAL_X 0
+#define CPLANE_NORMAL_Y 4
+#define CPLANE_NORMAL_Z 8
+#define CPLANE_DIST 12
+#define CPLANE_TYPE 16
+#define CPLANE_SIGNBITS 17
+#define CPLANE_PAD0 18
+#define CPLANE_PAD1 19
+
+// 0-2 are axial planes
+#define PLANE_X 0
+#define PLANE_Y 1
+#define PLANE_Z 2
+
+// 3-5 are non-axial planes snapped to the nearest
+#define PLANE_ANYX 3
+#define PLANE_ANYY 4
+#define PLANE_ANYZ 5
+
+
+//-----------------------------------------------------------------------------
+// Frustum plane indices.
+// WARNING: there is code that depends on these values
+//-----------------------------------------------------------------------------
+
+// Indices of the six frustum planes. The values are fixed explicitly because
+// other code depends on them.
+enum
+{
+ FRUSTUM_RIGHT = 0,
+ FRUSTUM_LEFT = 1,
+ FRUSTUM_TOP = 2,
+ FRUSTUM_BOTTOM = 3,
+ FRUSTUM_NEARZ = 4,
+ FRUSTUM_FARZ = 5,
+ // Plane count; sizes the per-plane arrays in Frustum_t.
+ FRUSTUM_NUMPLANES = 6
+};
+
+extern int SignbitsForPlane( cplane_t *out );
+
+// Holds FRUSTUM_NUMPLANES planes plus, for each plane, the component-wise
+// absolute value of its normal.
+class Frustum_t
+{
+public:
+ // Fills plane slot i: stores normal, dist and type, derives signbits via
+ // SignbitsForPlane(), and caches |normal| per component.
+ // i is not range-checked here.
+ void SetPlane( int i, int nType, const Vector &vecNormal, float dist )
+ {
+ m_Plane[i].normal = vecNormal;
+ m_Plane[i].dist = dist;
+ m_Plane[i].type = nType;
+ m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] );
+ m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) );
+ }
+
+ // Accessors; i is not range-checked.
+ inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; }
+ inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; }
+
+private:
+ cplane_t m_Plane[FRUSTUM_NUMPLANES];
+ Vector m_AbsNormal[FRUSTUM_NUMPLANES];
+};
+
+// Computes Y fov from an X fov and a screen aspect ratio + X from Y
+float CalcFovY( float flFovX, float flScreenAspect );
+float CalcFovX( float flFovY, float flScreenAspect );
+
+// Generate a frustum based on perspective view parameters
+// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle)
+void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum );
+void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum );
+
+// Cull the world-space bounding box to the specified frustum.
+bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
+bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
+
+// 3-row by 4-column float matrix. As Init() shows, columns 0-2 hold the
+// X/Y/Z axes and column 3 holds the origin.
+struct matrix3x4_t
+{
+ // Default constructor leaves the elements uninitialized.
+ matrix3x4_t() {}
+ // Element-wise constructor; mRC is row R, column C.
+ matrix3x4_t(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23 )
+ {
+ m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03;
+ m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13;
+ m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23;
+ }
+
+ //-----------------------------------------------------------------------------
+ // Creates a matrix where the X axis = forward
+ // the Y axis = left, and the Z axis = up
+ // Each axis is written into one column; vecOrigin goes into column 3.
+ //-----------------------------------------------------------------------------
+ void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
+ {
+ m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
+ m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
+ m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] = vecOrigin.z;
+ }
+
+ //-----------------------------------------------------------------------------
+ // Creates a matrix where the X axis = forward
+ // the Y axis = left, and the Z axis = up
+ // Forwards to Init().
+ //-----------------------------------------------------------------------------
+ matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
+ {
+ Init( xAxis, yAxis, zAxis, vecOrigin );
+ }
+
+ // Overwrites all twelve elements with VEC_T_NAN.
+ inline void Invalidate( void )
+ {
+ for (int i = 0; i < 3; i++)
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ m_flMatVal[i][j] = VEC_T_NAN;
+ }
+ }
+ }
+
+ // Row access: returns a pointer to the four floats of row i (Assert-checked to be 0..2).
+ float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
+ const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
+ // Pointer to element [0][0]; the twelve floats are contiguous, row after row.
+ float *Base() { return &m_flMatVal[0][0]; }
+ const float *Base() const { return &m_flMatVal[0][0]; }
+
+ float m_flMatVal[3][4];
+};
+
+
+#ifndef M_PI
+ #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h
+#endif
+
+#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything.
+
+// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system.
+#ifndef RAD2DEG
+ #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) )
+#endif
+
+#ifndef DEG2RAD
+ #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) )
+#endif
+
+// Used to represent sides of things like planes.
+#define SIDE_FRONT 0
+#define SIDE_BACK 1
+#define SIDE_ON 2
+#define SIDE_CROSS -2 // necessary for polylib.c
+
+#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later!
+#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there?
+
+extern bool s_bMathlibInitialized;
+
+extern const Vector vec3_origin;
+extern const QAngle vec3_angle;
+extern const Quaternion quat_identity;
+extern const Vector vec3_invalid;
+extern const int nanmask;
+
+// True when every bit of nanmask is set in the bit pattern of x.
+// x must be an lvalue: its address is taken and the storage is read through an int pointer.
+// The argument is parenthesized so that compound argument expressions expand as intended.
+#define IS_NAN(x) (((*(int *)&(x))&nanmask)==nanmask)
+
+// Dot product of two 3-component arrays: v1 . v2.
+FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2)
+{
+    return ( v1[0] * v2[0] )
+         + ( v1[1] * v2[1] )
+         + ( v1[2] * v2[2] );
+}
+// Component-wise c = a - b over three elements (written in index order 0, 1, 2).
+FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c)
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        c[iAxis] = a[iAxis] - b[iAxis];
+    }
+}
+// Component-wise c = a + b over three elements (written in index order 0, 1, 2).
+FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c)
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        c[iAxis] = a[iAxis] + b[iAxis];
+    }
+}
+// Copies three elements from a into b (in index order 0, 1, 2).
+FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b)
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        b[iAxis] = a[iAxis];
+    }
+}
+// Sets the three elements of a to zero.
+FORCEINLINE void VectorClear(vec_t *a)
+{
+    a[2] = 0;
+    a[1] = 0;
+    a[0] = 0;
+}
+
+// Largest of the three elements of v.
+FORCEINLINE float VectorMaximum(const vec_t *v)
+{
+    const vec_t flLargestOfYZ = max( v[1], v[2] );
+    return max( v[0], flLargestOfYZ );
+}
+
+// Largest of v.x, v.y and v.z.
+FORCEINLINE float VectorMaximum(const Vector& v)
+{
+    const vec_t flLargestOfYZ = max( v.y, v.z );
+    return max( v.x, flLargestOfYZ );
+}
+
+// out = in * scale, component-wise over three elements (index order 0, 1, 2).
+FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out)
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        out[iAxis] = in[iAxis] * scale;
+    }
+}
+
+
+// Cannot be forceinline as they have overloads:
+// Writes b into each of the three elements of a.
+inline void VectorFill(vec_t *a, float b)
+{
+    a[2] = b;
+    a[1] = b;
+    a[0] = b;
+}
+
+// Negates the three elements of a in place.
+inline void VectorNegate(vec_t *a)
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        a[iAxis] = -a[iAxis];
+    }
+}
+
+
+//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) )
+// Two-component helpers operating on anything indexable with [0] and [1].
+// NOTE: each macro expands to a bare { ... } block, not a do { ... } while(0)
+// statement. Writing one followed by ';' directly before an 'else' therefore
+// does not compile; wrap the call in braces in that position.
+// Arguments appear more than once in the expansion, so avoid side effects.
+#define Vector2Clear(x) {(x)[0]=(x)[1]=0;}
+#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);}
+#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];}
+#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];}
+#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];}
+#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];}
+
+// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's.
+// remove when no longer needed.
+#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0)
+#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] )
+
+// Multiply-add on raw arrays: dest = start + direction * scale (three elements, index order 0, 1, 2).
+FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest )
+{
+    for ( int iAxis = 0; iAxis < 3; ++iAxis )
+    {
+        dest[iAxis] = start[iAxis] + direction[iAxis] * scale;
+    }
+}
+
+// Multiply-add on Vectors: dest = start + direction * scale, written x, then y, then z.
+FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest )
+{
+    dest.x = direction.x * scale + start.x;
+    dest.y = direction.y * scale + start.y;
+    dest.z = direction.z * scale + start.z;
+}
+
+// dest = start + direction * scale. Forwards to the Vector overload of VectorMAInline.
+FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
+{
+ VectorMAInline(start, scale, direction, dest);
+}
+
+// dest = start + direction * scale on three-element float arrays.
+// Forwards to the pointer overload of VectorMAInline.
+FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest )
+{
+ VectorMAInline(start, scale, direction, dest);
+}
+
+
+int VectorCompare (const float *v1, const float *v2);
+
+// Length of a three-element vector, computed with FastSqrt.
+// FLT_EPSILON is added to the squared length before the square root,
+// so a zero vector does not pass exactly 0 to FastSqrt.
+inline float VectorLength(const float *v)
+{
+    const float flLengthSqr = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
+    return FastSqrt( flLengthSqr + FLT_EPSILON );
+}
+
+void CrossProduct (const float *v1, const float *v2, float *cross);
+
+qboolean VectorsEqual( const float *v1, const float *v2 );
+
+// Rounds to the nearest whole number by taking floor( in + 0.5 ).
+// The result is still a vec_t, not an integer type.
+inline vec_t RoundInt (vec_t in)
+{
+    const vec_t flBiased = in + 0.5f;
+    return floor( flBiased );
+}
+
+int Q_log2(int val);
+
+// Computes the sine and cosine of one angle in a single call.
+//   radians - input angle, in radians
+//   sine    - receives the sine of radians; must point to writable storage
+//   cosine  - receives the cosine of radians; must point to writable storage
+// Each recognized platform uses its own fast path. Any target that matches
+// none of them falls back to the C runtime, so both outputs are always written.
+void inline SinCos( float radians, float *sine, float *cosine )
+{
+#if defined( _X360 )
+    XMScalarSinCos( sine, cosine, radians );
+#elif defined( PLATFORM_WINDOWS_PC32 )
+    _asm
+    {
+        fld DWORD PTR [radians]
+        fsincos
+
+        mov edx, DWORD PTR [cosine]
+        mov eax, DWORD PTR [sine]
+
+        fstp DWORD PTR [edx]
+        fstp DWORD PTR [eax]
+    }
+#elif defined( PLATFORM_WINDOWS_PC64 )
+    *sine = sin( radians );
+    *cosine = cos( radians );
+#elif defined( POSIX ) && ( defined( __i386__ ) || defined( __x86_64__ ) )
+    // x87 fsincos leaves the cosine in st(0) ("=t") and the sine in st(1) ("=u").
+    // The instruction only exists on x86, hence the architecture guard.
+    // The 'register' keyword was dropped: it has no effect here and C++17 removed it.
+    double __cosr, __sinr;
+    __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
+
+    *sine = __sinr;
+    *cosine = __cosr;
+#else
+    // Non-x86 POSIX targets and unrecognized platforms: use the C runtime.
+    *sine = sinf( radians );
+    *cosine = cosf( radians );
+#endif
+}
+
+#define SIN_TABLE_SIZE 256
+#define FTOIBIAS 12582912.f
+extern float SinCosTable[SIN_TABLE_SIZE];
+
+// Table-driven cosine approximation: returns an entry of SinCosTable selected from theta.
+// theta is scaled so that 2*pi spans SIN_TABLE_SIZE entries, then offset by a
+// quarter of the table (SIN_TABLE_SIZE / 4) relative to TableSin.
+// Adding FTOIBIAS (12582912 = 1.5 * 2^23) and reading the float's bits as an int
+// is used to obtain the index; the mask wraps it into the table.
+// NOTE(review): this assumes IEEE-754 single precision and a 32-bit int -- confirm for new targets.
+inline float TableCos( float theta )
+{
+ // Same storage viewed as float and as int.
+ union
+ {
+ int i;
+ float f;
+ } ftmp;
+
+ // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this.
+ ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) );
+ // SIN_TABLE_SIZE is a power of two, so the AND wraps the index.
+ return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
+}
+
+// Table-driven sine approximation: returns an entry of SinCosTable selected from theta.
+// theta is scaled so that 2*pi spans SIN_TABLE_SIZE entries. Adding FTOIBIAS
+// (12582912 = 1.5 * 2^23) and reading the float's bits as an int is used to
+// obtain the index; the mask wraps it into the table.
+// NOTE(review): this assumes IEEE-754 single precision and a 32-bit int -- confirm for new targets.
+inline float TableSin( float theta )
+{
+ // Same storage viewed as float and as int.
+ union
+ {
+ int i;
+ float f;
+ } ftmp;
+
+ // ideally, the following should compile down to: theta * constant + constant
+ ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS;
+ // SIN_TABLE_SIZE is a power of two, so the AND wraps the index.
+ return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
+}
+
+// Returns a multiplied by itself, for any T that supports operator*.
+template<class T>
+FORCEINLINE T Square( T const &a )
+{
+    return ( a ) * ( a );
+}
+
+
+// Returns the smallest power of two that is >= x.
+// Returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed).
+// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint,
+// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints.
+FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x )
+{
+    uint nBits = x - 1;
+
+    // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8, 16).
+    for ( int nShift = 1; nShift <= 16; nShift <<= 1 )
+    {
+        nBits |= nBits >> nShift;
+    }
+
+    // All ones below the top bit, plus one, is the next power of two.
+    return nBits + 1;
+}
+
+// Returns the largest power of two that is <= x. Returns 0 when x is 0.
+FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x )
+{
+    // With the top bit set, x + 1 could wrap, so answer directly.
+    const bool bTopBitSet = ( x >= 0x80000000 );
+    if ( bTopBitSet )
+    {
+        return 0x80000000;
+    }
+
+    // The next power of two above x, halved.
+    const uint nNextPower = SmallestPowerOfTwoGreaterOrEqual( x + 1 );
+    return nNextPower >> 1;
+}
+
+
+// Math routines for optimizing division
+void FloorDivMod (double numer, double denom, int *quotient, int *rem);
+int GreatestCommonDivisor (int i1, int i2);
+
+// Test for FPU denormal mode
+bool IsDenormal( const float &val );
+
+// MOVEMENT INFO
+// Names for the three rotation components; the values are 0, 1 and 2.
+// NOTE(review): presumably used as indices into angle arrays -- confirm at call sites.
+enum
+{
+ PITCH = 0, // up / down
+ YAW, // left / right
+ ROLL // fall over
+};
+
+void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!!
+void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp );
+void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out);
+void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out);
+void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out);
+void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out );
+void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out );
+void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out);
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
+QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
+
+#endif
+
+void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis );
+void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out );
+void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
+
+// Matrix equality test
+bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
+
+void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
+void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
+
+// Reads column 3 of the matrix into out (forwards to MatrixGetColumn).
+inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
+{
+ MatrixGetColumn ( in, 3, out );
+}
+
+// Writes in to column 3 of the matrix out (forwards to MatrixSetColumn).
+inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
+{
+ MatrixSetColumn ( in, 3, out );
+}
+
+void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
+void MatrixScaleByZero ( matrix3x4_t &out );
+
+//void DecomposeRotation( const matrix3x4_t &mat, float *out );
+void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
+void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
+
+// For identical interface w/ VMatrix
+// Forwards to ConcatTransforms( in1, in2, out ) with the arguments in the same order.
+inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out )
+{
+ ConcatTransforms( in1, in2, out );
+}
+
+void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
+void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
+void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
+void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
+void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt );
+float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q );
+void QuaternionScale( const Quaternion &p, float t, Quaternion &q );
+void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt );
+float QuaternionDotProduct( const Quaternion &p, const Quaternion &q );
+void QuaternionConjugate( const Quaternion &p, Quaternion &q );
+void QuaternionInvert( const Quaternion &p, Quaternion &q );
+float QuaternionNormalize( Quaternion &q );
+void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt );
+void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt );
+void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix );
+void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix );
+void QuaternionAngles( const Quaternion &q, QAngle &angles );
+void AngleQuaternion( const QAngle& angles, Quaternion &qt );
+void QuaternionAngles( const Quaternion &q, RadianEuler &angles );
+void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
+void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle );
+void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q );
+void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q );
+void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q );
+
+// A couple methods to find the dot product of a vector with a matrix row or column...
+// Dot product of the first three elements of matrix row 'row' with in2.
+// row must be 0..2 (Assert-checked).
+inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 )
+{
+    Assert( (row >= 0) && (row < 3) );
+
+    const float *pRow = in1[row];
+    return DotProduct( pRow, in2.Base() );
+}
+
+// Dot product of matrix column 'col' (its three rows) with in2.
+// col must be 0..3 (Assert-checked).
+inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 )
+{
+    Assert( (col >= 0) && (col < 4) );
+
+    return ( in1[0][col] * in2[0] )
+         + ( in1[1][col] * in2[1] )
+         + ( in1[2][col] * in2[2] );
+}
+
+int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane);
+
+// Wraps an angle in degrees into [0, 360) by quantizing it to 65536 steps
+// per full turn and keeping the low 16 bits of the step count.
+inline float anglemod(float a)
+{
+    const int nSteps = (int)( a * ( 65536.f / 360.0f ) ) & 65535;
+    a = ( 360.f / 65536 ) * nSteps;
+    return a;
+}
+
+// Linearly remaps val from the range [A,B] to the range [C,D].
+// The result is not clamped, so values outside [A,B] extrapolate.
+// When A == B the input range is empty: returns D if val >= B, otherwise C.
+inline float RemapVal( float val, float A, float B, float C, float D)
+{
+    if ( A != B )
+    {
+        return C + (D - C) * (val - A) / (B - A);
+    }
+
+    return ( val >= B ) ? D : C;
+}
+
+// Like RemapVal, but the interpolation fraction is clamped to [0,1], so the
+// result always lies between C and D.
+// When A == B the input range is empty: returns D if val >= B, otherwise C.
+inline float RemapValClamped( float val, float A, float B, float C, float D)
+{
+    if ( A == B )
+    {
+        return ( val >= B ) ? D : C;
+    }
+
+    float flFraction = (val - A) / (B - A);
+    flFraction = clamp( flFraction, 0.0f, 1.0f );
+    return C + (D - C) * flFraction;
+}
+
+// Returns A + (B-A)*flPercent.
+// float Lerp( float flPercent, float A, float B );
+// Generic over T: T must support T - T, the result times a float, and T + that product.
+// flPercent is not clamped, so values outside [0,1] extrapolate.
+template <class T>
+FORCEINLINE T Lerp( float flPercent, T const &A, T const &B )
+{
+ return A + (B - A) * flPercent;
+}
+
+// Returns f squared.
+FORCEINLINE float Sqr( float f )
+{
+    const float flSquared = f * f;
+    return flSquared;
+}
+
+// 5-argument floating point linear interpolation.
+// FLerp(f1,f2,i1,i2,x) evaluates the straight line through (i1,f1) and (i2,f2) at x:
+//    f1 at x=i1
+//    f2 at x=i2
+//    a linear blend of f1 and f2 for x between i1 and i2
+//    extrapolation for x<i1 or x>i2
+//
+// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2,
+// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x).
+// i2=i1 will cause a divide by zero.
+static inline float FLerp(float f1, float f2, float i1, float i2, float x)
+{
+    const float flScaledRise = ( f2 - f1 ) * ( x - i1 );
+    return f1 + flScaledRise / ( i2 - i1 );
+}
+
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// YWB: Specialization for interpolating euler angles via quaternions...
+// Lerp specialization for QAngle: instead of blending the angle components
+// directly, both inputs are converted to quaternions, blended with
+// QuaternionSlerp, and the result is converted back to a QAngle.
+template<> FORCEINLINE QAngle Lerp<QAngle>( float flPercent, const QAngle& q1, const QAngle& q2 )
+{
+    // Identical endpoints: return the input untouched to avoid precision errors
+    // from the round trip below.
+    if ( q1 == q2 )
+        return q1;
+
+    // Euler -> quaternion for both endpoints.
+    Quaternion qStart;
+    Quaternion qEnd;
+    AngleQuaternion( q1, qStart );
+    AngleQuaternion( q2, qEnd );
+
+    // Spherical blend between the two orientations.
+    Quaternion qBlended;
+    QuaternionSlerp( qStart, qEnd, flPercent, qBlended );
+
+    // Quaternion -> Euler for the caller.
+    QAngle angBlended;
+    QuaternionAngles( qBlended, angBlended );
+    return angBlended;
+}
+
+#else
+
+#pragma error
+
+// NOTE NOTE: I haven't tested this!! It may not work! Check out interpolatedvar.cpp in the client dll to try it
+// Lerp specialization for QAngleByValue, compiled only when VECTOR_NO_SLOW_OPERATIONS
+// is defined. Mirrors the QAngle version: convert both inputs to quaternions,
+// slerp, convert back.
+template<> FORCEINLINE QAngleByValue Lerp<QAngleByValue>( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 )
+{
+ // Avoid precision errors
+ if ( q1 == q2 )
+ return q1;
+
+ Quaternion src, dest;
+
+ // Convert to quaternions
+ AngleQuaternion( q1, src );
+ AngleQuaternion( q2, dest );
+
+ Quaternion result;
+
+ // Slerp
+ QuaternionSlerp( src, dest, flPercent, result );
+
+ // Convert to euler
+ QAngleByValue output;
+ QuaternionAngles( result, output );
+ return output;
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+/// Same as swap(), but won't cause problems with std::swap
+/// Exchanges x and y through one temporary copy; T must be copy-constructible
+/// and assignable.
+template <class T>
+FORCEINLINE void V_swap( T& x, T& y )
+{
+ T temp = x;
+ x = y;
+ y = temp;
+}
+
+// Average of a and b, computed as (a+b)/2 and converted back to T.
+// For integer types the division truncates, and a+b can overflow for large inputs.
+template <class T> FORCEINLINE T AVG(T a, T b)
+{
+ return (a+b)/2;
+}
+
+// number of elements in an array of static size
+#define NELEMS(x) ARRAYSIZE(x)
+
+// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
+#define XYZ(v) (v).x,(v).y,(v).z
+
+
+// Returns -1.0f when x is below zero and +1.0f otherwise.
+// Zero (including negative zero) counts as positive.
+inline float Sign( float x )
+{
+    if ( x < 0.0f )
+    {
+        return -1.0f;
+    }
+    return 1.0f;
+}
+
+//
+// Clamps the input integer to the given array bounds.
+// Equivalent to the following, but without using any branches:
+//
+// if( n < 0 ) return 0;
+// else if ( n > maxindex ) return maxindex;
+// else return n;
+//
+// This is not always a clear performance win, but when you have situations where a clamped
+// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...)
+//
+// Note: This code has been run against all possible integers.
+//
+inline int ClampArrayBounds( int n, unsigned maxindex )
+{
+    // All ones when 0 <= n <= maxindex, zero otherwise. A negative n becomes a
+    // huge unsigned value, so it also fails this test.
+    const unsigned int uInRange = 0xFFFFFFFF + ( ((unsigned) n) > maxindex );
+
+    // All ones when n is negative, zero otherwise.
+    const unsigned int uNegative = 0xFFFFFFFF + ( n >= 0 );
+
+    // All ones only when n is non-negative and above maxindex.
+    const unsigned int uTooLarge = (~uInRange) & (~uNegative);
+
+    // In range: keep n. Otherwise start from zero.
+    int result = ( uInRange & n );
+
+    // Too large: substitute maxindex. Negative inputs stay at zero.
+    result |= uTooLarge & maxindex;
+
+    return result;
+}
+
+
+// Classifies a box against plane p. When p->type < 3 the test is done inline
+// using only component p->type of emins/emaxs: the result is 1 if
+// p->dist <= emins[type], 2 if p->dist >= emaxs[type], and 3 otherwise.
+// Any other plane type is forwarded to BoxOnPlaneSide().
+// The arguments are expanded several times, so they should not have side effects.
+#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \
+ (((p)->type < 3)? \
+ ( \
+ ((p)->dist <= (emins)[(p)->type])? \
+ 1 \
+ : \
+ ( \
+ ((p)->dist >= (emaxs)[(p)->type])?\
+ 2 \
+ : \
+ 3 \
+ ) \
+ ) \
+ : \
+ BoxOnPlaneSide( (emins), (emaxs), (p)))
+
+//-----------------------------------------------------------------------------
+// FIXME: Vector versions.... the float versions will go away hopefully soon!
+//-----------------------------------------------------------------------------
+
+void AngleVectors (const QAngle& angles, Vector *forward);
+void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
+void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
+void AngleMatrix (const QAngle &angles, matrix3x4_t &mat );
+void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat );
+void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat );
+void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat );
+void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat );
+void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat );
+void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat );
+void VectorAngles( const Vector &forward, QAngle &angles );
+void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles );
+void VectorMatrix( const Vector &forward, matrix3x4_t &mat );
+void VectorVectors( const Vector &forward, Vector &right, Vector &up );
+void SetIdentityMatrix( matrix3x4_t &mat );
+void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst );
+void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst );
+
+// Uniform-scale convenience overload: forwards flScale as the x, y and z
+// arguments of the three-float SetScaleMatrix.
+inline void SetScaleMatrix( float flScale, matrix3x4_t &dst )
+{
+ SetScaleMatrix( flScale, flScale, flScale, dst );
+}
+
+// Per-axis convenience overload: forwards scale.x, scale.y and scale.z to the
+// three-float SetScaleMatrix.
+inline void SetScaleMatrix( const Vector& scale, matrix3x4_t &dst )
+{
+ SetScaleMatrix( scale.x, scale.y, scale.z, dst );
+}
+
+// Computes the inverse transpose
+void MatrixTranspose( matrix3x4_t& mat );
+void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
+void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
+
+// Writes position into column 3 of mat via MatrixSetColumn; nothing else is
+// done to mat here.
+inline void PositionMatrix( const Vector &position, matrix3x4_t &mat )
+{
+ MatrixSetColumn( position, 3, mat );
+}
+
+// Reads column 3 of matrix into position via MatrixGetColumn
+// (the counterpart of PositionMatrix).
+inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position )
+{
+ MatrixGetColumn( matrix, 3, position );
+}
+
+// Vector convenience overload: forwards to the float-pointer VectorRotate,
+// passing the address of each vector's x component.
+inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
+{
+ VectorRotate( &in1.x, in2, &out.x );
+}
+
+// Vector convenience overload: forwards to the float-pointer VectorIRotate,
+// passing the address of each vector's x component.
+inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
+{
+ VectorIRotate( &in1.x, in2, &out.x );
+}
+
+// QAngle convenience overload: forwards to the float-pointer MatrixAngles,
+// which fills the angles in place starting at angles.x.
+inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles )
+{
+ MatrixAngles( matrix, &angles.x );
+}
+
+// Extracts both outputs from matrix: angles through MatrixAngles and
+// position through MatrixPosition (column 3 of the matrix).
+inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position )
+{
+ MatrixAngles( matrix, angles );
+ MatrixPosition( matrix, position );
+}
+
+// RadianEuler overload: the float-pointer MatrixAngles first fills the three
+// floats of angles in place; they are then converted with DEG2RAD and
+// re-ordered, so the final (x, y, z) come from the intermediate (z, x, y).
+inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles )
+{
+ MatrixAngles( matrix, &angles.x );
+
+ // Arguments are evaluated from the intermediate values before Init overwrites them.
+ angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) );
+}
+
+void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position );
+
+void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position );
+
+// Returns the result of v1 == v2 as an int (non-zero when Vector's operator==
+// reports the two vectors equal).
+inline int VectorCompare (const Vector& v1, const Vector& v2)
+{
+ return v1 == v2;
+}
+
+// Vector convenience overload: forwards to the float-pointer VectorTransform,
+// passing the address of each vector's x component.
+inline void VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
+{
+ VectorTransform( &in1.x, in2, &out.x );
+}
+
+// Vector convenience overload: forwards to the float-pointer VectorITransform,
+// passing the address of each vector's x component.
+inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
+{
+ VectorITransform( &in1.x, in2, &out.x );
+}
+
+/*
+inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out )
+{
+ DecomposeRotation( mat, &out.x );
+}
+*/
+
+// Vector convenience overload: forwards to the float-pointer BoxOnPlaneSide,
+// passing the address of each bound's x component, and returns its result.
+inline int BoxOnPlaneSide (const Vector& emins, const Vector& emaxs, const cplane_t *plane )
+{
+ return BoxOnPlaneSide( &emins.x, &emaxs.x, plane );
+}
+
+// Sets all three components of a to b.
+inline void VectorFill(Vector& a, float b)
+{
+    for ( int axis = 0; axis < 3; ++axis )
+    {
+        a[axis] = b;
+    }
+}
+
+// Negates each of the three components of a in place.
+inline void VectorNegate(Vector& a)
+{
+    for ( int axis = 0; axis < 3; ++axis )
+    {
+        a[axis] = -a[axis];
+    }
+}
+
+// Returns the mean of the three components of a.
+inline vec_t VectorAvg(Vector& a)
+{
+    const vec_t componentSum = a[0] + a[1] + a[2];
+    return componentSum / 3;
+}
+
+//-----------------------------------------------------------------------------
+// Box/plane test (slow version)
+// Returns a bitmask: bit 1 is set when the box corner furthest along the
+// plane normal is at a signed distance >= tolerance from the plane, bit 2 is
+// set when the opposite corner is at a signed distance < -tolerance.
+//-----------------------------------------------------------------------------
+inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f )
+{
+    // corners[0] collects, per axis, the bound lying furthest along the
+    // normal; corners[1] collects the opposite bound. A negative normal
+    // component flips which of emins/emaxs is picked.
+    Vector corners[2];
+    for ( int axis = 0; axis < 3; ++axis )
+    {
+        if ( p->normal[axis] < 0 )
+        {
+            corners[0][axis] = emins[axis];
+            corners[1][axis] = emaxs[axis];
+        }
+        else
+        {
+            corners[1][axis] = emins[axis];
+            corners[0][axis] = emaxs[axis];
+        }
+    }
+
+    const float farDist = DotProduct (p->normal, corners[0]) - p->dist;
+    const float nearDist = DotProduct (p->normal, corners[1]) - p->dist;
+
+    int sides = 0;
+    if ( farDist >= tolerance )
+    {
+        sides = 1;
+    }
+    if ( nearDist < -tolerance )
+    {
+        sides |= 2;
+    }
+    return sides;
+}
+
+//-----------------------------------------------------------------------------
+// Helpers for bounding box construction
+//-----------------------------------------------------------------------------
+
+void ClearBounds (Vector& mins, Vector& maxs);
+void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs);
+
+//
+// COLORSPACE/GAMMA CONVERSION STUFF
+//
+void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright );
+
+// convert texture to linear 0..1 value
+// Scales c by the power2_n table entry selected by exponent. The table has
+// 256 entries and is indexed with exponent + 128, so exponent must lie in
+// [-128, 127] (checked with Assert).
+inline float TexLightToLinear( int c, int exponent )
+{
+    extern float power2_n[256];
+    Assert( exponent >= -128 && exponent <= 127 );
+
+    const int tableIndex = exponent + 128;
+    return static_cast<float>( c ) * power2_n[tableIndex];
+}
+
+
+// convert texture to linear 0..1 value
+int LinearToTexture( float f );
+// converts 0..1 linear value to screen gamma (0..255)
+int LinearToScreenGamma( float f );
+float TextureToLinear( int c );
+
+// compressed color format: three byte-sized color channels plus one signed
+// 8-bit exponent stored alongside them (four bytes in total).
+struct ColorRGBExp32
+{
+    byte r;                 // red channel
+    byte g;                 // green channel
+    byte b;                 // blue channel
+    signed char exponent;   // single signed exponent for the three channels
+};
+
+void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out );
+void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c );
+
+// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists
+bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 );
+
+// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists
+bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
+
+// solves for a,b,c specified as above, except that it always creates a monotonically increasing or
+// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the
+// monotonicity condition, it is possible that the resulting quadratic will only approximate the data
+// instead of interpolating it. This code is not especially fast.
+bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2,
+ float x3, float y3, float &a, float &b, float &c );
+
+
+
+
+// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists
+bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
+
+// rotate a vector around the Z axis (YAW)
+void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
+
+
+// Bias takes an X value between 0 and 1 and returns another value between 0 and 1
+// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1.
+// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1.
+//
+// For example, with biasAmt = 0.2, the curve looks like this:
+//
+// 1
+// | *
+// | *
+// | *
+// | **
+// | **
+// | ****
+// |*********
+// |___________________
+// 0 1
+//
+//
+// With biasAmt = 0.8, the curve looks like this:
+//
+// 1
+// | **************
+// | **
+// | *
+// | *
+// |*
+// |*
+// |*
+// |___________________
+// 0 1
+//
+// With a biasAmt of 0.5, Bias returns X.
+float Bias( float x, float biasAmt );
+
+
+// Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
+// Lower bias values bias towards 0.5 and higher bias values bias away from it.
+//
+// For example, with biasAmt = 0.2, the curve looks like this:
+//
+// 1
+// | *
+// | *
+// | **
+// | ***************
+// | **
+// | *
+// |*
+// |___________________
+// 0 1
+//
+//
+// With biasAmt = 0.8, the curve looks like this:
+//
+// 1
+// | *****
+// | ***
+// | *
+// | *
+// | *
+// | ***
+// |*****
+// |___________________
+// 0 1
+float Gain( float x, float biasAmt );
+
+
+// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
+// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
+// any fadein/fadeout effect where it should start and end smoothly.
+//
+// The curve looks like this:
+//
+// 1
+// | **
+// | * *
+// | * *
+// | * *
+// | * *
+// | ** **
+// |*** ***
+// |___________________
+// 0 1
+//
+float SmoothCurve( float x );
+
+
+// This works like SmoothCurve, with two changes:
+//
+// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos.
+// (So if you specify flPeakPos=0.2, then the peak will slide to the left).
+//
+// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak.
+// Low values blunt the peak and high values sharpen the peak.
+float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 );
+
+
+//float ExponentialDecay( float halflife, float dt );
+//float ExponentialDecay( float decayTo, float decayTime, float dt );
+
+// Returns the fraction of a value remaining after dt when it halves every
+// halflife (halflife is the time for the value to reach 50%).
+inline float ExponentialDecay( float halflife, float dt )
+{
+    // log(0.5) == -0.69314718055994530941723212145818
+    const float decayRate = -0.69314718f / halflife;
+    return expf( decayRate * dt );
+}
+
+// Returns the fraction of a value remaining after dt, where decayTo is the
+// factor the value should decay to in decayTime.
+inline float ExponentialDecay( float decayTo, float decayTime, float dt )
+{
+    const float decayRate = logf( decayTo ) / decayTime;
+    return expf( decayRate * dt );
+}
+
+// Get the integrated distance traveled
+// decayTo is factor the value should decay to in decayTime
+// dt is the time relative to the last velocity update
+inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt )
+{
+    // decayTo^(dt/decayTime) is the decay factor reached after dt
+    const float decayedFactor = powf( decayTo, dt / decayTime );
+    const float numerator = decayedFactor * decayTime - decayTime;
+    return numerator / logf( decayTo );
+}
+
+// hermite basis function for smooth interpolation: evaluates 3v^2 - 2v^3
+// Similar to Gain() above, but very cheap to call
+// value should be between 0 & 1 inclusive
+inline float SimpleSpline( float value )
+{
+    const float squared = value * value;
+    const float cubicTerm = 2 * squared * value;
+
+    // Ease-in, ease-out curve
+    return 3 * squared - cubicTerm;
+}
+
+// Remaps val from the input range [A, B] to the output range [C, D], easing
+// the normalized position with SimpleSpline instead of mapping it linearly.
+// A degenerate input range (A == B) yields D when val >= B and C otherwise.
+// val is not clamped to [A, B].
+inline float SimpleSplineRemapVal( float val, float A, float B, float C, float D)
+{
+    if ( A == B )
+    {
+        if ( val >= B )
+            return D;
+        return C;
+    }
+
+    const float fraction = (val - A) / (B - A);
+    return C + (D - C) * SimpleSpline( fraction );
+}
+
+// Remaps val from the input range [A, B] to the output range [C, D], easing
+// the normalized position with SimpleSpline instead of mapping it linearly.
+// The normalized position is clamped to [0, 1] before the spline is applied.
+// A degenerate input range (A == B) yields D when val >= B and C otherwise.
+inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D )
+{
+    if ( A == B )
+    {
+        if ( val >= B )
+            return D;
+        return C;
+    }
+
+    const float fraction = clamp( (val - A) / (B - A), 0.0f, 1.0f );
+    return C + (D - C) * SimpleSpline( fraction );
+}
+
+// Converts f to an int with the hardware float-to-int conversion instead of a
+// C cast. Only 32-bit x86, 64-bit Windows and Xbox 360 are handled; any other
+// target stops the build with #error.
+// NOTE(review): both conversions round according to the current FPU/MXCSR
+// rounding mode (round-to-nearest unless it has been changed) - confirm
+// callers rely on that rather than on truncation.
+FORCEINLINE int RoundFloatToInt(float f)
+{
+#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 )
+ // SSE scalar convert (cvtss2si)
+ return _mm_cvtss_si32(_mm_load_ss(&f));
+#elif defined( _X360 )
+#ifdef Assert
+ Assert( IsFPUControlWordSet() );
+#endif
+ // __fctiw leaves the integer result inside a double; the anonymous union
+ // lets it be read back as the second int of that double.
+ union
+ {
+ double flResult;
+ int pResult[2];
+ };
+ flResult = __fctiw( f );
+ return pResult[1];
+#else
+#error Unknown architecture
+#endif
+}
+
+// Converts f with RoundFloatToInt and returns the result as an unsigned char.
+// When Assert is available it checks that the converted value already fits
+// in 0..255; otherwise the cast silently keeps only the low 8 bits.
+FORCEINLINE unsigned char RoundFloatToByte(float f)
+{
+ int nResult = RoundFloatToInt(f);
+#ifdef Assert
+ Assert( (nResult & ~0xFF) == 0 );
+#endif
+ return (unsigned char) nResult;
+}
+
+// Converts f to an unsigned long, with a separate implementation per platform:
+// Xbox 360 uses __fctiw, 64-bit Windows rounds by hand, and the remaining
+// targets use x87 inline assembly.
+FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
+{
+#if defined( _X360 )
+#ifdef Assert
+ Assert( IsFPUControlWordSet() );
+#endif
+ // __fctiw leaves the integer result inside a double; the union exposes
+ // its second 32-bit word both as int (for the sign check) and as unsigned long.
+ union
+ {
+ double flResult;
+ int pIntResult[2];
+ unsigned long pResult[2];
+ };
+ flResult = __fctiw( f );
+ // NOTE(review): unlike the Assert above, this one is not wrapped in #ifdef Assert.
+ Assert( pIntResult[1] >= 0 );
+ return pResult[1];
+#else // !X360
+
+#if defined( PLATFORM_WINDOWS_PC64 )
+ // Truncate, then round up by hand. An odd truncated value rounds up on a
+ // fraction >= 0.5, an even one only on a fraction > 0.5, so exact halves
+ // go to the even neighbour.
+ uint nRet = ( uint ) f;
+ if ( nRet & 1 )
+ {
+ if ( ( f - floor( f ) >= 0.5 ) )
+ {
+ nRet++;
+ }
+ }
+ else
+ {
+ if ( ( f - floor( f ) > 0.5 ) )
+ {
+ nRet++;
+ }
+ }
+ return nRet;
+#else // PLATFORM_WINDOWS_PC64
+ // Scratch buffer the x87 store writes into; it is not initialized first.
+ unsigned char nResult[8];
+
+ #if defined( _WIN32 )
+ __asm
+ {
+ fld f
+ fistp qword ptr nResult
+ }
+ #elif POSIX
+ // NOTE(review): fistpl presumably stores only 32 bits, leaving the rest of
+ // nResult unwritten - confirm this is safe where unsigned long is 64 bits.
+ __asm __volatile__ (
+ "fistpl %0;": "=m" (nResult): "t" (f) : "st"
+ );
+ #endif
+
+ // Reinterprets the start of the byte buffer as an unsigned long.
+ return *((unsigned long*)nResult);
+#endif // PLATFORM_WINDOWS_PC64
+#endif // !X360
+}
+
+// Returns true when flValue lies within flTolerance (exclusive) of the
+// integer produced by RoundFloatToInt( flValue ).
+FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
+{
+    const int nRounded = RoundFloatToInt( flValue );
+    return fabs( nRounded - flValue ) < flTolerance;
+}
+
+// Fast, accurate ftol:
+// Converts a to int. On Xbox 360 this uses __fctiwz and reads the integer
+// back out of the resulting double; everywhere else it is a plain C cast,
+// which truncates toward zero.
+FORCEINLINE int Float2Int( float a )
+{
+#if defined( _X360 )
+ // The integer result is read back as the second int of the double.
+ union
+ {
+ double flResult;
+ int pResult[2];
+ };
+ flResult = __fctiwz( a );
+ return pResult[1];
+#else // !X360
+ // Rely on compiler to generate CVTTSS2SI on x86
+ return (int) a;
+#endif
+}
+
+// Over 15x faster than: (int)floor(value)
+// The SSE path is only compiled when __i386__ is defined; every other target
+// falls back to floor() from the math library.
+inline int Floor2Int( float a )
+{
+ int RetVal;
+#if defined( __i386__ )
+ // Convert to int and back, compare, subtract one if too big
+ __m128 a128 = _mm_set_ss(a);
+ RetVal = _mm_cvtss_si32(a128);
+ __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
+ // _mm_comigt_ss yields 1 when the round-tripped value is greater than a, else 0
+ RetVal -= _mm_comigt_ss( rounded128, a128 );
+#else
+ RetVal = static_cast<int>( floor(a) );
+#endif
+ return RetVal;
+}
+
+//-----------------------------------------------------------------------------
+// Fast color conversion from float to unsigned char
+// Scales c by 255 and converts it to an integer.
+// NOTE(review): the two paths are not identical - the __i386__ path masks the
+// result to 8 bits, while the fallback returns Float2Int( c * 255.0f ) unmasked.
+//-----------------------------------------------------------------------------
+FORCEINLINE unsigned int FastFToC( float c )
+{
+#if defined( __i386__ )
+ // IEEE float bit manipulation works for values between [0, 1<<23)
+ // Adding 2^23 pushes the integer part of c*255 into the low mantissa bits,
+ // so the low 8 bits of the float's bit pattern are the converted value.
+ union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
+ return convert.i & 255;
+#else
+ // consoles CPUs suffer from load-hit-store penalty
+ return Float2Int( c * 255.0f );
+#endif
+}
+
+//-----------------------------------------------------------------------------
+// Fast conversion from float to integer with magnitude less than 2**22
+// The __i386__ path reads the integer out of the float's bit pattern; the
+// fallback simply calls Float2Int( c ).
+//-----------------------------------------------------------------------------
+FORCEINLINE int FastFloatToSmallInt( float c )
+{
+#if defined( __i386__ )
+ // IEEE float bit manipulation works for values between [-1<<22, 1<<22)
+ // Adding 3<<22 biases c so its integer part sits in the low 23 mantissa bits;
+ // masking those bits and subtracting 1<<22 removes the bias again.
+ union { float f; int i; } convert = { c + (float)(3<<22) };
+ return (convert.i & ((1<<23)-1)) - (1<<22);
+#else
+ // consoles CPUs suffer from load-hit-store penalty
+ return Float2Int( c );
+#endif
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: Bound input float to .001 (millisecond) boundary
+// Input : in - value to snap
+// Output : in * 1000 plus 0.5, floored to a whole number of milliseconds
+//          with Floor2Int, then scaled back by 0.001
+//-----------------------------------------------------------------------------
+inline float ClampToMsec( float in )
+{
+    const float flBiasedMsec = in * 1000.0f + 0.5f;
+    const int nWholeMsec = Floor2Int( flBiasedMsec );
+    return 0.001f * nWholeMsec;
+}
+
+// Over 15x faster than: (int)ceil(value)
+// The SSE path is only compiled when __i386__ is defined; every other target
+// falls back to ceil() from the math library.
+inline int Ceil2Int( float a )
+{
+ int RetVal;
+#if defined( __i386__ )
+ // Convert to int and back, compare, add one if too small
+ __m128 a128 = _mm_load_ss(&a);
+ RetVal = _mm_cvtss_si32(a128);
+ __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal);
+ // _mm_comilt_ss yields 1 when the round-tripped value is less than a, else 0
+ RetVal += _mm_comilt_ss( rounded128, a128 );
+#else
+ RetVal = static_cast<int>( ceil(a) );
+#endif
+ return RetVal;
+}
+
+
+// Regular signed area of triangle [A,B,C]: half the 2D cross product of the
+// edges (B - A) and (C - A). A, B and C are any expressions with .x and .y
+// members; each argument is parenthesized so that expressions such as *pVert
+// expand correctly. Arguments are expanded several times, so they should not
+// have side effects.
+#define TriArea2D( A, B, C ) \
+    ( 0.5f * ( ( (B).x - (A).x ) * ( (C).y - (A).y ) - ( (B).y - (A).y ) * ( (C).x - (A).x ) ) )
+
+// This version doesn't premultiply by 0.5f, so it's the signed area of the
+// parallelogram spanned by (B - A) and (C - A) instead
+#define TriArea2DTimesTwo( A, B, C ) \
+    ( ( ( (B).x - (A).x ) * ( (C).y - (A).y ) - ( (B).y - (A).y ) * ( (C).x - (A).x ) ) )
+
+
+// Get the barycentric coordinates of "pt" in triangle [A,B,C].
+// bcCoords[0], [1] and [2] receive the weights of A, B and C respectively,
+// each computed as the doubled signed area of the sub-triangle opposite that
+// vertex divided by the doubled signed area of [A,B,C].
+// A zero-area triangle is not guarded against.
+inline void GetBarycentricCoords2D( Vector2D const &A, Vector2D const &B, Vector2D const &C, Vector2D const &pt, float bcCoords[3] )
+{
+    // Both numerator and denominator are doubled areas, so the factor of two
+    // cancels out in each ratio.
+    const float flInvDoubleArea = 1.0f / TriArea2DTimesTwo( A, B, C );
+
+    // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise.
+    // If not, the signed area is negated so this works out right.
+    const float flOppositeA = TriArea2DTimesTwo( B, C, pt );
+    const float flOppositeB = TriArea2DTimesTwo( C, A, pt );
+    const float flOppositeC = TriArea2DTimesTwo( A, B, pt );
+
+    bcCoords[0] = flOppositeA * flInvDoubleArea;
+    bcCoords[1] = flOppositeB * flInvDoubleArea;
+    bcCoords[2] = flOppositeC * flInvDoubleArea;
+}
+
+
+// Return true if the sphere might touch the box (the sphere is actually treated
+// like a box itself, so this may return true if the sphere's bounding box touches
+// a corner of the box but the sphere itself doesn't).
+// All comparisons are strict, so extents that only just meet do not count.
+inline bool QuickBoxSphereTest( const Vector& vOrigin, float flRadius, const Vector& bbMin, const Vector& bbMax )
+{
+    // Reject as soon as one axis fails to overlap.
+    if ( !( vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x ) )
+        return false;
+
+    if ( !( vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y ) )
+        return false;
+
+    return vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z;
+}
+
+
+// Return true if the boxes intersect (but not if they just touch).
+// The boxes intersect only when their extents overlap strictly on x, y and z.
+inline bool QuickBoxIntersectTest( const Vector& vBox1Min, const Vector& vBox1Max, const Vector& vBox2Min, const Vector& vBox2Max )
+{
+    // Reject as soon as one axis fails to overlap.
+    if ( !( vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x ) )
+        return false;
+
+    if ( !( vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y ) )
+        return false;
+
+    return vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z;
+}
+
+
+extern float GammaToLinearFullRange( float gamma );
+extern float LinearToGammaFullRange( float linear );
+extern float GammaToLinear( float gamma );
+extern float LinearToGamma( float linear );
+
+extern float SrgbGammaToLinear( float flSrgbGammaValue );
+extern float SrgbLinearToGamma( float flLinearValue );
+extern float X360GammaToLinear( float fl360GammaValue );
+extern float X360LinearToGamma( float flLinearValue );
+extern float SrgbGammaTo360Gamma( float flSrgbGammaValue );
+
+// linear (0..4) to screen corrected vertex space (0..1?)
+// Looks f up in the 4096-entry lineartovertex table: f is scaled by 1024,
+// rounded with RoundFloatToInt and clamped to the table bounds [0, 4095].
+FORCEINLINE float LinearToVertexLight( float f )
+{
+    extern float lineartovertex[4096];
+
+    // assume 0..4 range
+    int i = RoundFloatToInt( f * 1024.f );
+
+    // Presumably the common case will be not to clamp, so check that first
+    // (the unsigned compare catches negative and too-large indices at once):
+    if ( (unsigned)i > 4095 )
+    {
+        // Pick the clamp direction from the sign of f rather than of i.
+        // When f * 1024 does not fit in an int, the x86 conversion returns
+        // 0x80000000, which is negative, so testing i would clamp very bright
+        // input to the darkest entry. Negative and NaN inputs still map to 0.
+        i = ( f > 0.0f ) ? 4095 : 0;
+    }
+
+    return lineartovertex[i];
+}
+
+
+// Looks f up in the 4096-entry lineartolightmap table: f is scaled by 1024,
+// rounded with RoundFloatToInt and clamped to the table bounds [0, 4095].
+FORCEINLINE unsigned char LinearToLightmap( float f )
+{
+    extern unsigned char lineartolightmap[4096];
+
+    int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range
+
+    // Presumably the common case will be not to clamp, so check that first
+    // (the unsigned compare catches negative and too-large indices at once):
+    if ( (unsigned)i > 4095 )
+    {
+        // Pick the clamp direction from the sign of f rather than of i.
+        // When f * 1024 does not fit in an int, the x86 conversion returns
+        // 0x80000000, which is negative, so testing i would clamp very bright
+        // input to the darkest entry. Negative and NaN inputs still map to 0.
+        i = ( f > 0.0f ) ? 4095 : 0;
+    }
+
+    return lineartolightmap[i];
+}
+
+// Brings color into the 0..1 range in place. When the largest component is
+// above 1, all three components are scaled by its reciprocal; afterwards any
+// negative component is raised to 0.
+FORCEINLINE void ColorClamp( Vector& color )
+{
+    const float flLargest = max( color.x, max( color.y, color.z ) );
+    if ( flLargest > 1.0f )
+    {
+        const float flInvLargest = 1.0f / flLargest;
+        color.x *= flInvLargest;
+        color.y *= flInvLargest;
+        color.z *= flInvLargest;
+    }
+
+    for ( int channel = 0; channel < 3; ++channel )
+    {
+        if ( color[channel] < 0.f )
+        {
+            color[channel] = 0.f;
+        }
+    }
+}
+
+// Clamps each component of color to [0, 1] independently, in place
+// (no rescaling of the other components).
+inline void ColorClampTruncate( Vector& color )
+{
+    for ( int channel = 0; channel < 3; ++channel )
+    {
+        if ( color[channel] > 1.0f )
+        {
+            color[channel] = 1.0f;
+        }
+        else if ( color[channel] < 0.0f )
+        {
+            color[channel] = 0.0f;
+        }
+    }
+}
+
+// Interpolate a Catmull-Rom spline.
+// t is a [0,1] value and interpolates a curve between p2 and p3.
+void Catmull_Rom_Spline(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector &output );
+
+// Interpolate a Catmull-Rom spline.
+// Returns the tangent of the point at t of the spline
+void Catmull_Rom_Spline_Tangent(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector &output );
+
+// area under the curve [0..t]
+void Catmull_Rom_Spline_Integral(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// area under the curve [0..1]
+void Catmull_Rom_Spline_Integral(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ Vector& output );
+
+// Interpolate a Catmull-Rom spline.
+// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
+void Catmull_Rom_Spline_Normalize(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector &output );
+
+// area under the curve [0..t]
+// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
+void Catmull_Rom_Spline_Integral_Normalize(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// Interpolate a Catmull-Rom spline.
+// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x
+void Catmull_Rom_Spline_NormalizeX(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector &output );
+
+// area under the curve [0..t]
+// NOTE(review): this prototype is identical to the Catmull_Rom_Spline_NormalizeX
+// declaration directly above it, so it only redeclares that function. Given the
+// comment, it may have been meant to declare an integral variant - confirm
+// against the implementation.
+void Catmull_Rom_Spline_NormalizeX(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// Interpolate a Hermite spline.
+// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2.
+void Hermite_Spline(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &d1,
+ const Vector &d2,
+ float t,
+ Vector& output );
+
+float Hermite_Spline(
+ float p1,
+ float p2,
+ float d1,
+ float d2,
+ float t );
+
+// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2
+void Hermite_Spline(
+ const Vector &p0,
+ const Vector &p1,
+ const Vector &p2,
+ float t,
+ Vector& output );
+
+float Hermite_Spline(
+ float p0,
+ float p1,
+ float p2,
+ float t );
+
+
+void Hermite_SplineBasis( float t, float basis[] );
+
+void Hermite_Spline(
+ const Quaternion &q0,
+ const Quaternion &q1,
+ const Quaternion &q2,
+ float t,
+ Quaternion &output );
+
+
+// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves
+//
+// Tension: -1 = Round -> 1 = Tight
+// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right)
+// Continuity: -1 = Box corners -> 1 = Inverted corners
+//
+// If T=B=C=0 it's the same matrix as Catmull-Rom.
+// If T=1 & B=C=0 it's the same as Cubic.
+// If T=B=0 & C=-1 it's just linear interpolation
+//
+// See http://news.povray.org/povray.binaries.tutorials/attachment/%[email protected]%3E/Splines.bas.txt
+// for example code and descriptions of various spline types...
+//
+void Kochanek_Bartels_Spline(
+ float tension,
+ float bias,
+ float continuity,
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+void Kochanek_Bartels_Spline_NormalizeX(
+ float tension,
+ float bias,
+ float continuity,
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// See link at Kochanek_Bartels_Spline for info on the basis matrix used
+void Cubic_Spline(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+void Cubic_Spline_NormalizeX(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// See link at Kochanek_Bartels_Spline for info on the basis matrix used
+void BSpline(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+void BSpline_NormalizeX(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// See link at Kochanek_Bartels_Spline for info on the basis matrix used
+void Parabolic_Spline(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+void Parabolic_Spline_NormalizeX(
+ const Vector &p1,
+ const Vector &p2,
+ const Vector &p3,
+ const Vector &p4,
+ float t,
+ Vector& output );
+
+// quintic interpolating polynomial from Perlin: 6t^5-15t^4+10t^3
+// 0->0, 1->1, smooth-in between with smooth tangents
+FORCEINLINE float QuinticInterpolatingPolynomial(float t)
+{
+    // Factored as t^3 * ( t * ( 6t - 15 ) + 10 ). The bracketed part is
+    // evaluated in double because of the double literals.
+    const float tCubed = t * t * t;
+    const double bracket = t * ( t * 6.0 - 15.0 ) + 10.0;
+    return tCubed * bracket;
+}
+
+// given a table of sorted tabulated positions, return the two indices and blendfactor to linear
+// interpolate. Does a search. Can be used to find the blend value to interpolate between
+// keyframes.
+void GetInterpolationData( float const *pKnotPositions,
+ float const *pKnotValues,
+ int nNumValuesinList,
+ int nInterpolationRange,
+ float flPositionToInterpolateAt,
+ bool bWrap,
+ float *pValueA,
+ float *pValueB,
+ float *pInterpolationValue);
+
+float RangeCompressor( float flValue, float flMin, float flMax, float flBase );
+
+// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs]
+// using voronoi regions.
+// 0 is returned if the origin is inside the box.
+float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point );
+void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut );
+void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut );
+
+// Returns the distance from point to the box [mins,maxs]: the square root of
+// CalcSqrDistanceToAABB for the same arguments.
+inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point )
+{
+    return sqrt( CalcSqrDistanceToAABB( mins, maxs, point ) );
+}
+
+// Get the closest point from P to the (infinite) line through vLineA and vLineB and
+// calculate the shortest distance from P to the line.
+// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point.
+// If the closest point lies on the segment between A and B, then 0 <= t <= 1.
+void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
+float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
+float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
+
+// The same three functions as above, except now the line is closed between A and B.
+void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
+float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
+float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
+
+// A function to compute the closest line segment connecting two lines (or false if the lines are parallel, etc.)
+bool CalcLineToLineIntersectionSegment(
+ const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2,
+ float *t1, float *t2 );
+
+// The above functions in 2D
+void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
+float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
+float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
+void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
+float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
+float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
+
+// Init the mathlib
+// gamma, texGamma, brightness, overbright: NOTE(review) these match the
+//   parameters of BuildGammaTable and are presumably forwarded to it - confirm.
+// bAllow3DNow, bAllowSSE, bAllowSSE2, bAllowMMX: NOTE(review) presumably gate
+//   use of the corresponding CPU instruction sets (see the MathLib_*Enabled
+//   queries) - confirm.
+// The overbright default is written as the int literal 2; it was previously
+// the float literal 2.0f, which converted to the same value.
+void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true );
+bool MathLib_3DNowEnabled( void );
+bool MathLib_MMXEnabled( void );
+bool MathLib_SSEEnabled( void );
+bool MathLib_SSE2Enabled( void );
+
+float Approach( float target, float value, float speed );
+float ApproachAngle( float target, float value, float speed );
+float AngleDiff( float destAngle, float srcAngle );
+float AngleDistance( float next, float cur );
+float AngleNormalize( float angle );
+
+// ensure that 0 <= angle <= 360
+float AngleNormalizePositive( float angle );
+
+bool AnglesAreEqual( float a, float b, float tolerance = 0.0f );
+
+
+void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle );
+void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out );
+
+void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept );
+int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f );
+int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f );
+int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 );
+
+//-----------------------------------------------------------------------------
+// Computes a reasonable tangent space for a triangle
+//-----------------------------------------------------------------------------
+void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2,
+ const Vector2D &t0, const Vector2D &t1, const Vector2D& t2,
+ Vector &sVect, Vector &tVect );
+
+//-----------------------------------------------------------------------------
+// Transforms a AABB into another space; which will inherently grow the box.
+//-----------------------------------------------------------------------------
+void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
+
+//-----------------------------------------------------------------------------
+// Uses the inverse transform of in1
+//-----------------------------------------------------------------------------
+void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
+
+//-----------------------------------------------------------------------------
+// Rotates an AABB into another space, which will inherently grow the box.
+// (same as TransformAABB, but doesn't take the translation into account)
+//-----------------------------------------------------------------------------
+void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
+
+//-----------------------------------------------------------------------------
+// Uses the inverse transform of in1
+//-----------------------------------------------------------------------------
+void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
+
+//-----------------------------------------------------------------------------
+// Transform a plane
+//-----------------------------------------------------------------------------
+inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
+{
+ // The general recipe for transforming a plane is:
+ // 1) Transform the normal into the new space.
+ // 2) Pick a point on the old plane, given by plane dist * plane normal.
+ // 3) Transform that point into the new space.
+ // 4) New plane dist = DotProduct( new normal, new point ).
+
+ // The code below is an optimized version, which works if src has no nonuniform scale or skew (see the note further down).
+ // 1) Transform (rotate) the normal into the new space.
+ // 2) Realize that transforming the old plane point into the new space
+ // gives [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ]
+ // where d = old plane dist, n' = transformed normal, T = translation column of src (src[0..2][3]).
+ // 3) Compute the new plane dist as the dot product of the new normal with the point from #2.
+
+ // For a correct result in the general case, the normal should be transformed by an inverse-transpose matrix,
+ // but that only matters if there are nonuniform scale or skew factors in this matrix.
+ VectorRotate( inPlane.normal, src, outPlane.normal );
+ outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
+ outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3];
+}
+
+inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
+{
+ // Same idea as MatrixTransformPlane, but for the inverse of src. The trick is that the translational
+ // component of the inverse transform is -( R^-1 * T ), where T is the translation column of src.
+ Vector vecTranslation;
+ MatrixGetColumn( src, 3, vecTranslation ); // T: column 3 of src
+
+ Vector vecInvTranslation;
+ VectorIRotate( vecTranslation, src, vecInvTranslation ); // R^-1 * T (the minus sign is applied by the '-=' below)
+
+ VectorIRotate( inPlane.normal, src, outPlane.normal );
+ outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
+ outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2];
+}
+
+int CeilPow2( int in );
+int FloorPow2( int in );
+
+// Unpacks a HEND3N-packed normal into three floats.
+//   pPackedNormal - packed value: X in bits 0..10 (11 bits), Y in bits 11..21 (11 bits),
+//                   Z in bits 22..31 (10 bits), each a two's-complement integer
+//                   (the same masks/shifts PackNormal_HEND3N writes).
+//   pNormal       - receives 3 floats: X and Y scaled by 1/1023, Z scaled by 1/511.
+// Returns pNormal.
+FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal )
+{
+    int temp[3];
+
+    temp[0] = ((*pPackedNormal >> 0L) & 0x7ff);
+    if ( temp[0] & 0x400 )
+    {
+        // Sign bit of the 11-bit field is set: sign-extend it.
+        // (The old '2048 - temp' produced the magnitude and lost the sign.)
+        temp[0] -= 2048;
+    }
+
+    temp[1] = ((*pPackedNormal >> 11L) & 0x7ff);
+    if ( temp[1] & 0x400 )
+    {
+        temp[1] -= 2048;
+    }
+
+    temp[2] = ((*pPackedNormal >> 22L) & 0x3ff);
+    if ( temp[2] & 0x200 )
+    {
+        // 10-bit field, so the sign-extension bias is 1024.
+        temp[2] -= 1024;
+    }
+
+    pNormal[0] = (float)temp[0] * 1.0f/1023.0f;
+    pNormal[1] = (float)temp[1] * 1.0f/1023.0f;
+    pNormal[2] = (float)temp[2] * 1.0f/511.0f;
+    return pNormal;
+}
+
+// Packs a normal into HEND3N format: X and Y as 11-bit and Z as 10-bit two's-complement
+// integers (X in bits 0..10, Y in bits 11..21, Z in bits 22..31).
+//   nx, ny, nz    - components, expected in [-1,+1] (asserted after scaling).
+//   pPackedNormal - receives the packed value.
+// Returns pPackedNormal.
+FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal )
+{
+    int temp[3];
+
+    temp[0] = Float2Int( nx * 1023.0f );
+    temp[1] = Float2Int( ny * 1023.0f );
+    temp[2] = Float2Int( nz * 511.0f );
+
+    // the normal is out of bounds, determine the source and fix
+    // clamping would be even more of a slowdown here
+    Assert( temp[0] >= -1023 && temp[0] <= 1023 );
+    Assert( temp[1] >= -1023 && temp[1] <= 1023 );
+    Assert( temp[2] >= -511 && temp[2] <= 511 );
+
+    // Mask and shift in the unsigned domain: shifting a signed int holding up to 1023
+    // left by 22 would overflow it. The resulting bit pattern is unchanged.
+    const unsigned int nPackedX = (unsigned int)temp[0] & 0x7ffu;
+    const unsigned int nPackedY = (unsigned int)temp[1] & 0x7ffu;
+    const unsigned int nPackedZ = (unsigned int)temp[2] & 0x3ffu;
+
+    *pPackedNormal = ( nPackedZ << 22 ) | ( nPackedY << 11 ) | ( nPackedX << 0 );
+    return pPackedNormal;
+}
+
+// Array-style overload: packs pNormal[0..2] by forwarding to the component-wise overload above.
+FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal )
+{
+    return PackNormal_HEND3N( pNormal[0], pNormal[1], pNormal[2], pPackedNormal );
+}
+
+// Unpacks a normal from Jason's 2-short format.
+//   pPackedNormal - packed value: low 16 bits hold X, high 16 bits hold Y, each as a signed short
+//                   whose sign carries an extra flag (X's sign = sign of Z, Y's sign = binormal sign).
+//   pNormal       - receives X, Y, Z; if bIsTangent is set, pNormal[3] also receives the
+//                   binormal sign (+1/-1), so the buffer must then hold 4 floats.
+// Returns pNormal.
+FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE )
+{
+    // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits)
+    short iX = (*pPackedNormal & 0x0000FFFF);
+    short iY = (*pPackedNormal & 0xFFFF0000) >> 16;
+
+    // Peel the flag carried in each short's sign, leaving the magnitude.
+    float zSign = +1;
+    if ( iX < 0 )
+    {
+        zSign = -1;
+        iX = -iX;
+    }
+    float tSign = +1;
+    if ( iY < 0 )
+    {
+        tSign = -1;
+        iY = -iY;
+    }
+
+    // Magnitudes are biased by 16384: map back to [-1,+1].
+    pNormal[0] = ( iX - 16384.0f ) / 16384.0f;
+    pNormal[1] = ( iY - 16384.0f ) / 16384.0f;
+
+    // Z is reconstructed from X and Y. Quantization can push x*x + y*y slightly above 1
+    // (e.g. for a vector lying in the XY plane), which would hand sqrtf a negative
+    // argument and produce NaN - clamp the radicand at zero instead.
+    float flZSquared = 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] );
+    if ( flZSquared < 0.0f )
+    {
+        flZSquared = 0.0f;
+    }
+    pNormal[2] = zSign*sqrtf( flZSquared );
+
+    if ( bIsTangent )
+    {
+        pNormal[3] = tSign;
+    }
+
+    return pNormal;
+}
+
+// Packs a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format.
+//   nx, ny, nz    - vector components.
+//   pPackedNormal - receives the packed value (X in the low 16 bits, Y in the high 16 bits).
+//   binormalSign  - +1 or -1; stored in the sign of Y (use when encoding a tangent vector).
+// Returns pPackedNormal.
+FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f )
+{
+    // Only X & Y are stored; the unpacker reconstructs Z from them. The sign bits of the
+    // X & Y shorts are used to carry the sign of Z and, if this is a tangent vector, the sign
+    // of the binormal (this is needed because tangent/binormal vectors are supposed to follow
+    // UV gradients, but shaders reconstruct the binormal from the tangent and normal
+    // assuming that they form a right-handed basis).
+
+    nx += 1; // [-1,+1] -> [0,2]
+    ny += 1;
+    nx *= 16384.0f; // [ 0, 2] -> [0,32768]
+    ny *= 16384.0f;
+
+    // '0' and '32768' values are invalid encodings
+    nx = max( nx, 1.0f ); // Make sure there are no zero values
+    ny = max( ny, 1.0f );
+    nx = min( nx, 32767.0f ); // Make sure there are no 32768 values
+    ny = min( ny, 32767.0f );
+
+    if ( nz < 0.0f )
+        nx = -nx; // Set the sign bit for z
+
+    ny *= binormalSign; // Set the sign bit for the binormal (use when encoding a tangent vector)
+
+    // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int()
+    short sX = (short)nx; // signed short, magnitude in [1,32767]
+    short sY = (short)ny;
+
+    // Combine the two 16-bit patterns in the unsigned domain. sY is negative whenever
+    // binormalSign is -1, and left-shifting a negative (promoted) int is not well defined;
+    // going through unsigned short keeps exactly the same 16 bits.
+    const unsigned int nLow = (unsigned int)(unsigned short)sX;
+    const unsigned int nHigh = (unsigned int)(unsigned short)sY;
+    *pPackedNormal = nLow | ( nHigh << 16 );
+
+    return pPackedNormal;
+}
+
+// Array-style overload: packs pNormal[0..2] via the component-wise PackNormal_SHORT2.
+FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f )
+{
+    const float flX = pNormal[0];
+    const float flY = pNormal[1];
+    const float flZ = pNormal[2];
+    return PackNormal_SHORT2( flX, flY, flZ, pPackedNormal, binormalSign );
+}
+
+// Unpacks a UBYTE4 normal into pNormal[0..2] and returns pNormal (for a tangent, the result's fourth component receives the binormal 'sign', so pNormal must then hold 4 floats)
+FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE )
+{
+ unsigned char cX, cY;
+ if ( bIsTangent )
+ {
+ cX = *pPackedNormal >> 16; // Tangent: encoded x lives in the third byte (the UBYTE4's 'Z')
+ cY = *pPackedNormal >> 24; // Tangent: encoded y lives in the fourth byte (the UBYTE4's 'W')
+ }
+ else
+ {
+ cX = *pPackedNormal >> 0; // Normal: encoded x lives in the first byte (the UBYTE4's 'X')
+ cY = *pPackedNormal >> 8; // Normal: encoded y lives in the second byte (the UBYTE4's 'Y')
+ }
+
+ float x = cX - 128.0f;
+ float y = cY - 128.0f;
+ float z;
+
+ float zSignBit = x < 0 ? 1.0f : 0.0f; // z and t negative bits, 1 = negative (like slt asm instruction)
+ float tSignBit = y < 0 ? 1.0f : 0.0f;
+ float zSign = -( 2*zSignBit - 1 ); // z and t signs: bit 1 -> -1, bit 0 -> +1
+ float tSign = -( 2*tSignBit - 1 );
+
+ x = x*zSign - zSignBit; // strip the z flag: 0..127
+ y = y*tSign - tSignBit;
+ x = x - 64; // remove the bias: -64..63
+ y = y - 64;
+
+ float xSignBit = x < 0 ? 1.0f : 0.0f; // x and y negative bits, 1 = negative (like slt asm instruction)
+ float ySignBit = y < 0 ? 1.0f : 0.0f;
+ float xSign = -( 2*xSignBit - 1 ); // x and y signs: bit 1 -> -1, bit 0 -> +1
+ float ySign = -( 2*ySignBit - 1 );
+
+ x = ( x*xSign - xSignBit ) / 63.0f; // magnitudes in 0..1 range
+ y = ( y*ySign - ySignBit ) / 63.0f;
+ z = 1.0f - x - y; // the magnitudes were stored so that x + y + z = 1
+
+ float oolen = 1.0f / sqrt( x*x + y*y + z*z ); // Normalize and
+ x *= oolen * xSign; // Recover signs
+ y *= oolen * ySign;
+ z *= oolen * zSign;
+
+ pNormal[0] = x;
+ pNormal[1] = y;
+ pNormal[2] = z;
+ if ( bIsTangent )
+ {
+ pNormal[3] = tSign; // binormal sign (+1/-1), recovered from the sign flag carried by the encoded y byte
+ }
+
+ return pNormal;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2
+//
+// UBYTE4 encoding, using per-octant projection onto x+y+z=1
+// Assume input vector is already unit length
+//
+// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent
+// (lets the shader know whether norm/tan/bin form a right-handed basis)
+//
+// bIsTangent is used to specify which WORD of the output to store the data
+// The expected usage is to call once with the normal and once with
+// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs
+// Packs a unit vector into two bytes of a UBYTE4 (see the description above).
+//   nx, ny, nz    - vector components (assumed unit length).
+//   pPackedNormal - receives the packed bytes; the other two bytes are written as zero.
+//   bIsTangent    - false: bytes 0 and 1 are written; true: bytes 2 and 3 are written.
+//   binormalSign  - must be +1 or -1 (asserted); stored as the sign flag of the y byte.
+// Returns pPackedNormal.
+FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
+{
+    float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign
+    float ySign = ny < 0.0f ? -1.0f : 1.0f;
+    float zSign = nz < 0.0f ? -1.0f : 1.0f;
+    float tSign = binormalSign;
+    Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) );
+
+    float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0]
+    float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction)
+    float zSignBit = 0.5f*( 1 - zSign );
+    float tSignBit = 0.5f*( 1 - binormalSign );
+
+    float absX = xSign*nx; // 0..1 range (abs)
+    float absY = ySign*ny;
+    float absZ = zSign*nz;
+
+    float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane
+    float ybits = absY / ( absX + absY + absZ );
+
+    xbits *= 63; // 0..63
+    ybits *= 63;
+
+    xbits = xbits * xSign - xSignBit; // -64..63 range
+    ybits = ybits * ySign - ySignBit;
+    xbits += 64.0f; // 0..127 range
+    ybits += 64.0f;
+
+    xbits = xbits * zSign - zSignBit; // Negate based on z and t
+    ybits = ybits * tSign - tSignBit; // -128..127 range
+
+    xbits += 128.0f; // 0..255 range
+    ybits += 128.0f;
+
+    unsigned char cX = (unsigned char) xbits;
+    unsigned char cY = (unsigned char) ybits;
+
+    // Widen to unsigned int BEFORE shifting: an unsigned char promotes to (signed) int, and
+    // shifting a byte >= 128 left by 24 overflows it. The packed bit pattern is unchanged.
+    const unsigned int nX = cX;
+    const unsigned int nY = cY;
+
+    if ( !bIsTangent )
+        *pPackedNormal = (nX << 0) | (nY << 8); // xy for normal
+    else
+        *pPackedNormal = (nX << 16) | (nY << 24); // zw for tangent
+
+    return pPackedNormal;
+}
+
+// Array-style overload: packs pNormal[0..2] via the component-wise PackNormal_UBYTE4.
+FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
+{
+    const float flX = pNormal[0];
+    const float flY = pNormal[1];
+    const float flZ = pNormal[2];
+    return PackNormal_UBYTE4( flX, flY, flZ, pPackedNormal, bIsTangent, binormalSign );
+}
+
+
+//-----------------------------------------------------------------------------
+// Convert RGB to HSV
+//-----------------------------------------------------------------------------
+void RGBtoHSV( const Vector &rgb, Vector &hsv );
+
+
+//-----------------------------------------------------------------------------
+// Convert HSV to RGB
+//-----------------------------------------------------------------------------
+void HSVtoRGB( const Vector &hsv, Vector &rgb );
+
+
+//-----------------------------------------------------------------------------
+// Fast version of pow and log
+//-----------------------------------------------------------------------------
+
+float FastLog2(float i); // log2( i )
+float FastPow2(float i); // 2^i
+float FastPow(float a, float b); // a^b
+float FastPow10( float i ); // 10^i
+
+//-----------------------------------------------------------------------------
+// For testing float equality
+//-----------------------------------------------------------------------------
+
+// Returns true when a and b differ by no more than epsilon (the bound is inclusive).
+inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON )
+{
+    if ( fabs( a - b ) <= epsilon )
+    {
+        return true;
+    }
+    return false;
+}
+
+// Component-wise version: true only if x, y and z each differ by no more than epsilon.
+inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON )
+{
+    // Bail out on the first component that is not within tolerance.
+    if ( !( fabs( a.x - b.x ) <= epsilon ) )
+        return false;
+    if ( !( fabs( a.y - b.y ) <= epsilon ) )
+        return false;
+    return fabs( a.z - b.z ) <= epsilon;
+}
+
+// Fast compare
+// maxUlps is the maximum error in terms of Units in the Last Place. This
+// specifies how big an error we are willing to accept in terms of the value
+// of the least significant digit of the floating point number's
+// representation. maxUlps can also be interpreted in terms of how many
+// representable floats we are willing to accept between A and B.
+// This function will allow maxUlps-1 floats between A and B.
+bool AlmostEqual(float a, float b, int maxUlps = 10);
+
+// Component-wise ULP comparison: true only if x, y and z each pass the scalar AlmostEqual.
+inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
+{
+    // Components are tested in x, y, z order; the first failure ends the comparison.
+    if ( !AlmostEqual( a.x, b.x, maxUlps ) )
+        return false;
+    if ( !AlmostEqual( a.y, b.y, maxUlps ) )
+        return false;
+    return AlmostEqual( a.z, b.z, maxUlps );
+}
+
+
+#endif // MATH_BASE_H
+
diff --git a/mp/src/public/mathlib/matrixmath.h b/mp/src/public/mathlib/matrixmath.h
index 40de0c02..9c7f207b 100644
--- a/mp/src/public/mathlib/matrixmath.h
+++ b/mp/src/public/mathlib/matrixmath.h
@@ -1,385 +1,385 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// A set of generic, template-based matrix functions.
-//===========================================================================//
-
-#ifndef MATRIXMATH_H
-#define MATRIXMATH_H
-
-#include <stdarg.h>
-
-// The operations in this file can perform basic matrix operations on matrices represented
-// using any class that supports the necessary operations:
-//
-// .Element( row, col ) - return the element at a given matrox position
-// .SetElement( row, col, val ) - modify an element
-// .Width(), .Height() - get dimensions
-// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size
-//
-// Generally, vectors can be used with these functions by using N x 1 matrices to represent them.
-// Matrices are addressed as row, column, and indices are 0-based
-//
-//
-// Note that the template versions of these routines are defined for generality - it is expected
-// that template specialization is used for common high performance cases.
-
-namespace MatrixMath
-{
- /// M *= flScaleValue
- template<class MATRIXCLASS>
- void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue )
- {
- for( int i = 0; i < matrix.Height(); i++ )
- {
- for( int j = 0; j < matrix.Width(); j++ )
- {
- matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) );
- }
- }
- }
-
- /// AppendElementToMatrix - same as setting the element, except only works when all calls
- /// happen in top to bottom left to right order, end you have to call FinishedAppending when
- /// done. For normal matrix classes this is not different then SetElement, but for
- /// CSparseMatrix, it is an accelerated way to fill a matrix from scratch.
- template<class MATRIXCLASS>
- FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue )
- {
- matrix.SetElement( nRow, nCol, flValue ); // default implementation
- }
-
- template<class MATRIXCLASS>
- FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation
-
- /// M += fl
- template<class MATRIXCLASS>
- void AddToMatrix( MATRIXCLASS &matrix, float flAddend )
- {
- for( int i = 0; i < matrix.Height(); i++ )
- {
- for( int j = 0; j < matrix.Width(); j++ )
- {
- matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) );
- }
- }
- }
-
- /// transpose
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() );
- for( int i = 0; i < pMatrixOut->Height(); i++ )
- {
- for( int j = 0; j < pMatrixOut->Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) );
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
- /// copy
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() );
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) );
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
-
-
- /// M+=M
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) );
- }
- }
- }
-
- // M += scale * M
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) );
- }
- }
- }
-
-
- // simple way to initialize a matrix with constants from code.
- template<class MATRIXCLASSOUT>
- void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 )
- {
- for( int i = 0; i < pMatrixOut->Height(); i++ )
- {
- for( int j = 0; j < pMatrixOut->Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 );
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
- //// simple way to initialize a matrix with constants from code
- template<class MATRIXCLASSOUT>
- void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... )
- {
- va_list argPtr;
- va_start( argPtr, nCols );
-
- pMatrix->SetDimensions( nRows, nCols );
- for( int nRow = 0; nRow < nRows; nRow++ )
- {
- for( int nCol = 0; nCol < nCols; nCol++ )
- {
- double flNewValue = va_arg( argPtr, double );
- pMatrix->SetElement( nRow, nCol, flNewValue );
- }
- }
- va_end( argPtr );
- }
-
-
- /// row and colum accessors. treat a row or a column as a column vector
- template<class MATRIXTYPE> class MatrixRowAccessor
- {
- public:
- FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow )
- {
- m_pMatrix = &matrix;
- m_nRow = nRow;
- }
-
- FORCEINLINE float Element( int nRow, int nCol ) const
- {
- Assert( nCol == 0 );
- return m_pMatrix->Element( m_nRow, nRow );
- }
-
- FORCEINLINE int Width( void ) const { return 1; };
- FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }
-
- private:
- MATRIXTYPE const *m_pMatrix;
- int m_nRow;
- };
-
- template<class MATRIXTYPE> class MatrixColumnAccessor
- {
- public:
- FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn )
- {
- m_pMatrix = &matrix;
- m_nColumn = nColumn;
- }
-
- FORCEINLINE float Element( int nRow, int nColumn ) const
- {
- Assert( nColumn == 0 );
- return m_pMatrix->Element( nRow, m_nColumn );
- }
-
- FORCEINLINE int Width( void ) const { return 1; }
- FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); }
- private:
- MATRIXTYPE const *m_pMatrix;
- int m_nColumn;
- };
-
- /// this translator acts as a proxy for the transposed matrix
- template<class MATRIXTYPE> class MatrixTransposeAccessor
- {
- public:
- FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix )
- {
- m_pMatrix = &matrix;
- }
-
- FORCEINLINE float Element( int nRow, int nColumn ) const
- {
- return m_pMatrix->Element( nColumn, nRow );
- }
-
- FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); }
- FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }
- private:
- MATRIXTYPE const *m_pMatrix;
- };
-
- /// this tranpose returns a wrapper around it's argument, allowing things like AddMatrixToMatrix( Transpose( matA ), &matB ) without an extra copy
- template<class MATRIXCLASSIN>
- MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn )
- {
- return MatrixTransposeAccessor<MATRIXCLASSIN>( matrixIn );
- }
-
-
- /// retrieve rows and columns
- template<class MATRIXTYPE>
- FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn )
- {
- return MatrixColumnAccessor<MATRIXTYPE>( matrix, nColumn );
- }
-
- template<class MATRIXTYPE>
- FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow )
- {
- return MatrixRowAccessor<MATRIXTYPE>( matrix, nRow );
- }
-
- //// dot product between vectors (or rows and/or columns via accessors)
- template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE >
- float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB )
- {
- Assert( vecA.Width() == 1 );
- Assert( vecB.Width() == 1 );
- Assert( vecA.Height() == vecB.Height() );
- double flResult = 0;
- for( int i = 0; i < vecA.Height(); i++ )
- {
- flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 );
- }
- return flResult;
- }
-
-
-
- /// matrix x matrix multiplication
- template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE>
- void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut )
- {
- Assert( matA.Width() == matB.Height() );
- pMatrixOut->SetDimensions( matA.Height(), matB.Width() );
- for( int i = 0; i < matA.Height(); i++ )
- {
- for( int j = 0; j < matB.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), MatrixColumn( matB, j ) ) );
- }
- }
- }
-
- /// solve Ax=B via the conjugate graident method. Code and naming conventions based on the
- /// wikipedia article.
- template<class ATYPE, class XTYPE, class BTYPE>
- void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
- {
- XTYPE vecR;
- vecR.SetDimensions( vecX.Height(), 1 );
- MatrixMultiply( matA, vecX, &vecR );
- ScaleMatrix( vecR, -1 );
- AddMatrixToMatrix( vecB, &vecR );
- XTYPE vecP;
- CopyMatrix( vecR, &vecP );
- float flRsOld = InnerProduct( vecR, vecR );
- for( int nIter = 0; nIter < 100; nIter++ )
- {
- XTYPE vecAp;
- MatrixMultiply( matA, vecP, &vecAp );
- float flDivisor = InnerProduct( vecAp, vecP );
- float flAlpha = flRsOld / flDivisor;
- AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
- AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
- float flRsNew = InnerProduct( vecR, vecR );
- if ( flRsNew < flTolerance )
- {
- break;
- }
- ScaleMatrix( vecP, flRsNew / flRsOld );
- AddMatrixToMatrix( vecR, &vecP );
- flRsOld = flRsNew;
- }
- }
-
- /// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on
- /// the wikipedia article. Same as Conjugate gradient but allows passing in two matrices whose
- /// product is used as the A matrix (in order to preserve sparsity)
- template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE>
- void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
- {
- XTYPE vecR1;
- vecR1.SetDimensions( vecX.Height(), 1 );
- MatrixMultiply( matA, vecX, &vecR1 );
- XTYPE vecR;
- vecR.SetDimensions( vecR1.Height(), 1 );
- MatrixMultiply( matAPrime, vecR1, &vecR );
- ScaleMatrix( vecR, -1 );
- AddMatrixToMatrix( vecB, &vecR );
- XTYPE vecP;
- CopyMatrix( vecR, &vecP );
- float flRsOld = InnerProduct( vecR, vecR );
- for( int nIter = 0; nIter < 100; nIter++ )
- {
- XTYPE vecAp1;
- MatrixMultiply( matA, vecP, &vecAp1 );
- XTYPE vecAp;
- MatrixMultiply( matAPrime, vecAp1, &vecAp );
- float flDivisor = InnerProduct( vecAp, vecP );
- float flAlpha = flRsOld / flDivisor;
- AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
- AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
- float flRsNew = InnerProduct( vecR, vecR );
- if ( flRsNew < flTolerance )
- {
- break;
- }
- ScaleMatrix( vecP, flRsNew / flRsOld );
- AddMatrixToMatrix( vecR, &vecP );
- flRsOld = flRsNew;
- }
- }
-
-
- template<class ATYPE, class XTYPE, class BTYPE>
- void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX )
- {
- // now, generate the normal equations
- BTYPE vecBeta;
- MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta );
-
- vecX.SetDimensions( matA.Width(), 1 );
- MatrixMath::SetMatrixToIdentity( &vecX );
-
- ATYPE matATransposed;
- TransposeMatrix( matA, &matATransposed );
- ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 );
- }
-
-};
-
-/// a simple fixed-size matrix class
-template<int NUMROWS, int NUMCOLS> class CFixedMatrix
-{
-public:
- FORCEINLINE int Width( void ) const { return NUMCOLS; }
- FORCEINLINE int Height( void ) const { return NUMROWS; }
- FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; }
- FORCEINLINE void SetElement( int nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; }
- FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); }
-
-private:
- float m_flValues[NUMROWS][NUMCOLS];
-};
-
-
-
-#endif //matrixmath_h
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// A set of generic, template-based matrix functions.
+//===========================================================================//
+
+#ifndef MATRIXMATH_H
+#define MATRIXMATH_H
+
+#include <stdarg.h>
+
+// The operations in this file can perform basic matrix operations on matrices represented
+// using any class that supports the necessary operations:
+//
+// .Element( row, col ) - return the element at a given matrix position
+// .SetElement( row, col, val ) - modify an element
+// .Width(), .Height() - get dimensions
+// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size
+//
+// Generally, vectors can be used with these functions by using N x 1 matrices to represent them.
+// Matrices are addressed as row, column, and indices are 0-based
+//
+//
+// Note that the template versions of these routines are defined for generality - it is expected
+// that template specialization is used for common high performance cases.
+
+namespace MatrixMath
+{
+    /// M *= flScaleValue : multiplies every element of 'matrix' by flScaleValue, in place.
+    template<class MATRIXCLASS>
+    void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue )
+    {
+        for( int nRow = 0; nRow < matrix.Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < matrix.Width(); ++nCol )
+            {
+                matrix.SetElement( nRow, nCol, flScaleValue * matrix.Element( nRow, nCol ) );
+            }
+        }
+    }
+
+    /// AppendElement - same as setting the element, except that it only works when all calls
+    /// happen in top-to-bottom, left-to-right order, and you have to call FinishedAppending when
+    /// done. For normal matrix classes this is no different than SetElement, but for
+    /// CSparseMatrix it is an accelerated way to fill a matrix from scratch.
+    template<class MATRIXCLASS>
+    FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue )
+    {
+        // default implementation: a plain element write
+        matrix.SetElement( nRow, nCol, flValue );
+    }
+
+    /// Companion to AppendElement: call once after the last element has been appended.
+    template<class MATRIXCLASS>
+    FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix )
+    {
+        // default implementation: nothing to finalize
+    }
+
+    /// M += fl : adds the scalar flAddend to every element of 'matrix', in place.
+    template<class MATRIXCLASS>
+    void AddToMatrix( MATRIXCLASS &matrix, float flAddend )
+    {
+        for( int nRow = 0; nRow < matrix.Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < matrix.Width(); ++nCol )
+            {
+                matrix.SetElement( nRow, nCol, flAddend + matrix.Element( nRow, nCol ) );
+            }
+        }
+    }
+
+    /// transpose: resizes *pMatrixOut to ( matrixIn.Width() x matrixIn.Height() ) and fills it
+    /// so that out( i, j ) = in( j, i ). Elements are appended in row-major order.
+    template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
+    void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
+    {
+        MATRIXCLASSOUT &matrixOut = *pMatrixOut;
+        matrixOut.SetDimensions( matrixIn.Width(), matrixIn.Height() );
+        for( int nOutRow = 0; nOutRow < matrixOut.Height(); ++nOutRow )
+        {
+            for( int nOutCol = 0; nOutCol < matrixOut.Width(); ++nOutCol )
+            {
+                AppendElement( matrixOut, nOutRow, nOutCol, matrixIn.Element( nOutCol, nOutRow ) );
+            }
+        }
+        FinishedAppending( matrixOut );
+    }
+
+    /// copy: resizes *pMatrixOut to the dimensions of matrixIn and copies every element
+    /// across, appending in row-major order.
+    template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
+    void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
+    {
+        MATRIXCLASSOUT &matrixOut = *pMatrixOut;
+        matrixOut.SetDimensions( matrixIn.Height(), matrixIn.Width() );
+        for( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
+            {
+                AppendElement( matrixOut, nRow, nCol, matrixIn.Element( nRow, nCol ) );
+            }
+        }
+        FinishedAppending( matrixOut );
+    }
+
+
+
+    /// M += M : adds matrixIn element-wise into *pMatrixOut. The loop bounds come from
+    /// matrixIn; *pMatrixOut is not resized, so it must already be at least that large.
+    template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
+    void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
+    {
+        for( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
+            {
+                pMatrixOut->SetElement( nRow, nCol, pMatrixOut->Element( nRow, nCol ) + matrixIn.Element( nRow, nCol ) );
+            }
+        }
+    }
+
+    /// M += scale * M : adds flScale * matrixIn element-wise into *pMatrixOut. The loop bounds
+    /// come from matrixIn; *pMatrixOut is not resized, so it must already be at least that large.
+    template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
+    void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
+    {
+        for( int nRow = 0; nRow < matrixIn.Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < matrixIn.Width(); ++nCol )
+            {
+                pMatrixOut->SetElement( nRow, nCol, pMatrixOut->Element( nRow, nCol ) + flScale * matrixIn.Element( nRow, nCol ) );
+            }
+        }
+    }
+
+
+    /// Fills *pMatrixOut with flDiagonalValue on the diagonal ( row == column ) and 0 elsewhere.
+    /// The matrix is not resized here, so its dimensions must already be set.
+    template<class MATRIXCLASSOUT>
+    void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 )
+    {
+        for( int nRow = 0; nRow < pMatrixOut->Height(); ++nRow )
+        {
+            for( int nCol = 0; nCol < pMatrixOut->Width(); ++nCol )
+            {
+                if ( nRow == nCol )
+                {
+                    AppendElement( *pMatrixOut, nRow, nCol, flDiagonalValue );
+                }
+                else
+                {
+                    AppendElement( *pMatrixOut, nRow, nCol, 0.0f );
+                }
+            }
+        }
+        FinishedAppending( *pMatrixOut );
+    }
+
+    /// Simple way to initialize a matrix with constants from code: resizes *pMatrix to
+    /// nRows x nCols and reads nRows * nCols values from the variable argument list in
+    /// row-major order. Every value is fetched as a double, so callers must pass
+    /// floating-point arguments (e.g. 1.0, not 1).
+    template<class MATRIXCLASSOUT>
+    void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... )
+    {
+        va_list argPtr;
+        va_start( argPtr, nCols );
+
+        pMatrix->SetDimensions( nRows, nCols );
+        for( int nCurRow = 0; nCurRow < nRows; ++nCurRow )
+        {
+            for( int nCurCol = 0; nCurCol < nCols; ++nCurCol )
+            {
+                double flNextArg = va_arg( argPtr, double );
+                pMatrix->SetElement( nCurRow, nCurCol, flNextArg );
+            }
+        }
+
+        va_end( argPtr );
+    }
+
+
+    /// Row and column accessors: each presents one row or one column of a matrix as an
+    /// N x 1 column vector, without copying. The accessor stores a pointer to the matrix,
+    /// so the matrix must outlive it.
+    ///
+    /// MatrixRowAccessor exposes row m_nRow: Element( i, 0 ) reads matrix( m_nRow, i ).
+    template<class MATRIXTYPE> class MatrixRowAccessor
+    {
+    public:
+        FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow )
+            : m_pMatrix( &matrix ), m_nRow( nRow )
+        {
+        }
+
+        // nRow indexes along the wrapped row; nCol must be 0 since the view is one column wide.
+        FORCEINLINE float Element( int nRow, int nCol ) const
+        {
+            Assert( nCol == 0 );
+            return m_pMatrix->Element( m_nRow, nRow );
+        }
+
+        FORCEINLINE int Width( void ) const
+        {
+            return 1;
+        }
+
+        // The vector's length is the width of the underlying matrix.
+        FORCEINLINE int Height( void ) const
+        {
+            return m_pMatrix->Width();
+        }
+
+    private:
+        MATRIXTYPE const *m_pMatrix;
+        int m_nRow;
+    };
+
+    /// Presents column m_nColumn of a matrix as an N x 1 column vector, without copying:
+    /// Element( i, 0 ) reads matrix( i, m_nColumn ). Stores a pointer to the matrix, so the
+    /// matrix must outlive the accessor.
+    template<class MATRIXTYPE> class MatrixColumnAccessor
+    {
+    public:
+        FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn )
+            : m_pMatrix( &matrix ), m_nColumn( nColumn )
+        {
+        }
+
+        // nColumn must be 0 since the view is one column wide.
+        FORCEINLINE float Element( int nRow, int nColumn ) const
+        {
+            Assert( nColumn == 0 );
+            return m_pMatrix->Element( nRow, m_nColumn );
+        }
+
+        FORCEINLINE int Width( void ) const
+        {
+            return 1;
+        }
+
+        FORCEINLINE int Height( void ) const
+        {
+            return m_pMatrix->Height();
+        }
+
+    private:
+        MATRIXTYPE const *m_pMatrix;
+        int m_nColumn;
+    };
+
+    /// This accessor acts as a read-only proxy for the transposed matrix: rows and columns
+    /// are swapped on every access and no data is copied. It stores a pointer to the
+    /// matrix, so the matrix must outlive the accessor.
+    template<class MATRIXTYPE> class MatrixTransposeAccessor
+    {
+    public:
+        FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix )
+            : m_pMatrix( &matrix )
+        {
+        }
+
+        // transposed( row, col ) == original( col, row )
+        FORCEINLINE float Element( int nRow, int nColumn ) const
+        {
+            return m_pMatrix->Element( nColumn, nRow );
+        }
+
+        FORCEINLINE int Width( void ) const
+        {
+            return m_pMatrix->Height();
+        }
+
+        FORCEINLINE int Height( void ) const
+        {
+            return m_pMatrix->Width();
+        }
+
+    private:
+        MATRIXTYPE const *m_pMatrix;
+    };
+
+ /// this transpose returns a wrapper around its argument, allowing things like AddMatrixToMatrix( TransposeMatrix( matA ), &matB ) without an extra copy
+ template<class MATRIXCLASSIN>
+ MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn )
+ {
+     // no data is copied: the returned proxy only points at matrixIn
+     typedef MatrixTransposeAccessor<MATRIXCLASSIN> TransposedView_t;
+     return TransposedView_t( matrixIn );
+ }
+
+
+ /// retrieve rows and columns
+ template<class MATRIXTYPE>
+ FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn )
+ {
+     // lightweight column-vector view of column nColumn; matrix itself is not copied
+     typedef MatrixColumnAccessor<MATRIXTYPE> ColumnView_t;
+     return ColumnView_t( matrix, nColumn );
+ }
+
+ template<class MATRIXTYPE>
+ FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow )
+ {
+     // lightweight column-vector view of row nRow; matrix itself is not copied
+     typedef MatrixRowAccessor<MATRIXTYPE> RowView_t;
+     return RowView_t( matrix, nRow );
+ }
+
+ //// dot product between vectors (or rows and/or columns via accessors)
+ template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE >
+ float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB )
+ {
+     // both operands have to be column vectors of the same length
+     Assert( vecA.Width() == 1 );
+     Assert( vecB.Width() == 1 );
+     Assert( vecA.Height() == vecB.Height() );
+
+     // accumulate in double; the sum is narrowed to float on return
+     double flSum = 0;
+     int nElement = 0;
+     while ( nElement < vecA.Height() )
+     {
+         flSum += vecA.Element( nElement, 0 ) * vecB.Element( nElement, 0 );
+         ++nElement;
+     }
+     return flSum;
+ }
+
+
+
+ /// matrix x matrix multiplication
+ template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE>
+ void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut )
+ {
+     // inner dimensions must agree; the result is ( rows of A ) x ( columns of B )
+     Assert( matA.Width() == matB.Height() );
+     pMatrixOut->SetDimensions( matA.Height(), matB.Width() );
+
+     for( int nOutRow = 0; nOutRow < matA.Height(); nOutRow++ )
+     {
+         for( int nOutCol = 0; nOutCol < matB.Width(); nOutCol++ )
+         {
+             // each output element is the dot product of a row of A with a column of B
+             float flDot = InnerProduct( MatrixRow( matA, nOutRow ), MatrixColumn( matB, nOutCol ) );
+             pMatrixOut->SetElement( nOutRow, nOutCol, flDot );
+         }
+     }
+ }
+
+ /// solve Ax=B via the conjugate gradient method. Code and naming conventions based on the
+ /// wikipedia article.
+ ///   matA        - system matrix, only read through MatrixMultiply
+ ///   vecB        - right-hand side column vector
+ ///   vecX        - in: starting guess, out: refined solution (updated in place)
+ ///   flTolerance - iteration stops once the squared residual norm falls below this value
+ /// At most 100 iterations are run.
+ template<class ATYPE, class XTYPE, class BTYPE>
+ void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
+ {
+     // residual r = B - A * x for the starting guess
+     XTYPE vecR;
+     vecR.SetDimensions( vecX.Height(), 1 );
+     MatrixMultiply( matA, vecX, &vecR );
+     ScaleMatrix( vecR, -1 );
+     AddMatrixToMatrix( vecB, &vecR );
+
+     // first search direction is the residual itself
+     XTYPE vecP;
+     CopyMatrix( vecR, &vecP );
+     float flRsOld = InnerProduct( vecR, vecR );
+     for( int nIter = 0; nIter < 100; nIter++ )
+     {
+         XTYPE vecAp;
+         MatrixMultiply( matA, vecP, &vecAp );
+         float flDivisor = InnerProduct( vecAp, vecP );
+         if ( flDivisor == 0.0f )
+         {
+             // The search direction vanished (e.g. the starting guess was already exact, so
+             // r = p = 0) or the method broke down. Dividing here would write NaN/inf into
+             // vecX, so stop and keep the current estimate.
+             break;
+         }
+         float flAlpha = flRsOld / flDivisor;
+         AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
+         AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
+         float flRsNew = InnerProduct( vecR, vecR );
+         if ( flRsNew < flTolerance )
+         {
+             break;
+         }
+         // p = r + ( rsNew / rsOld ) * p
+         ScaleMatrix( vecP, flRsNew / flRsOld );
+         AddMatrixToMatrix( vecR, &vecP );
+         flRsOld = flRsNew;
+     }
+ }
+
+ /// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on
+ /// the wikipedia article. Same as Conjugate gradient but allows passing in two matrices whose
+ /// product is used as the A matrix (in order to preserve sparsity)
+ ///   matA, matAPrime - the product is applied as matAPrime * ( matA * v ), never formed explicitly
+ ///   vecB            - right-hand side column vector
+ ///   vecX            - in: starting guess, out: refined solution (updated in place)
+ ///   flTolerance     - iteration stops once the squared residual norm falls below this value
+ /// At most 100 iterations are run.
+ template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE>
+ void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
+ {
+     // residual r = B - A' * ( A * x ) for the starting guess
+     XTYPE vecR1;
+     vecR1.SetDimensions( vecX.Height(), 1 );
+     MatrixMultiply( matA, vecX, &vecR1 );
+     XTYPE vecR;
+     vecR.SetDimensions( vecR1.Height(), 1 );
+     MatrixMultiply( matAPrime, vecR1, &vecR );
+     ScaleMatrix( vecR, -1 );
+     AddMatrixToMatrix( vecB, &vecR );
+
+     // first search direction is the residual itself
+     XTYPE vecP;
+     CopyMatrix( vecR, &vecP );
+     float flRsOld = InnerProduct( vecR, vecR );
+     for( int nIter = 0; nIter < 100; nIter++ )
+     {
+         // Ap = A' * ( A * p ), evaluated as two products
+         XTYPE vecAp1;
+         MatrixMultiply( matA, vecP, &vecAp1 );
+         XTYPE vecAp;
+         MatrixMultiply( matAPrime, vecAp1, &vecAp );
+         float flDivisor = InnerProduct( vecAp, vecP );
+         if ( flDivisor == 0.0f )
+         {
+             // The search direction vanished (e.g. the starting guess was already exact, so
+             // r = p = 0) or the method broke down. Dividing here would write NaN/inf into
+             // vecX, so stop and keep the current estimate.
+             break;
+         }
+         float flAlpha = flRsOld / flDivisor;
+         AddScaledMatrixToMatrix( flAlpha, vecP, &vecX );
+         AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR );
+         float flRsNew = InnerProduct( vecR, vecR );
+         if ( flRsNew < flTolerance )
+         {
+             break;
+         }
+         // p = r + ( rsNew / rsOld ) * p
+         ScaleMatrix( vecP, flRsNew / flRsOld );
+         AddMatrixToMatrix( vecR, &vecP );
+         flRsOld = flRsNew;
+     }
+ }
+
+
+ /// Fit vecX to matA * vecX ~= vecB: forms vecBeta = transpose( matA ) * vecB and passes matA,
+ /// a transposed copy of matA and vecBeta to the two-matrix ConjugateGradient overload.
+ /// vecX is resized to ( matA.Width(), 1 ) and overwritten with the result.
+ template<class ATYPE, class XTYPE, class BTYPE>
+ void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX )
+ {
+ // now, generate the normal equations
+ BTYPE vecBeta;
+ MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta );
+
+ vecX.SetDimensions( matA.Width(), 1 );
+ // starting guess for the iterative solver (SetMatrixToIdentity is defined outside this excerpt)
+ MatrixMath::SetMatrixToIdentity( &vecX );
+
+ // NOTE(review): this is a two-argument TransposeMatrix that fills matATransposed, not the
+ // one-argument wrapper-returning version; presumably declared earlier in the file - confirm.
+ ATYPE matATransposed;
+ TransposeMatrix( matA, &matATransposed );
+ ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 );
+ }
+
+};
+
+/// Minimal matrix whose NUMROWS x NUMCOLS size is fixed at compile time. It offers the
+/// Width/Height/Element/SetElement/SetDimensions interface used by the MatrixMath templates.
+template<int NUMROWS, int NUMCOLS> class CFixedMatrix
+{
+public:
+    /// number of columns
+    FORCEINLINE int Width( void ) const
+    {
+        return NUMCOLS;
+    }
+
+    /// number of rows
+    FORCEINLINE int Height( void ) const
+    {
+        return NUMROWS;
+    }
+
+    /// read one element; indices are not range checked
+    FORCEINLINE float Element( int nRow, int nCol ) const
+    {
+        return m_flValues[nRow][nCol];
+    }
+
+    /// write one element; indices are not range checked
+    FORCEINLINE void SetElement( int nRow, int nCol, float flValue )
+    {
+        m_flValues[nRow][nCol] = flValue;
+    }
+
+    /// the size cannot change, so this only asserts that the request matches the template size
+    FORCEINLINE void SetDimensions( int nNumRows, int nNumCols )
+    {
+        Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) );
+    }
+
+private:
+    float m_flValues[NUMROWS][NUMCOLS];
+};
+
+
+
+#endif //matrixmath_h
diff --git a/mp/src/public/mathlib/noise.h b/mp/src/public/mathlib/noise.h
index 0aec2efe..19d3f729 100644
--- a/mp/src/public/mathlib/noise.h
+++ b/mp/src/public/mathlib/noise.h
@@ -1,35 +1,35 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=====================================================================================//
-
-#ifndef NOISE_H
-#define NOISE_H
-
-#include <math.h>
-#include "basetypes.h"
-#include "mathlib/vector.h"
-#include "tier0/dbg.h"
-
-
-// The following code is the c-ification of Ken Perlin's new noise algorithm
-// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN"
-// as available here: http://mrl.nyu.edu/~perlin/noise/
-// it generates a single octave of noise in the -1..1 range
-// this should at some point probably replace SparseConvolutionNoise - jd
-float ImprovedPerlinNoise( Vector const &pnt );
-
-// get the noise value at a point. Output range is 0..1.
-float SparseConvolutionNoise( Vector const &pnt );
-
-// get the noise value at a point, passing a custom noise shaping function. The noise shaping
-// function should map the domain 0..1 to 0..1.
-float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) );
-
-// returns a 1/f noise. more octaves take longer
-float FractalNoise( Vector const &pnt, int n_octaves );
-
-// returns a abs(f)*1/f noise i.e. turbulence
-float Turbulence( Vector const &pnt, int n_octaves );
-#endif // NOISE_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+//=====================================================================================//
+
+#ifndef NOISE_H
+#define NOISE_H
+
+#include <math.h>
+#include "basetypes.h"
+#include "mathlib/vector.h"
+#include "tier0/dbg.h"
+
+
+// The following code is the c-ification of Ken Perlin's new noise algorithm
+// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN"
+// as available here: http://mrl.nyu.edu/~perlin/noise/
+// it generates a single octave of noise in the -1..1 range
+// this should at some point probably replace SparseConvolutionNoise - jd
+float ImprovedPerlinNoise( Vector const &pnt );
+
+// get the noise value at a point. Output range is 0..1.
+float SparseConvolutionNoise( Vector const &pnt );
+
+// get the noise value at a point, passing a custom noise shaping function. The noise shaping
+// function should map the domain 0..1 to 0..1.
+float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) );
+
+// returns a 1/f noise. more octaves take longer
+float FractalNoise( Vector const &pnt, int n_octaves );
+
+// returns a abs(f)*1/f noise i.e. turbulence
+float Turbulence( Vector const &pnt, int n_octaves );
+#endif // NOISE_H
diff --git a/mp/src/public/mathlib/polyhedron.h b/mp/src/public/mathlib/polyhedron.h
index 6c51d432..38b465c7 100644
--- a/mp/src/public/mathlib/polyhedron.h
+++ b/mp/src/public/mathlib/polyhedron.h
@@ -1,73 +1,73 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef POLYHEDRON_H_
-#define POLYHEDRON_H_
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/mathlib.h"
-
-
-
-struct Polyhedron_IndexedLine_t
-{
- unsigned short iPointIndices[2];
-};
-
-struct Polyhedron_IndexedLineReference_t
-{
- unsigned short iLineIndex;
- unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior
-};
-
-struct Polyhedron_IndexedPolygon_t
-{
- unsigned short iFirstIndex;
- unsigned short iIndexCount;
- Vector polyNormal;
-};
-
-class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions
-{
-public:
- Vector *pVertices;
- Polyhedron_IndexedLine_t *pLines;
- Polyhedron_IndexedLineReference_t *pIndices;
- Polyhedron_IndexedPolygon_t *pPolygons;
-
- unsigned short iVertexCount;
- unsigned short iLineCount;
- unsigned short iIndexCount;
- unsigned short iPolygonCount;
-
- virtual ~CPolyhedron( void ) {};
- virtual void Release( void ) = 0;
- Vector Center( void );
-};
-
-class CPolyhedron_AllocByNew : public CPolyhedron
-{
-public:
- virtual void Release( void );
- static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all it's data in a single allocation
-
-private:
- CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these.
-};
-
-CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release()
-CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron
-
-CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time
-
-
-#endif //#ifndef POLYHEDRON_H_
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef POLYHEDRON_H_
+#define POLYHEDRON_H_
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include "mathlib/mathlib.h"
+
+
+
+// One edge of a polyhedron, stored as a pair of indices.
+// presumably indices into CPolyhedron::pVertices - TODO confirm against polyhedron.cpp
+struct Polyhedron_IndexedLine_t
+{
+ unsigned short iPointIndices[2];
+};
+
+// A polygon's reference to a line, plus which direction the polygon walks that line in.
+struct Polyhedron_IndexedLineReference_t
+{
+ unsigned short iLineIndex; // presumably an index into CPolyhedron::pLines - TODO confirm
+ unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior
+};
+
+// One face of a polyhedron: a first-index/count pair plus the face normal.
+// presumably iFirstIndex/iIndexCount select a run of CPolyhedron::pIndices - TODO confirm
+struct Polyhedron_IndexedPolygon_t
+{
+ unsigned short iFirstIndex;
+ unsigned short iIndexCount;
+ Vector polyNormal;
+};
+
+// Abstract polyhedron: four arrays (vertices, lines, line references, polygons) and their
+// element counts. Instances are disposed of through the pure virtual Release().
+class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions
+{
+public:
+ // array pointers; the matching element counts are the unsigned shorts below
+ Vector *pVertices;
+ Polyhedron_IndexedLine_t *pLines;
+ Polyhedron_IndexedLineReference_t *pIndices;
+ Polyhedron_IndexedPolygon_t *pPolygons;
+
+ unsigned short iVertexCount;
+ unsigned short iLineCount;
+ unsigned short iIndexCount;
+ unsigned short iPolygonCount;
+
+ virtual ~CPolyhedron( void ) {};
+ // implemented by each subclass
+ virtual void Release( void ) = 0;
+ // defined outside this header; presumably the average of the vertices - TODO confirm
+ Vector Center( void );
+};
+
+// CPolyhedron variant that can only be created through the static Allocate() factory
+// (the constructor is private). Release() is defined outside this header.
+class CPolyhedron_AllocByNew : public CPolyhedron
+{
+public:
+ virtual void Release( void );
+ static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all its data in a single allocation
+
+private:
+ CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these.
+};
+
+CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release()
+CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron
+
+CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time
+
+
+#endif //#ifndef POLYHEDRON_H_
+
diff --git a/mp/src/public/mathlib/quantize.h b/mp/src/public/mathlib/quantize.h
index c43b1530..5e5b7423 100644
--- a/mp/src/public/mathlib/quantize.h
+++ b/mp/src/public/mathlib/quantize.h
@@ -1,141 +1,141 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-#ifndef QUANTIZE_H
-#define QUANTIZE_H
-
-#ifndef STRING_H
-#include <string.h>
-#endif
-
-#define MAXDIMS 768
-#define MAXQUANT 16000
-
-
-#include <tier0/platform.h>
-
-struct Sample;
-
-struct QuantizedValue {
- double MinError; // minimum possible error. used
- // for neighbor searches.
- struct QuantizedValue *Children[2]; // splits
- int32 value; // only exists for leaf nodes
- struct Sample *Samples; // every sample quantized into this
- // entry
- int32 NSamples; // how many were quantized to this.
- int32 TotSamples;
- double *ErrorMeasure; // variance measure for each dimension
- double TotalError; // sum of errors
- uint8 *Mean; // average value of each dimension
- uint8 *Mins; // min box for children and this
- uint8 *Maxs; // max box for children and this
- int NQuant; // the number of samples which were
- // quantzied to this node since the
- // last time OptimizeQuantizer()
- // was called.
- int *Sums; // sum used by OptimizeQuantizer
- int sortdim; // dimension currently sorted along.
-};
-
-struct Sample {
- int32 ID; // identifier of this sample. can
- // be used for any purpose.
- int32 Count; // number of samples this sample
- // represents
- int32 QNum; // what value this sample ended up quantized
- // to.
- struct QuantizedValue *qptr; // ptr to what this was quantized to.
- uint8 Value[1]; // array of values for multi-dimensional
- // variables.
-};
-
-void FreeQuantization(struct QuantizedValue *t);
-
-struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims,
- int nvalues, uint8 *weights, int value0=0);
-
-int CompressSamples(struct Sample *s, int nsamples, int ndims);
-
-struct QuantizedValue *FindMatch(uint8 const *sample,
- int ndims,uint8 *weights,
- struct QuantizedValue *QTable);
-void PrintSamples(struct Sample const *s, int nsamples, int ndims);
-
-struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code);
-
-inline struct Sample *NthSample(struct Sample *s, int i, int nd)
-{
- uint8 *r=(uint8 *) s;
- r+=i*(sizeof(*s)+(nd-1));
- return (struct Sample *) r;
-}
-
-inline struct Sample *AllocSamples(int ns, int nd)
-{
- size_t size5=(sizeof(struct Sample)+(nd-1))*ns;
- void *ret=new uint8[size5];
- memset(ret,0,size5);
- for(int i=0;i<ns;i++)
- NthSample((struct Sample *)ret,i,nd)->Count=1;
- return (struct Sample *) ret;
-}
-
-
-// MinimumError: what is the min error which will occur if quantizing
-// a sample to the given qnode? This is just the error if the qnode
-// is a leaf.
-double MinimumError(struct QuantizedValue const *q, uint8 const *sample,
- int ndims, uint8 const *weights);
-double MaximumError(struct QuantizedValue const *q, uint8 const *sample,
- int ndims, uint8 const *weights);
-
-void PrintQTree(struct QuantizedValue const *p,int idlevel=0);
-void OptimizeQuantizer(struct QuantizedValue *q, int ndims);
-
-// RecalculateVelues: update the means in a sample tree, based upon
-// the samples. can be used to reoptimize when samples are deleted,
-// for instance.
-
-void RecalculateValues(struct QuantizedValue *q, int ndims);
-
-extern double SquaredError; // may be reset and examined. updated by
- // FindMatch()
-
-
-
-
-// the routines below can be used for uniform quantization via dart-throwing.
-typedef void (*GENERATOR)(void *); // generate a random sample
-typedef double (*COMPARER)(void const *a, void const *b);
-
-void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen,
- COMPARER cmp);
-void *FindClosestDart(void *items,int NResults, size_t itemsize,
- COMPARER cmp, void *lookfor, int *idx);
-
-
-
-
-// color quantization of 24 bit images
-#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither
-
-extern void ColorQuantize(
-uint8 const *pImage, // 4 byte pixels ARGB
-int nWidth,
-int nHeight,
-int nFlags, // QUANTFLAGS_xxx
-int nColors, // # of colors to fill in in palette
-uint8 *pOutPixels, // where to store resulting 8 bit pixels
-uint8 *pOutPalette, // where to store resulting 768-byte palette
-int nFirstColor); // first color to use in mapping
-
-
-
-
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+#ifndef QUANTIZE_H
+#define QUANTIZE_H
+
+#ifndef STRING_H
+#include <string.h>
+#endif
+
+#define MAXDIMS 768
+#define MAXQUANT 16000
+
+
+#include <tier0/platform.h>
+
+struct Sample;
+
+// Node of the binary quantization tree (see Children[2]). The per-dimension arrays
+// (ErrorMeasure, Mean, Mins, Maxs, Sums) are pointers; their storage is managed outside this header.
+struct QuantizedValue {
+ double MinError; // minimum possible error. used
+ // for neighbor searches.
+ struct QuantizedValue *Children[2]; // splits
+ int32 value; // only exists for leaf nodes
+ struct Sample *Samples; // every sample quantized into this
+ // entry
+ int32 NSamples; // how many were quantized to this.
+ int32 TotSamples;
+ double *ErrorMeasure; // variance measure for each dimension
+ double TotalError; // sum of errors
+ uint8 *Mean; // average value of each dimension
+ uint8 *Mins; // min box for children and this
+ uint8 *Maxs; // max box for children and this
+ int NQuant; // the number of samples which were
+ // quantized to this node since the
+ // last time OptimizeQuantizer()
+ // was called.
+ int *Sums; // sum used by OptimizeQuantizer
+ int sortdim; // dimension currently sorted along.
+};
+
+// One input sample. Value[] is declared with a single element but NthSample()/AllocSamples()
+// size each record as sizeof(Sample)+(ndims-1), so it really holds ndims bytes; arrays of
+// samples must therefore be stepped with NthSample(), not plain pointer arithmetic.
+struct Sample {
+ int32 ID; // identifier of this sample. can
+ // be used for any purpose.
+ int32 Count; // number of samples this sample
+ // represents
+ int32 QNum; // what value this sample ended up quantized
+ // to.
+ struct QuantizedValue *qptr; // ptr to what this was quantized to.
+ uint8 Value[1]; // array of values for multi-dimensional
+ // variables.
+};
+
+void FreeQuantization(struct QuantizedValue *t);
+
+struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims,
+ int nvalues, uint8 *weights, int value0=0);
+
+int CompressSamples(struct Sample *s, int nsamples, int ndims);
+
+struct QuantizedValue *FindMatch(uint8 const *sample,
+ int ndims,uint8 *weights,
+ struct QuantizedValue *QTable);
+void PrintSamples(struct Sample const *s, int nsamples, int ndims);
+
+struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code);
+
+// Address of the i-th record in a packed array of nd-dimensional samples.
+// Records are sizeof(Sample)+(nd-1) bytes apart because Value[] extends past the struct.
+inline struct Sample *NthSample(struct Sample *s, int i, int nd)
+{
+    size_t const nBytesPerSample = sizeof(*s)+(nd-1);
+    uint8 *pBase = (uint8 *) s;
+    return (struct Sample *) ( pBase + i*nBytesPerSample );
+}
+
+// Allocate a zero-filled packed array of ns samples with nd dimensions each and set every
+// sample's Count to 1. The storage comes from new uint8[].
+inline struct Sample *AllocSamples(int ns, int nd)
+{
+    size_t const nTotalBytes = ( sizeof(struct Sample) + (nd-1) ) * ns;
+    uint8 *pBuffer = new uint8[nTotalBytes];
+    memset( pBuffer, 0, nTotalBytes );
+
+    struct Sample *pSamples = (struct Sample *) pBuffer;
+    int nSample = 0;
+    while ( nSample < ns )
+    {
+        NthSample( pSamples, nSample, nd )->Count = 1;
+        ++nSample;
+    }
+    return pSamples;
+}
+
+
+// MinimumError: what is the min error which will occur if quantizing
+// a sample to the given qnode? This is just the error if the qnode
+// is a leaf.
+double MinimumError(struct QuantizedValue const *q, uint8 const *sample,
+ int ndims, uint8 const *weights);
+double MaximumError(struct QuantizedValue const *q, uint8 const *sample,
+ int ndims, uint8 const *weights);
+
+void PrintQTree(struct QuantizedValue const *p,int idlevel=0);
+void OptimizeQuantizer(struct QuantizedValue *q, int ndims);
+
+// RecalculateValues: update the means in a sample tree, based upon
+// the samples. can be used to reoptimize when samples are deleted,
+// for instance.
+
+void RecalculateValues(struct QuantizedValue *q, int ndims);
+
+extern double SquaredError; // may be reset and examined. updated by
+ // FindMatch()
+
+
+
+
+// the routines below can be used for uniform quantization via dart-throwing.
+typedef void (*GENERATOR)(void *); // generate a random sample
+typedef double (*COMPARER)(void const *a, void const *b);
+
+void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen,
+ COMPARER cmp);
+void *FindClosestDart(void *items,int NResults, size_t itemsize,
+ COMPARER cmp, void *lookfor, int *idx);
+
+
+
+
+// color quantization of 24 bit images
+#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither
+
+extern void ColorQuantize(
+uint8 const *pImage, // 4 byte pixels ARGB
+int nWidth,
+int nHeight,
+int nFlags, // QUANTFLAGS_xxx
+int nColors, // # of colors to fill in in palette
+uint8 *pOutPixels, // where to store resulting 8 bit pixels
+uint8 *pOutPalette, // where to store resulting 768-byte palette
+int nFirstColor); // first color to use in mapping
+
+
+
+
+
+#endif
diff --git a/mp/src/public/mathlib/simdvectormatrix.h b/mp/src/public/mathlib/simdvectormatrix.h
index ba830787..f88cd328 100644
--- a/mp/src/public/mathlib/simdvectormatrix.h
+++ b/mp/src/public/mathlib/simdvectormatrix.h
@@ -1,142 +1,142 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
-// for high speed processing in tools.
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef SIMDVECTORMATRIX_H
-#define SIMDVECTORMATRIX_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include <string.h>
-#include "tier0/platform.h"
-#include "tier0/dbg.h"
-#include "tier1/utlsoacontainer.h"
-#include "mathlib/ssemath.h"
-
-class CSIMDVectorMatrix
-{
-public:
- int m_nWidth; // in actual vectors
- int m_nHeight;
-
- int m_nPaddedWidth; // # of 4x wide elements
-
- FourVectors *m_pData;
-
-protected:
- void Init( void )
- {
- m_pData = NULL;
- m_nWidth = 0;
- m_nHeight = 0;
- m_nPaddedWidth = 0;
- }
-
- int NVectors( void ) const
- {
- return m_nHeight * m_nPaddedWidth;
- }
-
-public:
- // constructors and destructors
- CSIMDVectorMatrix( void )
- {
- Init();
- }
-
- ~CSIMDVectorMatrix( void )
- {
- if ( m_pData )
- delete[] m_pData;
- }
-
- // set up storage and fields for m x n matrix. destroys old data
- void SetSize( int width, int height )
- {
- if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) )
- {
- if ( m_pData )
- delete[] m_pData;
-
- m_nWidth = width;
- m_nHeight = height;
-
- m_nPaddedWidth = ( m_nWidth + 3) >> 2;
- m_pData = NULL;
- if ( width && height )
- m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ];
- }
- }
-
- CSIMDVectorMatrix( int width, int height )
- {
- Init();
- SetSize( width, height );
- }
-
- CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src )
- {
- SetSize( src.m_nWidth, src.m_nHeight );
- if ( m_pData )
- memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
- return *this;
- }
-
- CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src );
-
- CSIMDVectorMatrix &operator*=( Vector const &src );
-
- // create from an RGBA float bitmap. alpha ignored.
- void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata );
-
- // create from 3 fields in a csoa
- void CreateFromCSOAAttributes( CSOAContainer const *pSrc,
- int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 );
-
- // Element access. If you are calling this a lot, you don't want to use this class, because
- // you're not getting the sse advantage
- Vector Element(int x, int y) const
- {
- Assert( m_pData );
- Assert( x < m_nWidth );
- Assert( y < m_nHeight );
- Vector ret;
- FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2);
-
- int xo=(x & 3);
- ret.x=pData->X( xo );
- ret.y=pData->Y( xo );
- ret.z=pData->Z( xo );
- return ret;
- }
-
- //addressing the individual fourvectors elements
- FourVectors &CompoundElement(int x, int y)
- {
- Assert( m_pData );
- Assert( y < m_nHeight );
- Assert( x < m_nPaddedWidth );
- return m_pData[x + m_nPaddedWidth*y ];
- }
-
- // math operations on the whole image
- void Clear( void )
- {
- Assert( m_pData );
- memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
- }
-
- void RaiseToPower( float power );
-};
-
-
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
+// for high speed processing in tools.
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef SIMDVECTORMATRIX_H
+#define SIMDVECTORMATRIX_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+
+#include <string.h>
+#include "tier0/platform.h"
+#include "tier0/dbg.h"
+#include "tier1/utlsoacontainer.h"
+#include "mathlib/ssemath.h"
+
+// 2D matrix of vectors stored four-wide: each row holds m_nPaddedWidth FourVectors, each
+// carrying four consecutive x positions. The object owns m_pData (new[]/delete[]), so it
+// provides a deep-copying copy constructor and assignment operator.
+class CSIMDVectorMatrix
+{
+public:
+    int m_nWidth;                                       // in actual vectors
+    int m_nHeight;
+
+    int m_nPaddedWidth;                                 // # of 4x wide elements
+
+    FourVectors *m_pData;
+
+protected:
+    // put the object into the empty state; does not free anything
+    void Init( void )
+    {
+        m_pData = NULL;
+        m_nWidth = 0;
+        m_nHeight = 0;
+        m_nPaddedWidth = 0;
+    }
+
+    // number of FourVectors in the allocation
+    int NVectors( void ) const
+    {
+        return m_nHeight * m_nPaddedWidth;
+    }
+
+public:
+    // constructors and destructors
+    CSIMDVectorMatrix( void )
+    {
+        Init();
+    }
+
+    ~CSIMDVectorMatrix( void )
+    {
+        if ( m_pData )
+            delete[] m_pData;
+    }
+
+    // set up storage and fields for m x n matrix. destroys old data
+    // (nothing happens when storage of exactly this size already exists)
+    void SetSize( int width, int height )
+    {
+        if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) )
+        {
+            if ( m_pData )
+                delete[] m_pData;
+
+            m_nWidth = width;
+            m_nHeight = height;
+
+            // round the width up to a whole number of 4-wide groups
+            m_nPaddedWidth = ( m_nWidth + 3) >> 2;
+            m_pData = NULL;
+            if ( width && height )
+                m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ];
+        }
+    }
+
+    CSIMDVectorMatrix( int width, int height )
+    {
+        Init();
+        SetSize( width, height );
+    }
+
+    // Deep copy. Without this the compiler-generated copy constructor would share m_pData
+    // between both objects and the buffer would be delete[]d twice.
+    CSIMDVectorMatrix( CSIMDVectorMatrix const &src )
+    {
+        Init();
+        *this = src;
+    }
+
+    // Deep copy: resize to match src, then copy its contents.
+    CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src )
+    {
+        if ( this == &src )
+            return *this;                               // nothing to do; also avoids memcpy onto itself
+
+        SetSize( src.m_nWidth, src.m_nHeight );
+        if ( m_pData )
+            memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
+        return *this;
+    }
+
+    CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src );
+
+    CSIMDVectorMatrix &operator*=( Vector const &src );
+
+    // create from an RGBA float bitmap. alpha ignored.
+    void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata );
+
+    // create from 3 fields in a csoa
+    void CreateFromCSOAAttributes( CSOAContainer const *pSrc,
+                                   int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 );
+
+    // Element access. If you are calling this a lot, you don't want to use this class, because
+    // you're not getting the sse advantage
+    Vector Element(int x, int y) const
+    {
+        Assert( m_pData );
+        Assert( x < m_nWidth );
+        Assert( y < m_nHeight );
+        Vector ret;
+        // x >> 2 picks the FourVectors within the row, x & 3 the lane inside it
+        FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2);
+
+        int xo=(x & 3);
+        ret.x=pData->X( xo );
+        ret.y=pData->Y( xo );
+        ret.z=pData->Z( xo );
+        return ret;
+    }
+
+    //addressing the individual fourvectors elements
+    // (x counts 4-wide groups here, not individual vectors)
+    FourVectors &CompoundElement(int x, int y)
+    {
+        Assert( m_pData );
+        Assert( y < m_nHeight );
+        Assert( x < m_nPaddedWidth );
+        return m_pData[x + m_nPaddedWidth*y ];
+    }
+
+    // math operations on the whole image
+    void Clear( void )
+    {
+        Assert( m_pData );
+        memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
+    }
+
+    void RaiseToPower( float power );
+};
+
+
+
+#endif
diff --git a/mp/src/public/mathlib/spherical_geometry.h b/mp/src/public/mathlib/spherical_geometry.h
index a32d96ac..04310f43 100644
--- a/mp/src/public/mathlib/spherical_geometry.h
+++ b/mp/src/public/mathlib/spherical_geometry.h
@@ -1,73 +1,73 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: Functions for spherical geometry.
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef SPHERICAL_GEOMETRY_H
-#define SPHERICAL_GEOMETRY_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// see http://mathworld.wolfram.com/SphericalTrigonometry.html
-
-// return the spherical distance, in radians, between 2 points on the unit sphere.
-FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b )
-{
- // check unit length
- Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 );
- Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 );
- return acos( DotProduct( a, b ) );
-}
-
-
-// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form.
-// valid for "small" triangles.
-FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c )
-{
- float flLengthA = UnitSphereLineSegmentLength( b, c );
- float flLengthB = UnitSphereLineSegmentLength( c, a );
- float flLengthC = UnitSphereLineSegmentLength( a, b );
-
- if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. ) )
- return 0.; // zero area triangle
-
- // now, find the 3 incribed angles for the triangle
- float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC );
- float flSinSums = sin( flHalfSumLens );
- float flSinSMinusA= sin( flHalfSumLens - flLengthA );
- float flSinSMinusB= sin( flHalfSumLens - flLengthB );
- float flSinSMinusC= sin( flHalfSumLens - flLengthC );
-
- float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) );
- float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) );
- float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) );
-
- // Girards formula : area = sum of angles - pi.
- return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI;
-}
-
-// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf
-
-// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation
-float AssociatedLegendrePolynomial( int nL, int nM, float flX );
-
-// Evaluate order N spherical harmonic with spherical coordinates
-// nL = band, 0..N
-// nM = -nL .. nL
-// theta = 0..M_PI
-// phi = 0.. 2 * M_PHI
-float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi );
-
-// evaluate spherical harmonic with normalized vector direction
-float SphericalHarmonic( int nL, int nM, Vector const &vecDirection );
-
-
-#endif // SPHERICAL_GEOMETRY_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: Functions for spherical geometry.
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef SPHERICAL_GEOMETRY_H
+#define SPHERICAL_GEOMETRY_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <math.h>
+#include <float.h>
+
+// see http://mathworld.wolfram.com/SphericalTrigonometry.html
+
+// return the spherical distance, in radians, between 2 points on the unit sphere.
+// Both inputs must be (approximately) unit length. The dot product is clamped to [-1,1]
+// before acos(), because rounding or the 1.0e-3 length slack allowed by the asserts can push
+// it just outside that range, where acos() returns NaN.
+FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b )
+{
+    // check unit length
+    Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 );
+    Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 );
+
+    float flCosAngle = DotProduct( a, b );
+    if ( flCosAngle > 1.0f )
+        flCosAngle = 1.0f;
+    else if ( flCosAngle < -1.0f )
+        flCosAngle = -1.0f;
+    return acos( flCosAngle );
+}
+
+
+// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form.
+// valid for "small" triangles.
+// Method: measure the three side lengths, derive tan( angle / 2 ) for each corner from the
+// half-perimeter, then apply Girard's formula. Returns 0 if any side has zero length.
+FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c )
+{
+ // each side is named after the opposite vertex
+ float flLengthA = UnitSphereLineSegmentLength( b, c );
+ float flLengthB = UnitSphereLineSegmentLength( c, a );
+ float flLengthC = UnitSphereLineSegmentLength( a, b );
+
+ if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. ) )
+ return 0.; // zero area triangle
+
+ // now, find the 3 inscribed angles for the triangle
+ float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC );
+ float flSinSums = sin( flHalfSumLens );
+ float flSinSMinusA= sin( flHalfSumLens - flLengthA );
+ float flSinSMinusB= sin( flHalfSumLens - flLengthB );
+ float flSinSMinusC= sin( flHalfSumLens - flLengthC );
+
+ // tan( A/2 ) = sqrt( sin(s-b) * sin(s-c) / ( sin(s) * sin(s-a) ) ), and likewise for B and C
+ float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) );
+ float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) );
+ float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) );
+
+ // Girard's formula : area = sum of angles - pi.
+ return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI;
+}
+
+// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf
+
+// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation
+float AssociatedLegendrePolynomial( int nL, int nM, float flX );
+
+// Evaluate order N spherical harmonic with spherical coordinates
+// nL = band, 0..N
+// nM = -nL .. nL
+// theta = 0..M_PI
+// phi = 0.. 2 * M_PI
+float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi );
+
+// evaluate spherical harmonic with normalized vector direction
+float SphericalHarmonic( int nL, int nM, Vector const &vecDirection );
+
+
+#endif // SPHERICAL_GEOMETRY_H
diff --git a/mp/src/public/mathlib/ssemath.h b/mp/src/public/mathlib/ssemath.h
index b25fbd09..6691df12 100644
--- a/mp/src/public/mathlib/ssemath.h
+++ b/mp/src/public/mathlib/ssemath.h
@@ -1,3098 +1,3098 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEMATH_H
-#define SSEMATH_H
-
-#if defined( _X360 )
-#include <xboxmath.h>
-#else
-#include <xmmintrin.h>
-#endif
-
-#include <mathlib/vector.h>
-#include <mathlib/mathlib.h>
-
-#if defined(GNUC)
-#define USE_STDC_FOR_SIMD 0
-#else
-#define USE_STDC_FOR_SIMD 0
-#endif
-
-#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0))
-#define _SSE1 1
-#endif
-
-// I thought about defining a class/union for the SIMD packed floats instead of using fltx4,
-// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur
-// the relationship between packed floats and packed integer types and (b) not sure that the
-// compiler would handle generating good code for the intrinsics.
-
-#if USE_STDC_FOR_SIMD
-
-// Plain-C stand-in for a SIMD register: the same four 32-bit lanes can be
-// read either as floats or as unsigned integers.
-typedef union
-{
- float m128_f32[4];
- uint32 m128_u32[4];
-} fltx4;
-
-typedef fltx4 i32x4;
-typedef fltx4 u32x4;
-
-#elif ( defined( _X360 ) )
-
-typedef union
-{
- // This union allows float/int access (which generally shouldn't be done in inner loops)
- __vector4 vmx;
- float m128_f32[4];
- uint32 m128_u32[4];
-} fltx4_union;
-
-typedef __vector4 fltx4;
-typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops.
-typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops.
-
-#else
-
-typedef __m128 fltx4;
-typedef __m128 i32x4;
-typedef __m128 u32x4;
-
-#endif
-
-// The FLTX4 type is a fltx4 used as a parameter to a function.
-// On the 360, the best way to do this is pass-by-copy on the registers.
-// On the PC, the best way is to pass by const reference.
-// The compiler will sometimes, but not always, replace a pass-by-const-ref
-// with a pass-in-reg on the 360; to avoid this confusion, you can
-// explicitly use a FLTX4 as the parameter type.
-#ifdef _X360
-typedef __vector4 FLTX4;
-#else
-typedef const fltx4 & FLTX4;
-#endif
-
-// A 16-byte aligned int32 datastructure
-// (for use when writing out fltx4's as SIGNED
-// ints).
-struct ALIGN16 intx4
-{
-    int32 m_i32[4];    // the four packed signed words
-
-    // Mutable access to word number 'which' (not range-checked).
-    inline int & operator[](int which)
-    {
-        return *( m_i32 + which );
-    }
-
-    // Read-only access to word number 'which' (not range-checked).
-    inline const int & operator[](int which) const
-    {
-        return *( m_i32 + which );
-    }
-
-    // Pointer to the first of the four words.
-    inline int32 *Base() {
-        return &m_i32[0];
-    }
-
-    inline const int32 *Base() const
-    {
-        return &m_i32[0];
-    }
-
-    // True only when all four words match.
-    inline const bool operator==(const intx4 &other) const
-    {
-        for ( int i = 0; i < 4; ++i )
-        {
-            if ( m_i32[i] != other.m_i32[i] )
-                return false;
-        }
-        return true;
-    }
-} ALIGN16_POST;
-
-
-#if defined( _DEBUG ) && defined( _X360 )
-// Debug-only sanity check (Xbox 360): reads the vector status/control register
-// with mfvscr and asserts that its fourth 32-bit word is zero.
-FORCEINLINE void TestVPUFlags()
-{
- // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com)
- __vector4 a;
- __asm
- {
- mfvscr a;
- }
- // view the register as four words; word 3 is the one inspected
- unsigned int * flags = (unsigned int *)&a;
- unsigned int controlWord = flags[3];
- Assert(controlWord == 0);
-}
-#else // _DEBUG
-FORCEINLINE void TestVPUFlags() {}
-#endif // _DEBUG
-
-
-// useful constants in SIMD packed float format:
-// (note: some of these aren't stored on the 360,
-// but are manufactured directly in one or two
-// instructions, saving a load and possible L2
-// miss.)
-#ifndef _X360
-extern const fltx4 Four_Zeros; // 0 0 0 0
-extern const fltx4 Four_Ones; // 1 1 1 1
-extern const fltx4 Four_Twos; // 2 2 2 2
-extern const fltx4 Four_Threes; // 3 3 3 3
-extern const fltx4 Four_Fours; // guess.
-extern const fltx4 Four_Point225s; // .225 .225 .225 .225
-extern const fltx4 Four_PointFives; // .5 .5 .5 .5
-extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
-extern const fltx4 Four_2ToThe21s; // (1<<21)..
-extern const fltx4 Four_2ToThe22s; // (1<<22)..
-extern const fltx4 Four_2ToThe23s; // (1<<23)..
-extern const fltx4 Four_2ToThe24s; // (1<<24)..
-extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
-extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
-#else
-#define Four_Zeros XMVectorZero() // 0 0 0 0
-#define Four_Ones XMVectorSplatOne() // 1 1 1 1
-extern const fltx4 Four_Twos; // 2 2 2 2
-extern const fltx4 Four_Threes; // 3 3 3 3
-extern const fltx4 Four_Fours; // guess.
-extern const fltx4 Four_Point225s; // .225 .225 .225 .225
-extern const fltx4 Four_PointFives; // .5 .5 .5 .5
-extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
-extern const fltx4 Four_2ToThe21s; // (1<<21)..
-extern const fltx4 Four_2ToThe22s; // (1<<22)..
-extern const fltx4 Four_2ToThe23s; // (1<<23)..
-extern const fltx4 Four_2ToThe24s; // (1<<24)..
-extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
-extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
-#endif
-extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX
-extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX
-extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
-
-// external aligned integer constants
-extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4
-extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4
-extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4
-extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0
-extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
-extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0
-extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4
-
-// This mask is used for skipping the tail of things. If you have N elements in an array, and wish
-// to mask out the tail, g_SIMD_SkipTailMask[N & 3] is what you want to use for the last iteration.
-extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST;
-
-// Define prefetch macros.
-// The characteristics of cache and prefetch are completely
-// different between the different platforms, so you DO NOT
-// want to just define one macro that maps to every platform
-// intrinsic under the hood -- you need to prefetch at different
-// intervals between x86 and PPC, for example, and that is
-// a higher level code change.
-// On the other hand, I'm tired of typing #ifdef _X360
-// all over the place, so this is just a nop on Intel, PS3.
-#ifdef _X360
-#define PREFETCH360(address, offset) __dcbt(offset,address)
-#else
-#define PREFETCH360(x,y) // nothing
-#endif
-
-#if USE_STDC_FOR_SIMD
-
-//---------------------------------------------------------------------
-// Standard C (fallback/Linux) implementation (only there for compat - slow)
-//---------------------------------------------------------------------
-
-// Read float lane idx of a (idx is not range-checked).
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
- return a.m128_f32[ idx ];
-}
-
-// Writable reference to float lane idx of a (idx is not range-checked).
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
- return a.m128_f32[idx];
-}
-
-// Read lane idx of a as a raw 32-bit unsigned integer (same storage as the float lane).
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
- return a.m128_u32[idx];
-}
-
-// Writable reference to lane idx of a viewed as a raw 32-bit unsigned integer.
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
- return a.m128_u32[idx];
-}
-
-// Return zero in the fastest way -- on the x360, faster even than loading.
-// This fallback simply returns the Four_Zeros constant.
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
- return Four_Zeros;
-}
-
-// Return one in the fastest way -- on the x360, faster even than loading.
-// This fallback simply returns the Four_Ones constant.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
- return Four_Ones;
-}
-
-// Broadcast lane 0 (x) of a into all four lanes of the result.
-FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a )
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( splat, i ) = SubFloat( a, 0 );
-    }
-    return splat;
-}
-
-// Broadcast lane 1 (y) of a into all four lanes of the result.
-FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( splat, i ) = SubFloat( a, 1 );
-    }
-    return splat;
-}
-
-// Broadcast lane 2 (z) of a into all four lanes of the result.
-FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( splat, i ) = SubFloat( a, 2 );
-    }
-    return splat;
-}
-
-// Broadcast lane 3 (w) of a into all four lanes of the result.
-FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( splat, i ) = SubFloat( a, 3 );
-    }
-    return splat;
-}
-
-// Copy of a whose lane 0 is replaced by lane 0 of x.
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
-    fltx4 patched = a;
-    SubFloat( patched, 0 ) = SubFloat( x, 0 );
-    return patched;
-}
-
-// Copy of a whose lane 1 is replaced by lane 1 of y.
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
-    fltx4 patched = a;
-    SubFloat( patched, 1 ) = SubFloat( y, 1 );
-    return patched;
-}
-
-// Copy of a whose lane 2 is replaced by lane 2 of z.
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
-    fltx4 patched = a;
-    SubFloat( patched, 2 ) = SubFloat( z, 2 );
-    return patched;
-}
-
-// Copy of a whose lane 3 is replaced by lane 3 of w.
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
-    fltx4 patched = a;
-    SubFloat( patched, 3 ) = SubFloat( w, 3 );
-    return patched;
-}
-
-// Copy of a with lane nComponent overwritten by flValue (index not range-checked).
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
-    fltx4 patched = a;
-    SubFloat( patched, nComponent ) = flValue;
-    return patched;
-}
-
-// a b c d -> b c d a  (each output lane i takes input lane (i+1) mod 4)
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
-    fltx4 rotated;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( rotated, i ) = SubFloat( a, ( i + 1 ) & 3 );
-    }
-    return rotated;
-}
-
-// a b c d -> c d a b  (each output lane i takes input lane (i+2) mod 4)
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
-    fltx4 rotated;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( rotated, i ) = SubFloat( a, ( i + 2 ) & 3 );
-    }
-    return rotated;
-}
-
-// Function-body helper for the scalar fallback: expands to a declaration of
-// retVal, four per-lane float statements "retVal[i] = a[i] op b[i]" and
-// "return retVal;". The enclosing function must have fltx4 parameters a and b.
-#define BINOP(op) \
- fltx4 retVal; \
- SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \
- SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \
- SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \
- SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \
- return retVal;
-
-// Integer counterpart of BINOP: applies op to the raw 32-bit lanes (via SubInt)
-// of the enclosing function's parameters a and b and returns the result.
-#define IBINOP(op) \
- fltx4 retVal; \
- SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \
- SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \
- SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \
- SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \
- return retVal;
-
-// Per-lane float sum a + b (body generated by the BINOP macro).
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
-{
- BINOP(+);
-}
-
-// Per-lane float difference a - b (body generated by the BINOP macro).
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
-{
-    BINOP(-);
-}
-
-// Per-lane float product a * b (body generated by the BINOP macro).
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
-{
- BINOP(*);
-}
-
-// Per-lane float quotient a / b (body generated by the BINOP macro); no zero-divisor check.
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
-{
- BINOP(/);
-}
-
-
-// Multiply-add: per lane a*b + c, built from MulSIMD followed by AddSIMD.
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
-{
-    const fltx4 product = MulSIMD( a, b );
-    return AddSIMD( product, c );
-}
-
-// Multiply-subtract: per lane c - a*b (note the operand order), built from
-// MulSIMD followed by SubSIMD.
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
-{
-    const fltx4 product = MulSIMD( a, b );
-    return SubSIMD( c, product );
-}
-
-
-// Per-lane sine of an angle given in radians.
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
-    fltx4 sines;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( sines, i ) = sin( SubFloat( radians, i ) );
-    }
-    return sines;
-}
-
-// Sine and cosine of lanes 0..2 only; lane 3 of sine and cosine is left untouched.
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
-    for ( int i = 0; i < 3; ++i )
-    {
-        SinCos( SubFloat( radians, i ), &SubFloat( sine, i ), &SubFloat( cosine, i ) );
-    }
-}
-
-// Sine and cosine of all four lanes, written to the sine and cosine outputs.
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
-    for ( int i = 0; i < 4; ++i )
-    {
-        SinCos( SubFloat( radians, i ), &SubFloat( sine, i ), &SubFloat( cosine, i ) );
-    }
-}
-
-// Per-lane arcsine.
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
-    fltx4 angles;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( angles, i ) = asin( SubFloat( sine, i ) );
-    }
-    return angles;
-}
-
-// Per-lane arccosine.
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
-    fltx4 angles;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( angles, i ) = acos( SubFloat( cs, i ) );
-    }
-    return angles;
-}
-
-// tan^-1(a/b) .. ie, pass sin in as a and cos in as b
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
-    fltx4 angles;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( angles, i ) = atan2( SubFloat( a, i ), SubFloat( b, i ) );
-    }
-    return angles;
-}
-
-// Per-lane maximum of a and b.
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
-{
-    fltx4 larger;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( larger, i ) = max( SubFloat( a, i ), SubFloat( b, i ) );
-    }
-    return larger;
-}
-
-// Per-lane minimum of a and b.
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
-{
-    fltx4 smaller;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( smaller, i ) = min( SubFloat( a, i ), SubFloat( b, i ) );
-    }
-    return smaller;
-}
-
-// Bitwise AND of the raw 32-bit lanes (body generated by the IBINOP macro).
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
-{
- IBINOP(&);
-}
-
-// Bitwise (~a) & b on the raw 32-bit lanes: the FIRST argument is the one complemented.
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
-{
-    fltx4 masked;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( masked, i ) = ~SubInt( a, i ) & SubInt( b, i );
-    }
-    return masked;
-}
-
-// Bitwise XOR of the raw 32-bit lanes (body generated by the IBINOP macro).
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
-{
- IBINOP(^);
-}
-
-// Bitwise OR of the raw 32-bit lanes (body generated by the IBINOP macro).
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
-{
- IBINOP(|);
-}
-
-// Per-lane arithmetic negation.
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
-{
-    fltx4 negated;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( negated, i ) = -SubFloat( a, i );
-    }
-    return negated;
-}
-
-// True when every lane compares equal to 0.0 as a float.
-FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero?
-{
-    for ( int i = 0; i < 4; ++i )
-    {
-        if ( !( SubFloat( a, i ) == 0.0 ) )
-            return false;
-    }
-    return true;
-}
-
-
-// for branching when a.xyzw > b.xyzw (true only if the test holds in all four lanes)
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
-    for ( int i = 0; i < 4; ++i )
-    {
-        if ( !( SubFloat(a,i) > SubFloat(b,i) ) )
-            return false;
-    }
-    return true;
-}
-
-// for branching when a.xyzw >= b.xyzw (true only if the test holds in all four lanes)
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
-    for ( int i = 0; i < 4; ++i )
-    {
-        if ( !( SubFloat(a,i) >= SubFloat(b,i) ) )
-            return false;
-    }
-    return true;
-}
-
-// For branching if all a.xyzw == b.xyzw (float comparison, lane by lane)
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
-    for ( int i = 0; i < 4; ++i )
-    {
-        if ( !( SubFloat(a,i) == SubFloat(b,i) ) )
-            return false;
-    }
-    return true;
-}
-
-// Packs the sign bit of each lane into the low four bits of the result:
-// sign(x) -> bit 0, sign(y) -> bit 1, sign(z) -> bit 2, sign(w) -> bit 3.
-FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
-{
-    int nMask = 0;
-    for ( int i = 0; i < 4; ++i )
-    {
-        // isolate bit 31 of lane i and move it down to bit position i
-        nMask |= ( SubInt( a, i ) & 0x80000000 ) >> ( 31 - i );
-    }
-    return nMask;
-}
-
-// True if any lane has its sign bit set (decided by TestSignSIMD, i.e. on the raw bit).
-FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-{
-    const int nSignMask = TestSignSIMD( a );
-    return nSignMask != 0;
-}
-
-// Per-lane mask: all bits set where a == b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) == SubFloat( b, i )) ? ~0 : 0;
-    }
-    return mask;
-}
-
-// Per-lane mask: all bits set where a > b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) > SubFloat( b, i )) ? ~0 : 0;
-    }
-    return mask;
-}
-
-// Per-lane mask: all bits set where a >= b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) >= SubFloat( b, i )) ? ~0 : 0;
-    }
-    return mask;
-}
-
-// Per-lane mask: all bits set where a < b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) < SubFloat( b, i )) ? ~0 : 0;
-    }
-    return mask;
-}
-
-// Per-lane mask: all bits set where a <= b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) <= SubFloat( b, i )) ? ~0 : 0;
-    }
-    return mask;
-}
-
-// Per-lane mask: all bits set where -b <= a <= b, all bits clear elsewhere.
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
-{
-    fltx4 mask;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( mask, i ) = ( SubFloat( a, i ) <= SubFloat( b, i ) && SubFloat( a, i ) >= -SubFloat( b, i ) ) ? ~0 : 0;
-    }
-    return mask;
-}
-
-
-// Bitwise select: takes NewValue bits where ReplacementMask bits are set and
-// OldValue bits where they are clear.
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
-    const fltx4 taken = AndSIMD( ReplacementMask, NewValue );
-    const fltx4 kept = AndNotSIMD( ReplacementMask, OldValue );
-    return OrSIMD( taken, kept );
-}
-
-// Build a vector whose four float lanes all hold flValue.
-FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( splat, i ) = flValue;
-    }
-    return splat;
-}
-
-/// Replicate a single 32 bit integer value to all 4 components of an m128
-/// (stored through the integer view of the lanes, so the bit pattern is kept).
-FORCEINLINE fltx4 ReplicateIX4( int nValue )
-{
-    fltx4 splat;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubInt( splat, i ) = nValue;
-    }
-    return splat;
-}
-
-// Round each lane towards positive infinity
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
-    fltx4 rounded;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( rounded, i ) = ceil( SubFloat( a, i ) );
-    }
-    return rounded;
-}
-
-// Round each lane towards negative infinity
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
-{
-    fltx4 rounded;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( rounded, i ) = floor( SubFloat( a, i ) );
-    }
-    return rounded;
-}
-
-// "Estimate" variant; in this fallback it is the same per-lane sqrt() as SqrtSIMD.
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
-{
-    fltx4 roots;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( roots, i ) = sqrt( SubFloat( a, i ) );
-    }
-    return roots;
-}
-
-// Per-lane square root.
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
-{
-    fltx4 roots;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( roots, i ) = sqrt( SubFloat( a, i ) );
-    }
-    return roots;
-}
-
-// "Estimate" variant; in this fallback it computes 1.0 / sqrt() per lane with no zero guard.
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
-{
-    fltx4 inverseRoots;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) );
-    }
-    return inverseRoots;
-}
-
-// 1/sqrt(a) per lane, except that a lane equal to zero is replaced by
-// FLT_EPSILON first, so the result stays finite.
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
-    fltx4 inverseRoots;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) != 0.0f ? SubFloat( a, i ) : FLT_EPSILON );
-    }
-    return inverseRoots;
-}
-
-// Per-lane 1.0 / sqrt(a), no zero guard.
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
-{
-    fltx4 inverseRoots;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) );
-    }
-    return inverseRoots;
-}
-
-// "Estimate" variant; in this fallback it is a plain per-lane 1.0 / a with no zero guard.
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
-{
-    fltx4 inverses;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverses, i ) = 1.0 / SubFloat( a, i );
-    }
-    return inverses;
-}
-
-// Per-lane 1.0 / a, no zero guard.
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
-{
-    fltx4 inverses;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverses, i ) = 1.0 / SubFloat( a, i );
-    }
-    return inverses;
-}
-
-/// 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result
-/// (a zero lane is swapped for FLT_EPSILON before dividing).
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
-    fltx4 inverses;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverses, i ) = 1.0 / (SubFloat( a, i ) == 0.0f ? FLT_EPSILON : SubFloat( a, i ));
-    }
-    return inverses;
-}
-
-// 1/a per lane; a zero lane is swapped for FLT_EPSILON before dividing so the
-// result is large but finite.
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
-    fltx4 inverses;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( inverses, i ) = 1.0 / (SubFloat( a, i ) == 0.0f ? FLT_EPSILON : SubFloat( a, i ));
-    }
-    return inverses;
-}
-
-// 2^x for all values (the antilog), computed per lane with powf( 2, x )
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
-    fltx4 powers;
-    for ( int i = 0; i < 4; ++i )
-    {
-        SubFloat( powers, i ) = powf( 2, SubFloat(toPower, i) );
-    }
-    return powers;
-}
-
-// Dot product of the x, y and z lanes (w is ignored); the scalar result is
-// replicated into all four lanes of the return value.
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
- float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) +
- SubFloat( a, 1 ) * SubFloat( b, 1 ) +
- SubFloat( a, 2 ) * SubFloat( b, 2 );
- return ReplicateX4( flDot );
-}
-
-// Dot product over all four lanes; the scalar result is replicated into all
-// four lanes of the return value.
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
- float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) +
- SubFloat( a, 1 ) * SubFloat( b, 1 ) +
- SubFloat( a, 2 ) * SubFloat( b, 2 ) +
- SubFloat( a, 3 ) * SubFloat( b, 3 );
- return ReplicateX4( flDot );
-}
-
-// Clamps each component of in to the range [min, max]: the upper bound is
-// applied first, then the lower bound.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
-    const fltx4 capped = MinSIMD( max, in );
-    return MaxSIMD( min, capped );
-}
-
-// Squelch the w component of a vector to +0.0.
-// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
-// Returns a copy of a in which lane 3 (w) is zero and x, y, z are unchanged.
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
-    fltx4 retval = a;
-    // w lives in lane 3 (lane 0 is x); the previous code cleared lane 0 by mistake
-    SubFloat( retval, 3 ) = 0;
-    return retval;
-}
-
-// Reads one fltx4 from pSIMD by reinterpreting the pointer; in this fallback
-// the body is identical to LoadAlignedSIMD.
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// Reads one fltx4 from pSIMD by reinterpreting the pointer.
-// NOTE(review): despite the "3" in the name this dereferences a whole fltx4
-// (four lanes), so the memory after the third float must be readable and the
-// w lane holds whatever was there -- confirm callers account for this.
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// Reads one fltx4 from pSIMD by reinterpreting the pointer (no alignment check here).
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// for the transitional class -- load a 3-by VectorAligned and squash its w component
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
-    fltx4 loaded = LoadAlignedSIMD( pSIMD.Base() );
-    SubInt( loaded, 3 ) = 0;    // clear every bit of lane w
-    return loaded;
-}
-
-// Writes all four lanes of a to pSIMD by reinterpreting the pointer as fltx4*.
-FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
-}
-
-// Writes all four lanes of a to pSIMD; in this fallback the body is identical
-// to StoreAlignedSIMD.
-FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
-}
-
-// Writes only lanes 0..2 (x, y, z) of a to pSIMD[0..2]; nothing past the third float is touched.
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
-    for ( int i = 0; i < 3; ++i )
-    {
-        pSIMD[i] = SubFloat(a, i);
-    }
-}
-
-// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD
-// Forwards to StoreAlignedSIMD, which writes all four lanes of a starting at pSIMD->Base().
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- StoreAlignedSIMD(pSIMD->Base(),a);
-}
-
-// In-place transpose of the 4x4 matrix whose rows are x, y, z and w:
-// element (row i, lane j) is exchanged with element (row j, lane i) for every i < j.
-// Written as a loop so that no helper macro leaks out of this header.
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
-{
-    fltx4 *rows[4] = { &x, &y, &z, &w };
-    for ( int i = 0; i < 3; ++i )
-    {
-        for ( int j = i + 1; j < 4; ++j )
-        {
-            const float tmp = SubFloat( *rows[i], j );
-            SubFloat( *rows[i], j ) = SubFloat( *rows[j], i );
-            SubFloat( *rows[j], i ) = tmp;
-        }
-    }
-}
-
-// Smallest of a.x, a.y and a.z (w is ignored),
-// replicated to the whole return value.
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
-{
-    const float flLowestXY = min( SubFloat(a, 0), SubFloat(a, 1) );
-    const float flLowestXYZ = min( flLowestXY, SubFloat(a, 2) );
-    return ReplicateX4( flLowestXYZ );
-}
-
-// Largest of a.x, a.y and a.z (w is ignored),
-// replicated to the whole return value.
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
-{
-    const float flHighestXY = max( SubFloat(a, 0), SubFloat(a, 1) );
-    const float flHighestXYZ = max( flHighestXY, SubFloat(a, 2) );
-    return ReplicateX4( flHighestXYZ );
-}
-
-// Fixed-point conversion and save as SIGNED INTS.
-// pDest->x = Int (vSrc.x)
-// note: some architectures have means of doing
-// fixed point conversion when the fix depth is
-// specified as an immediate.. but there is no way
-// to guarantee an immediate as a parameter to function
-// like this.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
-    // each float lane is converted by the implicit float -> int assignment
-    intx4 &dest = *pDest;
-    for ( int i = 0; i < 4; ++i )
-    {
-        dest[i] = SubFloat(vSrc, i);
-    }
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-// splat all components of a vector to a signed immediate int number.
-FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue )
-{
-    // write nValue into the integer view of every lane
-    fltx4 splat;
-    for ( int i = 3; i >= 0; --i )
-    {
-        SubInt( splat, i ) = nValue;
-    }
-    return splat;
-}
-
-// Load 4 aligned words into a SIMD register
-// (this fallback just reinterprets the pointer as i32x4 and copies the value)
-FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD)
-{
- return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
-}
-
-// Load 4 unaligned words into a SIMD register
-// (in this fallback the body is identical to LoadAlignedIntSIMD)
-FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
-{
- return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
-}
-
-// save into four words, 16-byte aligned
-// (writes the whole register through pSIMD reinterpreted as i32x4*)
-FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
-}
-
-// Overload taking an intx4: writes the whole register over its four words (via Base()).
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a;
-}
-
-// Writes the whole register to pSIMD; in this fallback the body is identical
-// to the aligned int32* store.
-FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
-}
-
-// Take a fltx4 containing fixed-point uints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-// Converts each unsigned 32-bit lane of vSrcA to the float with the same
-// numeric value. The Assert(0) is kept from the original: it flags in debug
-// builds that this path is not expected to be used on PC.
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
-{
-    Assert(0); /* pc has no such operation */
-    fltx4 retval;
-    for ( int i = 0; i < 4; ++i )
-    {
-        // read from the SOURCE vector; the previous code converted the
-        // uninitialised lanes of retval and ignored vSrcA entirely
-        SubFloat( retval, i ) = ( (float) SubInt( vSrcA, i ) );
-    }
-    return retval;
-}
-
-
-#if 0 /* pc has no such op */
-// Take a fltx4 containing fixed-point sints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[0])) );
- SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[1])) );
- SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[2])) );
- SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[3])) );
- return retval;
-}
-
-
-/*
- works on fltx4's as if they are four uints.
- the first parameter contains the words to be shifted,
- the second contains the amount to shift by AS INTS
-
- for i = 0 to 3
- shift = vSrcB_i*32:(i*32)+4
- vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
-*/
-FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
-{
- i32x4 retval;
- SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0);
- SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1);
- SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2);
- SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3);
-
-
- return retval;
-}
-#endif
-
-#elif ( defined( _X360 ) )
-
-//---------------------------------------------------------------------
-// X360 implementation
-//---------------------------------------------------------------------
-
-// Writable reference to float lane idx of a, obtained by viewing the register
-// through fltx4_union (idx is not range-checked).
-FORCEINLINE float & FloatSIMD( fltx4 & a, int idx )
-{
- fltx4_union & a_union = (fltx4_union &)a;
- return a_union.m128_f32[idx];
-}
-
-// Writable reference to lane idx of a as a raw unsigned integer, obtained by
-// viewing the register through fltx4_union (idx is not range-checked).
-FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx )
-{
- fltx4_union & a_union = (fltx4_union &)a;
- return a_union.m128_u32[idx];
-}
-
-// a + b, forwarded to the __vaddfp intrinsic.
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
-{
- return __vaddfp( a, b );
-}
-
-// a - b, forwarded to the __vsubfp intrinsic.
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
-{
- return __vsubfp( a, b );
-}
-
-// a*b, per lane (wraps the __vmulfp intrinsic).
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 product = __vmulfp( a, b );
- return product;
-}
-
-// a*b + c, done with the single __vmaddfp multiply-add intrinsic.
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c )
-{
- const fltx4 result = __vmaddfp( a, b, c );
- return result;
-}
-
-// c - a*b, done with the single __vnmsubfp intrinsic.
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c )
-{
- const fltx4 result = __vnmsubfp( a, b, c );
- return result;
-}
-
-// Three-component dot product of a and b, forwarded to __vmsum3fp.
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
- const fltx4 dot = __vmsum3fp( a, b );
- return dot;
-}
-
-// Four-component dot product of a and b, forwarded to __vmsum4fp.
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
- const fltx4 dot = __vmsum4fp( a, b );
- return dot;
-}
-
-// Sine of each lane of radians, forwarded to XMVectorSin.
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
- const fltx4 sines = XMVectorSin( radians );
- return sines;
-}
-
-// Writes sine and cosine of radians into the two output vectors.
-// On this platform it is the same XMVectorSinCos call as SinCosSIMD.
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- fltx4 * const pSineOut = &sine;
- fltx4 * const pCosineOut = &cosine;
- XMVectorSinCos( pSineOut, pCosineOut, radians );
-}
-
-// Writes sine and cosine of radians into the two output vectors,
-// using XMVectorSinCos.
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- fltx4 * const pSineOut = &sine;
- fltx4 * const pCosineOut = &cosine;
- XMVectorSinCos( pSineOut, pCosineOut, radians );
-}
-
-// Stores the XMVectorCos result for radians into the output parameter cosine.
-FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians )
-{
- const fltx4 result = XMVectorCos( radians );
- cosine = result;
-}
-
-// Arcsine of each lane, forwarded to XMVectorASin.
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
- const fltx4 angles = XMVectorASin( sine );
- return angles;
-}
-
-// Arccosine of each lane, forwarded to XMVectorACos.
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
- const fltx4 angles = XMVectorACos( cs );
- return angles;
-}
-
-// tan^-1(a/b), i.e. pass the sine in as a and the cosine in as b.
-// Forwarded to XMVectorATan2.
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
- const fltx4 angles = XMVectorATan2( a, b );
- return angles;
-}
-
-// DivSIMD defined further down, since it uses ReciprocalSIMD
-
-// max(a,b), per lane (wraps __vmaxfp).
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 larger = __vmaxfp( a, b );
- return larger;
-}
-
-// min(a,b), per lane (wraps __vminfp).
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 smaller = __vminfp( a, b );
- return smaller;
-}
-
-// a & b, bitwise (wraps __vand).
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = __vand( a, b );
- return bits;
-}
-
-// ~a & b, bitwise.
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b )
-{
- // NOTE: the operands are deliberately passed in swapped order. SSE
- // complements the first argument whereas VMX complements the second.
- const fltx4 bits = __vandc( b, a );
- return bits;
-}
-
-// a ^ b, bitwise (wraps __vxor).
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = __vxor( a, b );
- return bits;
-}
-
-// a | b, bitwise (wraps __vor).
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = __vor( a, b );
- return bits;
-}
-
-// negate: -a, forwarded to XMVectorNegate.
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a)
-{
- const fltx4 negated = XMVectorNegate( a );
- return negated;
-}
-
-// True when all floats of a are zero: compares a against Four_Zeros with
-// the recording form of the compare and checks that every lane matched.
-FORCEINLINE bool IsAllZeros( const fltx4 & a )
-{
- unsigned int nCompareFlags = 0;
- __vcmpeqfpR( a, Four_Zeros, &nCompareFlags );
- const bool bEveryLaneZero = XMComparisonAllTrue( nCompareFlags );
- return bEveryLaneZero;
-}
-
-// True when any float of a is zero: compares a against XMVectorZero() and
-// checks whether at least one lane matched.
-FORCEINLINE bool IsAnyZeros( const fltx4 & a )
-{
- unsigned int nCompareFlags;
- XMVectorEqualR( &nCompareFlags, a, XMVectorZero() );
- const bool bSomeLaneZero = XMComparisonAnyTrue( nCompareFlags );
- return bSomeLaneZero;
-}
-
-// Are any of x, y, z zero? The w lane is taken out of the test by first
-// overwriting it with a copy of x.
-FORCEINLINE bool IsAnyXYZZero( const fltx4 &a )
-{
- // copy a's x component into w, in case w was zero.
- const fltx4 xyzx = __vrlimi( a, a, 1, 1 );
- unsigned int nCompareFlags;
- XMVectorEqualR( &nCompareFlags, xyzx, XMVectorZero() );
- const bool bSomeLaneZero = XMComparisonAnyTrue( nCompareFlags );
- return bSomeLaneZero;
-}
-
-// Branch helper: true when a.xyzw > b.xyzw holds in every lane.
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
- unsigned int nCompareFlags;
- XMVectorGreaterR( &nCompareFlags, a, b );
- const bool bAllLanes = XMComparisonAllTrue( nCompareFlags );
- return bAllLanes;
-}
-
-// Branch helper: true when a.xyzw >= b.xyzw holds in every lane.
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
- unsigned int nCompareFlags;
- XMVectorGreaterOrEqualR( &nCompareFlags, a, b );
- const bool bAllLanes = XMComparisonAllTrue( nCompareFlags );
- return bAllLanes;
-}
-
-// Branch helper: true when a.xyzw == b.xyzw holds in every lane.
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
- unsigned int nCompareFlags;
- XMVectorEqualR( &nCompareFlags, a, b );
- const bool bAllLanes = XMComparisonAllTrue( nCompareFlags );
- return bAllLanes;
-}
-
-
-// Builds a 4-bit mask of which floats have the high bit set:
-// bit 0 = sign(x), bit 1 = sign(y), bit 2 = sign(z), bit 3 = sign(w).
-FORCEINLINE int TestSignSIMD( const fltx4 & a )
-{
- // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though)
- const fltx4_union & lanes = (const fltx4_union &)a;
-
- // Isolate each sign bit and shift it down to the mask position for its lane.
- const unsigned int xBit = ( lanes.m128_u32[0] & 0x80000000 ) >> 31;
- const unsigned int yBit = ( lanes.m128_u32[1] & 0x80000000 ) >> 30;
- const unsigned int zBit = ( lanes.m128_u32[2] & 0x80000000 ) >> 29;
- const unsigned int wBit = ( lanes.m128_u32[3] & 0x80000000 ) >> 28;
-
- int nSignMask = 0;
- nSignMask |= xBit;
- nSignMask |= yBit;
- nSignMask |= zBit;
- nSignMask |= wBit;
- return nSignMask;
-}
-
-// Forces the w component of a vector to +0.0, leaving x, y, z as they were.
-// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
- // mask 1 selects only the w lane for replacement from the zero vector
- const fltx4 wCleared = __vrlimi( a, __vzero(), 1, 0 );
- return wCleared;
-}
-
-// Returns true if any lane of a has its sign bit set:
-// (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-{
- // NOTE: this tests the top bits of each vector element using integer math
- // (so it ignores NaNs - it will return true for "-NaN")
- unsigned int equalFlags = 0;
- // Build the sign mask in registers: splat -1, then shift each word left by
- // its own low five bits (31), leaving only the top bit set.
- fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31)
- signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000
- // Integer-compare (a & signMask) with zero; if not every lane equals zero,
- // at least one sign bit was set.
- __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags );
- return !XMComparisonAllTrue( equalFlags );
-}
-
-// (a==b) ? ~0:0, per lane (wraps __vcmpeqfp).
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = __vcmpeqfp( a, b );
- return mask;
-}
-
-
-// (a>b) ? ~0:0, per lane (wraps __vcmpgtfp).
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = __vcmpgtfp( a, b );
- return mask;
-}
-
-// (a>=b) ? ~0:0, per lane (wraps __vcmpgefp).
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = __vcmpgefp( a, b );
- return mask;
-}
-
-// (a<b) ? ~0:0, per lane. Expressed as "b > a" with the operands swapped
-// into __vcmpgtfp.
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = __vcmpgtfp( b, a );
- return mask;
-}
-
-// (a<=b) ? ~0:0, per lane. Expressed as "b >= a" with the operands swapped
-// into __vcmpgefp.
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = __vcmpgefp( b, a );
- return mask;
-}
-
-// (a <= b && a >= -b) ? ~0 : 0, forwarded to XMVectorInBounds.
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = XMVectorInBounds( a, b );
- return mask;
-}
-
-// Per-lane select:
-// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
- const fltx4 selected = __vsel( OldValue, NewValue, ReplacementMask );
- return selected;
-}
-
-// AKA "Broadcast", "Splat"
-// Returns a vector whose four lanes all hold flValue (a,a,a,a).
-FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
-{
- // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
- float * pValue = &flValue;
- Assert( pValue );
- // the address of the by-value parameter is expected to be 4-byte aligned
- Assert( ((unsigned int)pValue & 3) == 0);
- // load the float through its address, then splat word 0 across the register
- return __vspltw( __lvlx( pValue, 0 ), 0 );
-}
-
-// Pointer overload: reads the float at pValue and returns it in all four
-// lanes (a,a,a,a).
-FORCEINLINE fltx4 ReplicateX4( const float *pValue )
-{
- Assert( pValue );
- const fltx4 loaded = __lvlx( pValue, 0 );
- return __vspltw( loaded, 0 );
-}
-
-/// replicate a single 32 bit integer value to all 4 components of an m128
-/// (the integer's bit pattern is loaded unchanged; no int-to-float conversion)
-FORCEINLINE fltx4 ReplicateIX4( int nValue )
-{
- // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!)
- int * pValue = &nValue;
- Assert( pValue );
- // the address of the by-value parameter is expected to be 4-byte aligned
- Assert( ((unsigned int)pValue & 3) == 0);
- // load the word through its address, then splat word 0 across the register
- return __vspltw( __lvlx( pValue, 0 ), 0 );
-}
-
-// Round each lane towards positive infinity (wraps __vrfip).
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
- const fltx4 rounded = __vrfip( a );
- return rounded;
-}
-
-// Round each lane towards the nearest integer (wraps __vrfin).
-FORCEINLINE fltx4 RoundSIMD( const fltx4 &a )
-{
- const fltx4 rounded = __vrfin( a );
- return rounded;
-}
-
-// Round each lane towards negative infinity (wraps __vrfim).
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
-{
- const fltx4 rounded = __vrfim( a );
- return rounded;
-}
-
-// sqrt(a), more or less: an estimate, forwarded to XMVectorSqrtEst.
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a )
-{
- // This is emulated from rsqrt
- const fltx4 estimate = XMVectorSqrtEst( a );
- return estimate;
-}
-
-// sqrt(a), forwarded to XMVectorSqrt.
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a )
-{
- // This is emulated from rsqrt
- const fltx4 root = XMVectorSqrt( a );
- return root;
-}
-
-// 1/sqrt(a), more or less: the raw __vrsqrtefp estimate.
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a )
-{
- const fltx4 estimate = __vrsqrtefp( a );
- return estimate;
-}
-
-// Estimated 1/sqrt(a), with lanes equal to zero first replaced so the
-// estimate is not taken of an exact zero.
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons: OR Four_Epsilons into the lanes that compare
- // equal to zero.
- const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
- const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
- const fltx4 nonZeroInput = OrSIMD( a, epsilonWhereZero );
- return ReciprocalSqrtEstSIMD( nonZeroInput );
-}
-
-// 1/sqrt(a), forwarded to XMVectorReciprocalSqrt.
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a )
-{
- // This uses Newton-Raphson to improve the HW result
- const fltx4 result = XMVectorReciprocalSqrt( a );
- return result;
-}
-
-// 1/a, more or less: the raw __vrefp estimate.
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a )
-{
- const fltx4 estimate = __vrefp( a );
- return estimate;
-}
-
-/// 1/a for all 4 values, forwarded to XMVectorReciprocal (reciprocal
-/// approximation instruction plus newton iteration).
-/// No error checking!
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a )
-{
- // This uses Newton-Raphson to improve the HW result
- const fltx4 result = XMVectorReciprocal( a );
- return result;
-}
-
-// a/b, computed as a * (1/b).
-// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?)
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 oneOverB = ReciprocalSIMD( b );
- return MulSIMD( oneOverB, a );
-}
-
-/// Estimated 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons: OR Four_Epsilons into the lanes that compare
- // equal to zero.
- const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
- const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
- const fltx4 nonZeroInput = OrSIMD( a, epsilonWhereZero );
- return ReciprocalEstSIMD( nonZeroInput );
-}
-
-// 1/a via ReciprocalSIMD, with lanes equal to zero first replaced so the
-// reciprocal is not taken of an exact zero.
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons: OR Four_Epsilons into the lanes that compare
- // equal to zero.
- const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
- const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
- const fltx4 nonZeroInput = OrSIMD( a, epsilonWhereZero );
- return ReciprocalSIMD( nonZeroInput );
-
- // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does)
- // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a );
- // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask );
- // return ReciprocalSIMD( a_safe );
-}
-
-// 2^x for all values (the antilog), forwarded to XMVectorExp.
-// CHRISG: is it worth doing integer bitfiddling for this?
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
- const fltx4 powers = XMVectorExp( toPower );
- return powers;
-}
-
-// Clamps each component of in to the [min, max] range given per component,
-// forwarded to XMVectorClamp.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
- const fltx4 clamped = XMVectorClamp( in, min, max );
- return clamped;
-}
-
-// Loads a 4-vector from pSIMD, which need not be aligned (XMLoadVector4).
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
- const fltx4 loaded = XMLoadVector4( pSIMD );
- return loaded;
-}
-
-// Loads a 3-vector from pSIMD via XMLoadVector3
-// (as opposed to LoadUnalignedSIMD, which loads a 4-vec).
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
- const fltx4 loaded = XMLoadVector3( pSIMD );
- return loaded;
-}
-
-// Loads a 4-vector by reading pSIMD directly as a fltx4; the name
-// indicates the address is expected to be suitably aligned.
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
- const fltx4 *pVector = reinterpret_cast< const fltx4 *> ( pSIMD );
- return *pVector;
-}
-
-// For the transitional class: loads x, y, z from a VectorAligned
-// (by reference) and forces the w component to zero.
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
- const fltx4 xyz = XMLoadVector3A( pSIMD.Base() );
- // squelch w: mask 1 replaces only the w lane with the zero vector's lane
- const fltx4 xyz0 = __vrlimi( xyz, __vzero(), 1, 0 );
- return xyz0;
-}
-
-// For the transitional class: loads x, y, z from a VectorAligned
-// (by pointer) and forces the w component to zero.
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD )
-{
- const fltx4 xyz = XMLoadVector3A( pSIMD );
- // squelch w: mask 1 replaces only the w lane with the zero vector's lane
- const fltx4 xyz0 = __vrlimi( xyz, __vzero(), 1, 0 );
- return xyz0;
-}
-
-// Stores all four floats of a by writing through pSIMD as a fltx4; the
-// name indicates the destination is expected to be suitably aligned.
-FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a )
-{
- fltx4 *pDestination = reinterpret_cast< fltx4 *> ( pSIMD );
- *pDestination = a;
-}
-
-// Stores the 4-vector a to pSIMD, which need not be aligned (XMStoreVector4).
-FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a )
-{
- float * const pDestination = pSIMD;
- XMStoreVector4( pDestination, a );
-}
-
-// Stores a as a 3-vector to pSIMD via XMStoreVector3; the destination
-// need not be aligned.
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
- float * const pDestination = pSIMD;
- XMStoreVector3( pDestination, a );
-}
-
-
-// Strongly typed store of a 3-vector into a VectorAligned (XMStoreVector3A);
-// the type exists for typechecking as we transition to SIMD.
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- float *pDestination = pSIMD->Base();
- XMStoreVector3A( pDestination, a );
-}
-
-
-// Converts vSrc to SIGNED INTS and stores the four results in *pDest.
-// pDest->x = Int (vSrc.x)
-// The fixed point depth passed to the convert is the constant 0.
-// note: some architectures have means of doing
-// fixed point conversion when the fix depth is
-// specified as an immediate.. but there is no way
-// to guarantee an immediate as a parameter to function
-// like this.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
- const fltx4 converted = __vctsxs( vSrc, 0 );
- XMStoreVector4A( pDest->Base(), converted );
-}
-
-// Transposes, in place, the 4x4 matrix whose rows are x, y, z, w:
-// the four registers are packed into an XMMATRIX, transposed with
-// XMMatrixTranspose, and its rows are written back to the arguments.
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
-{
- const XMMATRIX rows = _XMMATRIX( x, y, z, w );
- const XMMATRIX transposed = XMMatrixTranspose( rows );
- x = transposed.r[0];
- y = transposed.r[1];
- z = transposed.r[2];
- w = transposed.r[3];
-}
-
-// Return zero in the fastest way -- faster even than loading.
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
- const fltx4 zeros = XMVectorZero();
- return zeros;
-}
-
-// Return a vector of ones in the fastest way -- faster even than loading.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
- const fltx4 ones = XMVectorSplatOne();
- return ones;
-}
-
-// Replicates the x component of a to all four lanes (XMVectorSplatX).
-FORCEINLINE fltx4 SplatXSIMD( fltx4 a )
-{
- const fltx4 splat = XMVectorSplatX( a );
- return splat;
-}
-
-// Replicates the y component of a to all four lanes (XMVectorSplatY).
-FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
-{
- const fltx4 splat = XMVectorSplatY( a );
- return splat;
-}
-
-// Replicates the z component of a to all four lanes (XMVectorSplatZ).
-FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
-{
- const fltx4 splat = XMVectorSplatZ( a );
- return splat;
-}
-
-// Replicates the w component of a to all four lanes (XMVectorSplatW).
-FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
-{
- const fltx4 splat = XMVectorSplatW( a );
- return splat;
-}
-
-// Returns a with its x lane replaced by the x lane of the second argument
-// (__vrlimi lane mask 8, no rotation).
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
- return __vrlimi( a, x, 8, 0 );
-}
-
-// Returns a with its y lane replaced by the y lane of the second argument
-// (__vrlimi lane mask 4, no rotation).
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
- return __vrlimi( a, y, 4, 0 );
-}
-
-// Returns a with its z lane replaced by the z lane of the second argument
-// (__vrlimi lane mask 2, no rotation).
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
- return __vrlimi( a, z, 2, 0 );
-}
-
-// Returns a with its w lane replaced by the w lane of the second argument
-// (__vrlimi lane mask 1, no rotation).
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
- return __vrlimi( a, w, 1, 0 );
-}
-
-// Returns a with lane nComponent (0 = x .. 3 = w) replaced by flValue.
-// nComponent indexes a four-entry table directly and is not range checked.
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
- // __vrlimi lane masks for x, y, z, w in that order
- static int s_nVrlimiMask[4] = { 8, 4, 2, 1 };
- const fltx4 broadcast = ReplicateX4( flValue );
- return __vrlimi( a, broadcast, s_nVrlimiMask[nComponent], 0 );
-}
-
-// Rotates the lanes of a left by one position: every lane (mask 8|4|2|1)
-// is replaced from a rotated by 1.
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
- fltx4 target = a;
- const fltx4 rotated = __vrlimi( target, a, 8 | 4 | 2 | 1, 1 );
- return rotated;
-}
-
-// Rotates the lanes of a left by two positions: every lane (mask 8|4|2|1)
-// is replaced from a rotated by 2.
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
- fltx4 target = a;
- const fltx4 rotated = __vrlimi( target, a, 8 | 4 | 2 | 1, 2 );
- return rotated;
-}
-
-
-
-// find the lowest component of a.x, a.y, a.z,
-// and replicate it to the whole return value.
-// ignores a.w.
-// Though this is only five instructions long,
-// they are all dependent, making this stall city.
-// Forcing this inline should hopefully help with scheduling.
-// The minimum is accumulated in the x lane by taking MinSIMD against
-// copies of a rotated left by one and by two lanes.
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = a ;
- // mask 8|4 replaces only the x and y lanes with the rotated values
- compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
- // compareOne is [y,z,G,G]
- fltx4 retval = MinSIMD( a, compareOne );
- // retVal is [min(x,y), min(y,z), G, G]
- // mask 8 replaces only the x lane, this time from a rotated by two
- compareOne = __vrlimi( compareOne, a, 8 , 2);
- // compareOne is [z, G, G, G]
- retval = MinSIMD( retval, compareOne );
- // retVal = [ min(min(x,y),z), G, G, G ]
-
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-}
-
-// find the highest component of a.x, a.y, a.z,
-// and replicate it to the whole return value.
-// ignores a.w.
-// Though this is only five instructions long,
-// they are all dependent, making this stall city.
-// Forcing this inline should hopefully help with scheduling.
-// The maximum is accumulated in the x lane by taking MaxSIMD against
-// copies of a rotated left by one and by two lanes.
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = a ;
- // mask 8|4 replaces only the x and y lanes with the rotated values
- compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
- // compareOne is [y,z,G,G]
- fltx4 retval = MaxSIMD( a, compareOne );
- // retVal is [max(x,y), max(y,z), G, G]
- // mask 8 replaces only the x lane, this time from a rotated by two
- compareOne = __vrlimi( compareOne, a, 8 , 2);
- // compareOne is [z, G, G, G]
- retval = MaxSIMD( retval, compareOne );
- // retVal = [ max(max(x,y),z), G, G, G ]
-
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-}
-
-
-// Transform many (horizontal) points in-place by a 3x4 matrix,
-// here already loaded onto three fltx4 registers (one per matrix row).
-// The points must be stored as 16-byte aligned. They are points
-// and not vectors because we assume the w-component to be 1.
-// To spare yourself the annoyance of loading the matrix yourself,
-// use one of the overloads below.
-// Declaration only; the definition is not in this part of the file.
-void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3);
-
-// Transform many (horizontal) points in-place by a 3x4 matrix.
-// The points must be stored as 16-byte aligned. They are points
-// and not vectors because we assume the w-component to be 1.
-// The matrix itself need not be aligned: its three rows are fetched with
-// unaligned loads and handed to the register-based overload.
-FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
-{
- const fltx4 row0 = LoadUnalignedSIMD( pMatrix[0] );
- const fltx4 row1 = LoadUnalignedSIMD( pMatrix[1] );
- const fltx4 row2 = LoadUnalignedSIMD( pMatrix[2] );
- TransformManyPointsBy( pVectors, numVectors, row0, row1, row2 );
-}
-
-// Transform many (horizontal) points in-place by a 3x4 matrix.
-// The points must be stored as 16-byte aligned. They are points
-// and not vectors because we assume the w-component to be 1.
-// Here the matrix must itself sit on a 16-byte boundary: its three rows
-// are fetched with aligned loads and handed to the register-based overload.
-FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
-{
- const fltx4 row0 = LoadAlignedSIMD( pMatrix[0] );
- const fltx4 row1 = LoadAlignedSIMD( pMatrix[1] );
- const fltx4 row2 = LoadAlignedSIMD( pMatrix[2] );
- TransformManyPointsBy( pVectors, numVectors, row0, row1, row2 );
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-
-// Loads four aligned 32-bit words into a SIMD register (XMLoadVector4A).
-FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
-{
- const i32x4 words = XMLoadVector4A( pSIMD );
- return words;
-}
-
-// Loads four unaligned 32-bit words into a SIMD register (XMLoadVector4).
-FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD)
-{
- const i32x4 words = XMLoadVector4( pSIMD );
- return words;
-}
-
-// Saves a into four words at pSIMD, which must be 16-byte aligned; the
-// store is a plain assignment through an i32x4 pointer.
-FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- i32x4 *pDestination = reinterpret_cast< i32x4 *> ( pSIMD );
- *pDestination = a;
-}
-
-// Saves a into the four words of an intx4, by plain assignment through an
-// i32x4 pointer to its Base() storage.
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
- i32x4 *pDestination = reinterpret_cast< i32x4 *> ( pSIMD.Base() );
- *pDestination = a;
-}
-
-// Saves a into four words at pSIMD, which need not be aligned (XMStoreVector4).
-FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- int32 * const pDestination = pSIMD;
- XMStoreVector4( pDestination, a );
-}
-
-
-// Takes a register of four uints and returns them as single precision
-// floats. No fixed point conversion is done (the fix depth passed to
-// __vcfux is 0).
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
- const fltx4 asFloats = __vcfux( vSrcA, 0 );
- return asFloats;
-}
-
-
-// Takes a register of four signed ints and returns them as single
-// precision floats. No fixed point conversion is done (the fix depth
-// passed to __vcfsx is 0).
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
- const fltx4 asFloats = __vcfsx( vSrcA, 0 );
- return asFloats;
-}
-
-// Take a fltx4 containing fixed-point uints and
-// return them as single precision floats. Each uint
-// will be divided by 2^immed after conversion
-// (eg, this is fixed point math).
-// Written as a macro, so uImmed is pasted straight into the __vcfux call.
-/* as if:
- FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
- {
- return __vcfux( vSrcA, uImmed );
- }
-*/
-#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) ))
-
-// Take a fltx4 containing fixed-point sints and
-// return them as single precision floats. Each int
-// will be divided by 2^immed (eg, this is fixed point
-// math).
-// Written as a macro, so uImmed is pasted straight into the __vcfsx call.
-/* as if:
- FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
- {
- return __vcfsx( vSrcA, uImmed );
- }
-*/
-#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) ))
-
-// set all components of a vector to a signed immediate int number.
-// Written as a macro, so x is pasted straight into the __vspltisw call.
-/* as if:
- FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate)
- {
- return __vspltisw( toImmediate );
- }
-*/
-#define IntSetImmediateSIMD(x) (__vspltisw(x))
-
-/*
- Treats both fltx4 arguments as four unsigned 32-bit words.
- vSrcA holds the words to be shifted,
- vSrcB holds the amount to shift by AS INTS.
-
- for i = 0 to 3
- shift = vSrcB_i*32:(i*32)+4
- vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
-*/
-FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB)
-{
- const fltx4 shifted = __vslw( vSrcA, vSrcB );
- return shifted;
-}
-
-// Returns, by value, float lane idx of a (read through fltx4_union).
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
- // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
- const fltx4_union & view = (const fltx4_union &)a;
- return view.m128_f32[ idx ];
-}
-
-// Returns a writable reference to float lane idx of a (through fltx4_union).
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
- fltx4_union & view = (fltx4_union &)a;
- float & lane = view.m128_f32[idx];
- return lane;
-}
-
-// Converts the whole vector to unsigned integers with __vctuxs (fix depth 0)
-// and returns lane idx of the converted result.
-FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
-{
- const fltx4 converted = __vctuxs( a, 0 );
- const fltx4_union & view = (const fltx4_union &)converted;
- return view.m128_u32[idx];
-}
-
-
-// Returns, by value, lane idx of a reinterpreted as an unsigned 32-bit
-// integer (no numeric conversion).
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
- const fltx4_union & view = (const fltx4_union &)a;
- return view.m128_u32[idx];
-}
-
-// Returns a writable reference to lane idx of a reinterpreted as an
-// unsigned 32-bit integer.
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
- fltx4_union & view = (fltx4_union &)a;
- uint32 & lane = view.m128_u32[idx];
- return lane;
-}
-
-#else
-
-//---------------------------------------------------------------------
-// Intel/SSE implementation
-//---------------------------------------------------------------------
-
-// Stores the four floats of a to pSIMD with the aligned store _mm_store_ps.
-FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
-{
- float * const pDestination = pSIMD;
- _mm_store_ps( pDestination, a );
-}
-
-// Stores the four floats of a to pSIMD with the unaligned store _mm_storeu_ps.
-FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
-{
- float * const pDestination = pSIMD;
- _mm_storeu_ps( pDestination, a );
-}
-
-
-// Forward declarations: StoreUnaligned3SIMD just below calls these two
-// rotates before their definitions appear further down.
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a );
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a );
-
-// Stores only x, y, z of a to pSIMD[0..2], using three scalar stores:
-// a is rotated so that y and then z sit in the lowest lane for _mm_store_ss.
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
- const fltx4 yInLowLane = RotateLeft( a );
- const fltx4 zInLowLane = RotateLeft2( a );
- _mm_store_ss( pSIMD, a );
- _mm_store_ss( pSIMD + 1, yInLowLane );
- _mm_store_ss( pSIMD + 2, zInLowLane );
-}
-
-// Strongly typed store into a VectorAligned -- syntactic castor oil used for
-// typechecking as we transition to SIMD. Note that this forwards to
-// StoreAlignedSIMD, so all four floats of a are written.
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- float *pDestination = pSIMD->Base();
- StoreAlignedSIMD( pDestination, a );
-}
-
-// Loads four floats from pSIMD with the aligned load _mm_load_ps.
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
- const float *pFloats = reinterpret_cast< const float *> ( pSIMD );
- return _mm_load_ps( pFloats );
-}
-
-// a & b, bitwise (wraps _mm_and_ps).
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = _mm_and_ps( a, b );
- return bits;
-}
-
-// ~a & b, bitwise (wraps _mm_andnot_ps).
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = _mm_andnot_ps( a, b );
- return bits;
-}
-
-// a ^ b, bitwise (wraps _mm_xor_ps).
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = _mm_xor_ps( a, b );
- return bits;
-}
-
-// a | b, bitwise (wraps _mm_or_ps).
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 bits = _mm_or_ps( a, b );
- return bits;
-}
-
-// Forces the w component of a vector to +0.0 by ANDing with the
-// g_SIMD_clear_wmask constant.
-// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
- const fltx4 clearWMask = LoadAlignedSIMD( g_SIMD_clear_wmask );
- return AndSIMD( a, clearWMask );
-}
-
-// For the transitional class: aligned load of a VectorAligned, after which
-// the w component is squashed to zero.
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
- const fltx4 xyzw = LoadAlignedSIMD( pSIMD.Base() );
- return SetWToZeroSIMD( xyzw );
-}
-
-// Loads four floats from pSIMD with the unaligned load _mm_loadu_ps.
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
- const float *pFloats = reinterpret_cast<const float *>( pSIMD );
- return _mm_loadu_ps( pFloats );
-}
-
-// Loads a 3-vector (x, y, z) from a possibly unaligned address.
-// Only the three floats that belong to the vector are read and the w lane
-// is set to zero. The earlier 16-byte _mm_loadu_ps also read the float
-// after the vector, which is past the end of a 12-byte source.
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
- const float *pFloats = reinterpret_cast<const float *>( pSIMD );
- return _mm_setr_ps( pFloats[0], pFloats[1], pFloats[2], 0.0f );
-}
-
-/// Replicates a single 32 bit integer value to all 4 components of an m128.
-/// The bits of i are reinterpreted as a float (no numeric conversion), put in
-/// the low lane, and then shuffled out to every lane.
-FORCEINLINE fltx4 ReplicateIX4( int i )
-{
- const fltx4 lowLane = _mm_set_ss( * ( ( float *) &i ) );
- return _mm_shuffle_ps( lowLane, lowLane, 0 );
-}
-
-
-// Returns a vector whose four lanes all hold flValue: the scalar is put in
-// the low lane and then shuffled out to every lane.
-FORCEINLINE fltx4 ReplicateX4( float flValue )
-{
- const __m128 lowLane = _mm_set_ss( flValue );
- return _mm_shuffle_ps( lowLane, lowLane, 0 );
-}
-
-
-// Returns, by value, float lane idx of a.
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
- // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
-#ifdef POSIX
- // no m128_f32 member is used here; index the register as a float array
- return (reinterpret_cast<float const *>(&a))[idx];
-#else
- return a.m128_f32[ idx ];
-#endif
-}
-
-// Returns a writable reference to float lane idx of a.
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
-#ifdef POSIX
- // no m128_f32 member is used here; index the register as a float array
- return (reinterpret_cast<float *>(&a))[idx];
-#else
- return a.m128_f32[ idx ];
-#endif
-}
-
-// Reads float lane idx of a and converts it to uint32 with a C-style cast.
-FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
-{
- const float flLane = SubFloat( a, idx );
- return (uint32)flLane;
-}
-
-// Returns, by value, lane idx of a reinterpreted as an unsigned 32-bit
-// integer (no numeric conversion).
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
-#ifdef POSIX
- // no m128_u32 member is used here; index the register as a uint32 array
- return (reinterpret_cast<uint32 const *>(&a))[idx];
-#else
- return a.m128_u32[idx];
-#endif
-}
-
-// Returns a writable reference to lane idx of a reinterpreted as an
-// unsigned 32-bit integer.
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
-#ifdef POSIX
- // no m128_u32 member is used here; index the register as a uint32 array
- return (reinterpret_cast<uint32 *>(&a))[idx];
-#else
- return a.m128_u32[idx];
-#endif
-}
-
-// Return zero in the fastest way -- on the x360, faster even than loading.
-// Here it simply returns the Four_Zeros constant.
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
- const fltx4 zeros = Four_Zeros;
- return zeros;
-}
-
-// Return a vector of ones in the fastest way -- on the x360, faster even
-// than loading. Here it simply returns the Four_Ones constant.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
- const fltx4 ones = Four_Ones;
- return ones;
-}
-
-// Bitwise select: where ReplacementMask bits are set the result takes
-// NewValue, elsewhere it keeps OldValue.
-// (mask & new) | (~mask & old)
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
- const fltx4 takenFromNew = AndSIMD( ReplacementMask, NewValue );
- const fltx4 keptFromOld = AndNotSIMD( ReplacementMask, OldValue );
- return OrSIMD( takenFromNew, keptFromOld );
-}
-
-// remember, the SSE numbers its words 3 2 1 0
-// The way we want to specify shuffles is backwards from the default
-// MM_SHUFFLE_REV is in array index order (default is reversed):
-// MM_SHUFFLE_REV(a,b,c,d) names the source lanes for result lanes 0,1,2,3
-// by passing the arguments to _MM_SHUFFLE in the opposite order.
-#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a)
-
-// Replicates lane 0 (x) of a to all four lanes.
-FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a )
-{
- const fltx4 splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) );
- return splat;
-}
-
-// Replicates lane 1 (y) of a to all four lanes.
-FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a )
-{
- const fltx4 splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) );
- return splat;
-}
-
-// Replicates lane 2 (z) of a to all four lanes.
-FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a )
-{
- const fltx4 splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) );
- return splat;
-}
-
-// Replicates lane 3 (w) of a to all four lanes. The selector is the same
-// in either argument order, so plain _MM_SHUFFLE is used.
-FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a )
-{
- const fltx4 splat = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 3, 3, 3 ) );
- return splat;
-}
-
-// Returns a with the lane selected by g_SIMD_ComponentMask[0] taken from the
-// second argument instead.
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
- const fltx4 laneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[0] );
- return MaskedAssign( laneMask, x, a );
-}
-
-// Returns a with the lane selected by g_SIMD_ComponentMask[1] taken from the
-// second argument instead.
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
- const fltx4 laneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[1] );
- return MaskedAssign( laneMask, y, a );
-}
-
-// Returns a with the lane selected by g_SIMD_ComponentMask[2] taken from the
-// second argument instead.
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
- const fltx4 laneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[2] );
- return MaskedAssign( laneMask, z, a );
-}
-
-// Returns a with the lane selected by g_SIMD_ComponentMask[3] taken from the
-// second argument instead.
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
- const fltx4 laneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[3] );
- return MaskedAssign( laneMask, w, a );
-}
-
-// Returns a with the lane selected by g_SIMD_ComponentMask[nComponent]
-// replaced by flValue. nComponent indexes the mask table directly and is
-// not range checked.
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
- const fltx4 broadcast = ReplicateX4( flValue );
- const fltx4 laneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] );
- return MaskedAssign( laneMask, broadcast, a );
-}
-
-// Lane rotation by one towards index 0:
-// a b c d -> b c d a
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
- const fltx4 rotated = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) );
- return rotated;
-}
-
-// Lane rotation by two towards index 0:
-// a b c d -> c d a b
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
- const fltx4 rotated = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) );
- return rotated;
-}
-
-// Lane rotation by one away from index 0:
-// a b c d -> d a b c
-FORCEINLINE fltx4 RotateRight( const fltx4 & a )
-{
- // In array order the result must be { a[3], a[0], a[1], a[2] }.
- // The selector used before, _MM_SHUFFLE( 0, 3, 2, 1 ), is the same
- // immediate as RotateLeft's MM_SHUFFLE_REV( 1, 2, 3, 0 ) and therefore
- // produced b c d a instead.
- return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 3, 0, 1, 2 ) );
-}
-
-// Lane rotation by two away from index 0 (same result as rotating left
-// by two):
-// a b c d -> c d a b
-FORCEINLINE fltx4 RotateRight2( const fltx4 & a )
-{
- const fltx4 rotated = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) );
- return rotated;
-}
-
-
-// a+b, per lane (wraps _mm_add_ps).
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 sum = _mm_add_ps( a, b );
- return sum;
-}
-
-// a-b, per lane (wraps _mm_sub_ps).
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 difference = _mm_sub_ps( a, b );
- return difference;
-}
-
-// a*b, per lane (wraps _mm_mul_ps).
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 product = _mm_mul_ps( a, b );
- return product;
-}
-
-// a/b, per lane (wraps _mm_div_ps).
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 quotient = _mm_div_ps( a, b );
- return quotient;
-}
-
-// a*b + c, built from a separate multiply and add.
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c )
-{
- const fltx4 product = MulSIMD( a, b );
- return AddSIMD( product, c );
-}
-
-// c - a*b, built from a separate multiply and subtract.
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c )
-{
- const fltx4 product = MulSIMD( a, b );
- return SubSIMD( c, product );
-}
-
-// Three-component dot product of a and b, replicated to all four lanes.
-// The per-lane products are summed as scalars (lanes 0, 1, 2 in that order).
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
- const fltx4 products = MulSIMD( a, b );
- const float flSum = SubFloat( products, 0 ) + SubFloat( products, 1 ) + SubFloat( products, 2 );
- return ReplicateX4( flSum );
-}
-
-// Four-component dot product of a and b, replicated to all four lanes.
-// The per-lane products are summed as scalars (lanes 0 through 3 in order).
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
- const fltx4 products = MulSIMD( a, b );
- const float flSum = SubFloat( products, 0 ) + SubFloat( products, 1 ) + SubFloat( products, 2 ) + SubFloat( products, 3 );
- return ReplicateX4( flSum );
-}
-
-// Sine of each lane, computed one lane at a time with scalar sin().
-//TODO: implement as four-way Taylor series (see xbox implementation)
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
- fltx4 sines;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubFloat( sines, nLane ) = sin( SubFloat( radians, nLane ) );
- }
- return sines;
-}
-
-// Writes sine and cosine of the x, y, z lanes of radians into the matching
-// lanes of the outputs, one scalar SinCos() call per lane. Lane 3 (w) of
-// sine and cosine is not written.
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- // FIXME: Make a fast SSE version
- for ( int nLane = 0; nLane < 3; ++nLane )
- {
-  SinCos( SubFloat( radians, nLane ), &SubFloat( sine, nLane ), &SubFloat( cosine, nLane ) );
- }
-}
-
-// Writes sine and cosine of every lane of radians into the matching lanes
-// of the outputs, one scalar SinCos() call per lane.
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- // FIXME: Make a fast SSE version
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SinCos( SubFloat( radians, nLane ), &SubFloat( sine, nLane ), &SubFloat( cosine, nLane ) );
- }
-}
-
-// Arcsine of each lane, computed one lane at a time with scalar asin().
-//TODO: implement as four-way Taylor series (see xbox implementation)
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
- // FIXME: Make a fast SSE version
- fltx4 angles;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubFloat( angles, nLane ) = asin( SubFloat( sine, nLane ) );
- }
- return angles;
-}
-
-// Arccosine of each lane, computed one lane at a time with scalar acos().
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
- fltx4 angles;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubFloat( angles, nLane ) = acos( SubFloat( cs, nLane ) );
- }
- return angles;
-}
-
-// tan^-1(a/b), i.e. pass the sine in as a and the cosine in as b.
-// Computed one lane at a time with scalar atan2().
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
- fltx4 angles;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubFloat( angles, nLane ) = atan2( SubFloat( a, nLane ), SubFloat( b, nLane ) );
- }
- return angles;
-}
-
-// negate: -a, computed as (0 - a).
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a)
-{
- const fltx4 zeros = LoadZeroSIMD();
- return SubSIMD( zeros, a );
-}
-
-// Mask of which floats have the high bit set (wraps _mm_movemask_ps).
-FORCEINLINE int TestSignSIMD( const fltx4 & a )
-{
- const int nSignMask = _mm_movemask_ps( a );
- return nSignMask;
-}
-
-// (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-// Implemented as "is any sign bit set", via TestSignSIMD.
-FORCEINLINE bool IsAnyNegative( const fltx4 & a )
-{
- const int nSignMask = TestSignSIMD( a );
- return nSignMask != 0;
-}
-
-// (a==b) ? ~0:0, per lane (wraps _mm_cmpeq_ps).
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = _mm_cmpeq_ps( a, b );
- return mask;
-}
-
-// (a>b) ? ~0:0, per lane (wraps _mm_cmpgt_ps).
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = _mm_cmpgt_ps( a, b );
- return mask;
-}
-
-// (a>=b) ? ~0:0, per lane (wraps _mm_cmpge_ps).
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = _mm_cmpge_ps( a, b );
- return mask;
-}
-
-// (a<b) ? ~0:0, per lane (wraps _mm_cmplt_ps).
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = _mm_cmplt_ps( a, b );
- return mask;
-}
-
-// (a<=b) ? ~0:0, per lane (wraps _mm_cmple_ps).
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 mask = _mm_cmple_ps( a, b );
- return mask;
-}
-
-// for branching when a.xyzw > b.xyzw
-// True only when a > b holds in all four lanes. The test is made on the
-// positive comparison, the same way IsAllEqual does it, so a lane holding
-// NaN (which compares false) makes the result false. The earlier form,
-// "no lane is <=", reported true for such lanes.
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
- return TestSignSIMD( CmpGtSIMD( a, b ) ) == 0xf;
-}
-
-// for branching when a.xyzw >= b.xyzw
-// True only when a >= b holds in all four lanes. The test is made on the
-// positive comparison, the same way IsAllEqual does it, so a lane holding
-// NaN (which compares false) makes the result false. The earlier form,
-// "no lane is <", reported true for such lanes.
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
- return TestSignSIMD( CmpGeSIMD( a, b ) ) == 0xf;
-}
-
-// Branch helper: true if a.xyzw == b.xyzw in all four lanes, i.e. the
-// equality mask has all four sign bits set.
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 equalMask = CmpEqSIMD( a, b );
- const int nLaneBits = TestSignSIMD( equalMask );
- return nLaneBits == 0xf;
-}
-
-// (a <= b && a >= -b) ? ~0 : 0, per lane.
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b )
-{
- const fltx4 belowUpper = CmpLeSIMD( a, b );
- const fltx4 aboveLower = CmpGeSIMD( a, NegSIMD( b ) );
- return AndSIMD( belowUpper, aboveLower );
-}
-
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
-{
- return _mm_min_ps( a, b );
-}
-
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
-{
- return _mm_max_ps( a, b );
-}
-
-
-
-// SSE lacks rounding operations.
-// Really.
-// You can emulate them by setting the rounding mode for the
-// whole processor and then converting to int, and then back again.
-// But every time you set the rounding mode, you clear out the
-// entire pipeline. So, I can't do them per operation. You
-// have to do it once, before the loop that would call these.
-// Round towards positive infinity
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) );
- return retVal;
-
-}
-
-fltx4 fabs( const fltx4 & x );
-// Round towards negative infinity
-// This is the implementation that was here before; it assumes
-// you are in round-to-floor mode, which I guess is usually the
-// case for us vis-a-vis SSE. It's totally unnecessary on
-// VMX, which has a native floor op.
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
- return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-
-
-inline bool IsAllZeros( const fltx4 & var )
-{
- return TestSignSIMD( CmpEqSIMD( var, Four_Zeros ) ) == 0xF;
-}
-
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
-{
- return _mm_sqrt_ps( a );
-}
-
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
-{
- return _mm_sqrt_ps( a );
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
-{
- return _mm_rsqrt_ps( a );
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- ret = ReciprocalSqrtEstSIMD( ret );
- return ret;
-}
-
-/// uses newton iteration for higher precision results than ReciprocalSqrtEstSIMD
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
-{
- fltx4 guess = ReciprocalSqrtEstSIMD( a );
- // newton iteration for 1/sqrt(a) : y(n+1) = 1/2 (y(n)*(3-a*y(n)^2));
- guess = MulSIMD( guess, SubSIMD( Four_Threes, MulSIMD( a, MulSIMD( guess, guess ))));
- guess = MulSIMD( Four_PointFives, guess);
- return guess;
-}
-
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
-{
- return _mm_rcp_ps( a );
-}
-
-/// 1/x for all 4 values, more or less
-/// 1/0 will result in a big but NOT infinite result
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- ret = ReciprocalEstSIMD( ret );
- return ret;
-}
-
-/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration.
-/// No error checking!
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
-{
- fltx4 ret = ReciprocalEstSIMD( a );
- // newton iteration is: Y(n+1) = 2*Y(n)-a*Y(n)^2
- ret = SubSIMD( AddSIMD( ret, ret ), MulSIMD( a, MulSIMD( ret, ret ) ) );
- return ret;
-}
-
-/// 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- ret = ReciprocalSIMD( ret );
- return ret;
-}
-
-// CHRISG: is it worth doing integer bitfiddling for this?
-// 2^x for all values (the antilog)
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = powf( 2, SubFloat(toPower, 0) );
- SubFloat( retval, 1 ) = powf( 2, SubFloat(toPower, 1) );
- SubFloat( retval, 2 ) = powf( 2, SubFloat(toPower, 2) );
- SubFloat( retval, 3 ) = powf( 2, SubFloat(toPower, 3) );
-
- return retval;
-}
-
-// Clamps the components of a vector to a specified minimum and maximum range.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
- return MaxSIMD( min, MinSIMD( max, in ) );
-}
-
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w)
-{
- _MM_TRANSPOSE4_PS( x, y, z, w );
-}
-
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = RotateLeft( a );
- // compareOne is [y,z,G,x]
- fltx4 retval = MinSIMD( a, compareOne );
- // retVal is [min(x,y), ... ]
- compareOne = RotateLeft2( a );
- // compareOne is [z, G, x, y]
- retval = MinSIMD( retval, compareOne );
- // retVal = [ min(min(x,y),z)..]
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-
-}
-
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = RotateLeft( a );
- // compareOne is [y,z,G,x]
- fltx4 retval = MaxSIMD( a, compareOne );
- // retVal is [max(x,y), ... ]
- compareOne = RotateLeft2( a );
- // compareOne is [z, G, x, y]
- retval = MaxSIMD( retval, compareOne );
- // retVal = [ max(max(x,y),z)..]
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-
-
-#if 0 /* pc does not have these ops */
-// splat all components of a vector to a signed immediate int number.
-FORCEINLINE fltx4 IntSetImmediateSIMD(int to)
-{
- //CHRISG: SSE2 has this, but not SSE1. What to do?
- fltx4 retval;
- SubInt( retval, 0 ) = to;
- SubInt( retval, 1 ) = to;
- SubInt( retval, 2 ) = to;
- SubInt( retval, 3 ) = to;
- return retval;
-}
-#endif
-
-// Load 4 aligned words into a SIMD register
-FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
-{
- return _mm_load_ps( reinterpret_cast<const float *>(pSIMD) );
-}
-
-// Load 4 unaligned words into a SIMD register
-FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
-{
- return _mm_loadu_ps( reinterpret_cast<const float *>(pSIMD) );
-}
-
-// save into four words, 16-byte aligned
-FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
-{
- _mm_store_ps( reinterpret_cast<float *>(pSIMD), a );
-}
-
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
- _mm_store_ps( reinterpret_cast<float *>(pSIMD.Base()), a );
-}
-
-FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
-{
- _mm_storeu_ps( reinterpret_cast<float *>(pSIMD), a );
-}
-
-
-// CHRISG: the conversion functions all seem to operate on m64's only...
-// how do we make them work here?
-
-// Take a fltx4 containing fixed-point uints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) );
- SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) );
- SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) );
- SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) );
- return retval;
-}
-
-
-// Take a fltx4 containing fixed-point sints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[0]));
- SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[1]));
- SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[2]));
- SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[3]));
- return retval;
-}
-
-/*
- works on fltx4's as if they are four uints.
- the first parameter contains the words to be shifted,
- the second contains the amount to shift by AS INTS
-
- for i = 0 to 3
- shift = vSrcB_i*32:(i*32)+4
- vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
-*/
-FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
-{
- i32x4 retval;
- SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0);
- SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1);
- SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2);
- SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3);
-
-
- return retval;
-}
-
-
-// Fixed-point conversion and save as SIGNED INTS.
-// pDest->x = Int (vSrc.x)
-// note: some architectures have means of doing
-// fixed point conversion when the fix depth is
-// specified as an immediate.. but there is no way
-// to guarantee an immediate as a parameter to function
-// like this.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
- __m64 bottom = _mm_cvttps_pi32( vSrc );
- __m64 top = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) );
-
- *reinterpret_cast<__m64 *>(&(*pDest)[0]) = bottom;
- *reinterpret_cast<__m64 *>(&(*pDest)[2]) = top;
-
- _mm_empty();
-}
-
-
-
-#endif
-
-
-
-/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are
-/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated.
-class ALIGN16 FourVectors
-{
-public:
- fltx4 x, y, z;
-
- FORCEINLINE void DuplicateVector(Vector const &v) //< set all 4 vectors to the same vector value
- {
- x=ReplicateX4(v.x);
- y=ReplicateX4(v.y);
- z=ReplicateX4(v.z);
- }
-
- FORCEINLINE fltx4 const & operator[](int idx) const
- {
- return *((&x)+idx);
- }
-
- FORCEINLINE fltx4 & operator[](int idx)
- {
- return *((&x)+idx);
- }
-
- FORCEINLINE void operator+=(FourVectors const &b) //< add 4 vectors to another 4 vectors
- {
- x=AddSIMD(x,b.x);
- y=AddSIMD(y,b.y);
- z=AddSIMD(z,b.z);
- }
-
- FORCEINLINE void operator-=(FourVectors const &b) //< subtract 4 vectors from another 4
- {
- x=SubSIMD(x,b.x);
- y=SubSIMD(y,b.y);
- z=SubSIMD(z,b.z);
- }
-
- FORCEINLINE void operator*=(FourVectors const &b) //< scale all four vectors per component scale
- {
- x=MulSIMD(x,b.x);
- y=MulSIMD(y,b.y);
- z=MulSIMD(z,b.z);
- }
-
- FORCEINLINE void operator*=(const fltx4 & scale) //< scale
- {
- x=MulSIMD(x,scale);
- y=MulSIMD(y,scale);
- z=MulSIMD(z,scale);
- }
-
- FORCEINLINE void operator*=(float scale) //< uniformly scale all 4 vectors
- {
- fltx4 scalepacked = ReplicateX4(scale);
- *this *= scalepacked;
- }
-
- FORCEINLINE fltx4 operator*(FourVectors const &b) const //< 4 dot products
- {
- fltx4 dot=MulSIMD(x,b.x);
- dot=MaddSIMD(y,b.y,dot);
- dot=MaddSIMD(z,b.z,dot);
- return dot;
- }
-
- FORCEINLINE fltx4 operator*(Vector const &b) const //< dot product all 4 vectors with 1 vector
- {
- fltx4 dot=MulSIMD(x,ReplicateX4(b.x));
- dot=MaddSIMD(y,ReplicateX4(b.y), dot);
- dot=MaddSIMD(z,ReplicateX4(b.z), dot);
- return dot;
- }
-
- FORCEINLINE void VProduct(FourVectors const &b) //< component by component mul
- {
- x=MulSIMD(x,b.x);
- y=MulSIMD(y,b.y);
- z=MulSIMD(z,b.z);
- }
- FORCEINLINE void MakeReciprocal(void) //< (x,y,z)=(1/x,1/y,1/z)
- {
- x=ReciprocalSIMD(x);
- y=ReciprocalSIMD(y);
- z=ReciprocalSIMD(z);
- }
-
- FORCEINLINE void MakeReciprocalSaturate(void) //< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23
- {
- x=ReciprocalSaturateSIMD(x);
- y=ReciprocalSaturateSIMD(y);
- z=ReciprocalSaturateSIMD(z);
- }
-
- // Assume the given matrix is a rotation, and rotate these vectors by it.
- // If you have a long list of FourVectors structures that you all want
- // to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
- inline void RotateBy(const matrix3x4_t& matrix);
-
- /// You can use this to rotate a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix.
- static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
-
- /// Assume the vectors are points, and transform them in place by the matrix.
- inline void TransformBy(const matrix3x4_t& matrix);
-
- /// You can use this to Transform a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix. The fourth is the
- /// output buffer, which must not overlap the pVectors buffer. This is not
- /// an in-place transformation.
- static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut );
-
- /// You can use this to Transform a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix. The fourth is the
- /// output buffer, which must not overlap the pVectors buffer.
- /// This is an in-place transformation.
- static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
-
- // X(),Y(),Z() - get at the desired component of the i'th (0..3) vector.
- FORCEINLINE const float & X(int idx) const
- {
- // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
- return SubFloat( (fltx4 &)x, idx );
- }
-
- FORCEINLINE const float & Y(int idx) const
- {
- return SubFloat( (fltx4 &)y, idx );
- }
-
- FORCEINLINE const float & Z(int idx) const
- {
- return SubFloat( (fltx4 &)z, idx );
- }
-
- FORCEINLINE float & X(int idx)
- {
- return SubFloat( x, idx );
- }
-
- FORCEINLINE float & Y(int idx)
- {
- return SubFloat( y, idx );
- }
-
- FORCEINLINE float & Z(int idx)
- {
- return SubFloat( z, idx );
- }
-
- FORCEINLINE Vector Vec(int idx) const //< unpack one of the vectors
- {
- return Vector( X(idx), Y(idx), Z(idx) );
- }
-
- FourVectors(void)
- {
- }
-
- FourVectors( FourVectors const &src )
- {
- x=src.x;
- y=src.y;
- z=src.z;
- }
-
- FORCEINLINE void operator=( FourVectors const &src )
- {
- x=src.x;
- y=src.y;
- z=src.z;
- }
-
- /// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op
- FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- // TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360
- // use an unfolded implementation here
-#if _X360
- fltx4 tx = LoadUnalignedSIMD( &a.x );
- fltx4 ty = LoadUnalignedSIMD( &b.x );
- fltx4 tz = LoadUnalignedSIMD( &c.x );
- fltx4 tw = LoadUnalignedSIMD( &d.x );
- fltx4 r0 = __vmrghw(tx, tz);
- fltx4 r1 = __vmrghw(ty, tw);
- fltx4 r2 = __vmrglw(tx, tz);
- fltx4 r3 = __vmrglw(ty, tw);
-
- x = __vmrghw(r0, r1);
- y = __vmrglw(r0, r1);
- z = __vmrghw(r2, r3);
-#else
- x = LoadUnalignedSIMD( &( a.x ));
- y = LoadUnalignedSIMD( &( b.x ));
- z = LoadUnalignedSIMD( &( c.x ));
- fltx4 w = LoadUnalignedSIMD( &( d.x ));
- // now, matrix is:
- // x y z ?
- // x y z ?
- // x y z ?
- // x y z ?
- TransposeSIMD(x, y, z, w);
-#endif
- }
-
- /// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op.
- /// all 4 vectors must be 128 bit boundary
- FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d)
- {
-#if _X360
- fltx4 tx = LoadAlignedSIMD(a);
- fltx4 ty = LoadAlignedSIMD(b);
- fltx4 tz = LoadAlignedSIMD(c);
- fltx4 tw = LoadAlignedSIMD(d);
- fltx4 r0 = __vmrghw(tx, tz);
- fltx4 r1 = __vmrghw(ty, tw);
- fltx4 r2 = __vmrglw(tx, tz);
- fltx4 r3 = __vmrglw(ty, tw);
-
- x = __vmrghw(r0, r1);
- y = __vmrglw(r0, r1);
- z = __vmrghw(r2, r3);
-#else
- x = LoadAlignedSIMD( a );
- y = LoadAlignedSIMD( b );
- z = LoadAlignedSIMD( c );
- fltx4 w = LoadAlignedSIMD( d );
- // now, matrix is:
- // x y z ?
- // x y z ?
- // x y z ?
- // x y z ?
- TransposeSIMD( x, y, z, w );
-#endif
- }
-
- FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x );
- }
-
- /// return the squared length of all 4 vectors
- FORCEINLINE fltx4 length2(void) const
- {
- return (*this)*(*this);
- }
-
- /// return the approximate length of all 4 vectors. uses the sqrt approximation instruction
- FORCEINLINE fltx4 length(void) const
- {
- return SqrtEstSIMD(length2());
- }
-
- /// normalize all 4 vectors in place. not mega-accurate (uses reciprocal approximation instruction)
- FORCEINLINE void VectorNormalizeFast(void)
- {
- fltx4 mag_sq=(*this)*(*this); // length^2
- (*this) *= ReciprocalSqrtEstSIMD(mag_sq); // *(1.0/sqrt(length^2))
- }
-
- /// normalize all 4 vectors in place.
- FORCEINLINE void VectorNormalize(void)
- {
- fltx4 mag_sq=(*this)*(*this); // length^2
- (*this) *= ReciprocalSqrtSIMD(mag_sq); // *(1.0/sqrt(length^2))
- }
-
- /// construct a FourVectors from 4 separate Vectors
- FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- LoadAndSwizzle(a,b,c,d);
- }
-
- /// construct a FourVectors from 4 separate Vectors
- FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d)
- {
- LoadAndSwizzleAligned(a,b,c,d);
- }
-
- FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt )
- {
- fltx4 fl4dX = SubSIMD( pnt.x, x );
- fltx4 fl4dY = SubSIMD( pnt.y, y );
- fltx4 fl4dZ = SubSIMD( pnt.z, z );
- return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) );
-
- }
-
- FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const
- {
- FourVectors lineDelta = p1;
- lineDelta -= p0;
- fltx4 OOlineDirDotlineDir = ReciprocalSIMD( p1 * p1 );
- FourVectors v4OurPnt = *this;
- v4OurPnt -= p0;
- return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
- }
-
- FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const
- {
- FourVectors lineDelta = p1;
- FourVectors v4OurPnt = *this;
- v4OurPnt -= p0;
- lineDelta -= p0;
-
- fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta );
-
- fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
-
- fl4T = MinSIMD( fl4T, Four_Ones );
- fl4T = MaxSIMD( fl4T, Four_Zeros );
- lineDelta *= fl4T;
- return v4OurPnt.DistToSqr( lineDelta );
- }
-
-};
-
-/// form 4 cross products
-inline FourVectors operator ^(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=SubSIMD(MulSIMD(a.y,b.z),MulSIMD(a.z,b.y));
- ret.y=SubSIMD(MulSIMD(a.z,b.x),MulSIMD(a.x,b.z));
- ret.z=SubSIMD(MulSIMD(a.x,b.y),MulSIMD(a.y,b.x));
- return ret;
-}
-
-/// component-by-componentwise MAX operator
-inline FourVectors maximum(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=MaxSIMD(a.x,b.x);
- ret.y=MaxSIMD(a.y,b.y);
- ret.z=MaxSIMD(a.z,b.z);
- return ret;
-}
-
-/// component-by-componentwise MIN operator
-inline FourVectors minimum(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=MinSIMD(a.x,b.x);
- ret.y=MinSIMD(a.y,b.y);
- ret.z=MinSIMD(a.z,b.z);
- return ret;
-}
-
-/// calculate reflection vector. incident and normal dir assumed normalized
-FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal )
-{
- FourVectors ret = incident;
- fltx4 iDotNx2 = incident * normal;
- iDotNx2 = AddSIMD( iDotNx2, iDotNx2 );
- FourVectors nPart = normal;
- nPart *= iDotNx2;
- ret -= nPart; // i-2(n*i)n
- return ret;
-}
-
-/// calculate slide vector. removes all components of a vector which are perpendicular to a normal vector.
-FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal )
-{
- FourVectors ret = incident;
- fltx4 iDotN = incident * normal;
- FourVectors nPart = normal;
- nPart *= iDotN;
- ret -= nPart; // i-(n*i)n
- return ret;
-}
-
-
-// Assume the given matrix is a rotation, and rotate these vectors by it.
-// If you have a long list of FourVectors structures that you all want
-// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
-void FourVectors::RotateBy(const matrix3x4_t& matrix)
-{
- // Splat out each of the entries in the matrix to a fltx4. Do this
- // in the order that we will need them, to hide latency. I'm
- // avoiding making an array of them, so that they'll remain in
- // registers.
- fltx4 matSplat00, matSplat01, matSplat02,
- matSplat10, matSplat11, matSplat12,
- matSplat20, matSplat21, matSplat22;
-
- {
- // Load the matrix into local vectors. Sadly, matrix3x4_ts are
- // often unaligned. The w components will be the tranpose row of
- // the matrix, but we don't really care about that.
- fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] );
- fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] );
- fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] );
-
- matSplat00 = SplatXSIMD( matCol0 );
- matSplat01 = SplatYSIMD( matCol0 );
- matSplat02 = SplatZSIMD( matCol0 );
-
- matSplat10 = SplatXSIMD( matCol1 );
- matSplat11 = SplatYSIMD( matCol1 );
- matSplat12 = SplatZSIMD( matCol1 );
-
- matSplat20 = SplatXSIMD( matCol2 );
- matSplat21 = SplatYSIMD( matCol2 );
- matSplat22 = SplatZSIMD( matCol2 );
- }
-
- // Trust in the compiler to schedule these operations correctly:
- fltx4 outX, outY, outZ;
- outX = AddSIMD( AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ), MulSIMD( z, matSplat02 ) );
- outY = AddSIMD( AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ), MulSIMD( z, matSplat12 ) );
- outZ = AddSIMD( AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ), MulSIMD( z, matSplat22 ) );
-
- x = outX;
- y = outY;
- z = outZ;
-}
-
-// Assume the given matrix is a rotation, and rotate these vectors by it.
-// If you have a long list of FourVectors structures that you all want
-// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
-void FourVectors::TransformBy(const matrix3x4_t& matrix)
-{
- // Splat out each of the entries in the matrix to a fltx4. Do this
- // in the order that we will need them, to hide latency. I'm
- // avoiding making an array of them, so that they'll remain in
- // registers.
- fltx4 matSplat00, matSplat01, matSplat02,
- matSplat10, matSplat11, matSplat12,
- matSplat20, matSplat21, matSplat22;
-
- {
- // Load the matrix into local vectors. Sadly, matrix3x4_ts are
- // often unaligned. The w components will be the tranpose row of
- // the matrix, but we don't really care about that.
- fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] );
- fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] );
- fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] );
-
- matSplat00 = SplatXSIMD( matCol0 );
- matSplat01 = SplatYSIMD( matCol0 );
- matSplat02 = SplatZSIMD( matCol0 );
-
- matSplat10 = SplatXSIMD( matCol1 );
- matSplat11 = SplatYSIMD( matCol1 );
- matSplat12 = SplatZSIMD( matCol1 );
-
- matSplat20 = SplatXSIMD( matCol2 );
- matSplat21 = SplatYSIMD( matCol2 );
- matSplat22 = SplatZSIMD( matCol2 );
- }
-
- // Trust in the compiler to schedule these operations correctly:
- fltx4 outX, outY, outZ;
-
- outX = MaddSIMD( z, matSplat02, AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ) );
- outY = MaddSIMD( z, matSplat12, AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ) );
- outZ = MaddSIMD( z, matSplat22, AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ) );
-
- x = AddSIMD( outX, ReplicateX4( matrix[0][3] ));
- y = AddSIMD( outY, ReplicateX4( matrix[1][3] ));
- z = AddSIMD( outZ, ReplicateX4( matrix[2][3] ));
-}
-
-
-
-/// quick, low quality perlin-style noise() function suitable for real time use.
-/// return value is -1..1. Only reliable around +/- 1 million or so.
-fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z );
-fltx4 NoiseSIMD( FourVectors const &v );
-
-// vector valued noise direction
-FourVectors DNoiseSIMD( FourVectors const &v );
-
-// vector value "curl" noise function. see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html
-FourVectors CurlNoiseSIMD( FourVectors const &v );
-
-
-/// calculate the absolute value of a packed single
-inline fltx4 fabs( const fltx4 & x )
-{
- return AndSIMD( x, LoadAlignedSIMD( g_SIMD_clear_signmask ) );
-}
-
-/// negate all four components of a SIMD packed single
-inline fltx4 fnegate( const fltx4 & x )
-{
- return XorSIMD( x, LoadAlignedSIMD( g_SIMD_signmask ) );
-}
-
-
-fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent);
-
-// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some
-// restictions: fractional exponents are only handled with 2 bits of precision. Basically,
-// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25).
-// negative and fractional powers are handled by the SIMD reciprocal and square root approximation
-// instructions and so are not especially accurate ----Note that this routine does not raise
-// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)).
-inline fltx4 PowSIMD( const fltx4 & x, float exponent )
-{
- return Pow_FixedPoint_Exponent_SIMD(x,(int) (4.0*exponent));
-}
-
-
-
-// random number generation - generate 4 random numbers quickly.
-
-void SeedRandSIMD(uint32 seed); // seed the random # generator
-fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range
-
-// for multithreaded, you need to use these and use the argument form of RandSIMD:
-int GetSIMDRandContext( void );
-void ReleaseSIMDRandContext( int nContext );
-
-FORCEINLINE fltx4 RandSignedSIMD( void ) // -1..1
-{
- return SubSIMD( MulSIMD( Four_Twos, RandSIMD() ), Four_Ones );
-}
-
-
-// SIMD versions of mathlib simplespline functions
-// hermite basis function for smooth interpolation
-// Similar to Gain() above, but very cheap to call
-// value should be between 0 & 1 inclusive
-inline fltx4 SimpleSpline( const fltx4 & value )
-{
- // Arranged to avoid a data dependency between these two MULs:
- fltx4 valueDoubled = MulSIMD( value, Four_Twos );
- fltx4 valueSquared = MulSIMD( value, value );
-
- // Nice little ease-in, ease-out spline-like curve
- return SubSIMD(
- MulSIMD( Four_Threes, valueSquared ),
- MulSIMD( valueDoubled, valueSquared ) );
-}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val,
- const fltx4 & A, const fltx4 & BMinusA,
- const fltx4 & OneOverBMinusA, const fltx4 & C,
- const fltx4 & DMinusC )
-{
-// if ( A == B )
-// return val >= B ? D : C;
- fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA );
- return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) );
-}
-
-inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val,
- const fltx4 & A, const fltx4 & BMinusA,
- const fltx4 & OneOverBMinusA, const fltx4 & C,
- const fltx4 & DMinusC )
-{
-// if ( A == B )
-// return val >= B ? D : C;
- fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA );
- cVal = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, cVal ) );
- return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) );
-}
-
-FORCEINLINE fltx4 FracSIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
- return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival );
- return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val )
-{
- fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival );
- return SubSIMD( val, ival );
-}
-
-
-// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi.
-// no range reduction is done - for values outside of 0..1 you won't like the results
-FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val )
-{
- // really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between.
- // sufficient for simple oscillation.
- return MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) );
-}
-
-FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val )
-{
- // not a bad approximation : parabola always over-estimates. Squared parabola always
- // underestimates. So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin)
- fltx4 fl4BadEst = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) );
- return AddSIMD( MulSIMD( Four_Point225s, SubSIMD( MulSIMD( fl4BadEst, fl4BadEst ), fl4BadEst ) ), fl4BadEst );
-}
-
-// full range useable implementations
-FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
- fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
- fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
- fltx4 fl4Sin = _SinEst01SIMD( fl4val );
- fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
- return fl4Sin;
-
-}
-
-FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
- fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
- fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
- fltx4 fl4Sin = _Sin01SIMD( fl4val );
- fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
- return fl4Sin;
-
-}
-
-// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1)
-
-FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter )
-{
- // convert perlin-style-bias parameter to the value right for the approximation
- return SubSIMD( ReciprocalSIMD( bias_parameter ), Four_Twos );
-}
-
-FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param )
-{
- // similar to bias function except pass precalced bias value from calling PreCalcBiasParameter.
-
- //!!speed!! use reciprocal est?
- //!!speed!! could save one op by precalcing _2_ values
- return DivSIMD( val, AddSIMD( MulSIMD( precalc_param, SubSIMD( Four_Ones, val ) ), Four_Ones ) );
-}
-
-//-----------------------------------------------------------------------------
-// Box/plane test
-// NOTE: The w component of emins + emaxs must be 1 for this to work
-//-----------------------------------------------------------------------------
-FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f )
-{
- fltx4 corners[2];
- fltx4 normal = LoadUnalignedSIMD( p->normal.Base() );
- fltx4 dist = ReplicateX4( -p->dist );
- normal = SetWSIMD( normal, dist ); // normal = ( nx, ny, nz, -dist ); with w == 1 in the corners, Dot4SIMD gives n.corner - dist
- fltx4 t4 = ReplicateX4( tolerance );
- fltx4 negt4 = ReplicateX4( -tolerance );
- fltx4 cmp = CmpGeSIMD( normal, Four_Zeros ); // per lane: is this component of the plane vector >= 0?
- corners[0] = MaskedAssign( cmp, emaxs, emins ); // per axis, the extent that maximizes the dot product
- corners[1] = MaskedAssign( cmp, emins, emaxs ); // per axis, the extent that minimizes the dot product
- fltx4 dot1 = Dot4SIMD( normal, corners[0] );
- fltx4 dot2 = Dot4SIMD( normal, corners[1] );
- cmp = CmpGeSIMD( dot1, t4 ); // largest signed distance >= tolerance
- fltx4 cmp2 = CmpGtSIMD( negt4, dot2 ); // smallest signed distance < -tolerance
- fltx4 result = MaskedAssign( cmp, Four_Ones, Four_Zeros ); // contributes 1
- fltx4 result2 = MaskedAssign( cmp2, Four_Twos, Four_Zeros ); // contributes 2
- result = AddSIMD( result, result2 ); // 0, 1, 2 or 3 ( 3 = both conditions hold )
- intx4 sides;
- ConvertStoreAsIntsSIMD( &sides, result );
- return sides[0]; // only the first lane of the converted result is returned
-}
-
-#endif // _ssemath_h
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: - defines SIMD "structure of arrays" classes and functions.
+//
+//===========================================================================//
+#ifndef SSEMATH_H
+#define SSEMATH_H
+
+#if defined( _X360 )
+#include <xboxmath.h>
+#else
+#include <xmmintrin.h>
+#endif
+
+#include <mathlib/vector.h>
+#include <mathlib/mathlib.h>
+
+#if defined(GNUC)
+#define USE_STDC_FOR_SIMD 0
+#else
+#define USE_STDC_FOR_SIMD 0
+#endif
+
+#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0))
+#define _SSE1 1
+#endif
+
+// I thought about defining a class/union for the SIMD packed floats instead of using fltx4,
+// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur
+// the relationship between packed floats and packed integer types and (b) not sure that the
+// compiler would handle generating good code for the intrinsics.
+
+#if USE_STDC_FOR_SIMD
+
+typedef union
+{
+ float m128_f32[4];
+ uint32 m128_u32[4];
+} fltx4;
+
+typedef fltx4 i32x4;
+typedef fltx4 u32x4;
+
+#elif ( defined( _X360 ) )
+
+typedef union
+{
+ // This union allows float/int access (which generally shouldn't be done in inner loops)
+ __vector4 vmx;
+ float m128_f32[4];
+ uint32 m128_u32[4];
+} fltx4_union;
+
+typedef __vector4 fltx4;
+typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops.
+typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops.
+
+#else
+
+typedef __m128 fltx4;
+typedef __m128 i32x4;
+typedef __m128 u32x4;
+
+#endif
+
+// The FLTX4 type is a fltx4 used as a parameter to a function.
+// On the 360, the best way to do this is pass-by-copy on the registers.
+// On the PC, the best way is to pass by const reference.
+// The compiler will sometimes, but not always, replace a pass-by-const-ref
+// with a pass-in-reg on the 360; to avoid this confusion, you can
+// explicitly use a FLTX4 as the parameter type.
+#ifdef _X360
+typedef __vector4 FLTX4;
+#else
+typedef const fltx4 & FLTX4;
+#endif
+
+// A 16-byte aligned int32 datastructure
+// (for use when writing out fltx4's as SIGNED
+// ints).
+struct ALIGN16 intx4
+{
+    int32 m_i32[4];
+
+    // Mutable access to lane 'which'. The index is not range-checked.
+    inline int & operator[](int which)
+    {
+        return m_i32[which];
+    }
+
+    // Read-only access to lane 'which'. The index is not range-checked.
+    inline const int & operator[](int which) const
+    {
+        return m_i32[which];
+    }
+
+    // Pointer to the first of the four lanes.
+    inline int32 *Base()
+    {
+        return m_i32;
+    }
+
+    inline const int32 *Base() const
+    {
+        return m_i32;
+    }
+
+    // True when all four lanes match. Returns plain bool: a const-qualified
+    // scalar return type has no effect and only draws compiler warnings.
+    inline bool operator==(const intx4 &other) const
+    {
+        for ( int i = 0; i < 4; ++i )
+        {
+            if ( m_i32[i] != other.m_i32[i] )
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+} ALIGN16_POST;
+
+
+#if defined( _DEBUG ) && defined( _X360 )
+FORCEINLINE void TestVPUFlags()
+{
+ // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com)
+ __vector4 a; // receives the register contents read by mfvscr below
+ __asm
+ {
+ mfvscr a;
+ }
+ unsigned int * flags = (unsigned int *)&a; // view the 16-byte vector as four 32-bit words
+ unsigned int controlWord = flags[3]; // the last word is the one checked
+ Assert(controlWord == 0); // any set bit means the VPU is not in the expected mode
+}
+#else // !( _DEBUG && _X360 )
+FORCEINLINE void TestVPUFlags() {} // no-op everywhere except debug Xbox 360 builds
+#endif // _DEBUG && _X360
+
+
+// useful constants in SIMD packed float format:
+// (note: some of these aren't stored on the 360,
+// but are manufactured directly in one or two
+// instructions, saving a load and possible L2
+// miss.)
+#ifndef _X360
+extern const fltx4 Four_Zeros; // 0 0 0 0
+extern const fltx4 Four_Ones; // 1 1 1 1
+extern const fltx4 Four_Twos; // 2 2 2 2
+extern const fltx4 Four_Threes; // 3 3 3 3
+extern const fltx4 Four_Fours; // guess.
+extern const fltx4 Four_Point225s; // .225 .225 .225 .225
+extern const fltx4 Four_PointFives; // .5 .5 .5 .5
+extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
+extern const fltx4 Four_2ToThe21s; // (1<<21)..
+extern const fltx4 Four_2ToThe22s; // (1<<22)..
+extern const fltx4 Four_2ToThe23s; // (1<<23)..
+extern const fltx4 Four_2ToThe24s; // (1<<24)..
+extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
+extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
+#else
+#define Four_Zeros XMVectorZero() // 0 0 0 0
+#define Four_Ones XMVectorSplatOne() // 1 1 1 1
+extern const fltx4 Four_Twos; // 2 2 2 2
+extern const fltx4 Four_Threes; // 3 3 3 3
+extern const fltx4 Four_Fours; // guess.
+extern const fltx4 Four_Point225s; // .225 .225 .225 .225
+extern const fltx4 Four_PointFives; // .5 .5 .5 .5
+extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
+extern const fltx4 Four_2ToThe21s; // (1<<21)..
+extern const fltx4 Four_2ToThe22s; // (1<<22)..
+extern const fltx4 Four_2ToThe23s; // (1<<23)..
+extern const fltx4 Four_2ToThe24s; // (1<<24)..
+extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
+extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
+#endif
+extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX
+extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX
+extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
+
+// external aligned integer constants
+extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4
+extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4
+extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4
+extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0
+extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
+extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0
+extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4
+
+// This mask is used for skipping the tail of things. If you have N elements in an array, and wish
+// to mask out the tail, g_SIMD_SkipTailMask[N & 3] is what you want to use for the last iteration.
+extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST;
+
+// Define prefetch macros.
+// The characteristics of cache and prefetch are completely
+// different between the different platforms, so you DO NOT
+// want to just define one macro that maps to every platform
+// intrinsic under the hood -- you need to prefetch at different
+// intervals between x86 and PPC, for example, and that is
+// a higher level code change.
+// On the other hand, I'm tired of typing #ifdef _X360
+// all over the place, so this is just a nop on Intel, PS3.
+#ifdef _X360
+#define PREFETCH360(address, offset) __dcbt(offset,address)
+#else
+#define PREFETCH360(x,y) // nothing
+#endif
+
+#if USE_STDC_FOR_SIMD
+
+//---------------------------------------------------------------------
+// Standard C (fallback/Linux) implementation (only there for compat - slow)
+//---------------------------------------------------------------------
+
+FORCEINLINE float SubFloat( const fltx4 & a, int idx ) // read lane idx (0..3, not range-checked) as a float
+{
+ return a.m128_f32[ idx ];
+}
+
+FORCEINLINE float & SubFloat( fltx4 & a, int idx ) // writable reference to float lane idx (0..3, not range-checked)
+{
+ return a.m128_f32[idx];
+}
+
+FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) // read the raw 32 bits of lane idx (0..3, not range-checked)
+{
+ return a.m128_u32[idx];
+}
+
+FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) // writable reference to the raw 32 bits of lane idx (0..3, not range-checked)
+{
+ return a.m128_u32[idx];
+}
+
+// Return zero in every lane. In this fallback path that is simply a copy of the Four_Zeros constant.
+FORCEINLINE fltx4 LoadZeroSIMD( void )
+{
+ return Four_Zeros;
+}
+
+// Return one in every lane. In this fallback path that is simply a copy of the Four_Ones constant.
+FORCEINLINE fltx4 LoadOneSIMD( void )
+{
+ return Four_Ones;
+}
+
+// Broadcast a.x into all four lanes.
+FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a )
+{
+    fltx4 splat;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( splat, i ) = SubFloat( a, 0 );
+    }
+    return splat;
+}
+
+// Broadcast a.y into all four lanes.
+FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
+{
+    fltx4 splat;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( splat, i ) = SubFloat( a, 1 );
+    }
+    return splat;
+}
+
+// Broadcast a.z into all four lanes.
+FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
+{
+    fltx4 splat;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( splat, i ) = SubFloat( a, 2 );
+    }
+    return splat;
+}
+
+// Broadcast a.w into all four lanes.
+FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
+{
+    fltx4 splat;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( splat, i ) = SubFloat( a, 3 );
+    }
+    return splat;
+}
+
+// Returns a copy of a whose x lane is replaced by the x lane of 'x'.
+FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
+{
+    fltx4 merged( a );
+    const float flNewX = SubFloat( x, 0 );
+    SubFloat( merged, 0 ) = flNewX;
+    return merged;
+}
+
+// Returns a copy of a whose y lane is replaced by the y lane of 'y'.
+FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
+{
+    fltx4 merged( a );
+    const float flNewY = SubFloat( y, 1 );
+    SubFloat( merged, 1 ) = flNewY;
+    return merged;
+}
+
+// Returns a copy of a whose z lane is replaced by the z lane of 'z'.
+FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
+{
+    fltx4 merged( a );
+    const float flNewZ = SubFloat( z, 2 );
+    SubFloat( merged, 2 ) = flNewZ;
+    return merged;
+}
+
+// Returns a copy of a whose w lane is replaced by the w lane of 'w'.
+FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
+{
+    fltx4 merged( a );
+    const float flNewW = SubFloat( w, 3 );
+    SubFloat( merged, 3 ) = flNewW;
+    return merged;
+}
+
+// Returns a copy of a with lane nComponent (0..3, not range-checked) set to flValue.
+FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
+{
+    fltx4 patched( a );
+    float &flLane = SubFloat( patched, nComponent );
+    flLane = flValue;
+    return patched;
+}
+
+// Rotate the lanes one position to the left: a b c d -> b c d a
+FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
+{
+    fltx4 rotated;
+    for ( int i = 0; i < 4; ++i )
+    {
+        // lane i takes the value of the lane to its right, wrapping 3 -> 0
+        SubFloat( rotated, i ) = SubFloat( a, ( i + 1 ) & 3 );
+    }
+    return rotated;
+}
+
+// Rotate the lanes two positions (swap the halves): a b c d -> c d a b
+FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
+{
+    fltx4 rotated;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( rotated, i ) = SubFloat( a, ( i + 2 ) & 3 );
+    }
+    return rotated;
+}
+
+#define BINOP(op) /* whole function body: per-lane float 'a op b' on parameters that must be named a and b; includes the return */ \
+ fltx4 retVal; \
+ SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \
+ SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \
+ SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \
+ SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \
+ return retVal;
+
+#define IBINOP(op) /* whole function body: per-lane 'a op b' on the raw 32-bit lane patterns of parameters named a and b; includes the return */ \
+ fltx4 retVal; \
+ SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \
+ SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \
+ SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \
+ SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \
+ return retVal;
+
+// Per-lane sum: a+b
+FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
+{
+    fltx4 sum;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( sum, i ) = ( SubFloat( a, i ) + SubFloat( b, i ) );
+    }
+    return sum;
+}
+
+// Per-lane difference. (The stray ';' that used to follow the closing
+// brace was an empty declaration and has been dropped.)
+FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
+{
+    BINOP(-);
+}
+
+// Per-lane product.
+FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
+{
+    fltx4 product;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( product, i ) = ( SubFloat( a, i ) * SubFloat( b, i ) );
+    }
+    return product;
+}
+
+// Per-lane quotient; no special handling of zero divisors.
+FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
+{
+    fltx4 quotient;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( quotient, i ) = ( SubFloat( a, i ) / SubFloat( b, i ) );
+    }
+    return quotient;
+}
+
+
+// Multiply-add built from MulSIMD followed by AddSIMD.
+FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
+{
+    const fltx4 product = MulSIMD( a, b );
+    return AddSIMD( product, c );
+}
+
+// Multiply-subtract built from MulSIMD followed by SubSIMD. (The stray ';'
+// that used to follow the closing brace was an empty declaration and has
+// been dropped.)
+FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
+{
+    return SubSIMD( c, MulSIMD(a,b) );
+}
+
+
+// Per-lane sine of an angle given in radians.
+FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
+{
+    fltx4 sines;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( sines, i ) = sin( SubFloat( radians, i ) );
+    }
+    return sines;
+}
+
+// Sine and cosine of the x, y and z lanes only; the w lanes of sine and
+// cosine are not written.
+FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+    for ( int i = 0; i < 3; ++i )
+    {
+        SinCos( SubFloat( radians, i ), &SubFloat( sine, i ), &SubFloat( cosine, i ) );
+    }
+}
+
+// Sine and cosine of all four lanes, written to the two output vectors.
+FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        SinCos( SubFloat( radians, i ), &SubFloat( sine, i ), &SubFloat( cosine, i ) );
+    }
+}
+
+// Per-lane arcsine.
+FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
+{
+    fltx4 angles;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( angles, i ) = asin( SubFloat( sine, i ) );
+    }
+    return angles;
+}
+
+// Per-lane arccosine.
+FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
+{
+    fltx4 angles;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( angles, i ) = acos( SubFloat( cs, i ) );
+    }
+    return angles;
+}
+
+// tan^-1(a/b) per lane .. ie, pass sin in as a and cos in as b
+FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
+{
+    fltx4 angles;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( angles, i ) = atan2( SubFloat( a, i ), SubFloat( b, i ) );
+    }
+    return angles;
+}
+
+// Per-lane maximum.
+FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
+{
+    fltx4 larger;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( larger, i ) = max( SubFloat( a, i ), SubFloat( b, i ) );
+    }
+    return larger;
+}
+
+// Per-lane minimum.
+FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
+{
+    fltx4 smaller;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( smaller, i ) = min( SubFloat( a, i ), SubFloat( b, i ) );
+    }
+    return smaller;
+}
+
+// Bitwise AND of the raw 32-bit lane patterns.
+FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
+{
+    fltx4 bits;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( bits, i ) = ( SubInt( a, i ) & SubInt( b, i ) );
+    }
+    return bits;
+}
+
+// Bitwise AND of b with the complement of a, on the raw lane patterns.
+FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
+{
+    fltx4 bits;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( bits, i ) = ~SubInt( a, i ) & SubInt( b, i );
+    }
+    return bits;
+}
+
+// Bitwise XOR of the raw 32-bit lane patterns.
+FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
+{
+    fltx4 bits;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( bits, i ) = ( SubInt( a, i ) ^ SubInt( b, i ) );
+    }
+    return bits;
+}
+
+// Bitwise OR of the raw 32-bit lane patterns.
+FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
+{
+    fltx4 bits;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( bits, i ) = ( SubInt( a, i ) | SubInt( b, i ) );
+    }
+    return bits;
+}
+
+// Per-lane arithmetic negation.
+FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
+{
+    fltx4 negated;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( negated, i ) = -SubFloat( a, i );
+    }
+    return negated;
+}
+
+// True only when every lane compares equal to zero.
+FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero?
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        if ( !( SubFloat( a, i ) == 0.0 ) )
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+// For branching: true only when a > b holds in every one of the four lanes.
+FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        if ( !( SubFloat(a,i) > SubFloat(b,i) ) )
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// For branching: true only when a >= b holds in every one of the four lanes.
+FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        if ( !( SubFloat(a,i) >= SubFloat(b,i) ) )
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// For branching: true only when a == b holds in every one of the four lanes.
+FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        if ( !( SubFloat(a,i) == SubFloat(b,i) ) )
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+// Builds a 4-bit mask from the lane sign bits: sign(x) -> bit 0, sign(y) -> bit 1,
+// sign(z) -> bit 2, sign(w) -> bit 3.
+FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
+{
+    int nMask = 0;
+    for ( int i = 0; i < 4; ++i )
+    {
+        // isolate bit 31 of lane i and move it down to bit i
+        nMask |= ( SubInt( a, i ) & 0x80000000 ) >> ( 31 - i );
+    }
+    return nMask;
+}
+
+// True when any lane has its sign bit set (decided via TestSignSIMD).
+FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
+{
+    const int nSignMask = TestSignSIMD( a );
+    return nSignMask != 0;
+}
+
+// Per-lane comparison mask: all bits set where a == b, all clear elsewhere.
+FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( mask, i ) = ( SubFloat( a, i ) == SubFloat( b, i ) ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+// Per-lane comparison mask: all bits set where a > b, all clear elsewhere.
+FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( mask, i ) = ( SubFloat( a, i ) > SubFloat( b, i ) ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+// Per-lane comparison mask: all bits set where a >= b, all clear elsewhere.
+FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( mask, i ) = ( SubFloat( a, i ) >= SubFloat( b, i ) ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+// Per-lane comparison mask: all bits set where a < b, all clear elsewhere.
+FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( mask, i ) = ( SubFloat( a, i ) < SubFloat( b, i ) ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+// Per-lane comparison mask: all bits set where a <= b, all clear elsewhere.
+FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( mask, i ) = ( SubFloat( a, i ) <= SubFloat( b, i ) ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+// Per-lane mask: all bits set where a lies within [-b, b], all clear elsewhere.
+FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
+{
+    fltx4 mask;
+    for ( int i = 0; i < 4; ++i )
+    {
+        const float flValue = SubFloat( a, i );
+        const float flBound = SubFloat( b, i );
+        SubInt( mask, i ) = ( flValue <= flBound && flValue >= -flBound ) ? ~0 : 0;
+    }
+    return mask;
+}
+
+
+// Bitwise select: takes the bits of NewValue where ReplacementMask is set
+// and the bits of OldValue where it is clear.
+FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
+{
+    const fltx4 fromNew = AndSIMD( ReplacementMask, NewValue );
+    const fltx4 fromOld = AndNotSIMD( ReplacementMask, OldValue );
+    return OrSIMD( fromNew, fromOld );
+}
+
+// Fill all four lanes with the same float.
+FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
+{
+    fltx4 filled;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( filled, i ) = flValue;
+    }
+    return filled;
+}
+
+/// Fill all four lanes of a fltx4 with the bit pattern of one 32-bit integer.
+FORCEINLINE fltx4 ReplicateIX4( int nValue )
+{
+    fltx4 filled;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubInt( filled, i ) = nValue;
+    }
+    return filled;
+}
+
+// Per-lane ceil(): round towards positive infinity.
+FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
+{
+    fltx4 rounded;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( rounded, i ) = ceil( SubFloat( a, i ) );
+    }
+    return rounded;
+}
+
+// Per-lane floor(): round towards negative infinity.
+FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
+{
+    fltx4 rounded;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( rounded, i ) = floor( SubFloat( a, i ) );
+    }
+    return rounded;
+}
+
+// Square-root "estimate"; in this fallback it is computed with the same
+// sqrt() call as SqrtSIMD.
+FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
+{
+    fltx4 roots;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( roots, i ) = sqrt( SubFloat( a, i ) );
+    }
+    return roots;
+}
+
+// Per-lane square root.
+FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
+{
+    fltx4 roots;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( roots, i ) = sqrt( SubFloat( a, i ) );
+    }
+    return roots;
+}
+
+// Reciprocal-square-root "estimate"; in this fallback it uses the same
+// formula as ReciprocalSqrtSIMD.
+FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
+{
+    fltx4 inverseRoots;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) );
+    }
+    return inverseRoots;
+}
+
+// 1/sqrt(a) per lane, except that a lane equal to zero is replaced by
+// FLT_EPSILON before the square root is taken.
+FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
+{
+    fltx4 inverseRoots;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) != 0.0f ? SubFloat( a, i ) : FLT_EPSILON );
+    }
+    return inverseRoots;
+}
+
+// Per-lane reciprocal square root.
+FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
+{
+    fltx4 inverseRoots;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverseRoots, i ) = 1.0 / sqrt( SubFloat( a, i ) );
+    }
+    return inverseRoots;
+}
+
+// Reciprocal "estimate"; in this fallback it is the same plain division
+// as ReciprocalSIMD.
+FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
+{
+    fltx4 inverses;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverses, i ) = 1.0 / SubFloat( a, i );
+    }
+    return inverses;
+}
+
+// Per-lane reciprocal; zero lanes are not special-cased.
+FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
+{
+    fltx4 inverses;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverses, i ) = 1.0 / SubFloat( a, i );
+    }
+    return inverses;
+}
+
+/// 1/x for all 4 values.
+/// A zero lane is replaced by FLT_EPSILON first, so 1/0 yields a big but NOT infinite result.
+FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
+{
+    fltx4 inverses;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverses, i ) = 1.0 / (SubFloat( a, i ) == 0.0f ? FLT_EPSILON : SubFloat( a, i ));
+    }
+    return inverses;
+}
+
+// 1/a per lane, with a zero lane replaced by FLT_EPSILON before dividing.
+FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
+{
+    fltx4 inverses;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( inverses, i ) = 1.0 / (SubFloat( a, i ) == 0.0f ? FLT_EPSILON : SubFloat( a, i ));
+    }
+    return inverses;
+}
+
+// 2^x for every lane (the antilog), computed with powf.
+FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
+{
+    fltx4 powers;
+    for ( int i = 0; i < 4; ++i )
+    {
+        SubFloat( powers, i ) = powf( 2, SubFloat(toPower, i) );
+    }
+    return powers;
+}
+
+// Dot product of the x, y, z lanes, replicated into all four lanes of the result.
+FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
+{
+    // start from the first product and add the others in the same left-to-right order
+    float flSum = SubFloat( a, 0 ) * SubFloat( b, 0 );
+    for ( int i = 1; i < 3; ++i )
+    {
+        flSum = flSum + SubFloat( a, i ) * SubFloat( b, i );
+    }
+    return ReplicateX4( flSum );
+}
+
+// Dot product of all four lanes, replicated into all four lanes of the result.
+FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
+{
+    // start from the first product and add the others in the same left-to-right order
+    float flSum = SubFloat( a, 0 ) * SubFloat( b, 0 );
+    for ( int i = 1; i < 4; ++i )
+    {
+        flSum = flSum + SubFloat( a, i ) * SubFloat( b, i );
+    }
+    return ReplicateX4( flSum );
+}
+
+// Clamps each lane of 'in' to the range [min, max]: the upper bound is
+// applied first, then the lower bound.
+FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
+{
+    const fltx4 cappedAbove = MinSIMD( max, in );
+    return MaxSIMD( min, cappedAbove );
+}
+
+// Squelch the w component of a vector to +0.0.
+// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
+FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
+{
+    fltx4 retval;
+    retval = a;
+    // w is lane 3. The previous code cleared lane 0, which wiped x and
+    // left w untouched.
+    SubFloat( retval, 3 ) = 0;
+    return retval;
+}
+
+FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) // copy a full fltx4 (four lanes) from pSIMD; same plain copy as LoadAlignedSIMD in this fallback
+{
+ return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
+}
+
+// Load three consecutive floats from pSIMD into the x, y and z lanes; w is
+// set to +0.0. Only 12 bytes are read: the previous code copied a whole
+// fltx4 and therefore read 4 bytes past the end of a 3-float source.
+FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
+{
+    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
+    fltx4 retVal;
+    SubFloat( retVal, 0 ) = pSrc[0];
+    SubFloat( retVal, 1 ) = pSrc[1];
+    SubFloat( retVal, 2 ) = pSrc[2];
+    SubFloat( retVal, 3 ) = 0.0f;
+    return retVal;
+}
+
+FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) // copy a full fltx4 (four lanes) from pSIMD; no alignment check is made in this fallback
+{
+ return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
+}
+
+// For the transitional class: load a VectorAligned through its Base()
+// pointer, then clear all 32 bits of the w lane.
+FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
+{
+    fltx4 loaded = LoadAlignedSIMD(pSIMD.Base());
+    uint32 &nWBits = SubInt( loaded, 3 );
+    nWBits = 0; // squelch w
+    return loaded;
+}
+
+FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) // write all four lanes of a to pSIMD; no alignment check is made in this fallback
+{
+ *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
+}
+
+FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) // write all four lanes of a to pSIMD; same plain assignment as StoreAlignedSIMD in this fallback
+{
+ *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
+}
+
+// Write only the x, y and z lanes of a to pSIMD[0..2]; nothing is written for w.
+FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
+{
+    for ( int i = 0; i < 3; ++i )
+    {
+        pSIMD[i] = SubFloat(a, i);
+    }
+}
+
+// Strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD.
+FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+ StoreAlignedSIMD(pSIMD->Base(),a); // forwards to the 4-lane store, so a full fltx4 is written at Base()
+}
+
+FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) // in-place transpose of the 4x4 matrix whose rows are x, y, z, w
+{
+#define SWAP_FLOATS( _a_, _ia_, _b_, _ib_ ) { float tmp = SubFloat( _a_, _ia_ ); SubFloat( _a_, _ia_ ) = SubFloat( _b_, _ib_ ); SubFloat( _b_, _ib_ ) = tmp; }
+ SWAP_FLOATS( x, 1, y, 0 ); // each swap exchanges one element above the diagonal with its mirror below
+ SWAP_FLOATS( x, 2, z, 0 );
+ SWAP_FLOATS( x, 3, w, 0 );
+ SWAP_FLOATS( y, 2, z, 1 );
+ SWAP_FLOATS( y, 3, w, 1 );
+ SWAP_FLOATS( z, 3, w, 2 );
+} // NOTE(review): SWAP_FLOATS is not #undef'd here, so the macro stays defined for the rest of the translation unit
+
+// Find the lowest of a.x, a.y, a.z (w is ignored)
+// and replicate it to the whole return value.
+FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
+{
+    const float flLowestXY = min( SubFloat(a, 0), SubFloat(a, 1) );
+    const float flLowest = min( flLowestXY, SubFloat(a, 2) );
+    return ReplicateX4(flLowest);
+}
+
+// Find the highest of a.x, a.y, a.z (w is ignored)
+// and replicate it to the whole return value.
+FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
+{
+    const float flHighestXY = max( SubFloat(a, 0), SubFloat(a, 1) );
+    const float flHighest = max( flHighestXY, SubFloat(a, 2) );
+    return ReplicateX4(flHighest);
+}
+
+// Convert each float lane to a SIGNED INT and store it:
+// pDest->x = Int (vSrc.x), and likewise for y, z, w.
+// Note: some architectures can do fixed-point conversion
+// when the fix depth is given as an immediate, but a
+// function parameter cannot be guaranteed to be an
+// immediate, so no fix depth is taken here.
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
+{
+    for ( int i = 0; i < 4; ++i )
+    {
+        (*pDest)[i] = SubFloat(vSrc, i);
+    }
+}
+
+// ------------------------------------
+// INTEGER SIMD OPERATIONS.
+// ------------------------------------
+// Splat a signed int into the raw 32 bits of all four lanes.
+FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue )
+{
+    fltx4 filled;
+    for ( int i = 3; i >= 0; --i )
+    {
+        SubInt( filled, i ) = nValue;
+    }
+    return filled;
+}
+
+// Load 4 aligned words into a SIMD register (a plain 16-byte copy in this fallback)
+FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD)
+{
+ return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
+}
+
+// Load 4 unaligned words into a SIMD register (same plain copy as LoadAlignedIntSIMD in this fallback)
+FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
+{
+ return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
+}
+
+// Save into four words, 16-byte aligned (no alignment check is made in this fallback)
+FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
+{
+ *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
+}
+
+FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) // overload taking an intx4: writes the four lanes of a over its storage
+{
+ *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a;
+}
+
+FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) // save into four words; same plain assignment as the aligned store in this fallback
+{
+ *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
+}
+
+// Take a fltx4 containing fixed-point uints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
+{
+    Assert(0); /* pc has no such operation */
+    fltx4 retval;
+    // Convert the lanes of the INPUT. The previous code read the lanes of
+    // the still-uninitialized retval and never looked at vSrcA.
+    SubFloat( retval, 0 ) = ( (float) SubInt( vSrcA, 0 ) );
+    SubFloat( retval, 1 ) = ( (float) SubInt( vSrcA, 1 ) );
+    SubFloat( retval, 2 ) = ( (float) SubInt( vSrcA, 2 ) );
+    SubFloat( retval, 3 ) = ( (float) SubInt( vSrcA, 3 ) );
+    return retval;
+}
+
+
+#if 0 /* pc has no such op */
+// Take a fltx4 containing fixed-point sints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) // compiled out by the enclosing #if 0
+{
+ fltx4 retval;
+ SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[0])) ); // NOTE(review): casts a pointer, not the pointed-to int, to float; m128_s32 is not a member of this build's fltx4 union
+ SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[1])) );
+ SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[2])) );
+ SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[3])) );
+ return retval;
+}
+
+
+/*
+ Works on fltx4's as if they are four uints.
+ The first parameter contains the words to be shifted,
+ the second contains the amount to shift by AS INTS.
+
+ for i = 0 to 3
+ shift = vSrcB_i*32:(i*32)+4
+ vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
+*/
+FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) // compiled out by the enclosing #if 0
+{
+ i32x4 retval;
+ SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); // NOTE(review): the whole 32-bit lane of vSrcB is the shift count here; nothing masks it to the bit range the comment above describes
+ SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1);
+ SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2);
+ SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3);
+
+
+ return retval;
+}
+#endif
+
+#elif ( defined( _X360 ) )
+
+//---------------------------------------------------------------------
+// X360 implementation
+//---------------------------------------------------------------------
+
+FORCEINLINE float & FloatSIMD( fltx4 & a, int idx ) // writable float view of lane idx, obtained by reinterpreting the register as fltx4_union
+{
+ fltx4_union & a_union = (fltx4_union &)a;
+ return a_union.m128_f32[idx];
+}
+
+FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx ) // writable raw 32-bit view of lane idx, obtained by reinterpreting the register as fltx4_union
+{
+ fltx4_union & a_union = (fltx4_union &)a;
+ return a_union.m128_u32[idx];
+}
+
+FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b, via the __vaddfp intrinsic
+{
+ return __vaddfp( a, b );
+}
+
+// Lane-wise a-b, forwarded to the __vsubfp intrinsic.
+FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
+{
+ return __vsubfp( a, b );
+}
+
+// Lane-wise a*b, forwarded to the __vmulfp intrinsic.
+FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
+{
+ return __vmulfp( a, b );
+}
+
+// Lane-wise multiply-add a*b + c, forwarded to the __vmaddfp intrinsic.
+FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
+{
+ return __vmaddfp( a, b, c );
+}
+
+// Lane-wise c - a*b, forwarded to the __vnmsubfp (negative multiply-subtract) intrinsic.
+// Note the operand order: the product is subtracted FROM c.
+FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
+{
+ return __vnmsubfp( a, b, c );
+}
+
+// Three-component dot product of a and b via the __vmsum3fp intrinsic.
+// NOTE(review): presumably the sum is replicated to every lane, matching the
+// SSE implementation of this function -- confirm against the VMX128 docs.
+FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
+{
+ return __vmsum3fp( a, b );
+}
+
+// Four-component dot product of a and b via the __vmsum4fp intrinsic.
+// NOTE(review): presumably the sum is replicated to every lane, matching the
+// SSE implementation of this function -- confirm against the VMX128 docs.
+FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
+{
+ return __vmsum4fp( a, b );
+}
+
+// Sine of each lane (input in radians), forwarded to XMVectorSin.
+FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
+{
+ return XMVectorSin( radians );
+}
+
+// Writes sine and cosine of radians into the two output vectors.
+// On this platform the body is identical to SinCosSIMD: XMVectorSinCos is
+// applied to the whole vector, not only to x, y and z.
+FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+ XMVectorSinCos( &sine, &cosine, radians );
+}
+
+// Writes sine and cosine of radians into the two output vectors using a
+// single XMVectorSinCos call.
+FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+ XMVectorSinCos( &sine, &cosine, radians );
+}
+
+// Cosine of each lane (input in radians) via XMVectorCos.
+// Unlike SinSIMD, the result is delivered through the cosine out-parameter
+// rather than the return value.
+FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians )
+{
+ cosine = XMVectorCos( radians );
+}
+
+// Arc-sine of each lane, forwarded to XMVectorASin.
+FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
+{
+ return XMVectorASin( sine );
+}
+
+// Arc-cosine of each lane, forwarded to XMVectorACos.
+FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
+{
+ return XMVectorACos( cs );
+}
+
+// tan^-1(a/b), i.e. pass the sine in as a and the cosine in as b.
+// Forwarded to XMVectorATan2.
+FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
+{
+ return XMVectorATan2( a, b );
+}
+
+// DivSIMD defined further down, since it uses ReciprocalSIMD
+
+// Lane-wise maximum of a and b, forwarded to the __vmaxfp intrinsic.
+FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
+{
+ return __vmaxfp( a, b );
+}
+
+// Lane-wise minimum of a and b, forwarded to the __vminfp intrinsic.
+FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
+{
+ return __vminfp( a, b );
+}
+
+// Bitwise AND of the raw vector bits, forwarded to the __vand intrinsic.
+FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
+{
+ return __vand( a, b );
+}
+
+// Bitwise (~a) & b. The public contract follows the SSE convention (the FIRST
+// argument is complemented), so the operands are swapped for __vandc.
+FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
+{
+ // NOTE: a and b are swapped in the call: SSE complements the first argument, VMX the second
+ return __vandc( b, a );
+}
+
+// Bitwise XOR of the raw vector bits, forwarded to the __vxor intrinsic.
+FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
+{
+ return __vxor( a, b );
+}
+
+// Bitwise OR of the raw vector bits, forwarded to the __vor intrinsic.
+FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
+{
+ return __vor( a, b );
+}
+
+// Lane-wise negation, forwarded to XMVectorNegate.
+FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
+{
+ return XMVectorNegate(a);
+}
+
+// Returns true when every lane of a compares equal to zero.
+// The recording compare (__vcmpeqfpR) against Four_Zeros fills equalFlags,
+// which XMComparisonAllTrue then reduces to a bool.
+FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero?
+{
+ unsigned int equalFlags = 0;
+ __vcmpeqfpR( a, Four_Zeros, &equalFlags );
+ return XMComparisonAllTrue( equalFlags );
+}
+
+// Returns true when at least one lane of a compares equal to zero.
+// XMVectorEqualR records the comparison summary in conditionregister.
+FORCEINLINE bool IsAnyZeros( const fltx4 & a ) // any floats are zero?
+{
+ unsigned int conditionregister;
+ XMVectorEqualR(&conditionregister, a, XMVectorZero());
+ return XMComparisonAnyTrue(conditionregister);
+}
+
+// Returns true when x, y or z of a compares equal to zero; w is ignored.
+// w is neutralised by overwriting it with a copy of x before the four-lane
+// compare, so a zero in w cannot produce a false positive.
+FORCEINLINE bool IsAnyXYZZero( const fltx4 &a ) // are any of x,y,z zero?
+{
+ // copy a's x component into w, in case w was zero.
+ fltx4 temp = __vrlimi(a, a, 1, 1);
+ unsigned int conditionregister;
+ XMVectorEqualR(&conditionregister, temp, XMVectorZero());
+ return XMComparisonAnyTrue(conditionregister);
+}
+
+// for branching when a.xyzw > b.xyzw
+// Returns true only if the recorded comparison (XMVectorGreaterR) reports
+// every lane as true.
+FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
+{
+ unsigned int cr;
+ XMVectorGreaterR(&cr,a,b);
+ return XMComparisonAllTrue(cr);
+}
+
+// for branching when a.xyzw >= b.xyzw
+// Returns true only if the recorded comparison (XMVectorGreaterOrEqualR)
+// reports every lane as true.
+FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
+{
+ unsigned int cr;
+ XMVectorGreaterOrEqualR(&cr,a,b);
+ return XMComparisonAllTrue(cr);
+}
+
+// For branching if all a.xyzw == b.xyzw
+// Returns true only if the recorded comparison (XMVectorEqualR) reports
+// every lane as true.
+FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
+{
+ unsigned int cr;
+ XMVectorEqualR(&cr,a,b);
+ return XMComparisonAllTrue(cr);
+}
+
+
+// Builds a 4-bit mask from the sign (top) bit of each lane:
+// bit 0 = x, bit 1 = y, bit 2 = z, bit 3 = w.
+FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
+{
+ // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though)
+ const fltx4_union & lanes = (const fltx4_union &)a;
+
+ int nSignMask = 0;
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ // isolate the top bit of this lane, then move it down to bit position nLane
+ nSignMask |= ( lanes.m128_u32[nLane] & 0x80000000 ) >> ( 31 - nLane );
+ }
+ return nSignMask;
+}
+
+// Squelch the w component of a vector to +0.0.
+// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
+// Implemented as a single __vrlimi that inserts lane w (mask 1, no rotation)
+// from an all-zero vector into a copy of a.
+FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
+{
+ return __vrlimi( a, __vzero(), 1, 0 );
+}
+
+// Returns true when any lane of a has its sign bit set.
+// Works purely on the integer bit pattern: a 0x80000000 mask is built without
+// a memory load, ANDed with a, and the result is integer-compared to zero.
+// If not every lane compares equal to zero, some sign bit was set.
+FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
+{
+ // NOTE: this tests the top bits of each vector element using integer math
+ // (so it ignores NaNs - it will return true for "-NaN")
+ unsigned int equalFlags = 0;
+ fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31)
+ signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000
+ __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags );
+ return !XMComparisonAllTrue( equalFlags );
+}
+
+// Lane-wise equality mask: all bits set where a == b, zero elsewhere (__vcmpeqfp).
+FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
+{
+ return __vcmpeqfp( a, b );
+}
+
+
+// Lane-wise greater-than mask: all bits set where a > b, zero elsewhere (__vcmpgtfp).
+FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
+{
+ return __vcmpgtfp( a, b );
+}
+
+// Lane-wise greater-or-equal mask: all bits set where a >= b, zero elsewhere (__vcmpgefp).
+FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
+{
+ return __vcmpgefp( a, b );
+}
+
+// Lane-wise less-than mask. Expressed as "b > a" by swapping the operands
+// of __vcmpgtfp.
+FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
+{
+ return __vcmpgtfp( b, a );
+}
+
+// Lane-wise less-or-equal mask. Expressed as "b >= a" by swapping the
+// operands of __vcmpgefp.
+FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
+{
+ return __vcmpgefp( b, a );
+}
+
+// Lane-wise bounds mask: all bits set where -b <= a <= b. Forwarded to XMVectorInBounds.
+FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
+{
+ return XMVectorInBounds( a, b );
+}
+
+// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue
+// Implemented with __vsel; note the argument order differs from this
+// function's own (OldValue first, mask last).
+FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
+{
+ return __vsel( OldValue, NewValue, ReplacementMask );
+}
+
+// AKA "Broadcast", "Splat"
+// Broadcasts flValue to all four lanes: the by-value argument is loaded from
+// its own address with __lvlx and lane 0 is then splatted with __vspltw.
+FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
+{
+ // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
+ float * pValue = &flValue;
+ Assert( pValue );
+ // the load below expects a 4-byte aligned address
+ Assert( ((unsigned int)pValue & 3) == 0);
+ return __vspltw( __lvlx( pValue, 0 ), 0 );
+}
+
+// Broadcasts the float at *pValue to all four lanes (__lvlx load, __vspltw splat).
+// pValue must be non-null. Unlike the by-value overload, alignment is not asserted here.
+FORCEINLINE fltx4 ReplicateX4( const float *pValue ) // a,a,a,a
+{
+ Assert( pValue );
+ return __vspltw( __lvlx( pValue, 0 ), 0 );
+}
+
+/// replicate a single 32 bit integer value to all 4 components of an m128
+// The integer's bit pattern is loaded unchanged (no int->float conversion)
+// and lane 0 is splatted across the vector.
+FORCEINLINE fltx4 ReplicateIX4( int nValue )
+{
+ // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!)
+ int * pValue = &nValue;
+ Assert( pValue );
+ // the load below expects a 4-byte aligned address
+ Assert( ((unsigned int)pValue & 3) == 0);
+ return __vspltw( __lvlx( pValue, 0 ), 0 );
+}
+
+// Round towards positive infinity
+// Native VMX rounding op (__vrfip); no scalar fallback is needed on this platform.
+FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
+{
+ return __vrfip(a);
+}
+
+// Round towards nearest integer
+// Native VMX rounding op (__vrfin).
+FORCEINLINE fltx4 RoundSIMD( const fltx4 &a )
+{
+ return __vrfin(a);
+}
+
+// Round towards negative infinity
+// Native VMX rounding op (__vrfim).
+FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
+{
+ return __vrfim(a);
+}
+
+// Approximate lane-wise square root, forwarded to XMVectorSqrtEst.
+FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
+{
+ // This is emulated from rsqrt
+ return XMVectorSqrtEst( a );
+}
+
+// Lane-wise square root, forwarded to XMVectorSqrt.
+FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
+{
+ // This is emulated from rsqrt
+ return XMVectorSqrt( a );
+}
+
+// Hardware estimate of 1/sqrt(a) per lane (__vrsqrtefp), with no refinement step.
+FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
+{
+ return __vrsqrtefp( a );
+}
+
+// Estimate of 1/sqrt(a) that never sees an exact zero: lanes equal to zero
+// have Four_Epsilons OR'd into them before the estimate is taken.
+FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
+{
+ // epsilon in the lanes where a == 0, all-zero bits everywhere else
+ const fltx4 fl4EpsWhereZero = AndSIMD( Four_Epsilons, CmpEqSIMD( a, Four_Zeros ) );
+ return ReciprocalSqrtEstSIMD( OrSIMD( a, fl4EpsWhereZero ) );
+}
+
+// Refined 1/sqrt(a) per lane, forwarded to XMVectorReciprocalSqrt.
+FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
+{
+ // This uses Newton-Raphson to improve the HW result
+ return XMVectorReciprocalSqrt( a );
+}
+
+// Hardware estimate of 1/a per lane (__vrefp), with no refinement step.
+FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
+{
+ return __vrefp( a );
+}
+
+/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration.
+/// No error checking!
+// Forwarded to XMVectorReciprocal; zero lanes are not special-cased here
+// (see ReciprocalSaturateSIMD for that).
+FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
+{
+ // This uses Newton-Raphson to improve the HW result
+ return XMVectorReciprocal( a );
+}
+
+// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?)
+// Lane-wise a/b computed as a * (1/b): there is no divide intrinsic in use
+// here, so the quotient goes through ReciprocalSIMD and MulSIMD.
+FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
+{
+ return MulSIMD( ReciprocalSIMD( b ), a );
+}
+
+/// 1/x for all 4 values.
+/// 1/0 will result in a big but NOT infinite result
+// Estimate of 1/a in which lanes equal to zero are first replaced (by OR-ing
+// in Four_Epsilons) so the result is large but finite.
+FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
+{
+ // epsilon in the lanes where a == 0, all-zero bits everywhere else
+ const fltx4 fl4EpsWhereZero = AndSIMD( Four_Epsilons, CmpEqSIMD( a, Four_Zeros ) );
+ return ReciprocalEstSIMD( OrSIMD( a, fl4EpsWhereZero ) );
+}
+
+// Refined 1/a in which lanes equal to zero first have Four_Epsilons OR'd in,
+// so the result is large but finite.
+FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
+{
+ // epsilon in the lanes where a == 0, all-zero bits everywhere else
+ const fltx4 fl4EpsWhereZero = AndSIMD( Four_Epsilons, CmpEqSIMD( a, Four_Zeros ) );
+ return ReciprocalSIMD( OrSIMD( a, fl4EpsWhereZero ) );
+
+ // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does)
+ // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a );
+ // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask );
+ // return ReciprocalSIMD( a_safe );
+}
+
+// CHRISG: is it worth doing integer bitfiddling for this?
+// 2^x for all values (the antilog)
+// Forwarded to XMVectorExp.
+FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
+{
+ return XMVectorExp(toPower);
+}
+
+// Clamps the components of a vector to a specified minimum and maximum range.
+// Forwarded to XMVectorClamp( in, min, max ).
+FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
+{
+ return XMVectorClamp(in, min, max);
+}
+
+// Loads four floats from pSIMD with no alignment requirement (XMLoadVector4).
+FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
+{
+ return XMLoadVector4( pSIMD );
+}
+
+// load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec).
+// Forwarded to XMLoadVector3.
+// NOTE(review): the contents of the w lane after this load are not established
+// by this file -- do not rely on them without checking XMLoadVector3.
+FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
+{
+ return XMLoadVector3( pSIMD );
+}
+
+// Loads a full vector by dereferencing pSIMD as a fltx4; the pointer must
+// therefore satisfy fltx4's alignment.
+FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
+{
+ return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
+}
+
+// for the transitional class -- load a 3-by VectorAligned and squash its w component
+// Reference overload: loads via XMLoadVector3A from pSIMD.Base(), then forces
+// the w lane to zero.
+FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
+{
+ fltx4 out = XMLoadVector3A(pSIMD.Base());
+ // squelch w
+ return __vrlimi( out, __vzero(), 1, 0 );
+}
+
+// for the transitional class -- load a 3-by VectorAligned and squash its w component
+// Pointer overload: the VectorAligned pointer itself is handed to
+// XMLoadVector3A (no Base() call), then the w lane is forced to zero.
+FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD )
+{
+ fltx4 out = XMLoadVector3A(pSIMD);
+ // squelch w
+ return __vrlimi( out, __vzero(), 1, 0 );
+}
+
+// Stores all four lanes of a by assigning through pSIMD viewed as a fltx4;
+// the pointer must therefore satisfy fltx4's alignment.
+FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a )
+{
+ *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
+}
+
+// Stores four floats to pSIMD with no alignment requirement (XMStoreVector4).
+FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a )
+{
+ XMStoreVector4( pSIMD, a );
+}
+
+// Stores the first three lanes of a to pSIMD (XMStoreVector3).
+FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
+{
+ XMStoreVector3( pSIMD, a );
+}
+
+
+// strongly typed -- for typechecking as we transition to SIMD
+// Stores a into the VectorAligned's float storage via XMStoreVector3A.
+FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+ XMStoreVector3A(pSIMD->Base(),a);
+}
+
+
+// Fixed-point conversion and save as SIGNED INTS.
+// pDest->x = Int (vSrc.x)
+// note: some architectures have means of doing
+// fixed point conversion when the fix depth is
+// specified as an immediate.. but there is no way
+// to guarantee an immediate as a parameter to function
+// like this.
+// Converts with __vctsxs using a fixed-point shift of 0 (i.e. a plain
+// float -> signed int conversion), then does an aligned four-word store.
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
+{
+ fltx4 asInt = __vctsxs( vSrc, 0 );
+ XMStoreVector4A(pDest->Base(), asInt);
+}
+
+// In-place 4x4 transpose: x, y, z, w are treated as the rows of a matrix,
+// transposed with XMMatrixTranspose, and the resulting rows written back
+// into the same four references.
+FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
+{
+ XMMATRIX xyzwMatrix = _XMMATRIX( x, y, z, w );
+ xyzwMatrix = XMMatrixTranspose( xyzwMatrix );
+ x = xyzwMatrix.r[0];
+ y = xyzwMatrix.r[1];
+ z = xyzwMatrix.r[2];
+ w = xyzwMatrix.r[3];
+}
+
+// Return zero in the fastest way -- faster even than loading.
+// (Generated by XMVectorZero rather than read from memory.)
+FORCEINLINE fltx4 LoadZeroSIMD( void )
+{
+ return XMVectorZero();
+}
+
+// Return one in the fastest way -- faster even than loading.
+// (Generated by XMVectorSplatOne rather than read from memory.)
+FORCEINLINE fltx4 LoadOneSIMD( void )
+{
+ return XMVectorSplatOne();
+}
+
+// Replicates the x lane of a to all four lanes (XMVectorSplatX).
+FORCEINLINE fltx4 SplatXSIMD( fltx4 a )
+{
+ return XMVectorSplatX( a );
+}
+
+// Replicates the y lane of a to all four lanes (XMVectorSplatY).
+FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
+{
+ return XMVectorSplatY( a );
+}
+
+// Replicates the z lane of a to all four lanes (XMVectorSplatZ).
+FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
+{
+ return XMVectorSplatZ( a );
+}
+
+// Replicates the w lane of a to all four lanes (XMVectorSplatW).
+FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
+{
+ return XMVectorSplatW( a );
+}
+
+// Returns a with its x lane replaced by the x lane of the second argument.
+FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
+{
+ // insert mask 8 = x lane; rotate count 0 keeps the source lanes in place
+ return __vrlimi(a, x, 8, 0);
+}
+
+// Returns a with its y lane replaced by the y lane of the second argument.
+FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
+{
+ // insert mask 4 = y lane; rotate count 0 keeps the source lanes in place
+ return __vrlimi(a, y, 4, 0);
+}
+
+// Returns a with its z lane replaced by the z lane of the second argument.
+FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
+{
+ // insert mask 2 = z lane; rotate count 0 keeps the source lanes in place
+ return __vrlimi(a, z, 2, 0);
+}
+
+// Returns a with its w lane replaced by the w lane of the second argument.
+FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
+{
+ // insert mask 1 = w lane; rotate count 0 keeps the source lanes in place
+ return __vrlimi(a, w, 1, 0);
+}
+
+// Returns a with lane nComponent (0=x .. 3=w) replaced by flValue.
+// nComponent indexes a 4-entry table and is not range-checked.
+// NOTE(review): the insert mask handed to __vrlimi is a run-time table lookup
+// here, whereas every other __vrlimi call in this file passes a literal --
+// confirm the intrinsic accepts a non-immediate mask on the target compiler.
+FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
+{
+ // __vrlimi insert masks for x, y, z, w respectively
+ static int s_nVrlimiMask[4] = { 8, 4, 2, 1 };
+ fltx4 val = ReplicateX4( flValue );
+ fltx4 result = __vrlimi(a, val, s_nVrlimiMask[nComponent], 0);
+ return result;
+}
+
+// Rotates the lanes of a left by one position: all four lanes (mask 8|4|2|1)
+// are inserted from a rotated left by 1.
+FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
+{
+ fltx4 compareOne = a;
+ return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 1 );
+}
+
+// Rotates the lanes of a left by two positions: all four lanes (mask 8|4|2|1)
+// are inserted from a rotated left by 2.
+FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
+{
+ fltx4 compareOne = a;
+ return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 2 );
+}
+
+
+
+// find the lowest component of a.x, a.y, a.z,
+// and replicate it to the whole return value.
+// ignores a.w.
+// Though this is only five instructions long,
+// they are all dependent, making this stall city.
+// Forcing this inline should hopefully help with scheduling.
+// Strategy: rotate y and then z into the x slot with __vrlimi, taking the
+// running minimum each time, so lane x ends up holding min(x,y,z).
+FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
+{
+ // a is [x,y,z,G] (where G is garbage)
+ // rotate left by one
+ fltx4 compareOne = a ;
+ compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
+ // compareOne is [y,z,G,G]
+ fltx4 retval = MinSIMD( a, compareOne );
+ // retVal is [min(x,y), min(y,z), G, G]
+ compareOne = __vrlimi( compareOne, a, 8 , 2);
+ // compareOne is [z, G, G, G]
+ retval = MinSIMD( retval, compareOne );
+ // retVal = [ min(min(x,y),z), G, G, G ]
+
+ // splat the x component out to the whole vector and return
+ return SplatXSIMD( retval );
+}
+
+// find the highest component of a.x, a.y, a.z,
+// and replicate it to the whole return value.
+// ignores a.w.
+// Though this is only five instructions long,
+// they are all dependent, making this stall city.
+// Forcing this inline should hopefully help with scheduling.
+// Strategy: rotate y and then z into the x slot with __vrlimi, taking the
+// running maximum each time, so lane x ends up holding max(x,y,z).
+FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
+{
+ // a is [x,y,z,G] (where G is garbage)
+ // rotate left by one
+ fltx4 compareOne = a ;
+ compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
+ // compareOne is [y,z,G,G]
+ fltx4 retval = MaxSIMD( a, compareOne );
+ // retVal is [max(x,y), max(y,z), G, G]
+ compareOne = __vrlimi( compareOne, a, 8 , 2);
+ // compareOne is [z, G, G, G]
+ retval = MaxSIMD( retval, compareOne );
+ // retVal = [ max(max(x,y),z), G, G, G ]
+
+ // splat the x component out to the whole vector and return
+ return SplatXSIMD( retval );
+}
+
+
+// Transform many (horizontal) points in-place by a 3x4 matrix,
+// here already loaded onto three fltx4 registers.
+// The points must be stored as 16-byte aligned. They are points
+// and not vectors because we assume the w-component to be 1.
+// To spare yourself the annoyance of loading the matrix yourself,
+// use one of the overloads below.
+void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3);
+
+// Transform many (horizontal) points in-place by a 3x4 matrix.
+// The points must be stored as 16-byte aligned. They are points
+// and not vectors because we assume the w-component to be 1.
+// In this function, the matrix need not be aligned.
+// Convenience overload: loads the three matrix rows with unaligned loads and
+// forwards to the register-based TransformManyPointsBy.
+FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
+{
+ return TransformManyPointsBy(pVectors, numVectors,
+ LoadUnalignedSIMD( pMatrix[0] ), LoadUnalignedSIMD( pMatrix[1] ), LoadUnalignedSIMD( pMatrix[2] ) );
+}
+
+// Transform many (horizontal) points in-place by a 3x4 matrix.
+// The points must be stored as 16-byte aligned. They are points
+// and not vectors because we assume the w-component to be 1.
+// In this function, the matrix must itself be aligned on a 16-byte
+// boundary.
+// Convenience overload: loads the three matrix rows with aligned loads and
+// forwards to the register-based TransformManyPointsBy.
+FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
+{
+ return TransformManyPointsBy(pVectors, numVectors,
+ LoadAlignedSIMD( pMatrix[0] ), LoadAlignedSIMD( pMatrix[1] ), LoadAlignedSIMD( pMatrix[2] ) );
+}
+
+// ------------------------------------
+// INTEGER SIMD OPERATIONS.
+// ------------------------------------
+
+// Load 4 aligned words into a SIMD register
+// Forwarded to XMLoadVector4A (the aligned four-word load).
+FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
+{
+ return XMLoadVector4A(pSIMD);
+}
+
+// Load 4 unaligned words into a SIMD register
+// Forwarded to XMLoadVector4 (the unaligned four-word load).
+FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD)
+{
+ return XMLoadVector4( pSIMD );
+}
+
+// save into four words, 16-byte aligned
+// Writes all four words of a by assigning through pSIMD viewed as an i32x4.
+FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
+{
+ *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
+}
+
+// intx4 overload: writes all four words of a into pSIMD's storage (Base()),
+// viewed as an i32x4.
+FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
+{
+ *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a;
+}
+
+// Stores four words to pSIMD with no alignment requirement (XMStoreVector4).
+FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a )
+{
+ XMStoreVector4(pSIMD, a);
+}
+
+
+// Take a fltx4 containing fixed-point uints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+// Implemented with __vcfux using a fixed-point shift of 0.
+FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA )
+{
+ return __vcfux( vSrcA, 0 );
+}
+
+
+// Take a fltx4 containing fixed-point sints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+// Implemented with __vcfsx using a fixed-point shift of 0.
+FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
+{
+ return __vcfsx( vSrcA, 0 );
+}
+
+// Take a fltx4 containing fixed-point uints and
+// return them as single precision floats. Each uint
+// will be divided by 2^immed after conversion
+// (eg, this is fixed point math).
+/* as if:
+ FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
+ {
+ return __vcfux( vSrcA, uImmed );
+ }
+*/
+#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) ))
+
+// Take a fltx4 containing fixed-point sints and
+// return them as single precision floats. Each int
+// will be divided by 2^immed (eg, this is fixed point
+// math).
+/* as if:
+ FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
+ {
+ return __vcfsx( vSrcA, uImmed );
+ }
+*/
+#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) ))
+
+// set all components of a vector to a signed immediate int number.
+/* as if:
+ FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate)
+ {
+ return __vspltisw( toImmediate );
+ }
+*/
+#define IntSetImmediateSIMD(x) (__vspltisw(x))
+
+/*
+ works on fltx4's as if they are four uints.
+ the first parameter contains the words to be shifted,
+ the second contains the amount to shift by AS INTS
+
+ for i = 0 to 3
+ shift = vSrcB_i*32:(i*32)+4
+ vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
+*/
+// Per-word left shift of vSrcA by the counts in vSrcB, forwarded to __vslw.
+FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB)
+{
+ return __vslw(vSrcA, vSrcB);
+}
+
+// Returns float lane idx of a by value, read through fltx4_union.
+// idx is not range-checked here.
+FORCEINLINE float SubFloat( const fltx4 & a, int idx )
+{
+ // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
+ const fltx4_union & a_union = (const fltx4_union &)a;
+ return a_union.m128_f32[ idx ];
+}
+
+// Returns a writable reference to float lane idx of a, via fltx4_union.
+// idx is not range-checked here.
+FORCEINLINE float & SubFloat( fltx4 & a, int idx )
+{
+ fltx4_union & a_union = (fltx4_union &)a;
+ return a_union.m128_f32[idx];
+}
+
+// Converts the whole vector to unsigned integers with __vctuxs (fixed-point
+// shift 0) and returns word idx of the converted result.
+FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
+{
+ fltx4 t = __vctuxs( a, 0 );
+ const fltx4_union & a_union = (const fltx4_union &)t;
+ return a_union.m128_u32[idx];
+}
+
+
+// Returns the raw 32-bit pattern of lane idx by value (no numeric conversion).
+FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
+{
+ const fltx4_union & a_union = (const fltx4_union &)a;
+ return a_union.m128_u32[idx];
+}
+
+// Returns a writable reference to the raw 32-bit pattern of lane idx.
+FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
+{
+ fltx4_union & a_union = (fltx4_union &)a;
+ return a_union.m128_u32[idx];
+}
+
+#else
+
+//---------------------------------------------------------------------
+// Intel/SSE implementation
+//---------------------------------------------------------------------
+
+// Stores all four lanes of a to pSIMD with the aligned store _mm_store_ps;
+// pSIMD must be 16-byte aligned.
+FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
+{
+ _mm_store_ps( pSIMD, a );
+}
+
+// Stores all four lanes of a to pSIMD with the unaligned store _mm_storeu_ps.
+FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
+{
+ _mm_storeu_ps( pSIMD, a );
+}
+
+
+FORCEINLINE fltx4 RotateLeft( const fltx4 & a );
+FORCEINLINE fltx4 RotateLeft2( const fltx4 & a );
+
+// Writes exactly three floats (x, y, z) to pSIMD, one scalar store each.
+// y and z are brought into lane 0 by rotating the vector first, so nothing
+// past pSIMD[2] is touched.
+FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
+{
+ _mm_store_ss(pSIMD, a);
+ _mm_store_ss(pSIMD+1, RotateLeft(a));
+ _mm_store_ss(pSIMD+2, RotateLeft2(a));
+}
+
+// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD
+// Forwards to StoreAlignedSIMD, which stores all FOUR lanes of a -- so the
+// float following z in the destination is overwritten as well.
+// NOTE(review): assumes VectorAligned provides 16 bytes of storage -- confirm.
+FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+ StoreAlignedSIMD( pSIMD->Base(),a );
+}
+
+// Loads four floats from pSIMD with the aligned load _mm_load_ps;
+// pSIMD must be 16-byte aligned.
+FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
+{
+ return _mm_load_ps( reinterpret_cast< const float *> ( pSIMD ) );
+}
+
+// Bitwise AND of the raw vector bits (_mm_and_ps).
+FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
+{
+ return _mm_and_ps( a, b );
+}
+
+// Bitwise (~a) & b: the FIRST argument is the one complemented (_mm_andnot_ps).
+FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
+{
+ return _mm_andnot_ps( a, b );
+}
+
+// Bitwise XOR of the raw vector bits (_mm_xor_ps).
+FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
+{
+ return _mm_xor_ps( a, b );
+}
+
+// Bitwise OR of the raw vector bits (_mm_or_ps).
+FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
+{
+ return _mm_or_ps( a, b );
+}
+
+// Squelch the w component of a vector to +0.0.
+// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
+// Implemented by AND-ing a with the constant mask g_SIMD_clear_wmask.
+FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
+{
+ return AndSIMD( a, LoadAlignedSIMD( g_SIMD_clear_wmask ) );
+}
+
+// for the transitional class -- load a 3-by VectorAligned and squash its w component
+// Does a full aligned four-float load from pSIMD.Base() and then clears the
+// w lane with SetWToZeroSIMD.
+FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
+{
+ return SetWToZeroSIMD( LoadAlignedSIMD(pSIMD.Base()) );
+}
+
+// Loads four floats from pSIMD with the unaligned load _mm_loadu_ps.
+FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
+{
+ return _mm_loadu_ps( reinterpret_cast<const float *>( pSIMD ) );
+}
+
+// Loads a 3-vector (x, y, z) from an unaligned address and returns [x, y, z, 0].
+// Only the three floats at pSIMD[0..2] are read. A plain four-float load would
+// touch a fourth float that a 3-float source does not own, which can run past
+// the end of the caller's buffer.
+FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
+{
+ const float *pFloats = reinterpret_cast<const float *>( pSIMD );
+ // each scalar load yields [v, 0, 0, 0]
+ const fltx4 fl4X = _mm_load_ss( pFloats );
+ const fltx4 fl4Y = _mm_load_ss( pFloats + 1 );
+ const fltx4 fl4Z = _mm_load_ss( pFloats + 2 );
+ // interleave low halves: [x, y, 0, 0]
+ const fltx4 fl4XY = _mm_unpacklo_ps( fl4X, fl4Y );
+ // low half of xy followed by low half of z: [x, y, z, 0]
+ return _mm_movelh_ps( fl4XY, fl4Z );
+}
+
+/// replicate a single 32 bit integer value to all 4 components of an m128
+// Broadcasts the raw 32-bit pattern of i (no int->float conversion) to all four lanes.
+FORCEINLINE fltx4 ReplicateIX4( int i )
+{
+ // place the reinterpreted bits in lane 0 ...
+ fltx4 fl4Lane0 = _mm_set_ss( * ( ( float *) &i ) );
+ // ... then copy lane 0 into every lane (shuffle selector 0)
+ return _mm_shuffle_ps( fl4Lane0, fl4Lane0, 0);
+}
+
+
+// Broadcasts flValue to all four lanes: it is placed in lane 0 with
+// _mm_set_ss and then replicated with a shuffle whose selector is 0.
+FORCEINLINE fltx4 ReplicateX4( float flValue )
+{
+ __m128 value = _mm_set_ss( flValue );
+ return _mm_shuffle_ps( value, value, 0 );
+}
+
+
+// Returns float lane idx of a by value. Non-POSIX builds use the m128_f32
+// member of the vector type; POSIX builds index the vector's storage as a
+// float array instead. idx is not range-checked.
+FORCEINLINE float SubFloat( const fltx4 & a, int idx )
+{
+ // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
+#ifndef POSIX
+ return a.m128_f32[ idx ];
+#else
+ return (reinterpret_cast<float const *>(&a))[idx];
+#endif
+}
+
+// Returns a writable reference to float lane idx of a. Non-POSIX builds use
+// the m128_f32 member; POSIX builds index the vector's storage as a float
+// array instead. idx is not range-checked.
+FORCEINLINE float & SubFloat( fltx4 & a, int idx )
+{
+#ifndef POSIX
+ return a.m128_f32[ idx ];
+#else
+ return (reinterpret_cast<float *>(&a))[idx];
+#endif
+}
+
+// Reads float lane idx and converts it to uint32 with a plain C cast.
+// NOTE(review): the X360 path converts with __vctuxs instead; the two may
+// differ for negative or out-of-range inputs, for which this cast is not
+// well-defined -- confirm callers only pass values representable as uint32.
+FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
+{
+ return (uint32)SubFloat(a,idx);
+}
+
+// Returns the raw 32-bit pattern of lane idx by value (no numeric conversion).
+// Non-POSIX builds use the m128_u32 member; POSIX builds index the vector's
+// storage as a uint32 array instead.
+FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
+{
+#ifndef POSIX
+ return a.m128_u32[idx];
+#else
+ return (reinterpret_cast<uint32 const *>(&a))[idx];
+#endif
+}
+
+// Returns a writable reference to the raw 32-bit pattern of lane idx.
+// Non-POSIX builds use the m128_u32 member; POSIX builds index the vector's
+// storage as a uint32 array instead.
+FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
+{
+#ifndef POSIX
+ return a.m128_u32[idx];
+#else
+ return (reinterpret_cast<uint32 *>(&a))[idx];
+#endif
+}
+
+// Return zero in the fastest way -- on the x360, faster even than loading.
+// On this platform it simply returns the Four_Zeros constant.
+FORCEINLINE fltx4 LoadZeroSIMD( void )
+{
+ return Four_Zeros;
+}
+
+// Return one in the fastest way -- on the x360, faster even than loading.
+// On this platform it simply returns the Four_Ones constant.
+FORCEINLINE fltx4 LoadOneSIMD( void )
+{
+ return Four_Ones;
+}
+
+// Bitwise select: where ReplacementMask bits are set the result takes
+// NewValue, elsewhere it keeps OldValue.
+FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
+{
+ const fltx4 fl4Taken = AndSIMD( ReplacementMask, NewValue ); // new bits under the mask
+ const fltx4 fl4Kept = AndNotSIMD( ReplacementMask, OldValue ); // old bits outside the mask
+ return OrSIMD( fl4Taken, fl4Kept );
+}
+
+// remember, the SSE numbers its words 3 2 1 0
+// The way we want to specify shuffles is backwards from the default
+// MM_SHUFFLE_REV is in array index order (default is reversed)
+#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a)
+
+// Replicates lane 0 (x) of a to all four lanes.
+FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a )
+{
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) );
+}
+
+// Replicates lane 1 (y) of a to all four lanes.
+FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a )
+{
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) );
+}
+
+// Replicates lane 2 (z) of a to all four lanes.
+FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a )
+{
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) );
+}
+
+// Replicates lane 3 (w) of a to all four lanes.
+FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a )
+{
+ // all four selectors are 3, so the array-order and reversed macros agree
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 3, 3, 3, 3 ) );
+}
+
+// Returns a with its x lane replaced by the x lane of the second argument.
+FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
+{
+ // g_SIMD_ComponentMask[0] is the select mask used for lane 0
+ return MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[0] ), x, a );
+}
+
+// Returns a with its y lane replaced by the y lane of the second argument.
+FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
+{
+ // g_SIMD_ComponentMask[1] is the select mask used for lane 1
+ return MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[1] ), y, a );
+}
+
+// Returns a with its z lane replaced by the z lane of the second argument.
+FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
+{
+ // g_SIMD_ComponentMask[2] is the select mask used for lane 2
+ return MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[2] ), z, a );
+}
+
+// Returns a with its w lane replaced by the w lane of the second argument.
+FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
+{
+ // g_SIMD_ComponentMask[3] is the select mask used for lane 3
+ return MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[3] ), w, a );
+}
+
+// Returns a with lane nComponent replaced by flValue.
+// nComponent indexes g_SIMD_ComponentMask directly and is not range-checked.
+FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
+{
+ const fltx4 fl4Broadcast = ReplicateX4( flValue );
+ const fltx4 fl4LaneMask = LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] );
+ return MaskedAssign( fl4LaneMask, fl4Broadcast, a );
+}
+
+// a b c d -> b c d a
+// Result lanes, in array order, are a[1], a[2], a[3], a[0].
+FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
+{
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) );
+}
+
+// a b c d -> c d a b
+// Result lanes, in array order, are a[2], a[3], a[0], a[1].
+FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
+{
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) );
+}
+
+// a b c d -> d a b c
+// Rotates the lanes right by one position.
+// Result lanes, in array order, are a[3], a[0], a[1], a[2].
+FORCEINLINE fltx4 RotateRight( const fltx4 & a )
+{
+ // Selectors are given in array order via MM_SHUFFLE_REV. The raw
+ // _MM_SHUFFLE( 0, 3, 2, 1 ) encodes the same immediate as
+ // MM_SHUFFLE_REV( 1, 2, 3, 0 ), i.e. a rotate LEFT.
+ return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 3, 0, 1, 2 ) );
+}
+
+// a b c d -> c d a b
+// Result lanes, in array order, are a[2], a[3], a[0], a[1] -- the same
+// permutation as RotateLeft2. _MM_SHUFFLE lists its selectors highest lane first.
+FORCEINLINE fltx4 RotateRight2( const fltx4 & a )
+{
+ return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) );
+}
+
+
+// Lane-wise a+b (_mm_add_ps).
+FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b
+{
+ return _mm_add_ps( a, b );
+}
+
+// Lane-wise a-b (_mm_sub_ps).
+FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
+{
+ return _mm_sub_ps( a, b );
+}
+
+// Lane-wise a*b (_mm_mul_ps).
+FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
+{
+ return _mm_mul_ps( a, b );
+}
+
+// Lane-wise a/b (_mm_div_ps).
+FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
+{
+ return _mm_div_ps( a, b );
+}
+
+// Lane-wise a*b + c, built from a separate MulSIMD and AddSIMD (two
+// operations, not a single fused instruction).
+FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
+{
+ return AddSIMD( MulSIMD(a,b), c );
+}
+
+// Lane-wise c - a*b, built from a separate MulSIMD and SubSIMD.
+// Note the operand order: the product is subtracted FROM c.
+FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
+{
+ return SubSIMD( c, MulSIMD(a,b) );
+}
+
+// Three-component dot product of a and b, replicated to all four lanes.
+// The multiply is done in SIMD; the horizontal sum is done on scalars.
+FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
+{
+ const fltx4 fl4Products = MulSIMD( a, b );
+ const float flSum = SubFloat( fl4Products, 0 ) + SubFloat( fl4Products, 1 ) + SubFloat( fl4Products, 2 );
+ return ReplicateX4( flSum );
+}
+
+// Four-component dot product of a and b, replicated to all four lanes.
+// The multiply is done in SIMD; the horizontal sum is done on scalars.
+FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
+{
+ const fltx4 fl4Products = MulSIMD( a, b );
+ const float flSum = SubFloat( fl4Products, 0 ) + SubFloat( fl4Products, 1 ) + SubFloat( fl4Products, 2 ) + SubFloat( fl4Products, 3 );
+ return ReplicateX4( flSum );
+}
+
+//TODO: implement as four-way Taylor series (see xbox implementation)
+// Sine of each lane (input in radians), computed one lane at a time with the
+// scalar sin().
+FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
+{
+ fltx4 fl4Sines;
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ SubFloat( fl4Sines, nLane ) = sin( SubFloat( radians, nLane ) );
+ }
+ return fl4Sines;
+}
+
+// Writes sine and cosine of the x, y and z lanes of radians.
+// The w lanes of sine and cosine are left untouched.
+FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+ // FIXME: Make a fast SSE version
+ for ( int nLane = 0; nLane < 3; ++nLane )
+ {
+ SinCos( SubFloat( radians, nLane ), &SubFloat( sine, nLane ), &SubFloat( cosine, nLane ) );
+ }
+}
+
+// Writes sine and cosine of all four lanes of radians, one scalar SinCos
+// call per lane.
+FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
+{
+ // FIXME: Make a fast SSE version
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ SinCos( SubFloat( radians, nLane ), &SubFloat( sine, nLane ), &SubFloat( cosine, nLane ) );
+ }
+}
+
+//TODO: implement as four-way Taylor series (see xbox implementation)
+// Arc-sine of each lane, computed one lane at a time with the scalar asin().
+FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
+{
+ // FIXME: Make a fast SSE version
+ fltx4 fl4Angles;
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ SubFloat( fl4Angles, nLane ) = asin( SubFloat( sine, nLane ) );
+ }
+ return fl4Angles;
+}
+
+// Arc-cosine of each lane, computed one lane at a time with the scalar acos().
+FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
+{
+ fltx4 fl4Angles;
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ SubFloat( fl4Angles, nLane ) = acos( SubFloat( cs, nLane ) );
+ }
+ return fl4Angles;
+}
+
+// tan^-1(a/b), i.e. pass the sine in as a and the cosine in as b.
+// Computed one lane at a time with the scalar atan2().
+FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
+{
+ fltx4 fl4Angles;
+ for ( int nLane = 0; nLane < 4; ++nLane )
+ {
+ SubFloat( fl4Angles, nLane ) = atan2( SubFloat( a, nLane ), SubFloat( b, nLane ) );
+ }
+ return fl4Angles;
+}
+
+// Lane-wise negation computed as 0 - a (a subtraction, not a sign-bit flip).
+FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
+{
+ return SubSIMD(LoadZeroSIMD(),a);
+}
+
+// Returns the 4-bit mask produced by _mm_movemask_ps: bit i is the top
+// (sign) bit of lane i.
+FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
+{
+ return _mm_movemask_ps( a );
+}
+
+// Returns true when any lane has its sign bit set (non-zero TestSignSIMD mask).
+// This is a bit test, so a lane holding -0.0 counts as negative.
+FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
+{
+ return (0 != TestSignSIMD( a ));
+}
+
+// Lane-wise equality mask: all bits set where a == b, zero elsewhere (_mm_cmpeq_ps).
+FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
+{
+ return _mm_cmpeq_ps( a, b );
+}
+
+// Lane-wise greater-than mask: all bits set where a > b, zero elsewhere (_mm_cmpgt_ps).
+FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
+{
+ return _mm_cmpgt_ps( a, b );
+}
+
+// Lane-wise greater-or-equal mask: all bits set where a >= b, zero elsewhere (_mm_cmpge_ps).
+FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
+{
+ return _mm_cmpge_ps( a, b );
+}
+
+// Lane-wise less-than mask: all bits set where a < b, zero elsewhere (_mm_cmplt_ps).
+FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
+{
+ return _mm_cmplt_ps( a, b );
+}
+
+// Lane-wise less-or-equal mask: all bits set where a <= b, zero elsewhere (_mm_cmple_ps).
+FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
+{
+ return _mm_cmple_ps( a, b );
+}
+
+// for branching when a.xyzw > b.xyzw
+// Returns true only when a > b holds in every one of the four lanes.
+// A lane containing NaN never satisfies a > b, so the result is then false.
+FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
+{
+ // Test the positive condition in all four lanes (mask 0xF). Checking that
+ // "no lane has a <= b" instead would report true for NaN lanes, because
+ // every ordered comparison against NaN is false.
+ return TestSignSIMD( CmpGtSIMD( a, b ) ) == 0xF;
+}
+
+// for branching when a.xyzw >= b.xyzw
+// Returns true only when a >= b holds in every one of the four lanes.
+// A lane containing NaN never satisfies a >= b, so the result is then false.
+FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
+{
+ // Test the positive condition in all four lanes (mask 0xF). Checking that
+ // "no lane has a < b" instead would report true for NaN lanes, because
+ // every ordered comparison against NaN is false.
+ return TestSignSIMD( CmpGeSIMD( a, b ) ) == 0xF;
+}
+
+// For branching if all a.xyzw == b.xyzw
+// True when the equality mask has its top bit set in all four lanes (0xf).
+FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
+{
+ return TestSignSIMD( CmpEqSIMD( a, b ) ) == 0xf;
+}
+
+// Lane-wise bounds mask: the AND of (a <= b) and (a >= -b).
+FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
+{
+ return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) );
+}
+
+// Per-lane min(a,b)
+FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b )
+{
+	const fltx4 smaller = _mm_min_ps( a, b );
+	return smaller;
+}
+
+// Per-lane max(a,b)
+FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b )
+{
+	const fltx4 larger = _mm_max_ps( a, b );
+	return larger;
+}
+
+
+
+// SSE lacks rounding operations.
+// Really.
+// You can emulate them by setting the rounding mode for the
+// whole processor and then converting to int, and then back again.
+// But every time you set the rounding mode, you clear out the
+// entire pipeline. So, I can't do them per operation. You
+// have to do it once, before the loop that would call these.
+//
+// Round towards positive infinity. Done one lane at a time with scalar ceil().
+FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
+{
+	fltx4 retVal;
+	for ( int lane = 0; lane < 4; ++lane )
+	{
+		SubFloat( retVal, lane ) = ceil( SubFloat( a, lane ) );
+	}
+	return retVal;
+}
+
+// Forward declaration; the fltx4 overload of fabs() is defined later in this file.
+fltx4 fabs( const fltx4 & x );
+// Round towards negative infinity
+// This is the implementation that was here before; it assumes
+// you are in round-to-floor mode, which I guess is usually the
+// case for us vis-a-vis SSE. It's totally unnecessary on
+// VMX, which has a native floor op.
+//
+// How it works: adding and then subtracting Four_2ToThe23s forces |val| to a
+// whole number (presumably the constant is 2^23, where single-precision floats
+// stop holding fraction bits -- it is defined elsewhere). If that result ended
+// up above |val|, one is subtracted. Finally the sign bit of val is XORed back.
+// NOTE(review): the rounding is applied to the magnitude, so a negative
+// non-integer input appears to round toward zero (e.g. -1.5 -> -1) rather than
+// toward negative infinity -- confirm what callers expect.
+FORCEINLINE fltx4 FloorSIMD( const fltx4 &val )
+{
+ fltx4 fl4Abs = fabs( val );
+ // round |val| to an integer value
+ fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
+ // lanes that were rounded up get one subtracted
+ ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
+ return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits
+}
+
+
+
+// True when every lane of var compares equal to zero.
+inline bool IsAllZeros( const fltx4 & var )
+{
+	const fltx4 zeroMask = CmpEqSIMD( var, Four_Zeros );
+	return 0xF == TestSignSIMD( zeroMask );
+}
+
+// sqrt(a), more or less. On this platform it issues the same instruction as SqrtSIMD.
+FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a )
+{
+	const fltx4 root = _mm_sqrt_ps( a );
+	return root;
+}
+
+// Per-lane sqrt(a)
+FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a )
+{
+	const fltx4 root = _mm_sqrt_ps( a );
+	return root;
+}
+
+// 1/sqrt(a), more or less (hardware estimate, no refinement)
+FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a )
+{
+	const fltx4 estimate = _mm_rsqrt_ps( a );
+	return estimate;
+}
+
+// 1/sqrt(a), more or less. Lanes that compare equal to zero get Four_Epsilons
+// OR'd into them before the estimate is taken.
+FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
+{
+	const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
+	const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
+	const fltx4 patched = OrSIMD( a, epsilonWhereZero );
+	return ReciprocalSqrtEstSIMD( patched );
+}
+
+/// 1/sqrt(a). Refines the hardware estimate with one newton iteration, giving
+/// higher precision results than ReciprocalSqrtEstSIMD.
+FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a )
+{
+	// newton iteration for 1/sqrt(a) : y(n+1) = 1/2 (y(n)*(3-a*y(n)^2));
+	const fltx4 estimate = ReciprocalSqrtEstSIMD( a );
+	const fltx4 estimateSq = MulSIMD( estimate, estimate );
+	const fltx4 correction = SubSIMD( Four_Threes, MulSIMD( a, estimateSq ) );
+	const fltx4 scaled = MulSIMD( estimate, correction );
+	return MulSIMD( Four_PointFives, scaled );
+}
+
+// 1/a, more or less (hardware estimate, no refinement)
+FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a )
+{
+	const fltx4 estimate = _mm_rcp_ps( a );
+	return estimate;
+}
+
+/// 1/x for all 4 values, more or less.
+/// 1/0 will result in a big but NOT infinite result: lanes equal to zero have
+/// Four_Epsilons OR'd in before the estimate is taken.
+FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
+{
+	const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
+	const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
+	const fltx4 patched = OrSIMD( a, epsilonWhereZero );
+	return ReciprocalEstSIMD( patched );
+}
+
+/// 1/x for all 4 values. Takes the reciprocal approximation instruction's result
+/// and refines it with one newton iteration.
+/// No error checking!
+FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a )
+{
+	// newton iteration is: Y(n+1) = 2*Y(n)-a*Y(n)^2
+	const fltx4 estimate = ReciprocalEstSIMD( a );
+	const fltx4 twiceEstimate = AddSIMD( estimate, estimate );
+	const fltx4 estimateSq = MulSIMD( estimate, estimate );
+	return SubSIMD( twiceEstimate, MulSIMD( a, estimateSq ) );
+}
+
+/// 1/x for all 4 values.
+/// 1/0 will result in a big but NOT infinite result: lanes equal to zero have
+/// Four_Epsilons OR'd in before ReciprocalSIMD is applied.
+FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
+{
+	const fltx4 isZero = CmpEqSIMD( a, Four_Zeros );
+	const fltx4 epsilonWhereZero = AndSIMD( Four_Epsilons, isZero );
+	const fltx4 patched = OrSIMD( a, epsilonWhereZero );
+	return ReciprocalSIMD( patched );
+}
+
+// CHRISG: is it worth doing integer bitfiddling for this?
+// 2^x for all values (the antilog). Scalar fallback: one powf call per lane.
+FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
+{
+	fltx4 retval;
+	for ( int lane = 0; lane < 4; ++lane )
+	{
+		SubFloat( retval, lane ) = powf( 2, SubFloat( toPower, lane ) );
+	}
+	return retval;
+}
+
+// Clamps each component of in to the range [min, max]: the value is first
+// capped at max and the result is then raised to at least min.
+FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
+{
+	const fltx4 cappedAbove = MinSIMD( max, in );
+	return MaxSIMD( min, cappedAbove );
+}
+
+// Treats x, y, z, w as the four rows of a 4x4 matrix and transposes it in place.
+FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w)
+{
+	_MM_TRANSPOSE4_PS( x, y, z, w );
+}
+
+// Returns min(x,y,z) of a, replicated to all four lanes. The w lane is ignored.
+FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a )
+{
+	// a is [x,y,z,G] (where G is garbage). Rotating left by one gives
+	// [y,z,G,x] and by two gives [z,G,x,y], so lane 0 of the three
+	// operands holds x, y and z respectively.
+	const fltx4 rotatedOnce = RotateLeft( a );
+	const fltx4 rotatedTwice = RotateLeft2( a );
+	const fltx4 lowest = MinSIMD( MinSIMD( a, rotatedOnce ), rotatedTwice );
+	// lane 0 is now min(min(x,y),z); splat it out to the whole vector
+	return SplatXSIMD( lowest );
+}
+
+// Returns max(x,y,z) of a, replicated to all four lanes. The w lane is ignored.
+FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a )
+{
+	// a is [x,y,z,G] (where G is garbage). Rotating left by one gives
+	// [y,z,G,x] and by two gives [z,G,x,y], so lane 0 of the three
+	// operands holds x, y and z respectively.
+	const fltx4 rotatedOnce = RotateLeft( a );
+	const fltx4 rotatedTwice = RotateLeft2( a );
+	const fltx4 highest = MaxSIMD( MaxSIMD( a, rotatedOnce ), rotatedTwice );
+	// lane 0 is now max(max(x,y),z); splat it out to the whole vector
+	return SplatXSIMD( highest );
+}
+
+// ------------------------------------
+// INTEGER SIMD OPERATIONS.
+// ------------------------------------
+
+
+#if 0 /* pc does not have these ops */
+// splat all components of a vector to a signed immediate int number.
+// NOTE: compiled out by the enclosing #if 0; kept for reference only.
+FORCEINLINE fltx4 IntSetImmediateSIMD(int to)
+{
+ //CHRISG: SSE2 has this, but not SSE1. What to do?
+ // scalar fallback: write the same integer into each of the four words
+ fltx4 retval;
+ SubInt( retval, 0 ) = to;
+ SubInt( retval, 1 ) = to;
+ SubInt( retval, 2 ) = to;
+ SubInt( retval, 3 ) = to;
+ return retval;
+}
+#endif
+
+// Load 4 aligned words into a SIMD register. The bits are loaded untouched
+// through the float load instruction.
+FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
+{
+	const float *pAsFloats = reinterpret_cast<const float *>( pSIMD );
+	return _mm_load_ps( pAsFloats );
+}
+
+// Load 4 unaligned words into a SIMD register. The bits are loaded untouched
+// through the float load instruction.
+FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
+{
+	const float *pAsFloats = reinterpret_cast<const float *>( pSIMD );
+	return _mm_loadu_ps( pAsFloats );
+}
+
+// save into four words, 16-byte aligned
+FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
+{
+	float *pAsFloats = reinterpret_cast<float *>( pSIMD );
+	_mm_store_ps( pAsFloats, a );
+}
+
+// save into the four words of an intx4 using an aligned store
+FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
+{
+	float *pAsFloats = reinterpret_cast<float *>( pSIMD.Base() );
+	_mm_store_ps( pAsFloats, a );
+}
+
+// save into four words using an unaligned store
+FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
+{
+	float *pAsFloats = reinterpret_cast<float *>( pSIMD );
+	_mm_storeu_ps( pAsFloats, a );
+}
+
+
+// CHRISG: the conversion functions all seem to operate on m64's only...
+// how do we make them work here?
+
+// Take a fltx4 containing fixed-point uints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
+{
+	// Read the four words of the source as unsigned integers, the same way
+	// SignedIntConvertToFltSIMD reads them as signed. (This used to read the
+	// lanes of the uninitialized return value, so vSrcA was ignored.)
+	const uint32 *pWords = reinterpret_cast<const uint32 *>( &vSrcA );
+
+	fltx4 retval;
+	SubFloat( retval, 0 ) = ( (float) pWords[0] );
+	SubFloat( retval, 1 ) = ( (float) pWords[1] );
+	SubFloat( retval, 2 ) = ( (float) pWords[2] );
+	SubFloat( retval, 3 ) = ( (float) pWords[3] );
+	return retval;
+}
+
+
+// Take a fltx4 containing fixed-point sints and
+// return them as single precision floats. No
+// fixed point conversion is done.
+FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
+{
+	// view the register's four words as signed integers
+	const int32 *pWords = reinterpret_cast<const int32 *>( &vSrcA );
+
+	fltx4 retval;
+	for ( int lane = 0; lane < 4; ++lane )
+	{
+		SubFloat( retval, lane ) = ( (float) pWords[lane] );
+	}
+	return retval;
+}
+
+/*
+	works on fltx4's as if they are four uints.
+	the first parameter contains the words to be shifted,
+	the second contains the amount to shift by AS INTS
+
+	for i = 0 to 3
+		shift = vSrcB_i*32:(i*32)+4
+		vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
+
+	Only the low five bits of each shift amount are used, as described above.
+	They are masked explicitly because shifting a 32-bit word by 32 or more
+	is undefined behaviour in C++.
+*/
+FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
+{
+	i32x4 retval;
+	SubInt(retval, 0) = SubInt(vSrcA, 0) << ( SubInt(vSrcB, 0) & 31 );
+	SubInt(retval, 1) = SubInt(vSrcA, 1) << ( SubInt(vSrcB, 1) & 31 );
+	SubInt(retval, 2) = SubInt(vSrcA, 2) << ( SubInt(vSrcB, 2) & 31 );
+	SubInt(retval, 3) = SubInt(vSrcA, 3) << ( SubInt(vSrcB, 3) & 31 );
+
+	return retval;
+}
+
+
+// Fixed-point conversion and save as SIGNED INTS.
+// pDest->x = Int (vSrc.x)
+// note: some architectures have means of doing
+// fixed point conversion when the fix depth is
+// specified as an immediate.. but there is no way
+// to guarantee an immediate as a parameter to function
+// like this.
+// This version converts two floats at a time through 64-bit MMX values
+// (__m64), so it ends with _mm_empty() before returning.
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
+{
+ // convert the low two floats (x,y) to ints
+ __m64 bottom = _mm_cvttps_pi32( vSrc );
+ // move the high two floats (z,w) into the low half, then convert those
+ __m64 top = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) );
+
+ // store each pair as one 64-bit write into elements [0..1] and [2..3]
+ *reinterpret_cast<__m64 *>(&(*pDest)[0]) = bottom;
+ *reinterpret_cast<__m64 *>(&(*pDest)[2]) = top;
+
+ // leave MMX state so later x87 floating point code works
+ _mm_empty();
+}
+
+
+
+#endif
+
+
+
+/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are
+/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated.
+class ALIGN16 FourVectors
+{
+public:
+	fltx4 x, y, z;
+
+	FORCEINLINE void DuplicateVector(Vector const &v)			//< set all 4 vectors to the same vector value
+	{
+		x=ReplicateX4(v.x);
+		y=ReplicateX4(v.y);
+		z=ReplicateX4(v.z);
+	}
+
+	/// Component access by index: 0 = x, 1 = y, 2 = z. The index is not range checked.
+	FORCEINLINE fltx4 const & operator[](int idx) const
+	{
+		return *((&x)+idx);
+	}
+
+	/// Component access by index: 0 = x, 1 = y, 2 = z. The index is not range checked.
+	FORCEINLINE fltx4 & operator[](int idx)
+	{
+		return *((&x)+idx);
+	}
+
+	FORCEINLINE void operator+=(FourVectors const &b)			//< add 4 vectors to another 4 vectors
+	{
+		x=AddSIMD(x,b.x);
+		y=AddSIMD(y,b.y);
+		z=AddSIMD(z,b.z);
+	}
+
+	FORCEINLINE void operator-=(FourVectors const &b)			//< subtract 4 vectors from another 4
+	{
+		x=SubSIMD(x,b.x);
+		y=SubSIMD(y,b.y);
+		z=SubSIMD(z,b.z);
+	}
+
+	FORCEINLINE void operator*=(FourVectors const &b)			//< scale all four vectors per component scale
+	{
+		x=MulSIMD(x,b.x);
+		y=MulSIMD(y,b.y);
+		z=MulSIMD(z,b.z);
+	}
+
+	FORCEINLINE void operator*=(const fltx4 & scale)			//< scale each of the 4 vectors by its own lane of scale
+	{
+		x=MulSIMD(x,scale);
+		y=MulSIMD(y,scale);
+		z=MulSIMD(z,scale);
+	}
+
+	FORCEINLINE void operator*=(float scale)					//< uniformly scale all 4 vectors
+	{
+		fltx4 scalepacked = ReplicateX4(scale);
+		*this *= scalepacked;
+	}
+
+	FORCEINLINE fltx4 operator*(FourVectors const &b) const		//< 4 dot products
+	{
+		fltx4 dot=MulSIMD(x,b.x);
+		dot=MaddSIMD(y,b.y,dot);
+		dot=MaddSIMD(z,b.z,dot);
+		return dot;
+	}
+
+	FORCEINLINE fltx4 operator*(Vector const &b) const			//< dot product all 4 vectors with 1 vector
+	{
+		fltx4 dot=MulSIMD(x,ReplicateX4(b.x));
+		dot=MaddSIMD(y,ReplicateX4(b.y), dot);
+		dot=MaddSIMD(z,ReplicateX4(b.z), dot);
+		return dot;
+	}
+
+	FORCEINLINE void VProduct(FourVectors const &b)				//< component by component mul
+	{
+		x=MulSIMD(x,b.x);
+		y=MulSIMD(y,b.y);
+		z=MulSIMD(z,b.z);
+	}
+
+	FORCEINLINE void MakeReciprocal(void)						//< (x,y,z)=(1/x,1/y,1/z)
+	{
+		x=ReciprocalSIMD(x);
+		y=ReciprocalSIMD(y);
+		z=ReciprocalSIMD(z);
+	}
+
+	FORCEINLINE void MakeReciprocalSaturate(void)				//< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23
+	{
+		x=ReciprocalSaturateSIMD(x);
+		y=ReciprocalSaturateSIMD(y);
+		z=ReciprocalSaturateSIMD(z);
+	}
+
+	// Assume the given matrix is a rotation, and rotate these vectors by it.
+	// If you have a long list of FourVectors structures that you all want
+	// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
+	inline void RotateBy(const matrix3x4_t& matrix);
+
+	/// You can use this to rotate a long array of FourVectors all by the same
+	/// matrix. The first parameter is the head of the array. The second is the
+	/// number of vectors to rotate. The third is the matrix.
+	static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
+
+	/// Assume the vectors are points, and transform them in place by the matrix.
+	inline void TransformBy(const matrix3x4_t& matrix);
+
+	/// You can use this to Transform a long array of FourVectors all by the same
+	/// matrix. The first parameter is the head of the array. The second is the
+	/// number of vectors to transform. The third is the matrix. The fourth is the
+	/// output buffer, which must not overlap the pVectors buffer. This is not
+	/// an in-place transformation.
+	static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut );
+
+	/// You can use this to Transform a long array of FourVectors all by the same
+	/// matrix. The first parameter is the head of the array. The second is the
+	/// number of vectors to transform. The third is the matrix.
+	/// This is an in-place transformation: there is no separate output buffer.
+	static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
+
+	// X(),Y(),Z() - get at the desired component of the i'th (0..3) vector.
+	FORCEINLINE const float & X(int idx) const
+	{
+		// NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
+		return SubFloat( (fltx4 &)x, idx );
+	}
+
+	FORCEINLINE const float & Y(int idx) const
+	{
+		return SubFloat( (fltx4 &)y, idx );
+	}
+
+	FORCEINLINE const float & Z(int idx) const
+	{
+		return SubFloat( (fltx4 &)z, idx );
+	}
+
+	FORCEINLINE float & X(int idx)
+	{
+		return SubFloat( x, idx );
+	}
+
+	FORCEINLINE float & Y(int idx)
+	{
+		return SubFloat( y, idx );
+	}
+
+	FORCEINLINE float & Z(int idx)
+	{
+		return SubFloat( z, idx );
+	}
+
+	FORCEINLINE Vector Vec(int idx) const						//< unpack one of the vectors
+	{
+		return Vector( X(idx), Y(idx), Z(idx) );
+	}
+
+	/// Default constructor: leaves x, y and z uninitialized.
+	FourVectors(void)
+	{
+	}
+
+	FourVectors( FourVectors const &src )
+	{
+		x=src.x;
+		y=src.y;
+		z=src.z;
+	}
+
+	/// Copies all three components. Returns void, so assignments cannot be chained.
+	FORCEINLINE void operator=( FourVectors const &src )
+	{
+		x=src.x;
+		y=src.y;
+		z=src.z;
+	}
+
+	/// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op.
+	/// Each load starts at the Vector's x member and fills a whole fltx4; the
+	/// fourth lane is the "?" column below and is discarded by the transpose.
+	FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
+	{
+		// TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360
+		// use an unfolded implementation here
+#if _X360
+		fltx4 tx = LoadUnalignedSIMD( &a.x );
+		fltx4 ty = LoadUnalignedSIMD( &b.x );
+		fltx4 tz = LoadUnalignedSIMD( &c.x );
+		fltx4 tw = LoadUnalignedSIMD( &d.x );
+		fltx4 r0 = __vmrghw(tx, tz);
+		fltx4 r1 = __vmrghw(ty, tw);
+		fltx4 r2 = __vmrglw(tx, tz);
+		fltx4 r3 = __vmrglw(ty, tw);
+
+		x = __vmrghw(r0, r1);
+		y = __vmrglw(r0, r1);
+		z = __vmrghw(r2, r3);
+#else
+		x = LoadUnalignedSIMD( &( a.x ));
+		y = LoadUnalignedSIMD( &( b.x ));
+		z = LoadUnalignedSIMD( &( c.x ));
+		fltx4 w = LoadUnalignedSIMD( &( d.x ));
+		// now, matrix is:
+		// x y z ?
+		// x y z ?
+		// x y z ?
+		// x y z ?
+		TransposeSIMD(x, y, z, w);
+#endif
+	}
+
+	/// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op.
+	/// all 4 vectors must be on a 128 bit boundary
+	FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d)
+	{
+#if _X360
+		fltx4 tx = LoadAlignedSIMD(a);
+		fltx4 ty = LoadAlignedSIMD(b);
+		fltx4 tz = LoadAlignedSIMD(c);
+		fltx4 tw = LoadAlignedSIMD(d);
+		fltx4 r0 = __vmrghw(tx, tz);
+		fltx4 r1 = __vmrghw(ty, tw);
+		fltx4 r2 = __vmrglw(tx, tz);
+		fltx4 r3 = __vmrglw(ty, tw);
+
+		x = __vmrghw(r0, r1);
+		y = __vmrglw(r0, r1);
+		z = __vmrghw(r2, r3);
+#else
+		x = LoadAlignedSIMD( a );
+		y = LoadAlignedSIMD( b );
+		z = LoadAlignedSIMD( c );
+		fltx4 w = LoadAlignedSIMD( d );
+		// now, matrix is:
+		// x y z ?
+		// x y z ?
+		// x y z ?
+		// x y z ?
+		TransposeSIMD( x, y, z, w );
+#endif
+	}
+
+	/// Same as the pointer overload, starting each load at the Vector's x member.
+	FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
+	{
+		LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x );
+	}
+
+	/// return the squared length of all 4 vectors
+	FORCEINLINE fltx4 length2(void) const
+	{
+		return (*this)*(*this);
+	}
+
+	/// return the approximate length of all 4 vectors. uses the sqrt approximation instruction
+	FORCEINLINE fltx4 length(void) const
+	{
+		return SqrtEstSIMD(length2());
+	}
+
+	/// normalize all 4 vectors in place. not mega-accurate (uses reciprocal approximation instruction)
+	FORCEINLINE void VectorNormalizeFast(void)
+	{
+		fltx4 mag_sq=(*this)*(*this);						// length^2
+		(*this) *= ReciprocalSqrtEstSIMD(mag_sq);			// *(1.0/sqrt(length^2))
+	}
+
+	/// normalize all 4 vectors in place.
+	FORCEINLINE void VectorNormalize(void)
+	{
+		fltx4 mag_sq=(*this)*(*this);						// length^2
+		(*this) *= ReciprocalSqrtSIMD(mag_sq);				// *(1.0/sqrt(length^2))
+	}
+
+	/// construct a FourVectors from 4 separate Vectors
+	FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
+	{
+		LoadAndSwizzle(a,b,c,d);
+	}
+
+	/// construct a FourVectors from 4 separate aligned Vectors (uses aligned loads)
+	FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d)
+	{
+		LoadAndSwizzleAligned(a,b,c,d);
+	}
+
+	/// return the squared distance from each of our 4 vectors to the matching vector in pnt
+	FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt )
+	{
+		fltx4 fl4dX = SubSIMD( pnt.x, x );
+		fltx4 fl4dY = SubSIMD( pnt.y, y );
+		fltx4 fl4dZ = SubSIMD( pnt.z, z );
+		return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) );
+	}
+
+	/// For each of the 4 points, return the parameter t of its closest point on
+	/// the infinite line p0 + t*(p1-p0). The result is not clamped to 0..1.
+	/// No error checking: p0 == p1 divides by zero.
+	FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const
+	{
+		FourVectors lineDelta = p1;
+		lineDelta -= p0;
+		// Normalize by the squared length of the line direction (p1-p0), exactly as
+		// DistSqrToLineSegment does. This previously used p1*p1, which is only
+		// right when p0 is the origin.
+		fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta );
+		FourVectors v4OurPnt = *this;
+		v4OurPnt -= p0;
+		return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
+	}
+
+	/// For each of the 4 points, return the squared distance to the line segment p0..p1.
+	FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const
+	{
+		// work relative to p0
+		FourVectors lineDelta = p1;
+		FourVectors v4OurPnt = *this;
+		v4OurPnt -= p0;
+		lineDelta -= p0;
+
+		fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta );
+
+		fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
+
+		// clamp t to 0..1 so the closest point stays on the segment
+		fl4T = MinSIMD( fl4T, Four_Ones );
+		fl4T = MaxSIMD( fl4T, Four_Zeros );
+		lineDelta *= fl4T;
+		return v4OurPnt.DistToSqr( lineDelta );
+	}
+
+};
+
+/// form 4 cross products (a x b), one per lane
+inline FourVectors operator ^(const FourVectors &a, const FourVectors &b)
+{
+	FourVectors cross;
+	cross.x = SubSIMD( MulSIMD( a.y, b.z ), MulSIMD( a.z, b.y ) );
+	cross.y = SubSIMD( MulSIMD( a.z, b.x ), MulSIMD( a.x, b.z ) );
+	cross.z = SubSIMD( MulSIMD( a.x, b.y ), MulSIMD( a.y, b.x ) );
+	return cross;
+}
+
+/// component-by-component MAX of two FourVectors
+inline FourVectors maximum(const FourVectors &a, const FourVectors &b)
+{
+	FourVectors larger;
+	larger.x = MaxSIMD( a.x, b.x );
+	larger.y = MaxSIMD( a.y, b.y );
+	larger.z = MaxSIMD( a.z, b.z );
+	return larger;
+}
+
+/// component-by-component MIN of two FourVectors
+inline FourVectors minimum(const FourVectors &a, const FourVectors &b)
+{
+	FourVectors smaller;
+	smaller.x = MinSIMD( a.x, b.x );
+	smaller.y = MinSIMD( a.y, b.y );
+	smaller.z = MinSIMD( a.z, b.z );
+	return smaller;
+}
+
+/// calculate reflection vector: i - 2(n*i)n. incident and normal dir assumed normalized
+FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal )
+{
+	// twice the projection of the incident direction onto the normal
+	const fltx4 iDotN = incident * normal;
+	const fltx4 twiceIDotN = AddSIMD( iDotN, iDotN );
+
+	FourVectors alongNormal = normal;
+	alongNormal *= twiceIDotN;
+
+	FourVectors reflected = incident;
+	reflected -= alongNormal;
+	return reflected;
+}
+
+/// calculate slide vector: i - (n*i)n. removes the part of the incident vector that
+/// lies along the normal, leaving only the part perpendicular to the normal.
+FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal )
+{
+	const fltx4 iDotN = incident * normal;
+
+	FourVectors alongNormal = normal;
+	alongNormal *= iDotN;
+
+	FourVectors slid = incident;
+	slid -= alongNormal;
+	return slid;
+}
+
+
+// Assume the given matrix is a rotation, and rotate these vectors by it.
+// Only the first three entries of each matrix row are used.
+// If you have a long list of FourVectors structures that you all want
+// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
+void FourVectors::RotateBy(const matrix3x4_t& matrix)
+{
+	// Load the three matrix rows. Sadly, matrix3x4_ts are often unaligned,
+	// so unaligned loads are used. The fourth lane of each load is the
+	// row's last entry, which we don't care about here.
+	const fltx4 row0 = LoadUnalignedSIMD( matrix[0] );
+	const fltx4 row1 = LoadUnalignedSIMD( matrix[1] );
+	const fltx4 row2 = LoadUnalignedSIMD( matrix[2] );
+
+	// Splat out each matrix entry to its own fltx4, kept in separate locals
+	// (not an array) so that they can remain in registers.
+	const fltx4 m00 = SplatXSIMD( row0 );
+	const fltx4 m01 = SplatYSIMD( row0 );
+	const fltx4 m02 = SplatZSIMD( row0 );
+
+	const fltx4 m10 = SplatXSIMD( row1 );
+	const fltx4 m11 = SplatYSIMD( row1 );
+	const fltx4 m12 = SplatZSIMD( row1 );
+
+	const fltx4 m20 = SplatXSIMD( row2 );
+	const fltx4 m21 = SplatYSIMD( row2 );
+	const fltx4 m22 = SplatZSIMD( row2 );
+
+	// Each output component is one matrix row dotted with (x,y,z).
+	// Trust in the compiler to schedule these operations correctly:
+	const fltx4 rotatedX = AddSIMD( AddSIMD( MulSIMD( x, m00 ), MulSIMD( y, m01 ) ), MulSIMD( z, m02 ) );
+	const fltx4 rotatedY = AddSIMD( AddSIMD( MulSIMD( x, m10 ), MulSIMD( y, m11 ) ), MulSIMD( z, m12 ) );
+	const fltx4 rotatedZ = AddSIMD( AddSIMD( MulSIMD( x, m20 ), MulSIMD( y, m21 ) ), MulSIMD( z, m22 ) );
+
+	x = rotatedX;
+	y = rotatedY;
+	z = rotatedZ;
+}
+
+// Assume the vectors are points, and transform them in place by the matrix:
+// the first three entries of each row are applied as in RotateBy(), then the
+// row's fourth entry is added.
+// If you have a long list of FourVectors structures that you all want
+// to transform by the same matrix, use FourVectors::TransformManyBy() instead.
+void FourVectors::TransformBy(const matrix3x4_t& matrix)
+{
+	// Load the three matrix rows. Sadly, matrix3x4_ts are often unaligned,
+	// so unaligned loads are used.
+	const fltx4 row0 = LoadUnalignedSIMD( matrix[0] );
+	const fltx4 row1 = LoadUnalignedSIMD( matrix[1] );
+	const fltx4 row2 = LoadUnalignedSIMD( matrix[2] );
+
+	// Splat out each matrix entry to its own fltx4, kept in separate locals
+	// (not an array) so that they can remain in registers.
+	const fltx4 m00 = SplatXSIMD( row0 );
+	const fltx4 m01 = SplatYSIMD( row0 );
+	const fltx4 m02 = SplatZSIMD( row0 );
+
+	const fltx4 m10 = SplatXSIMD( row1 );
+	const fltx4 m11 = SplatYSIMD( row1 );
+	const fltx4 m12 = SplatZSIMD( row1 );
+
+	const fltx4 m20 = SplatXSIMD( row2 );
+	const fltx4 m21 = SplatYSIMD( row2 );
+	const fltx4 m22 = SplatZSIMD( row2 );
+
+	// Trust in the compiler to schedule these operations correctly:
+	const fltx4 rotatedX = MaddSIMD( z, m02, AddSIMD( MulSIMD( x, m00 ), MulSIMD( y, m01 ) ) );
+	const fltx4 rotatedY = MaddSIMD( z, m12, AddSIMD( MulSIMD( x, m10 ), MulSIMD( y, m11 ) ) );
+	const fltx4 rotatedZ = MaddSIMD( z, m22, AddSIMD( MulSIMD( x, m20 ), MulSIMD( y, m21 ) ) );
+
+	// add each row's fourth entry
+	x = AddSIMD( rotatedX, ReplicateX4( matrix[0][3] ));
+	y = AddSIMD( rotatedY, ReplicateX4( matrix[1][3] ));
+	z = AddSIMD( rotatedZ, ReplicateX4( matrix[2][3] ));
+}
+
+
+
+/// quick, low quality perlin-style noise() function suitable for real time use.
+/// return value is -1..1. Only reliable around +/- 1 million or so.
+fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z );
+fltx4 NoiseSIMD( FourVectors const &v );
+
+// vector valued noise direction
+FourVectors DNoiseSIMD( FourVectors const &v );
+
+// vector value "curl" noise function. see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html
+FourVectors CurlNoiseSIMD( FourVectors const &v );
+
+
+/// calculate the absolute value of a packed single by ANDing each lane
+/// with g_SIMD_clear_signmask
+inline fltx4 fabs( const fltx4 & x )
+{
+	const fltx4 clearSignMask = LoadAlignedSIMD( g_SIMD_clear_signmask );
+	return AndSIMD( x, clearSignMask );
+}
+
+/// negate all four components of a SIMD packed single by XORing each lane
+/// with g_SIMD_signmask
+inline fltx4 fnegate( const fltx4 & x )
+{
+	const fltx4 signMask = LoadAlignedSIMD( g_SIMD_signmask );
+	return XorSIMD( x, signMask );
+}
+
+
+fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent);
+
+// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some
+// restrictions: fractional exponents are only handled with 2 bits of precision. Basically,
+// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25).
+// negative and fractional powers are handled by the SIMD reciprocal and square root approximation
+// instructions and so are not especially accurate ----Note that this routine does not raise
+// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)).
+inline fltx4 PowSIMD( const fltx4 & x, float exponent )
+{
+	// the helper takes the exponent multiplied by 4 and truncated to an int
+	const int fixedPointExponent = (int) ( 4.0 * exponent );
+	return Pow_FixedPoint_Exponent_SIMD( x, fixedPointExponent );
+}
+
+
+
+// random number generation - generate 4 random numbers quickly.
+
+void SeedRandSIMD(uint32 seed); // seed the random # generator
+fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range
+
+// for multithreaded, you need to use these and use the argument form of RandSIMD:
+int GetSIMDRandContext( void );
+void ReleaseSIMDRandContext( int nContext );
+
+// 4 random numbers in -1..1: takes RandSIMD()'s 0..1 output, doubles it and subtracts one
+FORCEINLINE fltx4 RandSignedSIMD( void )
+{
+	const fltx4 zeroToTwo = MulSIMD( Four_Twos, RandSIMD() );
+	return SubSIMD( zeroToTwo, Four_Ones );
+}
+
+
+// SIMD versions of mathlib simplespline functions
+// hermite basis function for smooth interpolation: 3*v^2 - 2*v^3
+// Similar to Gain(), but very cheap to call
+// value should be between 0 & 1 inclusive
+inline fltx4 SimpleSpline( const fltx4 & value )
+{
+	// The first two MULs are arranged to avoid a data dependency between them:
+	const fltx4 twoV = MulSIMD( value, Four_Twos );
+	const fltx4 vSquared = MulSIMD( value, value );
+
+	// Nice little ease-in, ease-out spline-like curve
+	const fltx4 threeVSquared = MulSIMD( Four_Threes, vSquared );
+	const fltx4 twoVCubed = MulSIMD( twoV, vSquared );
+	return SubSIMD( threeVSquared, twoVCubed );
+}
+
+// remaps a value in [A, B] from linear to spline using SimpleSpline, giving
+// C + (D-C) * SimpleSpline( (val-A) / (B-A) ). The caller passes the deltas and
+// the reciprocal precomputed. BMinusA is not used by the body.
+// The A == B case ( return val >= B ? D : C ) is not handled here.
+inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val,
+											 const fltx4 & A, const fltx4 & BMinusA,
+											 const fltx4 & OneOverBMinusA, const fltx4 & C,
+											 const fltx4 & DMinusC )
+{
+	const fltx4 fraction = MulSIMD( SubSIMD( val, A ), OneOverBMinusA );
+	const fltx4 eased = SimpleSpline( fraction );
+	return AddSIMD( C, MulSIMD( DMinusC, eased ) );
+}
+
+// Same as SimpleSplineRemapValWithDeltas, except that the linear fraction
+// (val-A)/(B-A) is clamped to 0..1 before the spline is applied.
+// BMinusA is not used by the body.
+// The A == B case ( return val >= B ? D : C ) is not handled here.
+inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val,
+													const fltx4 & A, const fltx4 & BMinusA,
+													const fltx4 & OneOverBMinusA, const fltx4 & C,
+													const fltx4 & DMinusC )
+{
+	const fltx4 fraction = MulSIMD( SubSIMD( val, A ), OneOverBMinusA );
+	const fltx4 clamped = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, fraction ) );
+	const fltx4 eased = SimpleSpline( clamped );
+	return AddSIMD( C, MulSIMD( DMinusC, eased ) );
+}
+
+// Fractional part of each lane. Uses the same rounding trick as FloorSIMD on
+// |val| to find its integer part, subtracts that from |val|, and then XORs
+// the sign bit of val back onto the result.
+FORCEINLINE fltx4 FracSIMD( const fltx4 &val )
+{
+ fltx4 fl4Abs = fabs( val );
+ // round |val| to an integer value (presumably Four_2ToThe23s is 2^23 -- defined elsewhere)
+ fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
+ // lanes that were rounded up get one subtracted
+ ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
+ return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
+}
+
+// Each lane modulo 2, keeping the sign of the input. Works like FracSIMD,
+// except that the rounded sum is ANDed with g_SIMD_lsbmask first and the
+// correction step subtracts two instead of one.
+// NOTE(review): g_SIMD_lsbmask is defined elsewhere; presumably it clears the
+// lowest mantissa bit so the rounded value is even -- confirm.
+FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val )
+{
+ fltx4 fl4Abs = fabs( val );
+ fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s );
+ // lanes where the rounded value ended up above |val| get two subtracted
+ ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival );
+ return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
+}
+
+// Same computation as Mod2SIMD, but skips the fabs() and the sign restore,
+// so the input is treated as already non-negative.
+FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val )
+{
+ fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s );
+ // lanes where the rounded value ended up above val get two subtracted
+ ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival );
+ return SubSIMD( val, ival );
+}
+
+
+// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi.
+// no range reduction is done - for values outside of 0..1 you won't like the results
+FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val )
+{
+	// really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between.
+	// sufficient for simple oscillation.
+	const fltx4 fourX = MulSIMD( val, Four_Fours );
+	const fltx4 fourMinusFourX = SubSIMD( Four_Fours, fourX );
+	return MulSIMD( val, fourMinusFourX );
+}
+
+// Better approximation of sin over 0..1 than _SinEst01SIMD. No range reduction is done.
+FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val )
+{
+	// not a bad approximation : parabola always over-estimates. Squared parabola always
+	// underestimates. So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin)
+	const fltx4 parabola = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) );
+	const fltx4 parabolaSq = MulSIMD( parabola, parabola );
+	const fltx4 blend = MulSIMD( Four_Point225s, SubSIMD( parabolaSq, parabola ) );
+	return AddSIMD( blend, parabola );
+}
+
+// full range useable implementations
+// Rough sine with range reduction: |val| is reduced modulo 2, lanes in the
+// second half of the period (reduced value >= 1) have one subtracted, and
+// _SinEst01SIMD is evaluated on the result. The sign is then fixed up from
+// the input's sign bit and the second-half mask.
+FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val )
+{
+ fltx4 fl4Abs = fabs( val );
+ fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
+ // all bits set in lanes whose reduced value is >= 1
+ fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
+ fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
+ fltx4 fl4Sin = _SinEst01SIMD( fl4val );
+ // flip the result's sign where exactly one of (input sign bit set, second half) holds;
+ // presumably g_SIMD_signmask holds only the sign bit of each lane -- defined elsewhere
+ fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
+ return fl4Sin;
+
+}
+
+// Sine with range reduction, same structure as SinEst01SIMD but evaluating
+// the better _Sin01SIMD approximation on the reduced value.
+FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val )
+{
+ fltx4 fl4Abs = fabs( val );
+ fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
+ // all bits set in lanes whose reduced value is >= 1
+ fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
+ fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
+ fltx4 fl4Sin = _Sin01SIMD( fl4val );
+ // flip the result's sign where exactly one of (input sign bit set, second half) holds;
+ // presumably g_SIMD_signmask holds only the sign bit of each lane -- defined elsewhere
+ fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
+ return fl4Sin;
+
+}
+
+// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1)
+
+// Computes the (1/a - 2) term of the formula above from a perlin-style bias
+// parameter, for passing to BiasSIMD.
+FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter )
+{
+	const fltx4 oneOverBias = ReciprocalSIMD( bias_parameter );
+	return SubSIMD( oneOverBias, Four_Twos );
+}
+
+// similar to bias function except pass precalced bias value from calling PreCalcBiasParameter.
+// Evaluates val / ( precalc_param*(1-val) + 1 ).
+FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param )
+{
+	//!!speed!! use reciprocal est?
+	//!!speed!! could save one op by precalcing _2_ values
+	const fltx4 oneMinusVal = SubSIMD( Four_Ones, val );
+	const fltx4 denominator = AddSIMD( MulSIMD( precalc_param, oneMinusVal ), Four_Ones );
+	return DivSIMD( val, denominator );
+}
+
+//-----------------------------------------------------------------------------
+// Box/plane test
+// Returns 1 if part of the box is on the front side of the plane (within
+// tolerance), 2 if part of it is behind, and 3 (1+2) if both.
+// NOTE: The w component of emins + emaxs must be 1 for this to work
+//-----------------------------------------------------------------------------
+FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f )
+{
+	// Build (normal.x, normal.y, normal.z, -dist) so a 4-component dot product
+	// with a corner whose w is 1 gives normal.corner - dist.
+	fltx4 planeEq = LoadUnalignedSIMD( p->normal.Base() );
+	const fltx4 negDist = ReplicateX4( -p->dist );
+	planeEq = SetWSIMD( planeEq, negDist );
+
+	const fltx4 posTolerance = ReplicateX4( tolerance );
+	const fltx4 negTolerance = ReplicateX4( -tolerance );
+
+	// Per component, pick the box corner farthest along the normal and the
+	// corner farthest against it.
+	const fltx4 nonNegative = CmpGeSIMD( planeEq, Four_Zeros );
+	const fltx4 farCorner = MaskedAssign( nonNegative, emaxs, emins );
+	const fltx4 nearCorner = MaskedAssign( nonNegative, emins, emaxs );
+
+	const fltx4 farDot = Dot4SIMD( planeEq, farCorner );
+	const fltx4 nearDot = Dot4SIMD( planeEq, nearCorner );
+
+	// 1 when the far corner reaches the front side, 2 when the near corner is behind
+	const fltx4 frontMask = CmpGeSIMD( farDot, posTolerance );
+	const fltx4 backMask = CmpGtSIMD( negTolerance, nearDot );
+	const fltx4 frontBit = MaskedAssign( frontMask, Four_Ones, Four_Zeros );
+	const fltx4 backBit = MaskedAssign( backMask, Four_Twos, Four_Zeros );
+
+	intx4 sides;
+	ConvertStoreAsIntsSIMD( &sides, AddSIMD( frontBit, backBit ) );
+	return sides[0];
+}
+
+#endif // _ssemath_h
diff --git a/mp/src/public/mathlib/ssequaternion.h b/mp/src/public/mathlib/ssequaternion.h
index 5d60961e..825a9e45 100644
--- a/mp/src/public/mathlib/ssequaternion.h
+++ b/mp/src/public/mathlib/ssequaternion.h
@@ -1,367 +1,367 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEQUATMATH_H
-#define SSEQUATMATH_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/ssemath.h"
-
-// Use this #define to allow SSE versions of Quaternion math
-// to exist on PC.
-// On PC, certain horizontal vector operations are not supported.
-// This causes the SSE implementation of quaternion math to mix the
-// vector and scalar floating point units, which is extremely
-// performance negative if you don't compile to native SSE2 (which
-// we don't as of Sept 1, 2007). So, it's best not to allow these
-// functions to exist at all. It's not good enough to simply replace
-// the contents of the functions with scalar math, because each call
-// to LoadAligned and StoreAligned will result in an unnecssary copy
-// of the quaternion, and several moves to and from the XMM registers.
-//
-// Basically, the problem you run into is that for efficient SIMD code,
-// you need to load the quaternions and vectors into SIMD registers and
-// keep them there as long as possible while doing only SIMD math,
-// whereas for efficient scalar code, each time you copy onto or ever
-// use a fltx4, it hoses your pipeline. So the difference has to be
-// in the management of temporary variables in the calling function,
-// not inside the math functions.
-//
-// If you compile assuming the presence of SSE2, the MSVC will abandon
-// the traditional x87 FPU operations altogether and make everything use
-// the SSE2 registers, which lessens this problem a little.
-
-// permitted only on 360, as we've done careful tuning on its Altivec math:
-#ifdef _X360
-#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
-#endif
-
-
-
-//---------------------------------------------------------------------
-// Load/store quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-#if ALLOW_SIMD_QUATERNION_MATH
-// Using STDC or SSE
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- StoreAlignedSIMD( pSIMD->Base(), a );
-}
-#endif
-#else
-
-// for the transitional class -- load a QuaternionAligned
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- XMStoreVector4A( pSIMD->Base(), a );
-}
-
-#endif
-
-
-#if ALLOW_SIMD_QUATERNION_MATH
-//---------------------------------------------------------------------
-// Make sure quaternions are within 180 degrees of one another, if not, reverse q
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 a = SubSIMD( p, q );
- fltx4 b = AddSIMD( p, q );
- a = Dot4SIMD( a, a );
- b = Dot4SIMD( b, b );
- fltx4 cmp = CmpGtSIMD( a, b );
- fltx4 result = MaskedAssign( cmp, NegSIMD(q), q );
- return result;
-}
-
-//---------------------------------------------------------------------
-// Normalize Quaternion
-//---------------------------------------------------------------------
-#if USE_STDC_FOR_SIMD
-
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result;
- radius = Dot4SIMD( q, q );
-
- if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
- {
- float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) );
- result = ReplicateX4( iradius );
- result = MulSIMD( result, q );
- return result;
- }
- return q;
-}
-
-#else
-
-// SSE + X360 implementation
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result, mask;
- radius = Dot4SIMD( q, q );
- mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0
- result = ReciprocalSqrtSIMD( radius );
- result = MulSIMD( result, q );
- return MaskedAssign( mask, q, result ); // if radius was 0, just return q
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// 0.0 returns p, 1.0 return q.
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 sclp, sclq, result;
- sclq = ReplicateX4( t );
- sclp = SubSIMD( Four_Ones, sclq );
- result = MulSIMD( sclp, p );
- result = MaddSIMD( sclq, q, result );
- return QuaternionNormalizeSIMD( result );
-}
-
-
-//---------------------------------------------------------------------
-// Blend Quaternions
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionBlendNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-//---------------------------------------------------------------------
-// Multiply Quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
- SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
- SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
- SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
- return result;
-}
-
-#else
-
-// X360
-extern const fltx4 g_QuatMultRowSign[4];
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- fltx4 q2, row, result;
- q2 = QuaternionAlignSIMD( p, q );
-
- row = XMVectorSwizzle( q2, 3, 2, 1, 0 );
- row = MulSIMD( row, g_QuatMultRowSign[0] );
- result = Dot4SIMD( row, p );
-
- row = XMVectorSwizzle( q2, 2, 3, 0, 1 );
- row = MulSIMD( row, g_QuatMultRowSign[1] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 4, 0 );
-
- row = XMVectorSwizzle( q2, 1, 0, 3, 2 );
- row = MulSIMD( row, g_QuatMultRowSign[2] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 2, 0 );
-
- row = MulSIMD( q2, g_QuatMultRowSign[3] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 1, 0 );
- return result;
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// Quaternion scale
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- float r;
- fltx4 q;
-
- // FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
- // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
- float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
- sinom = min( sinom, 1.f );
-
- float sinsom = sin( asin( sinom ) * t );
-
- t = sinsom / (sinom + FLT_EPSILON);
- SubFloat( q, 0 ) = t * SubFloat( p, 0 );
- SubFloat( q, 1 ) = t * SubFloat( p, 1 );
- SubFloat( q, 2 ) = t * SubFloat( p, 2 );
-
- // rescale rotation
- r = 1.0f - sinsom * sinsom;
-
- // Assert( r >= 0 );
- if (r < 0.0f)
- r = 0.0f;
- r = sqrt( r );
-
- // keep sign of rotation
- SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r );
- return q;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- fltx4 sinom = Dot3SIMD( p, p );
- sinom = SqrtSIMD( sinom );
- sinom = MinSIMD( sinom, Four_Ones );
- fltx4 sinsom = ArcSinSIMD( sinom );
- fltx4 t4 = ReplicateX4( t );
- sinsom = MulSIMD( sinsom, t4 );
- sinsom = SinSIMD( sinsom );
- sinom = AddSIMD( sinom, Four_Epsilons );
- sinom = ReciprocalSIMD( sinom );
- t4 = MulSIMD( sinsom, sinom );
- fltx4 result = MulSIMD( p, t4 );
-
- // rescale rotation
- sinsom = MulSIMD( sinsom, sinsom );
- fltx4 r = SubSIMD( Four_Ones, sinsom );
- r = MaxSIMD( r, Four_Zeros );
- r = SqrtSIMD( r );
-
- // keep sign of rotation
- fltx4 cmp = CmpGeSIMD( p, Four_Zeros );
- r = MaskedAssign( cmp, r, NegSIMD( r ) );
-
- result = __vrlimi(result, r, 1, 0);
- return result;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion sphereical linear interpolation
-//-----------------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- float omega, cosom, sinom, sclp, sclq;
-
- fltx4 result;
-
- // 0.0 returns p, 1.0 return q.
- cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) +
- SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
-
- if ( (1.0f + cosom ) > 0.000001f )
- {
- if ( (1.0f - cosom ) > 0.000001f )
- {
- omega = acos( cosom );
- sinom = sin( omega );
- sclp = sin( (1.0f - t)*omega) / sinom;
- sclq = sin( t*omega ) / sinom;
- }
- else
- {
- // TODO: add short circuit for cosom == 1.0f?
- sclp = 1.0f - t;
- sclq = t;
- }
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
- SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
- }
- else
- {
- SubFloat( result, 0 ) = -SubFloat( q, 1 );
- SubFloat( result, 1 ) = SubFloat( q, 0 );
- SubFloat( result, 2 ) = -SubFloat( q, 3 );
- SubFloat( result, 3 ) = SubFloat( q, 2 );
- sclp = sin( (1.0f - t) * (0.5f * M_PI));
- sclq = sin( t * (0.5f * M_PI));
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );
- }
-
- return result;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- return XMQuaternionSlerp( p, q, t );
-}
-
-#endif
-
-
-FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionSlerpNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-#endif // ALLOW_SIMD_QUATERNION_MATH
-
-#endif // SSEQUATMATH_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: - defines SIMD "structure of arrays" classes and functions.
+//
+//===========================================================================//
+#ifndef SSEQUATMATH_H
+#define SSEQUATMATH_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+
+#include "mathlib/ssemath.h"
+
+// Use this #define to allow SSE versions of Quaternion math
+// to exist on PC.
+// On PC, certain horizontal vector operations are not supported.
+// This causes the SSE implementation of quaternion math to mix the
+// vector and scalar floating point units, which is extremely
+// performance negative if you don't compile to native SSE2 (which
+// we don't as of Sept 1, 2007). So, it's best not to allow these
+// functions to exist at all. It's not good enough to simply replace
+// the contents of the functions with scalar math, because each call
+// to LoadAligned and StoreAligned will result in an unnecessary copy
+// of the quaternion, and several moves to and from the XMM registers.
+//
+// Basically, the problem you run into is that for efficient SIMD code,
+// you need to load the quaternions and vectors into SIMD registers and
+// keep them there as long as possible while doing only SIMD math,
+// whereas for efficient scalar code, each time you copy onto or ever
+// use a fltx4, it hoses your pipeline. So the difference has to be
+// in the management of temporary variables in the calling function,
+// not inside the math functions.
+//
+// If you compile assuming the presence of SSE2, the MSVC will abandon
+// the traditional x87 FPU operations altogether and make everything use
+// the SSE2 registers, which lessens this problem a little.
+
+// permitted only on 360, as we've done careful tuning on its Altivec math:
+#ifdef _X360
+#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
+#endif
+
+
+
+//---------------------------------------------------------------------
+// Load/store quaternions
+//---------------------------------------------------------------------
+#ifndef _X360
+#if ALLOW_SIMD_QUATERNION_MATH
+// Using STDC or SSE
+// Load a QuaternionAligned into a SIMD register through its Base() pointer.
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
+{
+	return LoadAlignedSIMD( pSIMD.Base() );
+}
+
+// Pointer overload: load the QuaternionAligned that pSIMD points at.
+// The load has to go through Base(). Forwarding pSIMD itself would select
+// this very overload again (it is the exact match for the argument type) and
+// the function would call itself without end.
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
+{
+	return LoadAlignedSIMD( pSIMD->Base() );
+}
+
+// Store the SIMD value a into the QuaternionAligned that pSIMD points at,
+// writing through its Base() pointer.
+FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+ StoreAlignedSIMD( pSIMD->Base(), a );
+}
+#endif
+#else
+
+// for the transitional class -- load a QuaternionAligned
+// (X360 build: goes straight to XMLoadVector4A on the Base() pointer)
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
+{
+	return XMLoadVector4A( pSIMD.Base() );
+}
+
+// X360 pointer overload: hands the quaternion pointer itself to XMLoadVector4A.
+FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
+{
+	return XMLoadVector4A( pSIMD );
+}
+
+// X360: store the SIMD value a into the QuaternionAligned that pSIMD points
+// at, using XMStoreVector4A on its Base() pointer.
+FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
+{
+ XMStoreVector4A( pSIMD->Base(), a );
+}
+
+#endif
+
+
+#if ALLOW_SIMD_QUATERNION_MATH
+//---------------------------------------------------------------------
+// Make sure quaternions are within 180 degrees of one another, if not, reverse q
+// Returns NegSIMD( q ) when |p - q|^2 > |p + q|^2, otherwise q unchanged.
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
+{
+	const fltx4 diff = SubSIMD( p, q );
+	const fltx4 sum = AddSIMD( p, q );
+	const fltx4 diffLenSqr = Dot4SIMD( diff, diff );
+	const fltx4 sumLenSqr = Dot4SIMD( sum, sum );
+
+	// q is "backwards" when it lies farther from p than -q does
+	const fltx4 qIsBackwards = CmpGtSIMD( diffLenSqr, sumLenSqr );
+	return MaskedAssign( qIsBackwards, NegSIMD( q ), q );
+}
+
+//---------------------------------------------------------------------
+// Normalize Quaternion
+//---------------------------------------------------------------------
+#if USE_STDC_FOR_SIMD
+
+// USE_STDC_FOR_SIMD implementation.
+// Scales q by 1 / sqrt( q . q ). A quaternion whose squared length is exactly
+// zero is returned unchanged.
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
+{
+	const fltx4 lengthSqr = Dot4SIMD( q, q );
+
+	if ( !SubFloat( lengthSqr, 0 ) )
+		return q; // zero length: nothing to scale
+
+	float invLength = 1.0f / sqrt( SubFloat( lengthSqr, 0 ) );
+	return MulSIMD( ReplicateX4( invLength ), q );
+}
+
+#else
+
+// SSE + X360 implementation
+// Branch-free: q * ReciprocalSqrtSIMD( q . q ) is always computed, then the
+// original q is selected wherever the squared length compared equal to zero.
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
+{
+	const fltx4 lengthSqr = Dot4SIMD( q, q );
+	const fltx4 isZeroLength = CmpEqSIMD( lengthSqr, Four_Zeros ); // all ones iff squared length = 0
+	const fltx4 normalized = MulSIMD( ReciprocalSqrtSIMD( lengthSqr ), q );
+	return MaskedAssign( isZeroLength, q, normalized ); // zero length: hand back q as is
+}
+
+#endif
+
+
+//---------------------------------------------------------------------
+// Normalized linear blend of p and q; q is used as given (no alignment to p).
+// t = 0.0 weights only p, t = 1.0 weights only q; the weighted sum is then
+// passed through QuaternionNormalizeSIMD().
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	const fltx4 weightQ = ReplicateX4( t );
+	const fltx4 weightP = SubSIMD( Four_Ones, weightQ );
+	const fltx4 blended = MaddSIMD( weightQ, q, MulSIMD( weightP, p ) );
+	return QuaternionNormalizeSIMD( blended );
+}
+
+
+//---------------------------------------------------------------------
+// Blend Quaternions
+// Runs q through QuaternionAlignSIMD( p, q ) first, then blends p with the
+// aligned quaternion via QuaternionBlendNoAlignSIMD().
+//---------------------------------------------------------------------
+FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	const fltx4 qAligned = QuaternionAlignSIMD( p, q );
+	return QuaternionBlendNoAlignSIMD( p, qAligned, t );
+}
+
+
+//---------------------------------------------------------------------
+// Multiply Quaternions
+//---------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+// Quaternion product of p and q, computed with scalar math one lane at a time.
+// q is first run through QuaternionAlignSIMD( p, q ), so the product is taken
+// with q2 (either q or its negation) rather than with q itself.
+// Lanes 0..2 of the result hold the vector part, lane 3 the scalar part.
+FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
+{
+ // decide if one of the quaternions is backwards
+ fltx4 q2, result;
+ q2 = QuaternionAlignSIMD( p, q );
+ // each output lane is a signed sum of four p-lane * q2-lane products
+ SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
+ SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
+ SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
+ SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
+ return result;
+}
+
+#else
+
+// X360
+// Quaternion product of p and q built from four 4-wide dot products.
+// For each output lane, q2 (q after QuaternionAlignSIMD) is reordered, given
+// that lane's signs from g_QuatMultRowSign (defined elsewhere), and dotted
+// with p.
+extern const fltx4 g_QuatMultRowSign[4];
+FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
+{
+ fltx4 q2, row, result;
+ q2 = QuaternionAlignSIMD( p, q );
+
+ // first row: its dot product becomes the initial value of result
+ row = XMVectorSwizzle( q2, 3, 2, 1, 0 );
+ row = MulSIMD( row, g_QuatMultRowSign[0] );
+ result = Dot4SIMD( row, p );
+
+ // second row, merged into result with __vrlimi mask 4
+ // NOTE(review): masks 4 / 2 / 1 presumably select the y / z / w lanes -- confirm against the __vrlimi documentation
+ row = XMVectorSwizzle( q2, 2, 3, 0, 1 );
+ row = MulSIMD( row, g_QuatMultRowSign[1] );
+ row = Dot4SIMD( row, p );
+ result = __vrlimi( result, row, 4, 0 );
+
+ // third row, merged with mask 2
+ row = XMVectorSwizzle( q2, 1, 0, 3, 2 );
+ row = MulSIMD( row, g_QuatMultRowSign[2] );
+ row = Dot4SIMD( row, p );
+ result = __vrlimi( result, row, 2, 0 );
+
+ // fourth row uses q2 in its original order, merged with mask 1
+ row = MulSIMD( q2, g_QuatMultRowSign[3] );
+ row = Dot4SIMD( row, p );
+ result = __vrlimi( result, row, 1, 0 );
+ return result;
+}
+
+#endif
+
+
+//---------------------------------------------------------------------
+// Quaternion scale
+//---------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+// Scale the rotation stored in p by t, using scalar math.
+// With sinOmega = min( |p.xyz|, 1 ), the vector part is rescaled so that its
+// length becomes sin( asin( sinOmega ) * t ); lane 3 is rebuilt as
+// sqrt( 1 - that value squared ) and takes the sign of p's lane 3.
+FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
+{
+	fltx4 scaled;
+
+	// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
+	// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
+	float sinOmega = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
+	sinOmega = min( sinOmega, 1.f );
+
+	float sinScaledOmega = sin( asin( sinOmega ) * t );
+
+	// factor for the vector part; FLT_EPSILON keeps the division defined when sinOmega is 0
+	float vecScale = sinScaledOmega / (sinOmega + FLT_EPSILON);
+	SubFloat( scaled, 0 ) = vecScale * SubFloat( p, 0 );
+	SubFloat( scaled, 1 ) = vecScale * SubFloat( p, 1 );
+	SubFloat( scaled, 2 ) = vecScale * SubFloat( p, 2 );
+
+	// rescale rotation, clamping at zero before the square root
+	float w = 1.0f - sinScaledOmega * sinScaledOmega;
+	w = sqrt( (w < 0.0f) ? 0.0f : w );
+
+	// keep sign of rotation
+	SubFloat( scaled, 3 ) = fsel( SubFloat( p, 3 ), w, -w );
+	return scaled;
+}
+
+#else
+
+// X360
+// Scale the rotation stored in p by t, keeping every step in SIMD registers.
+// Follows the same sequence of operations as the scalar version: clamp the
+// length taken from Dot3SIMD( p, p ) to 1, run it through asin, multiply by t,
+// take the sin, rescale p by the ratio, then rebuild the last lane.
+FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
+{
+ // sinom = min( sqrt( Dot3SIMD( p, p ) ), 1 )
+ fltx4 sinom = Dot3SIMD( p, p );
+ sinom = SqrtSIMD( sinom );
+ sinom = MinSIMD( sinom, Four_Ones );
+ // sinsom = sin( asin( sinom ) * t )
+ fltx4 sinsom = ArcSinSIMD( sinom );
+ fltx4 t4 = ReplicateX4( t );
+ sinsom = MulSIMD( sinsom, t4 );
+ sinsom = SinSIMD( sinsom );
+ // t4 = sinsom / ( sinom + epsilon ), applied to all four lanes of p
+ sinom = AddSIMD( sinom, Four_Epsilons );
+ sinom = ReciprocalSIMD( sinom );
+ t4 = MulSIMD( sinsom, sinom );
+ fltx4 result = MulSIMD( p, t4 );
+
+ // rescale rotation
+ // r = sqrt( max( 1 - sinsom^2, 0 ) )
+ sinsom = MulSIMD( sinsom, sinsom );
+ fltx4 r = SubSIMD( Four_Ones, sinsom );
+ r = MaxSIMD( r, Four_Zeros );
+ r = SqrtSIMD( r );
+
+ // keep sign of rotation
+ // the comparison is per lane: r is negated wherever the matching lane of p is below zero
+ fltx4 cmp = CmpGeSIMD( p, Four_Zeros );
+ r = MaskedAssign( cmp, r, NegSIMD( r ) );
+
+ // merge r into result with __vrlimi mask 1
+ // NOTE(review): presumably this replaces only the w lane -- confirm against the __vrlimi documentation
+ result = __vrlimi(result, r, 1, 0);
+ return result;
+}
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Quaternion spherical linear interpolation
+//-----------------------------------------------------------------------------
+#ifndef _X360
+
+// SSE and STDC
+// Interpolates between p and q with scalar math. q is used exactly as passed
+// in; this function does not call QuaternionAlignSIMD().
+FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+ float omega, cosom, sinom, sclp, sclq;
+
+ fltx4 result;
+
+ // 0.0 returns p, 1.0 return q.
+ // cosom is the 4-component dot product of p and q
+ cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) +
+ SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
+
+ // main path: cosom is not (almost) -1
+ if ( (1.0f + cosom ) > 0.000001f )
+ {
+ // cosom is not (almost) 1 either: use the sin-based weights
+ if ( (1.0f - cosom ) > 0.000001f )
+ {
+ omega = acos( cosom );
+ sinom = sin( omega );
+ sclp = sin( (1.0f - t)*omega) / sinom;
+ sclq = sin( t*omega ) / sinom;
+ }
+ else
+ {
+ // cosom is (almost) 1, so sinom would be (almost) 0: fall back to linear weights
+ // TODO: add short circuit for cosom == 1.0f?
+ sclp = 1.0f - t;
+ sclq = t;
+ }
+ SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
+ SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
+ SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
+ SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
+ }
+ else
+ {
+ // cosom is (almost) -1: blend p against a quaternion rebuilt from q's lanes as ( -q1, q0, -q3, q2 )
+ SubFloat( result, 0 ) = -SubFloat( q, 1 );
+ SubFloat( result, 1 ) = SubFloat( q, 0 );
+ SubFloat( result, 2 ) = -SubFloat( q, 3 );
+ SubFloat( result, 3 ) = SubFloat( q, 2 );
+ sclp = sin( (1.0f - t) * (0.5f * M_PI));
+ sclq = sin( t * (0.5f * M_PI));
+ // only lanes 0..2 are blended; lane 3 keeps the SubFloat( q, 2 ) value assigned above
+ SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
+ SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
+ SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );
+ }
+
+ return result;
+}
+
+#else
+
+// X360
+// Forwards p, q and t unchanged to XMQuaternionSlerp; no alignment of q is
+// done here.
+FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+ return XMQuaternionSlerp( p, q, t );
+}
+
+#endif
+
+
+// Slerp with alignment: q is first run through QuaternionAlignSIMD( p, q ),
+// then p and the aligned quaternion go to QuaternionSlerpNoAlignSIMD().
+FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
+{
+	const fltx4 qAligned = QuaternionAlignSIMD( p, q );
+	return QuaternionSlerpNoAlignSIMD( p, qAligned, t );
+}
+
+
+#endif // ALLOW_SIMD_QUATERNION_MATH
+
+#endif // SSEQUATMATH_H
+
diff --git a/mp/src/public/mathlib/vector.h b/mp/src/public/mathlib/vector.h
index 4b361640..c19261d7 100644
--- a/mp/src/public/mathlib/vector.h
+++ b/mp/src/public/mathlib/vector.h
@@ -1,2312 +1,2312 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR_H
-#define VECTOR_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "tier0/basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#ifndef _X360
-// For MMX intrinsics
-#include <xmmintrin.h>
-#endif
-
-#include "tier0/dbg.h"
-#include "tier0/threadtools.h"
-#include "mathlib/vector2d.h"
-#include "mathlib/math_pfns.h"
-#include "minmax.h"
-
-// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
-//#define VECTOR_PARANOIA 1
-
-// Uncomment this to make sure we don't do anything slow with our vectors
-//#define VECTOR_NO_SLOW_OPERATIONS 1
-
-
-// Used to make certain code easier to read.
-#define X_INDEX 0
-#define Y_INDEX 1
-#define Z_INDEX 2
-
-
-#ifdef VECTOR_PARANOIA
-#define CHECK_VALID( _v) Assert( (_v).IsValid() )
-#else
-#ifdef GNUC
-#define CHECK_VALID( _v)
-#else
-#define CHECK_VALID( _v) 0
-#endif
-#endif
-
-#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference!
-
-class VectorByValue;
-
-//=========================================================
-// 3D Vector
-//=========================================================
-class Vector
-{
-public:
- // Members
- vec_t x, y, z;
-
- // Construction/destruction:
- Vector(void);
- Vector(vec_t X, vec_t Y, vec_t Z);
- explicit Vector(vec_t XYZ); ///< broadcast initialize
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
- // TODO (Ilya): Should there be an init that takes a single float for consistency?
-
- // Got any nasty NAN's?
- bool IsValid() const;
- void Invalidate();
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // Cast to Vector2D...
- Vector2D& AsVector2D();
- const Vector2D& AsVector2D() const;
-
- // Initialization methods
- void Random( vec_t minVal, vec_t maxVal );
- inline void Zero(); ///< zero out a vector
-
- // equality
- bool operator==(const Vector& v) const;
- bool operator!=(const Vector& v) const;
-
- // arithmetic operations
- FORCEINLINE Vector& operator+=(const Vector &v);
- FORCEINLINE Vector& operator-=(const Vector &v);
- FORCEINLINE Vector& operator*=(const Vector &v);
- FORCEINLINE Vector& operator*=(float s);
- FORCEINLINE Vector& operator/=(const Vector &v);
- FORCEINLINE Vector& operator/=(float s);
- FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add
- FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub
-
-// negate the vector components
- void Negate();
-
- // Get the vector's magnitude.
- inline vec_t Length() const;
-
- // Get the vector's magnitude squared.
- FORCEINLINE vec_t LengthSqr(void) const
- {
- CHECK_VALID(*this);
- return (x*x + y*y + z*z);
- }
-
- // return true if this vector is (0,0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance &&
- z > -tolerance && z < tolerance);
- }
-
- vec_t NormalizeInPlace();
- Vector Normalized() const;
- bool IsLengthGreaterThan( float val ) const;
- bool IsLengthLessThan( float val ) const;
-
- // check if a vector is within the box defined by two other vectors
- FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax);
-
- // Get the distance from this vector to the other one.
- vec_t DistTo(const Vector &vOther) const;
-
- // Get the distance from this vector to the other one squared.
- // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline.
- // may be able to tidy this up after switching to VC7
- FORCEINLINE vec_t DistToSqr(const Vector &vOther) const
- {
- Vector delta;
-
- delta.x = x - vOther.x;
- delta.y = y - vOther.y;
- delta.z = z - vOther.z;
-
- return delta.LengthSqr();
- }
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual vector equation (because it's done per-component
- // rather than per-vector).
- void MulAdd(const Vector& a, const Vector& b, float scalar);
-
- // Dot product.
- vec_t Dot(const Vector& vOther) const;
-
- // assignment
- Vector& operator=(const Vector &vOther);
-
- // 2d
- vec_t Length2D(void) const;
- vec_t Length2DSqr(void) const;
-
- operator VectorByValue &() { return *((VectorByValue *)(this)); }
- operator const VectorByValue &() const { return *((const VectorByValue *)(this)); }
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
-// Vector(const Vector &vOther);
-
- // arithmetic operations
- Vector operator-(void) const;
-
- Vector operator+(const Vector& v) const;
- Vector operator-(const Vector& v) const;
- Vector operator*(const Vector& v) const;
- Vector operator/(const Vector& v) const;
- Vector operator*(float fl) const;
- Vector operator/(float fl) const;
-
- // Cross product between two vectors.
- Vector Cross(const Vector &vOther) const;
-
- // Returns a vector with the min or max in X, Y, and Z.
- Vector Min(const Vector &vOther) const;
- Vector Max(const Vector &vOther) const;
-
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector(const Vector& vOther);
-#endif
-};
-
-FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }
-
-
-#define USE_M64S ( ( !defined( _X360 ) ) )
-
-
-
-//=========================================================
-// 4D Short Vector (aligned on 8-byte boundary)
-//=========================================================
-class ALIGN8 ShortVector
-{
-public:
-
- short x, y, z, w;
-
- // Initialization
- void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 );
-
-
-#if USE_M64S
- __m64 &AsM64() { return *(__m64*)&x; }
- const __m64 &AsM64() const { return *(const __m64*)&x; }
-#endif
-
- // Setter
- void Set( const ShortVector& vOther );
- void Set( const short ix, const short iy, const short iz, const short iw );
-
- // array access...
- short operator[](int i) const;
- short& operator[](int i);
-
- // Base address...
- short* Base();
- short const* Base() const;
-
- // equality
- bool operator==(const ShortVector& v) const;
- bool operator!=(const ShortVector& v) const;
-
- // Arithmetic operations
- FORCEINLINE ShortVector& operator+=(const ShortVector &v);
- FORCEINLINE ShortVector& operator-=(const ShortVector &v);
- FORCEINLINE ShortVector& operator*=(const ShortVector &v);
- FORCEINLINE ShortVector& operator*=(float s);
- FORCEINLINE ShortVector& operator/=(const ShortVector &v);
- FORCEINLINE ShortVector& operator/=(float s);
- FORCEINLINE ShortVector operator*(float fl) const;
-
-private:
-
- // No copy constructors allowed if we're in optimal mode
-// ShortVector(ShortVector const& vOther);
-
- // No assignment operators either...
-// ShortVector& operator=( ShortVector const& src );
-
-} ALIGN8_POST;
-
-
-
-
-
-
-//=========================================================
-// 4D Integer Vector
-//=========================================================
-// Four-component integer vector. Only the interface is declared here; the
-// member functions are defined inline further down in this header.
-class IntVector4D
-{
-public:
-
-    // Components. operator[] and Base() address them as an int[4] in this order.
-    int x;
-    int y;
-    int z;
-    int w;
-
-    // Assigns all four components; every argument defaults to zero.
-    void Init( int ix = 0, int iy = 0, int iz = 0, int iw = 0 );
-
-#if USE_M64S
-    // Views the storage that starts at x as an __m64.
-    __m64 &AsM64() { return *(__m64*)&x; }
-    const __m64 &AsM64() const { return *(const __m64*)&x; }
-#endif
-
-    // Overwrite the components from another vector or from four scalars.
-    void Set( const IntVector4D& vOther );
-    void Set( const int ix, const int iy, const int iz, const int iw );
-
-    // Indexed component access (0 = x ... 3 = w).
-    int operator[]( int i ) const;
-    int& operator[]( int i );
-
-    // Address of the first component.
-    int* Base();
-    const int* Base() const;
-
-    // Exact component-wise comparison.
-    bool operator==( const IntVector4D& v ) const;
-    bool operator!=( const IntVector4D& v ) const;
-
-    // Component-wise arithmetic.
-    FORCEINLINE IntVector4D& operator+=( const IntVector4D &v );
-    FORCEINLINE IntVector4D& operator-=( const IntVector4D &v );
-    FORCEINLINE IntVector4D& operator*=( const IntVector4D &v );
-    FORCEINLINE IntVector4D& operator*=( float s );
-    FORCEINLINE IntVector4D& operator/=( const IntVector4D &v );
-    FORCEINLINE IntVector4D& operator/=( float s );
-    FORCEINLINE IntVector4D operator*( float fl ) const;
-
-private:
-
-    // Copying stays enabled: the private declarations that would forbid it
-    // are kept below, commented out.
-    // IntVector4D(IntVector4D const& vOther);
-    // IntVector4D& operator=( IntVector4D const& src );
-
-};
-
-
-
-//-----------------------------------------------------------------------------
-// Allows us to specifically pass the vector by value when we need to
-//-----------------------------------------------------------------------------
-// Vector subclass used where a vector has to be handed over by value.
-class VectorByValue : public Vector
-{
-public:
-    // Default construction defers to Vector's default constructor.
-    VectorByValue( void )
-        : Vector()
-    {
-    }
-
-    // Component-wise construction.
-    VectorByValue( vec_t X, vec_t Y, vec_t Z )
-        : Vector( X, Y, Z )
-    {
-    }
-
-    // Copy construction is routed through assignment.
-    VectorByValue( const VectorByValue& vOther )
-    {
-        *this = vOther;
-    }
-};
-
-
-//-----------------------------------------------------------------------------
-// Utility to simplify table construction. No constructor means can use
-// traditional C-style initialization
-//-----------------------------------------------------------------------------
-// Plain x/y/z triple with no constructor, so tables of it can be written with
-// C-style brace initialisation. Converts to Vector by reinterpreting storage.
-class TableVector
-{
-public:
-    vec_t x, y, z;
-
-    // View this object's storage as a Vector.
-    operator Vector &()
-    {
-        return *reinterpret_cast<Vector *>( this );
-    }
-
-    operator const Vector &() const
-    {
-        return *reinterpret_cast<const Vector *>( this );
-    }
-
-    // Indexed component access (0 = x, 1 = y, 2 = z); asserts on a bad index.
-    inline vec_t& operator[]( int i )
-    {
-        Assert( (i >= 0) && (i < 3) );
-        vec_t *pComponents = reinterpret_cast<vec_t *>( this );
-        return pComponents[i];
-    }
-
-    inline vec_t operator[]( int i ) const
-    {
-        Assert( (i >= 0) && (i < 3) );
-        const vec_t *pComponents = reinterpret_cast<const vec_t *>( this );
-        return pComponents[i];
-    }
-};
-
-
-//-----------------------------------------------------------------------------
-// Here's where we add all those lovely SSE optimized routines
-//-----------------------------------------------------------------------------
-
-// Vector carrying the ALIGN16 / ALIGN16_POST alignment macros plus a fourth
-// float (w) that fills out the aligned slot.
-class ALIGN16 VectorAligned : public Vector
-{
-public:
-    // Leaves the components to Vector's default constructor.
-    inline VectorAligned( void )
-    {
-    }
-
-    // Component-wise construction; w is not touched.
-    inline VectorAligned( vec_t X, vec_t Y, vec_t Z )
-    {
-        Init( X, Y, Z );
-    }
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-
-private:
-    // Declared but never defined: copy construction is disallowed in this mode.
-    VectorAligned( const VectorAligned& vOther );
-    VectorAligned( const Vector &vOther );
-
-#else
-public:
-    // Copies x, y and z from a plain Vector; w is not touched.
-    explicit VectorAligned( const Vector &vOther )
-    {
-        Init( vOther.x, vOther.y, vOther.z );
-    }
-
-    // Assigns x, y and z from a plain Vector; w is not touched.
-    VectorAligned& operator=( const Vector &vOther )
-    {
-        Init( vOther.x, vOther.y, vOther.z );
-        return *this;
-    }
-
-#endif
-    // this space is used anyway
-    float w;
-} ALIGN16_POST;
-
-//-----------------------------------------------------------------------------
-// Vector related operations
-//-----------------------------------------------------------------------------
-
-// Vector clear: sets all three components of a to zero
-FORCEINLINE void VectorClear( Vector& a );
-
-// Copy: component-wise copy of src into dst
-FORCEINLINE void VectorCopy( const Vector& src, Vector& dst );
-
-// Vector arithmetic: component-wise, written into the trailing output argument
-FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result );
-FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result );
-FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result );
-inline void VectorScale ( const Vector& in, vec_t scale, Vector& result );
-// Don't mark this as inline in its function declaration. That's only necessary on its
-// definition, and 'inline' here leads to gcc warnings.
-void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
-
-// Vector equality with tolerance (per-component absolute difference)
-bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f );
-
-// Expands a vector into a comma-separated x, y, z argument list
-#define VectorExpand(v) (v).x, (v).y, (v).z
-
-
-// Normalization
-// FIXME: Can't use quite yet
-//vec_t VectorNormalize( Vector& v );
-
-// Length
-inline vec_t VectorLength( const Vector& v );
-
-// Dot Product
-FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b);
-
-// Cross product; the definition asserts that result aliases neither input
-void CrossProduct(const Vector& a, const Vector& b, Vector& result );
-
-// Store the min or max of each of x, y, and z into the result.
-void VectorMin( const Vector &a, const Vector &b, Vector &result );
-void VectorMax( const Vector &a, const Vector &b, Vector &result );
-
-// Linearly interpolate between two vectors (t = 0 gives src1, t = 1 gives src2)
-void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest );
-Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t );
-
-// Builds a Vector whose three components all equal x.
-FORCEINLINE Vector ReplicateToVector( float x )
-{
-    Vector vecSplat( x, x, x );
-    return vecSplat;
-}
-
-// Tests whether vecTargetPosition lies inside the view cone that starts at
-// vecSrcPosition, points along vecLookDirection and has a half-angle whose
-// cosine is flCosHalfFOV. Supports a field of view of up to 180 degrees.
-FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition,
-    Vector const &vecTargetPosition,
-    Vector const &vecLookDirection, float flCosHalfFOV )
-{
-    Vector vecToTarget = vecTargetPosition - vecSrcPosition;
-    const float flProjection = DotProduct( vecLookDirection, vecToTarget );
-
-    // A negative projection means the target is behind the look direction.
-    if ( flProjection < 0 )
-    {
-        return false;
-    }
-
-    // Compare squared quantities so no square root is needed:
-    // a / sqrt(b) > c  is equivalent to  a^2 > b * c^2  (for a, c >= 0).
-    const float flDistSqr = vecToTarget.LengthSqr();
-    const bool bInside = ( flProjection * flProjection > flDistSqr * flCosHalfFOV * flCosHalfFOV );
-    return bInside;
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Cross product, returned by value
-Vector CrossProduct( const Vector& a, const Vector& b );
-
-// Random vector creation; each component is drawn between minVal and maxVal
-Vector RandomVector( vec_t minVal, vec_t maxVal );
-
-#endif
-
-// Defined outside this header.
-float RandomVectorInUnitSphere( Vector *pVector );
-float RandomVectorInUnitCircle( Vector2D *pVector );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-// Default constructor. Leaves the components untouched unless both _DEBUG and
-// VECTOR_PARANOIA are defined, in which case they are poisoned with NaN.
-inline Vector::Vector(void)
-{
-#if defined( _DEBUG ) && defined( VECTOR_PARANOIA )
-    // Initialize to NAN to catch errors
-    x = VEC_T_NAN;
-    y = VEC_T_NAN;
-    z = VEC_T_NAN;
-#endif
-}
-
-// Component-wise constructor; validates the result via CHECK_VALID.
-inline Vector::Vector(vec_t X, vec_t Y, vec_t Z)
-{
-    x = X;
-    y = Y;
-    z = Z;
-    CHECK_VALID(*this);
-}
-
-// Splat constructor: all three components receive XYZ.
-inline Vector::Vector(vec_t XYZ)
-{
-    x = XYZ;
-    y = XYZ;
-    z = XYZ;
-    CHECK_VALID(*this);
-}
-
-//inline Vector::Vector(const float *pFloat)
-//{
-// Assert( pFloat );
-// x = pFloat[0]; y = pFloat[1]; z = pFloat[2];
-// CHECK_VALID(*this);
-//}
-
-#if 0
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector::Vector(const Vector &vOther)
-{
- CHECK_VALID(vOther);
- x = vOther.x; y = vOther.y; z = vOther.z;
-}
-#endif
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-// Assigns all three components, then validates the vector.
-inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz )
-{
-    x = ix;
-    y = iy;
-    z = iz;
-    CHECK_VALID(*this);
-}
-
-// Fills each component with minVal plus a rand()-derived fraction of the
-// (maxVal - minVal) span. rand() is called once per component, x then y then z.
-inline void Vector::Random( vec_t minVal, vec_t maxVal )
-{
-    const vec_t flSpan = maxVal - minVal;
-
-    x = minVal + ( (float)rand() / VALVE_RAND_MAX ) * flSpan;
-    y = minVal + ( (float)rand() / VALVE_RAND_MAX ) * flSpan;
-    z = minVal + ( (float)rand() / VALVE_RAND_MAX ) * flSpan;
-
-    CHECK_VALID(*this);
-}
-
-// Sets every component to zero.
-// This should really be a single opcode on the PowerPC (move r0 onto the vec reg)
-inline void Vector::Zero()
-{
-    x = 0.0f;
-    y = 0.0f;
-    z = 0.0f;
-}
-
-// Free-function form of zeroing: sets every component of a to zero.
-inline void VectorClear( Vector& a )
-{
-    a.x = 0.0f;
-    a.y = 0.0f;
-    a.z = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-// Copy assignment; the source is validated before it is copied.
-inline Vector& Vector::operator=(const Vector &vOther)
-{
-    CHECK_VALID(vOther);
-    x = vOther.x;
-    y = vOther.y;
-    z = vOther.z;
-    return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-// Mutable indexed access (0 = x, 1 = y, 2 = z); asserts on a bad index.
-inline vec_t& Vector::operator[](int i)
-{
-    Assert( (i >= 0) && (i < 3) );
-    vec_t *pComponents = reinterpret_cast<vec_t *>( this );
-    return pComponents[i];
-}
-
-// Read-only indexed access; returns the component by value.
-inline vec_t Vector::operator[](int i) const
-{
-    Assert( (i >= 0) && (i < 3) );
-    const vec_t *pComponents = reinterpret_cast<const vec_t *>( this );
-    return pComponents[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-// Returns this object's address viewed as a vec_t array.
-inline vec_t* Vector::Base()
-{
-    vec_t *pFirst = (vec_t*)this;
-    return pFirst;
-}
-
-// Const overload of Base().
-inline vec_t const* Vector::Base() const
-{
-    vec_t const *pFirst = (vec_t const*)this;
-    return pFirst;
-}
-
-//-----------------------------------------------------------------------------
-// Cast to Vector2D...
-//-----------------------------------------------------------------------------
-
-// Reinterprets this vector's storage as a Vector2D.
-inline Vector2D& Vector::AsVector2D()
-{
-    Vector2D *pAs2D = (Vector2D*)this;
-    return *pAs2D;
-}
-
-// Const overload of AsVector2D().
-inline const Vector2D& Vector::AsVector2D() const
-{
-    const Vector2D *pAs2D = (const Vector2D*)this;
-    return *pAs2D;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-// True only when IsFinite() accepts all three components.
-inline bool Vector::IsValid() const
-{
-    if ( !IsFinite(x) )
-        return false;
-    if ( !IsFinite(y) )
-        return false;
-    return IsFinite(z);
-}
-
-//-----------------------------------------------------------------------------
-// Invalidate
-//-----------------------------------------------------------------------------
-
-// Poisons every component with VEC_T_NAN. The _DEBUG / VECTOR_PARANOIA guards
-// are commented out, so this happens in every build configuration.
-inline void Vector::Invalidate()
-{
-//#ifdef _DEBUG
-//#ifdef VECTOR_PARANOIA
-    x = VEC_T_NAN;
-    y = VEC_T_NAN;
-    z = VEC_T_NAN;
-//#endif
-//#endif
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-// Exact (bitwise-value) equality of all three components.
-inline bool Vector::operator==( const Vector& src ) const
-{
-    CHECK_VALID(src);
-    CHECK_VALID(*this);
-    if ( src.x != x )
-        return false;
-    if ( src.y != y )
-        return false;
-    return ( src.z == z );
-}
-
-// True as soon as any component differs.
-inline bool Vector::operator!=( const Vector& src ) const
-{
-    CHECK_VALID(src);
-    CHECK_VALID(*this);
-    if ( src.x != x )
-        return true;
-    if ( src.y != y )
-        return true;
-    return ( src.z != z );
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-// Copies src into dst component by component after validating src.
-FORCEINLINE void VectorCopy( const Vector& src, Vector& dst )
-{
-    CHECK_VALID(src);
-    dst.x = src.x; dst.y = src.y; dst.z = src.z;
-}
-
-// Writes x, y, z into rgfl[0..2]; rgfl must be non-null (asserted).
-inline void Vector::CopyToArray(float* rgfl) const
-{
-    Assert( rgfl );
-    CHECK_VALID(*this);
-    rgfl[0] = x;
-    rgfl[1] = y;
-    rgfl[2] = z;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-// #pragma message("TODO: these should be SSE")
-
-// Flips the sign of every component in place.
-inline void Vector::Negate()
-{
-    CHECK_VALID(*this);
-    x = -x;
-    y = -y;
-    z = -z;
-}
-
-// Component-wise in-place addition.
-FORCEINLINE Vector& Vector::operator+=(const Vector& v)
-{
-    CHECK_VALID(*this);
-    CHECK_VALID(v);
-    x += v.x;
-    y += v.y;
-    z += v.z;
-    return *this;
-}
-
-// Component-wise in-place subtraction.
-FORCEINLINE Vector& Vector::operator-=(const Vector& v)
-{
-    CHECK_VALID(*this);
-    CHECK_VALID(v);
-    x -= v.x;
-    y -= v.y;
-    z -= v.z;
-    return *this;
-}
-
-// Uniform in-place scale by fl.
-FORCEINLINE Vector& Vector::operator*=(float fl)
-{
-    x *= fl; y *= fl; z *= fl;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-// Component-wise in-place product with v.
-FORCEINLINE Vector& Vector::operator*=(const Vector& v)
-{
-    CHECK_VALID(v);
-    x *= v.x; y *= v.y; z *= v.z;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-// Adds the scalar fl to every component.
-// this ought to be an opcode.
-FORCEINLINE Vector& Vector::operator+=(float fl)
-{
-    x += fl; y += fl; z += fl;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-// Subtracts the scalar fl from every component.
-FORCEINLINE Vector& Vector::operator-=(float fl)
-{
-    x -= fl; y -= fl; z -= fl;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-
-
-// Uniform in-place division, implemented as one reciprocal and three
-// multiplies. fl must be non-zero (asserted).
-FORCEINLINE Vector& Vector::operator/=(float fl)
-{
-    Assert( fl != 0.0f );
-    const float flInverse = 1.0f / fl;
-    x *= flInverse; y *= flInverse; z *= flInverse;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-// Component-wise in-place division; every component of v must be non-zero
-// (asserted).
-FORCEINLINE Vector& Vector::operator/=(const Vector& v)
-{
-    CHECK_VALID(v);
-    Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f );
-    x /= v.x; y /= v.y; z /= v.z;
-    CHECK_VALID(*this);
-    return *this;
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Short Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-// Assigns all four components.
-inline void ShortVector::Init( short ix, short iy, short iz, short iw )
-{
-    x = ix;
-    y = iy;
-    z = iz;
-    w = iw;
-}
-
-// Copies all four components from vOther.
-FORCEINLINE void ShortVector::Set( const ShortVector& vOther )
-{
-    x = vOther.x; y = vOther.y;
-    z = vOther.z; w = vOther.w;
-}
-
-// Assigns all four components from scalars.
-FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw )
-{
-    x = ix; y = iy;
-    z = iz; w = iw;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-// Read-only indexed access (0 = x ... 3 = w); asserts on a bad index.
-inline short ShortVector::operator[](int i) const
-{
-    Assert( (i >= 0) && (i < 4) );
-    const short *pComponents = reinterpret_cast<const short *>( this );
-    return pComponents[i];
-}
-
-// Mutable indexed access.
-inline short& ShortVector::operator[](int i)
-{
-    Assert( (i >= 0) && (i < 4) );
-    short *pComponents = reinterpret_cast<short *>( this );
-    return pComponents[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-// Returns this object's address viewed as a short array.
-inline short* ShortVector::Base()
-{
-    short *pFirst = (short*)this;
-    return pFirst;
-}
-
-// Const overload of Base().
-inline short const* ShortVector::Base() const
-{
-    short const *pFirst = (short const*)this;
-    return pFirst;
-}
-
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-// True when all four components match.
-inline bool ShortVector::operator==( const ShortVector& src ) const
-{
-    if ( src.x != x || src.y != y )
-        return false;
-    return (src.z == z) && (src.w == w);
-}
-
-// True when at least one component differs.
-inline bool ShortVector::operator!=( const ShortVector& src ) const
-{
-    if ( src.x != x || src.y != y )
-        return true;
-    return (src.z != z) || (src.w != w);
-}
-
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-// Component-wise in-place addition.
-FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v)
-{
-    x += v.x;
-    y += v.y;
-    z += v.z;
-    w += v.w;
-    return *this;
-}
-
-// Component-wise in-place subtraction.
-FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v)
-{
-    x -= v.x;
-    y -= v.y;
-    z -= v.z;
-    w -= v.w;
-    return *this;
-}
-
-// Scales every component by fl; each float product is converted back to short.
-FORCEINLINE ShortVector& ShortVector::operator*=(float fl)
-{
-    x *= fl; y *= fl;
-    z *= fl; w *= fl;
-    return *this;
-}
-
-// Component-wise in-place product with v.
-FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v)
-{
-    x *= v.x; y *= v.y;
-    z *= v.z; w *= v.w;
-    return *this;
-}
-
-// Divides every component by fl (must be non-zero, asserted) and converts the
-// quotient back to short, truncating toward zero.
-//
-// The division is done directly instead of multiplying by a precomputed
-// reciprocal: 1.0f / fl is itself rounded, so x * (1.0f / fl) can land just
-// below the exact quotient and truncate one too low (e.g. 41 * (1.0f / 41.0f)
-// evaluates to 0.99999994f, which truncates to 0 rather than 1).
-FORCEINLINE ShortVector& ShortVector::operator/=(float fl)
-{
-    Assert( fl != 0.0f );
-    x = (short)( x / fl );
-    y = (short)( y / fl );
-    z = (short)( z / fl );
-    w = (short)( w / fl );
-    return *this;
-}
-
-// Component-wise in-place integer division; every component of v must be
-// non-zero (asserted).
-FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v)
-{
-    Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
-    x /= v.x; y /= v.y;
-    z /= v.z; w /= v.w;
-    return *this;
-}
-
-// Writes src scaled by fl into res; fl must be finite (asserted). Each float
-// product is converted back to short on assignment.
-FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res )
-{
-    Assert( IsFinite(fl) );
-    res.x = src.x * fl; res.y = src.y * fl;
-    res.z = src.z * fl; res.w = src.w * fl;
-}
-
-// Returns a copy of this vector scaled by fl.
-FORCEINLINE ShortVector ShortVector::operator*(float fl) const
-{
-    ShortVector vecScaled;
-    ShortVectorMultiply( *this, fl, vecScaled );
-    return vecScaled;
-}
-
-
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Integer Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-// Assigns all four components.
-inline void IntVector4D::Init( int ix, int iy, int iz, int iw )
-{
-    x = ix;
-    y = iy;
-    z = iz;
-    w = iw;
-}
-
-// Copies all four components from vOther.
-FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther )
-{
-    x = vOther.x; y = vOther.y;
-    z = vOther.z; w = vOther.w;
-}
-
-// Assigns all four components from scalars.
-FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw )
-{
-    x = ix; y = iy;
-    z = iz; w = iw;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-// Read-only indexed access (0 = x ... 3 = w); asserts on a bad index.
-inline int IntVector4D::operator[](int i) const
-{
-    Assert( (i >= 0) && (i < 4) );
-    const int *pComponents = reinterpret_cast<const int *>( this );
-    return pComponents[i];
-}
-
-// Mutable indexed access.
-inline int& IntVector4D::operator[](int i)
-{
-    Assert( (i >= 0) && (i < 4) );
-    int *pComponents = reinterpret_cast<int *>( this );
-    return pComponents[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-// Returns this object's address viewed as an int array.
-inline int* IntVector4D::Base()
-{
-    int *pFirst = (int*)this;
-    return pFirst;
-}
-
-// Const overload of Base().
-inline int const* IntVector4D::Base() const
-{
-    int const *pFirst = (int const*)this;
-    return pFirst;
-}
-
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-// True when all four components match.
-inline bool IntVector4D::operator==( const IntVector4D& src ) const
-{
-    if ( src.x != x || src.y != y )
-        return false;
-    return (src.z == z) && (src.w == w);
-}
-
-// True when at least one component differs.
-inline bool IntVector4D::operator!=( const IntVector4D& src ) const
-{
-    if ( src.x != x || src.y != y )
-        return true;
-    return (src.z != z) || (src.w != w);
-}
-
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-// Component-wise in-place addition.
-FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v)
-{
-    x += v.x;
-    y += v.y;
-    z += v.z;
-    w += v.w;
-    return *this;
-}
-
-// Component-wise in-place subtraction.
-FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v)
-{
-    x -= v.x;
-    y -= v.y;
-    z -= v.z;
-    w -= v.w;
-    return *this;
-}
-
-// Scales every component by fl; each float product is converted back to int.
-FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl)
-{
-    x *= fl; y *= fl;
-    z *= fl; w *= fl;
-    return *this;
-}
-
-// Component-wise in-place product with v.
-FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v)
-{
-    x *= v.x; y *= v.y;
-    z *= v.z; w *= v.w;
-    return *this;
-}
-
-// Divides every component by fl (must be non-zero, asserted) and converts the
-// quotient back to int, truncating toward zero.
-//
-// The division is done directly instead of multiplying by a precomputed
-// reciprocal: 1.0f / fl is itself rounded, so x * (1.0f / fl) can land just
-// below the exact quotient and truncate one too low (e.g. 41 * (1.0f / 41.0f)
-// evaluates to 0.99999994f, which truncates to 0 rather than 1).
-FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl)
-{
-    Assert( fl != 0.0f );
-    x = (int)( x / fl );
-    y = (int)( y / fl );
-    z = (int)( z / fl );
-    w = (int)( w / fl );
-    return *this;
-}
-
-// Component-wise in-place integer division; every component of v must be
-// non-zero (asserted).
-FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v)
-{
-    Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
-    x /= v.x; y /= v.y;
-    z /= v.z; w /= v.w;
-    return *this;
-}
-
-// Writes src scaled by fl into res; fl must be finite (asserted). Each float
-// product is converted back to int on assignment.
-FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res )
-{
-    Assert( IsFinite(fl) );
-    res.x = src.x * fl; res.y = src.y * fl;
-    res.z = src.z * fl; res.w = src.w * fl;
-}
-
-// Returns a copy of this vector scaled by fl.
-FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const
-{
-    IntVector4D vecScaled;
-    IntVector4DMultiply( *this, fl, vecScaled );
-    return vecScaled;
-}
-
-
-
-// =======================
-
-
-// c = a + b, component-wise. c may alias either input.
-FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c )
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    c.x = a.x + b.x; c.y = a.y + b.y; c.z = a.z + b.z;
-}
-
-// c = a - b, component-wise. c may alias either input.
-FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c )
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    c.x = a.x - b.x; c.y = a.y - b.y; c.z = a.z - b.z;
-}
-
-// c = a scaled uniformly by b; b must be finite (asserted).
-FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c )
-{
-    CHECK_VALID(a);
-    Assert( IsFinite(b) );
-    c.x = a.x * b; c.y = a.y * b; c.z = a.z * b;
-}
-
-// c = a * b, component-wise.
-FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c )
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    c.x = a.x * b.x; c.y = a.y * b.y; c.z = a.z * b.z;
-}
-
-// Kept for backwards compatibility: simply forwards to the scalar
-// VectorMultiply overload.
-inline void VectorScale ( const Vector& in, vec_t scale, Vector& result )
-{
-    VectorMultiply( in, scale, result );
-}
-
-
-// c = a / b for a scalar b (must be non-zero, asserted); implemented as one
-// reciprocal followed by three multiplies.
-FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c )
-{
-    CHECK_VALID(a);
-    Assert( b != 0.0f );
-    const vec_t flInverse = 1.0f / b;
-    c.x = a.x * flInverse; c.y = a.y * flInverse; c.z = a.z * flInverse;
-}
-
-// c = a / b, component-wise; every component of b must be non-zero (asserted).
-FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c )
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) );
-    c.x = a.x / b.x; c.y = a.y / b.y; c.z = a.z / b.z;
-}
-
-// FIXME: Remove
-// Kept for backwards compatibility: sets this vector to a + b * scalar.
-inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar)
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    x = a.x + b.x * scalar; y = a.y + b.y * scalar; z = a.z + b.z * scalar;
-}
-
-// dest = src1 + (src2 - src1) * t, component-wise.
-inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest )
-{
-    CHECK_VALID(src1);
-    CHECK_VALID(src2);
-    dest.x = src1.x + (src2.x - src1.x) * t; dest.y = src1.y + (src2.y - src1.y) * t; dest.z = src1.z + (src2.z - src1.z) * t;
-}
-
-// Value-returning form of VectorLerp.
-inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t )
-{
-    Vector vecBlend;
-    VectorLerp( src1, src2, t, vecBlend );
-    return vecBlend;
-}
-
-//-----------------------------------------------------------------------------
-// Temporary storage for vector results so const Vector& results can be returned
-//-----------------------------------------------------------------------------
-// Hands out the next slot of a fixed, function-local pool of 128 Vectors and
-// advances a shared index on every call. Slots are recycled once the index
-// wraps, so a returned reference is overwritten-by-reuse after 128 more calls.
-inline Vector &AllocTempVector()
-{
- static Vector s_vecTemp[128];
- static CInterlockedInt s_nIndex;
-
- int nIndex;
- for (;;)
- {
- int nOldIndex = s_nIndex;
- // The 0x7F mask keeps only the low seven bits, so nIndex is always in
- // [0, 127]; the 0x10000 part of the increment never survives the mask.
- nIndex = ( (nOldIndex + 0x10001) & 0x7F );
-
- // NOTE(review): AssignIf presumably stores nIndex only while s_nIndex still
- // equals nOldIndex (compare-and-swap) - confirm against CInterlockedInt.
- if ( s_nIndex.AssignIf( nOldIndex, nIndex ) )
- {
- break;
- }
- // Lost the update to another caller; pause and retry with a fresh read.
- ThreadPause();
- }
- // nIndex is already below 128, so the 0xffff mask changes nothing here.
- return s_vecTemp[nIndex & 0xffff];
-}
-
-
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-// Returns a.x*b.x + a.y*b.y + a.z*b.z after validating both inputs.
-FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b)
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    return ( a.x*b.x + a.y*b.y + a.z*b.z );
-}
-
-// Kept for backwards compatibility: member form of DotProduct().
-inline vec_t Vector::Dot( const Vector& vOther ) const
-{
-    CHECK_VALID(vOther);
-    const vec_t flDot = DotProduct( *this, vOther );
-    return flDot;
-}
-
-// result = a x b. result must not alias a or b (asserted).
-inline void CrossProduct(const Vector& a, const Vector& b, Vector& result )
-{
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-    Assert( &a != &result );
-    Assert( &b != &result );
-
-    const vec_t flCrossX = a.y*b.z - a.z*b.y;
-    const vec_t flCrossY = a.z*b.x - a.x*b.z;
-    const vec_t flCrossZ = a.x*b.y - a.y*b.x;
-
-    result.x = flCrossX;
-    result.y = flCrossY;
-    result.z = flCrossZ;
-}
-
-// Sum of FloatMakePositive() applied to each per-component product.
-inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 )
-{
-    CHECK_VALID(v0);
-    CHECK_VALID(v1);
-    return FloatMakePositive(v0.x*v1.x)
-         + FloatMakePositive(v0.y*v1.y)
-         + FloatMakePositive(v0.z*v1.z);
-}
-
-// Same as above with the second operand given as a float[3]; performs no
-// validity checks.
-inline vec_t DotProductAbs( const Vector &v0, const float *v1 )
-{
-    return FloatMakePositive(v0.x * v1[0])
-         + FloatMakePositive(v0.y * v1[1])
-         + FloatMakePositive(v0.z * v1[2]);
-}
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-
-// Length of v: FastSqrt of the sum of squared components.
-inline vec_t VectorLength( const Vector& v )
-{
-    CHECK_VALID(v);
-    return (vec_t)FastSqrt( v.x*v.x + v.y*v.y + v.z*v.z );
-}
-
-
-// Member form of VectorLength().
-inline vec_t Vector::Length(void) const
-{
-    CHECK_VALID(*this);
-    const vec_t flLength = VectorLength( *this );
-    return flLength;
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-
-/*
-// FIXME: Can't use until we're un-macroed in mathlib.h
-inline vec_t VectorNormalize( Vector& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- // FIXME:
- // Just copying the existing implementation; shouldn't res.z == 0?
- v.x = v.y = 0.0f; v.z = 1.0f;
- }
- return l;
-}
-*/
-
-
-// Checks this point against an axis-aligned box: returns true when every
-// component lies within [boxmin, boxmax], bounds included.
-//
-// Marked inline like every other out-of-class member definition in this
-// header, so that including the header from several translation units cannot
-// produce multiple definitions of the function.
-inline bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax)
-{
-    return (
-        ( x >= boxmin.x ) && ( x <= boxmax.x) &&
-        ( y >= boxmin.y ) && ( y <= boxmax.y) &&
-        ( z >= boxmin.z ) && ( z <= boxmax.z)
-        );
-}
-
-//-----------------------------------------------------------------------------
-// Get the distance from this vector to the other one
-//-----------------------------------------------------------------------------
-// Length of (*this - vOther).
-inline vec_t Vector::DistTo(const Vector &vOther) const
-{
-    Vector vecSeparation;
-    VectorSubtract( *this, vOther, vecSeparation );
-    const vec_t flDistance = vecSeparation.Length();
-    return flDistance;
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector equality with tolerance
-//-----------------------------------------------------------------------------
-// True when no per-component absolute difference exceeds tolerance. The
-// components are tested in x, y, z order and evaluation stops at the first
-// failure.
-inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance )
-{
-    return !( FloatMakePositive(src1.x - src2.x) > tolerance )
-        && !( FloatMakePositive(src1.y - src2.y) > tolerance )
-        && ( FloatMakePositive(src1.z - src2.z) <= tolerance );
-}
-
-
-//-----------------------------------------------------------------------------
-// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
-//-----------------------------------------------------------------------------
-// Writes to *pResult the point closest to vecTarget that is no farther than
-// flMaxDist from vecStart: the target itself when it is in range, otherwise
-// the point flMaxDist along the direction from vecStart to vecTarget.
-inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult )
-{
-    Vector vecToTarget;
-    VectorSubtract( vecTarget, vecStart, vecToTarget );
-
-    const float flTargetDistSqr = vecToTarget.LengthSqr();
-    if ( flTargetDistSqr <= flMaxDist * flMaxDist )
-    {
-        // Target already within range.
-        *pResult = vecTarget;
-        return;
-    }
-
-    // Normalise the offset, then step flMaxDist along it from the start.
-    vecToTarget /= FastSqrt( flTargetDistSqr );
-    VectorMA( vecStart, flMaxDist, vecToTarget, *pResult );
-}
-
-
-//-----------------------------------------------------------------------------
-// Takes the absolute value of a vector
-//-----------------------------------------------------------------------------
-// dst receives FloatMakePositive() of each component of src.
-inline void VectorAbs( const Vector& src, Vector& dst )
-{
-    dst.x = FloatMakePositive(src.x); dst.y = FloatMakePositive(src.y); dst.z = FloatMakePositive(src.z);
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// Slow methods
-//
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-//-----------------------------------------------------------------------------
-// Returns a vector with the min or max in X, Y, and Z.
-//-----------------------------------------------------------------------------
-// Returns the per-component minimum of this vector and vOther.
-inline Vector Vector::Min(const Vector &vOther) const
-{
-    const vec_t flMinX = ( x < vOther.x ) ? x : vOther.x;
-    const vec_t flMinY = ( y < vOther.y ) ? y : vOther.y;
-    const vec_t flMinZ = ( z < vOther.z ) ? z : vOther.z;
-    return Vector( flMinX, flMinY, flMinZ );
-}
-
-// Returns the per-component maximum of this vector and vOther.
-inline Vector Vector::Max(const Vector &vOther) const
-{
-    const vec_t flMaxX = ( x > vOther.x ) ? x : vOther.x;
-    const vec_t flMaxY = ( y > vOther.y ) ? y : vOther.y;
-    const vec_t flMaxZ = ( z > vOther.z ) ? z : vOther.z;
-    return Vector( flMaxX, flMaxY, flMaxZ );
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations
-//-----------------------------------------------------------------------------
-
-// Unary minus: returns a vector with every component negated.
-inline Vector Vector::operator-(void) const
-{
-    Vector vecNegated( -x, -y, -z );
-    return vecNegated;
-}
-
-// Value-returning sum, built on VectorAdd().
-inline Vector Vector::operator+(const Vector& v) const
-{
-    Vector vecSum;
-    VectorAdd( *this, v, vecSum );
-    return vecSum;
-}
-
-// Value-returning difference, built on VectorSubtract().
-inline Vector Vector::operator-(const Vector& v) const
-{
-    Vector vecDifference;
-    VectorSubtract( *this, v, vecDifference );
-    return vecDifference;
-}
-
-// Value-returning uniform scale, built on the scalar VectorMultiply().
-inline Vector Vector::operator*(float fl) const
-{
-    Vector vecScaled;
-    VectorMultiply( *this, fl, vecScaled );
-    return vecScaled;
-}
-
-// Value-returning component-wise product.
-inline Vector Vector::operator*(const Vector& v) const
-{
-    Vector vecProduct;
-    VectorMultiply( *this, v, vecProduct );
-    return vecProduct;
-}
-
-// Value-returning uniform division, built on the scalar VectorDivide().
-inline Vector Vector::operator/(float fl) const
-{
-    Vector vecQuotient;
-    VectorDivide( *this, fl, vecQuotient );
-    return vecQuotient;
-}
-
-// Value-returning component-wise division.
-inline Vector Vector::operator/(const Vector& v) const
-{
-    Vector vecQuotient;
-    VectorDivide( *this, v, vecQuotient );
-    return vecQuotient;
-}
-
-// Lets the scalar appear on the left: fl * v is evaluated as v * fl.
-inline Vector operator*(float fl, const Vector& v)
-{
-    Vector vecScaled = v * fl;
-    return vecScaled;
-}
-
-//-----------------------------------------------------------------------------
-// cross product
-//-----------------------------------------------------------------------------
-
-// Member form of CrossProduct(): returns *this x vOther.
-inline Vector Vector::Cross(const Vector& vOther) const
-{
-    Vector vecCross;
-    CrossProduct( *this, vOther, vecCross );
-    return vecCross;
-}
-
-//-----------------------------------------------------------------------------
-// 2D
-//-----------------------------------------------------------------------------
-
-// Length of the (x, y) part of the vector; z is ignored.
-inline vec_t Vector::Length2D(void) const
-{
-    const vec_t flLength2D = (vec_t)FastSqrt( x*x + y*y );
-    return flLength2D;
-}
-
-// Squared length of the (x, y) part of the vector; z is ignored.
-inline vec_t Vector::Length2DSqr(void) const
-{
-    const vec_t flLengthSqr2D = ( x*x + y*y );
-    return flLengthSqr2D;
-}
-
-// Value-returning cross product a x b. Unlike the out-parameter overload it
-// runs no explicit checks on a and b.
-inline Vector CrossProduct(const Vector& a, const Vector& b)
-{
-    const vec_t flCrossX = a.y*b.z - a.z*b.y;
-    const vec_t flCrossY = a.z*b.x - a.x*b.z;
-    const vec_t flCrossZ = a.x*b.y - a.y*b.x;
-    return Vector( flCrossX, flCrossY, flCrossZ );
-}
-
-// result receives fpmin() of each component pair.
-inline void VectorMin( const Vector &a, const Vector &b, Vector &result )
-{
-    result.x = fpmin(a.x, b.x); result.y = fpmin(a.y, b.y); result.z = fpmin(a.z, b.z);
-}
-
-// result receives fpmax() of each component pair.
-inline void VectorMax( const Vector &a, const Vector &b, Vector &result )
-{
-    result.x = fpmax(a.x, b.x); result.y = fpmax(a.y, b.y); result.z = fpmax(a.z, b.z);
-}
-
-// Returns the volume of the axis-aligned box spanned by vecMins and vecMaxs,
-// i.e. the product of its three extents.
-//
-// The previous body returned DotProduct( vecDelta, vecDelta ), which is the
-// squared length of the box diagonal rather than a volume.
-// NOTE(review): callers that tuned thresholds against the old value need
-// re-checking.
-inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs )
-{
-    Vector vecDelta;
-    VectorSubtract( vecMaxs, vecMins, vecDelta );
-    return vecDelta.x * vecDelta.y * vecDelta.z;
-}
-
-// Returns a vector filled by Vector::Random( minVal, maxVal ).
-inline Vector RandomVector( float minVal, float maxVal )
-{
-    Vector vecRandom;
-    vecRandom.Random( minVal, maxVal );
-    return vecRandom;
-}
-
-#endif //slow
-
-//-----------------------------------------------------------------------------
-// Helper debugging stuff....
-//-----------------------------------------------------------------------------
-
-// The four overloads below exist only to trap comparisons between a Vector and
-// a raw float pointer: each one asserts unconditionally and returns false.
-inline bool operator==( float const* f, const Vector& v )
-{
-    Assert(0);  // AIIIEEEE!!!!
-    return false;
-}
-
-inline bool operator==( const Vector& v, float const* f )
-{
-    Assert(0);  // AIIIEEEE!!!!
-    return false;
-}
-
-inline bool operator!=( float const* f, const Vector& v )
-{
-    Assert(0);  // AIIIEEEE!!!!
-    return false;
-}
-
-inline bool operator!=( const Vector& v, float const* f )
-{
-    Assert(0);  // AIIIEEEE!!!!
-    return false;
-}
-
-
-//-----------------------------------------------------------------------------
-// AngularImpulse
-//-----------------------------------------------------------------------------
-// AngularImpulse values are exponential maps (an axis scaled by a "twist" angle in degrees)
-typedef Vector AngularImpulse;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Returns an AngularImpulse filled by Vector::Random( minVal, maxVal ).
-inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal )
-{
-    AngularImpulse vecImpulse;
-    vecImpulse.Random( minVal, maxVal );
-    return vecImpulse;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion
-//-----------------------------------------------------------------------------
-
-class RadianEuler;
-
-// Four-component quaternion (x, y, z, w).
-class Quaternion                // same data-layout as engine's vec4_t,
-{                               // which is a vec_t[4]
-public:
-    // Leaves the components untouched unless both _DEBUG and VECTOR_PARANOIA
-    // are defined, in which case they are poisoned with NaN to catch errors.
-    inline Quaternion(void)
-    {
-#if defined( _DEBUG ) && defined( VECTOR_PARANOIA )
-        x = VEC_T_NAN;
-        y = VEC_T_NAN;
-        z = VEC_T_NAN;
-        w = VEC_T_NAN;
-#endif
-    }
-
-    // Component-wise construction.
-    inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw)
-        : x(ix)
-        , y(iy)
-        , z(iz)
-        , w(iw)
-    {
-    }
-
-    inline Quaternion(RadianEuler const &angle);    // evil auto type promotion!!!
-
-    // Assigns all four components; every argument defaults to zero.
-    inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f)
-    {
-        x = ix;
-        y = iy;
-        z = iz;
-        w = iw;
-    }
-
-    bool IsValid() const;
-    void Invalidate();
-
-    // Exact component-wise comparison.
-    bool operator==( const Quaternion &src ) const;
-    bool operator!=( const Quaternion &src ) const;
-
-    // Address of this object viewed as a vec_t array.
-    vec_t* Base()
-    {
-        return (vec_t*)this;
-    }
-    const vec_t* Base() const
-    {
-        return (const vec_t*)this;
-    }
-
-    // array access...
-    vec_t operator[](int i) const;
-    vec_t& operator[](int i);
-
-    vec_t x, y, z, w;
-};
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-// Mutable indexed access (0 = x ... 3 = w); asserts on a bad index.
-inline vec_t& Quaternion::operator[](int i)
-{
-    Assert( (i >= 0) && (i < 4) );
-    vec_t *pComponents = reinterpret_cast<vec_t *>( this );
-    return pComponents[i];
-}
-
-// Read-only indexed access; returns the component by value.
-inline vec_t Quaternion::operator[](int i) const
-{
-    Assert( (i >= 0) && (i < 4) );
-    const vec_t *pComponents = reinterpret_cast<const vec_t *>( this );
-    return pComponents[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Equality test
-//-----------------------------------------------------------------------------
-// Exact equality of all four components.
-inline bool Quaternion::operator==( const Quaternion &src ) const
-{
-    if ( x != src.x || y != src.y )
-        return false;
-    return ( z == src.z ) && ( w == src.w );
-}
-
-// Logical negation of operator==.
-inline bool Quaternion::operator!=( const Quaternion &src ) const
-{
-    return !( *this == src );
-}
-
-
-//-----------------------------------------------------------------------------
-// Quaternion equality with tolerance
-//-----------------------------------------------------------------------------
-// True when no per-component absolute difference exceeds tolerance. The
-// components are tested in x, y, z, w order and evaluation stops at the first
-// failure.
-inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance )
-{
-    return !( FloatMakePositive(src1.x - src2.x) > tolerance )
-        && !( FloatMakePositive(src1.y - src2.y) > tolerance )
-        && !( FloatMakePositive(src1.z - src2.z) > tolerance )
-        && ( FloatMakePositive(src1.w - src2.w) <= tolerance );
-}
-
-
-//-----------------------------------------------------------------------------
-// Here's where we add all those lovely SSE optimized routines
-//-----------------------------------------------------------------------------
-class ALIGN16 QuaternionAligned : public Quaternion
-{
-public:
- inline QuaternionAligned(void) {};
- inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W)
- {
- Init(X,Y,Z,W);
- }
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-
-private:
- // No copy constructors allowed if we're in optimal mode
- QuaternionAligned(const QuaternionAligned& vOther);
- QuaternionAligned(const Quaternion &vOther);
-
-#else
-public:
- explicit QuaternionAligned(const Quaternion &vOther)
- {
- Init(vOther.x, vOther.y, vOther.z, vOther.w);
- }
-
- QuaternionAligned& operator=(const Quaternion &vOther)
- {
- Init(vOther.x, vOther.y, vOther.z, vOther.w);
- return *this;
- }
-
-#endif
-} ALIGN16_POST;
-
-
-//-----------------------------------------------------------------------------
-// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW)
-//-----------------------------------------------------------------------------
-class QAngle;
-class RadianEuler
-{
-public:
- inline RadianEuler(void) { }
- inline RadianEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; }
- inline RadianEuler(Quaternion const &q); // evil auto type promotion!!!
- inline RadianEuler(QAngle const &angles); // evil auto type promotion!!!
-
- // Initialization
- inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f) { x = ix; y = iy; z = iz; }
-
- // conversion to qangle
- QAngle ToQAngle( void ) const;
- bool IsValid() const;
- void Invalidate();
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- vec_t x, y, z;
-};
-
-
-extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
-extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles );
-
-FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; }
-
-inline Quaternion::Quaternion(RadianEuler const &angle)
-{
- AngleQuaternion( angle, *this );
-}
-
-inline bool Quaternion::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w);
-}
-
-inline void Quaternion::Invalidate()
-{
-//#ifdef _DEBUG
-//#ifdef VECTOR_PARANOIA
- x = y = z = w = VEC_T_NAN;
-//#endif
-//#endif
-}
-
-inline RadianEuler::RadianEuler(Quaternion const &q)
-{
- QuaternionAngles( q, *this );
-}
-
-inline void VectorCopy( RadianEuler const& src, RadianEuler &dst )
-{
- CHECK_VALID(src);
- dst.x = src.x;
- dst.y = src.y;
- dst.z = src.z;
-}
-
-inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst )
-{
- CHECK_VALID(src);
- Assert( IsFinite(b) );
- dst.x = src.x * b;
- dst.y = src.y * b;
- dst.z = src.z * b;
-}
-
-inline bool RadianEuler::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z);
-}
-
-inline void RadianEuler::Invalidate()
-{
-//#ifdef _DEBUG
-//#ifdef VECTOR_PARANOIA
- x = y = z = VEC_T_NAN;
-//#endif
-//#endif
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& RadianEuler::operator[](int i)
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t RadianEuler::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Degree Euler QAngle pitch, yaw, roll
-//-----------------------------------------------------------------------------
-class QAngleByValue;
-
-class QAngle
-{
-public:
- // Members
- vec_t x, y, z;
-
- // Construction/destruction
- QAngle(void);
- QAngle(vec_t X, vec_t Y, vec_t Z);
-// QAngle(RadianEuler const &angles); // evil auto type promotion!!!
-
- // Allow pass-by-value
- operator QAngleByValue &() { return *((QAngleByValue *)(this)); }
- operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); }
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
- void Random( vec_t minVal, vec_t maxVal );
-
- // Got any nasty NAN's?
- bool IsValid() const;
- void Invalidate();
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // equality
- bool operator==(const QAngle& v) const;
- bool operator!=(const QAngle& v) const;
-
- // arithmetic operations
- QAngle& operator+=(const QAngle &v);
- QAngle& operator-=(const QAngle &v);
- QAngle& operator*=(float s);
- QAngle& operator/=(float s);
-
- // Get the vector's magnitude.
- vec_t Length() const;
- vec_t LengthSqr() const;
-
- // negate the QAngle components
- //void Negate();
-
- // No assignment operators either...
- QAngle& operator=( const QAngle& src );
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
-
- // arithmetic operations
- QAngle operator-(void) const;
-
- QAngle operator+(const QAngle& v) const;
- QAngle operator-(const QAngle& v) const;
- QAngle operator*(float fl) const;
- QAngle operator/(float fl) const;
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- QAngle(const QAngle& vOther);
-
-#endif
-};
-
-FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; }
-
-//-----------------------------------------------------------------------------
-// Allows us to specifically pass the vector by value when we need to
-//-----------------------------------------------------------------------------
-class QAngleByValue : public QAngle
-{
-public:
- // Construction/destruction:
- QAngleByValue(void) : QAngle() {}
- QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {}
- QAngleByValue(const QAngleByValue& vOther) { *this = vOther; }
-};
-
-
-inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- result.x = a.x + b.x;
- result.y = a.y + b.y;
- result.z = a.z + b.z;
-}
-
-inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest )
-{
- CHECK_VALID(start);
- CHECK_VALID(direction);
- dest.x = start.x + scale * direction.x;
- dest.y = start.y + scale * direction.y;
- dest.z = start.z + scale * direction.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-inline QAngle::QAngle(void)
-{
-#ifdef _DEBUG
-#ifdef VECTOR_PARANOIA
- // Initialize to NAN to catch errors
- x = y = z = VEC_T_NAN;
-#endif
-#endif
-}
-
-inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z)
-{
- x = X; y = Y; z = Z;
- CHECK_VALID(*this);
-}
-
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz )
-{
- x = ix; y = iy; z = iz;
- CHECK_VALID(*this);
-}
-
-inline void QAngle::Random( vec_t minVal, vec_t maxVal )
-{
- x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- CHECK_VALID(*this);
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline QAngle RandomAngle( float minVal, float maxVal )
-{
- Vector random;
- random.Random( minVal, maxVal );
- QAngle ret( random.x, random.y, random.z );
- return ret;
-}
-
-#endif
-
-
-inline RadianEuler::RadianEuler(QAngle const &angles)
-{
- Init(
- angles.z * 3.14159265358979323846f / 180.f,
- angles.x * 3.14159265358979323846f / 180.f,
- angles.y * 3.14159265358979323846f / 180.f );
-}
-
-
-
-
-inline QAngle RadianEuler::ToQAngle( void) const
-{
- return QAngle(
- y * 180.f / 3.14159265358979323846f,
- z * 180.f / 3.14159265358979323846f,
- x * 180.f / 3.14159265358979323846f );
-}
-
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-inline QAngle& QAngle::operator=(const QAngle &vOther)
-{
- CHECK_VALID(vOther);
- x=vOther.x; y=vOther.y; z=vOther.z;
- return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& QAngle::operator[](int i)
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t QAngle::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-inline vec_t* QAngle::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* QAngle::Base() const
-{
- return (vec_t const*)this;
-}
-
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-inline bool QAngle::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z);
-}
-
-//-----------------------------------------------------------------------------
-// Invalidate
-//-----------------------------------------------------------------------------
-
-inline void QAngle::Invalidate()
-{
-//#ifdef _DEBUG
-//#ifdef VECTOR_PARANOIA
- x = y = z = VEC_T_NAN;
-//#endif
-//#endif
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-inline bool QAngle::operator==( const QAngle& src ) const
-{
- CHECK_VALID(src);
- CHECK_VALID(*this);
- return (src.x == x) && (src.y == y) && (src.z == z);
-}
-
-inline bool QAngle::operator!=( const QAngle& src ) const
-{
- CHECK_VALID(src);
- CHECK_VALID(*this);
- return (src.x != x) || (src.y != y) || (src.z != z);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-inline void VectorCopy( const QAngle& src, QAngle& dst )
-{
- CHECK_VALID(src);
- dst.x = src.x;
- dst.y = src.y;
- dst.z = src.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-inline QAngle& QAngle::operator+=(const QAngle& v)
-{
- CHECK_VALID(*this);
- CHECK_VALID(v);
- x+=v.x; y+=v.y; z += v.z;
- return *this;
-}
-
-inline QAngle& QAngle::operator-=(const QAngle& v)
-{
- CHECK_VALID(*this);
- CHECK_VALID(v);
- x-=v.x; y-=v.y; z -= v.z;
- return *this;
-}
-
-inline QAngle& QAngle::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- CHECK_VALID(*this);
- return *this;
-}
-
-inline QAngle& QAngle::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- CHECK_VALID(*this);
- return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-inline vec_t QAngle::Length( ) const
-{
- CHECK_VALID(*this);
- return (vec_t)FastSqrt( LengthSqr( ) );
-}
-
-
-inline vec_t QAngle::LengthSqr( ) const
-{
- CHECK_VALID(*this);
- return x * x + y * y + z * z;
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector equality with tolerance
-//-----------------------------------------------------------------------------
-inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f )
-{
- if (FloatMakePositive(src1.x - src2.x) > tolerance)
- return false;
- if (FloatMakePositive(src1.y - src2.y) > tolerance)
- return false;
- return (FloatMakePositive(src1.z - src2.z) <= tolerance);
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations (SLOW!!)
-//-----------------------------------------------------------------------------
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline QAngle QAngle::operator-(void) const
-{
- QAngle ret(-x,-y,-z);
- return ret;
-}
-
-inline QAngle QAngle::operator+(const QAngle& v) const
-{
- QAngle res;
- res.x = x + v.x;
- res.y = y + v.y;
- res.z = z + v.z;
- return res;
-}
-
-inline QAngle QAngle::operator-(const QAngle& v) const
-{
- QAngle res;
- res.x = x - v.x;
- res.y = y - v.y;
- res.z = z - v.z;
- return res;
-}
-
-inline QAngle QAngle::operator*(float fl) const
-{
- QAngle res;
- res.x = x * fl;
- res.y = y * fl;
- res.z = z * fl;
- return res;
-}
-
-inline QAngle QAngle::operator/(float fl) const
-{
- QAngle res;
- res.x = x / fl;
- res.y = y / fl;
- res.z = z / fl;
- return res;
-}
-
-inline QAngle operator*(float fl, const QAngle& v)
-{
- QAngle ret( v * fl );
- return ret;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-//-----------------------------------------------------------------------------
-// NOTE: These are not completely correct. The representations are not equivalent
-// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z)
-inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse )
-{
- impulse.x = angles.z;
- impulse.y = angles.x;
- impulse.z = angles.y;
-}
-
-inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles )
-{
- angles.x = impulse.y;
- angles.y = impulse.z;
- angles.z = impulse.x;
-}
-
-#if !defined( _X360 )
-
-FORCEINLINE vec_t InvRSquared( float const *v )
-{
-#if defined(__i386__) || defined(_M_IX86)
- float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result;
- _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) ));
- return result;
-#else
- return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
-#endif
-}
-
-FORCEINLINE vec_t InvRSquared( const Vector &v )
-{
- return InvRSquared(&v.x);
-}
-
-#if defined(__i386__) || defined(_M_IX86)
-inline void _SSE_RSqrtInline( float a, float* out )
-{
- __m128 xx = _mm_load_ss( &a );
- __m128 xr = _mm_rsqrt_ss( xx );
- __m128 xt;
- xt = _mm_mul_ss( xr, xr );
- xt = _mm_mul_ss( xt, xx );
- xt = _mm_sub_ss( _mm_set_ss(3.f), xt );
- xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) );
- xr = _mm_mul_ss( xr, xt );
- _mm_store_ss( out, xr );
-}
-#endif
-
-// FIXME: Change this back to a #define once we get rid of the vec_t version
-FORCEINLINE float VectorNormalize( Vector& vec )
-{
-#ifndef DEBUG // stop crashing my edit-and-continue!
- #if defined(__i386__) || defined(_M_IX86)
- #define DO_SSE_OPTIMIZATION
- #endif
-#endif
-
-#if defined( DO_SSE_OPTIMIZATION )
- float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen;
- _SSE_RSqrtInline(sqrlen, &invlen);
- vec.x *= invlen;
- vec.y *= invlen;
- vec.z *= invlen;
- return sqrlen * invlen;
-#else
- extern float (FASTCALL *pfVectorNormalize)(Vector& v);
- return (*pfVectorNormalize)(vec);
-#endif
-}
-
-// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
-FORCEINLINE float VectorNormalize( float * v )
-{
- return VectorNormalize(*(reinterpret_cast<Vector *>(v)));
-}
-
-FORCEINLINE void VectorNormalizeFast( Vector &vec )
-{
- VectorNormalize(vec);
-}
-
-#else
-
-FORCEINLINE float _VMX_InvRSquared( const Vector &v )
-{
- XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
- xmV = XMVector3Dot( xmV, xmV );
- return xmV.x;
-}
-
-// call directly
-FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
-{
- float mag = XMVector3Length( XMLoadVector3( vec.Base() ) ).x;
- float den = 1.f / (mag + FLT_EPSILON );
- vec.x *= den;
- vec.y *= den;
- vec.z *= den;
- return mag;
-}
-
-#define InvRSquared(x) _VMX_InvRSquared(x)
-
-// FIXME: Change this back to a #define once we get rid of the vec_t version
-FORCEINLINE float VectorNormalize( Vector& v )
-{
- return _VMX_VectorNormalize( v );
-}
-// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
-FORCEINLINE float VectorNormalize( float *pV )
-{
- return _VMX_VectorNormalize(*(reinterpret_cast<Vector*>(pV)));
-}
-
-// call directly
-FORCEINLINE void VectorNormalizeFast( Vector &vec )
-{
- XMVECTOR xmV = XMVector3LengthEst( XMLoadVector3( vec.Base() ) );
- float den = 1.f / (xmV.x + FLT_EPSILON);
- vec.x *= den;
- vec.y *= den;
- vec.z *= den;
-}
-
-#endif // _X360
-
-
-inline vec_t Vector::NormalizeInPlace()
-{
- return VectorNormalize( *this );
-}
-
-inline Vector Vector::Normalized() const
-{
- Vector norm = *this;
- VectorNormalize( norm );
- return norm;
-}
-
-inline bool Vector::IsLengthGreaterThan( float val ) const
-{
- return LengthSqr() > val*val;
-}
-
-inline bool Vector::IsLengthLessThan( float val ) const
-{
- return LengthSqr() < val*val;
-}
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef VECTOR_H
+#define VECTOR_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <math.h>
+#include <float.h>
+
+// For vec_t, put this somewhere else?
+#include "tier0/basetypes.h"
+
+// For rand(). We really need a library!
+#include <stdlib.h>
+
+#ifndef _X360
+// For MMX intrinsics
+#include <xmmintrin.h>
+#endif
+
+#include "tier0/dbg.h"
+#include "tier0/threadtools.h"
+#include "mathlib/vector2d.h"
+#include "mathlib/math_pfns.h"
+#include "minmax.h"
+
+// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
+//#define VECTOR_PARANOIA 1
+
+// Uncomment this to make sure we don't do anything slow with our vectors
+//#define VECTOR_NO_SLOW_OPERATIONS 1
+
+
+// Used to make certain code easier to read.
+#define X_INDEX 0
+#define Y_INDEX 1
+#define Z_INDEX 2
+
+
+#ifdef VECTOR_PARANOIA
+#define CHECK_VALID( _v) Assert( (_v).IsValid() )
+#else
+#ifdef GNUC
+#define CHECK_VALID( _v)
+#else
+#define CHECK_VALID( _v) 0
+#endif
+#endif
+
+#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference!
+
+class VectorByValue;
+
+//=========================================================
+// 3D Vector
+//=========================================================
+class Vector
+{
+public:
+ // Members
+ vec_t x, y, z;
+
+ // Construction/destruction:
+ Vector(void);
+ Vector(vec_t X, vec_t Y, vec_t Z);
+ explicit Vector(vec_t XYZ); ///< broadcast initialize
+
+ // Initialization
+ void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
+ // TODO (Ilya): Should there be an init that takes a single float for consistency?
+
+ // Got any nasty NAN's?
+ bool IsValid() const;
+ void Invalidate();
+
+ // array access...
+ vec_t operator[](int i) const;
+ vec_t& operator[](int i);
+
+ // Base address...
+ vec_t* Base();
+ vec_t const* Base() const;
+
+ // Cast to Vector2D...
+ Vector2D& AsVector2D();
+ const Vector2D& AsVector2D() const;
+
+ // Initialization methods
+ void Random( vec_t minVal, vec_t maxVal );
+ inline void Zero(); ///< zero out a vector
+
+ // equality
+ bool operator==(const Vector& v) const;
+ bool operator!=(const Vector& v) const;
+
+ // arithmetic operations
+ FORCEINLINE Vector& operator+=(const Vector &v);
+ FORCEINLINE Vector& operator-=(const Vector &v);
+ FORCEINLINE Vector& operator*=(const Vector &v);
+ FORCEINLINE Vector& operator*=(float s);
+ FORCEINLINE Vector& operator/=(const Vector &v);
+ FORCEINLINE Vector& operator/=(float s);
+ FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add
+ FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub
+
+// negate the vector components
+ void Negate();
+
+ // Get the vector's magnitude.
+ inline vec_t Length() const;
+
+ // Get the vector's magnitude squared.
+ FORCEINLINE vec_t LengthSqr(void) const
+ {
+ CHECK_VALID(*this);
+ return (x*x + y*y + z*z);
+ }
+
+ // return true if this vector is (0,0,0) within tolerance
+ bool IsZero( float tolerance = 0.01f ) const
+ {
+ return (x > -tolerance && x < tolerance &&
+ y > -tolerance && y < tolerance &&
+ z > -tolerance && z < tolerance);
+ }
+
+ vec_t NormalizeInPlace();
+ Vector Normalized() const;
+ bool IsLengthGreaterThan( float val ) const;
+ bool IsLengthLessThan( float val ) const;
+
+ // check if a vector is within the box defined by two other vectors
+ FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax);
+
+ // Get the distance from this vector to the other one.
+ vec_t DistTo(const Vector &vOther) const;
+
+ // Get the distance from this vector to the other one squared.
+ // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline.
+ // may be able to tidy this up after switching to VC7
+ FORCEINLINE vec_t DistToSqr(const Vector &vOther) const
+ {
+ Vector delta;
+
+ delta.x = x - vOther.x;
+ delta.y = y - vOther.y;
+ delta.z = z - vOther.z;
+
+ return delta.LengthSqr();
+ }
+
+ // Copy
+ void CopyToArray(float* rgfl) const;
+
+ // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
+ // is about 12% faster than the actual vector equation (because it's done per-component
+ // rather than per-vector).
+ void MulAdd(const Vector& a, const Vector& b, float scalar);
+
+ // Dot product.
+ vec_t Dot(const Vector& vOther) const;
+
+ // assignment
+ Vector& operator=(const Vector &vOther);
+
+ // 2d
+ vec_t Length2D(void) const;
+ vec_t Length2DSqr(void) const;
+
+ operator VectorByValue &() { return *((VectorByValue *)(this)); }
+ operator const VectorByValue &() const { return *((const VectorByValue *)(this)); }
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // copy constructors
+// Vector(const Vector &vOther);
+
+ // arithmetic operations
+ Vector operator-(void) const;
+
+ Vector operator+(const Vector& v) const;
+ Vector operator-(const Vector& v) const;
+ Vector operator*(const Vector& v) const;
+ Vector operator/(const Vector& v) const;
+ Vector operator*(float fl) const;
+ Vector operator/(float fl) const;
+
+ // Cross product between two vectors.
+ Vector Cross(const Vector &vOther) const;
+
+ // Returns a vector with the min or max in X, Y, and Z.
+ Vector Min(const Vector &vOther) const;
+ Vector Max(const Vector &vOther) const;
+
+#else
+
+private:
+ // No copy constructors allowed if we're in optimal mode
+ Vector(const Vector& vOther);
+#endif
+};
+
+FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }
+
+
+#define USE_M64S ( ( !defined( _X360 ) ) )
+
+
+
+//=========================================================
+// 4D Short Vector (aligned on 8-byte boundary)
+//=========================================================
+class ALIGN8 ShortVector
+{
+public:
+
+ short x, y, z, w;
+
+ // Initialization
+ void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 );
+
+
+#if USE_M64S
+ __m64 &AsM64() { return *(__m64*)&x; }
+ const __m64 &AsM64() const { return *(const __m64*)&x; }
+#endif
+
+ // Setter
+ void Set( const ShortVector& vOther );
+ void Set( const short ix, const short iy, const short iz, const short iw );
+
+ // array access...
+ short operator[](int i) const;
+ short& operator[](int i);
+
+ // Base address...
+ short* Base();
+ short const* Base() const;
+
+ // equality
+ bool operator==(const ShortVector& v) const;
+ bool operator!=(const ShortVector& v) const;
+
+ // Arithmetic operations
+ FORCEINLINE ShortVector& operator+=(const ShortVector &v);
+ FORCEINLINE ShortVector& operator-=(const ShortVector &v);
+ FORCEINLINE ShortVector& operator*=(const ShortVector &v);
+ FORCEINLINE ShortVector& operator*=(float s);
+ FORCEINLINE ShortVector& operator/=(const ShortVector &v);
+ FORCEINLINE ShortVector& operator/=(float s);
+ FORCEINLINE ShortVector operator*(float fl) const;
+
+private:
+
+ // No copy constructors allowed if we're in optimal mode
+// ShortVector(ShortVector const& vOther);
+
+ // No assignment operators either...
+// ShortVector& operator=( ShortVector const& src );
+
+} ALIGN8_POST;
+
+
+
+
+
+
+//=========================================================
+// 4D Integer Vector
+//=========================================================
+class IntVector4D
+{
+public:
+
+ int x, y, z, w;
+
+ // Initialization
+ void Init(int ix = 0, int iy = 0, int iz = 0, int iw = 0 );
+
+#if USE_M64S
+ __m64 &AsM64() { return *(__m64*)&x; }
+ const __m64 &AsM64() const { return *(const __m64*)&x; }
+#endif
+
+ // Setter
+ void Set( const IntVector4D& vOther );
+ void Set( const int ix, const int iy, const int iz, const int iw );
+
+ // array access...
+ int operator[](int i) const;
+ int& operator[](int i);
+
+ // Base address...
+ int* Base();
+ int const* Base() const;
+
+ // equality
+ bool operator==(const IntVector4D& v) const;
+ bool operator!=(const IntVector4D& v) const;
+
+ // Arithmetic operations
+ FORCEINLINE IntVector4D& operator+=(const IntVector4D &v);
+ FORCEINLINE IntVector4D& operator-=(const IntVector4D &v);
+ FORCEINLINE IntVector4D& operator*=(const IntVector4D &v);
+ FORCEINLINE IntVector4D& operator*=(float s);
+ FORCEINLINE IntVector4D& operator/=(const IntVector4D &v);
+ FORCEINLINE IntVector4D& operator/=(float s);
+ FORCEINLINE IntVector4D operator*(float fl) const;
+
+private:
+
+ // No copy constructors allowed if we're in optimal mode
+ // IntVector4D(IntVector4D const& vOther);
+
+ // No assignment operators either...
+ // IntVector4D& operator=( IntVector4D const& src );
+
+};
+
+
+
+//-----------------------------------------------------------------------------
+// Allows us to specifically pass the vector by value when we need to
+//-----------------------------------------------------------------------------
+class VectorByValue : public Vector
+{
+public:
+ // Construction/destruction:
+ VectorByValue(void) : Vector() {}
+ VectorByValue(vec_t X, vec_t Y, vec_t Z) : Vector( X, Y, Z ) {}
+ VectorByValue(const VectorByValue& vOther) { *this = vOther; }
+};
+
+
+//-----------------------------------------------------------------------------
+// Utility to simplify table construction. No constructor means can use
+// traditional C-style initialization
+//-----------------------------------------------------------------------------
+class TableVector
+{
+public:
+ vec_t x, y, z;
+
+ operator Vector &() { return *((Vector *)(this)); }
+ operator const Vector &() const { return *((const Vector *)(this)); }
+
+ // array access...
+ inline vec_t& operator[](int i)
+ {
+ Assert( (i >= 0) && (i < 3) );
+ return ((vec_t*)this)[i];
+ }
+
+ inline vec_t operator[](int i) const
+ {
+ Assert( (i >= 0) && (i < 3) );
+ return ((vec_t*)this)[i];
+ }
+};
+
+
+//-----------------------------------------------------------------------------
+// Here's where we add all those lovely SSE optimized routines
+//-----------------------------------------------------------------------------
+
+class ALIGN16 VectorAligned : public Vector
+{
+public:
+ inline VectorAligned(void) {};
+ inline VectorAligned(vec_t X, vec_t Y, vec_t Z)
+ {
+ Init(X,Y,Z);
+ }
+
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+
+private:
+ // No copy constructors allowed if we're in optimal mode
+ VectorAligned(const VectorAligned& vOther);
+ VectorAligned(const Vector &vOther);
+
+#else
+public:
+ explicit VectorAligned(const Vector &vOther)
+ {
+ Init(vOther.x, vOther.y, vOther.z);
+ }
+
+ VectorAligned& operator=(const Vector &vOther)
+ {
+ Init(vOther.x, vOther.y, vOther.z);
+ return *this;
+ }
+
+#endif
+ float w; // this space is used anyway
+} ALIGN16_POST;
+
+//-----------------------------------------------------------------------------
+// Vector related operations
+//-----------------------------------------------------------------------------
+
+// Vector clear
+FORCEINLINE void VectorClear( Vector& a );
+
+// Copy
+FORCEINLINE void VectorCopy( const Vector& src, Vector& dst );
+
+// Vector arithmetic
+FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result );
+FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result );
+FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result );
+FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result );
+FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result );
+FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result );
+inline void VectorScale ( const Vector& in, vec_t scale, Vector& result );
+// Don't mark this as inline in its function declaration. That's only necessary on its
+// definition, and 'inline' here leads to gcc warnings.
+void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
+
+// Vector equality with tolerance
+bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f );
+
+#define VectorExpand(v) (v).x, (v).y, (v).z
+
+
+// Normalization
+// FIXME: Can't use quite yet
+//vec_t VectorNormalize( Vector& v );
+
+// Length
+inline vec_t VectorLength( const Vector& v );
+
+// Dot Product
+FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b);
+
+// Cross product
+void CrossProduct(const Vector& a, const Vector& b, Vector& result );
+
+// Store the min or max of each of x, y, and z into the result.
+void VectorMin( const Vector &a, const Vector &b, Vector &result );
+void VectorMax( const Vector &a, const Vector &b, Vector &result );
+
+// Linearly interpolate between two vectors
+void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest );
+Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t );
+
+FORCEINLINE Vector ReplicateToVector( float x )
+{
+ return Vector( x, x, x );
+}
+
+// check if a point is in the field of a view of an object. supports up to 180 degree fov.
+FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition,
+ Vector const &vecTargetPosition,
+ Vector const &vecLookDirection, float flCosHalfFOV )
+{
+ Vector vecDelta = vecTargetPosition - vecSrcPosition;
+ float cosDiff = DotProduct( vecLookDirection, vecDelta );
+
+ if ( cosDiff < 0 )
+ return false;
+
+ float flLen2 = vecDelta.LengthSqr();
+
+ // a/sqrt(b) > c == a^2 > b * c ^2
+ return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV );
+
+}
+
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Cross product
+Vector CrossProduct( const Vector& a, const Vector& b );
+
+// Random vector creation
+Vector RandomVector( vec_t minVal, vec_t maxVal );
+
+#endif
+
+float RandomVectorInUnitSphere( Vector *pVector );
+float RandomVectorInUnitCircle( Vector2D *pVector );
+
+
+//-----------------------------------------------------------------------------
+//
+// Inlined Vector methods
+//
+//-----------------------------------------------------------------------------
+
+
+//-----------------------------------------------------------------------------
+// constructors
+//-----------------------------------------------------------------------------
+inline Vector::Vector(void)
+{
+#ifdef _DEBUG
+#ifdef VECTOR_PARANOIA
+ // Initialize to NAN to catch errors
+ x = y = z = VEC_T_NAN;
+#endif
+#endif
+}
+
+inline Vector::Vector(vec_t X, vec_t Y, vec_t Z)
+{
+ x = X; y = Y; z = Z;
+ CHECK_VALID(*this);
+}
+
+inline Vector::Vector(vec_t XYZ)
+{
+ x = y = z = XYZ;
+ CHECK_VALID(*this);
+}
+
+//inline Vector::Vector(const float *pFloat)
+//{
+// Assert( pFloat );
+// x = pFloat[0]; y = pFloat[1]; z = pFloat[2];
+// CHECK_VALID(*this);
+//}
+
+#if 0
+//-----------------------------------------------------------------------------
+// copy constructor
+// (compiled out by the surrounding #if 0; kept for reference only)
+//-----------------------------------------------------------------------------
+
+inline Vector::Vector(const Vector &vOther)
+{
+ CHECK_VALID(vOther);
+ x = vOther.x; y = vOther.y; z = vOther.z;
+}
+#endif
+
+//-----------------------------------------------------------------------------
+// initialization
+//-----------------------------------------------------------------------------
+
+// Overwrites all three components and validates the result.
+inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz )
+{
+    x = ix;
+    y = iy;
+    z = iz;
+    CHECK_VALID(*this);
+}
+
+// Sets each component to minVal plus a rand()-derived fraction of (maxVal - minVal).
+// rand() is called once per component, in x, y, z order.
+inline void Vector::Random( vec_t minVal, vec_t maxVal )
+{
+    const vec_t flRange = maxVal - minVal;
+    x = minVal + ((float)rand() / VALVE_RAND_MAX) * flRange;
+    y = minVal + ((float)rand() / VALVE_RAND_MAX) * flRange;
+    z = minVal + ((float)rand() / VALVE_RAND_MAX) * flRange;
+    CHECK_VALID(*this);
+}
+
+// Sets every component to zero.
+// This should really be a single opcode on the PowerPC (move r0 onto the vec reg)
+inline void Vector::Zero()
+{
+    x = 0.0f;
+    y = 0.0f;
+    z = 0.0f;
+}
+
+// Free-function form of Vector::Zero: sets every component of a to zero.
+inline void VectorClear( Vector& a )
+{
+    a.x = 0.0f;
+    a.y = 0.0f;
+    a.z = 0.0f;
+}
+
+//-----------------------------------------------------------------------------
+// assignment
+//-----------------------------------------------------------------------------
+
+// Copies the components of vOther (validated first) and returns *this.
+inline Vector& Vector::operator=(const Vector &vOther)
+{
+    CHECK_VALID(vOther);
+    x = vOther.x;
+    y = vOther.y;
+    z = vOther.z;
+    return *this;
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Mutable component access by index (0 = x, 1 = y, 2 = z); asserts on out-of-range indices.
+inline vec_t& Vector::operator[](int i)
+{
+    Assert( (i >= 0) && (i < 3) );
+    vec_t *pComponents = (vec_t*)this;
+    return pComponents[i];
+}
+
+// Read-only component access by index (0 = x, 1 = y, 2 = z); asserts on out-of-range indices.
+inline vec_t Vector::operator[](int i) const
+{
+    Assert( (i >= 0) && (i < 3) );
+    const vec_t *pComponents = (const vec_t*)this;
+    return pComponents[i];
+}
+
+
+//-----------------------------------------------------------------------------
+// Base address...
+//-----------------------------------------------------------------------------
+// Returns the object's address reinterpreted as a pointer to its components.
+inline vec_t* Vector::Base()
+{
+    vec_t *pBase = (vec_t*)this;
+    return pBase;
+}
+
+// Const overload: returns the object's address as a pointer to read-only components.
+inline vec_t const* Vector::Base() const
+{
+    vec_t const *pBase = (vec_t const*)this;
+    return pBase;
+}
+
+//-----------------------------------------------------------------------------
+// Cast to Vector2D...
+//-----------------------------------------------------------------------------
+
+// Reinterprets this vector in place as a Vector2D; no copy is made.
+inline Vector2D& Vector::AsVector2D()
+{
+    Vector2D *pAs2D = (Vector2D*)this;
+    return *pAs2D;
+}
+
+// Const overload: reinterprets this vector in place as a read-only Vector2D.
+inline const Vector2D& Vector::AsVector2D() const
+{
+    const Vector2D *pAs2D = (const Vector2D*)this;
+    return *pAs2D;
+}
+
+//-----------------------------------------------------------------------------
+// IsValid?
+//-----------------------------------------------------------------------------
+
+// True only when IsFinite() accepts every component.
+inline bool Vector::IsValid() const
+{
+    if ( !IsFinite(x) )
+        return false;
+    if ( !IsFinite(y) )
+        return false;
+    return IsFinite(z);
+}
+
+//-----------------------------------------------------------------------------
+// Invalidate
+//-----------------------------------------------------------------------------
+
+// Poisons every component with VEC_T_NAN. The _DEBUG / VECTOR_PARANOIA guards
+// are commented out, so this happens in every build configuration.
+inline void Vector::Invalidate()
+{
+//#ifdef _DEBUG
+//#ifdef VECTOR_PARANOIA
+    x = VEC_T_NAN;
+    y = VEC_T_NAN;
+    z = VEC_T_NAN;
+//#endif
+//#endif
+}
+
+//-----------------------------------------------------------------------------
+// comparison
+//-----------------------------------------------------------------------------
+
+// Exact component-wise equality (no tolerance); both operands are validated first.
+inline bool Vector::operator==( const Vector& src ) const
+{
+    CHECK_VALID(src);
+    CHECK_VALID(*this);
+    if ( !(src.x == x) )
+        return false;
+    if ( !(src.y == y) )
+        return false;
+    return (src.z == z);
+}
+
+// True when any component differs exactly; both operands are validated first.
+inline bool Vector::operator!=( const Vector& src ) const
+{
+    CHECK_VALID(src);
+    CHECK_VALID(*this);
+    if ( src.x != x )
+        return true;
+    if ( src.y != y )
+        return true;
+    return (src.z != z);
+}
+
+
+//-----------------------------------------------------------------------------
+// Copy
+//-----------------------------------------------------------------------------
+
+// Copies src into dst component by component; src is validated first.
+FORCEINLINE void VectorCopy( const Vector& src, Vector& dst )
+{
+    CHECK_VALID(src);
+    dst.x = src.x; dst.y = src.y; dst.z = src.z;
+}
+
+// Writes x, y, z into rgfl[0..2]. rgfl must be non-null and have room for three floats.
+inline void Vector::CopyToArray(float* rgfl) const
+{
+    Assert( rgfl );
+    CHECK_VALID(*this);
+    rgfl[0] = x;
+    rgfl[1] = y;
+    rgfl[2] = z;
+}
+
+//-----------------------------------------------------------------------------
+// standard math operations
+//-----------------------------------------------------------------------------
+// #pragma message("TODO: these should be SSE")
+
+// Flips the sign of every component in place.
+inline void Vector::Negate()
+{
+    CHECK_VALID(*this);
+    x = -x;
+    y = -y;
+    z = -z;
+}
+
+// Component-wise in-place addition; both operands are validated before the add.
+FORCEINLINE Vector& Vector::operator+=(const Vector& v)
+{
+    CHECK_VALID(*this);
+    CHECK_VALID(v);
+    x += v.x;
+    y += v.y;
+    z += v.z;
+    return *this;
+}
+
+// Component-wise in-place subtraction; both operands are validated before the subtract.
+FORCEINLINE Vector& Vector::operator-=(const Vector& v)
+{
+    CHECK_VALID(*this);
+    CHECK_VALID(v);
+    x -= v.x;
+    y -= v.y;
+    z -= v.z;
+    return *this;
+}
+
+// Scales every component by fl in place; the result is validated afterwards.
+FORCEINLINE Vector& Vector::operator*=(float fl)
+{
+    x *= fl; y *= fl; z *= fl;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+// Component-wise in-place multiplication by v (not a dot or cross product).
+FORCEINLINE Vector& Vector::operator*=(const Vector& v)
+{
+    CHECK_VALID(v);
+    x *= v.x; y *= v.y; z *= v.z;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+// Adds the scalar fl to every component in place.
+// this ought to be an opcode.
+FORCEINLINE Vector& Vector::operator+=(float fl)
+{
+    x += fl; y += fl; z += fl;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+// Subtracts the scalar fl from every component in place.
+FORCEINLINE Vector& Vector::operator-=(float fl)
+{
+    x -= fl; y -= fl; z -= fl;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+
+
+// Divides every component by fl in place. fl must be non-zero (asserted).
+// The reciprocal is computed once and multiplied in.
+FORCEINLINE Vector& Vector::operator/=(float fl)
+{
+    Assert( fl != 0.0f );
+    float flReciprocal = 1.0f / fl;
+    x *= flReciprocal; y *= flReciprocal; z *= flReciprocal;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+// Component-wise in-place division by v. Every component of v must be non-zero (asserted).
+FORCEINLINE Vector& Vector::operator/=(const Vector& v)
+{
+    CHECK_VALID(v);
+    Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f );
+    x /= v.x; y /= v.y; z /= v.z;
+    CHECK_VALID(*this);
+    return *this;
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// Inlined Short Vector methods
+//
+//-----------------------------------------------------------------------------
+
+
+// Overwrites all four components.
+inline void ShortVector::Init( short ix, short iy, short iz, short iw )
+{
+    x = ix;
+    y = iy;
+    z = iz;
+    w = iw;
+}
+
+// Copies all four components from vOther.
+FORCEINLINE void ShortVector::Set( const ShortVector& vOther )
+{
+    x = vOther.x; y = vOther.y;
+    z = vOther.z; w = vOther.w;
+}
+
+// Assigns all four components from individual values.
+FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw )
+{
+    x = ix; y = iy;
+    z = iz; w = iw;
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Read-only component access by index (0..3); asserts on out-of-range indices.
+inline short ShortVector::operator[](int i) const
+{
+    Assert( (i >= 0) && (i < 4) );
+    const short *pComponents = (const short*)this;
+    return pComponents[i];
+}
+
+// Mutable component access by index (0..3); asserts on out-of-range indices.
+inline short& ShortVector::operator[](int i)
+{
+    Assert( (i >= 0) && (i < 4) );
+    short *pComponents = (short*)this;
+    return pComponents[i];
+}
+
+//-----------------------------------------------------------------------------
+// Base address...
+//-----------------------------------------------------------------------------
+// Returns the object's address reinterpreted as a pointer to its components.
+inline short* ShortVector::Base()
+{
+    short *pBase = (short*)this;
+    return pBase;
+}
+
+// Const overload: returns the object's address as a pointer to read-only components.
+inline short const* ShortVector::Base() const
+{
+    short const *pBase = (short const*)this;
+    return pBase;
+}
+
+
+//-----------------------------------------------------------------------------
+// comparison
+//-----------------------------------------------------------------------------
+
+// True when all four components match.
+inline bool ShortVector::operator==( const ShortVector& src ) const
+{
+    if ( src.x != x || src.y != y )
+        return false;
+    return (src.z == z) && (src.w == w);
+}
+
+// True when at least one component differs.
+inline bool ShortVector::operator!=( const ShortVector& src ) const
+{
+    if ( src.x != x || src.y != y )
+        return true;
+    return (src.z != z) || (src.w != w);
+}
+
+
+
+//-----------------------------------------------------------------------------
+// standard math operations
+//-----------------------------------------------------------------------------
+
+// Component-wise in-place addition.
+FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v)
+{
+    x += v.x;
+    y += v.y;
+    z += v.z;
+    w += v.w;
+    return *this;
+}
+
+// Component-wise in-place subtraction.
+FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v)
+{
+    x -= v.x;
+    y -= v.y;
+    z -= v.z;
+    w -= v.w;
+    return *this;
+}
+
+// Scales every component by fl in place. Each product is computed in floating
+// point and converted back to the integer component type on assignment.
+FORCEINLINE ShortVector& ShortVector::operator*=(float fl)
+{
+    x *= fl; y *= fl;
+    z *= fl; w *= fl;
+    return *this;
+}
+
+// Component-wise in-place multiplication by v.
+FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v)
+{
+    x *= v.x; y *= v.y;
+    z *= v.z; w *= v.w;
+    return *this;
+}
+
+// Divides every component by fl in place. fl must be non-zero (asserted).
+// Each component is divided directly instead of being multiplied by a
+// precomputed 1.0f / fl: the reciprocal is rounded, so the product can land
+// just below an exact integer quotient and then lose a whole unit when it is
+// converted back to short.
+FORCEINLINE ShortVector& ShortVector::operator/=(float fl)
+{
+    Assert( fl != 0.0f );
+    x = static_cast<short>( x / fl );
+    y = static_cast<short>( y / fl );
+    z = static_cast<short>( z / fl );
+    w = static_cast<short>( w / fl );
+    return *this;
+}
+
+// Component-wise in-place integer division by v. Every component of v must be non-zero (asserted).
+FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v)
+{
+    Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
+    x /= v.x; y /= v.y;
+    z /= v.z; w /= v.w;
+    return *this;
+}
+
+// Writes src scaled by fl into res. fl must be finite (asserted). Each product
+// is converted back to the integer component type on assignment.
+FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res )
+{
+    Assert( IsFinite(fl) );
+    res.x = src.x * fl; res.y = src.y * fl;
+    res.z = src.z * fl; res.w = src.w * fl;
+}
+
+// Returns a copy of this vector scaled by fl (delegates to ShortVectorMultiply).
+FORCEINLINE ShortVector ShortVector::operator*(float fl) const
+{
+    ShortVector vecScaled;
+    ShortVectorMultiply( *this, fl, vecScaled );
+    return vecScaled;
+}
+
+
+
+
+
+
+//-----------------------------------------------------------------------------
+//
+// Inlined Integer Vector methods
+//
+//-----------------------------------------------------------------------------
+
+
+// Overwrites all four components.
+inline void IntVector4D::Init( int ix, int iy, int iz, int iw )
+{
+    x = ix;
+    y = iy;
+    z = iz;
+    w = iw;
+}
+
+// Copies all four components from vOther.
+FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther )
+{
+    x = vOther.x; y = vOther.y;
+    z = vOther.z; w = vOther.w;
+}
+
+// Assigns all four components from individual values.
+FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw )
+{
+    x = ix; y = iy;
+    z = iz; w = iw;
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Read-only component access by index (0..3); asserts on out-of-range indices.
+inline int IntVector4D::operator[](int i) const
+{
+    Assert( (i >= 0) && (i < 4) );
+    const int *pComponents = (const int*)this;
+    return pComponents[i];
+}
+
+// Mutable component access by index (0..3); asserts on out-of-range indices.
+inline int& IntVector4D::operator[](int i)
+{
+    Assert( (i >= 0) && (i < 4) );
+    int *pComponents = (int*)this;
+    return pComponents[i];
+}
+
+//-----------------------------------------------------------------------------
+// Base address...
+//-----------------------------------------------------------------------------
+// Returns the object's address reinterpreted as a pointer to its components.
+inline int* IntVector4D::Base()
+{
+    int *pBase = (int*)this;
+    return pBase;
+}
+
+// Const overload: returns the object's address as a pointer to read-only components.
+inline int const* IntVector4D::Base() const
+{
+    int const *pBase = (int const*)this;
+    return pBase;
+}
+
+
+//-----------------------------------------------------------------------------
+// comparison
+//-----------------------------------------------------------------------------
+
+// True when all four components match.
+inline bool IntVector4D::operator==( const IntVector4D& src ) const
+{
+    if ( src.x != x || src.y != y )
+        return false;
+    return (src.z == z) && (src.w == w);
+}
+
+// True when at least one component differs.
+inline bool IntVector4D::operator!=( const IntVector4D& src ) const
+{
+    if ( src.x != x || src.y != y )
+        return true;
+    return (src.z != z) || (src.w != w);
+}
+
+
+
+//-----------------------------------------------------------------------------
+// standard math operations
+//-----------------------------------------------------------------------------
+
+// Component-wise in-place addition.
+FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v)
+{
+    x += v.x;
+    y += v.y;
+    z += v.z;
+    w += v.w;
+    return *this;
+}
+
+// Component-wise in-place subtraction.
+FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v)
+{
+    x -= v.x;
+    y -= v.y;
+    z -= v.z;
+    w -= v.w;
+    return *this;
+}
+
+// Scales every component by fl in place. Each product is computed in floating
+// point and converted back to the integer component type on assignment.
+FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl)
+{
+    x *= fl; y *= fl;
+    z *= fl; w *= fl;
+    return *this;
+}
+
+// Component-wise in-place multiplication by v.
+FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v)
+{
+    x *= v.x; y *= v.y;
+    z *= v.z; w *= v.w;
+    return *this;
+}
+
+// Divides every component by fl in place. fl must be non-zero (asserted).
+// Each component is divided directly instead of being multiplied by a
+// precomputed 1.0f / fl: the reciprocal is rounded, so the product can land
+// just below an exact integer quotient and then lose a whole unit when it is
+// converted back to int.
+FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl)
+{
+    Assert( fl != 0.0f );
+    x = static_cast<int>( x / fl );
+    y = static_cast<int>( y / fl );
+    z = static_cast<int>( z / fl );
+    w = static_cast<int>( w / fl );
+    return *this;
+}
+
+// Component-wise in-place integer division by v. Every component of v must be non-zero (asserted).
+FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v)
+{
+    Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
+    x /= v.x; y /= v.y;
+    z /= v.z; w /= v.w;
+    return *this;
+}
+
+// Writes src scaled by fl into res. fl must be finite (asserted). Each product
+// is converted back to the integer component type on assignment.
+FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res )
+{
+    Assert( IsFinite(fl) );
+    res.x = src.x * fl; res.y = src.y * fl;
+    res.z = src.z * fl; res.w = src.w * fl;
+}
+
+// Returns a copy of this vector scaled by fl (delegates to IntVector4DMultiply).
+FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const
+{
+    IntVector4D vecScaled;
+    IntVector4DMultiply( *this, fl, vecScaled );
+    return vecScaled;
+}
+
+
+
+// =======================
+
+
+// c = a + b, component-wise. Both inputs are validated first.
+FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c )
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    c.x = a.x + b.x; c.y = a.y + b.y; c.z = a.z + b.z;
+}
+
+// c = a - b, component-wise. Both inputs are validated first.
+FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c )
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    c.x = a.x - b.x; c.y = a.y - b.y; c.z = a.z - b.z;
+}
+
+// c = a * b, where b is a scalar that must be finite (asserted).
+FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c )
+{
+    CHECK_VALID(a);
+    Assert( IsFinite(b) );
+    c.x = a.x * b; c.y = a.y * b; c.z = a.z * b;
+}
+
+// c = a * b, component-wise (not a dot or cross product).
+FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c )
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    c.x = a.x * b.x; c.y = a.y * b.y; c.z = a.z * b.z;
+}
+
+// Kept for backwards compatibility: result = in * scale.
+// Simply forwards to the scalar VectorMultiply overload.
+inline void VectorScale ( const Vector& in, vec_t scale, Vector& result )
+{
+    VectorMultiply( in, scale, result );
+}
+
+
+// c = a / b, where b is a non-zero scalar (asserted). The reciprocal of b is
+// computed once and multiplied into each component.
+FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c )
+{
+    CHECK_VALID(a);
+    Assert( b != 0.0f );
+    vec_t flReciprocal = 1.0f / b;
+    c.x = a.x * flReciprocal; c.y = a.y * flReciprocal; c.z = a.z * flReciprocal;
+}
+
+// c = a / b, component-wise. Every component of b must be non-zero (asserted).
+FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c )
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) );
+    c.x = a.x / b.x; c.y = a.y / b.y; c.z = a.z / b.z;
+}
+
+// FIXME: Remove
+// Kept for backwards compatibility: sets *this = a + b * scalar.
+inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar)
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    x = a.x + b.x * scalar; y = a.y + b.y * scalar; z = a.z + b.z * scalar;
+}
+
+// Linear interpolation: dest = src1 + (src2 - src1) * t.
+// t = 0 yields src1; t is not clamped.
+inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest )
+{
+    CHECK_VALID(src1);
+    CHECK_VALID(src2);
+    dest.x = src1.x + (src2.x - src1.x) * t; dest.y = src1.y + (src2.y - src1.y) * t; dest.z = src1.z + (src2.z - src1.z) * t;
+}
+
+// By-value form of VectorLerp: returns src1 + (src2 - src1) * t.
+inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t )
+{
+    Vector vecBlended;
+    VectorLerp( src1, src2, t, vecBlended );
+    return vecBlended;
+}
+
+//-----------------------------------------------------------------------------
+// Temporary storage for vector results so const Vector& results can be returned
+//
+// Hands out slots from a static ring of 128 vectors. A slot is reused after 128
+// further calls, so a returned reference is only safe for short-lived use.
+//-----------------------------------------------------------------------------
+inline Vector &AllocTempVector()
+{
+ static Vector s_vecTemp[128];
+ static CInterlockedInt s_nIndex;
+
+ int nIndex;
+ // Retry until this caller manages to publish the next index.
+ // NOTE(review): AssignIf is presumably an atomic compare-and-swap -- confirm in CInterlockedInt.
+ for (;;)
+ {
+ int nOldIndex = s_nIndex;
+ // The 0x7F mask keeps only the low seven bits, so the index wraps within 0..127.
+ nIndex = ( (nOldIndex + 0x10001) & 0x7F );
+
+ if ( s_nIndex.AssignIf( nOldIndex, nIndex ) )
+ {
+ break;
+ }
+ ThreadPause();
+ }
+ // nIndex is already below 128, so the 0xffff mask does not change it.
+ return s_vecTemp[nIndex & 0xffff];
+}
+
+
+
+//-----------------------------------------------------------------------------
+// dot, cross
+//-----------------------------------------------------------------------------
+// Returns the dot product of a and b. Both inputs are validated first.
+FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b) 
+{ 
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    return ( a.x*b.x + a.y*b.y + a.z*b.z );
+}
+
+// Kept for backwards compatibility: member form of DotProduct( *this, vOther ).
+inline vec_t Vector::Dot( const Vector& vOther ) const
+{
+    CHECK_VALID(vOther);
+    const Vector &vecSelf = *this;
+    return DotProduct( vecSelf, vOther );
+}
+
+// result = a x b. result must not alias a or b (asserted): its components are
+// written while the inputs are still being read.
+inline void CrossProduct(const Vector& a, const Vector& b, Vector& result )
+{
+    CHECK_VALID(a);
+    CHECK_VALID(b);
+    Assert( &a != &result );
+    Assert( &b != &result );
+    result.x = a.y*b.z - a.z*b.y; result.y = a.z*b.x - a.x*b.z; result.z = a.x*b.y - a.y*b.x;
+}
+
+// Sums FloatMakePositive() of each per-component product of v0 and v1.
+inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 )
+{
+    CHECK_VALID(v0);
+    CHECK_VALID(v1);
+    return FloatMakePositive(v0.x*v1.x)
+         + FloatMakePositive(v0.y*v1.y)
+         + FloatMakePositive(v0.z*v1.z);
+}
+
+// Same as the Vector overload, with the second operand read from v1[0..2].
+// No validity checks are performed in this overload.
+inline vec_t DotProductAbs( const Vector &v0, const float *v1 )
+{
+    return FloatMakePositive(v0.x * v1[0])
+         + FloatMakePositive(v0.y * v1[1])
+         + FloatMakePositive(v0.z * v1[2]);
+}
+
+//-----------------------------------------------------------------------------
+// length
+//-----------------------------------------------------------------------------
+
+// Returns the length of v, computed with FastSqrt on the sum of squared components.
+inline vec_t VectorLength( const Vector& v )
+{
+    CHECK_VALID(v);
+    return (vec_t)FastSqrt( v.x*v.x + v.y*v.y + v.z*v.z );
+}
+
+
+// Member form of VectorLength( *this ).
+inline vec_t Vector::Length(void) const
+{
+    CHECK_VALID(*this);
+    const Vector &vecSelf = *this;
+    return VectorLength( vecSelf );
+}
+
+
+//-----------------------------------------------------------------------------
+// Normalization
+//-----------------------------------------------------------------------------
+
+/*
+// FIXME: Can't use until we're un-macroed in mathlib.h
+inline vec_t VectorNormalize( Vector& v )
+{
+ Assert( v.IsValid() );
+ vec_t l = v.Length();
+ if (l != 0.0f)
+ {
+ v /= l;
+ }
+ else
+ {
+ // FIXME: 
+ // Just copying the existing implementation; shouldn't res.z == 0?
+ v.x = v.y = 0.0f; v.z = 1.0f;
+ }
+ return l;
+}
+*/
+
+
+// Checks whether this point lies inside the axis-aligned box [boxmin, boxmax].
+// The bounds are inclusive on every axis.
+// Marked inline like every other out-of-class member definition in this header,
+// so that including the header from several translation units cannot produce
+// duplicate definitions.
+inline bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax)
+{
+    return ( 
+        ( x >= boxmin.x ) && ( x <= boxmax.x) &&
+        ( y >= boxmin.y ) && ( y <= boxmax.y) &&
+        ( z >= boxmin.z ) && ( z <= boxmax.z)
+        );
+}
+
+//-----------------------------------------------------------------------------
+// Get the distance from this vector to the other one
+//-----------------------------------------------------------------------------
+// Returns the length of (*this - vOther).
+inline vec_t Vector::DistTo(const Vector &vOther) const
+{
+    Vector vecOffset;
+    VectorSubtract( *this, vOther, vecOffset );
+    return vecOffset.Length();
+}
+
+
+//-----------------------------------------------------------------------------
+// Vector equality with tolerance
+//-----------------------------------------------------------------------------
+// True when no component of src1 and src2 differs by more than tolerance.
+// The x and y tests reject on "> tolerance"; the z test accepts on "<= tolerance".
+inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance )
+{
+    const bool bXTooFar = ( FloatMakePositive(src1.x - src2.x) > tolerance );
+    if ( bXTooFar )
+    {
+        return false;
+    }
+    const bool bYTooFar = ( FloatMakePositive(src1.y - src2.y) > tolerance );
+    if ( bYTooFar )
+    {
+        return false;
+    }
+    return ( FloatMakePositive(src1.z - src2.z) <= tolerance );
+}
+
+
+//-----------------------------------------------------------------------------
+// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
+//-----------------------------------------------------------------------------
+// Writes to *pResult the point closest to vecTarget that is no farther than
+// flMaxDist from vecStart: the target itself when it is in range, otherwise the
+// point flMaxDist along the direction from vecStart towards vecTarget.
+inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult )
+{
+    Vector vecToTarget;
+    VectorSubtract( vecTarget, vecStart, vecToTarget );
+    float flTargetDistSqr = vecToTarget.LengthSqr();
+    // Target already within range: use it unchanged.
+    if ( flTargetDistSqr <= flMaxDist * flMaxDist )
+    {
+        *pResult = vecTarget;
+        return;
+    }
+    // Otherwise normalize the offset and step flMaxDist along it.
+    vecToTarget /= FastSqrt( flTargetDistSqr );
+    VectorMA( vecStart, flMaxDist, vecToTarget, *pResult );
+}
+
+
+//-----------------------------------------------------------------------------
+// Takes the absolute value of a vector
+//-----------------------------------------------------------------------------
+// dst receives FloatMakePositive() of each component of src.
+inline void VectorAbs( const Vector& src, Vector& dst )
+{
+    dst.x = FloatMakePositive(src.x); dst.y = FloatMakePositive(src.y); dst.z = FloatMakePositive(src.z);
+}
+
+
+//-----------------------------------------------------------------------------
+//
+// Slow methods
+//
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+//-----------------------------------------------------------------------------
+// Returns a vector with the min or max in X, Y, and Z.
+//-----------------------------------------------------------------------------
+// Returns a vector holding, per component, the smaller of this and vOther.
+inline Vector Vector::Min(const Vector &vOther) const
+{
+    const vec_t flMinX = ( x < vOther.x ) ? x : vOther.x;
+    const vec_t flMinY = ( y < vOther.y ) ? y : vOther.y;
+    const vec_t flMinZ = ( z < vOther.z ) ? z : vOther.z;
+    return Vector( flMinX, flMinY, flMinZ );
+}
+
+// Returns a vector holding, per component, the larger of this and vOther.
+inline Vector Vector::Max(const Vector &vOther) const
+{
+    const vec_t flMaxX = ( x > vOther.x ) ? x : vOther.x;
+    const vec_t flMaxY = ( y > vOther.y ) ? y : vOther.y;
+    const vec_t flMaxZ = ( z > vOther.z ) ? z : vOther.z;
+    return Vector( flMaxX, flMaxY, flMaxZ );
+}
+
+
+//-----------------------------------------------------------------------------
+// arithmetic operations
+//-----------------------------------------------------------------------------
+
+// Unary minus: returns a copy with every component negated.
+inline Vector Vector::operator-(void) const
+{ 
+    Vector vecNegated( -x, -y, -z );
+    return vecNegated;
+}
+
+// Returns *this + v (delegates to VectorAdd).
+inline Vector Vector::operator+(const Vector& v) const
+{ 
+    Vector vecSum;
+    VectorAdd( *this, v, vecSum );
+    return vecSum;
+}
+
+// Returns *this - v (delegates to VectorSubtract).
+inline Vector Vector::operator-(const Vector& v) const
+{ 
+    Vector vecDifference;
+    VectorSubtract( *this, v, vecDifference );
+    return vecDifference;
+}
+
+// Returns *this scaled by fl (delegates to the scalar VectorMultiply).
+inline Vector Vector::operator*(float fl) const
+{ 
+    Vector vecScaled;
+    VectorMultiply( *this, fl, vecScaled );
+    return vecScaled;
+}
+
+// Returns the component-wise product of *this and v.
+inline Vector Vector::operator*(const Vector& v) const
+{ 
+    Vector vecProduct;
+    VectorMultiply( *this, v, vecProduct );
+    return vecProduct;
+}
+
+// Returns *this divided by the scalar fl (delegates to VectorDivide).
+inline Vector Vector::operator/(float fl) const
+{ 
+    Vector vecQuotient;
+    VectorDivide( *this, fl, vecQuotient );
+    return vecQuotient;
+}
+
+// Returns the component-wise quotient of *this and v.
+inline Vector Vector::operator/(const Vector& v) const
+{ 
+    Vector vecQuotient;
+    VectorDivide( *this, v, vecQuotient );
+    return vecQuotient;
+}
+
+// Scalar-on-the-left multiplication: fl * v is evaluated as v * fl.
+inline Vector operator*(float fl, const Vector& v)
+{ 
+    return ( v * fl );
+}
+
+//-----------------------------------------------------------------------------
+// cross product
+//-----------------------------------------------------------------------------
+
+// Returns *this x vOther (delegates to the out-parameter CrossProduct).
+inline Vector Vector::Cross(const Vector& vOther) const
+{ 
+    Vector vecCross;
+    CrossProduct( *this, vOther, vecCross );
+    return vecCross;
+}
+
+//-----------------------------------------------------------------------------
+// 2D
+//-----------------------------------------------------------------------------
+
+// Length of the (x, y) part of the vector; z is ignored.
+inline vec_t Vector::Length2D(void) const
+{ 
+    return (vec_t)FastSqrt( x*x + y*y );
+}
+
+// Squared length of the (x, y) part of the vector; z is ignored.
+inline vec_t Vector::Length2DSqr(void) const
+{ 
+    return ( x*x + y*y );
+}
+
+// By-value cross product: returns a x b. No validity checks are performed here.
+inline Vector CrossProduct(const Vector& a, const Vector& b) 
+{ 
+    return Vector( a.y*b.z - a.z*b.y,
+                   a.z*b.x - a.x*b.z,
+                   a.x*b.y - a.y*b.x );
+}
+
+// result receives fpmin() of each component pair of a and b.
+inline void VectorMin( const Vector &a, const Vector &b, Vector &result )
+{
+    result.x = fpmin( a.x, b.x ); result.y = fpmin( a.y, b.y ); result.z = fpmin( a.z, b.z );
+}
+
+// result receives fpmax() of each component pair of a and b.
+inline void VectorMax( const Vector &a, const Vector &b, Vector &result )
+{
+    result.x = fpmax( a.x, b.x ); result.y = fpmax( a.y, b.y ); result.z = fpmax( a.z, b.z );
+}
+
+// Returns the volume of the axis-aligned box spanned by vecMins and vecMaxs,
+// i.e. the product of its extents along x, y and z.
+// The previous body returned DotProduct( vecDelta, vecDelta ), which is the
+// squared length of the box diagonal rather than a volume.
+// NOTE(review): callers that only compared results against each other may see
+// a different ordering of boxes -- confirm against existing uses.
+inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs )
+{
+    Vector vecDelta;
+    VectorSubtract( vecMaxs, vecMins, vecDelta );
+    return vecDelta.x * vecDelta.y * vecDelta.z;
+}
+
+// Get a random vector: returns a Vector filled by Vector::Random( minVal, maxVal ).
+inline Vector RandomVector( float minVal, float maxVal )
+{
+    Vector vecRandom;
+    vecRandom.Random( minVal, maxVal );
+    return vecRandom;
+}
+
+#endif //slow
+
+//-----------------------------------------------------------------------------
+// Helper debugging stuff....
+//-----------------------------------------------------------------------------
+
+// Debug trap: a float pointer must never be compared against a Vector.
+// Always asserts and reports false.
+inline bool operator==( float const* f, const Vector& v )
+{
+    // AIIIEEEE!!!!
+    Assert( 0 );
+    return false;
+}
+
+// Debug trap: a Vector must never be compared against a float pointer.
+// Always asserts and reports false.
+inline bool operator==( const Vector& v, float const* f )
+{
+    // AIIIEEEE!!!!
+    Assert( 0 );
+    return false;
+}
+
+// Debug trap: a float pointer must never be compared against a Vector.
+// Always asserts and reports false.
+inline bool operator!=( float const* f, const Vector& v )
+{
+    // AIIIEEEE!!!!
+    Assert( 0 );
+    return false;
+}
+
+// Debug trap: a Vector must never be compared against a float pointer.
+// Always asserts and reports false.
+inline bool operator!=( const Vector& v, float const* f )
+{
+    // AIIIEEEE!!!!
+    Assert( 0 );
+    return false;
+}
+
+
+//-----------------------------------------------------------------------------
+// AngularImpulse
+//-----------------------------------------------------------------------------
+// AngularImpulse values are exponential maps (an axis scaled by a "twist" angle in degrees)
+typedef Vector AngularImpulse;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Returns an AngularImpulse filled by Random( minVal, maxVal ).
+inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal )
+{
+    AngularImpulse vecRandomImpulse;
+    vecRandomImpulse.Random( minVal, maxVal );
+    return vecRandomImpulse;
+}
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Quaternion
+//-----------------------------------------------------------------------------
+
+class RadianEuler;
+
+// Four-component quaternion (x, y, z, w).
+// Same data-layout as engine's vec4_t, which is a vec_t[4].
+class Quaternion
+{
+public:
+    // Leaves the components uninitialized unless both _DEBUG and
+    // VECTOR_PARANOIA are defined, in which case they are set to NaN.
+    inline Quaternion(void)
+    {
+#if defined( _DEBUG ) && defined( VECTOR_PARANOIA )
+        // Initialize to NAN to catch errors
+        x = y = z = w = VEC_T_NAN;
+#endif
+    }
+    inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw)
+        : x(ix), y(iy), z(iz), w(iw)
+    {
+    }
+    // Implicit conversion from radian Euler angles -- evil auto type promotion!!!
+    inline Quaternion(RadianEuler const &angle);
+
+    // Overwrites all four components (each defaults to 0).
+    inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f)
+    {
+        x = ix;
+        y = iy;
+        z = iz;
+        w = iw;
+    }
+
+    // NaN / infinity detection and deliberate poisoning
+    bool IsValid() const;
+    void Invalidate();
+
+    // exact comparison
+    bool operator==( const Quaternion &src ) const;
+    bool operator!=( const Quaternion &src ) const;
+
+    // address of the first component
+    vec_t* Base() { return (vec_t*)this; }
+    const vec_t* Base() const { return (const vec_t*)this; }
+
+    // array access...
+    vec_t operator[](int i) const;
+    vec_t& operator[](int i);
+
+    vec_t x, y, z, w;
+};
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Mutable component access by index (0..3); asserts on out-of-range indices.
+inline vec_t& Quaternion::operator[](int i)
+{
+    Assert( (i >= 0) && (i < 4) );
+    vec_t *pComponents = (vec_t*)this;
+    return pComponents[i];
+}
+
+// Read-only component access by index (0..3); asserts on out-of-range indices.
+inline vec_t Quaternion::operator[](int i) const
+{
+    Assert( (i >= 0) && (i < 4) );
+    const vec_t *pComponents = (const vec_t*)this;
+    return pComponents[i];
+}
+
+
+//-----------------------------------------------------------------------------
+// Equality test
+//-----------------------------------------------------------------------------
+// Exact component-wise equality (no tolerance).
+inline bool Quaternion::operator==( const Quaternion &src ) const
+{
+    if ( !( x == src.x ) )
+        return false;
+    if ( !( y == src.y ) )
+        return false;
+    if ( !( z == src.z ) )
+        return false;
+    return ( w == src.w );
+}
+
+// Logical negation of operator==.
+inline bool Quaternion::operator!=( const Quaternion &src ) const
+{
+    const bool bSame = operator==( src );
+    return !bSame;
+}
+
+
+//-----------------------------------------------------------------------------
+// Quaternion equality with tolerance
+//-----------------------------------------------------------------------------
+// True when no component of src1 and src2 differs by more than tolerance.
+// The x, y and z tests reject on "> tolerance"; the w test accepts on "<= tolerance".
+inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance )
+{
+    const bool bXTooFar = ( FloatMakePositive(src1.x - src2.x) > tolerance );
+    if ( bXTooFar )
+    {
+        return false;
+    }
+    const bool bYTooFar = ( FloatMakePositive(src1.y - src2.y) > tolerance );
+    if ( bYTooFar )
+    {
+        return false;
+    }
+    const bool bZTooFar = ( FloatMakePositive(src1.z - src2.z) > tolerance );
+    if ( bZTooFar )
+    {
+        return false;
+    }
+    return ( FloatMakePositive(src1.w - src2.w) <= tolerance );
+}
+
+
+//-----------------------------------------------------------------------------
+// Here's where we add all those lovely SSE optimized routines
+//-----------------------------------------------------------------------------
+// Quaternion wrapped in the ALIGN16 / ALIGN16_POST alignment macros.
+// Copying from a plain Quaternion is only available when
+// VECTOR_NO_SLOW_OPERATIONS is not defined.
+class ALIGN16 QuaternionAligned : public Quaternion
+{
+public:
+    inline QuaternionAligned(void) {}
+    inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W) 
+    {
+        Init( X, Y, Z, W );
+    }
+
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+
+private:
+    // No copy constructors allowed if we're in optimal mode
+    QuaternionAligned(const QuaternionAligned& vOther);
+    QuaternionAligned(const Quaternion &vOther);
+
+#else
+public:
+    // Explicit conversion from an unaligned Quaternion.
+    explicit QuaternionAligned(const Quaternion &vOther) 
+    {
+        Init( vOther.x, vOther.y, vOther.z, vOther.w );
+    }
+
+    // Assignment from an unaligned Quaternion.
+    QuaternionAligned& operator=(const Quaternion &vOther) 
+    {
+        Init( vOther.x, vOther.y, vOther.z, vOther.w );
+        return *this;
+    }
+
+#endif
+} ALIGN16_POST;
+
+
+//-----------------------------------------------------------------------------
+// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW)
+//-----------------------------------------------------------------------------
+class QAngle;
+// Euler angles in radians, stored per axis as x, y, z (NOT roll/pitch/yaw).
+class RadianEuler
+{
+public:
+    // The default constructor leaves the components uninitialized.
+    inline RadianEuler(void) { }
+    inline RadianEuler(vec_t X, vec_t Y, vec_t Z)
+    {
+        x = X;
+        y = Y;
+        z = Z;
+    }
+    // Implicit conversions -- evil auto type promotion!!!
+    inline RadianEuler(Quaternion const &q);
+    inline RadianEuler(QAngle const &angles);
+
+    // Initialization: overwrites all three components (each defaults to 0).
+    inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f)
+    {
+        x = ix;
+        y = iy;
+        z = iz;
+    }
+
+    // conversion to qangle
+    QAngle ToQAngle( void ) const;
+    // NaN / infinity detection and deliberate poisoning
+    bool IsValid() const;
+    void Invalidate();
+
+    // array access...
+    vec_t operator[](int i) const;
+    vec_t& operator[](int i);
+
+    vec_t x, y, z;
+};
+
+
+// Conversions between radian Euler angles and quaternions; implemented outside this header.
+// AngleQuaternion writes its result to qt, QuaternionAngles writes its result to angles.
+extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
+extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles );
+
+// Zeroes all four components of q.
+FORCEINLINE void NetworkVarConstruct( Quaternion &q )
+{
+    q.x = 0.0f;
+    q.y = 0.0f;
+    q.z = 0.0f;
+    q.w = 0.0f;
+}
+
+// Builds the quaternion by passing the radian Euler angles to AngleQuaternion.
+inline Quaternion::Quaternion(RadianEuler const &angle)
+{
+    Quaternion &qSelf = *this;
+    AngleQuaternion( angle, qSelf );
+}
+
+// True only when IsFinite() accepts every component.
+inline bool Quaternion::IsValid() const
+{
+    if ( !IsFinite(x) )
+        return false;
+    if ( !IsFinite(y) )
+        return false;
+    if ( !IsFinite(z) )
+        return false;
+    return IsFinite(w);
+}
+
+// Poisons every component with VEC_T_NAN. The _DEBUG / VECTOR_PARANOIA guards
+// are commented out, so this happens in every build configuration.
+inline void Quaternion::Invalidate()
+{
+//#ifdef _DEBUG
+//#ifdef VECTOR_PARANOIA
+    x = VEC_T_NAN;
+    y = VEC_T_NAN;
+    z = VEC_T_NAN;
+    w = VEC_T_NAN;
+//#endif
+//#endif
+}
+
+// Builds the Euler angles by passing the quaternion to QuaternionAngles.
+inline RadianEuler::RadianEuler(Quaternion const &q)
+{
+    RadianEuler &angSelf = *this;
+    QuaternionAngles( q, angSelf );
+}
+
+// Copies src into dst component by component; src is validated first.
+inline void VectorCopy( RadianEuler const& src, RadianEuler &dst )
+{
+    CHECK_VALID(src);
+    dst.x = src.x; dst.y = src.y; dst.z = src.z;
+}
+
+// dst = src * b, where b is a scalar that must be finite (asserted).
+inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst )
+{
+    CHECK_VALID(src);
+    Assert( IsFinite(b) );
+    dst.x = src.x * b; dst.y = src.y * b; dst.z = src.z * b;
+}
+
+// True only when IsFinite() accepts every component.
+inline bool RadianEuler::IsValid() const
+{
+    if ( !IsFinite(x) )
+        return false;
+    if ( !IsFinite(y) )
+        return false;
+    return IsFinite(z);
+}
+
+// Poisons every component with VEC_T_NAN. The _DEBUG / VECTOR_PARANOIA guards
+// are commented out, so this happens in every build configuration.
+inline void RadianEuler::Invalidate()
+{
+//#ifdef _DEBUG
+//#ifdef VECTOR_PARANOIA
+    x = VEC_T_NAN;
+    y = VEC_T_NAN;
+    z = VEC_T_NAN;
+//#endif
+//#endif
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Mutable component access by index (0..2); asserts on out-of-range indices.
+inline vec_t& RadianEuler::operator[](int i)
+{
+    Assert( (i >= 0) && (i < 3) );
+    vec_t *pComponents = (vec_t*)this;
+    return pComponents[i];
+}
+
+// Read-only component access by index (0..2); asserts on out-of-range indices.
+inline vec_t RadianEuler::operator[](int i) const
+{
+    Assert( (i >= 0) && (i < 3) );
+    const vec_t *pComponents = (const vec_t*)this;
+    return pComponents[i];
+}
+
+
+//-----------------------------------------------------------------------------
+// Degree Euler QAngle pitch, yaw, roll
+//-----------------------------------------------------------------------------
+class QAngleByValue;
+
+// Euler angles in degrees (pitch, yaw, roll), stored as x, y, z.
+class QAngle
+{
+public:
+    // Members
+    vec_t x, y, z;
+
+    // Construction/destruction
+    QAngle(void);
+    QAngle(vec_t X, vec_t Y, vec_t Z);
+//  QAngle(RadianEuler const &angles); // evil auto type promotion!!!
+
+    // Allow pass-by-value: reinterprets this object in place as a QAngleByValue
+    operator QAngleByValue &() { return *((QAngleByValue *)(this)); }
+    operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); }
+
+    // Initialization
+    void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
+    void Random( vec_t minVal, vec_t maxVal );
+
+    // Got any nasty NAN's?
+    bool IsValid() const;
+    void Invalidate();
+
+    // array access...
+    vec_t operator[](int i) const;
+    vec_t& operator[](int i);
+
+    // Base address...
+    vec_t* Base();
+    vec_t const* Base() const;
+
+    // equality
+    bool operator==(const QAngle& v) const;
+    bool operator!=(const QAngle& v) const;
+
+    // in-place arithmetic operations
+    QAngle& operator+=(const QAngle &v);
+    QAngle& operator-=(const QAngle &v);
+    QAngle& operator*=(float s);
+    QAngle& operator/=(float s);
+
+    // Get the vector's magnitude.
+    vec_t Length() const;
+    vec_t LengthSqr() const;
+
+    // negate the QAngle components
+    //void Negate();
+
+    // Copy assignment (publicly declared)
+    QAngle& operator=( const QAngle& src );
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+    // copy constructors
+
+    // arithmetic operations that return a new QAngle by value
+    QAngle operator-(void) const;
+
+    QAngle operator+(const QAngle& v) const;
+    QAngle operator-(const QAngle& v) const;
+    QAngle operator*(float fl) const;
+    QAngle operator/(float fl) const;
+#else
+
+private:
+    // No copy constructors allowed if we're in optimal mode
+    QAngle(const QAngle& vOther);
+
+#endif
+};
+
+// Zeroes all three components of q.
+FORCEINLINE void NetworkVarConstruct( QAngle &q )
+{
+    q.x = 0.0f;
+    q.y = 0.0f;
+    q.z = 0.0f;
+}
+
+//-----------------------------------------------------------------------------
+// Allows us to specifically pass the vector by value when we need to
+//-----------------------------------------------------------------------------
+// QAngle subclass with a public copy constructor, so angles can be passed by
+// value even when QAngle's own copy constructor is private.
+class QAngleByValue : public QAngle
+{
+public:
+    // Construction/destruction:
+    QAngleByValue(void) : QAngle() {}
+    QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {}
+    // Copies through assignment rather than through QAngle's copy constructor.
+    QAngleByValue(const QAngleByValue& vOther)
+    {
+        *this = vOther;
+    }
+};
+
+
+inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result ) // result = a + b, component by component
+{
+ CHECK_VALID(a);
+ CHECK_VALID(b);
+ result.x = a.x + b.x;
+ result.y = a.y + b.y;
+ result.z = a.z + b.z;
+}
+
+inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest ) // dest = start + scale * direction, component by component
+{
+ CHECK_VALID(start);
+ CHECK_VALID(direction);
+ dest.x = start.x + scale * direction.x;
+ dest.y = start.y + scale * direction.y;
+ dest.z = start.z + scale * direction.z;
+}
+
+
+//-----------------------------------------------------------------------------
+// constructors: the default one assigns nothing to x, y, z unless both _DEBUG and VECTOR_PARANOIA are defined
+//-----------------------------------------------------------------------------
+inline QAngle::QAngle(void)
+{
+#ifdef _DEBUG
+#ifdef VECTOR_PARANOIA
+ // Initialize to NAN to catch errors
+ x = y = z = VEC_T_NAN;
+#endif
+#endif
+}
+
+inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z) // stores X, Y, Z into x, y, z, then runs CHECK_VALID on the result
+{
+ x = X; y = Y; z = Z;
+ CHECK_VALID(*this);
+}
+
+
+//-----------------------------------------------------------------------------
+// initialization: Init() overwrites all three components (the declaration defaults each argument to 0.0f)
+//-----------------------------------------------------------------------------
+inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz )
+{
+ x = ix; y = iy; z = iz;
+ CHECK_VALID(*this);
+}
+
+inline void QAngle::Random( vec_t minVal, vec_t maxVal ) // each component independently becomes minVal + (rand() / VALVE_RAND_MAX) * (maxVal - minVal)
+{
+ x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ CHECK_VALID(*this);
+}
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline QAngle RandomAngle( float minVal, float maxVal ) // returns a QAngle whose components come from Vector::Random( minVal, maxVal )
+{
+ Vector random;
+ random.Random( minVal, maxVal );
+ QAngle ret( random.x, random.y, random.z );
+ return ret;
+}
+
+#endif
+
+
+inline RadianEuler::RadianEuler(QAngle const &angles) // scales each QAngle component by pi/180 and passes them to Init() in the order z, x, y
+{
+ Init(
+ angles.z * 3.14159265358979323846f / 180.f,
+ angles.x * 3.14159265358979323846f / 180.f,
+ angles.y * 3.14159265358979323846f / 180.f );
+}
+
+
+
+
+inline QAngle RadianEuler::ToQAngle( void) const // scales each component by 180/pi and builds QAngle( y, z, x ), mirroring the reordering in RadianEuler(QAngle const&)
+{
+ return QAngle(
+ y * 180.f / 3.14159265358979323846f,
+ z * 180.f / 3.14159265358979323846f,
+ x * 180.f / 3.14159265358979323846f );
+}
+
+
+//-----------------------------------------------------------------------------
+// assignment: copies x, y, z from vOther after running CHECK_VALID on it; returns *this
+//-----------------------------------------------------------------------------
+inline QAngle& QAngle::operator=(const QAngle &vOther)
+{
+ CHECK_VALID(vOther);
+ x=vOther.x; y=vOther.y; z=vOther.z;
+ return *this;
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access: indexes the object as if it were an array of vec_t; the range 0 <= i < 3 is checked only by Assert
+//-----------------------------------------------------------------------------
+inline vec_t& QAngle::operator[](int i)
+{
+ Assert( (i >= 0) && (i < 3) );
+ return ((vec_t*)this)[i];
+}
+
+inline vec_t QAngle::operator[](int i) const // read-only overload: returns the element by value
+{
+ Assert( (i >= 0) && (i < 3) );
+ return ((vec_t*)this)[i]; // NOTE(review): the cast drops const; (const vec_t*)this would read the same element
+}
+
+
+//-----------------------------------------------------------------------------
+// Base address: the object's own address reinterpreted as a pointer to vec_t
+//-----------------------------------------------------------------------------
+inline vec_t* QAngle::Base()
+{
+ return (vec_t*)this;
+}
+
+inline vec_t const* QAngle::Base() const // const overload: same address, pointer to const vec_t
+{
+ return (vec_t const*)this;
+}
+
+
+//-----------------------------------------------------------------------------
+// IsValid: true only when IsFinite() accepts all three components
+//-----------------------------------------------------------------------------
+inline bool QAngle::IsValid() const
+{
+ return IsFinite(x) && IsFinite(y) && IsFinite(z);
+}
+
+//-----------------------------------------------------------------------------
+// Invalidate: unconditionally sets x, y, z to VEC_T_NAN (the debug-only guards in the body are commented out)
+//-----------------------------------------------------------------------------
+
+inline void QAngle::Invalidate()
+{
+//#ifdef _DEBUG
+//#ifdef VECTOR_PARANOIA
+ x = y = z = VEC_T_NAN;
+//#endif
+//#endif
+}
+
+//-----------------------------------------------------------------------------
+// comparison: exact component-wise equality (no tolerance); both operands go through CHECK_VALID first
+//-----------------------------------------------------------------------------
+inline bool QAngle::operator==( const QAngle& src ) const
+{
+ CHECK_VALID(src);
+ CHECK_VALID(*this);
+ return (src.x == x) && (src.y == y) && (src.z == z);
+}
+
+inline bool QAngle::operator!=( const QAngle& src ) const // true when at least one component differs exactly
+{
+ CHECK_VALID(src);
+ CHECK_VALID(*this);
+ return (src.x != x) || (src.y != y) || (src.z != z);
+}
+
+
+//-----------------------------------------------------------------------------
+// Copy: dst receives src's x, y, z; only src goes through CHECK_VALID
+//-----------------------------------------------------------------------------
+inline void VectorCopy( const QAngle& src, QAngle& dst )
+{
+ CHECK_VALID(src);
+ dst.x = src.x;
+ dst.y = src.y;
+ dst.z = src.z;
+}
+
+
+//-----------------------------------------------------------------------------
+// standard math operations: in-place component-wise add; returns *this
+//-----------------------------------------------------------------------------
+inline QAngle& QAngle::operator+=(const QAngle& v)
+{
+ CHECK_VALID(*this);
+ CHECK_VALID(v);
+ x+=v.x; y+=v.y; z += v.z;
+ return *this;
+}
+
+inline QAngle& QAngle::operator-=(const QAngle& v) // in-place component-wise subtract; returns *this
+{
+ CHECK_VALID(*this);
+ CHECK_VALID(v);
+ x-=v.x; y-=v.y; z -= v.z;
+ return *this;
+}
+
+inline QAngle& QAngle::operator*=(float fl) // scales every component by fl; CHECK_VALID runs on the result, not on the input
+{
+ x *= fl;
+ y *= fl;
+ z *= fl;
+ CHECK_VALID(*this);
+ return *this;
+}
+
+inline QAngle& QAngle::operator/=(float fl) // divides every component by fl; fl == 0 is caught only by the Assert
+{
+ Assert( fl != 0.0f );
+ float oofl = 1.0f / fl; // one reciprocal, then three multiplies instead of three divides
+ x *= oofl;
+ y *= oofl;
+ z *= oofl;
+ CHECK_VALID(*this);
+ return *this;
+}
+
+
+//-----------------------------------------------------------------------------
+// length: Length() is FastSqrt( LengthSqr() ) cast to vec_t
+//-----------------------------------------------------------------------------
+inline vec_t QAngle::Length( ) const
+{
+ CHECK_VALID(*this);
+ return (vec_t)FastSqrt( LengthSqr( ) );
+}
+
+
+inline vec_t QAngle::LengthSqr( ) const // sum of the squared components; no square root taken
+{
+ CHECK_VALID(*this);
+ return x * x + y * y + z * z;
+}
+
+
+//-----------------------------------------------------------------------------
+// QAngle equality with tolerance: true when FloatMakePositive() of every component difference is <= tolerance
+//-----------------------------------------------------------------------------
+inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f )
+{
+ if (FloatMakePositive(src1.x - src2.x) > tolerance)
+ return false;
+ if (FloatMakePositive(src1.y - src2.y) > tolerance)
+ return false;
+ return (FloatMakePositive(src1.z - src2.z) <= tolerance);
+}
+
+
+//-----------------------------------------------------------------------------
+// arithmetic operations (SLOW!!)
+//-----------------------------------------------------------------------------
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline QAngle QAngle::operator-(void) const // unary minus: returns a new QAngle with every component negated
+{
+ QAngle ret(-x,-y,-z);
+ return ret;
+}
+
+inline QAngle QAngle::operator+(const QAngle& v) const // returns the component-wise sum as a new QAngle
+{
+ QAngle res;
+ res.x = x + v.x;
+ res.y = y + v.y;
+ res.z = z + v.z;
+ return res;
+}
+
+inline QAngle QAngle::operator-(const QAngle& v) const // returns the component-wise difference (*this - v) as a new QAngle
+{
+ QAngle res;
+ res.x = x - v.x;
+ res.y = y - v.y;
+ res.z = z - v.z;
+ return res;
+}
+
+inline QAngle QAngle::operator*(float fl) const // returns a new QAngle with every component multiplied by fl
+{
+ QAngle res;
+ res.x = x * fl;
+ res.y = y * fl;
+ res.z = z * fl;
+ return res;
+}
+
+inline QAngle QAngle::operator/(float fl) const // returns a new QAngle with every component divided by fl
+{
+ QAngle res; // NOTE(review): unlike operator/=, there is no Assert( fl != 0.0f ) on this path
+ res.x = x / fl;
+ res.y = y / fl;
+ res.z = z / fl;
+ return res;
+}
+
+inline QAngle operator*(float fl, const QAngle& v) // scalar-on-the-left form; evaluates v * fl
+{
+ QAngle ret( v * fl );
+ return ret;
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+//-----------------------------------------------------------------------------
+// NOTE: These are not completely correct. The representations are not equivalent
+// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z)
+inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse ) // impulse = ( angles.z, angles.x, angles.y ); a pure component shuffle, no scaling
+{
+ impulse.x = angles.z;
+ impulse.y = angles.x;
+ impulse.z = angles.y;
+}
+
+inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles ) // angles = ( impulse.y, impulse.z, impulse.x ); undoes the shuffle done by QAngleToAngularImpulse
+{
+ angles.x = impulse.y;
+ angles.y = impulse.z;
+ angles.z = impulse.x;
+}
+
+#if !defined( _X360 )
+
+FORCEINLINE vec_t InvRSquared( float const *v ) // reads v[0..2] and returns the reciprocal of their squared length, with the squared length clamped to at least 1
+{
+#if defined(__i386__) || defined(_M_IX86)
+ float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result;
+ _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) )); // max( 1, sqrlen ), then the SSE reciprocal instruction (an approximation, not an exact divide)
+ return result;
+#else
+ return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]); // plain division on non-x86 builds; no 1.0e-10f term on this path
+#endif
+}
+
+FORCEINLINE vec_t InvRSquared( const Vector &v ) // Vector overload: forwards &v.x to the float-pointer version (which assumes y and z follow x in memory)
+{
+ return InvRSquared(&v.x);
+}
+
+#if defined(__i386__) || defined(_M_IX86)
+inline void _SSE_RSqrtInline( float a, float* out ) // writes an approximation of 1/sqrt(a) to *out
+{
+ __m128 xx = _mm_load_ss( &a );
+ __m128 xr = _mm_rsqrt_ss( xx ); // initial hardware estimate of 1/sqrt(a)
+ __m128 xt;
+ xt = _mm_mul_ss( xr, xr );
+ xt = _mm_mul_ss( xt, xx ); // xt = a * xr^2
+ xt = _mm_sub_ss( _mm_set_ss(3.f), xt );
+ xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) ); // xt = 0.5 * (3 - a * xr^2)
+ xr = _mm_mul_ss( xr, xt ); // one Newton-Raphson refinement: xr * 0.5 * (3 - a * xr^2)
+ _mm_store_ss( out, xr );
+}
+#endif
+
+// FIXME: Change this back to a #define once we get rid of the vec_t version
+FORCEINLINE float VectorNormalize( Vector& vec ) // scales vec in place and returns a float; on the SSE path that float approximates the original length
+{ // NOTE(review): DO_SSE_OPTIMIZATION is #defined inside this body and never #undef'd here; the guard tests DEBUG while QAngle's constructor tests _DEBUG - confirm both are intended
+#ifndef DEBUG // stop crashing my edit-and-continue!
+ #if defined(__i386__) || defined(_M_IX86)
+ #define DO_SSE_OPTIMIZATION
+ #endif
+#endif
+
+#if defined( DO_SSE_OPTIMIZATION )
+ float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen; // squared length plus a tiny bias (presumably so rsqrt never sees exactly zero)
+ _SSE_RSqrtInline(sqrlen, &invlen); // invlen ~= 1 / sqrt( sqrlen )
+ vec.x *= invlen;
+ vec.y *= invlen;
+ vec.z *= invlen;
+ return sqrlen * invlen; // len^2 * (1 / len) == len
+#else
+ extern float (FASTCALL *pfVectorNormalize)(Vector& v);
+ return (*pfVectorNormalize)(vec); // otherwise dispatch through the externally defined pfVectorNormalize function pointer
+#endif
+}
+
+// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
+FORCEINLINE float VectorNormalize( float * v ) // float-pointer overload: forwards to the Vector& version
+{
+ return VectorNormalize(*(reinterpret_cast<Vector *>(v))); // reinterprets the storage at v as a Vector, so v must point at memory laid out like one
+}
+
+FORCEINLINE void VectorNormalizeFast( Vector &vec ) // on this (non-_X360) path it simply calls VectorNormalize and discards the returned length
+{
+ VectorNormalize(vec);
+}
+
+#else
+
+FORCEINLINE float _VMX_InvRSquared( const Vector &v ) // _X360 replacement for InvRSquared (see the #define below the next function)
+{
+ XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
+ xmV = XMVector3Dot( xmV, xmV ); // NOTE(review): if the reciprocal length is replicated into every lane, this 3-component dot yields 3/|v|^2, and there is no clamp to 1 as in the non-_X360 InvRSquared - confirm
+ return xmV.x;
+}
+
+// call directly: scales vec in place by 1 / (length + FLT_EPSILON) and returns the original length
+FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
+{
+ float mag = XMVector3Length( XMLoadVector3( vec.Base() ) ).x;
+ float den = 1.f / (mag + FLT_EPSILON ); // the epsilon keeps the divisor non-zero when mag is 0
+ vec.x *= den;
+ vec.y *= den;
+ vec.z *= den;
+ return mag;
+}
+
+#define InvRSquared(x) _VMX_InvRSquared(x)
+
+// FIXME: Change this back to a #define once we get rid of the vec_t version
+FORCEINLINE float VectorNormalize( Vector& v ) // _X360 build: thin wrapper that returns _VMX_VectorNormalize( v )
+{
+ return _VMX_VectorNormalize( v );
+}
+// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
+FORCEINLINE float VectorNormalize( float *pV ) // _X360 float-pointer overload
+{
+ return _VMX_VectorNormalize(*(reinterpret_cast<Vector*>(pV))); // reinterprets the storage at pV as a Vector, so pV must point at memory laid out like one
+}
+
+// call directly: _X360 fast normalize; scales vec in place using XMVector3LengthEst and returns nothing
+FORCEINLINE void VectorNormalizeFast( Vector &vec )
+{
+ XMVECTOR xmV = XMVector3LengthEst( XMLoadVector3( vec.Base() ) );
+ float den = 1.f / (xmV.x + FLT_EPSILON); // the epsilon keeps the divisor non-zero when the length estimate is 0
+ vec.x *= den;
+ vec.y *= den;
+ vec.z *= den;
+}
+
+#endif // _X360
+
+
+inline vec_t Vector::NormalizeInPlace() // member form of VectorNormalize( *this ); returns whatever that call returns
+{
+ return VectorNormalize( *this );
+}
+
+inline Vector Vector::Normalized() const // returns a normalized copy; *this is left unchanged
+{
+ Vector norm = *this;
+ VectorNormalize( norm );
+ return norm;
+}
+
+inline bool Vector::IsLengthGreaterThan( float val ) const // compares squared length against val*val, so no square root is taken (a negative val is squared too)
+{
+ return LengthSqr() > val*val;
+}
+
+inline bool Vector::IsLengthLessThan( float val ) const // compares squared length against val*val, so no square root is taken (a negative val is squared too)
+{
+ return LengthSqr() < val*val;
+}
+
+#endif
+
diff --git a/mp/src/public/mathlib/vector2d.h b/mp/src/public/mathlib/vector2d.h
index 2c6bb242..41385589 100644
--- a/mp/src/public/mathlib/vector2d.h
+++ b/mp/src/public/mathlib/vector2d.h
@@ -1,670 +1,670 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR2D_H
-#define VECTOR2D_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "tier0/basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#include "tier0/dbg.h"
-#include "mathlib/math_pfns.h"
-
-//=========================================================
-// 2D Vector2D
-//=========================================================
-
-class Vector2D
-{
-public:
- // Members
- vec_t x, y;
-
- // Construction/destruction
- Vector2D(void);
- Vector2D(vec_t X, vec_t Y);
- Vector2D(const float *pFloat);
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f);
-
- // Got any nasty NAN's?
- bool IsValid() const;
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // Initialization methods
- void Random( float minVal, float maxVal );
-
- // equality
- bool operator==(const Vector2D& v) const;
- bool operator!=(const Vector2D& v) const;
-
- // arithmetic operations
- Vector2D& operator+=(const Vector2D &v);
- Vector2D& operator-=(const Vector2D &v);
- Vector2D& operator*=(const Vector2D &v);
- Vector2D& operator*=(float s);
- Vector2D& operator/=(const Vector2D &v);
- Vector2D& operator/=(float s);
-
- // negate the Vector2D components
- void Negate();
-
- // Get the Vector2D's magnitude.
- vec_t Length() const;
-
- // Get the Vector2D's magnitude squared.
- vec_t LengthSqr(void) const;
-
- // return true if this vector is (0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance);
- }
-
- // Normalize in place and return the old length.
- vec_t NormalizeInPlace();
-
- // Compare length.
- bool IsLengthGreaterThan( float val ) const;
- bool IsLengthLessThan( float val ) const;
-
- // Get the distance from this Vector2D to the other one.
- vec_t DistTo(const Vector2D &vOther) const;
-
- // Get the distance from this Vector2D to the other one squared.
- vec_t DistToSqr(const Vector2D &vOther) const;
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual Vector2D equation (because it's done per-component
- // rather than per-Vector2D).
- void MulAdd(const Vector2D& a, const Vector2D& b, float scalar);
-
- // Dot product.
- vec_t Dot(const Vector2D& vOther) const;
-
- // assignment
- Vector2D& operator=(const Vector2D &vOther);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
- Vector2D(const Vector2D &vOther);
-
- // arithmetic operations
- Vector2D operator-(void) const;
-
- Vector2D operator+(const Vector2D& v) const;
- Vector2D operator-(const Vector2D& v) const;
- Vector2D operator*(const Vector2D& v) const;
- Vector2D operator/(const Vector2D& v) const;
- Vector2D operator*(float fl) const;
- Vector2D operator/(float fl) const;
-
- // Cross product between two vectors.
- Vector2D Cross(const Vector2D &vOther) const;
-
- // Returns a Vector2D with the min or max in X, Y, and Z.
- Vector2D Min(const Vector2D &vOther) const;
- Vector2D Max(const Vector2D &vOther) const;
-
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector2D(const Vector2D& vOther);
-#endif
-};
-
-//-----------------------------------------------------------------------------
-
-const Vector2D vec2_origin(0,0);
-const Vector2D vec2_invalid( FLT_MAX, FLT_MAX );
-
-//-----------------------------------------------------------------------------
-// Vector2D related operations
-//-----------------------------------------------------------------------------
-
-// Vector2D clear
-void Vector2DClear( Vector2D& a );
-
-// Copy
-void Vector2DCopy( const Vector2D& src, Vector2D& dst );
-
-// Vector2D arithmetic
-void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result );
-void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result );
-void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result );
-
-// Store the min or max of each of x, y, and z into the result.
-void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result );
-void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result );
-
-#define Vector2DExpand( v ) (v).x, (v).y
-
-// Normalization
-vec_t Vector2DNormalize( Vector2D& v );
-
-// Length
-vec_t Vector2DLength( const Vector2D& v );
-
-// Dot Product
-vec_t DotProduct2D(const Vector2D& a, const Vector2D& b);
-
-// Linearly interpolate between two vectors
-void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector2D methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-
-inline Vector2D::Vector2D(void)
-{
-#ifdef _DEBUG
- // Initialize to NAN to catch errors
- x = y = VEC_T_NAN;
-#endif
-}
-
-inline Vector2D::Vector2D(vec_t X, vec_t Y)
-{
- x = X; y = Y;
- Assert( IsValid() );
-}
-
-inline Vector2D::Vector2D(const float *pFloat)
-{
- Assert( pFloat );
- x = pFloat[0]; y = pFloat[1];
- Assert( IsValid() );
-}
-
-
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector2D::Vector2D(const Vector2D &vOther)
-{
- Assert( vOther.IsValid() );
- x = vOther.x; y = vOther.y;
-}
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-inline void Vector2D::Init( vec_t ix, vec_t iy )
-{
- x = ix; y = iy;
- Assert( IsValid() );
-}
-
-inline void Vector2D::Random( float minVal, float maxVal )
-{
- x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-}
-
-inline void Vector2DClear( Vector2D& a )
-{
- a.x = a.y = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-inline Vector2D& Vector2D::operator=(const Vector2D &vOther)
-{
- Assert( vOther.IsValid() );
- x=vOther.x; y=vOther.y;
- return *this;
-}
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-
-inline vec_t& Vector2D::operator[](int i)
-{
- Assert( (i >= 0) && (i < 2) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t Vector2D::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 2) );
- return ((vec_t*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-
-inline vec_t* Vector2D::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* Vector2D::Base() const
-{
- return (vec_t const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-inline bool Vector2D::IsValid() const
-{
- return IsFinite(x) && IsFinite(y);
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool Vector2D::operator==( const Vector2D& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x == x) && (src.y == y);
-}
-
-inline bool Vector2D::operator!=( const Vector2D& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x != x) || (src.y != y);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-inline void Vector2DCopy( const Vector2D& src, Vector2D& dst )
-{
- Assert( src.IsValid() );
- dst.x = src.x;
- dst.y = src.y;
-}
-
-inline void Vector2D::CopyToArray(float* rgfl) const
-{
- Assert( IsValid() );
- Assert( rgfl );
- rgfl[0] = x; rgfl[1] = y;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-inline void Vector2D::Negate()
-{
- Assert( IsValid() );
- x = -x; y = -y;
-}
-
-inline Vector2D& Vector2D::operator+=(const Vector2D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x+=v.x; y+=v.y;
- return *this;
-}
-
-inline Vector2D& Vector2D::operator-=(const Vector2D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x-=v.x; y-=v.y;
- return *this;
-}
-
-inline Vector2D& Vector2D::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator*=(const Vector2D& v)
-{
- x *= v.x;
- y *= v.y;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator/=(const Vector2D& v)
-{
- Assert( v.x != 0.0f && v.y != 0.0f );
- x /= v.x;
- y /= v.y;
- Assert( IsValid() );
- return *this;
-}
-
-inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x + b.x;
- c.y = a.y + b.y;
-}
-
-inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x - b.x;
- c.y = a.y - b.y;
-}
-
-inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c )
-{
- Assert( a.IsValid() && IsFinite(b) );
- c.x = a.x * b;
- c.y = a.y * b;
-}
-
-inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x * b.x;
- c.y = a.y * b.y;
-}
-
-
-inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c )
-{
- Assert( a.IsValid() );
- Assert( b != 0.0f );
- vec_t oob = 1.0f / b;
- c.x = a.x * oob;
- c.y = a.y * oob;
-}
-
-inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() );
- Assert( (b.x != 0.0f) && (b.y != 0.0f) );
- c.x = a.x / b.x;
- c.y = a.y / b.y;
-}
-
-inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result )
-{
- Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
- result.x = start.x + s*dir.x;
- result.y = start.y + s*dir.y;
-}
-
-// FIXME: Remove
-// For backwards compatability
-inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar)
-{
- x = a.x + b.x * scalar;
- y = a.y + b.y * scalar;
-}
-
-inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest )
-{
- dest[0] = src1[0] + (src2[0] - src1[0]) * t;
- dest[1] = src1[1] + (src2[1] - src1[1]) * t;
-}
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b)
-{
- Assert( a.IsValid() && b.IsValid() );
- return( a.x*b.x + a.y*b.y );
-}
-
-// for backwards compatability
-inline vec_t Vector2D::Dot( const Vector2D& vOther ) const
-{
- return DotProduct2D( *this, vOther );
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-inline vec_t Vector2DLength( const Vector2D& v )
-{
- Assert( v.IsValid() );
- return (vec_t)FastSqrt(v.x*v.x + v.y*v.y);
-}
-
-inline vec_t Vector2D::LengthSqr(void) const
-{
- Assert( IsValid() );
- return (x*x + y*y);
-}
-
-inline vec_t Vector2D::NormalizeInPlace()
-{
- return Vector2DNormalize( *this );
-}
-
-inline bool Vector2D::IsLengthGreaterThan( float val ) const
-{
- return LengthSqr() > val*val;
-}
-
-inline bool Vector2D::IsLengthLessThan( float val ) const
-{
- return LengthSqr() < val*val;
-}
-
-inline vec_t Vector2D::Length(void) const
-{
- return Vector2DLength( *this );
-}
-
-
-inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result )
-{
- result.x = (a.x < b.x) ? a.x : b.x;
- result.y = (a.y < b.y) ? a.y : b.y;
-}
-
-
-inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result )
-{
- result.x = (a.x > b.x) ? a.x : b.x;
- result.y = (a.y > b.y) ? a.y : b.y;
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-inline vec_t Vector2DNormalize( Vector2D& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- v.x = v.y = 0.0f;
- }
- return l;
-}
-
-
-//-----------------------------------------------------------------------------
-// Get the distance from this Vector2D to the other one
-//-----------------------------------------------------------------------------
-inline vec_t Vector2D::DistTo(const Vector2D &vOther) const
-{
- Vector2D delta;
- Vector2DSubtract( *this, vOther, delta );
- return delta.Length();
-}
-
-inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const
-{
- Vector2D delta;
- Vector2DSubtract( *this, vOther, delta );
- return delta.LengthSqr();
-}
-
-
-//-----------------------------------------------------------------------------
-// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
-//-----------------------------------------------------------------------------
-inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult )
-{
- Vector2D vecDelta;
- Vector2DSubtract( vecTarget, vecStart, vecDelta );
- float flDistSqr = vecDelta.LengthSqr();
- if ( flDistSqr <= flMaxDist * flMaxDist )
- {
- *pResult = vecTarget;
- }
- else
- {
- vecDelta /= FastSqrt( flDistSqr );
- Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult );
- }
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Slow methods
-//
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-//-----------------------------------------------------------------------------
-// Returns a Vector2D with the min or max in X, Y, and Z.
-//-----------------------------------------------------------------------------
-
-inline Vector2D Vector2D::Min(const Vector2D &vOther) const
-{
- return Vector2D(x < vOther.x ? x : vOther.x,
- y < vOther.y ? y : vOther.y);
-}
-
-inline Vector2D Vector2D::Max(const Vector2D &vOther) const
-{
- return Vector2D(x > vOther.x ? x : vOther.x,
- y > vOther.y ? y : vOther.y);
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations
-//-----------------------------------------------------------------------------
-
-inline Vector2D Vector2D::operator-(void) const
-{
- return Vector2D(-x,-y);
-}
-
-inline Vector2D Vector2D::operator+(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DAdd( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator-(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DSubtract( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator*(float fl) const
-{
- Vector2D res;
- Vector2DMultiply( *this, fl, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator*(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DMultiply( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator/(float fl) const
-{
- Vector2D res;
- Vector2DDivide( *this, fl, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator/(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DDivide( *this, v, res );
- return res;
-}
-
-inline Vector2D operator*(float fl, const Vector2D& v)
-{
- return v * fl;
-}
-
-#endif //slow
-
-#endif // VECTOR2D_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef VECTOR2D_H
+#define VECTOR2D_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <math.h>
+#include <float.h>
+
+// For vec_t, put this somewhere else?
+#include "tier0/basetypes.h"
+
+// For rand(). We really need a library!
+#include <stdlib.h>
+
+#include "tier0/dbg.h"
+#include "mathlib/math_pfns.h"
+
+//=========================================================
+// Vector2D: two vec_t components (x, y) with in-place operators, plus by-value operators unless VECTOR_NO_SLOW_OPERATIONS is defined
+//=========================================================
+
+class Vector2D
+{
+public:
+ // Members
+ vec_t x, y;
+
+ // Construction/destruction
+ Vector2D(void);
+ Vector2D(vec_t X, vec_t Y);
+ Vector2D(const float *pFloat);
+
+ // Initialization
+ void Init(vec_t ix=0.0f, vec_t iy=0.0f);
+
+ // Got any nasty NAN's?
+ bool IsValid() const;
+
+ // array access...
+ vec_t operator[](int i) const;
+ vec_t& operator[](int i);
+
+ // Base address...
+ vec_t* Base();
+ vec_t const* Base() const;
+
+ // Initialization methods
+ void Random( float minVal, float maxVal );
+
+ // equality
+ bool operator==(const Vector2D& v) const;
+ bool operator!=(const Vector2D& v) const;
+
+ // arithmetic operations (in place; each returns a reference)
+ Vector2D& operator+=(const Vector2D &v);
+ Vector2D& operator-=(const Vector2D &v);
+ Vector2D& operator*=(const Vector2D &v);
+ Vector2D& operator*=(float s);
+ Vector2D& operator/=(const Vector2D &v);
+ Vector2D& operator/=(float s);
+
+ // negate the Vector2D components
+ void Negate();
+
+ // Get the Vector2D's magnitude.
+ vec_t Length() const;
+
+ // Get the Vector2D's magnitude squared.
+ vec_t LengthSqr(void) const;
+
+ // return true if this vector is (0,0) within tolerance (both bounds are strict: |x| and |y| must be below tolerance)
+ bool IsZero( float tolerance = 0.01f ) const
+ {
+ return (x > -tolerance && x < tolerance &&
+ y > -tolerance && y < tolerance);
+ }
+
+ // Normalize in place and return the old length.
+ vec_t NormalizeInPlace();
+
+ // Compare length.
+ bool IsLengthGreaterThan( float val ) const;
+ bool IsLengthLessThan( float val ) const;
+
+ // Get the distance from this Vector2D to the other one.
+ vec_t DistTo(const Vector2D &vOther) const;
+
+ // Get the distance from this Vector2D to the other one squared.
+ vec_t DistToSqr(const Vector2D &vOther) const;
+
+ // Copy
+ void CopyToArray(float* rgfl) const;
+
+ // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
+ // is about 12% faster than the actual Vector2D equation (because it's done per-component
+ // rather than per-Vector2D).
+ void MulAdd(const Vector2D& a, const Vector2D& b, float scalar);
+
+ // Dot product.
+ vec_t Dot(const Vector2D& vOther) const;
+
+ // assignment
+ Vector2D& operator=(const Vector2D &vOther);
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // copy constructors
+ Vector2D(const Vector2D &vOther);
+
+ // arithmetic operations (by value; each returns a new Vector2D)
+ Vector2D operator-(void) const;
+
+ Vector2D operator+(const Vector2D& v) const;
+ Vector2D operator-(const Vector2D& v) const;
+ Vector2D operator*(const Vector2D& v) const;
+ Vector2D operator/(const Vector2D& v) const;
+ Vector2D operator*(float fl) const;
+ Vector2D operator/(float fl) const;
+
+ // Cross product between two vectors. NOTE(review): no inline definition of Cross appears with the other Vector2D methods in this header - confirm it is defined before use
+ Vector2D Cross(const Vector2D &vOther) const;
+
+ // Returns a Vector2D with the min or max in X and Y.
+ Vector2D Min(const Vector2D &vOther) const;
+ Vector2D Max(const Vector2D &vOther) const;
+
+#else
+
+private:
+ // No copy constructors allowed if we're in optimal mode
+ Vector2D(const Vector2D& vOther);
+#endif
+};
+
+//-----------------------------------------------------------------------------
+
+const Vector2D vec2_origin(0,0);
+const Vector2D vec2_invalid( FLT_MAX, FLT_MAX );
+
+//-----------------------------------------------------------------------------
+// Vector2D related operations
+//-----------------------------------------------------------------------------
+
+// Vector2D clear
+void Vector2DClear( Vector2D& a );
+
+// Copy
+void Vector2DCopy( const Vector2D& src, Vector2D& dst );
+
+// Vector2D arithmetic
+void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result );
+void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result );
+void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result );
+void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result );
+void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result );
+void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result );
+void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result );
+
+// Store the min or max of each of x and y into the result.
+void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result );
+void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result );
+
+#define Vector2DExpand( v ) (v).x, (v).y
+
+// Normalization
+vec_t Vector2DNormalize( Vector2D& v );
+
+// Length
+vec_t Vector2DLength( const Vector2D& v );
+
+// Dot Product
+vec_t DotProduct2D(const Vector2D& a, const Vector2D& b);
+
+// Linearly interpolate between two vectors
+void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest );
+
+
+//-----------------------------------------------------------------------------
+//
+// Inlined Vector2D methods
+//
+//-----------------------------------------------------------------------------
+
+
+//-----------------------------------------------------------------------------
+// constructors
+//-----------------------------------------------------------------------------
+
+inline Vector2D::Vector2D(void)
+{
+#ifdef _DEBUG
+ // Initialize to NAN to catch errors
+ x = y = VEC_T_NAN;
+#endif
+}
+
+inline Vector2D::Vector2D(vec_t X, vec_t Y)
+{
+ x = X; y = Y;
+ Assert( IsValid() );
+}
+
+inline Vector2D::Vector2D(const float *pFloat)
+{
+ Assert( pFloat );
+ x = pFloat[0]; y = pFloat[1];
+ Assert( IsValid() );
+}
+
+
+//-----------------------------------------------------------------------------
+// copy constructor
+//-----------------------------------------------------------------------------
+
+inline Vector2D::Vector2D(const Vector2D &vOther)
+{
+ Assert( vOther.IsValid() );
+ x = vOther.x; y = vOther.y;
+}
+
+//-----------------------------------------------------------------------------
+// initialization
+//-----------------------------------------------------------------------------
+
+inline void Vector2D::Init( vec_t ix, vec_t iy )
+{
+ x = ix; y = iy;
+ Assert( IsValid() );
+}
+
+inline void Vector2D::Random( float minVal, float maxVal )
+{
+ x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+}
+
+inline void Vector2DClear( Vector2D& a )
+{
+ a.x = a.y = 0.0f;
+}
+
+//-----------------------------------------------------------------------------
+// assignment
+//-----------------------------------------------------------------------------
+
+inline Vector2D& Vector2D::operator=(const Vector2D &vOther)
+{
+ Assert( vOther.IsValid() );
+ x=vOther.x; y=vOther.y;
+ return *this;
+}
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+
+inline vec_t& Vector2D::operator[](int i)
+{
+ Assert( (i >= 0) && (i < 2) );
+ return ((vec_t*)this)[i];
+}
+
+inline vec_t Vector2D::operator[](int i) const
+{
+ Assert( (i >= 0) && (i < 2) );
+ return ((vec_t*)this)[i];
+}
+
+//-----------------------------------------------------------------------------
+// Base address...
+//-----------------------------------------------------------------------------
+
+inline vec_t* Vector2D::Base()
+{
+ return (vec_t*)this;
+}
+
+inline vec_t const* Vector2D::Base() const
+{
+ return (vec_t const*)this;
+}
+
+//-----------------------------------------------------------------------------
+// IsValid?
+//-----------------------------------------------------------------------------
+
+inline bool Vector2D::IsValid() const
+{
+ return IsFinite(x) && IsFinite(y);
+}
+
+//-----------------------------------------------------------------------------
+// comparison
+//-----------------------------------------------------------------------------
+
+inline bool Vector2D::operator==( const Vector2D& src ) const
+{
+ Assert( src.IsValid() && IsValid() );
+ return (src.x == x) && (src.y == y);
+}
+
+inline bool Vector2D::operator!=( const Vector2D& src ) const
+{
+ Assert( src.IsValid() && IsValid() );
+ return (src.x != x) || (src.y != y);
+}
+
+
+//-----------------------------------------------------------------------------
+// Copy
+//-----------------------------------------------------------------------------
+
+inline void Vector2DCopy( const Vector2D& src, Vector2D& dst )
+{
+ Assert( src.IsValid() );
+ dst.x = src.x;
+ dst.y = src.y;
+}
+
+inline void Vector2D::CopyToArray(float* rgfl) const
+{
+ Assert( IsValid() );
+ Assert( rgfl );
+ rgfl[0] = x; rgfl[1] = y;
+}
+
+//-----------------------------------------------------------------------------
+// standard math operations
+//-----------------------------------------------------------------------------
+
+inline void Vector2D::Negate()
+{
+ Assert( IsValid() );
+ x = -x; y = -y;
+}
+
+inline Vector2D& Vector2D::operator+=(const Vector2D& v)
+{
+ Assert( IsValid() && v.IsValid() );
+ x+=v.x; y+=v.y;
+ return *this;
+}
+
+inline Vector2D& Vector2D::operator-=(const Vector2D& v)
+{
+ Assert( IsValid() && v.IsValid() );
+ x-=v.x; y-=v.y;
+ return *this;
+}
+
+inline Vector2D& Vector2D::operator*=(float fl)
+{
+ x *= fl;
+ y *= fl;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector2D& Vector2D::operator*=(const Vector2D& v)
+{
+ x *= v.x;
+ y *= v.y;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector2D& Vector2D::operator/=(float fl)
+{
+ Assert( fl != 0.0f );
+ float oofl = 1.0f / fl;
+ x *= oofl;
+ y *= oofl;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector2D& Vector2D::operator/=(const Vector2D& v)
+{
+ Assert( v.x != 0.0f && v.y != 0.0f );
+ x /= v.x;
+ y /= v.y;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x + b.x;
+ c.y = a.y + b.y;
+}
+
+inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x - b.x;
+ c.y = a.y - b.y;
+}
+
+inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c )
+{
+ Assert( a.IsValid() && IsFinite(b) );
+ c.x = a.x * b;
+ c.y = a.y * b;
+}
+
+inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x * b.x;
+ c.y = a.y * b.y;
+}
+
+
+inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c )
+{
+ Assert( a.IsValid() );
+ Assert( b != 0.0f );
+ vec_t oob = 1.0f / b;
+ c.x = a.x * oob;
+ c.y = a.y * oob;
+}
+
+inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c )
+{
+ Assert( a.IsValid() );
+ Assert( (b.x != 0.0f) && (b.y != 0.0f) );
+ c.x = a.x / b.x;
+ c.y = a.y / b.y;
+}
+
+inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result )
+{
+ Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
+ result.x = start.x + s*dir.x;
+ result.y = start.y + s*dir.y;
+}
+
+// FIXME: Remove
+// For backwards compatibility
+inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar)
+{
+ x = a.x + b.x * scalar;
+ y = a.y + b.y * scalar;
+}
+
+inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest )
+{
+ dest[0] = src1[0] + (src2[0] - src1[0]) * t;
+ dest[1] = src1[1] + (src2[1] - src1[1]) * t;
+}
+
+//-----------------------------------------------------------------------------
+// dot product
+//-----------------------------------------------------------------------------
+inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b)
+{
+ Assert( a.IsValid() && b.IsValid() );
+ return( a.x*b.x + a.y*b.y );
+}
+
+// for backwards compatibility
+inline vec_t Vector2D::Dot( const Vector2D& vOther ) const
+{
+ return DotProduct2D( *this, vOther );
+}
+
+
+//-----------------------------------------------------------------------------
+// length
+//-----------------------------------------------------------------------------
+inline vec_t Vector2DLength( const Vector2D& v )
+{
+ Assert( v.IsValid() );
+ return (vec_t)FastSqrt(v.x*v.x + v.y*v.y);
+}
+
+inline vec_t Vector2D::LengthSqr(void) const
+{
+ Assert( IsValid() );
+ return (x*x + y*y);
+}
+
+inline vec_t Vector2D::NormalizeInPlace()
+{
+ return Vector2DNormalize( *this );
+}
+
+inline bool Vector2D::IsLengthGreaterThan( float val ) const
+{
+ return LengthSqr() > val*val;
+}
+
+inline bool Vector2D::IsLengthLessThan( float val ) const
+{
+ return LengthSqr() < val*val;
+}
+
+inline vec_t Vector2D::Length(void) const
+{
+ return Vector2DLength( *this );
+}
+
+
+inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result )
+{
+ result.x = (a.x < b.x) ? a.x : b.x;
+ result.y = (a.y < b.y) ? a.y : b.y;
+}
+
+
+inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result )
+{
+ result.x = (a.x > b.x) ? a.x : b.x;
+ result.y = (a.y > b.y) ? a.y : b.y;
+}
+
+
+//-----------------------------------------------------------------------------
+// Normalization
+//-----------------------------------------------------------------------------
+inline vec_t Vector2DNormalize( Vector2D& v )
+{
+ Assert( v.IsValid() );
+ vec_t l = v.Length();
+ if (l != 0.0f)
+ {
+ v /= l;
+ }
+ else
+ {
+ v.x = v.y = 0.0f;
+ }
+ return l;
+}
+
+
+//-----------------------------------------------------------------------------
+// Get the distance from this Vector2D to the other one
+//-----------------------------------------------------------------------------
+inline vec_t Vector2D::DistTo(const Vector2D &vOther) const
+{
+ Vector2D delta;
+ Vector2DSubtract( *this, vOther, delta );
+ return delta.Length();
+}
+
+inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const
+{
+ Vector2D delta;
+ Vector2DSubtract( *this, vOther, delta );
+ return delta.LengthSqr();
+}
+
+
+//-----------------------------------------------------------------------------
+// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
+//-----------------------------------------------------------------------------
+inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult )
+{
+ Vector2D vecDelta;
+ Vector2DSubtract( vecTarget, vecStart, vecDelta );
+ float flDistSqr = vecDelta.LengthSqr();
+ if ( flDistSqr <= flMaxDist * flMaxDist )
+ {
+ *pResult = vecTarget;
+ }
+ else
+ {
+ vecDelta /= FastSqrt( flDistSqr );
+ Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult );
+ }
+}
+
+
+
+//-----------------------------------------------------------------------------
+//
+// Slow methods
+//
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+//-----------------------------------------------------------------------------
+// Returns a Vector2D with the per-component min or max in X and Y.
+//-----------------------------------------------------------------------------
+
+inline Vector2D Vector2D::Min(const Vector2D &vOther) const
+{
+ return Vector2D(x < vOther.x ? x : vOther.x,
+ y < vOther.y ? y : vOther.y);
+}
+
+inline Vector2D Vector2D::Max(const Vector2D &vOther) const
+{
+ return Vector2D(x > vOther.x ? x : vOther.x,
+ y > vOther.y ? y : vOther.y);
+}
+
+
+//-----------------------------------------------------------------------------
+// arithmetic operations
+//-----------------------------------------------------------------------------
+
+inline Vector2D Vector2D::operator-(void) const
+{
+ return Vector2D(-x,-y);
+}
+
+inline Vector2D Vector2D::operator+(const Vector2D& v) const
+{
+ Vector2D res;
+ Vector2DAdd( *this, v, res );
+ return res;
+}
+
+inline Vector2D Vector2D::operator-(const Vector2D& v) const
+{
+ Vector2D res;
+ Vector2DSubtract( *this, v, res );
+ return res;
+}
+
+inline Vector2D Vector2D::operator*(float fl) const
+{
+ Vector2D res;
+ Vector2DMultiply( *this, fl, res );
+ return res;
+}
+
+inline Vector2D Vector2D::operator*(const Vector2D& v) const
+{
+ Vector2D res;
+ Vector2DMultiply( *this, v, res );
+ return res;
+}
+
+inline Vector2D Vector2D::operator/(float fl) const
+{
+ Vector2D res;
+ Vector2DDivide( *this, fl, res );
+ return res;
+}
+
+inline Vector2D Vector2D::operator/(const Vector2D& v) const
+{
+ Vector2D res;
+ Vector2DDivide( *this, v, res );
+ return res;
+}
+
+inline Vector2D operator*(float fl, const Vector2D& v)
+{
+ return v * fl;
+}
+
+#endif //slow
+
+#endif // VECTOR2D_H
+
diff --git a/mp/src/public/mathlib/vector4d.h b/mp/src/public/mathlib/vector4d.h
index 53052e4d..2b20c882 100644
--- a/mp/src/public/mathlib/vector4d.h
+++ b/mp/src/public/mathlib/vector4d.h
@@ -1,686 +1,686 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR4D_H
-#define VECTOR4D_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <stdlib.h> // For rand(). We really need a library!
-#include <float.h>
-#if !defined( _X360 )
-#include <xmmintrin.h> // For SSE
-#endif
-#include "basetypes.h" // For vec_t, put this somewhere else?
-#include "tier0/dbg.h"
-#include "mathlib/math_pfns.h"
-
-// forward declarations
-class Vector;
-class Vector2D;
-
-//=========================================================
-// 4D Vector4D
-//=========================================================
-
-class Vector4D
-{
-public:
- // Members
- vec_t x, y, z, w;
-
- // Construction/destruction
- Vector4D(void);
- Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W);
- Vector4D(const float *pFloat);
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f);
-
- // Got any nasty NAN's?
- bool IsValid() const;
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- inline vec_t* Base();
- inline vec_t const* Base() const;
-
- // Cast to Vector and Vector2D...
- Vector& AsVector3D();
- Vector const& AsVector3D() const;
-
- Vector2D& AsVector2D();
- Vector2D const& AsVector2D() const;
-
- // Initialization methods
- void Random( vec_t minVal, vec_t maxVal );
-
- // equality
- bool operator==(const Vector4D& v) const;
- bool operator!=(const Vector4D& v) const;
-
- // arithmetic operations
- Vector4D& operator+=(const Vector4D &v);
- Vector4D& operator-=(const Vector4D &v);
- Vector4D& operator*=(const Vector4D &v);
- Vector4D& operator*=(float s);
- Vector4D& operator/=(const Vector4D &v);
- Vector4D& operator/=(float s);
-
- // negate the Vector4D components
- void Negate();
-
- // Get the Vector4D's magnitude.
- vec_t Length() const;
-
- // Get the Vector4D's magnitude squared.
- vec_t LengthSqr(void) const;
-
- // return true if this vector is (0,0,0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance &&
- z > -tolerance && z < tolerance &&
- w > -tolerance && w < tolerance);
- }
-
- // Get the distance from this Vector4D to the other one.
- vec_t DistTo(const Vector4D &vOther) const;
-
- // Get the distance from this Vector4D to the other one squared.
- vec_t DistToSqr(const Vector4D &vOther) const;
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual Vector4D equation (because it's done per-component
- // rather than per-Vector4D).
- void MulAdd(Vector4D const& a, Vector4D const& b, float scalar);
-
- // Dot product.
- vec_t Dot(Vector4D const& vOther) const;
-
- // No copy constructors allowed if we're in optimal mode
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-private:
-#else
-public:
-#endif
- Vector4D(Vector4D const& vOther);
-
- // No assignment operators either...
- Vector4D& operator=( Vector4D const& src );
-};
-
-const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f );
-const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX );
-
-//-----------------------------------------------------------------------------
-// SSE optimized routines
-//-----------------------------------------------------------------------------
-
-class ALIGN16 Vector4DAligned : public Vector4D
-{
-public:
- Vector4DAligned(void) {}
- Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W );
-
- inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W );
- inline void InitZero( void );
-
- inline __m128 &AsM128() { return *(__m128*)&x; }
- inline const __m128 &AsM128() const { return *(const __m128*)&x; }
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector4DAligned( Vector4DAligned const& vOther );
-
- // No assignment operators either...
- Vector4DAligned& operator=( Vector4DAligned const& src );
-} ALIGN16_POST;
-
-//-----------------------------------------------------------------------------
-// Vector4D related operations
-//-----------------------------------------------------------------------------
-
-// Vector4D clear
-void Vector4DClear( Vector4D& a );
-
-// Copy
-void Vector4DCopy( Vector4D const& src, Vector4D& dst );
-
-// Vector4D arithmetic
-void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result );
-void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result );
-void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result );
-
-// Vector4DAligned arithmetic
-void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result );
-
-
-#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w
-
-// Normalization
-vec_t Vector4DNormalize( Vector4D& v );
-
-// Length
-vec_t Vector4DLength( Vector4D const& v );
-
-// Dot Product
-vec_t DotProduct4D(Vector4D const& a, Vector4D const& b);
-
-// Linearly interpolate between two vectors
-void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector4D methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-
-inline Vector4D::Vector4D(void)
-{
-#ifdef _DEBUG
- // Initialize to NAN to catch errors
- x = y = z = w = VEC_T_NAN;
-#endif
-}
-
-inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline Vector4D::Vector4D(const float *pFloat)
-{
- Assert( pFloat );
- x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3];
- Assert( IsValid() );
-}
-
-
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector4D::Vector4D(const Vector4D &vOther)
-{
- Assert( vOther.IsValid() );
- x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w;
-}
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw )
-{
- x = ix; y = iy; z = iz; w = iw;
- Assert( IsValid() );
-}
-
-inline void Vector4D::Random( vec_t minVal, vec_t maxVal )
-{
- x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-}
-
-inline void Vector4DClear( Vector4D& a )
-{
- a.x = a.y = a.z = a.w = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-inline Vector4D& Vector4D::operator=(const Vector4D &vOther)
-{
- Assert( vOther.IsValid() );
- x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w;
- return *this;
-}
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-
-inline vec_t& Vector4D::operator[](int i)
-{
- Assert( (i >= 0) && (i < 4) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t Vector4D::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 4) );
- return ((vec_t*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Cast to Vector and Vector2D...
-//-----------------------------------------------------------------------------
-
-inline Vector& Vector4D::AsVector3D()
-{
- return *(Vector*)this;
-}
-
-inline Vector const& Vector4D::AsVector3D() const
-{
- return *(Vector const*)this;
-}
-
-inline Vector2D& Vector4D::AsVector2D()
-{
- return *(Vector2D*)this;
-}
-
-inline Vector2D const& Vector4D::AsVector2D() const
-{
- return *(Vector2D const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-
-inline vec_t* Vector4D::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* Vector4D::Base() const
-{
- return (vec_t const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-inline bool Vector4D::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w);
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool Vector4D::operator==( Vector4D const& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w);
-}
-
-inline bool Vector4D::operator!=( Vector4D const& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-inline void Vector4DCopy( Vector4D const& src, Vector4D& dst )
-{
- Assert( src.IsValid() );
- dst.x = src.x;
- dst.y = src.y;
- dst.z = src.z;
- dst.w = src.w;
-}
-
-inline void Vector4D::CopyToArray(float* rgfl) const
-{
- Assert( IsValid() );
- Assert( rgfl );
- rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-inline void Vector4D::Negate()
-{
- Assert( IsValid() );
- x = -x; y = -y; z = -z; w = -w;
-}
-
-inline Vector4D& Vector4D::operator+=(const Vector4D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x+=v.x; y+=v.y; z += v.z; w += v.w;
- return *this;
-}
-
-inline Vector4D& Vector4D::operator-=(const Vector4D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x-=v.x; y-=v.y; z -= v.z; w -= v.w;
- return *this;
-}
-
-inline Vector4D& Vector4D::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- w *= fl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator*=(Vector4D const& v)
-{
- x *= v.x;
- y *= v.y;
- z *= v.z;
- w *= v.w;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- w *= oofl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator/=(Vector4D const& v)
-{
- Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f );
- x /= v.x;
- y /= v.y;
- z /= v.z;
- w /= v.w;
- Assert( IsValid() );
- return *this;
-}
-
-inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x + b.x;
- c.y = a.y + b.y;
- c.z = a.z + b.z;
- c.w = a.w + b.w;
-}
-
-inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x - b.x;
- c.y = a.y - b.y;
- c.z = a.z - b.z;
- c.w = a.w - b.w;
-}
-
-inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c )
-{
- Assert( a.IsValid() && IsFinite(b) );
- c.x = a.x * b;
- c.y = a.y * b;
- c.z = a.z * b;
- c.w = a.w * b;
-}
-
-inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x * b.x;
- c.y = a.y * b.y;
- c.z = a.z * b.z;
- c.w = a.w * b.w;
-}
-
-inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c )
-{
- Assert( a.IsValid() );
- Assert( b != 0.0f );
- vec_t oob = 1.0f / b;
- c.x = a.x * oob;
- c.y = a.y * oob;
- c.z = a.z * oob;
- c.w = a.w * oob;
-}
-
-inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() );
- Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) );
- c.x = a.x / b.x;
- c.y = a.y / b.y;
- c.z = a.z / b.z;
- c.w = a.w / b.w;
-}
-
-inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result )
-{
- Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
- result.x = start.x + s*dir.x;
- result.y = start.y + s*dir.y;
- result.z = start.z + s*dir.z;
- result.w = start.w + s*dir.w;
-}
-
-// FIXME: Remove
-// For backwards compatability
-inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar)
-{
- x = a.x + b.x * scalar;
- y = a.y + b.y * scalar;
- z = a.z + b.z * scalar;
- w = a.w + b.w * scalar;
-}
-
-inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest )
-{
- dest[0] = src1[0] + (src2[0] - src1[0]) * t;
- dest[1] = src1[1] + (src2[1] - src1[1]) * t;
- dest[2] = src1[2] + (src2[2] - src1[2]) * t;
- dest[3] = src1[3] + (src2[3] - src1[3]) * t;
-}
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-
-inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b)
-{
- Assert( a.IsValid() && b.IsValid() );
- return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w );
-}
-
-// for backwards compatability
-inline vec_t Vector4D::Dot( Vector4D const& vOther ) const
-{
- return DotProduct4D( *this, vOther );
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-
-inline vec_t Vector4DLength( Vector4D const& v )
-{
- Assert( v.IsValid() );
- return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
-}
-
-inline vec_t Vector4D::LengthSqr(void) const
-{
- Assert( IsValid() );
- return (x*x + y*y + z*z + w*w);
-}
-
-inline vec_t Vector4D::Length(void) const
-{
- return Vector4DLength( *this );
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-
-// FIXME: Can't use until we're un-macroed in mathlib.h
-inline vec_t Vector4DNormalize( Vector4D& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- v.x = v.y = v.z = v.w = 0.0f;
- }
- return l;
-}
-
-//-----------------------------------------------------------------------------
-// Get the distance from this Vector4D to the other one
-//-----------------------------------------------------------------------------
-
-inline vec_t Vector4D::DistTo(const Vector4D &vOther) const
-{
- Vector4D delta;
- Vector4DSubtract( *this, vOther, delta );
- return delta.Length();
-}
-
-inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const
-{
- Vector4D delta;
- Vector4DSubtract( *this, vOther, delta );
- return delta.LengthSqr();
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector4DAligned routines
-//-----------------------------------------------------------------------------
-
-inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline void Vector4DAligned::InitZero( void )
-{
-#if !defined( _X360 )
- this->AsM128() = _mm_set1_ps( 0.0f );
-#else
- this->AsM128() = __vspltisw( 0 );
-#endif
- Assert( IsValid() );
-}
-
-inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c )
-{
- Assert( a.IsValid() && b.IsValid() );
-#if !defined( _X360 )
- c.x = a.x * b.x;
- c.y = a.y * b.y;
- c.z = a.z * b.z;
- c.w = a.w * b.w;
-#else
- c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() );
-#endif
-}
-
-inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
-{
- Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
-
-#if !defined( _X360 )
- vOutA.x += vInA.x * w;
- vOutA.y += vInA.y * w;
- vOutA.z += vInA.z * w;
- vOutA.w += vInA.w * w;
-
- vOutB.x += vInB.x * w;
- vOutB.y += vInB.y * w;
- vOutB.z += vInB.z * w;
- vOutB.w += vInB.w * w;
-#else
- __vector4 temp;
-
- temp = __lvlx( &w, 0 );
- temp = __vspltw( temp, 0 );
-
- vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
- vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
-#endif
-}
-
-inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
-{
- Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
-
-#if !defined( _X360 )
- // Replicate scalar float out to 4 components
- __m128 packed = _mm_set1_ps( w );
-
- // 4D SSE Vector MAD
- vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) );
- vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) );
-#else
- __vector4 temp;
-
- temp = __lvlx( &w, 0 );
- temp = __vspltw( temp, 0 );
-
- vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
- vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
-#endif
-}
-
-#endif // VECTOR4D_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+
+#ifndef VECTOR4D_H
+#define VECTOR4D_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <math.h>
+#include <stdlib.h> // For rand(). We really need a library!
+#include <float.h>
+#if !defined( _X360 )
+#include <xmmintrin.h> // For SSE
+#endif
+#include "basetypes.h" // For vec_t, put this somewhere else?
+#include "tier0/dbg.h"
+#include "mathlib/math_pfns.h"
+
+// forward declarations
+class Vector;
+class Vector2D;
+
+//=========================================================
+// 4D Vector4D
+//=========================================================
+
+class Vector4D
+{
+public:
+ // Members
+ vec_t x, y, z, w;
+
+ // Construction/destruction
+ Vector4D(void);
+ Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W);
+ Vector4D(const float *pFloat);
+
+ // Initialization
+ void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f);
+
+ // Got any nasty NaNs? (true only when every component passes IsFinite)
+ bool IsValid() const;
+
+ // array access...
+ vec_t operator[](int i) const;
+ vec_t& operator[](int i);
+
+ // Base address...
+ inline vec_t* Base();
+ inline vec_t const* Base() const;
+
+ // Cast to Vector and Vector2D...
+ Vector& AsVector3D();
+ Vector const& AsVector3D() const;
+
+ Vector2D& AsVector2D();
+ Vector2D const& AsVector2D() const;
+
+ // Initialization methods
+ void Random( vec_t minVal, vec_t maxVal );
+
+ // equality
+ bool operator==(const Vector4D& v) const;
+ bool operator!=(const Vector4D& v) const;
+
+ // arithmetic operations
+ Vector4D& operator+=(const Vector4D &v);
+ Vector4D& operator-=(const Vector4D &v);
+ Vector4D& operator*=(const Vector4D &v);
+ Vector4D& operator*=(float s);
+ Vector4D& operator/=(const Vector4D &v);
+ Vector4D& operator/=(float s);
+
+ // negate the Vector4D components
+ void Negate();
+
+ // Get the Vector4D's magnitude.
+ vec_t Length() const;
+
+ // Get the Vector4D's magnitude squared.
+ vec_t LengthSqr(void) const;
+
+ // return true if this vector is (0,0,0,0) within tolerance
+ bool IsZero( float tolerance = 0.01f ) const
+ {
+ return (x > -tolerance && x < tolerance &&
+ y > -tolerance && y < tolerance &&
+ z > -tolerance && z < tolerance &&
+ w > -tolerance && w < tolerance);
+ }
+
+ // Get the distance from this Vector4D to the other one.
+ vec_t DistTo(const Vector4D &vOther) const;
+
+ // Get the distance from this Vector4D to the other one squared.
+ vec_t DistToSqr(const Vector4D &vOther) const;
+
+ // Copy
+ void CopyToArray(float* rgfl) const;
+
+ // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
+ // is about 12% faster than the actual Vector4D equation (because it's done per-component
+ // rather than per-Vector4D).
+ void MulAdd(Vector4D const& a, Vector4D const& b, float scalar);
+
+ // Dot product.
+ vec_t Dot(Vector4D const& vOther) const;
+
+ // No copy constructors allowed if we're in optimal mode
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+private:
+#else
+public:
+#endif
+ Vector4D(Vector4D const& vOther);
+
+ // No assignment operators either...
+ Vector4D& operator=( Vector4D const& src );
+};
+
+const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f );
+const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX );
+
+//-----------------------------------------------------------------------------
+// SSE optimized routines
+//-----------------------------------------------------------------------------
+
+class ALIGN16 Vector4DAligned : public Vector4D
+{
+public:
+ Vector4DAligned(void) {}
+ Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W );
+
+ inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W );
+ inline void InitZero( void );
+
+ inline __m128 &AsM128() { return *(__m128*)&x; }
+ inline const __m128 &AsM128() const { return *(const __m128*)&x; }
+
+private:
+ // No copy constructor for the aligned variant (always private here, not just in optimal mode)
+ Vector4DAligned( Vector4DAligned const& vOther );
+
+ // No assignment operators either...
+ Vector4DAligned& operator=( Vector4DAligned const& src );
+} ALIGN16_POST;
+
+//-----------------------------------------------------------------------------
+// Vector4D related operations
+//-----------------------------------------------------------------------------
+
+// Vector4D clear
+void Vector4DClear( Vector4D& a );
+
+// Copy
+void Vector4DCopy( Vector4D const& src, Vector4D& dst );
+
+// Vector4D arithmetic
+void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result );
+void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result );
+void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result );
+void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result );
+void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result );
+void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result );
+void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result );
+
+// Vector4DAligned arithmetic
+void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result );
+
+
+#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w
+
+// Normalization
+vec_t Vector4DNormalize( Vector4D& v );
+
+// Length
+vec_t Vector4DLength( Vector4D const& v );
+
+// Dot Product
+vec_t DotProduct4D(Vector4D const& a, Vector4D const& b);
+
+// Linearly interpolate between two vectors
+void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest );
+
+
+//-----------------------------------------------------------------------------
+//
+// Inlined Vector4D methods
+//
+//-----------------------------------------------------------------------------
+
+
+//-----------------------------------------------------------------------------
+// constructors
+//-----------------------------------------------------------------------------
+
+inline Vector4D::Vector4D(void)
+{
+#ifdef _DEBUG
+ // Initialize to NAN to catch errors
+ x = y = z = w = VEC_T_NAN;
+#endif
+}
+
+inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W )
+{
+ x = X; y = Y; z = Z; w = W;
+ Assert( IsValid() );
+}
+
+inline Vector4D::Vector4D(const float *pFloat)
+{
+ Assert( pFloat );
+ x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3];
+ Assert( IsValid() );
+}
+
+
+//-----------------------------------------------------------------------------
+// copy constructor
+//-----------------------------------------------------------------------------
+
+inline Vector4D::Vector4D(const Vector4D &vOther)
+{
+ Assert( vOther.IsValid() );
+ x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w;
+}
+
+//-----------------------------------------------------------------------------
+// initialization
+//-----------------------------------------------------------------------------
+
+inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw )
+{
+ x = ix; y = iy; z = iz; w = iw;
+ Assert( IsValid() );
+}
+
+inline void Vector4D::Random( vec_t minVal, vec_t maxVal )
+{
+ x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+ w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+}
+
+inline void Vector4DClear( Vector4D& a )
+{
+ a.x = a.y = a.z = a.w = 0.0f;
+}
+
+//-----------------------------------------------------------------------------
+// assignment
+//-----------------------------------------------------------------------------
+
+inline Vector4D& Vector4D::operator=(const Vector4D &vOther)
+{
+ Assert( vOther.IsValid() );
+ x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w;
+ return *this;
+}
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+
+inline vec_t& Vector4D::operator[](int i)
+{
+ Assert( (i >= 0) && (i < 4) );
+ return ((vec_t*)this)[i];
+}
+
+inline vec_t Vector4D::operator[](int i) const
+{
+ Assert( (i >= 0) && (i < 4) );
+ return ((vec_t*)this)[i];
+}
+
+//-----------------------------------------------------------------------------
+// Cast to Vector and Vector2D...
+//-----------------------------------------------------------------------------
+
+inline Vector& Vector4D::AsVector3D()
+{
+ return *(Vector*)this;
+}
+
+inline Vector const& Vector4D::AsVector3D() const
+{
+ return *(Vector const*)this;
+}
+
+inline Vector2D& Vector4D::AsVector2D()
+{
+ return *(Vector2D*)this;
+}
+
+inline Vector2D const& Vector4D::AsVector2D() const
+{
+ return *(Vector2D const*)this;
+}
+
+//-----------------------------------------------------------------------------
+// Base address...
+//-----------------------------------------------------------------------------
+
+inline vec_t* Vector4D::Base()
+{
+ return (vec_t*)this;
+}
+
+inline vec_t const* Vector4D::Base() const
+{
+ return (vec_t const*)this;
+}
+
+//-----------------------------------------------------------------------------
+// IsValid?
+//-----------------------------------------------------------------------------
+
+inline bool Vector4D::IsValid() const
+{
+ return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w);
+}
+
+//-----------------------------------------------------------------------------
+// comparison
+//-----------------------------------------------------------------------------
+
+inline bool Vector4D::operator==( Vector4D const& src ) const
+{
+ Assert( src.IsValid() && IsValid() );
+ return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w);
+}
+
+inline bool Vector4D::operator!=( Vector4D const& src ) const
+{
+ Assert( src.IsValid() && IsValid() );
+ return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w);
+}
+
+
+//-----------------------------------------------------------------------------
+// Copy
+//-----------------------------------------------------------------------------
+
+inline void Vector4DCopy( Vector4D const& src, Vector4D& dst )
+{
+ Assert( src.IsValid() );
+ dst.x = src.x;
+ dst.y = src.y;
+ dst.z = src.z;
+ dst.w = src.w;
+}
+
+inline void Vector4D::CopyToArray(float* rgfl) const
+{
+ Assert( IsValid() );
+ Assert( rgfl );
+ rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w;
+}
+
+//-----------------------------------------------------------------------------
+// standard math operations
+//-----------------------------------------------------------------------------
+
+inline void Vector4D::Negate()
+{
+ Assert( IsValid() );
+ x = -x; y = -y; z = -z; w = -w;
+}
+
+inline Vector4D& Vector4D::operator+=(const Vector4D& v)
+{
+ Assert( IsValid() && v.IsValid() );
+ x+=v.x; y+=v.y; z += v.z; w += v.w;
+ return *this;
+}
+
+inline Vector4D& Vector4D::operator-=(const Vector4D& v)
+{
+ Assert( IsValid() && v.IsValid() );
+ x-=v.x; y-=v.y; z -= v.z; w -= v.w;
+ return *this;
+}
+
+inline Vector4D& Vector4D::operator*=(float fl)
+{
+ x *= fl;
+ y *= fl;
+ z *= fl;
+ w *= fl;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector4D& Vector4D::operator*=(Vector4D const& v)
+{
+ x *= v.x;
+ y *= v.y;
+ z *= v.z;
+ w *= v.w;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector4D& Vector4D::operator/=(float fl)
+{
+ Assert( fl != 0.0f );
+ float oofl = 1.0f / fl;
+ x *= oofl;
+ y *= oofl;
+ z *= oofl;
+ w *= oofl;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline Vector4D& Vector4D::operator/=(Vector4D const& v)
+{
+ Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f );
+ x /= v.x;
+ y /= v.y;
+ z /= v.z;
+ w /= v.w;
+ Assert( IsValid() );
+ return *this;
+}
+
+inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x + b.x;
+ c.y = a.y + b.y;
+ c.z = a.z + b.z;
+ c.w = a.w + b.w;
+}
+
+inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x - b.x;
+ c.y = a.y - b.y;
+ c.z = a.z - b.z;
+ c.w = a.w - b.w;
+}
+
+inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c )
+{
+ Assert( a.IsValid() && IsFinite(b) );
+ c.x = a.x * b;
+ c.y = a.y * b;
+ c.z = a.z * b;
+ c.w = a.w * b;
+}
+
+inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+ c.x = a.x * b.x;
+ c.y = a.y * b.y;
+ c.z = a.z * b.z;
+ c.w = a.w * b.w;
+}
+
+inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c )
+{
+ Assert( a.IsValid() );
+ Assert( b != 0.0f );
+ vec_t oob = 1.0f / b;
+ c.x = a.x * oob;
+ c.y = a.y * oob;
+ c.z = a.z * oob;
+ c.w = a.w * oob;
+}
+
+inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c )
+{
+ Assert( a.IsValid() );
+ Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) );
+ c.x = a.x / b.x;
+ c.y = a.y / b.y;
+ c.z = a.z / b.z;
+ c.w = a.w / b.w;
+}
+
+inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result )
+{
+ Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
+ result.x = start.x + s*dir.x;
+ result.y = start.y + s*dir.y;
+ result.z = start.z + s*dir.z;
+ result.w = start.w + s*dir.w;
+}
+
+// FIXME: Remove
+// For backwards compatibility
+inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar)
+{
+ x = a.x + b.x * scalar;
+ y = a.y + b.y * scalar;
+ z = a.z + b.z * scalar;
+ w = a.w + b.w * scalar;
+}
+
+inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest )
+{
+ dest[0] = src1[0] + (src2[0] - src1[0]) * t;
+ dest[1] = src1[1] + (src2[1] - src1[1]) * t;
+ dest[2] = src1[2] + (src2[2] - src1[2]) * t;
+ dest[3] = src1[3] + (src2[3] - src1[3]) * t;
+}
+
+//-----------------------------------------------------------------------------
+// dot product
+//-----------------------------------------------------------------------------
+
+inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b)
+{
+ Assert( a.IsValid() && b.IsValid() );
+ return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w );
+}
+
+// for backwards compatibility
+inline vec_t Vector4D::Dot( Vector4D const& vOther ) const
+{
+ return DotProduct4D( *this, vOther );
+}
+
+
+//-----------------------------------------------------------------------------
+// length
+//-----------------------------------------------------------------------------
+
+inline vec_t Vector4DLength( Vector4D const& v )
+{
+ Assert( v.IsValid() );
+ return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
+}
+
+inline vec_t Vector4D::LengthSqr(void) const
+{
+ Assert( IsValid() );
+ return (x*x + y*y + z*z + w*w);
+}
+
+inline vec_t Vector4D::Length(void) const
+{
+ return Vector4DLength( *this );
+}
+
+
+//-----------------------------------------------------------------------------
+// Normalization
+//-----------------------------------------------------------------------------
+
+// FIXME: Can't use until we're un-macroed in mathlib.h
+inline vec_t Vector4DNormalize( Vector4D& v )
+{
+ Assert( v.IsValid() );
+ vec_t l = v.Length();
+ if (l != 0.0f)
+ {
+ v /= l;
+ }
+ else
+ {
+ v.x = v.y = v.z = v.w = 0.0f;
+ }
+ return l;
+}
+
+//-----------------------------------------------------------------------------
+// Get the distance from this Vector4D to the other one
+//-----------------------------------------------------------------------------
+
+inline vec_t Vector4D::DistTo(const Vector4D &vOther) const
+{
+ Vector4D delta;
+ Vector4DSubtract( *this, vOther, delta );
+ return delta.Length();
+}
+
+inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const
+{
+ Vector4D delta;
+ Vector4DSubtract( *this, vOther, delta );
+ return delta.LengthSqr();
+}
+
+
+//-----------------------------------------------------------------------------
+// Vector4DAligned routines
+//-----------------------------------------------------------------------------
+
+inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W )
+{
+ x = X; y = Y; z = Z; w = W;
+ Assert( IsValid() );
+}
+
+inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W )
+{
+ x = X; y = Y; z = Z; w = W;
+ Assert( IsValid() );
+}
+
+inline void Vector4DAligned::InitZero( void )
+{
+#if !defined( _X360 )
+ this->AsM128() = _mm_set1_ps( 0.0f );
+#else
+ this->AsM128() = __vspltisw( 0 );
+#endif
+ Assert( IsValid() );
+}
+
+inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c )
+{
+ Assert( a.IsValid() && b.IsValid() );
+#if !defined( _X360 )
+ c.x = a.x * b.x;
+ c.y = a.y * b.y;
+ c.z = a.z * b.z;
+ c.w = a.w * b.w;
+#else
+ c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() );
+#endif
+}
+
+inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
+{
+ Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
+
+#if !defined( _X360 )
+ vOutA.x += vInA.x * w;
+ vOutA.y += vInA.y * w;
+ vOutA.z += vInA.z * w;
+ vOutA.w += vInA.w * w;
+
+ vOutB.x += vInB.x * w;
+ vOutB.y += vInB.y * w;
+ vOutB.z += vInB.z * w;
+ vOutB.w += vInB.w * w;
+#else
+ __vector4 temp;
+
+ temp = __lvlx( &w, 0 );
+ temp = __vspltw( temp, 0 );
+
+ vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
+ vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
+#endif
+}
+
+inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
+{
+ Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
+
+#if !defined( _X360 )
+ // Replicate scalar float out to 4 components
+ __m128 packed = _mm_set1_ps( w );
+
+ // 4D SSE Vector MAD
+ vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) );
+ vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) );
+#else
+ __vector4 temp;
+
+ temp = __lvlx( &w, 0 );
+ temp = __vspltw( temp, 0 );
+
+ vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
+ vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
+#endif
+}
+
+#endif // VECTOR4D_H
+
diff --git a/mp/src/public/mathlib/vmatrix.h b/mp/src/public/mathlib/vmatrix.h
index e09a964f..2c536672 100644
--- a/mp/src/public/mathlib/vmatrix.h
+++ b/mp/src/public/mathlib/vmatrix.h
@@ -1,950 +1,950 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-//
-// VMatrix always postmultiply vectors as in Ax = b.
-// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation,
-// a matrix to transform a vector into that space looks like this:
-// Fx Lx Ux Tx
-// Fy Ly Uy Ty
-// Fz Lz Uz Tz
-// 0 0 0 1
-
-// Note that concatenating matrices needs to multiply them in reverse order.
-// ie: if I want to apply matrix A, B, then C, the equation needs to look like this:
-// C * B * A * v
-// ie:
-// v = A * v;
-// v = B * v;
-// v = C * v;
-//=============================================================================
-
-#ifndef VMATRIX_H
-#define VMATRIX_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <string.h>
-#include "mathlib/vector.h"
-#include "mathlib/vplane.h"
-#include "mathlib/vector4d.h"
-#include "mathlib/mathlib.h"
-
-struct cplane_t;
-
-
-class VMatrix
-{
-public:
-
- VMatrix();
- VMatrix(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33
- );
-
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- VMatrix( const Vector& forward, const Vector& left, const Vector& up );
- VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation );
-
- // Construct from a 3x4 matrix
- VMatrix( const matrix3x4_t& matrix3x4 );
-
- // Set the values in the matrix.
- void Init(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33
- );
-
-
- // Initialize from a 3x4
- void Init( const matrix3x4_t& matrix3x4 );
-
- // array access
- inline float* operator[](int i)
- {
- return m[i];
- }
-
- inline const float* operator[](int i) const
- {
- return m[i];
- }
-
- // Get a pointer to m[0][0]
- inline float *Base()
- {
- return &m[0][0];
- }
-
- inline const float *Base() const
- {
- return &m[0][0];
- }
-
- void SetLeft(const Vector &vLeft);
- void SetUp(const Vector &vUp);
- void SetForward(const Vector &vForward);
-
- void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const;
- void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp);
-
- // Get/set the translation.
- Vector & GetTranslation( Vector &vTrans ) const;
- void SetTranslation(const Vector &vTrans);
-
- void PreTranslate(const Vector &vTrans);
- void PostTranslate(const Vector &vTrans);
-
- matrix3x4_t& As3x4();
- const matrix3x4_t& As3x4() const;
- void CopyFrom3x4( const matrix3x4_t &m3x4 );
- void Set3x4( matrix3x4_t& matrix3x4 ) const;
-
- bool operator==( const VMatrix& src ) const;
- bool operator!=( const VMatrix& src ) const { return !( *this == src ); }
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Access the basis vectors.
- Vector GetLeft() const;
- Vector GetUp() const;
- Vector GetForward() const;
- Vector GetTranslation() const;
-#endif
-
-
-// Matrix->vector operations.
-public:
- // Multiply by a 3D vector (same as operator*).
- void V3Mul(const Vector &vIn, Vector &vOut) const;
-
- // Multiply by a 4D vector.
- void V4Mul(const Vector4D &vIn, Vector4D &vOut) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3).
- Vector ApplyRotation(const Vector &vVec) const;
-
- // Multiply by a vector (divides by w, assumes input w is 1).
- Vector operator*(const Vector &vVec) const;
-
- // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation).
- Vector VMul3x3(const Vector &vVec) const;
-
- // Apply the inverse (transposed) rotation (only works on pure rotation matrix)
- Vector VMul3x3Transpose(const Vector &vVec) const;
-
- // Multiply by the upper 3 rows.
- Vector VMul4x3(const Vector &vVec) const;
-
- // Apply the inverse (transposed) transformation (only works on pure rotation/translation)
- Vector VMul4x3Transpose(const Vector &vVec) const;
-#endif
-
-
-// Matrix->plane operations.
-public:
- // Transform the plane. The matrix can only contain translation and rotation.
- void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Just calls TransformPlane and returns the result.
- VPlane operator*(const VPlane &thePlane) const;
-#endif
-
-// Matrix->matrix operations.
-public:
-
- VMatrix& operator=(const VMatrix &mOther);
-
- // Multiply two matrices (out = this * vm).
- void MatrixMul( const VMatrix &vm, VMatrix &out ) const;
-
- // Add two matrices.
- const VMatrix& operator+=(const VMatrix &other);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Just calls MatrixMul and returns the result.
- VMatrix operator*(const VMatrix &mOther) const;
-
- // Add/Subtract two matrices.
- VMatrix operator+(const VMatrix &other) const;
- VMatrix operator-(const VMatrix &other) const;
-
- // Negation.
- VMatrix operator-() const;
-
- // Return inverse matrix. Be careful because the results are undefined
- // if the matrix doesn't have an inverse (ie: InverseGeneral returns false).
- VMatrix operator~() const;
-#endif
-
-// Matrix operations.
-public:
- // Set to identity.
- void Identity();
-
- bool IsIdentity() const;
-
- // Setup a matrix for origin and angles.
- void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles );
-
- // General inverse. This may fail so check the return!
- bool InverseGeneral(VMatrix &vInverse) const;
-
- // Does a fast inverse, assuming the matrix only contains translation and rotation.
- void InverseTR( VMatrix &mRet ) const;
-
- // Usually used for debug checks. Returns true if the upper 3x3 contains
- // unit vectors and they are all orthogonal.
- bool IsRotationMatrix() const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // This calls the other InverseTR and returns the result.
- VMatrix InverseTR() const;
-
- // Get the scale of the matrix's basis vectors.
- Vector GetScale() const;
-
- // (Fast) multiply by a scaling matrix setup from vScale.
- VMatrix Scale(const Vector &vScale);
-
- // Normalize the basis vectors.
- VMatrix NormalizeBasisVectors() const;
-
- // Transpose.
- VMatrix Transpose() const;
-
- // Transpose upper-left 3x3.
- VMatrix Transpose3x3() const;
-#endif
-
-public:
- // The matrix.
- vec_t m[4][4];
-};
-
-
-
-//-----------------------------------------------------------------------------
-// Helper functions.
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Setup an identity matrix.
-VMatrix SetupMatrixIdentity();
-
-// Setup as a scaling matrix.
-VMatrix SetupMatrixScale(const Vector &vScale);
-
-// Setup a translation matrix.
-VMatrix SetupMatrixTranslation(const Vector &vTranslation);
-
-// Setup a matrix to reflect around the plane.
-VMatrix SetupMatrixReflection(const VPlane &thePlane);
-
-// Setup a matrix to project from vOrigin onto thePlane.
-VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane);
-
-// Setup a matrix to rotate the specified amount around the specified axis.
-VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees);
-
-// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles.
-VMatrix SetupMatrixAngles(const QAngle &vAngles);
-
-// Setup a matrix for origin and angles.
-VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles);
-
-#endif
-
-#define VMatToString(mat) (static_cast<const char *>(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference!
-
-//-----------------------------------------------------------------------------
-// Returns the point at the intersection on the 3 planes.
-// Returns false if it can't be solved (2 or more planes are parallel).
-//-----------------------------------------------------------------------------
-bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut );
-
-
-//-----------------------------------------------------------------------------
-// These methods are faster. Use them if you want faster code
-//-----------------------------------------------------------------------------
-void MatrixSetIdentity( VMatrix &dst );
-void MatrixTranspose( const VMatrix& src, VMatrix& dst );
-void MatrixCopy( const VMatrix& src, VMatrix& dst );
-void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst );
-
-// Accessors
-void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn );
-void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column );
-void MatrixGetRow( const VMatrix &src, int nCol, Vector *pColumn );
-void MatrixSetRow( VMatrix &src, int nCol, const Vector &column );
-
-// Vector3DMultiply treats src2 as if it's a direction vector
-void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst );
-
-// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
-inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst );
-
-// Vector3DMultiplyPositionProjective treats src2 as if it's a point
-// and does the perspective divide at the end
-void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
-
-// Vector3DMultiplyPosition treats src2 as if it's a direction
-// and does the perspective divide at the end
-// NOTE: src1 had better be an inverse transpose to use this correctly
-void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
-
-void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
-
-// Same as Vector4DMultiply except that src2 has an implicit W of 1
-void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, Vector4D& dst );
-
-// Multiplies the vector by the transpose of the matrix
-void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst );
-void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
-
-// Transform a plane
-void MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane );
-
-// Transform a plane that has an axis-aligned normal
-void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane );
-
-void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z );
-void MatrixBuildTranslation( VMatrix& dst, const Vector &translation );
-
-inline void MatrixTranslate( VMatrix& dst, const Vector &translation )
-{
- VMatrix matTranslation, temp;
- MatrixBuildTranslation( matTranslation, translation );
- MatrixMultiply( dst, matTranslation, temp );
- dst = temp;
-}
-
-
-void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees );
-void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees );
-
-inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees )
-{
- VMatrix rotation, temp;
- MatrixBuildRotationAboutAxis( rotation, vAxisOfRot, angleDegrees );
- MatrixMultiply( dst, rotation, temp );
- dst = temp;
-}
-
-// Builds a rotation matrix that rotates one direction vector into another
-void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection );
-
-// Builds a scale matrix
-void MatrixBuildScale( VMatrix &dst, float x, float y, float z );
-void MatrixBuildScale( VMatrix &dst, const Vector& scale );
-
-// Build a perspective matrix.
-// zNear and zFar are assumed to be positive.
-// You end up looking down positive Z, X is to the right, Y is up.
-// X range: [0..1]
-// Y range: [0..1]
-// Z range: [0..1]
-void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar );
-
-//-----------------------------------------------------------------------------
-// Given a projection matrix, take the extremes of the space in transformed into world space and
-// get a bounding box.
-//-----------------------------------------------------------------------------
-void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs );
-
-//-----------------------------------------------------------------------------
-// Given a projection matrix, take the extremes of the space in transformed into world space and
-// get a bounding sphere.
-//-----------------------------------------------------------------------------
-void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius );
-
-//-----------------------------------------------------------------------------
-// Given an inverse projection matrix, take the extremes of the space in transformed into world space and
-// get a bounding box.
-//-----------------------------------------------------------------------------
-void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs );
-
-//-----------------------------------------------------------------------------
-// Given an inverse projection matrix, take the extremes of the space in transformed into world space and
-// get a bounding sphere.
-//-----------------------------------------------------------------------------
-void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius );
-
-//-----------------------------------------------------------------------------
-// Calculate frustum planes given a clip->world space transform.
-//-----------------------------------------------------------------------------
-void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum );
-
-//-----------------------------------------------------------------------------
-// Setup a matrix from euler angles.
-//-----------------------------------------------------------------------------
-void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst );
-
-//-----------------------------------------------------------------------------
-// Creates euler angles from a matrix
-//-----------------------------------------------------------------------------
-void MatrixToAngles( const VMatrix& src, QAngle& vAngles );
-
-//-----------------------------------------------------------------------------
-// Does a fast inverse, assuming the matrix only contains translation and rotation.
-//-----------------------------------------------------------------------------
-void MatrixInverseTR( const VMatrix& src, VMatrix &dst );
-
-//-----------------------------------------------------------------------------
-// Inverts any matrix at all
-//-----------------------------------------------------------------------------
-bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst);
-
-//-----------------------------------------------------------------------------
-// Computes the inverse transpose
-//-----------------------------------------------------------------------------
-void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst );
-
-
-
-//-----------------------------------------------------------------------------
-// VMatrix inlines.
-//-----------------------------------------------------------------------------
-inline VMatrix::VMatrix()
-{
-}
-
-inline VMatrix::VMatrix(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33)
-{
- Init(
- m00, m01, m02, m03,
- m10, m11, m12, m13,
- m20, m21, m22, m23,
- m30, m31, m32, m33
- );
-}
-
-
-inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 )
-{
- Init( matrix3x4 );
-}
-
-
-//-----------------------------------------------------------------------------
-// Creates a matrix where the X axis = forward
-// the Y axis = left, and the Z axis = up
-//-----------------------------------------------------------------------------
-inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis )
-{
- Init(
- xAxis.x, yAxis.x, zAxis.x, 0.0f,
- xAxis.y, yAxis.y, zAxis.y, 0.0f,
- xAxis.z, yAxis.z, zAxis.z, 0.0f,
- 0.0f, 0.0f, 0.0f, 1.0f
- );
-}
-
-inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
-{
- Init(
- xAxis.x, yAxis.x, zAxis.x, translation.x,
- xAxis.y, yAxis.y, zAxis.y, translation.y,
- xAxis.z, yAxis.z, zAxis.z, translation.z,
- 0.0f, 0.0f, 0.0f, 1.0f
- );
-}
-
-
-inline void VMatrix::Init(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33
- )
-{
- m[0][0] = m00;
- m[0][1] = m01;
- m[0][2] = m02;
- m[0][3] = m03;
-
- m[1][0] = m10;
- m[1][1] = m11;
- m[1][2] = m12;
- m[1][3] = m13;
-
- m[2][0] = m20;
- m[2][1] = m21;
- m[2][2] = m22;
- m[2][3] = m23;
-
- m[3][0] = m30;
- m[3][1] = m31;
- m[3][2] = m32;
- m[3][3] = m33;
-}
-
-
-//-----------------------------------------------------------------------------
-// Initialize from a 3x4
-//-----------------------------------------------------------------------------
-inline void VMatrix::Init( const matrix3x4_t& matrix3x4 )
-{
- memcpy(m, matrix3x4.Base(), sizeof( matrix3x4_t ) );
-
- m[3][0] = 0.0f;
- m[3][1] = 0.0f;
- m[3][2] = 0.0f;
- m[3][3] = 1.0f;
-}
-
-
-//-----------------------------------------------------------------------------
-// Methods related to the basis vectors of the matrix
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline Vector VMatrix::GetForward() const
-{
- return Vector(m[0][0], m[1][0], m[2][0]);
-}
-
-inline Vector VMatrix::GetLeft() const
-{
- return Vector(m[0][1], m[1][1], m[2][1]);
-}
-
-inline Vector VMatrix::GetUp() const
-{
- return Vector(m[0][2], m[1][2], m[2][2]);
-}
-
-#endif
-
-inline void VMatrix::SetForward(const Vector &vForward)
-{
- m[0][0] = vForward.x;
- m[1][0] = vForward.y;
- m[2][0] = vForward.z;
-}
-
-inline void VMatrix::SetLeft(const Vector &vLeft)
-{
- m[0][1] = vLeft.x;
- m[1][1] = vLeft.y;
- m[2][1] = vLeft.z;
-}
-
-inline void VMatrix::SetUp(const Vector &vUp)
-{
- m[0][2] = vUp.x;
- m[1][2] = vUp.y;
- m[2][2] = vUp.z;
-}
-
-inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const
-{
- vForward.Init( m[0][0], m[1][0], m[2][0] );
- vLeft.Init( m[0][1], m[1][1], m[2][1] );
- vUp.Init( m[0][2], m[1][2], m[2][2] );
-}
-
-inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp)
-{
- SetForward(vForward);
- SetLeft(vLeft);
- SetUp(vUp);
-}
-
-
-//-----------------------------------------------------------------------------
-// Methods related to the translation component of the matrix
-//-----------------------------------------------------------------------------
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline Vector VMatrix::GetTranslation() const
-{
- return Vector(m[0][3], m[1][3], m[2][3]);
-}
-
-#endif
-
-inline Vector& VMatrix::GetTranslation( Vector &vTrans ) const
-{
- vTrans.x = m[0][3];
- vTrans.y = m[1][3];
- vTrans.z = m[2][3];
- return vTrans;
-}
-
-inline void VMatrix::SetTranslation(const Vector &vTrans)
-{
- m[0][3] = vTrans.x;
- m[1][3] = vTrans.y;
- m[2][3] = vTrans.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// appply translation to this matrix in the input space
-//-----------------------------------------------------------------------------
-inline void VMatrix::PreTranslate(const Vector &vTrans)
-{
- Vector tmp;
- Vector3DMultiplyPosition( *this, vTrans, tmp );
- m[0][3] = tmp.x;
- m[1][3] = tmp.y;
- m[2][3] = tmp.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// appply translation to this matrix in the output space
-//-----------------------------------------------------------------------------
-inline void VMatrix::PostTranslate(const Vector &vTrans)
-{
- m[0][3] += vTrans.x;
- m[1][3] += vTrans.y;
- m[2][3] += vTrans.z;
-}
-
-inline const matrix3x4_t& VMatrix::As3x4() const
-{
- return *((const matrix3x4_t*)this);
-}
-
-inline matrix3x4_t& VMatrix::As3x4()
-{
- return *((matrix3x4_t*)this);
-}
-
-inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
-{
- memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) );
- m[3][0] = m[3][1] = m[3][2] = 0;
- m[3][3] = 1;
-}
-
-inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const
-{
- memcpy(matrix3x4.Base(), m, sizeof( matrix3x4_t ) );
-}
-
-
-//-----------------------------------------------------------------------------
-// Matrix math operations
-//-----------------------------------------------------------------------------
-inline const VMatrix& VMatrix::operator+=(const VMatrix &other)
-{
- for(int i=0; i < 4; i++)
- {
- for(int j=0; j < 4; j++)
- {
- m[i][j] += other.m[i][j];
- }
- }
-
- return *this;
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline VMatrix VMatrix::operator+(const VMatrix &other) const
-{
- VMatrix ret;
- for(int i=0; i < 16; i++)
- {
- ((float*)ret.m)[i] = ((float*)m)[i] + ((float*)other.m)[i];
- }
- return ret;
-}
-
-inline VMatrix VMatrix::operator-(const VMatrix &other) const
-{
- VMatrix ret;
-
- for(int i=0; i < 4; i++)
- {
- for(int j=0; j < 4; j++)
- {
- ret.m[i][j] = m[i][j] - other.m[i][j];
- }
- }
-
- return ret;
-}
-
-inline VMatrix VMatrix::operator-() const
-{
- VMatrix ret;
- for( int i=0; i < 16; i++ )
- {
- ((float*)ret.m)[i] = ((float*)m)[i];
- }
- return ret;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-//-----------------------------------------------------------------------------
-// Vector transformation
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline Vector VMatrix::operator*(const Vector &vVec) const
-{
- Vector vRet;
- vRet.x = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3];
- vRet.y = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3];
- vRet.z = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3];
-
- return vRet;
-}
-
-inline Vector VMatrix::VMul4x3(const Vector &vVec) const
-{
- Vector vResult;
- Vector3DMultiplyPosition( *this, vVec, vResult );
- return vResult;
-}
-
-
-inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const
-{
- Vector tmp = vVec;
- tmp.x -= m[0][3];
- tmp.y -= m[1][3];
- tmp.z -= m[2][3];
-
- return Vector(
- m[0][0]*tmp.x + m[1][0]*tmp.y + m[2][0]*tmp.z,
- m[0][1]*tmp.x + m[1][1]*tmp.y + m[2][1]*tmp.z,
- m[0][2]*tmp.x + m[1][2]*tmp.y + m[2][2]*tmp.z
- );
-}
-
-inline Vector VMatrix::VMul3x3(const Vector &vVec) const
-{
- return Vector(
- m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z,
- m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z,
- m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z
- );
-}
-
-inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const
-{
- return Vector(
- m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z,
- m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z,
- m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z
- );
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const
-{
- vec_t rw;
-
- rw = 1.0f / (m[3][0]*vIn.x + m[3][1]*vIn.y + m[3][2]*vIn.z + m[3][3]);
- vOut.x = (m[0][0]*vIn.x + m[0][1]*vIn.y + m[0][2]*vIn.z + m[0][3]) * rw;
- vOut.y = (m[1][0]*vIn.x + m[1][1]*vIn.y + m[1][2]*vIn.z + m[1][3]) * rw;
- vOut.z = (m[2][0]*vIn.x + m[2][1]*vIn.y + m[2][2]*vIn.z + m[2][3]) * rw;
-}
-
-inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const
-{
- vOut[0] = m[0][0]*vIn[0] + m[0][1]*vIn[1] + m[0][2]*vIn[2] + m[0][3]*vIn[3];
- vOut[1] = m[1][0]*vIn[0] + m[1][1]*vIn[1] + m[1][2]*vIn[2] + m[1][3]*vIn[3];
- vOut[2] = m[2][0]*vIn[0] + m[2][1]*vIn[1] + m[2][2]*vIn[2] + m[2][3]*vIn[3];
- vOut[3] = m[3][0]*vIn[0] + m[3][1]*vIn[1] + m[3][2]*vIn[2] + m[3][3]*vIn[3];
-}
-
-
-//-----------------------------------------------------------------------------
-// Plane transformation
-//-----------------------------------------------------------------------------
-inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const
-{
- Vector vTrans;
- Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal );
- outPlane.m_Dist = inPlane.m_Dist * DotProduct( outPlane.m_Normal, outPlane.m_Normal );
- outPlane.m_Dist += DotProduct( outPlane.m_Normal, GetTranslation( vTrans ) );
-}
-
-
-//-----------------------------------------------------------------------------
-// Other random stuff
-//-----------------------------------------------------------------------------
-inline void VMatrix::Identity()
-{
- MatrixSetIdentity( *this );
-}
-
-
-inline bool VMatrix::IsIdentity() const
-{
- return
- m[0][0] == 1.0f && m[0][1] == 0.0f && m[0][2] == 0.0f && m[0][3] == 0.0f &&
- m[1][0] == 0.0f && m[1][1] == 1.0f && m[1][2] == 0.0f && m[1][3] == 0.0f &&
- m[2][0] == 0.0f && m[2][1] == 0.0f && m[2][2] == 1.0f && m[2][3] == 0.0f &&
- m[3][0] == 0.0f && m[3][1] == 0.0f && m[3][2] == 0.0f && m[3][3] == 1.0f;
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline Vector VMatrix::ApplyRotation(const Vector &vVec) const
-{
- return VMul3x3(vVec);
-}
-
-inline VMatrix VMatrix::operator~() const
-{
- VMatrix mRet;
- InverseGeneral(mRet);
- return mRet;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Accessors
-//-----------------------------------------------------------------------------
-inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn )
-{
- Assert( (nCol >= 0) && (nCol <= 3) );
-
- pColumn->x = src[0][nCol];
- pColumn->y = src[1][nCol];
- pColumn->z = src[2][nCol];
-}
-
-inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column )
-{
- Assert( (nCol >= 0) && (nCol <= 3) );
-
- src.m[0][nCol] = column.x;
- src.m[1][nCol] = column.y;
- src.m[2][nCol] = column.z;
-}
-
-inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow )
-{
- Assert( (nRow >= 0) && (nRow <= 3) );
- *pRow = *(Vector*)src[nRow];
-}
-
-inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row )
-{
- Assert( (nRow >= 0) && (nRow <= 3) );
- *(Vector*)dst[nRow] = row;
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
-//-----------------------------------------------------------------------------
-// NJS: src2 is passed in as a full vector rather than a reference to prevent the need
-// for 2 branches and a potential copy in the body. (ie, handling the case when the src2
-// reference is the same as the dst reference ).
-inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst )
-{
- dst[0] = src1[0][0] * src2.x + src1[0][1] * src2.y + src1[0][2] * src2.z + src1[0][3];
- dst[1] = src1[1][0] * src2.x + src1[1][1] * src2.y + src1[1][2] * src2.z + src1[1][3];
- dst[2] = src1[2][0] * src2.x + src1[2][1] * src2.y + src1[2][2] * src2.z + src1[2][3];
-}
-
-
-//-----------------------------------------------------------------------------
-// Transform a plane that has an axis-aligned normal
-//-----------------------------------------------------------------------------
-inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane )
-{
- // See MatrixTransformPlane in the .cpp file for an explanation of the algorithm.
- MatrixGetColumn( src, nDim, &outPlane.normal );
- outPlane.normal *= flSign;
- outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal );
-
- // NOTE: Writing this out by hand because it doesn't inline (inline depth isn't large enough)
- // This should read outPlane.dist += DotProduct( outPlane.normal, src.GetTranslation );
- outPlane.dist += outPlane.normal.x * src.m[0][3] + outPlane.normal.y * src.m[1][3] + outPlane.normal.z * src.m[2][3];
-}
-
-
-//-----------------------------------------------------------------------------
-// Matrix equality test
-//-----------------------------------------------------------------------------
-inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance )
-{
- for ( int i = 0; i < 3; ++i )
- {
- for ( int j = 0; j < 3; ++j )
- {
- if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance )
- return false;
- }
- }
- return true;
-}
-
-//-----------------------------------------------------------------------------
-//
-//-----------------------------------------------------------------------------
-void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar );
-void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar );
-void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right );
-void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar );
-
-inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar )
-{
- VMatrix mat;
- MatrixBuildOrtho( mat, left, top, right, bottom, zNear, zFar );
-
- VMatrix temp;
- MatrixMultiply( dst, mat, temp );
- dst = temp;
-}
-
-inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
-{
- VMatrix mat;
- MatrixBuildPerspectiveX( mat, flFovX, flAspect, flZNear, flZFar );
-
- VMatrix temp;
- MatrixMultiply( dst, mat, temp );
- dst = temp;
-}
-
-inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
-{
- VMatrix mat;
- MatrixBuildPerspectiveOffCenterX( mat, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right );
-
- VMatrix temp;
- MatrixMultiply( dst, mat, temp );
- dst = temp;
-}
-
-#endif
-
-
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $NoKeywords: $
+//
+//=============================================================================//
+//
+// VMatrix always postmultiply vectors as in Ax = b.
+// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation,
+// a matrix to transform a vector into that space looks like this:
+// Fx Lx Ux Tx
+// Fy Ly Uy Ty
+// Fz Lz Uz Tz
+// 0 0 0 1
+
+// Note that concatenating matrices needs to multiply them in reverse order.
+// ie: if I want to apply matrix A, B, then C, the equation needs to look like this:
+// C * B * A * v
+// ie:
+// v = A * v;
+// v = B * v;
+// v = C * v;
+//=============================================================================
+
+#ifndef VMATRIX_H
+#define VMATRIX_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include <string.h>
+#include "mathlib/vector.h"
+#include "mathlib/vplane.h"
+#include "mathlib/vector4d.h"
+#include "mathlib/mathlib.h"
+
+struct cplane_t;
+
+
+class VMatrix
+{
+public:
+
+ VMatrix();
+ VMatrix(
+ vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+ vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+ vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+ vec_t m30, vec_t m31, vec_t m32, vec_t m33
+ );
+
+ // Creates a matrix where the X axis = forward
+ // the Y axis = left, and the Z axis = up
+ VMatrix( const Vector& forward, const Vector& left, const Vector& up );
+ VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation );
+
+ // Construct from a 3x4 matrix
+ VMatrix( const matrix3x4_t& matrix3x4 );
+
+ // Set the values in the matrix.
+ void Init(
+ vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+ vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+ vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+ vec_t m30, vec_t m31, vec_t m32, vec_t m33
+ );
+
+
+ // Initialize from a 3x4
+ void Init( const matrix3x4_t& matrix3x4 );
+
+ // array access
+ inline float* operator[](int i)
+ {
+ return m[i];
+ }
+
+ inline const float* operator[](int i) const
+ {
+ return m[i];
+ }
+
+ // Get a pointer to m[0][0]
+ inline float *Base()
+ {
+ return &m[0][0];
+ }
+
+ inline const float *Base() const
+ {
+ return &m[0][0];
+ }
+
+ void SetLeft(const Vector &vLeft);
+ void SetUp(const Vector &vUp);
+ void SetForward(const Vector &vForward);
+
+ void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const;
+ void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp);
+
+ // Get/set the translation.
+ Vector & GetTranslation( Vector &vTrans ) const;
+ void SetTranslation(const Vector &vTrans);
+
+ void PreTranslate(const Vector &vTrans);
+ void PostTranslate(const Vector &vTrans);
+
+ matrix3x4_t& As3x4();
+ const matrix3x4_t& As3x4() const;
+ void CopyFrom3x4( const matrix3x4_t &m3x4 );
+ void Set3x4( matrix3x4_t& matrix3x4 ) const;
+
+ bool operator==( const VMatrix& src ) const;
+ bool operator!=( const VMatrix& src ) const { return !( *this == src ); }
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // Access the basis vectors.
+ Vector GetLeft() const;
+ Vector GetUp() const;
+ Vector GetForward() const;
+ Vector GetTranslation() const;
+#endif
+
+
+// Matrix->vector operations.
+public:
+ // Multiply by a 3D vector (same as operator*).
+ void V3Mul(const Vector &vIn, Vector &vOut) const;
+
+ // Multiply by a 4D vector.
+ void V4Mul(const Vector4D &vIn, Vector4D &vOut) const;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3).
+ Vector ApplyRotation(const Vector &vVec) const;
+
+ // Multiply by a vector (divides by w, assumes input w is 1).
+ Vector operator*(const Vector &vVec) const;
+
+ // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation).
+ Vector VMul3x3(const Vector &vVec) const;
+
+ // Apply the inverse (transposed) rotation (only works on pure rotation matrix)
+ Vector VMul3x3Transpose(const Vector &vVec) const;
+
+ // Multiply by the upper 3 rows.
+ Vector VMul4x3(const Vector &vVec) const;
+
+ // Apply the inverse (transposed) transformation (only works on pure rotation/translation)
+ Vector VMul4x3Transpose(const Vector &vVec) const;
+#endif
+
+
+// Matrix->plane operations.
+public:
+ // Transform the plane. The matrix can only contain translation and rotation.
+ void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // Just calls TransformPlane and returns the result.
+ VPlane operator*(const VPlane &thePlane) const;
+#endif
+
+// Matrix->matrix operations.
+public:
+
+ VMatrix& operator=(const VMatrix &mOther);
+
+ // Multiply two matrices (out = this * vm).
+ void MatrixMul( const VMatrix &vm, VMatrix &out ) const;
+
+ // Add two matrices.
+ const VMatrix& operator+=(const VMatrix &other);
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // Just calls MatrixMul and returns the result.
+ VMatrix operator*(const VMatrix &mOther) const;
+
+ // Add/Subtract two matrices.
+ VMatrix operator+(const VMatrix &other) const;
+ VMatrix operator-(const VMatrix &other) const;
+
+ // Negation.
+ VMatrix operator-() const;
+
+ // Return inverse matrix. Be careful because the results are undefined
+ // if the matrix doesn't have an inverse (ie: InverseGeneral returns false).
+ VMatrix operator~() const;
+#endif
+
+// Matrix operations.
+public:
+ // Set to identity.
+ void Identity();
+
+ bool IsIdentity() const;
+
+ // Setup a matrix for origin and angles.
+ void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles );
+
+ // General inverse. This may fail so check the return!
+ bool InverseGeneral(VMatrix &vInverse) const;
+
+ // Does a fast inverse, assuming the matrix only contains translation and rotation.
+ void InverseTR( VMatrix &mRet ) const;
+
+ // Usually used for debug checks. Returns true if the upper 3x3 contains
+ // unit vectors and they are all orthogonal.
+ bool IsRotationMatrix() const;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // This calls the other InverseTR and returns the result.
+ VMatrix InverseTR() const;
+
+ // Get the scale of the matrix's basis vectors.
+ Vector GetScale() const;
+
+ // (Fast) multiply by a scaling matrix setup from vScale.
+ VMatrix Scale(const Vector &vScale);
+
+ // Normalize the basis vectors.
+ VMatrix NormalizeBasisVectors() const;
+
+ // Transpose.
+ VMatrix Transpose() const;
+
+ // Transpose upper-left 3x3.
+ VMatrix Transpose3x3() const;
+#endif
+
+public:
+ // The matrix.
+ vec_t m[4][4];
+};
+
+
+
+//-----------------------------------------------------------------------------
+// Helper functions.
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Setup an identity matrix.
+VMatrix SetupMatrixIdentity();
+
+// Setup as a scaling matrix.
+VMatrix SetupMatrixScale(const Vector &vScale);
+
+// Setup a translation matrix.
+VMatrix SetupMatrixTranslation(const Vector &vTranslation);
+
+// Setup a matrix to reflect around the plane.
+VMatrix SetupMatrixReflection(const VPlane &thePlane);
+
+// Setup a matrix to project from vOrigin onto thePlane.
+VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane);
+
+// Setup a matrix to rotate the specified amount around the specified axis.
+VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees);
+
+// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles.
+VMatrix SetupMatrixAngles(const QAngle &vAngles);
+
+// Setup a matrix for origin and angles.
+VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles);
+
+#endif
+
+#define VMatToString(mat) (static_cast<const char *>(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference!
+
+//-----------------------------------------------------------------------------
+// Returns the point at the intersection on the 3 planes.
+// Returns false if it can't be solved (2 or more planes are parallel).
+//-----------------------------------------------------------------------------
+bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut );
+
+
+//-----------------------------------------------------------------------------
+// These methods are faster. Use them if you want faster code
+//-----------------------------------------------------------------------------
+void MatrixSetIdentity( VMatrix &dst );
+void MatrixTranspose( const VMatrix& src, VMatrix& dst );
+void MatrixCopy( const VMatrix& src, VMatrix& dst );
+void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst );
+
+// Accessors
+void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn );
+void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column );
+void MatrixGetRow( const VMatrix &src, int nCol, Vector *pColumn );
+void MatrixSetRow( VMatrix &src, int nCol, const Vector &column );
+
+// Vector3DMultiply treats src2 as if it's a direction vector
+void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst );
+
+// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
+inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst );
+
+// Vector3DMultiplyPositionProjective treats src2 as if it's a point
+// and does the perspective divide at the end
+void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
+
+// Vector3DMultiplyPosition treats src2 as if it's a direction
+// and does the perspective divide at the end
+// NOTE: src1 had better be an inverse transpose to use this correctly
+void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
+
+void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
+
+// Same as Vector4DMultiply except that src2 has an implicit W of 1
+void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, Vector4D& dst );
+
+// Multiplies the vector by the transpose of the matrix
+void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst );
+void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
+
+// Transform a plane
+void MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane );
+
+// Transform a plane that has an axis-aligned normal
+void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane );
+
+void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z );
+void MatrixBuildTranslation( VMatrix& dst, const Vector &translation );
+
+inline void MatrixTranslate( VMatrix& dst, const Vector &translation )
+{
+ VMatrix matTranslation, temp;
+ MatrixBuildTranslation( matTranslation, translation );
+ MatrixMultiply( dst, matTranslation, temp );
+ dst = temp;
+}
+
+
+void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees );
+void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees );
+
+inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees )
+{
+ VMatrix rotation, temp;
+ MatrixBuildRotationAboutAxis( rotation, vAxisOfRot, angleDegrees );
+ MatrixMultiply( dst, rotation, temp );
+ dst = temp;
+}
+
+// Builds a rotation matrix that rotates one direction vector into another
+void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection );
+
+// Builds a scale matrix
+void MatrixBuildScale( VMatrix &dst, float x, float y, float z );
+void MatrixBuildScale( VMatrix &dst, const Vector& scale );
+
+// Build a perspective matrix.
+// zNear and zFar are assumed to be positive.
+// You end up looking down positive Z, X is to the right, Y is up.
+// X range: [0..1]
+// Y range: [0..1]
+// Z range: [0..1]
+void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar );
+
+//-----------------------------------------------------------------------------
+// Given a projection matrix, take the extremes of the space in transformed into world space and
+// get a bounding box.
+//-----------------------------------------------------------------------------
+void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs );
+
+//-----------------------------------------------------------------------------
+// Given a projection matrix, take the extremes of the space in transformed into world space and
+// get a bounding sphere.
+//-----------------------------------------------------------------------------
+void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius );
+
+//-----------------------------------------------------------------------------
+// Given an inverse projection matrix, take the extremes of the space in transformed into world space and
+// get a bounding box.
+//-----------------------------------------------------------------------------
+void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs );
+
+//-----------------------------------------------------------------------------
+// Given an inverse projection matrix, take the extremes of the space in transformed into world space and
+// get a bounding sphere.
+//-----------------------------------------------------------------------------
+void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius );
+
+//-----------------------------------------------------------------------------
+// Calculate frustum planes given a clip->world space transform.
+//-----------------------------------------------------------------------------
+void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum );
+
+//-----------------------------------------------------------------------------
+// Setup a matrix from euler angles.
+//-----------------------------------------------------------------------------
+void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst );
+
+//-----------------------------------------------------------------------------
+// Creates euler angles from a matrix
+//-----------------------------------------------------------------------------
+void MatrixToAngles( const VMatrix& src, QAngle& vAngles );
+
+//-----------------------------------------------------------------------------
+// Does a fast inverse, assuming the matrix only contains translation and rotation.
+//-----------------------------------------------------------------------------
+void MatrixInverseTR( const VMatrix& src, VMatrix &dst );
+
+//-----------------------------------------------------------------------------
+// Inverts any matrix at all
+//-----------------------------------------------------------------------------
+bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst);
+
+//-----------------------------------------------------------------------------
+// Computes the inverse transpose
+//-----------------------------------------------------------------------------
+void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst );
+
+
+
+//-----------------------------------------------------------------------------
+// VMatrix inlines.
+//-----------------------------------------------------------------------------
+// Default constructor: the body is empty, so no element of m is assigned here.
+inline VMatrix::VMatrix() {}
+
+// Builds the matrix from sixteen scalars; mRC becomes the element at row R, column C.
+inline VMatrix::VMatrix(
+	vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+	vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+	vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+	vec_t m30, vec_t m31, vec_t m32, vec_t m33)
+{
+	// All the work is done by the sixteen-scalar Init overload.
+	Init( m00, m01, m02, m03,
+	      m10, m11, m12, m13,
+	      m20, m21, m22, m23,
+	      m30, m31, m32, m33 );
+}
+
+
+// Builds the matrix from a 3x4 matrix by forwarding to Init( const matrix3x4_t& ).
+inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 )
+{
+	this->Init( matrix3x4 );
+}
+
+
+//-----------------------------------------------------------------------------
+// Creates a matrix where the X axis = forward
+// the Y axis = left, and the Z axis = up
+//-----------------------------------------------------------------------------
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis )
+{
+	// The three axes become columns 0, 1 and 2 of the upper 3x3.
+	SetBasisVectors( xAxis, yAxis, zAxis );
+
+	// No translation: column 3 of the first three rows is zero.
+	m[0][3] = 0.0f;
+	m[1][3] = 0.0f;
+	m[2][3] = 0.0f;
+
+	// Bottom row is (0, 0, 0, 1).
+	m[3][0] = 0.0f;
+	m[3][1] = 0.0f;
+	m[3][2] = 0.0f;
+	m[3][3] = 1.0f;
+}
+
+// Builds a matrix whose first three columns are the given axes and whose
+// fourth column holds the translation; the bottom row is (0, 0, 0, 1).
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
+{
+	SetBasisVectors( xAxis, yAxis, zAxis );
+	SetTranslation( translation );
+
+	m[3][0] = 0.0f;
+	m[3][1] = 0.0f;
+	m[3][2] = 0.0f;
+	m[3][3] = 1.0f;
+}
+
+
+// Sets all sixteen elements; mRC is stored at m[R][C].
+inline void VMatrix::Init(
+	vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+	vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+	vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+	vec_t m30, vec_t m31, vec_t m32, vec_t m33
+	)
+{
+	// Stage the arguments in a local 4x4 table, then copy element by element.
+	const vec_t values[4][4] =
+	{
+		{ m00, m01, m02, m03 },
+		{ m10, m11, m12, m13 },
+		{ m20, m21, m22, m23 },
+		{ m30, m31, m32, m33 },
+	};
+
+	for ( int nRow = 0; nRow < 4; ++nRow )
+	{
+		for ( int nCol = 0; nCol < 4; ++nCol )
+		{
+			m[nRow][nCol] = values[nRow][nCol];
+		}
+	}
+}
+
+
+//-----------------------------------------------------------------------------
+// Initialize from a 3x4
+//-----------------------------------------------------------------------------
+inline void VMatrix::Init( const matrix3x4_t& matrix3x4 )
+{
+	// Copy sizeof( matrix3x4_t ) bytes from the source into the start of m.
+	memcpy( m, matrix3x4.Base(), sizeof( matrix3x4_t ) );
+
+	// Fill in the fourth row, which the 3x4 source does not supply: (0, 0, 0, 1).
+	vec_t *pLastRow = m[3];
+	pLastRow[0] = 0.0f;
+	pLastRow[1] = 0.0f;
+	pLastRow[2] = 0.0f;
+	pLastRow[3] = 1.0f;
+}
+
+
+//-----------------------------------------------------------------------------
+// Methods related to the basis vectors of the matrix
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Returns column 0 of the upper 3x3 (the forward basis vector) by value.
+inline Vector VMatrix::GetForward() const
+{
+	Vector vForward;
+	vForward.Init( m[0][0], m[1][0], m[2][0] );
+	return vForward;
+}
+
+// Returns column 1 of the upper 3x3 (the left basis vector) by value.
+inline Vector VMatrix::GetLeft() const
+{
+	Vector vLeft;
+	vLeft.Init( m[0][1], m[1][1], m[2][1] );
+	return vLeft;
+}
+
+// Returns column 2 of the upper 3x3 (the up basis vector) by value.
+inline Vector VMatrix::GetUp() const
+{
+	Vector vUp;
+	vUp.Init( m[0][2], m[1][2], m[2][2] );
+	return vUp;
+}
+
+#endif
+
+// Writes vForward into column 0 of the first three rows; nothing else is touched.
+inline void VMatrix::SetForward(const Vector &vForward)
+{
+	const vec_t column[3] = { vForward.x, vForward.y, vForward.z };
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		m[nRow][0] = column[nRow];
+	}
+}
+
+// Writes vLeft into column 1 of the first three rows; nothing else is touched.
+inline void VMatrix::SetLeft(const Vector &vLeft)
+{
+	const vec_t column[3] = { vLeft.x, vLeft.y, vLeft.z };
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		m[nRow][1] = column[nRow];
+	}
+}
+
+// Writes vUp into column 2 of the first three rows; nothing else is touched.
+inline void VMatrix::SetUp(const Vector &vUp)
+{
+	const vec_t column[3] = { vUp.x, vUp.y, vUp.z };
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		m[nRow][2] = column[nRow];
+	}
+}
+
+// Reads columns 0, 1 and 2 of the upper 3x3 into vForward, vLeft and vUp respectively.
+inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const
+{
+	const vec_t (&rows)[4][4] = m;
+
+	vForward.Init( rows[0][0], rows[1][0], rows[2][0] );
+	vLeft.Init(    rows[0][1], rows[1][1], rows[2][1] );
+	vUp.Init(      rows[0][2], rows[1][2], rows[2][2] );
+}
+
+// Stores vForward, vLeft and vUp as columns 0, 1 and 2 of the first three rows.
+inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp)
+{
+	// Column 0: forward.
+	m[0][0] = vForward.x;
+	m[1][0] = vForward.y;
+	m[2][0] = vForward.z;
+
+	// Column 1: left.
+	m[0][1] = vLeft.x;
+	m[1][1] = vLeft.y;
+	m[2][1] = vLeft.z;
+
+	// Column 2: up.
+	m[0][2] = vUp.x;
+	m[1][2] = vUp.y;
+	m[2][2] = vUp.z;
+}
+
+
+//-----------------------------------------------------------------------------
+// Methods related to the translation component of the matrix
+//-----------------------------------------------------------------------------
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Returns the translation (column 3 of the first three rows) by value.
+inline Vector VMatrix::GetTranslation() const
+{
+	Vector vTranslation;
+	vTranslation.Init( m[0][3], m[1][3], m[2][3] );
+	return vTranslation;
+}
+
+#endif
+
+// Copies the translation (column 3 of the first three rows) into vTrans
+// and returns a reference to that same vTrans.
+inline Vector& VMatrix::GetTranslation( Vector &vTrans ) const
+{
+	const vec_t flX = m[0][3];
+	const vec_t flY = m[1][3];
+	const vec_t flZ = m[2][3];
+
+	vTrans.x = flX;
+	vTrans.y = flY;
+	vTrans.z = flZ;
+	return vTrans;
+}
+
+// Overwrites the translation (column 3 of the first three rows) with vTrans.
+inline void VMatrix::SetTranslation(const Vector &vTrans)
+{
+	const vec_t column[3] = { vTrans.x, vTrans.y, vTrans.z };
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		m[nRow][3] = column[nRow];
+	}
+}
+
+
+//-----------------------------------------------------------------------------
+// Apply a translation to this matrix in the input space
+//-----------------------------------------------------------------------------
+inline void VMatrix::PreTranslate(const Vector &vTrans)
+{
+	// Run vTrans through this matrix as a position; the result becomes the new translation column.
+	Vector vNewTranslation;
+	Vector3DMultiplyPosition( *this, vTrans, vNewTranslation );
+	SetTranslation( vNewTranslation );
+}
+
+
+//-----------------------------------------------------------------------------
+// Apply a translation to this matrix in the output space
+//-----------------------------------------------------------------------------
+inline void VMatrix::PostTranslate(const Vector &vTrans)
+{
+	// Add vTrans component-wise to the existing translation column.
+	m[0][3] = m[0][3] + vTrans.x;
+	m[1][3] = m[1][3] + vTrans.y;
+	m[2][3] = m[2][3] + vTrans.z;
+}
+
+// Reinterprets this matrix's storage as a read-only matrix3x4_t; no data is copied.
+inline const matrix3x4_t& VMatrix::As3x4() const
+{
+	const matrix3x4_t *pAs3x4 = (const matrix3x4_t*)this;
+	return *pAs3x4;
+}
+
+// Reinterprets this matrix's storage as a writable matrix3x4_t; no data is copied.
+inline matrix3x4_t& VMatrix::As3x4()
+{
+	matrix3x4_t *pAs3x4 = (matrix3x4_t*)this;
+	return *pAs3x4;
+}
+
+// Copies the 3x4 matrix into the first three rows and sets the fourth row to (0, 0, 0, 1).
+inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
+{
+	memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) );
+
+	vec_t *pLastRow = m[3];
+	pLastRow[0] = 0;
+	pLastRow[1] = 0;
+	pLastRow[2] = 0;
+	pLastRow[3] = 1;
+}
+
+// Copies the leading sizeof( matrix3x4_t ) bytes of this matrix into matrix3x4.
+inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const
+{
+	const size_t nBytes = sizeof( matrix3x4_t );
+	memcpy( matrix3x4.Base(), m, nBytes );
+}
+
+
+//-----------------------------------------------------------------------------
+// Matrix math operations
+//-----------------------------------------------------------------------------
+// Adds other to this matrix element by element and returns this matrix.
+inline const VMatrix& VMatrix::operator+=(const VMatrix &other)
+{
+	for ( int nRow = 0; nRow < 4; ++nRow )
+	{
+		vec_t *pRow = m[nRow];
+		const vec_t *pOtherRow = other.m[nRow];
+
+		pRow[0] += pOtherRow[0];
+		pRow[1] += pOtherRow[1];
+		pRow[2] += pOtherRow[2];
+		pRow[3] += pOtherRow[3];
+	}
+
+	return *this;
+}
+
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Returns the element-wise sum of this matrix and other; neither operand is modified.
+inline VMatrix VMatrix::operator+(const VMatrix &other) const
+{
+	VMatrix sum;
+	for ( int nRow = 0; nRow < 4; ++nRow )
+	{
+		for ( int nCol = 0; nCol < 4; ++nCol )
+		{
+			sum.m[nRow][nCol] = m[nRow][nCol] + other.m[nRow][nCol];
+		}
+	}
+	return sum;
+}
+
+// Returns the element-wise difference (this - other); neither operand is modified.
+inline VMatrix VMatrix::operator-(const VMatrix &other) const
+{
+	VMatrix difference;
+
+	for ( int nRow = 0; nRow < 4; ++nRow )
+	{
+		const vec_t *pLhs = m[nRow];
+		const vec_t *pRhs = other.m[nRow];
+		vec_t *pOut = difference.m[nRow];
+
+		pOut[0] = pLhs[0] - pRhs[0];
+		pOut[1] = pLhs[1] - pRhs[1];
+		pOut[2] = pLhs[2] - pRhs[2];
+		pOut[3] = pLhs[3] - pRhs[3];
+	}
+
+	return difference;
+}
+
+// Unary negation: returns a new matrix in which each of the sixteen elements
+// is the negative of the corresponding element of this matrix.
+// This matrix itself is not modified.
+inline VMatrix VMatrix::operator-() const
+{
+	VMatrix ret;
+	for( int i=0; i < 4; i++ )
+	{
+		for( int j=0; j < 4; j++ )
+		{
+			// Negate every element; a plain copy here would make -M identical to M.
+			ret.m[i][j] = -m[i][j];
+		}
+	}
+	return ret;
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+//-----------------------------------------------------------------------------
+// Vector transformation
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Transforms vVec as a position: upper 3x3 times vVec, plus the translation column.
+// The fourth row of the matrix is not used.
+inline Vector VMatrix::operator*(const Vector &vVec) const
+{
+	const vec_t flX = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3];
+	const vec_t flY = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3];
+	const vec_t flZ = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3];
+
+	Vector vTransformed;
+	vTransformed.x = flX;
+	vTransformed.y = flY;
+	vTransformed.z = flZ;
+	return vTransformed;
+}
+
+// Returns vVec transformed as a position by Vector3DMultiplyPosition.
+inline Vector VMatrix::VMul4x3(const Vector &vVec) const
+{
+	Vector vTransformed;
+	Vector3DMultiplyPosition( *this, vVec, vTransformed );
+	return vTransformed;
+}
+
+
+// Subtracts the translation column from vVec, then multiplies the result by the
+// transpose of the upper 3x3.
+inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const
+{
+	// Remove the translation first.
+	Vector vLocal = vVec;
+	vLocal.x = vLocal.x - m[0][3];
+	vLocal.y = vLocal.y - m[1][3];
+	vLocal.z = vLocal.z - m[2][3];
+
+	// Then dot the offset vector against each column of the 3x3.
+	return VMul3x3Transpose( vLocal );
+}
+
+// Multiplies vVec by the upper 3x3 only; the translation column is ignored.
+inline Vector VMatrix::VMul3x3(const Vector &vVec) const
+{
+	const vec_t flX = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z;
+	const vec_t flY = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z;
+	const vec_t flZ = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z;
+
+	return Vector( flX, flY, flZ );
+}
+
+// Multiplies vVec by the transpose of the upper 3x3; the translation column is ignored.
+inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const
+{
+	const vec_t flX = m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z;
+	const vec_t flY = m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z;
+	const vec_t flZ = m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z;
+
+	return Vector( flX, flY, flZ );
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+// Transforms the point vIn by the full 4x4 matrix (treating its w as 1) and
+// divides x, y and z by the resulting w, writing the result into vOut.
+// vIn and vOut may be the same object: the input is read completely before
+// any component of vOut is written.
+// NOTE: there is no guard against the computed w being zero.
+inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const
+{
+	// Snapshot the input so that writing vOut cannot change it when the two alias.
+	const vec_t x = vIn.x;
+	const vec_t y = vIn.y;
+	const vec_t z = vIn.z;
+
+	// Reciprocal of the homogeneous w produced by the fourth row.
+	const vec_t rw = 1.0f / (m[3][0]*x + m[3][1]*y + m[3][2]*z + m[3][3]);
+
+	vOut.x = (m[0][0]*x + m[0][1]*y + m[0][2]*z + m[0][3]) * rw;
+	vOut.y = (m[1][0]*x + m[1][1]*y + m[1][2]*z + m[1][3]) * rw;
+	vOut.z = (m[2][0]*x + m[2][1]*y + m[2][2]*z + m[2][3]) * rw;
+}
+
+// Multiplies the 4-component vector vIn by the full 4x4 matrix and writes the
+// result into vOut. No perspective divide is performed.
+// vIn and vOut may be the same object: the input is read completely before
+// any component of vOut is written.
+inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const
+{
+	// Snapshot the input so that writing vOut cannot change it when the two alias.
+	const vec_t x = vIn[0];
+	const vec_t y = vIn[1];
+	const vec_t z = vIn[2];
+	const vec_t w = vIn[3];
+
+	vOut[0] = m[0][0]*x + m[0][1]*y + m[0][2]*z + m[0][3]*w;
+	vOut[1] = m[1][0]*x + m[1][1]*y + m[1][2]*z + m[1][3]*w;
+	vOut[2] = m[2][0]*x + m[2][1]*y + m[2][2]*z + m[2][3]*w;
+	vOut[3] = m[3][0]*x + m[3][1]*y + m[3][2]*z + m[3][3]*w;
+}
+
+
+//-----------------------------------------------------------------------------
+// Plane transformation
+//-----------------------------------------------------------------------------
+inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const
+{
+	// The output normal is the input normal run through Vector3DMultiply.
+	Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal );
+
+	// Scale the input distance by the squared length of the new normal...
+	outPlane.m_Dist = inPlane.m_Dist * DotProduct( outPlane.m_Normal, outPlane.m_Normal );
+
+	// ...then add the projection of this matrix's translation onto the new normal.
+	Vector vOrigin;
+	GetTranslation( vOrigin );
+	outPlane.m_Dist += DotProduct( outPlane.m_Normal, vOrigin );
+}
+
+
+//-----------------------------------------------------------------------------
+// Other random stuff
+//-----------------------------------------------------------------------------
+// Resets this matrix by handing it to MatrixSetIdentity.
+inline void VMatrix::Identity()
+{
+	VMatrix &self = *this;
+	MatrixSetIdentity( self );
+}
+
+
+// Returns true only when every element compares exactly equal (no tolerance)
+// to the identity matrix: 1.0f on the diagonal, 0.0f everywhere else.
+inline bool VMatrix::IsIdentity() const
+{
+	for ( int nRow = 0; nRow < 4; ++nRow )
+	{
+		for ( int nCol = 0; nCol < 4; ++nCol )
+		{
+			const vec_t flExpected = ( nRow == nCol ) ? 1.0f : 0.0f;
+			if ( !( m[nRow][nCol] == flExpected ) )
+				return false;
+		}
+	}
+	return true;
+}
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Forwards to VMul3x3, so only the upper 3x3 of the matrix is applied to vVec.
+inline Vector VMatrix::ApplyRotation(const Vector &vVec) const
+{
+	return this->VMul3x3( vVec );
+}
+
+// Returns the matrix produced by InverseGeneral. Anything InverseGeneral
+// returns is not examined here.
+inline VMatrix VMatrix::operator~() const
+{
+	VMatrix mInverse;
+	InverseGeneral( mInverse );
+	return mInverse;
+}
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Accessors
+//-----------------------------------------------------------------------------
+// Copies the first three entries of column nCol (rows 0..2) into *pColumn.
+// nCol must be in [0, 3]; row 3 of the column is not read.
+inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn )
+{
+	Assert( (nCol >= 0) && (nCol <= 3) );
+
+	Vector &column = *pColumn;
+	column.x = src[0][nCol];
+	column.y = src[1][nCol];
+	column.z = src[2][nCol];
+}
+
+// Writes column into rows 0..2 of column nCol; row 3 of that column is left as is.
+// nCol must be in [0, 3].
+inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column )
+{
+	Assert( (nCol >= 0) && (nCol <= 3) );
+
+	const vec_t values[3] = { column.x, column.y, column.z };
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		src.m[nRow][nCol] = values[nRow];
+	}
+}
+
+// Copies the first three entries of row nRow into *pRow by viewing the row's
+// storage as a Vector. nRow must be in [0, 3].
+inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow )
+{
+	Assert( (nRow >= 0) && (nRow <= 3) );
+
+	Vector *pSrcRow = (Vector*)src[nRow];
+	*pRow = *pSrcRow;
+}
+
+// Overwrites the first three entries of row nRow with row by viewing the row's
+// storage as a Vector; the fourth entry is untouched. nRow must be in [0, 3].
+inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row )
+{
+	Assert( (nRow >= 0) && (nRow <= 3) );
+
+	Vector *pDstRow = (Vector*)dst[nRow];
+	*pDstRow = row;
+}
+
+
+//-----------------------------------------------------------------------------
+// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
+//-----------------------------------------------------------------------------
+// NJS: src2 is passed in as a full vector rather than a reference to prevent the need
+// for 2 branches and a potential copy in the body. (ie, handling the case when the src2
+// reference is the same as the dst reference ).
+inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst )
+{
+	// src2 is a by-value copy, so dst may safely be the caller's source vector.
+	for ( int nRow = 0; nRow < 3; ++nRow )
+	{
+		// Row of the upper 3x3 dotted with the point, plus that row's translation entry.
+		dst[nRow] = src1[nRow][0] * src2.x + src1[nRow][1] * src2.y + src1[nRow][2] * src2.z + src1[nRow][3];
+	}
+}
+
+
+//-----------------------------------------------------------------------------
+// Transform a plane that has an axis-aligned normal
+//-----------------------------------------------------------------------------
+inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane )
+{
+	// See MatrixTransformPlane in the .cpp file for an explanation of the algorithm.
+
+	// The new normal is column nDim of the matrix, scaled by flSign.
+	MatrixGetColumn( src, nDim, &outPlane.normal );
+	outPlane.normal *= flSign;
+
+	// Scale the input distance by the squared length of the new normal.
+	outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal );
+
+	// NOTE: The dot product with the matrix translation is written out by hand because the
+	// call form doesn't inline (inline depth isn't large enough).
+	const Vector &vNormal = outPlane.normal;
+	outPlane.dist += vNormal.x * src.m[0][3] + vNormal.y * src.m[1][3] + vNormal.z * src.m[2][3];
+}
+
+
+//-----------------------------------------------------------------------------
+// Matrix equality test
+//-----------------------------------------------------------------------------
+// Returns true when every one of the sixteen corresponding elements of src1
+// and src2 differs by no more than flTolerance, false as soon as one pair
+// differs by more than that.
+inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance )
+{
+	// A VMatrix is 4x4: walk all four rows and all four columns so that the
+	// translation column and the bottom row take part in the comparison too.
+	for ( int i = 0; i < 4; ++i )
+	{
+		for ( int j = 0; j < 4; ++j )
+		{
+			if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance )
+				return false;
+		}
+	}
+	return true;
+}
+
+//-----------------------------------------------------------------------------
+// Projection matrix helpers (orthographic and perspective)
+//-----------------------------------------------------------------------------
+void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar );
+void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar );
+void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right );
+void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar );
+
+// Builds a matrix with MatrixBuildOrtho, combines dst with it through
+// MatrixMultiply( dst, built, result ), and stores the result back into dst.
+inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar )
+{
+	VMatrix mOrtho;
+	MatrixBuildOrtho( mOrtho, left, top, right, bottom, zNear, zFar );
+
+	// Multiply into a scratch matrix, then copy back over dst.
+	VMatrix mCombined;
+	MatrixMultiply( dst, mOrtho, mCombined );
+	dst = mCombined;
+}
+
+// Builds a matrix with MatrixBuildPerspectiveX, combines dst with it through
+// MatrixMultiply( dst, built, result ), and stores the result back into dst.
+inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
+{
+	VMatrix mPerspective;
+	MatrixBuildPerspectiveX( mPerspective, flFovX, flAspect, flZNear, flZFar );
+
+	// Multiply into a scratch matrix, then copy back over dst.
+	VMatrix mCombined;
+	MatrixMultiply( dst, mPerspective, mCombined );
+	dst = mCombined;
+}
+
+// Builds a matrix with MatrixBuildPerspectiveOffCenterX, combines dst with it through
+// MatrixMultiply( dst, built, result ), and stores the result back into dst.
+inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
+{
+	VMatrix mPerspective;
+	MatrixBuildPerspectiveOffCenterX( mPerspective, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right );
+
+	// Multiply into a scratch matrix, then copy back over dst.
+	VMatrix mCombined;
+	MatrixMultiply( dst, mPerspective, mCombined );
+	dst = mCombined;
+}
+
+#endif
+
+
diff --git a/mp/src/public/mathlib/vplane.h b/mp/src/public/mathlib/vplane.h
index 2c4441de..dd3d4a9a 100644
--- a/mp/src/public/mathlib/vplane.h
+++ b/mp/src/public/mathlib/vplane.h
@@ -1,182 +1,182 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $Workfile: $
-// $Date: $
-// $NoKeywords: $
-//=============================================================================//
-
-#ifndef VPLANE_H
-#define VPLANE_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/vector.h"
-
-typedef int SideType;
-
-// Used to represent sides of things like planes.
-#define SIDE_FRONT 0
-#define SIDE_BACK 1
-#define SIDE_ON 2
-
-#define VP_EPSILON 0.01f
-
-
-class VPlane
-{
-public:
- VPlane();
- VPlane(const Vector &vNormal, vec_t dist);
-
- void Init(const Vector &vNormal, vec_t dist);
-
- // Return the distance from the point to the plane.
- vec_t DistTo(const Vector &vVec) const;
-
- // Copy.
- VPlane& operator=(const VPlane &thePlane);
-
- // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK.
- // The epsilon for SIDE_ON can be passed in.
- SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const;
-
- // Returns SIDE_FRONT or SIDE_BACK.
- SideType GetPointSideExact(const Vector &vPoint) const;
-
- // Classify the box with respect to the plane.
- // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK
- SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Flip the plane.
- VPlane Flip();
-
- // Get a point on the plane (normal*dist).
- Vector GetPointOnPlane() const;
-
- // Snap the specified point to the plane (along the plane's normal).
- Vector SnapPointToPlane(const Vector &vPoint) const;
-#endif
-
-public:
- Vector m_Normal;
- vec_t m_Dist;
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-private:
- // No copy constructors allowed if we're in optimal mode
- VPlane(const VPlane& vOther);
-#endif
-};
-
-
-//-----------------------------------------------------------------------------
-// Inlines.
-//-----------------------------------------------------------------------------
-inline VPlane::VPlane()
-{
-}
-
-inline VPlane::VPlane(const Vector &vNormal, vec_t dist)
-{
- m_Normal = vNormal;
- m_Dist = dist;
-}
-
-inline void VPlane::Init(const Vector &vNormal, vec_t dist)
-{
- m_Normal = vNormal;
- m_Dist = dist;
-}
-
-inline vec_t VPlane::DistTo(const Vector &vVec) const
-{
- return vVec.Dot(m_Normal) - m_Dist;
-}
-
-inline VPlane& VPlane::operator=(const VPlane &thePlane)
-{
- m_Normal = thePlane.m_Normal;
- m_Dist = thePlane.m_Dist;
- return *this;
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline VPlane VPlane::Flip()
-{
- return VPlane(-m_Normal, -m_Dist);
-}
-
-inline Vector VPlane::GetPointOnPlane() const
-{
- return m_Normal * m_Dist;
-}
-
-inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const
-{
- return vPoint - m_Normal * DistTo(vPoint);
-}
-
-#endif
-
-inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const
-{
- vec_t fDist;
-
- fDist = DistTo(vPoint);
- if(fDist >= sideEpsilon)
- return SIDE_FRONT;
- else if(fDist <= -sideEpsilon)
- return SIDE_BACK;
- else
- return SIDE_ON;
-}
-
-inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const
-{
- return DistTo(vPoint) > 0.0f ? SIDE_FRONT : SIDE_BACK;
-}
-
-
-// BUGBUG: This should either simply use the implementation in mathlib or cease to exist.
-// mathlib implementation is much more efficient. Check to see that VPlane isn't used in
-// performance critical code.
-inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const
-{
- int i, firstSide, side;
- TableVector vPoints[8] =
- {
- { vMin.x, vMin.y, vMin.z },
- { vMin.x, vMin.y, vMax.z },
- { vMin.x, vMax.y, vMax.z },
- { vMin.x, vMax.y, vMin.z },
-
- { vMax.x, vMin.y, vMin.z },
- { vMax.x, vMin.y, vMax.z },
- { vMax.x, vMax.y, vMax.z },
- { vMax.x, vMax.y, vMin.z },
- };
-
- firstSide = GetPointSideExact(vPoints[0]);
- for(i=1; i < 8; i++)
- {
- side = GetPointSideExact(vPoints[i]);
-
- // Does the box cross the plane?
- if(side != firstSide)
- return SIDE_ON;
- }
-
- // Ok, they're all on the same side, return that.
- return firstSide;
-}
-
-
-
-
-#endif // VPLANE_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose:
+//
+// $Workfile: $
+// $Date: $
+// $NoKeywords: $
+//=============================================================================//
+
+#ifndef VPLANE_H
+#define VPLANE_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include "mathlib/vector.h"
+
+typedef int SideType;
+
+// Used to represent sides of things like planes.
+#define SIDE_FRONT 0
+#define SIDE_BACK 1
+#define SIDE_ON 2
+
+#define VP_EPSILON 0.01f
+
+
+class VPlane
+{
+public:
+ VPlane(); // Empty body: does not assign m_Normal or m_Dist.
+ VPlane(const Vector &vNormal, vec_t dist); // Stores vNormal and dist as given.
+
+ void Init(const Vector &vNormal, vec_t dist); // Stores vNormal and dist as given.
+
+ // Returns vVec.Dot(m_Normal) - m_Dist, the signed distance from the point to the plane (scaled by the normal's length).
+ vec_t DistTo(const Vector &vVec) const;
+
+ // Copy: assigns m_Normal and m_Dist from thePlane and returns *this.
+ VPlane& operator=(const VPlane &thePlane);
+
+ // Returns SIDE_FRONT if DistTo(vPoint) >= sideEpsilon, SIDE_BACK if it is <= -sideEpsilon,
+ // and SIDE_ON otherwise. The epsilon for SIDE_ON can be passed in.
+ SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const;
+
+ // Returns SIDE_FRONT when DistTo(vPoint) > 0, otherwise SIDE_BACK (a point exactly on the plane is SIDE_BACK).
+ SideType GetPointSideExact(const Vector &vPoint) const;
+
+ // Classify the box with respect to the plane using GetPointSideExact on its eight corners.
+ // Returns SIDE_FRONT or SIDE_BACK when all corners agree, SIDE_ON when they do not.
+ SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+ // Returns a flipped copy of the plane (negated normal and distance); this plane is not modified.
+ VPlane Flip();
+
+ // Get a point on the plane (normal*dist).
+ Vector GetPointOnPlane() const;
+
+ // Snap the specified point to the plane (along the plane's normal): vPoint - m_Normal * DistTo(vPoint).
+ Vector SnapPointToPlane(const Vector &vPoint) const;
+#endif
+
+public:
+ Vector m_Normal; // Plane normal; nothing in this class normalizes it.
+ vec_t m_Dist; // Offset subtracted from Dot(point, m_Normal) in DistTo.
+
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+private:
+ // No copy constructors allowed if we're in optimal mode
+ VPlane(const VPlane& vOther);
+#endif
+};
+
+
+//-----------------------------------------------------------------------------
+// Inlines.
+//-----------------------------------------------------------------------------
+// Default constructor: the body is empty, so m_Normal and m_Dist are not assigned here.
+inline VPlane::VPlane() {}
+
+// Constructs the plane from a normal and a distance, stored exactly as given.
+inline VPlane::VPlane(const Vector &vNormal, vec_t dist)
+{
+	this->m_Normal = vNormal;
+	this->m_Dist = dist;
+}
+
+// Re-initializes the plane from a normal and a distance, stored exactly as given.
+inline void VPlane::Init(const Vector &vNormal, vec_t dist)
+{
+	this->m_Normal = vNormal;
+	this->m_Dist = dist;
+}
+
+// Signed distance from vVec to the plane: Dot(vVec, m_Normal) - m_Dist.
+inline vec_t VPlane::DistTo(const Vector &vVec) const
+{
+	return ( vVec.Dot( m_Normal ) - m_Dist );
+}
+
+// Copy-assigns the normal and distance from thePlane and returns this plane.
+inline VPlane& VPlane::operator=(const VPlane &thePlane)
+{
+	const VPlane &source = thePlane;
+	m_Normal = source.m_Normal;
+	m_Dist = source.m_Dist;
+	return *this;
+}
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+// Returns a new plane with negated normal and negated distance.
+// This plane itself is left unchanged.
+inline VPlane VPlane::Flip()
+{
+	return VPlane( -m_Normal, -m_Dist );
+}
+
+// Returns m_Normal scaled by m_Dist.
+inline Vector VPlane::GetPointOnPlane() const
+{
+	return ( m_Normal * m_Dist );
+}
+
+// Moves vPoint along the plane normal by its signed distance to the plane
+// and returns the result; vPoint itself is not modified.
+inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const
+{
+	return ( vPoint - m_Normal * DistTo( vPoint ) );
+}
+
+#endif
+
+// Classifies vPoint against the plane with a tolerance band of +/- sideEpsilon:
+// SIDE_FRONT at or beyond +sideEpsilon, SIDE_BACK at or beyond -sideEpsilon,
+// SIDE_ON for anything strictly in between.
+inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const
+{
+	const vec_t flSignedDist = DistTo( vPoint );
+
+	if ( flSignedDist >= sideEpsilon )
+		return SIDE_FRONT;
+
+	if ( flSignedDist <= -sideEpsilon )
+		return SIDE_BACK;
+
+	return SIDE_ON;
+}
+
+// Classifies vPoint with no tolerance: SIDE_FRONT only for a strictly positive
+// signed distance, SIDE_BACK for everything else (including exactly on the plane).
+inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const
+{
+	if ( DistTo( vPoint ) > 0.0f )
+		return SIDE_FRONT;
+
+	return SIDE_BACK;
+}
+
+
+// BUGBUG: This should either simply use the implementation in mathlib or cease to exist.
+// mathlib implementation is much more efficient. Check to see that VPlane isn't used in
+// performance critical code.
+inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const
+{
+	// The eight corners of the axis-aligned box spanned by vMin and vMax.
+	TableVector vCorners[8] =
+	{
+		{ vMin.x, vMin.y, vMin.z },
+		{ vMin.x, vMin.y, vMax.z },
+		{ vMin.x, vMax.y, vMax.z },
+		{ vMin.x, vMax.y, vMin.z },
+
+		{ vMax.x, vMin.y, vMin.z },
+		{ vMax.x, vMin.y, vMax.z },
+		{ vMax.x, vMax.y, vMax.z },
+		{ vMax.x, vMax.y, vMin.z },
+	};
+
+	// Classify the first corner, then look for any corner that disagrees with it.
+	const int nReferenceSide = GetPointSideExact( vCorners[0] );
+	for ( int nCorner = 1; nCorner < 8; ++nCorner )
+	{
+		const int nCornerSide = GetPointSideExact( vCorners[nCorner] );
+
+		// A disagreement means the box crosses the plane.
+		if ( nCornerSide != nReferenceSide )
+			return SIDE_ON;
+	}
+
+	// Every corner is on the same side; report that side.
+	return nReferenceSide;
+}
+
+
+
+
+#endif // VPLANE_H